From 3ef23111546df9e9dab2e2befb412a9563db0628 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 6 Aug 2013 16:10:23 -0400 Subject: tile: avoid recursive backtrace faults This change adds support for avoiding recursive backtracer crashes; we haven't seen this in practice other than when things are seriously corrupt, but it may help avoid losing the root cause of a crash. Also, don't abort kernel backtracers for invalid userspace PCs. If we do, we lose the ability to backtrace through a userspace call to a bad address above PAGE_OFFSET, even though it can be perfectly reasonable to continue the backtrace in such a case. Signed-off-by: Chris Metcalf --- arch/tile/kernel/stack.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel/stack.c') diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index af8dfc9665f..c972689231e 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -103,8 +103,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) if (kbt->verbose) pr_err(" <%s while in kernel mode>\n", fault); } else if (EX1_PL(p->ex1) == USER_PL && - p->pc < PAGE_OFFSET && - p->sp < PAGE_OFFSET) { + p->sp < PAGE_OFFSET && p->sp != 0) { if (kbt->verbose) pr_err(" <%s while in user mode>\n", fault); } else if (kbt->verbose) { @@ -351,6 +350,26 @@ static void describe_addr(struct KBacktraceIterator *kbt, vma->vm_start, vma->vm_end - vma->vm_start); } +/* + * Avoid possible crash recursion during backtrace. If it happens, it + * makes it easy to lose the actual root cause of the failure, so we + * put a simple guard on all the backtrace loops. + */ +static bool start_backtrace(void) +{ + if (current->thread.in_backtrace) { + pr_err("Backtrace requested while in backtrace!\n"); + return false; + } + current->thread.in_backtrace = true; + return true; +} + +static void end_backtrace(void) +{ + current->thread.in_backtrace = false; +} + /* * This method wraps the backtracer's more generic support. * It is only invoked from the architecture-specific code; show_stack() @@ -361,6 +380,8 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) int i; int have_mmap_sem = 0; + if (!start_backtrace()) + return; if (headers) { /* * Add a blank line since if we are called from panic(), @@ -402,6 +423,7 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) pr_err("Stack dump complete\n"); if (have_mmap_sem) up_read(&kbt->task->mm->mmap_sem); + end_backtrace(); } EXPORT_SYMBOL(tile_show_stack); @@ -463,6 +485,8 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace) int skip = trace->skip; int i = 0; + if (!start_backtrace()) + goto done; if (task == NULL || task == current) KBacktraceIterator_init_current(&kbt); else @@ -476,6 +500,8 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace) break; trace->entries[i++] = kbt.it.pc; } + end_backtrace(); +done: trace->nr_entries = i; } EXPORT_SYMBOL(save_stack_trace_tsk); -- cgit v1.2.3-70-g09d2 From bc1a298f4e04833db4c430df59b90039f0170515 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 11:36:54 -0400 Subject: tile: support CONFIG_PREEMPT This change adds support for CONFIG_PREEMPT (full kernel preemption). In addition to the core support, this change includes a number of places where we fix up uses of smp_processor_id() and per-cpu variables.
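As an illustrative sketch (not code from this patch), the two patterns those fixups apply are: pin the task to its cpu across the per-cpu access, or switch to raw_smp_processor_id() where a stale cpu number is harmless:

#include <linux/smp.h>
#include <linux/printk.h>

/* Sketch only: pin preemption so the cpu cannot change mid-access. */
static void example_pinned(void)
{
	int cpu = get_cpu();		/* returns cpu id, disables preemption */

	/* ... work that must stay on this cpu, e.g. per-cpu data ... */
	(void)cpu;
	put_cpu();			/* re-enables preemption */
}

/* Sketch only: use the raw accessor where migration is harmless,
 * e.g. in the diagnostic printks this patch converts.
 */
static void example_diagnostic(void)
{
	pr_err("diagnostic on cpu %d\n", raw_smp_processor_id());
}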
I also eliminate the PAGE_HOME_HERE and PAGE_HOME_UNKNOWN values for page homing, as it turns out they weren't being used. Signed-off-by: Chris Metcalf --- arch/tile/Kconfig | 2 ++ arch/tile/include/asm/homecache.h | 8 -------- arch/tile/include/asm/irqflags.h | 21 ++++++++++++++++++--- arch/tile/kernel/asm-offsets.c | 2 ++ arch/tile/kernel/hardwall.c | 18 +++++++++--------- arch/tile/kernel/intvec_32.S | 27 ++++++++++++++++++++------- arch/tile/kernel/intvec_64.S | 30 ++++++++++++++++++++++++------ arch/tile/kernel/irq.c | 1 + arch/tile/kernel/smp.c | 2 +- arch/tile/kernel/smpboot.c | 8 +++++--- arch/tile/kernel/stack.c | 4 ++-- arch/tile/kernel/sys.c | 4 +++- arch/tile/lib/memcpy_tile64.c | 12 ++++++++---- arch/tile/mm/homecache.c | 4 +++- 14 files changed, 98 insertions(+), 45 deletions(-) (limited to 'arch/tile/kernel/stack.c') diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 0576e1d8c4f..1126b9d2f4c 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -301,6 +301,8 @@ config PAGE_OFFSET source "mm/Kconfig" +source "kernel/Kconfig.preempt" + config CMDLINE_BOOL bool "Built-in kernel command line" default n diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h index 7b777132864..49d19dfc063 100644 --- a/arch/tile/include/asm/homecache.h +++ b/arch/tile/include/asm/homecache.h @@ -44,16 +44,8 @@ struct zone; */ #define PAGE_HOME_INCOHERENT -3 -#if CHIP_HAS_CBOX_HOME_MAP() /* Home for the page is distributed via hash-for-home. */ #define PAGE_HOME_HASH -4 -#endif - -/* Homing is unknown or unspecified. Not valid for page_home(). */ -#define PAGE_HOME_UNKNOWN -5 - -/* Home on the current cpu. Not valid for page_home(). */ -#define PAGE_HOME_HERE -6 /* Support wrapper to use instead of explicit hv_flush_remote(). */ extern void flush_remote(unsigned long cache_pfn, unsigned long cache_length, diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h index c96f9bbb760..71af5747874 100644 --- a/arch/tile/include/asm/irqflags.h +++ b/arch/tile/include/asm/irqflags.h @@ -124,6 +124,12 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); #define INITIAL_INTERRUPTS_ENABLED (1ULL << INT_MEM_ERROR) +#ifdef CONFIG_DEBUG_PREEMPT +/* Due to inclusion issues, we can't rely on here. */ +extern unsigned int debug_smp_processor_id(void); +# define smp_processor_id() debug_smp_processor_id() +#endif + /* Disable interrupts. */ #define arch_local_irq_disable() \ interrupt_mask_set_mask(LINUX_MASKABLE_INTERRUPTS) @@ -132,9 +138,18 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); #define arch_local_irq_disable_all() \ interrupt_mask_set_mask(-1ULL) +/* + * Read the set of maskable interrupts. + * We avoid the preemption warning here via __this_cpu_ptr since even + * if irqs are already enabled, it's harmless to read the wrong cpu's + * enabled mask. + */ +#define arch_local_irqs_enabled() \ + (*__this_cpu_ptr(&interrupts_enabled_mask)) + /* Re-enable all maskable interrupts. */ #define arch_local_irq_enable() \ - interrupt_mask_reset_mask(__get_cpu_var(interrupts_enabled_mask)) + interrupt_mask_reset_mask(arch_local_irqs_enabled()) /* Disable or enable interrupts based on flag argument. */ #define arch_local_irq_restore(disabled) do { \ @@ -161,7 +176,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Prevent the given interrupt from being enabled next time we enable irqs. 
*/ #define arch_local_irq_mask(interrupt) \ - (__get_cpu_var(interrupts_enabled_mask) &= ~(1ULL << (interrupt))) + this_cpu_and(interrupts_enabled_mask, ~(1ULL << (interrupt))) /* Prevent the given interrupt from being enabled immediately. */ #define arch_local_irq_mask_now(interrupt) do { \ @@ -171,7 +186,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Allow the given interrupt to be enabled next time we enable irqs. */ #define arch_local_irq_unmask(interrupt) \ - (__get_cpu_var(interrupts_enabled_mask) |= (1ULL << (interrupt))) + this_cpu_or(interrupts_enabled_mask, (1ULL << (interrupt))) /* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */ #define arch_local_irq_unmask_now(interrupt) do { \ diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c index 8652b0be468..97ea6ac0a47 100644 --- a/arch/tile/kernel/asm-offsets.c +++ b/arch/tile/kernel/asm-offsets.c @@ -58,6 +58,8 @@ void foo(void) offsetof(struct thread_info, status)); DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET, offsetof(struct thread_info, homecache_cpu)); + DEFINE(THREAD_INFO_PREEMPT_COUNT_OFFSET, + offsetof(struct thread_info, preempt_count)); DEFINE(THREAD_INFO_STEP_STATE_OFFSET, offsetof(struct thread_info, step_state)); #ifdef __tilegx__ diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 7db8893d4fc..df27a1fd94a 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -272,9 +272,9 @@ static void hardwall_setup_func(void *info) struct hardwall_info *r = info; struct hardwall_type *hwt = r->type; - int cpu = smp_processor_id(); - int x = cpu % smp_width; - int y = cpu / smp_width; + int cpu = smp_processor_id(); /* on_each_cpu disables preemption */ + int x = cpu_x(cpu); + int y = cpu_y(cpu); int bits = 0; if (x == r->ulhc_x) bits |= W_PROTECT; @@ -317,6 +317,7 @@ static void hardwall_protect_rectangle(struct hardwall_info *r) on_each_cpu_mask(&rect_cpus, hardwall_setup_func, r, 1); } +/* Entered from INT_xDN_FIREWALL interrupt vector with irqs disabled. */ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) { struct hardwall_info *rect; @@ -325,7 +326,6 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) struct siginfo info; int cpu = smp_processor_id(); int found_processes; - unsigned long flags; struct pt_regs *old_regs = set_irq_regs(regs); irq_enter(); @@ -346,7 +346,7 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) BUG_ON(hwt->disabled); /* This tile trapped a network access; find the rectangle. 
*/ - spin_lock_irqsave(&hwt->lock, flags); + spin_lock(&hwt->lock); list_for_each_entry(rect, &hwt->list, list) { if (cpumask_test_cpu(cpu, &rect->cpumask)) break; @@ -401,7 +401,7 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) pr_notice("hardwall: no associated processes!\n"); done: - spin_unlock_irqrestore(&hwt->lock, flags); + spin_unlock(&hwt->lock); /* * We have to disable firewall interrupts now, or else when we @@ -661,7 +661,7 @@ static int hardwall_deactivate(struct hardwall_type *hwt, return -EINVAL; printk(KERN_DEBUG "Pid %d (%s) deactivated for %s hardwall: cpu %d\n", - task->pid, task->comm, hwt->name, smp_processor_id()); + task->pid, task->comm, hwt->name, raw_smp_processor_id()); return 0; } @@ -803,8 +803,8 @@ static void reset_xdn_network_state(struct hardwall_type *hwt) /* Reset UDN coordinates to their standard value */ { unsigned int cpu = smp_processor_id(); - unsigned int x = cpu % smp_width; - unsigned int y = cpu / smp_width; + unsigned int x = cpu_x(cpu); + unsigned int y = cpu_y(cpu); __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7)); } diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 388061319c4..10767655689 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -28,10 +28,6 @@ #include #include -#ifdef CONFIG_PREEMPT -# error "No support for kernel preemption currently" -#endif - #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) @@ -812,17 +808,34 @@ STD_ENTRY(interrupt_return) } lw r29, r29 andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + bzt r29, .Lresume_userspace + +#ifdef CONFIG_PREEMPT + /* Returning to kernel space. Check if we need preemption. */ + GET_THREAD_INFO(r29) + addli r28, r29, THREAD_INFO_FLAGS_OFFSET { - bzt r29, .Lresume_userspace - PTREGS_PTR(r29, PTREGS_OFFSET_PC) + lw r28, r28 + addli r29, r29, THREAD_INFO_PREEMPT_COUNT_OFFSET + } + { + andi r28, r28, _TIF_NEED_RESCHED + lw r29, r29 } + bzt r28, 1f + bnz r29, 1f + jal preempt_schedule_irq + FEEDBACK_REENTER(interrupt_return) +1: +#endif /* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */ { - lw r28, r29 + PTREGS_PTR(r29, PTREGS_OFFSET_PC) moveli r27, lo16(_cpu_idle_nap) } { + lw r28, r29 auli r27, r27, ha16(_cpu_idle_nap) } { diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 884af9ea5be..38a60f27707 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -30,10 +30,6 @@ #include #include -#ifdef CONFIG_PREEMPT -# error "No support for kernel preemption currently" -#endif - #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) @@ -820,11 +816,33 @@ STD_ENTRY(interrupt_return) andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ { beqzt r29, .Lresume_userspace - PTREGS_PTR(r29, PTREGS_OFFSET_PC) + move r29, sp + } + +#ifdef CONFIG_PREEMPT + /* Returning to kernel space. Check if we need preemption. */ + EXTRACT_THREAD_INFO(r29) + addli r28, r29, THREAD_INFO_FLAGS_OFFSET + { + ld r28, r28 + addli r29, r29, THREAD_INFO_PREEMPT_COUNT_OFFSET } + { + andi r28, r28, _TIF_NEED_RESCHED + ld4s r29, r29 + } + beqzt r28, 1f + bnez r29, 1f + jal preempt_schedule_irq + FEEDBACK_REENTER(interrupt_return) +1: +#endif /* If we're resuming to _cpu_idle_nap, bump PC forward by 8. 
*/ - moveli r27, hw2_last(_cpu_idle_nap) + { + moveli r27, hw2_last(_cpu_idle_nap) + PTREGS_PTR(r29, PTREGS_OFFSET_PC) + } { ld r28, r29 shl16insli r27, r27, hw1(_cpu_idle_nap) diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 02e62806501..c90de6c3cb7 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -74,6 +74,7 @@ static DEFINE_SPINLOCK(available_irqs_lock); /* * The interrupt handling path, implemented in terms of HV interrupt * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx. + * Entered with interrupts disabled. */ void tile_dev_intr(struct pt_regs *regs, int intnum) { diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index cbc73a8b8fe..6cc520d71d2 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -100,8 +100,8 @@ static void smp_start_cpu_interrupt(void) /* Handler to stop the current cpu. */ static void smp_stop_cpu_interrupt(void) { - set_cpu_online(smp_processor_id(), 0); arch_local_irq_disable_all(); + set_cpu_online(smp_processor_id(), 0); for (;;) asm("nap; nop"); } diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 44bab29bf2f..dee7f13c585 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -142,13 +142,15 @@ static struct cpumask cpu_started __cpuinitdata; */ static void __cpuinit start_secondary(void) { - int cpuid = smp_processor_id(); + int cpuid; + + preempt_disable(); + + cpuid = smp_processor_id(); /* Set our thread pointer appropriately. */ set_my_cpu_offset(__per_cpu_offset[cpuid]); - preempt_disable(); - /* * In large machines even this will slow us down, since we * will be contending for for the printk spinlock. diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index c972689231e..176ffe48eee 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -194,7 +194,7 @@ static int KBacktraceIterator_next_item_inclusive( */ static void validate_stack(struct pt_regs *regs) { - int cpu = smp_processor_id(); + int cpu = raw_smp_processor_id(); unsigned long ksp0 = get_current_ksp0(); unsigned long ksp0_base = ksp0 - THREAD_SIZE; unsigned long sp = stack_pointer; @@ -392,7 +392,7 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) pr_err("Starting stack dump of tid %d, pid %d (%s)" " on cpu %d at cycle %lld\n", kbt->task->pid, kbt->task->tgid, kbt->task->comm, - smp_processor_id(), get_cycles()); + raw_smp_processor_id(), get_cycles()); } kbt->verbose = 1; i = 0; diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c index b881a7be24b..38debe70606 100644 --- a/arch/tile/kernel/sys.c +++ b/arch/tile/kernel/sys.c @@ -38,8 +38,10 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, len, unsigned long, flags) { + /* DCACHE is not particularly effective if not bound to one cpu. 
*/ if (flags & DCACHE) - homecache_evict(cpumask_of(smp_processor_id())); + homecache_evict(cpumask_of(raw_smp_processor_id())); + if (flags & ICACHE) flush_remote(0, HV_FLUSH_EVICT_L1I, mm_cpumask(current->mm), 0, 0, 0, NULL, NULL, 0); diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c index 3bc4b4e40d9..0290c222847 100644 --- a/arch/tile/lib/memcpy_tile64.c +++ b/arch/tile/lib/memcpy_tile64.c @@ -65,7 +65,7 @@ static void memcpy_multicache(void *dest, const void *source, pmd_t *pmdp; pte_t *ptep; int type0, type1; - int cpu = get_cpu(); + int cpu = smp_processor_id(); /* * Disable interrupts so that we don't recurse into memcpy() @@ -126,7 +126,6 @@ static void memcpy_multicache(void *dest, const void *source, kmap_atomic_idx_pop(); sim_allow_multiple_caching(0); local_irq_restore(flags); - put_cpu(); } /* @@ -137,6 +136,9 @@ static void memcpy_multicache(void *dest, const void *source, static unsigned long fast_copy(void *dest, const void *source, int len, memcpy_t func) { + int cpu = get_cpu(); + unsigned long retval; + /* * Check if it's big enough to bother with. We may end up doing a * small copy via TLB manipulation if we're near a page boundary, @@ -158,7 +160,7 @@ retry_source: !hv_pte_get_readable(src_pte) || hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3) break; - if (get_remote_cache_cpu(src_pte) == smp_processor_id()) + if (get_remote_cache_cpu(src_pte) == cpu) break; src_page = pfn_to_page(pte_pfn(src_pte)); get_page(src_page); @@ -235,7 +237,9 @@ retry_dest: len -= copy_size; } - return func(dest, source, len); + retval = func(dest, source, len); + put_cpu(); + return retval; } void *memcpy(void *to, const void *from, __kernel_size_t n) diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index 1ae911939a1..df46a2d5bdf 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -172,7 +172,8 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, static void homecache_finv_page_va(void* va, int home) { - if (home == smp_processor_id()) { + int cpu = get_cpu(); + if (home == cpu) { finv_buffer_local(va, PAGE_SIZE); } else if (home == PAGE_HOME_HASH) { finv_buffer_remote(va, PAGE_SIZE, 1); @@ -180,6 +181,7 @@ static void homecache_finv_page_va(void* va, int home) BUG_ON(home < 0 || home >= NR_CPUS); finv_buffer_remote(va, PAGE_SIZE, 0); } + put_cpu(); } void homecache_finv_map_page(struct page *page, int home) -- cgit v1.2.3-70-g09d2 From 4a556f4f56da3110b27e265b79f0e7582115445c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 15:33:32 -0400 Subject: tile: implement gettimeofday() via vDSO This change creates the framework for vDSO calls, makes the existing rt_sigreturn() mechanism use it, and adds a fast gettimeofday(). Now that we need to expose the vDSO address to userspace, we add AT_SYSINFO_EHDR to the set of aux entries provided to userspace. (You can disable any extra vDSO support by booting with vdso=0, but the rt_sigreturn vDSO page will still be provided.) Note that glibc has supported the tile vDSO since release 2.17. 
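As an illustrative userspace sketch (not part of this patch), no application changes are needed to benefit: with glibc 2.17 or later on tile, an ordinary gettimeofday() call is resolved via AT_SYSINFO_EHDR to the vDSO and completes without entering the kernel:

#include <stdio.h>
#include <sys/time.h>

int main(void)
{
	struct timeval tv;

	/* Served from the vDSO fast path; no syscall needed. */
	if (gettimeofday(&tv, NULL) == 0)
		printf("%ld.%06ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
	return 0;
}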
Signed-off-by: Chris Metcalf --- arch/tile/Kconfig | 3 + arch/tile/include/asm/elf.h | 5 + arch/tile/include/asm/mmu.h | 1 + arch/tile/include/asm/page.h | 8 +- arch/tile/include/asm/processor.h | 6 +- arch/tile/include/asm/sections.h | 4 + arch/tile/include/asm/vdso.h | 49 ++++++++ arch/tile/include/uapi/asm/auxvec.h | 3 +- arch/tile/kernel/Makefile | 4 +- arch/tile/kernel/compat_signal.c | 3 +- arch/tile/kernel/entry.S | 16 --- arch/tile/kernel/signal.c | 3 +- arch/tile/kernel/stack.c | 5 +- arch/tile/kernel/time.c | 37 +++++- arch/tile/kernel/vdso.c | 212 ++++++++++++++++++++++++++++++++++ arch/tile/kernel/vdso/Makefile | 118 +++++++++++++++++++ arch/tile/kernel/vdso/vdso.S | 28 +++++ arch/tile/kernel/vdso/vdso.lds.S | 87 ++++++++++++++ arch/tile/kernel/vdso/vdso32.S | 28 +++++ arch/tile/kernel/vdso/vgettimeofday.c | 107 +++++++++++++++++ arch/tile/kernel/vdso/vrt_sigreturn.S | 30 +++++ arch/tile/mm/elf.c | 37 +----- 22 files changed, 732 insertions(+), 62 deletions(-) create mode 100644 arch/tile/include/asm/vdso.h create mode 100644 arch/tile/kernel/vdso.c create mode 100644 arch/tile/kernel/vdso/Makefile create mode 100644 arch/tile/kernel/vdso/vdso.S create mode 100644 arch/tile/kernel/vdso/vdso.lds.S create mode 100644 arch/tile/kernel/vdso/vdso32.S create mode 100644 arch/tile/kernel/vdso/vgettimeofday.c create mode 100644 arch/tile/kernel/vdso/vrt_sigreturn.S (limited to 'arch/tile/kernel/stack.c') diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 1126b9d2f4c..7b87318ee69 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -64,6 +64,9 @@ config HUGETLB_SUPER_PAGES depends on HUGETLB_PAGE && TILEGX def_bool y +config GENERIC_TIME_VSYSCALL + def_bool y + # FIXME: tilegx can implement a more efficient rwsem. config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h index ff8a9340882..31d854f1b83 100644 --- a/arch/tile/include/asm/elf.h +++ b/arch/tile/include/asm/elf.h @@ -132,6 +132,11 @@ extern int dump_task_regs(struct task_struct *, elf_gregset_t *); struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); +#define ARCH_DLINFO \ +do { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \ +} while (0) + #ifdef CONFIG_COMPAT #define COMPAT_ELF_PLATFORM "tilegx-m32" diff --git a/arch/tile/include/asm/mmu.h b/arch/tile/include/asm/mmu.h index e2c78909679..0cab1182bde 100644 --- a/arch/tile/include/asm/mmu.h +++ b/arch/tile/include/asm/mmu.h @@ -22,6 +22,7 @@ struct mm_context { * semaphore but atomically, but it is conservatively set. */ unsigned long priority_cached; + unsigned long vdso_base; }; typedef struct mm_context mm_context_t; diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index dd033a4fd62..b4f96c0024d 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -38,6 +38,12 @@ #define PAGE_MASK (~(PAGE_SIZE - 1)) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) +/* + * We do define AT_SYSINFO_EHDR to support vDSO, + * but don't use the gate mechanism. 
+ */ +#define __HAVE_ARCH_GATE_AREA 1 + /* * If the Kconfig doesn't specify, set a maximum zone order that * is enough so that we can create huge pages from small pages given @@ -246,7 +252,7 @@ static inline __attribute_const__ int get_order(unsigned long size) #endif /* __tilegx__ */ -#ifndef __ASSEMBLY__ +#if !defined(__ASSEMBLY__) && !defined(VDSO_BUILD) #ifdef CONFIG_HIGHMEM diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index fed1c044fec..461322b473b 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -180,10 +180,10 @@ struct thread_struct { #define TASK_SIZE TASK_SIZE_MAX #endif -/* We provide a minimal "vdso" a la x86; just the sigreturn code for now. */ -#define VDSO_BASE (TASK_SIZE - PAGE_SIZE) +#define VDSO_BASE ((unsigned long)current->active_mm->context.vdso_base) +#define VDSO_SYM(x) (VDSO_BASE + (unsigned long)(x)) -#define STACK_TOP VDSO_BASE +#define STACK_TOP TASK_SIZE /* STACK_TOP_MAX is used temporarily in execve and should not check COMPAT. */ #define STACK_TOP_MAX TASK_SIZE_MAX diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h index cc95276ef9c..5d5d3b739a6 100644 --- a/arch/tile/include/asm/sections.h +++ b/arch/tile/include/asm/sections.h @@ -25,6 +25,10 @@ extern char _sinitdata[], _einitdata[]; /* Write-once data is writable only till the end of initialization. */ extern char __w1data_begin[], __w1data_end[]; +extern char vdso_start[], vdso_end[]; +#ifdef CONFIG_COMPAT +extern char vdso32_start[], vdso32_end[]; +#endif /* Not exactly sections, but PC comparison points in the code. */ extern char __rt_sigreturn[], __rt_sigreturn_end[]; diff --git a/arch/tile/include/asm/vdso.h b/arch/tile/include/asm/vdso.h new file mode 100644 index 00000000000..9f6a78d665f --- /dev/null +++ b/arch/tile/include/asm/vdso.h @@ -0,0 +1,49 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef __TILE_VDSO_H__ +#define __TILE_VDSO_H__ + +#include + +/* + * Note about the vdso_data structure: + * + * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the + * structure is supposed to be known only to the function in the vdso + * itself and may change without notice. + */ + +struct vdso_data { + __u64 tz_update_count; /* Timezone atomicity ctr */ + __u64 tb_update_count; /* Timebase atomicity ctr */ + __u64 xtime_tod_stamp; /* TOD clock for xtime */ + __u64 xtime_clock_sec; /* Kernel time second */ + __u64 xtime_clock_nsec; /* Kernel time nanosecond */ + __u64 wtom_clock_sec; /* Wall to monotonic clock second */ + __u64 wtom_clock_nsec; /* Wall to monotonic clock nanosecond */ + __u32 mult; /* Cycle to nanosecond multiplier */ + __u32 shift; /* Cycle to nanosecond divisor (power of two) */ + __u32 tz_minuteswest; /* Minutes west of Greenwich */ + __u32 tz_dsttime; /* Type of dst correction */ +}; + +extern struct vdso_data *vdso_data; + +/* __vdso_rt_sigreturn is defined with the addresses in the vdso page. 
*/ +extern void __vdso_rt_sigreturn(void); + +extern int setup_vdso_pages(void); + +#endif /* __TILE_VDSO_H__ */ diff --git a/arch/tile/include/uapi/asm/auxvec.h b/arch/tile/include/uapi/asm/auxvec.h index 1d393edb064..c93e92709f1 100644 --- a/arch/tile/include/uapi/asm/auxvec.h +++ b/arch/tile/include/uapi/asm/auxvec.h @@ -15,6 +15,7 @@ #ifndef _ASM_TILE_AUXVEC_H #define _ASM_TILE_AUXVEC_H -/* No extensions to auxvec */ +/* The vDSO location. */ +#define AT_SYSINFO_EHDR 33 #endif /* _ASM_TILE_AUXVEC_H */ diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 6846c4ef5bf..5157d1c4b4e 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -6,7 +6,7 @@ extra-y := vmlinux.lds head_$(BITS).o obj-y := backtrace.o entry.o irq.o messaging.o \ pci-dma.o proc.o process.o ptrace.o reboot.o \ setup.o signal.o single_step.o stack.o sys.o \ - sysfs.o time.o traps.o unaligned.o \ + sysfs.o time.o traps.o unaligned.o vdso.o \ intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o obj-$(CONFIG_HARDWALL) += hardwall.o @@ -21,3 +21,5 @@ else obj-$(CONFIG_PCI) += pci.o endif obj-$(CONFIG_TILE_USB) += usb.o + +obj-y += vdso/ diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index d0a052e725b..85e00b2f39b 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -32,6 +32,7 @@ #include #include #include +#include #include struct compat_ucontext { @@ -227,7 +228,7 @@ int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (err) goto give_sigsegv; - restorer = VDSO_BASE; + restorer = VDSO_SYM(&__vdso_rt_sigreturn); if (ka->sa.sa_flags & SA_RESTORER) restorer = ptr_to_compat_reg(ka->sa.sa_restorer); diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index f116cb0bce2..3d9175992a2 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -27,22 +27,6 @@ STD_ENTRY(current_text_addr) { move r0, lr; jrp lr } STD_ENDPROC(current_text_addr) -/* - * We don't run this function directly, but instead copy it to a page - * we map into every user process. See vdso_setup(). - * - * Note that libc has a copy of this function that it uses to compare - * against the PC when a stack backtrace ends, so if this code is - * changed, the libc implementation(s) should also be updated. - */ - .pushsection .data -ENTRY(__rt_sigreturn) - moveli TREG_SYSCALL_NR_NAME,__NR_rt_sigreturn - swint1 - ENDPROC(__rt_sigreturn) - ENTRY(__rt_sigreturn_end) - .popsection - STD_ENTRY(dump_stack) { move r2, lr; lnk r1 } { move r4, r52; addli r1, r1, dump_stack - . } diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 9531845bf66..2d1dbf38a9a 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #define DEBUG_SIG 0 @@ -190,7 +191,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (err) goto give_sigsegv; - restorer = VDSO_BASE; + restorer = VDSO_SYM(&__vdso_rt_sigreturn); if (ka->sa.sa_flags & SA_RESTORER) restorer = (unsigned long) ka->sa.sa_restorer; diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 176ffe48eee..a9db923bb9e 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -119,7 +120,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) /* Is the pc pointing to a sigreturn trampoline? 
*/ static int is_sigreturn(unsigned long pc) { - return (pc == VDSO_BASE); + return current->mm && (pc == VDSO_SYM(&__vdso_rt_sigreturn)); } /* Return a pt_regs pointer for a valid signal handler frame */ @@ -128,7 +129,7 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt, { BacktraceIterator *b = &kbt->it; - if (b->pc == VDSO_BASE && b->sp < PAGE_OFFSET && + if (is_sigreturn(b->pc) && b->sp < PAGE_OFFSET && b->sp % sizeof(long) == 0) { int retval; pagefault_disable(); diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index 5ac397ec698..36dc1e1bc0a 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -23,8 +23,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -110,7 +112,6 @@ void __init time_init(void) setup_tile_timer(); } - /* * Define the tile timer clock event device. The timer is driven by * the TILE_TIMER_CONTROL register, which consists of a 31-bit down @@ -237,3 +238,37 @@ cycles_t ns2cycles(unsigned long nsecs) struct clock_event_device *dev = &__raw_get_cpu_var(tile_timer); return ((u64)nsecs * dev->mult) >> dev->shift; } + +void update_vsyscall_tz(void) +{ + /* Userspace gettimeofday will spin while this value is odd. */ + ++vdso_data->tz_update_count; + smp_wmb(); + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + smp_wmb(); + ++vdso_data->tz_update_count; +} + +void update_vsyscall(struct timekeeper *tk) +{ + struct timespec wall_time = tk_xtime(tk); + struct timespec *wtm = &tk->wall_to_monotonic; + struct clocksource *clock = tk->clock; + + if (clock != &cycle_counter_cs) + return; + + /* Userspace gettimeofday will spin while this value is odd. */ + ++vdso_data->tb_update_count; + smp_wmb(); + vdso_data->xtime_tod_stamp = clock->cycle_last; + vdso_data->xtime_clock_sec = wall_time.tv_sec; + vdso_data->xtime_clock_nsec = wall_time.tv_nsec; + vdso_data->wtom_clock_sec = wtm->tv_sec; + vdso_data->wtom_clock_nsec = wtm->tv_nsec; + vdso_data->mult = clock->mult; + vdso_data->shift = clock->shift; + smp_wmb(); + ++vdso_data->tb_update_count; +} diff --git a/arch/tile/kernel/vdso.c b/arch/tile/kernel/vdso.c new file mode 100644 index 00000000000..1533af24106 --- /dev/null +++ b/arch/tile/kernel/vdso.c @@ -0,0 +1,212 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +/* The alignment of the vDSO. */ +#define VDSO_ALIGNMENT PAGE_SIZE + + +static unsigned int vdso_pages; +static struct page **vdso_pagelist; + +#ifdef CONFIG_COMPAT +static unsigned int vdso32_pages; +static struct page **vdso32_pagelist; +#endif +static int vdso_ready; + +/* + * The vdso data page. 
+ */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; + +struct vdso_data *vdso_data = &vdso_data_store.data; + +static unsigned int __read_mostly vdso_enabled = 1; + +static struct page **vdso_setup(void *vdso_kbase, unsigned int pages) +{ + int i; + struct page **pagelist; + + pagelist = kzalloc(sizeof(struct page *) * (pages + 1), GFP_KERNEL); + BUG_ON(pagelist == NULL); + for (i = 0; i < pages - 1; i++) { + struct page *pg = virt_to_page(vdso_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + pagelist[i] = pg; + } + pagelist[pages - 1] = virt_to_page(vdso_data); + pagelist[pages] = NULL; + + return pagelist; +} + +static int __init vdso_init(void) +{ + int data_pages = sizeof(vdso_data_store) >> PAGE_SHIFT; + + /* + * We can disable vDSO support generally, but we need to retain + * one page to support the two-bundle (16-byte) rt_sigreturn path. + */ + if (!vdso_enabled) { + size_t offset = (unsigned long)&__vdso_rt_sigreturn; + static struct page *sigret_page; + sigret_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + BUG_ON(sigret_page == NULL); + vdso_pagelist = &sigret_page; + vdso_pages = 1; + BUG_ON(offset >= PAGE_SIZE); + memcpy(page_address(sigret_page) + offset, + vdso_start + offset, 16); +#ifdef CONFIG_COMPAT + vdso32_pages = vdso_pages; + vdso32_pagelist = vdso_pagelist; +#endif + vdso_ready = 1; + return 0; + } + + vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; + vdso_pages += data_pages; + vdso_pagelist = vdso_setup(vdso_start, vdso_pages); + +#ifdef CONFIG_COMPAT + vdso32_pages = (vdso32_end - vdso32_start) >> PAGE_SHIFT; + vdso32_pages += data_pages; + vdso32_pagelist = vdso_setup(vdso32_start, vdso32_pages); +#endif + + smp_wmb(); + vdso_ready = 1; + + return 0; +} +arch_initcall(vdso_init); + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == VDSO_BASE) + return "[vdso]"; +#ifndef __tilegx__ + if (vma->vm_start == MEM_USER_INTRPT) + return "[intrpt]"; +#endif + return NULL; +} + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return NULL; +} + +int in_gate_area(struct mm_struct *mm, unsigned long address) +{ + return 0; +} + +int in_gate_area_no_mm(unsigned long address) +{ + return 0; +} + +int setup_vdso_pages(void) +{ + struct page **pagelist; + unsigned long pages; + struct mm_struct *mm = current->mm; + unsigned long vdso_base = 0; + int retval = 0; + + if (!vdso_ready) + return 0; + + mm->context.vdso_base = 0; + + pagelist = vdso_pagelist; + pages = vdso_pages; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + pagelist = vdso32_pagelist; + pages = vdso32_pages; + } +#endif + + /* + * vDSO has a problem and was disabled, just don't "enable" it for the + * process. + */ + if (pages == 0) + return 0; + + vdso_base = get_unmapped_area(NULL, vdso_base, + (pages << PAGE_SHIFT) + + ((VDSO_ALIGNMENT - 1) & PAGE_MASK), + 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + retval = vdso_base; + return retval; + } + + /* Add required alignment. */ + vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT); + + /* + * Put vDSO base into mm struct. We need to do this before calling + * install_special_mapping or the perf counter mmap tracking code + * will fail to recognise it as a vDSO (since arch_vma_name fails). + */ + mm->context.vdso_base = vdso_base; + + /* + * our vma flags don't have VM_WRITE so by default, the process isn't + * allowed to write those pages. 
+ * gdb can break that with ptrace interface, and thus trigger COW on + * those pages but it's then your responsibility to never do that on + * the "data" page of the vDSO or you'll stop getting kernel updates + * and your nice userland gettimeofday will be totally dead. + * It's fine to use that for setting breakpoints in the vDSO code + * pages though + */ + retval = install_special_mapping(mm, vdso_base, + pages << PAGE_SHIFT, + VM_READ|VM_EXEC | + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, + pagelist); + if (retval) + mm->context.vdso_base = 0; + + return retval; +} + +static __init int vdso_func(char *s) +{ + return kstrtouint(s, 0, &vdso_enabled); +} +__setup("vdso=", vdso_func); diff --git a/arch/tile/kernel/vdso/Makefile b/arch/tile/kernel/vdso/Makefile new file mode 100644 index 00000000000..e2b7a2f4ee4 --- /dev/null +++ b/arch/tile/kernel/vdso/Makefile @@ -0,0 +1,118 @@ +# Symbols present in the vdso +vdso-syms = rt_sigreturn gettimeofday + +# Files to link into the vdso +obj-vdso = $(patsubst %, v%.o, $(vdso-syms)) + +# Build rules +targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds +obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) + +# vdso32 is only for tilegx -m32 compat task. +VDSO32-$(CONFIG_COMPAT) := y + +obj-y += vdso.o +obj-$(VDSO32-y) += vdso32.o +extra-y += vdso.lds +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +# vDSO code runs in userspace and -pg doesn't help with profiling anyway. +CFLAGS_REMOVE_vdso.o = -pg +CFLAGS_REMOVE_vdso32.o = -pg +CFLAGS_REMOVE_vrt_sigreturn.o = -pg +CFLAGS_REMOVE_vrt_sigreturn32.o = -pg +CFLAGS_REMOVE_vgettimeofday.o = -pg +CFLAGS_REMOVE_vgettimeofday32.o = -pg + +ifdef CONFIG_FEEDBACK_COLLECT +# vDSO code runs in userspace, not collecting feedback data. +CFLAGS_REMOVE_vdso.o = -ffeedback-generate +CFLAGS_REMOVE_vdso32.o = -ffeedback-generate +CFLAGS_REMOVE_vrt_sigreturn.o = -ffeedback-generate +CFLAGS_REMOVE_vrt_sigreturn32.o = -ffeedback-generate +CFLAGS_REMOVE_vgettimeofday.o = -ffeedback-generate +CFLAGS_REMOVE_vgettimeofday32.o = -ffeedback-generate +endif + +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + +# Force dependency +$(obj)/vdso.o: $(obj)/vdso.so + +# link rule for the .so file, .lds has to be first +SYSCFLAGS_vdso.so.dbg = $(c_flags) +$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) + $(call if_changed,vdsold) + + +# We also create a special relocatable object that should mirror the symbol +# table and layout of the linked DSO. With ld -R we can then refer to +# these symbols in the kernel code rather than hand-coded addresses. +extra-y += vdso-syms.o +$(obj)/built-in.o: $(obj)/vdso-syms.o +$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o + +SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +SYSCFLAGS_vdso_syms.o = -r +$(obj)/vdso-syms.o: $(src)/vdso.lds $(obj)/vrt_sigreturn.o FORCE + $(call if_changed,vdsold) + + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# actual build commands +# The DSO images are built using a special linker script +# Add -lgcc so tilepro gets static muldi3 and lshrdi3 definitions. +# Make sure only to export the intended __vdso_xxx symbol offsets. 
+quiet_cmd_vdsold = VDSOLD $@ + cmd_vdsold = $(CC) $(KCFLAGS) -nostdlib $(SYSCFLAGS_$(@F)) \ + -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp -lgcc && \ + $(CROSS_COMPILE)objcopy \ + $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso.so: $(obj)/vdso.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso32.so: $(obj)/vdso32.so.dbg + $(call cmd,vdso_install) + +vdso_install: vdso.so +vdso32_install: vdso32.so + + +KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_32 += -m32 -s +KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_32 += -m32 -fPIC -shared + +obj-vdso32 = $(patsubst %, v%32.o, $(vdso-syms)) +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) + +targets += $(obj-vdso32) vdso32.so vdso32.so.dbg + +$(obj-vdso32:%=%): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) +$(obj-vdso32:%=%): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) + +$(obj)/vgettimeofday32.o: $(obj)/vgettimeofday.c + $(call if_changed,cc_o_c) + +$(obj)/vrt_sigreturn32.o: $(obj)/vrt_sigreturn.S + $(call if_changed,as_o_S) + +# Force dependency +$(obj)/vdso32.o: $(obj)/vdso32.so + +SYSCFLAGS_vdso32.so.dbg = -m32 -shared -s -Wl,-soname=linux-vdso32.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +$(obj)/vdso32.so.dbg: $(src)/vdso.lds $(obj-vdso32) + $(call if_changed,vdsold) diff --git a/arch/tile/kernel/vdso/vdso.S b/arch/tile/kernel/vdso/vdso.S new file mode 100644 index 00000000000..3467adb4163 --- /dev/null +++ b/arch/tile/kernel/vdso/vdso.S @@ -0,0 +1,28 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include +#include +#include + + __PAGE_ALIGNED_DATA + + .global vdso_start, vdso_end + .align PAGE_SIZE +vdso_start: + .incbin "arch/tile/kernel/vdso/vdso.so" + .align PAGE_SIZE +vdso_end: + + .previous diff --git a/arch/tile/kernel/vdso/vdso.lds.S b/arch/tile/kernel/vdso/vdso.lds.S new file mode 100644 index 00000000000..041cd6c39c8 --- /dev/null +++ b/arch/tile/kernel/vdso/vdso.lds.S @@ -0,0 +1,87 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#define VDSO_VERSION_STRING LINUX_2.6 + + +OUTPUT_ARCH(tile) + +/* The ELF entry point can be used to set the AT_SYSINFO value. */ +ENTRY(__vdso_rt_sigreturn); + + +SECTIONS +{ + . 
= SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + + /* + * This linker script is used both with -r and with -shared. + * For the layouts to match, we need to skip more than enough + * space for the dynamic symbol table et al. If this amount + * is insufficient, ld -shared will barf. Just increase it here. + */ + . = 0x1000; + .text : { *(.text .text.*) } :text + + .data : { + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + } +} + + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + + +/* + * This controls what userland symbols we export from the vDSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + __vdso_rt_sigreturn; + __vdso_gettimeofday; + gettimeofday; + local:*; + }; +} diff --git a/arch/tile/kernel/vdso/vdso32.S b/arch/tile/kernel/vdso/vdso32.S new file mode 100644 index 00000000000..1d1ac3257e1 --- /dev/null +++ b/arch/tile/kernel/vdso/vdso32.S @@ -0,0 +1,28 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include +#include +#include + + __PAGE_ALIGNED_DATA + + .global vdso32_start, vdso32_end + .align PAGE_SIZE +vdso32_start: + .incbin "arch/tile/kernel/vdso/vdso32.so" + .align PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/tile/kernel/vdso/vgettimeofday.c b/arch/tile/kernel/vdso/vgettimeofday.c new file mode 100644 index 00000000000..51ec8e46f5f --- /dev/null +++ b/arch/tile/kernel/vdso/vgettimeofday.c @@ -0,0 +1,107 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. 
+ */ + +#define VDSO_BUILD /* avoid some shift warnings for -m32 in */ +#include +#include +#include + +#if CHIP_HAS_SPLIT_CYCLE() +static inline cycles_t get_cycles_inline(void) +{ + unsigned int high = __insn_mfspr(SPR_CYCLE_HIGH); + unsigned int low = __insn_mfspr(SPR_CYCLE_LOW); + unsigned int high2 = __insn_mfspr(SPR_CYCLE_HIGH); + + while (unlikely(high != high2)) { + low = __insn_mfspr(SPR_CYCLE_LOW); + high = high2; + high2 = __insn_mfspr(SPR_CYCLE_HIGH); + } + + return (((cycles_t)high) << 32) | low; +} +#define get_cycles get_cycles_inline +#endif + +/* + * Find out the vDSO data page address in the process address space. + */ +inline unsigned long get_datapage(void) +{ + unsigned long ret; + + /* vdso data page located in the 2nd vDSO page. */ + asm volatile ("lnk %0" : "=r"(ret)); + ret &= ~(PAGE_SIZE - 1); + ret += PAGE_SIZE; + + return ret; +} + +int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + cycles_t cycles; + unsigned long count, sec, ns; + volatile struct vdso_data *vdso_data; + + vdso_data = (struct vdso_data *)get_datapage(); + /* The use of the timezone is obsolete, normally tz is NULL. */ + if (unlikely(tz != NULL)) { + while (1) { + /* Spin until the update finish. */ + count = vdso_data->tz_update_count; + if (count & 1) + continue; + + tz->tz_minuteswest = vdso_data->tz_minuteswest; + tz->tz_dsttime = vdso_data->tz_dsttime; + + /* Check whether updated, read again if so. */ + if (count == vdso_data->tz_update_count) + break; + } + } + + if (unlikely(tv == NULL)) + return 0; + + while (1) { + /* Spin until the update finish. */ + count = vdso_data->tb_update_count; + if (count & 1) + continue; + + cycles = (get_cycles() - vdso_data->xtime_tod_stamp); + ns = (cycles * vdso_data->mult) >> vdso_data->shift; + sec = vdso_data->xtime_clock_sec; + ns += vdso_data->xtime_clock_nsec; + if (ns >= NSEC_PER_SEC) { + ns -= NSEC_PER_SEC; + sec += 1; + } + + /* Check whether updated, read again if so. */ + if (count == vdso_data->tb_update_count) + break; + } + + tv->tv_sec = sec; + tv->tv_usec = ns / 1000; + + return 0; +} + +int gettimeofday(struct timeval *tv, struct timezone *tz) + __attribute__((weak, alias("__vdso_gettimeofday"))); diff --git a/arch/tile/kernel/vdso/vrt_sigreturn.S b/arch/tile/kernel/vdso/vrt_sigreturn.S new file mode 100644 index 00000000000..6326caf4a03 --- /dev/null +++ b/arch/tile/kernel/vdso/vrt_sigreturn.S @@ -0,0 +1,30 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include +#include +#include + +/* + * Note that libc has a copy of this function that it uses to compare + * against the PC when a stack backtrace ends, so if this code is + * changed, the libc implementation(s) should also be updated. + */ +ENTRY(__vdso_rt_sigreturn) + moveli TREG_SYSCALL_NR_NAME, __NR_rt_sigreturn + swint1 + /* We don't use ENDPROC to avoid tagging this symbol as FUNC, + * which confuses the perf tool. 
+ */ + END(__vdso_rt_sigreturn) diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c index 1691b81b2b0..23f044e8a7a 100644 --- a/arch/tile/mm/elf.c +++ b/arch/tile/mm/elf.c @@ -21,6 +21,7 @@ #include #include #include +#include #include /* Notify a running simulator, if any, that an exec just occurred. */ @@ -102,37 +103,10 @@ static void sim_notify_interp(unsigned long load_addr) } -/* Kernel address of page used to map read-only kernel data into userspace. */ -static void *vdso_page; - -/* One-entry array used for install_special_mapping. */ -static struct page *vdso_pages[1]; - -static int __init vdso_setup(void) -{ - vdso_page = (void *)get_zeroed_page(GFP_ATOMIC); - memcpy(vdso_page, __rt_sigreturn, __rt_sigreturn_end - __rt_sigreturn); - vdso_pages[0] = virt_to_page(vdso_page); - return 0; -} -device_initcall(vdso_setup); - -const char *arch_vma_name(struct vm_area_struct *vma) -{ - if (vma->vm_private_data == vdso_pages) - return "[vdso]"; -#ifndef __tilegx__ - if (vma->vm_start == MEM_USER_INTRPT) - return "[intrpt]"; -#endif - return NULL; -} - int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack) { struct mm_struct *mm = current->mm; - unsigned long vdso_base; int retval = 0; down_write(&mm->mmap_sem); @@ -145,14 +119,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, if (!notify_exec(mm)) sim_notify_exec(bprm->filename); - /* - * MAYWRITE to allow gdb to COW and set breakpoints - */ - vdso_base = VDSO_BASE; - retval = install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pages); + retval = setup_vdso_pages(); #ifndef __tilegx__ /* -- cgit v1.2.3-70-g09d2 From 35f059761c5ac313d13372fe3cdaa41bce3d0dbf Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Sat, 10 Aug 2013 12:35:02 -0400 Subject: tilegx: change how we find the kernel stack Previously, we used a special-purpose register (SPR_SYSTEM_SAVE_K_0) to hold the CPU number and the top of the current kernel stack by using the low bits to hold the CPU number, and using the high bits to hold the address of the page just above where we'd want the kernel stack to be. That way we could initialize a new SP when first entering the kernel by just masking the SPR value and subtracting a couple of words. However, it's actually more useful to be able to place an arbitrary kernel-top value in the SPR. This allows us to create a new stack context (e.g. for virtualization) with an arbitrary top-of-stack VA. To make this work, we now store the CPU number in the high bits, above the highest legal VA bit (42 bits in the current tilegx microarchitecture). The full 42 bits are thus available to store the top of stack value. Getting the current cpu (a relatively common operation) is still fast; it's now a shift rather than a mask. We make this change only for tilegx, since tilepro has too few SPR bits to do this, and we don't need this support on tilepro anyway. 
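To make the new bit layout concrete, here is an illustrative sketch (mirroring the CPU_SHIFT == 48 tilegx layout in the patch below, not actual kernel code) of how the cpu number and top-of-stack value are recovered from SPR_SYSTEM_SAVE_K_0:

/* Sketch of the tilegx encoding: cpu number above bit 48,
 * top-of-stack VA sign-extended from the low 48 bits.
 */
#define CPU_SHIFT 48

static inline int spr_to_cpu(unsigned long long spr)
{
	return (int)(spr >> CPU_SHIFT);	/* cpu is now just a shift away */
}

static inline unsigned long long spr_to_ksp0(unsigned long long spr)
{
	/* Arithmetic right shift sign-extends the low CPU_SHIFT bits,
	 * recovering the full kernel stack-top VA.
	 */
	return (unsigned long long)
		(((long long)spr << (64 - CPU_SHIFT)) >> (64 - CPU_SHIFT));
}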
Signed-off-by: Chris Metcalf --- arch/tile/include/asm/processor.h | 47 +++++++++++++++++++++++++++------------ arch/tile/kernel/head_32.S | 3 +-- arch/tile/kernel/head_64.S | 6 ++--- arch/tile/kernel/intvec_32.S | 7 ++++-- arch/tile/kernel/intvec_64.S | 21 +++++++++-------- arch/tile/kernel/stack.c | 10 ++++----- 6 files changed, 57 insertions(+), 37 deletions(-) (limited to 'arch/tile/kernel/stack.c') diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 461322b473b..230b830e94d 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -148,9 +148,10 @@ struct thread_struct { /* * Start with "sp" this many bytes below the top of the kernel stack. - * This preserves the invariant that a called function may write to *sp. + * This allows us to be cache-aware when handling the initial save + * of the pt_regs value to the stack. */ -#define STACK_TOP_DELTA 8 +#define STACK_TOP_DELTA 64 /* * When entering the kernel via a fault, start with the top of the @@ -234,15 +235,15 @@ extern int do_work_pending(struct pt_regs *regs, u32 flags); unsigned long get_wchan(struct task_struct *p); /* Return initial ksp value for given task. */ -#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE) +#define task_ksp0(task) \ + ((unsigned long)(task)->stack + THREAD_SIZE - STACK_TOP_DELTA) /* Return some info about the user process TASK. */ -#define KSTK_TOP(task) (task_ksp0(task) - STACK_TOP_DELTA) #define task_pt_regs(task) \ - ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) + ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) #define current_pt_regs() \ - ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \ - (KSTK_PTREGS_GAP - 1)) - 1) + ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \ + STACK_TOP_DELTA - (KSTK_PTREGS_GAP - 1)) - 1) #define task_sp(task) (task_pt_regs(task)->sp) #define task_pc(task) (task_pt_regs(task)->pc) /* Aliases for pc and sp (used in fs/proc/array.c) */ @@ -355,20 +356,38 @@ extern int kdata_huge; #define KERNEL_PL CONFIG_KERNEL_PL /* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */ -#define CPU_LOG_MASK_VALUE 12 -#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1) -#if CONFIG_NR_CPUS > CPU_MASK_VALUE -# error Too many cpus! +#ifdef __tilegx__ +#define CPU_SHIFT 48 +#if CHIP_VA_WIDTH() > CPU_SHIFT +# error Too many VA bits! #endif +#define MAX_CPU_ID ((1 << (64 - CPU_SHIFT)) - 1) #define raw_smp_processor_id() \ - ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE) + ((int)(__insn_mfspr(SPR_SYSTEM_SAVE_K_0) >> CPU_SHIFT)) #define get_current_ksp0() \ - (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE) + ((unsigned long)(((long)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) << \ + (64 - CPU_SHIFT)) >> (64 - CPU_SHIFT))) +#define next_current_ksp0(task) ({ \ + unsigned long __ksp0 = task_ksp0(task) & ((1UL << CPU_SHIFT) - 1); \ + unsigned long __cpu = (long)raw_smp_processor_id() << CPU_SHIFT; \ + __ksp0 | __cpu; \ +}) +#else +#define LOG2_NR_CPU_IDS 6 +#define MAX_CPU_ID ((1 << LOG2_NR_CPU_IDS) - 1) +#define raw_smp_processor_id() \ + ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & MAX_CPU_ID) +#define get_current_ksp0() \ + (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~MAX_CPU_ID) #define next_current_ksp0(task) ({ \ unsigned long __ksp0 = task_ksp0(task); \ int __cpu = raw_smp_processor_id(); \ - BUG_ON(__ksp0 & CPU_MASK_VALUE); \ + BUG_ON(__ksp0 & MAX_CPU_ID); \ __ksp0 | __cpu; \ }) +#endif +#if CONFIG_NR_CPUS > (MAX_CPU_ID + 1) +# error Too many cpus! 
+#endif #endif /* _ASM_TILE_PROCESSOR_H */ diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index d1527fce286..f3f17b0283f 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -86,7 +86,7 @@ ENTRY(_start) /* * Load up our per-cpu offset. When the first (master) tile * boots, this value is still zero, so we will load boot_pc - * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. + * with start_kernel, and boot_sp at the top of init_stack. * The master tile initializes the per-cpu offset array, so that * when subsequent (secondary) tiles boot, they will instead load * from their per-cpu versions of boot_sp and boot_pc. @@ -126,7 +126,6 @@ ENTRY(_start) lw sp, r1 or r4, sp, r4 mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ - addi sp, sp, -STACK_TOP_DELTA { move lr, zero /* stop backtraces in the called function */ jr r0 diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index 969e4f81f3b..652b8142615 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S @@ -158,7 +158,7 @@ ENTRY(_start) /* * Load up our per-cpu offset. When the first (master) tile * boots, this value is still zero, so we will load boot_pc - * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. + * with start_kernel, and boot_sp with at the top of init_stack. * The master tile initializes the per-cpu offset array, so that * when subsequent (secondary) tiles boot, they will instead load * from their per-cpu versions of boot_sp and boot_pc. @@ -202,9 +202,9 @@ ENTRY(_start) } ld r0, r0 ld sp, r1 - or r4, sp, r4 + shli r4, r4, CPU_SHIFT + bfins r4, sp, 0, CPU_SHIFT-1 mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ - addi sp, sp, -STACK_TOP_DELTA { move lr, zero /* stop backtraces in the called function */ jr r0 diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 9c0c3cb6aab..f3d26f48e65 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -185,7 +185,7 @@ intvec_\vecname: * point sp at the top aligned address on the actual stack page. */ mfspr r0, SPR_SYSTEM_SAVE_K_0 - mm r0, r0, zero, LOG2_THREAD_SIZE, 31 + mm r0, r0, zero, LOG2_NR_CPU_IDS, 31 0: /* @@ -203,6 +203,9 @@ intvec_\vecname: * cache line 1: r14...r29 * cache line 0: 2 x frame, r0..r13 */ +#if STACK_TOP_DELTA != 64 +#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs() +#endif andi r0, r0, -64 /* @@ -464,7 +467,7 @@ intvec_\vecname: } { auli r21, r21, ha16(__per_cpu_offset) - mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1 + mm r20, r20, zero, 0, LOG2_NR_CPU_IDS-1 } s2a r20, r20, r21 lw tp, r20 diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index df19d4f3946..3b35bb490d3 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -132,13 +132,9 @@ intvec_\vecname: mfspr r3, SPR_SYSTEM_SAVE_K_0 /* Get &thread_info->unalign_jit_tmp[0] in r3. */ + bfexts r3, r3, 0, CPU_SHIFT-1 mm r3, zero, LOG2_THREAD_SIZE, 63 -#if THREAD_SIZE < 65536 - addli r3, r3, -(PAGE_SIZE - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET) -#else - addli r3, r3, -(PAGE_SIZE/2) - addli r3, r3, -(PAGE_SIZE/2 - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET) -#endif + addli r3, r3, THREAD_INFO_UNALIGN_JIT_TMP_OFFSET /* * Save r0, r1, r2 into thread_info array r3 points to @@ -365,13 +361,13 @@ intvec_\vecname: 2: /* - * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and - * the current stack top in the higher bits. 
- * our stack top by just masking off the low bits, then
+ * SYSTEM_SAVE_K_0 holds the cpu number in the high bits, and
+ * the current stack top in the lower bits. So we recover
+ * our starting stack value by sign-extending the low bits, then
  * point sp at the top aligned address on the actual stack page.
  */
 mfspr r0, SPR_SYSTEM_SAVE_K_0
- mm r0, zero, LOG2_THREAD_SIZE, 63
+ bfexts r0, r0, 0, CPU_SHIFT-1
 
 0:
 
 /*
  * cache line 1: r6...r13
  * cache line 0: 2 x frame, r0..r5
  */
+#if STACK_TOP_DELTA != 64
+#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs()
+#endif
 andi r0, r0, -64
 
 /*
@@ -690,7 +689,7 @@ intvec_\vecname:
 }
 {
 shl16insli r21, r21, hw1(__per_cpu_offset)
- bfextu r20, r20, 0, LOG2_THREAD_SIZE-1
+ bfextu r20, r20, CPU_SHIFT, 63
 }
 shl16insli r21, r21, hw0(__per_cpu_offset)
 shl3add r20, r20, r21
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index a9db923bb9e..24fd223df65 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -197,19 +197,19 @@ static void validate_stack(struct pt_regs *regs)
 {
 int cpu = raw_smp_processor_id();
 unsigned long ksp0 = get_current_ksp0();
- unsigned long ksp0_base = ksp0 - THREAD_SIZE;
+ unsigned long ksp0_base = ksp0 & -THREAD_SIZE;
 unsigned long sp = stack_pointer;
 
 if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) {
- pr_err("WARNING: cpu %d: kernel stack page %#lx underrun!\n"
+ pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx underrun!\n"
 " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
- cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr);
+ cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr);
 } else if (sp < ksp0_base + sizeof(struct thread_info)) {
- pr_err("WARNING: cpu %d: kernel stack page %#lx overrun!\n"
+ pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx overrun!\n"
 " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
- cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr);
+ cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr);
 }
 }
-- 
cgit v1.2.3-70-g09d2
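Since ksp0 now points STACK_TOP_DELTA bytes below the top of the stack page, validate_stack() above can no longer find the base by subtracting THREAD_SIZE; it masks instead. A small sketch of the resulting bounds checks, with illustrative sizes and addresses (THREAD_INFO_SIZE stands in for sizeof(struct thread_info), and the privilege-level test is elided):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define THREAD_SIZE      (UINT64_C(64) * 1024)  /* illustrative */
#define STACK_TOP_DELTA  64
#define THREAD_INFO_SIZE 1024                   /* stand-in value */

/* Model of validate_stack(): ksp0 is no longer THREAD_SIZE-aligned, so the
 * base of the stack page comes from masking, not from ksp0 - THREAD_SIZE. */
static void check(uint64_t ksp0, uint64_t sp)
{
    uint64_t ksp0_base = ksp0 & ~(THREAD_SIZE - 1);

    if (sp >= ksp0)
        printf("kernel stack %#" PRIx64 "..%#" PRIx64 " underrun! sp %#" PRIx64 "\n",
               ksp0_base, ksp0, sp);
    else if (sp < ksp0_base + THREAD_INFO_SIZE)
        printf("kernel stack %#" PRIx64 "..%#" PRIx64 " overrun! sp %#" PRIx64 "\n",
               ksp0_base, ksp0, sp);
    else
        printf("sp %#" PRIx64 " ok\n", sp);
}

int main(void)
{
    uint64_t base = UINT64_C(0xfffffff712340000);  /* THREAD_SIZE-aligned */
    uint64_t ksp0 = base + THREAD_SIZE - STACK_TOP_DELTA;

    check(ksp0, ksp0 - 4096);                  /* ok */
    check(ksp0, ksp0 + 8);                     /* underrun */
    check(ksp0, base + THREAD_INFO_SIZE - 8);  /* overrun into thread_info */
    return 0;
}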
From 051168df528fe4456d63f5f65b041c147c26fe97 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Tue, 3 Sep 2013 14:45:52 -0400
Subject: tile: don't assume user privilege is zero

Technically, user privilege is anything less than kernel privilege.
We modify the existing user_mode() macro to have this semantic (and
use it in a couple of places it wasn't being used before), and add an
IS_KERNEL_EX1() macro to the assembly code as well.

Signed-off-by: Chris Metcalf
---
 arch/tile/include/asm/processor.h | 4 ++--
 arch/tile/include/asm/ptrace.h | 2 +-
 arch/tile/kernel/intvec_64.S | 23 +++++++++++++++++------
 arch/tile/kernel/stack.c | 2 +-
 arch/tile/mm/fault.c | 4 ++--
 5 files changed, 23 insertions(+), 12 deletions(-)

(limited to 'arch/tile/kernel/stack.c')

diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 230b830e94d..c72fcba7016 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -15,6 +15,8 @@
 #ifndef _ASM_TILE_PROCESSOR_H
 #define _ASM_TILE_PROCESSOR_H
 
+#include
+
 #ifndef __ASSEMBLY__
 
 /*
@@ -25,7 +27,6 @@
 
 #include
 #include
-#include
 #include
 
 struct task_struct;
@@ -347,7 +348,6 @@ extern int kdata_huge;
 
 /*
  * Provide symbolic constants for PLs.
- * Note that assembly code assumes that USER_PL is zero.
  */
 #define USER_PL 0
 #if CONFIG_KERNEL_PL == 2
diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
index 0d25c21bcd6..b9620c077ab 100644
--- a/arch/tile/include/asm/ptrace.h
+++ b/arch/tile/include/asm/ptrace.h
@@ -39,7 +39,7 @@ typedef unsigned long pt_reg_t;
 #define user_stack_pointer(regs) ((regs)->sp)
 
 /* Does the process account for user or for system time? */
-#define user_mode(regs) (EX1_PL((regs)->ex1) == USER_PL)
+#define user_mode(regs) (EX1_PL((regs)->ex1) < KERNEL_PL)
 
 /* Fill in a struct pt_regs with the current kernel registers. */
 struct pt_regs *get_pt_regs(struct pt_regs *);
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 3b35bb490d3..f020f01960c 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -34,6 +34,16 @@
 
 #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
 
+#if CONFIG_KERNEL_PL == 1 || CONFIG_KERNEL_PL == 2
+/*
+ * Set "result" non-zero if ex1 holds the PL of the kernel
+ * (with or without ICS being set). Note this works only
+ * because we never find the PL at level 3.
+ */
+# define IS_KERNEL_EX1(result, ex1) andi result, ex1, CONFIG_KERNEL_PL
+#else
+# error Recode IS_KERNEL_EX1 for CONFIG_KERNEL_PL
+#endif
 
 .macro push_reg reg, ptr=sp, delta=-8
 {
@@ -308,7 +318,7 @@ intvec_\vecname:
  */
 {
 blbs sp, 2f
- andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ IS_KERNEL_EX1(r0, r0)
 }
 
 .ifc \vecnum, INT_DOUBLE_FAULT
@@ -641,11 +651,12 @@ intvec_\vecname:
 /*
  * If we will be returning to the kernel, we will need to
  * reset the interrupt masks to the state they had before.
- * Set DISABLE_IRQ in flags iff we came from PL1 with irqs disabled.
+ * Set DISABLE_IRQ in flags iff we came from kernel pl with
+ * irqs disabled.
  */
 mfspr r32, SPR_EX_CONTEXT_K_1
 {
- andi r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ IS_KERNEL_EX1(r32, r32)
 PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS)
 }
 beqzt r32, 1f /* zero if from user space */
@@ -812,7 +823,7 @@ STD_ENTRY(interrupt_return)
 PTREGS_PTR(r29, PTREGS_OFFSET_EX1)
 }
 ld r29, r29
- andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ IS_KERNEL_EX1(r29, r29)
 {
 beqzt r29, .Lresume_userspace
 move r29, sp
 }
@@ -936,7 +947,7 @@ STD_ENTRY(interrupt_return)
 PTREGS_PTR(r32, PTREGS_OFFSET_FLAGS)
 }
 {
- andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK
+ IS_KERNEL_EX1(r0, r0)
 ld r32, r32
 }
 bnez r0, 1f
@@ -1007,7 +1018,7 @@ STD_ENTRY(interrupt_return)
 pop_reg r21, sp, PTREGS_OFFSET_REG(31) - PTREGS_OFFSET_PC
 {
 mtspr SPR_EX_CONTEXT_K_1, lr
- andi lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ IS_KERNEL_EX1(lr, lr)
 }
 {
 mtspr SPR_EX_CONTEXT_K_0, r21
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index 24fd223df65..362284af3af 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -103,7 +103,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
 p->sp >= sp) {
 if (kbt->verbose)
 pr_err(" <%s while in kernel mode>\n", fault);
- } else if (EX1_PL(p->ex1) == USER_PL &&
+ } else if (user_mode(p) &&
 p->sp < PAGE_OFFSET && p->sp != 0) {
 if (kbt->verbose)
 pr_err(" <%s while in user mode>\n", fault);
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 64eec3f9584..39c48cbe0a9 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -283,7 +283,7 @@ static int handle_page_fault(struct pt_regs *regs,
 flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
 (write ? FAULT_FLAG_WRITE : 0));
 
- is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);
+ is_kernel_mode = !user_mode(regs);
 
 tsk = validate_current();
 
@@ -824,7 +824,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 }
 
 #if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
- if (EX1_PL(regs->ex1) != USER_PL) {
+ if (!user_mode(regs)) {
 struct async_tlb *async;
 switch (fault_num) {
 #if CHIP_HAS_TILE_DMA()
-- 
cgit v1.2.3-70-g09d2
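The user_mode() change is the crux of this last patch and is easy to model: with CONFIG_KERNEL_PL == 2, user code can run at PL 0 or PL 1, so the old equality test against USER_PL would misclassify a PL-1 user process as kernel, while the new ordering test does not. The same table shows why IS_KERNEL_EX1() can be a single andi against CONFIG_KERNEL_PL: only the kernel PL has that bit set, given that PL 3 never occurs. A sketch (user-space C, not kernel code; the ICS bit that shares ex1 is ignored here for simplicity):

#include <stdio.h>

#define USER_PL   0
#define KERNEL_PL 2  /* e.g. CONFIG_KERNEL_PL == 2 */

static int old_user_mode(int pl) { return pl == USER_PL; }  /* pre-patch */
static int new_user_mode(int pl) { return pl < KERNEL_PL; } /* post-patch */

/* Model of IS_KERNEL_EX1(): one AND picks out the kernel PL bit.
 * Correct for KERNEL_PL 1 or 2 only because PL 3 never occurs. */
static int is_kernel_ex1(int pl) { return (pl & KERNEL_PL) != 0; }

int main(void)
{
    for (int pl = 0; pl <= 2; pl++)
        printf("PL %d: old user_mode=%d new user_mode=%d IS_KERNEL_EX1=%d\n",
               pl, old_user_mode(pl), new_user_mode(pl), is_kernel_ex1(pl));
    return 0;
}

At PL 1 the old test returns 0 (kernel) while the new test returns 1 (user), which is exactly the case the patch is fixing.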