author | David S. Miller <davem@davemloft.net> | 2009-12-11 01:26:12 -0800
---|---|---
committer | David S. Miller <davem@davemloft.net> | 2009-12-11 01:26:12 -0800
commit | e3f4e1cbc341bc2020241d8119bd078db3ec3b85 (patch) |
tree | 8b23624cc792f60d0bf86d787c3514cf0fb04418 /arch/sh/kernel |
parent | adfe67ddffbea51322b118896178bd71aaa4b4d8 (diff) |
parent | d71cb81af3817193bc605de061da0499934263a6 (diff) |
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Diffstat (limited to 'arch/sh/kernel')
49 files changed, 2429 insertions(+), 1188 deletions(-)
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index a2d0a40f384..0471a3eb25e 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -9,8 +9,12 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = -pg endif -obj-y := debugtraps.o dumpstack.o idle.o io.o io_generic.o irq.o \ - machvec.o nmi_debug.o process_$(BITS).o ptrace_$(BITS).o \ +CFLAGS_REMOVE_return_address.o = -pg + +obj-y := debugtraps.o dma-nommu.o dumpstack.o \ + idle.o io.o io_generic.o irq.o \ + irq_$(BITS).o machvec.o nmi_debug.o process_$(BITS).o \ + ptrace_$(BITS).o return_address.o \ setup.o signal_$(BITS).o sys_sh.o sys_sh$(BITS).o \ syscalls_$(BITS).o time.o topology.o traps.o \ traps_$(BITS).o unwinder.o @@ -28,13 +32,13 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_IO_TRAPPED) += io_trapped.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_GENERIC_GPIO) += gpio.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_DUMP_CODE) += disassemble.o obj-$(CONFIG_HIBERNATION) += swsusp.o obj-$(CONFIG_DWARF_UNWINDER) += dwarf.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_callchain.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += localtimer.o diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c index d218e808294..08a2be775b6 100644 --- a/arch/sh/kernel/asm-offsets.c +++ b/arch/sh/kernel/asm-offsets.c @@ -34,5 +34,28 @@ int main(void) DEFINE(PBE_NEXT, offsetof(struct pbe, next)); DEFINE(SWSUSP_ARCH_REGS_SIZE, sizeof(struct swsusp_arch_regs)); #endif + + DEFINE(SH_SLEEP_MODE, offsetof(struct sh_sleep_data, mode)); + DEFINE(SH_SLEEP_SF_PRE, offsetof(struct sh_sleep_data, sf_pre)); + DEFINE(SH_SLEEP_SF_POST, offsetof(struct sh_sleep_data, sf_post)); + DEFINE(SH_SLEEP_RESUME, offsetof(struct sh_sleep_data, resume)); + DEFINE(SH_SLEEP_VBR, offsetof(struct sh_sleep_data, vbr)); + DEFINE(SH_SLEEP_SPC, offsetof(struct sh_sleep_data, spc)); + DEFINE(SH_SLEEP_SR, offsetof(struct sh_sleep_data, sr)); + DEFINE(SH_SLEEP_SP, offsetof(struct sh_sleep_data, sp)); + DEFINE(SH_SLEEP_BASE_ADDR, offsetof(struct sh_sleep_data, addr)); + DEFINE(SH_SLEEP_BASE_DATA, offsetof(struct sh_sleep_data, data)); + DEFINE(SH_SLEEP_REG_STBCR, offsetof(struct sh_sleep_regs, stbcr)); + DEFINE(SH_SLEEP_REG_BAR, offsetof(struct sh_sleep_regs, bar)); + DEFINE(SH_SLEEP_REG_PTEH, offsetof(struct sh_sleep_regs, pteh)); + DEFINE(SH_SLEEP_REG_PTEL, offsetof(struct sh_sleep_regs, ptel)); + DEFINE(SH_SLEEP_REG_TTB, offsetof(struct sh_sleep_regs, ttb)); + DEFINE(SH_SLEEP_REG_TEA, offsetof(struct sh_sleep_regs, tea)); + DEFINE(SH_SLEEP_REG_MMUCR, offsetof(struct sh_sleep_regs, mmucr)); + DEFINE(SH_SLEEP_REG_PTEA, offsetof(struct sh_sleep_regs, ptea)); + DEFINE(SH_SLEEP_REG_PASCR, offsetof(struct sh_sleep_regs, pascr)); + DEFINE(SH_SLEEP_REG_IRMCR, offsetof(struct sh_sleep_regs, irmcr)); + DEFINE(SH_SLEEP_REG_CCR, offsetof(struct sh_sleep_regs, ccr)); + DEFINE(SH_SLEEP_REG_RAMCR, offsetof(struct sh_sleep_regs, ramcr)); return 0; } diff --git a/arch/sh/kernel/cpu/Makefile b/arch/sh/kernel/cpu/Makefile index 3d6b9312dc4..d97c803719e 100644 --- a/arch/sh/kernel/cpu/Makefile +++ b/arch/sh/kernel/cpu/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_ARCH_SHMOBILE) += shmobile/ # Common interfaces. 
-obj-$(CONFIG_UBC_WAKEUP) += ubc.o obj-$(CONFIG_SH_ADC) += adc.o obj-$(CONFIG_SH_CLK_CPG) += clock-cpg.o diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c index e932ebef473..89b4b76c0d7 100644 --- a/arch/sh/kernel/cpu/init.c +++ b/arch/sh/kernel/cpu/init.c @@ -75,16 +75,11 @@ static void __init expmask_init(void) /* * Future proofing. * - * Disable support for slottable sleep instruction - * and non-nop instructions in the rte delay slot. + * Disable support for slottable sleep instruction, non-nop + * instructions in the rte delay slot, and associative writes to + * the memory-mapped cache array. */ - expmask &= ~(EXPMASK_RTEDS | EXPMASK_BRDSSLP); - - /* - * Enable associative writes to the memory-mapped cache array - * until the cache flush ops have been rewritten. - */ - expmask |= EXPMASK_MMCAW; + expmask &= ~(EXPMASK_RTEDS | EXPMASK_BRDSSLP | EXPMASK_MMCAW); __raw_writel(expmask, EXPMASK); ctrl_barrier(); @@ -311,12 +306,12 @@ asmlinkage void __init sh_cpu_init(void) if (fpu_disabled) { printk("FPU Disabled\n"); current_cpu_data.flags &= ~CPU_HAS_FPU; - disable_fpu(); } /* FPU initialization */ + disable_fpu(); if ((current_cpu_data.flags & CPU_HAS_FPU)) { - clear_thread_flag(TIF_USEDFPU); + current_thread_info()->status &= ~TS_USEDFPU; clear_used_math(); } @@ -338,17 +333,6 @@ asmlinkage void __init sh_cpu_init(void) } #endif - /* - * Some brain-damaged loaders decided it would be a good idea to put - * the UBC to sleep. This causes some issues when it comes to things - * like PTRACE_SINGLESTEP or doing hardware watchpoints in GDB. So .. - * we wake it up and hope that all is well. - */ -#ifdef CONFIG_SUPERH32 - if (raw_smp_processor_id() == 0) - ubc_wakeup(); -#endif - speculative_execution_init(); expmask_init(); } diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c index 6df2fb98eb3..d395ce5740e 100644 --- a/arch/sh/kernel/cpu/sh2a/fpu.c +++ b/arch/sh/kernel/cpu/sh2a/fpu.c @@ -25,14 +25,12 @@ /* * Save FPU registers onto task structure. - * Assume called with FPU enabled (SR.FD=0). */ void -save_fpu(struct task_struct *tsk, struct pt_regs *regs) +save_fpu(struct task_struct *tsk) { unsigned long dummy; - clear_tsk_thread_flag(tsk, TIF_USEDFPU); enable_fpu(); asm volatile("sts.l fpul, @-%0\n\t" "sts.l fpscr, @-%0\n\t" @@ -60,7 +58,6 @@ save_fpu(struct task_struct *tsk, struct pt_regs *regs) : "memory"); disable_fpu(); - release_fpu(regs); } static void @@ -598,31 +595,31 @@ BUILD_TRAP_HANDLER(fpu_error) struct task_struct *tsk = current; TRAP_HANDLER_DECL; - save_fpu(tsk, regs); + __unlazy_fpu(tsk, regs); if (ieee_fpe_handler(regs)) { tsk->thread.fpu.hard.fpscr &= ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); grab_fpu(regs); restore_fpu(tsk); - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; return; } force_sig(SIGFPE, tsk); } -BUILD_TRAP_HANDLER(fpu_state_restore) +void fpu_state_restore(struct pt_regs *regs) { struct task_struct *tsk = current; - TRAP_HANDLER_DECL; grab_fpu(regs); - if (!user_mode(regs)) { + if (unlikely(!user_mode(regs))) { printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); + BUG(); return; } - if (used_math()) { + if (likely(used_math())) { /* Using the FPU again. 
*/ restore_fpu(tsk); } else { @@ -630,5 +627,13 @@ BUILD_TRAP_HANDLER(fpu_state_restore) fpu_init(); set_used_math(); } - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; + tsk->fpu_counter++; +} + +BUILD_TRAP_HANDLER(fpu_state_restore) +{ + TRAP_HANDLER_DECL; + + fpu_state_restore(regs); } diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S index bb407ef0b91..3f7e2a22c7c 100644 --- a/arch/sh/kernel/cpu/sh3/entry.S +++ b/arch/sh/kernel/cpu/sh3/entry.S @@ -297,41 +297,8 @@ ENTRY(vbr_base) ! .balign 256,0,256 general_exception: -#ifndef CONFIG_CPU_SUBTYPE_SHX3 bra handle_exception sts pr, k3 ! save original pr value in k3 -#else - mov.l 1f, k4 - mov.l @k4, k4 - - ! Is EXPEVT larger than 0x800? - mov #0x8, k0 - shll8 k0 - cmp/hs k0, k4 - bf 0f - - ! then add 0x580 (k2 is 0xd80 or 0xda0) - mov #0x58, k0 - shll2 k0 - shll2 k0 - add k0, k4 -0: - ! Setup stack and save DSP context (k0 contains original r15 on return) - bsr prepare_stack - nop - - ! Save registers / Switch to bank 0 - mov k4, k2 ! keep vector in k2 - mov.l 1f, k4 ! SR bits to clear in k4 - bsr save_regs ! needs original pr value in k3 - nop - - bra handle_exception_special - nop - - .align 2 -1: .long EXPEVT -#endif ! prepare_stack() ! - roll back gRB diff --git a/arch/sh/kernel/cpu/sh4/Makefile b/arch/sh/kernel/cpu/sh4/Makefile index 203b18347b8..3a1dbc70983 100644 --- a/arch/sh/kernel/cpu/sh4/Makefile +++ b/arch/sh/kernel/cpu/sh4/Makefile @@ -9,6 +9,11 @@ obj-$(CONFIG_HIBERNATION) += $(addprefix ../sh3/, swsusp.o) obj-$(CONFIG_SH_FPU) += fpu.o softfloat.o obj-$(CONFIG_SH_STORE_QUEUES) += sq.o +# Perf events +perf-$(CONFIG_CPU_SUBTYPE_SH7750) := perf_event.o +perf-$(CONFIG_CPU_SUBTYPE_SH7750S) := perf_event.o +perf-$(CONFIG_CPU_SUBTYPE_SH7091) := perf_event.o + # CPU subtype setup obj-$(CONFIG_CPU_SUBTYPE_SH7750) += setup-sh7750.o obj-$(CONFIG_CPU_SUBTYPE_SH7750R) += setup-sh7750.o @@ -27,4 +32,5 @@ endif # Additional clocks by subtype clock-$(CONFIG_CPU_SUBTYPE_SH4_202) += clock-sh4-202.o -obj-y += $(clock-y) +obj-y += $(clock-y) +obj-$(CONFIG_PERF_EVENTS) += $(perf-y) diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index e3ea5411da6..e97857aec8a 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -41,13 +41,11 @@ static unsigned int fpu_exception_flags; /* * Save FPU registers onto task structure. - * Assume called with FPU enabled (SR.FD=0). 
*/ -void save_fpu(struct task_struct *tsk, struct pt_regs *regs) +void save_fpu(struct task_struct *tsk) { unsigned long dummy; - clear_tsk_thread_flag(tsk, TIF_USEDFPU); enable_fpu(); asm volatile ("sts.l fpul, @-%0\n\t" "sts.l fpscr, @-%0\n\t" @@ -92,7 +90,6 @@ void save_fpu(struct task_struct *tsk, struct pt_regs *regs) :"memory"); disable_fpu(); - release_fpu(regs); } static void restore_fpu(struct task_struct *tsk) @@ -285,7 +282,6 @@ static int ieee_fpe_handler(struct pt_regs *regs) /* fcnvsd */ struct task_struct *tsk = current; - save_fpu(tsk, regs); if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)) /* FPU error */ denormal_to_double(&tsk->thread.fpu.hard, @@ -462,7 +458,7 @@ BUILD_TRAP_HANDLER(fpu_error) struct task_struct *tsk = current; TRAP_HANDLER_DECL; - save_fpu(tsk, regs); + __unlazy_fpu(tsk, regs); fpu_exception_flags = 0; if (ieee_fpe_handler(regs)) { tsk->thread.fpu.hard.fpscr &= @@ -473,7 +469,7 @@ BUILD_TRAP_HANDLER(fpu_error) tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10); grab_fpu(regs); restore_fpu(tsk); - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) & (fpu_exception_flags >> 2)) == 0) { return; @@ -483,18 +479,18 @@ BUILD_TRAP_HANDLER(fpu_error) force_sig(SIGFPE, tsk); } -BUILD_TRAP_HANDLER(fpu_state_restore) +void fpu_state_restore(struct pt_regs *regs) { struct task_struct *tsk = current; - TRAP_HANDLER_DECL; grab_fpu(regs); - if (!user_mode(regs)) { + if (unlikely(!user_mode(regs))) { printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); + BUG(); return; } - if (used_math()) { + if (likely(used_math())) { /* Using the FPU again. */ restore_fpu(tsk); } else { @@ -502,5 +498,13 @@ BUILD_TRAP_HANDLER(fpu_state_restore) fpu_init(); set_used_math(); } - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; + tsk->fpu_counter++; +} + +BUILD_TRAP_HANDLER(fpu_state_restore) +{ + TRAP_HANDLER_DECL; + + fpu_state_restore(regs); } diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c new file mode 100644 index 00000000000..7f9ecc9c2d0 --- /dev/null +++ b/arch/sh/kernel/cpu/sh4/perf_event.c @@ -0,0 +1,253 @@ +/* + * Performance events support for SH7750-style performance counters + * + * Copyright (C) 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/perf_event.h> +#include <asm/processor.h> + +#define PM_CR_BASE 0xff000084 /* 16-bit */ +#define PM_CTR_BASE 0xff100004 /* 32-bit */ + +#define PMCR(n) (PM_CR_BASE + ((n) * 0x04)) +#define PMCTRH(n) (PM_CTR_BASE + 0x00 + ((n) * 0x08)) +#define PMCTRL(n) (PM_CTR_BASE + 0x04 + ((n) * 0x08)) + +#define PMCR_PMM_MASK 0x0000003f + +#define PMCR_CLKF 0x00000100 +#define PMCR_PMCLR 0x00002000 +#define PMCR_PMST 0x00004000 +#define PMCR_PMEN 0x00008000 + +static struct sh_pmu sh7750_pmu; + +/* + * There are a number of events supported by each counter (33 in total). + * Since we have 2 counters, each counter will take the event code as it + * corresponds to the PMCR PMM setting. Each counter can be configured + * independently. 
+ * + * Event Code Description + * ---------- ----------- + * + * 0x01 Operand read access + * 0x02 Operand write access + * 0x03 UTLB miss + * 0x04 Operand cache read miss + * 0x05 Operand cache write miss + * 0x06 Instruction fetch (w/ cache) + * 0x07 Instruction TLB miss + * 0x08 Instruction cache miss + * 0x09 All operand accesses + * 0x0a All instruction accesses + * 0x0b OC RAM operand access + * 0x0d On-chip I/O space access + * 0x0e Operand access (r/w) + * 0x0f Operand cache miss (r/w) + * 0x10 Branch instruction + * 0x11 Branch taken + * 0x12 BSR/BSRF/JSR + * 0x13 Instruction execution + * 0x14 Instruction execution in parallel + * 0x15 FPU Instruction execution + * 0x16 Interrupt + * 0x17 NMI + * 0x18 trapa instruction execution + * 0x19 UBCA match + * 0x1a UBCB match + * 0x21 Instruction cache fill + * 0x22 Operand cache fill + * 0x23 Elapsed time + * 0x24 Pipeline freeze by I-cache miss + * 0x25 Pipeline freeze by D-cache miss + * 0x27 Pipeline freeze by branch instruction + * 0x28 Pipeline freeze by CPU register + * 0x29 Pipeline freeze by FPU + */ + +static const int sh7750_general_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0x0023, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x000a, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0006, /* I-cache */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x0008, /* I-cache */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0010, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +static const int sh7750_cache_events + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0001, + [ C(RESULT_MISS) ] = 0x0004, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0002, + [ C(RESULT_MISS) ] = 0x0005, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(L1I) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0006, + [ C(RESULT_MISS) ] = 0x0008, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(LL) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0003, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0007, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + + [ C(BPU) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static int sh7750_event_map(int event) +{ + return sh7750_general_events[event]; +} + +static u64 sh7750_pmu_read(int idx) +{ + return (u64)((u64)(__raw_readl(PMCTRH(idx)) & 0xffff) << 32) | + __raw_readl(PMCTRL(idx)); +} + +static void 
sh7750_pmu_disable(struct hw_perf_event *hwc, int idx) +{ + unsigned int tmp; + + tmp = __raw_readw(PMCR(idx)); + tmp &= ~(PMCR_PMM_MASK | PMCR_PMEN); + __raw_writew(tmp, PMCR(idx)); +} + +static void sh7750_pmu_enable(struct hw_perf_event *hwc, int idx) +{ + __raw_writew(__raw_readw(PMCR(idx)) | PMCR_PMCLR, PMCR(idx)); + __raw_writew(hwc->config | PMCR_PMEN | PMCR_PMST, PMCR(idx)); +} + +static void sh7750_pmu_disable_all(void) +{ + int i; + + for (i = 0; i < sh7750_pmu.num_events; i++) + __raw_writew(__raw_readw(PMCR(i)) & ~PMCR_PMEN, PMCR(i)); +} + +static void sh7750_pmu_enable_all(void) +{ + int i; + + for (i = 0; i < sh7750_pmu.num_events; i++) + __raw_writew(__raw_readw(PMCR(i)) | PMCR_PMEN, PMCR(i)); +} + +static struct sh_pmu sh7750_pmu = { + .name = "SH7750", + .num_events = 2, + .event_map = sh7750_event_map, + .max_events = ARRAY_SIZE(sh7750_general_events), + .raw_event_mask = PMCR_PMM_MASK, + .cache_events = &sh7750_cache_events, + .read = sh7750_pmu_read, + .disable = sh7750_pmu_disable, + .enable = sh7750_pmu_enable, + .disable_all = sh7750_pmu_disable_all, + .enable_all = sh7750_pmu_enable_all, +}; + +static int __init sh7750_pmu_init(void) +{ + /* + * Make sure this CPU actually has perf counters. + */ + if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) { + pr_notice("HW perf events unsupported, software events only.\n"); + return -ENODEV; + } + + return register_sh_pmu(&sh7750_pmu); +} +arch_initcall(sh7750_pmu_init); diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile index 490d5dc9e37..33bab477d2e 100644 --- a/arch/sh/kernel/cpu/sh4a/Makefile +++ b/arch/sh/kernel/cpu/sh4a/Makefile @@ -44,3 +44,4 @@ pinmux-$(CONFIG_CPU_SUBTYPE_SH7786) := pinmux-sh7786.o obj-y += $(clock-y) obj-$(CONFIG_SMP) += $(smp-y) obj-$(CONFIG_GENERIC_GPIO) += $(pinmux-y) +obj-$(CONFIG_PERF_EVENTS) += perf_event.o diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c index dfe9192be63..9db743802f0 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c @@ -152,7 +152,7 @@ struct clk div6_clks[] = { SH_CLK_DIV6("fsia_clk", &div3_clk, FCLKACR, 0), SH_CLK_DIV6("fsib_clk", &div3_clk, FCLKBCR, 0), SH_CLK_DIV6("irda_clk", &div3_clk, IRDACLKCR, 0), - SH_CLK_DIV6("spu_clk", &div3_clk, SPUCLKCR, 0), + SH_CLK_DIV6("spu_clk", &div3_clk, SPUCLKCR, CLK_ENABLE_ON_INIT), }; #define R_CLK (&r_clk) diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c new file mode 100644 index 00000000000..eddc21973fa --- /dev/null +++ b/arch/sh/kernel/cpu/sh4a/perf_event.c @@ -0,0 +1,269 @@ +/* + * Performance events support for SH-4A performance counters + * + * Copyright (C) 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/perf_event.h> +#include <asm/processor.h> + +#define PPC_CCBR(idx) (0xff200800 + (sizeof(u32) * idx)) +#define PPC_PMCTR(idx) (0xfc100000 + (sizeof(u32) * idx)) + +#define CCBR_CIT_MASK (0x7ff << 6) +#define CCBR_DUC (1 << 3) +#define CCBR_CMDS (1 << 1) +#define CCBR_PPCE (1 << 0) + +#define PPC_PMCAT 0xfc100080 + +#define PMCAT_OVF3 (1 << 27) +#define PMCAT_CNN3 (1 << 26) +#define PMCAT_CLR3 (1 << 25) +#define PMCAT_OVF2 (1 << 19) +#define PMCAT_CLR2 (1 << 17) +#define PMCAT_OVF1 (1 << 11) +#define PMCAT_CNN1 (1 << 10) +#define PMCAT_CLR1 (1 << 9) +#define PMCAT_OVF0 (1 << 3) +#define PMCAT_CLR0 (1 << 1) + +static struct sh_pmu sh4a_pmu; + +/* + * Supported raw event codes: + * + * Event Code Description + * ---------- ----------- + * + * 0x0000 number of elapsed cycles + * 0x0200 number of elapsed cycles in privileged mode + * 0x0280 number of elapsed cycles while SR.BL is asserted + * 0x0202 instruction execution + * 0x0203 instruction execution in parallel + * 0x0204 number of unconditional branches + * 0x0208 number of exceptions + * 0x0209 number of interrupts + * 0x0220 UTLB miss caused by instruction fetch + * 0x0222 UTLB miss caused by operand access + * 0x02a0 number of ITLB misses + * 0x0028 number of accesses to instruction memories + * 0x0029 number of accesses to instruction cache + * 0x002a instruction cache miss + * 0x022e number of access to instruction X/Y memory + * 0x0030 number of reads to operand memories + * 0x0038 number of writes to operand memories + * 0x0031 number of operand cache read accesses + * 0x0039 number of operand cache write accesses + * 0x0032 operand cache read miss + * 0x003a operand cache write miss + * 0x0236 number of reads to operand X/Y memory + * 0x023e number of writes to operand X/Y memory + * 0x0237 number of reads to operand U memory + * 0x023f number of writes to operand U memory + * 0x0337 number of U memory read buffer misses + * 0x02b4 number of wait cycles due to operand read access + * 0x02bc number of wait cycles due to operand write access + * 0x0033 number of wait cycles due to operand cache read miss + * 0x003b number of wait cycles due to operand cache write miss + */ + +/* + * Special reserved bits used by hardware emulators, read values will + * vary, but writes must always be 0. 
+ */ +#define PMCAT_EMU_CLR_MASK ((1 << 24) | (1 << 16) | (1 << 8) | (1 << 0)) + +static const int sh4a_general_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0x0000, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x0202, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0029, /* I-cache */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x002a, /* I-cache */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0204, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +static const int sh4a_cache_events + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0031, + [ C(RESULT_MISS) ] = 0x0032, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0039, + [ C(RESULT_MISS) ] = 0x003a, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(L1I) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0029, + [ C(RESULT_MISS) ] = 0x002a, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(LL) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0030, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0038, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0222, + [ C(RESULT_MISS) ] = 0x0220, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x02a0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + + [ C(BPU) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static int sh4a_event_map(int event) +{ + return sh4a_general_events[event]; +} + +static u64 sh4a_pmu_read(int idx) +{ + return __raw_readl(PPC_PMCTR(idx)); +} + +static void sh4a_pmu_disable(struct hw_perf_event *hwc, int idx) +{ + unsigned int tmp; + + tmp = __raw_readl(PPC_CCBR(idx)); + tmp &= ~(CCBR_CIT_MASK | CCBR_DUC); + __raw_writel(tmp, PPC_CCBR(idx)); +} + +static void sh4a_pmu_enable(struct hw_perf_event *hwc, int idx) +{ + unsigned int tmp; + + tmp = __raw_readl(PPC_PMCAT); + tmp &= ~PMCAT_EMU_CLR_MASK; + tmp |= idx ? 
PMCAT_CLR1 : PMCAT_CLR0; + __raw_writel(tmp, PPC_PMCAT); + + tmp = __raw_readl(PPC_CCBR(idx)); + tmp |= (hwc->config << 6) | CCBR_CMDS | CCBR_PPCE; + __raw_writel(tmp, PPC_CCBR(idx)); + + __raw_writel(__raw_readl(PPC_CCBR(idx)) | CCBR_DUC, PPC_CCBR(idx)); +} + +static void sh4a_pmu_disable_all(void) +{ + int i; + + for (i = 0; i < sh4a_pmu.num_events; i++) + __raw_writel(__raw_readl(PPC_CCBR(i)) & ~CCBR_DUC, PPC_CCBR(i)); +} + +static void sh4a_pmu_enable_all(void) +{ + int i; + + for (i = 0; i < sh4a_pmu.num_events; i++) + __raw_writel(__raw_readl(PPC_CCBR(i)) | CCBR_DUC, PPC_CCBR(i)); +} + +static struct sh_pmu sh4a_pmu = { + .name = "SH-4A", + .num_events = 2, + .event_map = sh4a_event_map, + .max_events = ARRAY_SIZE(sh4a_general_events), + .raw_event_mask = 0x3ff, + .cache_events = &sh4a_cache_events, + .read = sh4a_pmu_read, + .disable = sh4a_pmu_disable, + .enable = sh4a_pmu_enable, + .disable_all = sh4a_pmu_disable_all, + .enable_all = sh4a_pmu_enable_all, +}; + +static int __init sh4a_pmu_init(void) +{ + /* + * Make sure this CPU actually has perf counters. + */ + if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) { + pr_notice("HW perf events unsupported, software events only.\n"); + return -ENODEV; + } + + return register_sh_pmu(&sh4a_pmu); +} +arch_initcall(sh4a_pmu_init); diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c index f3851fd757e..845e89c936e 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c @@ -20,6 +20,8 @@ #include <linux/uio_driver.h> #include <linux/sh_timer.h> #include <linux/io.h> +#include <linux/notifier.h> +#include <asm/suspend.h> #include <asm/clock.h> #include <asm/mmzone.h> #include <cpu/sh7724.h> @@ -202,7 +204,7 @@ static struct resource veu0_resources[] = { [0] = { .name = "VEU3F0", .start = 0xfe920000, - .end = 0xfe9200cb - 1, + .end = 0xfe9200cb, .flags = IORESOURCE_MEM, }, [1] = { @@ -234,7 +236,7 @@ static struct resource veu1_resources[] = { [0] = { .name = "VEU3F1", .start = 0xfe924000, - .end = 0xfe9240cb - 1, + .end = 0xfe9240cb, .flags = IORESOURCE_MEM, }, [1] = { @@ -523,6 +525,70 @@ static struct platform_device jpu_device = { }, }; +/* SPU2DSP0 */ +static struct uio_info spu0_platform_data = { + .name = "SPU2DSP0", + .version = "0", + .irq = 86, +}; + +static struct resource spu0_resources[] = { + [0] = { + .name = "SPU2DSP0", + .start = 0xFE200000, + .end = 0xFE2FFFFF, + .flags = IORESOURCE_MEM, + }, + [1] = { + /* place holder for contiguous memory */ + }, +}; + +static struct platform_device spu0_device = { + .name = "uio_pdrv_genirq", + .id = 4, + .dev = { + .platform_data = &spu0_platform_data, + }, + .resource = spu0_resources, + .num_resources = ARRAY_SIZE(spu0_resources), + .archdata = { + .hwblk_id = HWBLK_SPU, + }, +}; + +/* SPU2DSP1 */ +static struct uio_info spu1_platform_data = { + .name = "SPU2DSP1", + .version = "0", + .irq = 87, +}; + +static struct resource spu1_resources[] = { + [0] = { + .name = "SPU2DSP1", + .start = 0xFE300000, + .end = 0xFE3FFFFF, + .flags = IORESOURCE_MEM, + }, + [1] = { + /* place holder for contiguous memory */ + }, +}; + +static struct platform_device spu1_device = { + .name = "uio_pdrv_genirq", + .id = 5, + .dev = { + .platform_data = &spu1_platform_data, + }, + .resource = spu1_resources, + .num_resources = ARRAY_SIZE(spu1_resources), + .archdata = { + .hwblk_id = HWBLK_SPU, + }, +}; + static struct platform_device *sh7724_devices[] __initdata = { &cmt_device, &tmu0_device, @@ -539,6 +605,8 @@ static struct 
platform_device *sh7724_devices[] __initdata = { &veu0_device, &veu1_device, &jpu_device, + &spu0_device, + &spu1_device, }; static int __init sh7724_devices_setup(void) @@ -547,6 +615,8 @@ static int __init sh7724_devices_setup(void) platform_resource_setup_memory(&veu0_device, "veu0", 2 << 20); platform_resource_setup_memory(&veu1_device, "veu1", 2 << 20); platform_resource_setup_memory(&jpu_device, "jpu", 2 << 20); + platform_resource_setup_memory(&spu0_device, "spu0", 2 << 20); + platform_resource_setup_memory(&spu1_device, "spu1", 2 << 20); return platform_add_devices(sh7724_devices, ARRAY_SIZE(sh7724_devices)); @@ -827,3 +897,193 @@ void __init plat_irq_setup(void) { register_intc_controller(&intc_desc); } + +static struct { + /* BSC */ + unsigned long mmselr; + unsigned long cs0bcr; + unsigned long cs4bcr; + unsigned long cs5abcr; + unsigned long cs5bbcr; + unsigned long cs6abcr; + unsigned long cs6bbcr; + unsigned long cs4wcr; + unsigned long cs5awcr; + unsigned long cs5bwcr; + unsigned long cs6awcr; + unsigned long cs6bwcr; + /* INTC */ + unsigned short ipra; + unsigned short iprb; + unsigned short iprc; + unsigned short iprd; + unsigned short ipre; + unsigned short iprf; + unsigned short iprg; + unsigned short iprh; + unsigned short ipri; + unsigned short iprj; + unsigned short iprk; + unsigned short iprl; + unsigned char imr0; + unsigned char imr1; + unsigned char imr2; + unsigned char imr3; + unsigned char imr4; + unsigned char imr5; + unsigned char imr6; + unsigned char imr7; + unsigned char imr8; + unsigned char imr9; + unsigned char imr10; + unsigned char imr11; + unsigned char imr12; + /* RWDT */ + unsigned short rwtcnt; + unsigned short rwtcsr; + /* CPG */ + unsigned long irdaclk; + unsigned long spuclk; +} sh7724_rstandby_state; + +static int sh7724_pre_sleep_notifier_call(struct notifier_block *nb, + unsigned long flags, void *unused) +{ + if (!(flags & SUSP_SH_RSTANDBY)) + return NOTIFY_DONE; + + /* BCR */ + sh7724_rstandby_state.mmselr = __raw_readl(0xff800020); /* MMSELR */ + sh7724_rstandby_state.mmselr |= 0xa5a50000; + sh7724_rstandby_state.cs0bcr = __raw_readl(0xfec10004); /* CS0BCR */ + sh7724_rstandby_state.cs4bcr = __raw_readl(0xfec10010); /* CS4BCR */ + sh7724_rstandby_state.cs5abcr = __raw_readl(0xfec10014); /* CS5ABCR */ + sh7724_rstandby_state.cs5bbcr = __raw_readl(0xfec10018); /* CS5BBCR */ + sh7724_rstandby_state.cs6abcr = __raw_readl(0xfec1001c); /* CS6ABCR */ + sh7724_rstandby_state.cs6bbcr = __raw_readl(0xfec10020); /* CS6BBCR */ + sh7724_rstandby_state.cs4wcr = __raw_readl(0xfec10030); /* CS4WCR */ + sh7724_rstandby_state.cs5awcr = __raw_readl(0xfec10034); /* CS5AWCR */ + sh7724_rstandby_state.cs5bwcr = __raw_readl(0xfec10038); /* CS5BWCR */ + sh7724_rstandby_state.cs6awcr = __raw_readl(0xfec1003c); /* CS6AWCR */ + sh7724_rstandby_state.cs6bwcr = __raw_readl(0xfec10040); /* CS6BWCR */ + + /* INTC */ + sh7724_rstandby_state.ipra = __raw_readw(0xa4080000); /* IPRA */ + sh7724_rstandby_state.iprb = __raw_readw(0xa4080004); /* IPRB */ + sh7724_rstandby_state.iprc = __raw_readw(0xa4080008); /* IPRC */ + sh7724_rstandby_state.iprd = __raw_readw(0xa408000c); /* IPRD */ + sh7724_rstandby_state.ipre = __raw_readw(0xa4080010); /* IPRE */ + sh7724_rstandby_state.iprf = __raw_readw(0xa4080014); /* IPRF */ + sh7724_rstandby_state.iprg = __raw_readw(0xa4080018); /* IPRG */ + sh7724_rstandby_state.iprh = __raw_readw(0xa408001c); /* IPRH */ + sh7724_rstandby_state.ipri = __raw_readw(0xa4080020); /* IPRI */ + sh7724_rstandby_state.iprj = __raw_readw(0xa4080024); /* 
IPRJ */ + sh7724_rstandby_state.iprk = __raw_readw(0xa4080028); /* IPRK */ + sh7724_rstandby_state.iprl = __raw_readw(0xa408002c); /* IPRL */ + sh7724_rstandby_state.imr0 = __raw_readb(0xa4080080); /* IMR0 */ + sh7724_rstandby_state.imr1 = __raw_readb(0xa4080084); /* IMR1 */ + sh7724_rstandby_state.imr2 = __raw_readb(0xa4080088); /* IMR2 */ + sh7724_rstandby_state.imr3 = __raw_readb(0xa408008c); /* IMR3 */ + sh7724_rstandby_state.imr4 = __raw_readb(0xa4080090); /* IMR4 */ + sh7724_rstandby_state.imr5 = __raw_readb(0xa4080094); /* IMR5 */ + sh7724_rstandby_state.imr6 = __raw_readb(0xa4080098); /* IMR6 */ + sh7724_rstandby_state.imr7 = __raw_readb(0xa408009c); /* IMR7 */ + sh7724_rstandby_state.imr8 = __raw_readb(0xa40800a0); /* IMR8 */ + sh7724_rstandby_state.imr9 = __raw_readb(0xa40800a4); /* IMR9 */ + sh7724_rstandby_state.imr10 = __raw_readb(0xa40800a8); /* IMR10 */ + sh7724_rstandby_state.imr11 = __raw_readb(0xa40800ac); /* IMR11 */ + sh7724_rstandby_state.imr12 = __raw_readb(0xa40800b0); /* IMR12 */ + + /* RWDT */ + sh7724_rstandby_state.rwtcnt = __raw_readb(0xa4520000); /* RWTCNT */ + sh7724_rstandby_state.rwtcnt |= 0x5a00; + sh7724_rstandby_state.rwtcsr = __raw_readb(0xa4520004); /* RWTCSR */ + sh7724_rstandby_state.rwtcsr |= 0xa500; + __raw_writew(sh7724_rstandby_state.rwtcsr & 0x07, 0xa4520004); + + /* CPG */ + sh7724_rstandby_state.irdaclk = __raw_readl(0xa4150018); /* IRDACLKCR */ + sh7724_rstandby_state.spuclk = __raw_readl(0xa415003c); /* SPUCLKCR */ + + return NOTIFY_DONE; +} + +static int sh7724_post_sleep_notifier_call(struct notifier_block *nb, + unsigned long flags, void *unused) +{ + if (!(flags & SUSP_SH_RSTANDBY)) + return NOTIFY_DONE; + + /* BCR */ + __raw_writel(sh7724_rstandby_state.mmselr, 0xff800020); /* MMSELR */ + __raw_writel(sh7724_rstandby_state.cs0bcr, 0xfec10004); /* CS0BCR */ + __raw_writel(sh7724_rstandby_state.cs4bcr, 0xfec10010); /* CS4BCR */ + __raw_writel(sh7724_rstandby_state.cs5abcr, 0xfec10014); /* CS5ABCR */ + __raw_writel(sh7724_rstandby_state.cs5bbcr, 0xfec10018); /* CS5BBCR */ + __raw_writel(sh7724_rstandby_state.cs6abcr, 0xfec1001c); /* CS6ABCR */ + __raw_writel(sh7724_rstandby_state.cs6bbcr, 0xfec10020); /* CS6BBCR */ + __raw_writel(sh7724_rstandby_state.cs4wcr, 0xfec10030); /* CS4WCR */ + __raw_writel(sh7724_rstandby_state.cs5awcr, 0xfec10034); /* CS5AWCR */ + __raw_writel(sh7724_rstandby_state.cs5bwcr, 0xfec10038); /* CS5BWCR */ + __raw_writel(sh7724_rstandby_state.cs6awcr, 0xfec1003c); /* CS6AWCR */ + __raw_writel(sh7724_rstandby_state.cs6bwcr, 0xfec10040); /* CS6BWCR */ + + /* INTC */ + __raw_writew(sh7724_rstandby_state.ipra, 0xa4080000); /* IPRA */ + __raw_writew(sh7724_rstandby_state.iprb, 0xa4080004); /* IPRB */ + __raw_writew(sh7724_rstandby_state.iprc, 0xa4080008); /* IPRC */ + __raw_writew(sh7724_rstandby_state.iprd, 0xa408000c); /* IPRD */ + __raw_writew(sh7724_rstandby_state.ipre, 0xa4080010); /* IPRE */ + __raw_writew(sh7724_rstandby_state.iprf, 0xa4080014); /* IPRF */ + __raw_writew(sh7724_rstandby_state.iprg, 0xa4080018); /* IPRG */ + __raw_writew(sh7724_rstandby_state.iprh, 0xa408001c); /* IPRH */ + __raw_writew(sh7724_rstandby_state.ipri, 0xa4080020); /* IPRI */ + __raw_writew(sh7724_rstandby_state.iprj, 0xa4080024); /* IPRJ */ + __raw_writew(sh7724_rstandby_state.iprk, 0xa4080028); /* IPRK */ + __raw_writew(sh7724_rstandby_state.iprl, 0xa408002c); /* IPRL */ + __raw_writeb(sh7724_rstandby_state.imr0, 0xa4080080); /* IMR0 */ + __raw_writeb(sh7724_rstandby_state.imr1, 0xa4080084); /* IMR1 */ + 
__raw_writeb(sh7724_rstandby_state.imr2, 0xa4080088); /* IMR2 */ + __raw_writeb(sh7724_rstandby_state.imr3, 0xa408008c); /* IMR3 */ + __raw_writeb(sh7724_rstandby_state.imr4, 0xa4080090); /* IMR4 */ + __raw_writeb(sh7724_rstandby_state.imr5, 0xa4080094); /* IMR5 */ + __raw_writeb(sh7724_rstandby_state.imr6, 0xa4080098); /* IMR6 */ + __raw_writeb(sh7724_rstandby_state.imr7, 0xa408009c); /* IMR7 */ + __raw_writeb(sh7724_rstandby_state.imr8, 0xa40800a0); /* IMR8 */ + __raw_writeb(sh7724_rstandby_state.imr9, 0xa40800a4); /* IMR9 */ + __raw_writeb(sh7724_rstandby_state.imr10, 0xa40800a8); /* IMR10 */ + __raw_writeb(sh7724_rstandby_state.imr11, 0xa40800ac); /* IMR11 */ + __raw_writeb(sh7724_rstandby_state.imr12, 0xa40800b0); /* IMR12 */ + + /* RWDT */ + __raw_writew(sh7724_rstandby_state.rwtcnt, 0xa4520000); /* RWTCNT */ + __raw_writew(sh7724_rstandby_state.rwtcsr, 0xa4520004); /* RWTCSR */ + + /* CPG */ + __raw_writel(sh7724_rstandby_state.irdaclk, 0xa4150018); /* IRDACLKCR */ + __raw_writel(sh7724_rstandby_state.spuclk, 0xa415003c); /* SPUCLKCR */ + + return NOTIFY_DONE; +} + +static struct notifier_block sh7724_pre_sleep_notifier = { + .notifier_call = sh7724_pre_sleep_notifier_call, + .priority = SH_MOBILE_PRE(SH_MOBILE_SLEEP_CPU), +}; + +static struct notifier_block sh7724_post_sleep_notifier = { + .notifier_call = sh7724_post_sleep_notifier_call, + .priority = SH_MOBILE_POST(SH_MOBILE_SLEEP_CPU), +}; + +static int __init sh7724_sleep_setup(void) +{ + atomic_notifier_chain_register(&sh_mobile_pre_sleep_notifier_list, + &sh7724_pre_sleep_notifier); + + atomic_notifier_chain_register(&sh_mobile_post_sleep_notifier_list, + &sh7724_post_sleep_notifier); + return 0; +} +arch_initcall(sh7724_sleep_setup); + diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c index e848443deeb..c7ba9166e18 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c @@ -15,6 +15,15 @@ #include <linux/sh_timer.h> #include <asm/mmzone.h> +/* + * This intentionally only registers SCIF ports 0, 1, and 3. SCIF 2 + * INTEVT values overlap with the FPU EXPEVT ones, requiring special + * demuxing in the exception dispatch path. + * + * As this overlap is something that never should have made it in to + * silicon in the first place, we just refuse to deal with the port at + * all rather than adding infrastructure to hack around it. 
+ */ static struct plat_sci_port sci_platform_data[] = { { .mapbase = 0xffc30000, @@ -27,11 +36,6 @@ static struct plat_sci_port sci_platform_data[] = { .type = PORT_SCIF, .irqs = { 44, 45, 47, 46 }, }, { - .mapbase = 0xffc50000, - .flags = UPF_BOOT_AUTOCONF, - .type = PORT_SCIF, - .irqs = { 48, 49, 51, 50 }, - }, { .mapbase = 0xffc60000, .flags = UPF_BOOT_AUTOCONF, .type = PORT_SCIF, @@ -268,7 +272,11 @@ enum { UNUSED = 0, /* interrupt sources */ - IRL, IRQ0, IRQ1, IRQ2, IRQ3, + IRL_LLLL, IRL_LLLH, IRL_LLHL, IRL_LLHH, + IRL_LHLL, IRL_LHLH, IRL_LHHL, IRL_LHHH, + IRL_HLLL, IRL_HLLH, IRL_HLHL, IRL_HLHH, + IRL_HHLL, IRL_HHLH, IRL_HHHL, + IRQ0, IRQ1, IRQ2, IRQ3, HUDII, TMU0, TMU1, TMU2, TMU3, TMU4, TMU5, PCII0, PCII1, PCII2, PCII3, PCII4, @@ -291,7 +299,7 @@ enum { INTICI4, INTICI5, INTICI6, INTICI7, /* interrupt groups */ - PCII56789, SCIF0, SCIF1, SCIF2, SCIF3, + IRL, PCII56789, SCIF0, SCIF1, SCIF2, SCIF3, DMAC0, DMAC1, }; @@ -309,8 +317,6 @@ static struct intc_vect vectors[] __initdata = { INTC_VECT(SCIF0_BRI, 0x740), INTC_VECT(SCIF0_TXI, 0x760), INTC_VECT(SCIF1_ERI, 0x780), INTC_VECT(SCIF1_RXI, 0x7a0), INTC_VECT(SCIF1_BRI, 0x7c0), INTC_VECT(SCIF1_TXI, 0x7e0), - INTC_VECT(SCIF2_ERI, 0x800), INTC_VECT(SCIF2_RXI, 0x820), - INTC_VECT(SCIF2_BRI, 0x840), INTC_VECT(SCIF2_TXI, 0x860), INTC_VECT(SCIF3_ERI, 0x880), INTC_VECT(SCIF3_RXI, 0x8a0), INTC_VECT(SCIF3_BRI, 0x8c0), INTC_VECT(SCIF3_TXI, 0x8e0), INTC_VECT(DMAC0_DMINT0, 0x900), INTC_VECT(DMAC0_DMINT1, 0x920), @@ -344,10 +350,13 @@ static struct intc_vect vectors[] __initdata = { }; static struct intc_group groups[] __initdata = { + INTC_GROUP(IRL, IRL_LLLL, IRL_LLLH, IRL_LLHL, IRL_LLHH, + IRL_LHLL, IRL_LHLH, IRL_LHHL, IRL_LHHH, + IRL_HLLL, IRL_HLLH, IRL_HLHL, IRL_HLHH, + IRL_HHLL, IRL_HHLH, IRL_HHHL), INTC_GROUP(PCII56789, PCII5, PCII6, PCII7, PCII8, PCII9), INTC_GROUP(SCIF0, SCIF0_ERI, SCIF0_RXI, SCIF0_BRI, SCIF0_TXI), INTC_GROUP(SCIF1, SCIF1_ERI, SCIF1_RXI, SCIF1_BRI, SCIF1_TXI), - INTC_GROUP(SCIF2, SCIF2_ERI, SCIF2_RXI, SCIF2_BRI, SCIF2_TXI), INTC_GROUP(SCIF3, SCIF3_ERI, SCIF3_RXI, SCIF3_BRI, SCIF3_TXI), INTC_GROUP(DMAC0, DMAC0_DMINT0, DMAC0_DMINT1, DMAC0_DMINT2, DMAC0_DMINT3, DMAC0_DMINT4, DMAC0_DMINT5, DMAC0_DMAE), @@ -419,14 +428,14 @@ static DECLARE_INTC_DESC(intc_desc_irq, "shx3-irq", vectors_irq, groups, /* External interrupt pins in IRL mode */ static struct intc_vect vectors_irl[] __initdata = { - INTC_VECT(IRL, 0x200), INTC_VECT(IRL, 0x220), - INTC_VECT(IRL, 0x240), INTC_VECT(IRL, 0x260), - INTC_VECT(IRL, 0x280), INTC_VECT(IRL, 0x2a0), - INTC_VECT(IRL, 0x2c0), INTC_VECT(IRL, 0x2e0), - INTC_VECT(IRL, 0x300), INTC_VECT(IRL, 0x320), - INTC_VECT(IRL, 0x340), INTC_VECT(IRL, 0x360), - INTC_VECT(IRL, 0x380), INTC_VECT(IRL, 0x3a0), - INTC_VECT(IRL, 0x3c0), + INTC_VECT(IRL_LLLL, 0x200), INTC_VECT(IRL_LLLH, 0x220), + INTC_VECT(IRL_LLHL, 0x240), INTC_VECT(IRL_LLHH, 0x260), + INTC_VECT(IRL_LHLL, 0x280), INTC_VECT(IRL_LHLH, 0x2a0), + INTC_VECT(IRL_LHHL, 0x2c0), INTC_VECT(IRL_LHHH, 0x2e0), + INTC_VECT(IRL_HLLL, 0x300), INTC_VECT(IRL_HLLH, 0x320), + INTC_VECT(IRL_HLHL, 0x340), INTC_VECT(IRL_HLHH, 0x360), + INTC_VECT(IRL_HHLL, 0x380), INTC_VECT(IRL_HHLH, 0x3a0), + INTC_VECT(IRL_HHHL, 0x3c0), }; static DECLARE_INTC_DESC(intc_desc_irl, "shx3-irl", vectors_irl, groups, diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c index 185ec3976a2..5863e0c4d02 100644 --- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c @@ -14,6 +14,13 @@ #include <linux/interrupt.h> #include <linux/io.h> +#define 
STBCR_REG(phys_id) (0xfe400004 | (phys_id << 12)) +#define RESET_REG(phys_id) (0xfe400008 | (phys_id << 12)) + +#define STBCR_MSTP 0x00000001 +#define STBCR_RESET 0x00000002 +#define STBCR_LTSLP 0x80000000 + static irqreturn_t ipi_interrupt_handler(int irq, void *arg) { unsigned int message = (unsigned int)(long)arg; @@ -21,9 +28,9 @@ static irqreturn_t ipi_interrupt_handler(int irq, void *arg) unsigned int offs = 4 * cpu; unsigned int x; - x = ctrl_inl(0xfe410070 + offs); /* C0INITICI..CnINTICI */ + x = __raw_readl(0xfe410070 + offs); /* C0INITICI..CnINTICI */ x &= (1 << (message << 2)); - ctrl_outl(x, 0xfe410080 + offs); /* C0INTICICLR..CnINTICICLR */ + __raw_writel(x, 0xfe410080 + offs); /* C0INTICICLR..CnINTICICLR */ smp_message_recv(message); @@ -37,6 +44,9 @@ void __init plat_smp_setup(void) init_cpu_possible(cpumask_of(cpu)); + /* Enable light sleep for the boot CPU */ + __raw_writel(__raw_readl(STBCR_REG(cpu)) | STBCR_LTSLP, STBCR_REG(cpu)); + __cpu_number_map[0] = 0; __cpu_logical_map[0] = 0; @@ -66,32 +76,23 @@ void __init plat_prepare_cpus(unsigned int max_cpus) "IPI", (void *)(long)i); } -#define STBCR_REG(phys_id) (0xfe400004 | (phys_id << 12)) -#define RESET_REG(phys_id) (0xfe400008 | (phys_id << 12)) - -#define STBCR_MSTP 0x00000001 -#define STBCR_RESET 0x00000002 -#define STBCR_LTSLP 0x80000000 - -#define STBCR_AP_VAL (STBCR_RESET | STBCR_LTSLP) - void plat_start_cpu(unsigned int cpu, unsigned long entry_point) { - ctrl_outl(entry_point, RESET_REG(cpu)); + __raw_writel(entry_point, RESET_REG(cpu)); - if (!(ctrl_inl(STBCR_REG(cpu)) & STBCR_MSTP)) - ctrl_outl(STBCR_MSTP, STBCR_REG(cpu)); + if (!(__raw_readl(STBCR_REG(cpu)) & STBCR_MSTP)) + __raw_writel(STBCR_MSTP, STBCR_REG(cpu)); - while (!(ctrl_inl(STBCR_REG(cpu)) & STBCR_MSTP)) + while (!(__raw_readl(STBCR_REG(cpu)) & STBCR_MSTP)) cpu_relax(); /* Start up secondary processor by sending a reset */ - ctrl_outl(STBCR_AP_VAL, STBCR_REG(cpu)); + __raw_writel(STBCR_RESET | STBCR_LTSLP, STBCR_REG(cpu)); } int plat_smp_processor_id(void) { - return ctrl_inl(0xff000048); /* CPIDR */ + return __raw_readl(0xff000048); /* CPIDR */ } void plat_send_ipi(unsigned int cpu, unsigned int message) @@ -100,5 +101,5 @@ void plat_send_ipi(unsigned int cpu, unsigned int message) BUG_ON(cpu >= 4); - ctrl_outl(1 << (message << 2), addr); /* C0INTICI..CnINTICI */ + __raw_writel(1 << (message << 2), addr); /* C0INTICI..CnINTICI */ } diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S index b0aacf67525..8f13f73cb2c 100644 --- a/arch/sh/kernel/cpu/sh5/entry.S +++ b/arch/sh/kernel/cpu/sh5/entry.S @@ -933,7 +933,7 @@ ret_with_reschedule: pta restore_all, tr1 - movi (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK), r8 + movi _TIF_SIGPENDING, r8 and r8, r7, r8 pta work_notifysig, tr0 bne r8, ZERO, tr0 diff --git a/arch/sh/kernel/cpu/shmobile/cpuidle.c b/arch/sh/kernel/cpu/shmobile/cpuidle.c index 1c504bd972c..83972aa319c 100644 --- a/arch/sh/kernel/cpu/shmobile/cpuidle.c +++ b/arch/sh/kernel/cpu/shmobile/cpuidle.c @@ -87,25 +87,31 @@ void sh_mobile_setup_cpuidle(void) dev->safe_state = state; - state = &dev->states[i++]; - snprintf(state->name, CPUIDLE_NAME_LEN, "C1"); - strncpy(state->desc, "SuperH Sleep Mode [SF]", CPUIDLE_DESC_LEN); - state->exit_latency = 100; - state->target_residency = 1 * 2; - state->power_usage = 1; - state->flags = 0; - state->flags |= CPUIDLE_FLAG_TIME_VALID; - state->enter = cpuidle_sleep_enter; + if (sh_mobile_sleep_supported & SUSP_SH_SF) { + state = &dev->states[i++]; + snprintf(state->name, 
CPUIDLE_NAME_LEN, "C1"); + strncpy(state->desc, "SuperH Sleep Mode [SF]", + CPUIDLE_DESC_LEN); + state->exit_latency = 100; + state->target_residency = 1 * 2; + state->power_usage = 1; + state->flags = 0; + state->flags |= CPUIDLE_FLAG_TIME_VALID; + state->enter = cpuidle_sleep_enter; + } - state = &dev->states[i++]; - snprintf(state->name, CPUIDLE_NAME_LEN, "C2"); - strncpy(state->desc, "SuperH Mobile Standby Mode [SF]", CPUIDLE_DESC_LEN); - state->exit_latency = 2300; - state->target_residency = 1 * 2; - state->power_usage = 1; - state->flags = 0; - state->flags |= CPUIDLE_FLAG_TIME_VALID; - state->enter = cpuidle_sleep_enter; + if (sh_mobile_sleep_supported & SUSP_SH_STANDBY) { + state = &dev->states[i++]; + snprintf(state->name, CPUIDLE_NAME_LEN, "C2"); + strncpy(state->desc, "SuperH Mobile Standby Mode [SF]", + CPUIDLE_DESC_LEN); + state->exit_latency = 2300; + state->target_residency = 1 * 2; + state->power_usage = 1; + state->flags = 0; + state->flags |= CPUIDLE_FLAG_TIME_VALID; + state->enter = cpuidle_sleep_enter; + } dev->state_count = i; diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c index ee3c2aaf66f..ca029a44743 100644 --- a/arch/sh/kernel/cpu/shmobile/pm.c +++ b/arch/sh/kernel/cpu/shmobile/pm.c @@ -15,6 +15,13 @@ #include <linux/suspend.h> #include <asm/suspend.h> #include <asm/uaccess.h> +#include <asm/cacheflush.h> + +/* + * Notifier lists for pre/post sleep notification + */ +ATOMIC_NOTIFIER_HEAD(sh_mobile_pre_sleep_notifier_list); +ATOMIC_NOTIFIER_HEAD(sh_mobile_post_sleep_notifier_list); /* * Sleep modes available on SuperH Mobile: @@ -26,30 +33,105 @@ #define SUSP_MODE_SLEEP (SUSP_SH_SLEEP) #define SUSP_MODE_SLEEP_SF (SUSP_SH_SLEEP | SUSP_SH_SF) #define SUSP_MODE_STANDBY_SF (SUSP_SH_STANDBY | SUSP_SH_SF) +#define SUSP_MODE_RSTANDBY (SUSP_SH_RSTANDBY | SUSP_SH_MMU | SUSP_SH_SF) + /* + * U-standby mode is unsupported since it needs bootloader hacks + */ -/* - * The following modes are not there yet: - * - * R-standby mode is unsupported, but will be added in the future - * U-standby mode is low priority since it needs bootloader hacks - */ - -#define ILRAM_BASE 0xe5200000 - -extern const unsigned char sh_mobile_standby[]; -extern const unsigned int sh_mobile_standby_size; +#ifdef CONFIG_CPU_SUBTYPE_SH7724 +#define RAM_BASE 0xfd800000 /* RSMEM */ +#else +#define RAM_BASE 0xe5200000 /* ILRAM */ +#endif void sh_mobile_call_standby(unsigned long mode) { - void *onchip_mem = (void *)ILRAM_BASE; - void (*standby_onchip_mem)(unsigned long, unsigned long) = onchip_mem; + void *onchip_mem = (void *)RAM_BASE; + struct sh_sleep_data *sdp = onchip_mem; + void (*standby_onchip_mem)(unsigned long, unsigned long); + + /* code located directly after data structure */ + standby_onchip_mem = (void *)(sdp + 1); + + atomic_notifier_call_chain(&sh_mobile_pre_sleep_notifier_list, + mode, NULL); + + /* flush the caches if MMU flag is set */ + if (mode & SUSP_SH_MMU) + flush_cache_all(); /* Let assembly snippet in on-chip memory handle the rest */ - standby_onchip_mem(mode, ILRAM_BASE); + standby_onchip_mem(mode, RAM_BASE); + + atomic_notifier_call_chain(&sh_mobile_post_sleep_notifier_list, + mode, NULL); +} + +extern char sh_mobile_sleep_enter_start; +extern char sh_mobile_sleep_enter_end; + +extern char sh_mobile_sleep_resume_start; +extern char sh_mobile_sleep_resume_end; + +unsigned long sh_mobile_sleep_supported = SUSP_SH_SLEEP; + +void sh_mobile_register_self_refresh(unsigned long flags, + void *pre_start, void *pre_end, + void *post_start, void *post_end) 
+{ + void *onchip_mem = (void *)RAM_BASE; + void *vp; + struct sh_sleep_data *sdp; + int n; + + /* part 0: data area */ + sdp = onchip_mem; + sdp->addr.stbcr = 0xa4150020; /* STBCR */ + sdp->addr.bar = 0xa4150040; /* BAR */ + sdp->addr.pteh = 0xff000000; /* PTEH */ + sdp->addr.ptel = 0xff000004; /* PTEL */ + sdp->addr.ttb = 0xff000008; /* TTB */ + sdp->addr.tea = 0xff00000c; /* TEA */ + sdp->addr.mmucr = 0xff000010; /* MMUCR */ + sdp->addr.ptea = 0xff000034; /* PTEA */ + sdp->addr.pascr = 0xff000070; /* PASCR */ + sdp->addr.irmcr = 0xff000078; /* IRMCR */ + sdp->addr.ccr = 0xff00001c; /* CCR */ + sdp->addr.ramcr = 0xff000074; /* RAMCR */ + vp = sdp + 1; + + /* part 1: common code to enter sleep mode */ + n = &sh_mobile_sleep_enter_end - &sh_mobile_sleep_enter_start; + memcpy(vp, &sh_mobile_sleep_enter_start, n); + vp += roundup(n, 4); + + /* part 2: board specific code to enter self-refresh mode */ + n = pre_end - pre_start; + memcpy(vp, pre_start, n); + sdp->sf_pre = (unsigned long)vp; + vp += roundup(n, 4); + + /* part 3: board specific code to resume from self-refresh mode */ + n = post_end - post_start; + memcpy(vp, post_start, n); + sdp->sf_post = (unsigned long)vp; + vp += roundup(n, 4); + + /* part 4: common code to resume from sleep mode */ + WARN_ON(vp > (onchip_mem + 0x600)); + vp = onchip_mem + 0x600; /* located at interrupt vector */ + n = &sh_mobile_sleep_resume_end - &sh_mobile_sleep_resume_start; + memcpy(vp, &sh_mobile_sleep_resume_start, n); + sdp->resume = (unsigned long)vp; + + sh_mobile_sleep_supported |= flags; } static int sh_pm_enter(suspend_state_t state) { + if (!(sh_mobile_sleep_supported & SUSP_MODE_STANDBY_SF)) + return -ENXIO; + local_irq_disable(); set_bl_bit(); sh_mobile_call_standby(SUSP_MODE_STANDBY_SF); @@ -65,13 +147,6 @@ static struct platform_suspend_ops sh_pm_ops = { static int __init sh_pm_init(void) { - void *onchip_mem = (void *)ILRAM_BASE; - - /* Copy the assembly snippet to the otherwise ununsed ILRAM */ - memcpy(onchip_mem, sh_mobile_standby, sh_mobile_standby_size); - wmb(); - ctrl_barrier(); - suspend_set_ops(&sh_pm_ops); sh_mobile_setup_cpuidle(); return 0; diff --git a/arch/sh/kernel/cpu/shmobile/pm_runtime.c b/arch/sh/kernel/cpu/shmobile/pm_runtime.c index 7c615b17e20..6dcb8166a64 100644 --- a/arch/sh/kernel/cpu/shmobile/pm_runtime.c +++ b/arch/sh/kernel/cpu/shmobile/pm_runtime.c @@ -45,12 +45,14 @@ static int __platform_pm_runtime_resume(struct platform_device *pdev) dev_dbg(d, "__platform_pm_runtime_resume() [%d]\n", hwblk); - if (d->driver && d->driver->pm && d->driver->pm->runtime_resume) { + if (d->driver) { hwblk_enable(hwblk_info, hwblk); ret = 0; if (test_bit(PDEV_ARCHDATA_FLAG_SUSP, &ad->flags)) { - ret = d->driver->pm->runtime_resume(d); + if (d->driver->pm && d->driver->pm->runtime_resume) + ret = d->driver->pm->runtime_resume(d); + if (!ret) clear_bit(PDEV_ARCHDATA_FLAG_SUSP, &ad->flags); else @@ -73,12 +75,15 @@ static int __platform_pm_runtime_suspend(struct platform_device *pdev) dev_dbg(d, "__platform_pm_runtime_suspend() [%d]\n", hwblk); - if (d->driver && d->driver->pm && d->driver->pm->runtime_suspend) { + if (d->driver) { BUG_ON(!test_bit(PDEV_ARCHDATA_FLAG_IDLE, &ad->flags)); + ret = 0; - hwblk_enable(hwblk_info, hwblk); - ret = d->driver->pm->runtime_suspend(d); - hwblk_disable(hwblk_info, hwblk); + if (d->driver->pm && d->driver->pm->runtime_suspend) { + hwblk_enable(hwblk_info, hwblk); + ret = d->driver->pm->runtime_suspend(d); + hwblk_disable(hwblk_info, hwblk); + } if (!ret) { set_bit(PDEV_ARCHDATA_FLAG_SUSP, 
&ad->flags); diff --git a/arch/sh/kernel/cpu/shmobile/sleep.S b/arch/sh/kernel/cpu/shmobile/sleep.S index a439e6c7824..e9dd7fa0abd 100644 --- a/arch/sh/kernel/cpu/shmobile/sleep.S +++ b/arch/sh/kernel/cpu/shmobile/sleep.S @@ -20,79 +20,103 @@ * Kernel mode register usage, see entry.S: * k0 scratch * k1 scratch - * k4 scratch */ #define k0 r0 #define k1 r1 -#define k4 r4 -/* manage self-refresh and enter standby mode. +/* manage self-refresh and enter standby mode. must be self-contained. * this code will be copied to on-chip memory and executed from there. */ + .balign 4 +ENTRY(sh_mobile_sleep_enter_start) - .balign 4096,0,4096 -ENTRY(sh_mobile_standby) + /* save mode flags */ + mov.l r4, @(SH_SLEEP_MODE, r5) /* save original vbr */ - stc vbr, r1 - mova saved_vbr, r0 - mov.l r1, @r0 + stc vbr, r0 + mov.l r0, @(SH_SLEEP_VBR, r5) /* point vbr to our on-chip memory page */ ldc r5, vbr /* save return address */ - mova saved_spc, r0 - sts pr, r5 - mov.l r5, @r0 + sts pr, r0 + mov.l r0, @(SH_SLEEP_SPC, r5) /* save sr */ - mova saved_sr, r0 - stc sr, r5 - mov.l r5, @r0 + stc sr, r0 + mov.l r0, @(SH_SLEEP_SR, r5) - /* save mode flags */ - mova saved_mode, r0 - mov.l r4, @r0 + /* save sp */ + mov.l r15, @(SH_SLEEP_SP, r5) + + /* save stbcr */ + bsr save_register + mov #SH_SLEEP_REG_STBCR, r0 + + /* save mmu and cache context if needed */ + mov.l @(SH_SLEEP_MODE, r5), r0 + tst #SUSP_SH_MMU, r0 + bt skip_mmu_save_disable + + /* save mmu state */ + bsr save_register + mov #SH_SLEEP_REG_PTEH, r0 + + bsr save_register + mov #SH_SLEEP_REG_PTEL, r0 + + bsr save_register + mov #SH_SLEEP_REG_TTB, r0 + + bsr save_register + mov #SH_SLEEP_REG_TEA, r0 + + bsr save_register + mov #SH_SLEEP_REG_MMUCR, r0 + + bsr save_register + mov #SH_SLEEP_REG_PTEA, r0 + + bsr save_register + mov #SH_SLEEP_REG_PASCR, r0 - /* put mode flags in r0 */ - mov r4, r0 + bsr save_register + mov #SH_SLEEP_REG_IRMCR, r0 + /* invalidate TLBs and disable the MMU */ + bsr get_register + mov #SH_SLEEP_REG_MMUCR, r0 + mov #4, r1 + mov.l r1, @r0 + icbi @r0 + + /* save cache registers and disable caches */ + bsr save_register + mov #SH_SLEEP_REG_CCR, r0 + + bsr save_register + mov #SH_SLEEP_REG_RAMCR, r0 + + bsr get_register + mov #SH_SLEEP_REG_CCR, r0 + mov #0, r1 + mov.l r1, @r0 + icbi @r0 + +skip_mmu_save_disable: + /* call self-refresh entering code if needed */ + mov.l @(SH_SLEEP_MODE, r5), r0 tst #SUSP_SH_SF, r0 bt skip_set_sf -#ifdef CONFIG_CPU_SUBTYPE_SH7724 - /* DBSC: put memory in self-refresh mode */ - mov.l dben_reg, r4 - mov.l dben_data0, r1 - mov.l r1, @r4 - - mov.l dbrfpdn0_reg, r4 - mov.l dbrfpdn0_data0, r1 - mov.l r1, @r4 - - mov.l dbcmdcnt_reg, r4 - mov.l dbcmdcnt_data0, r1 - mov.l r1, @r4 - - mov.l dbcmdcnt_reg, r4 - mov.l dbcmdcnt_data1, r1 - mov.l r1, @r4 - - mov.l dbrfpdn0_reg, r4 - mov.l dbrfpdn0_data1, r1 - mov.l r1, @r4 -#else - /* SBSC: disable power down and put in self-refresh mode */ - mov.l 1f, r4 - mov.l 2f, r1 - mov.l @r4, r2 - or r1, r2 - mov.l 3f, r3 - and r3, r2 - mov.l r2, @r4 -#endif + + mov.l @(SH_SLEEP_SF_PRE, r5), r0 + jsr @r0 + nop skip_set_sf: + mov.l @(SH_SLEEP_MODE, r5), r0 tst #SUSP_SH_STANDBY, r0 bt test_rstandby @@ -104,6 +128,12 @@ test_rstandby: tst #SUSP_SH_RSTANDBY, r0 bt test_ustandby + /* setup BAR register */ + bsr get_register + mov #SH_SLEEP_REG_BAR, r0 + mov.l @(SH_SLEEP_RESUME, r5), r1 + mov.l r1, @r0 + /* set mode to "r-standby mode" */ bra do_sleep mov #0x20, r1 @@ -123,124 +153,136 @@ force_sleep: do_sleep: /* setup and enter selected standby mode */ - mov.l 5f, r4 - mov.l r1, @r4 + 
bsr get_register + mov #SH_SLEEP_REG_STBCR, r0 + mov.l r1, @r0 again: sleep bra again nop -restore_jump_vbr: +save_register: + add #SH_SLEEP_BASE_ADDR, r0 + mov.l @(r0, r5), r1 + add #-SH_SLEEP_BASE_ADDR, r0 + mov.l @r1, r1 + add #SH_SLEEP_BASE_DATA, r0 + mov.l r1, @(r0, r5) + add #-SH_SLEEP_BASE_DATA, r0 + rts + nop + +get_register: + add #SH_SLEEP_BASE_ADDR, r0 + mov.l @(r0, r5), r0 + rts + nop +ENTRY(sh_mobile_sleep_enter_end) + + .balign 4 +ENTRY(sh_mobile_sleep_resume_start) + + /* figure out start address */ + bsr 0f + nop +0: + sts pr, k1 + mov.l 1f, k0 + and k0, k1 + + /* store pointer to data area in VBR */ + ldc k1, vbr + + /* setup sr with saved sr */ + mov.l @(SH_SLEEP_SR, k1), k0 + ldc k0, sr + + /* now: user register set! */ + stc vbr, r5 + /* setup spc with return address to c code */ - mov.l saved_spc, k0 - ldc k0, spc + mov.l @(SH_SLEEP_SPC, r5), r0 + ldc r0, spc /* restore vbr */ - mov.l saved_vbr, k0 - ldc k0, vbr + mov.l @(SH_SLEEP_VBR, r5), r0 + ldc r0, vbr /* setup ssr with saved sr */ - mov.l saved_sr, k0 - ldc k0, ssr + mov.l @(SH_SLEEP_SR, r5), r0 + ldc r0, ssr - /* get mode flags */ - mov.l saved_mode, k0 + /* restore sp */ + mov.l @(SH_SLEEP_SP, r5), r15 -done_sleep: - /* reset standby mode to sleep mode */ - mov.l 5f, k4 - mov #0x00, k1 - mov.l k1, @k4 + /* restore sleep mode register */ + bsr restore_register + mov #SH_SLEEP_REG_STBCR, r0 - tst #SUSP_SH_SF, k0 + /* call self-refresh resume code if needed */ + mov.l @(SH_SLEEP_MODE, r5), r0 + tst #SUSP_SH_SF, r0 bt skip_restore_sf -#ifdef CONFIG_CPU_SUBTYPE_SH7724 - /* DBSC: put memory in auto-refresh mode */ - mov.l dbrfpdn0_reg, k4 - mov.l dbrfpdn0_data0, k1 - mov.l k1, @k4 - - nop /* sleep 140 ns */ - nop - nop - nop - - mov.l dbcmdcnt_reg, k4 - mov.l dbcmdcnt_data0, k1 - mov.l k1, @k4 - - mov.l dbcmdcnt_reg, k4 - mov.l dbcmdcnt_data1, k1 - mov.l k1, @k4 - - mov.l dben_reg, k4 - mov.l dben_data1, k1 - mov.l k1, @k4 - - mov.l dbrfpdn0_reg, k4 - mov.l dbrfpdn0_data2, k1 - mov.l k1, @k4 -#else - /* SBSC: set auto-refresh mode */ - mov.l 1f, k4 - mov.l @k4, k0 - mov.l 4f, k1 - and k1, k0 - mov.l k0, @k4 - mov.l 6f, k4 - mov.l 8f, k0 - mov.l @k4, k1 - mov #-1, k4 - add k4, k1 - or k1, k0 - mov.l 7f, k1 - mov.l k0, @k1 -#endif + mov.l @(SH_SLEEP_SF_POST, r5), r0 + jsr @r0 + nop + skip_restore_sf: - /* jump to vbr vector */ - mov.l saved_vbr, k0 - mov.l offset_vbr, k4 - add k4, k0 - jmp @k0 + /* restore mmu and cache state if needed */ + mov.l @(SH_SLEEP_MODE, r5), r0 + tst #SUSP_SH_MMU, r0 + bt skip_restore_mmu + + /* restore mmu state */ + bsr restore_register + mov #SH_SLEEP_REG_PTEH, r0 + + bsr restore_register + mov #SH_SLEEP_REG_PTEL, r0 + + bsr restore_register + mov #SH_SLEEP_REG_TTB, r0 + + bsr restore_register + mov #SH_SLEEP_REG_TEA, r0 + + bsr restore_register + mov #SH_SLEEP_REG_PTEA, r0 + + bsr restore_register + mov #SH_SLEEP_REG_PASCR, r0 + + bsr restore_register + mov #SH_SLEEP_REG_IRMCR, r0 + + bsr restore_register + mov #SH_SLEEP_REG_MMUCR, r0 + icbi @r0 + + /* restore cache settings */ + bsr restore_register + mov #SH_SLEEP_REG_RAMCR, r0 + icbi @r0 + + bsr restore_register + mov #SH_SLEEP_REG_CCR, r0 + icbi @r0 + +skip_restore_mmu: + rte nop - .balign 4 -saved_mode: .long 0 -saved_spc: .long 0 -saved_sr: .long 0 -saved_vbr: .long 0 -offset_vbr: .long 0x600 -#ifdef CONFIG_CPU_SUBTYPE_SH7724 -dben_reg: .long 0xfd000010 /* DBEN */ -dben_data0: .long 0 -dben_data1: .long 1 -dbrfpdn0_reg: .long 0xfd000040 /* DBRFPDN0 */ -dbrfpdn0_data0: .long 0 -dbrfpdn0_data1: .long 1 -dbrfpdn0_data2: .long 
0x00010000 -dbcmdcnt_reg: .long 0xfd000014 /* DBCMDCNT */ -dbcmdcnt_data0: .long 2 -dbcmdcnt_data1: .long 4 -#else -1: .long 0xfe400008 /* SDCR0 */ -2: .long 0x00000400 -3: .long 0xffff7fff -4: .long 0xfffffbff -#endif -5: .long 0xa4150020 /* STBCR */ -6: .long 0xfe40001c /* RTCOR */ -7: .long 0xfe400018 /* RTCNT */ -8: .long 0xa55a0000 - - -/* interrupt vector @ 0x600 */ - .balign 0x400,0,0x400 - .long 0xdeadbeef - .balign 0x200,0,0x200 - bra restore_jump_vbr +restore_register: + add #SH_SLEEP_BASE_DATA, r0 + mov.l @(r0, r5), r1 + add #-SH_SLEEP_BASE_DATA, r0 + add #SH_SLEEP_BASE_ADDR, r0 + mov.l @(r0, r5), r0 + mov.l r1, @r0 + rts nop -sh_mobile_standby_end: -ENTRY(sh_mobile_standby_size) - .long sh_mobile_standby_end - sh_mobile_standby + .balign 4 +1: .long ~0x7ff +ENTRY(sh_mobile_sleep_resume_end) diff --git a/arch/sh/kernel/cpu/ubc.S b/arch/sh/kernel/cpu/ubc.S deleted file mode 100644 index 81923079fa1..00000000000 --- a/arch/sh/kernel/cpu/ubc.S +++ /dev/null @@ -1,59 +0,0 @@ -/* - * arch/sh/kernel/cpu/ubc.S - * - * Set of management routines for the User Break Controller (UBC) - * - * Copyright (C) 2002 Paul Mundt - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include <linux/linkage.h> -#include <asm/ubc.h> - -#define STBCR2 0xffc00010 - -ENTRY(ubc_sleep) - mov #0, r0 - - mov.l 1f, r1 ! Zero out UBC_BBRA .. - mov.w r0, @r1 - - mov.l 2f, r1 ! .. same for BBRB .. - mov.w r0, @r1 - - mov.l 3f, r1 ! .. and again for BRCR. - mov.w r0, @r1 - - mov.w @r1, r0 ! Dummy read BRCR - - mov.l 4f, r1 ! Set MSTP5 in STBCR2 - mov.b @r1, r0 - or #0x01, r0 - mov.b r0, @r1 - - mov.b @r1, r0 ! Two dummy reads .. - mov.b @r1, r0 - - rts - nop - -ENTRY(ubc_wakeup) - mov.l 4f, r1 ! Clear MSTP5 - mov.b @r1, r0 - and #0xfe, r0 - mov.b r0, @r1 - - mov.b @r1, r0 ! Two more dummy reads .. - mov.b @r1, r0 - - rts - nop - -1: .long UBC_BBRA -2: .long UBC_BBRB -3: .long UBC_BRCR -4: .long STBCR2 - diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c new file mode 100644 index 00000000000..3c55b87f8b6 --- /dev/null +++ b/arch/sh/kernel/dma-nommu.c @@ -0,0 +1,82 @@ +/* + * DMA mapping support for platforms lacking IOMMUs. + * + * Copyright (C) 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ */ +#include <linux/dma-mapping.h> +#include <linux/io.h> + +static dma_addr_t nommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + dma_addr_t addr = page_to_phys(page) + offset; + + WARN_ON(size == 0); + dma_cache_sync(dev, page_address(page) + offset, size, dir); + + return addr; +} + +static int nommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scatterlist *s; + int i; + + WARN_ON(nents == 0 || sg[0].length == 0); + + for_each_sg(sg, s, nents, i) { + BUG_ON(!sg_page(s)); + + dma_cache_sync(dev, sg_virt(s), s->length, dir); + + s->dma_address = sg_phys(s); + s->dma_length = s->length; + } + + return nents; +} + +#ifdef CONFIG_DMA_NONCOHERENT +static void nommu_sync_single(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + dma_cache_sync(dev, phys_to_virt(addr), size, dir); +} + +static void nommu_sync_sg(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nelems, i) + dma_cache_sync(dev, sg_virt(s), s->length, dir); +} +#endif + +struct dma_map_ops nommu_dma_ops = { + .alloc_coherent = dma_generic_alloc_coherent, + .free_coherent = dma_generic_free_coherent, + .map_page = nommu_map_page, + .map_sg = nommu_map_sg, +#ifdef CONFIG_DMA_NONCOHERENT + .sync_single_for_device = nommu_sync_single, + .sync_sg_for_device = nommu_sync_sg, +#endif + .is_phys = 1, +}; + +void __init no_iommu_init(void) +{ + if (dma_ops) + return; + dma_ops = &nommu_dma_ops; +} diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index d76a23170db..3576b709f05 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c @@ -20,6 +20,7 @@ #include <linux/list.h> #include <linux/mempool.h> #include <linux/mm.h> +#include <linux/elf.h> #include <linux/ftrace.h> #include <asm/dwarf.h> #include <asm/unwinder.h> @@ -530,7 +531,18 @@ static int dwarf_cfa_execute_insns(unsigned char *insn_start, } /** - * dwarf_unwind_stack - recursively unwind the stack + * dwarf_free_frame - free the memory allocated for @frame + * @frame: the frame to free + */ +void dwarf_free_frame(struct dwarf_frame *frame) +{ + dwarf_frame_free_regs(frame); + mempool_free(frame, dwarf_frame_pool); +} + +/** + * dwarf_unwind_stack - unwind the stack + * * @pc: address of the function to unwind * @prev: struct dwarf_frame of the previous stackframe on the callstack * @@ -548,9 +560,9 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc, unsigned long addr; /* - * If this is the first invocation of this recursive function we - * need get the contents of a physical register to get the CFA - * in order to begin the virtual unwinding of the stack. + * If we're starting at the top of the stack we need get the + * contents of a physical register to get the CFA in order to + * begin the virtual unwinding of the stack. * * NOTE: the return address is guaranteed to be setup by the * time this function makes its first function call. @@ -593,9 +605,8 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc, fde = dwarf_lookup_fde(pc); if (!fde) { /* - * This is our normal exit path - the one that stops the - * recursion. There's two reasons why we might exit - * here, + * This is our normal exit path. 
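As an illustrative aside (not part of this patch): nommu_dma_ops above is what the generic dma_map_single()/dma_map_sg() wrappers are expected to dispatch to on SH parts without an IOMMU. A minimal driver-side sketch, with invented device/buffer names, of how that path gets exercised:

    #include <linux/dma-mapping.h>

    /* Sketch only: 'dev', 'buf' and the function name are hypothetical. */
    static int example_start_tx(struct device *dev, void *buf, size_t len)
    {
            dma_addr_t handle;

            /* Should land in nommu_map_page(): sync the cache lines for the
             * buffer, then hand back its physical address as the DMA handle. */
            handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
            if (dma_mapping_error(dev, handle))
                    return -ENOMEM;

            /* ... program the DMA controller with 'handle' and start it ... */

            dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
            return 0;
    }
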
There are two reasons + * why we might exit here, * * a) pc has no asscociated DWARF frame info and so * we don't know how to unwind this frame. This is @@ -637,10 +648,10 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc, } else { /* - * Again, this is the first invocation of this - * recurisve function. We need to physically - * read the contents of a register in order to - * get the Canonical Frame Address for this + * Again, we're starting from the top of the + * stack. We need to physically read + * the contents of a register in order to get + * the Canonical Frame Address for this * function. */ frame->cfa = dwarf_read_arch_reg(frame->cfa_register); @@ -670,13 +681,12 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc, return frame; bail: - dwarf_frame_free_regs(frame); - mempool_free(frame, dwarf_frame_pool); + dwarf_free_frame(frame); return NULL; } static int dwarf_parse_cie(void *entry, void *p, unsigned long len, - unsigned char *end) + unsigned char *end, struct module *mod) { struct dwarf_cie *cie; unsigned long flags; @@ -772,6 +782,8 @@ static int dwarf_parse_cie(void *entry, void *p, unsigned long len, cie->initial_instructions = p; cie->instructions_end = end; + cie->mod = mod; + /* Add to list */ spin_lock_irqsave(&dwarf_cie_lock, flags); list_add_tail(&cie->link, &dwarf_cie_list); @@ -782,7 +794,7 @@ static int dwarf_parse_cie(void *entry, void *p, unsigned long len, static int dwarf_parse_fde(void *entry, u32 entry_type, void *start, unsigned long len, - unsigned char *end) + unsigned char *end, struct module *mod) { struct dwarf_fde *fde; struct dwarf_cie *cie; @@ -831,6 +843,8 @@ static int dwarf_parse_fde(void *entry, u32 entry_type, fde->instructions = p; fde->end = end; + fde->mod = mod; + /* Add to list. */ spin_lock_irqsave(&dwarf_fde_lock, flags); list_add_tail(&fde->link, &dwarf_fde_list); @@ -854,10 +868,8 @@ static void dwarf_unwinder_dump(struct task_struct *task, while (1) { frame = dwarf_unwind_stack(return_addr, _frame); - if (_frame) { - dwarf_frame_free_regs(_frame); - mempool_free(_frame, dwarf_frame_pool); - } + if (_frame) + dwarf_free_frame(_frame); _frame = frame; @@ -867,6 +879,9 @@ static void dwarf_unwinder_dump(struct task_struct *task, return_addr = frame->return_addr; ops->address(data, return_addr, 1); } + + if (frame) + dwarf_free_frame(frame); } static struct unwinder dwarf_unwinder = { @@ -896,48 +911,28 @@ static void dwarf_unwinder_cleanup(void) } /** - * dwarf_unwinder_init - initialise the dwarf unwinder + * dwarf_parse_section - parse DWARF section + * @eh_frame_start: start address of the .eh_frame section + * @eh_frame_end: end address of the .eh_frame section + * @mod: the kernel module containing the .eh_frame section * - * Build the data structures describing the .dwarf_frame section to - * make it easier to lookup CIE and FDE entries. Because the - * .eh_frame section is packed as tightly as possible it is not - * easy to lookup the FDE for a given PC, so we build a list of FDE - * and CIE entries that make it easier. + * Parse the information in a .eh_frame section. 
*/ -static int __init dwarf_unwinder_init(void) +static int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end, + struct module *mod) { u32 entry_type; void *p, *entry; int count, err = 0; - unsigned long len; + unsigned long len = 0; unsigned int c_entries, f_entries; unsigned char *end; - INIT_LIST_HEAD(&dwarf_cie_list); - INIT_LIST_HEAD(&dwarf_fde_list); c_entries = 0; f_entries = 0; - entry = &__start_eh_frame; - - dwarf_frame_cachep = kmem_cache_create("dwarf_frames", - sizeof(struct dwarf_frame), 0, - SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL); - - dwarf_reg_cachep = kmem_cache_create("dwarf_regs", - sizeof(struct dwarf_reg), 0, - SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL); + entry = eh_frame_start; - dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ, - mempool_alloc_slab, - mempool_free_slab, - dwarf_frame_cachep); - - dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ, - mempool_alloc_slab, - mempool_free_slab, - dwarf_reg_cachep); - - while ((char *)entry < __stop_eh_frame) { + while ((char *)entry < eh_frame_end) { p = entry; count = dwarf_entry_len(p, &len); @@ -949,6 +944,7 @@ static int __init dwarf_unwinder_init(void) * entry and move to the next one because 'len' * tells us where our next entry is. */ + err = -EINVAL; goto out; } else p += count; @@ -960,13 +956,14 @@ static int __init dwarf_unwinder_init(void) p += 4; if (entry_type == DW_EH_FRAME_CIE) { - err = dwarf_parse_cie(entry, p, len, end); + err = dwarf_parse_cie(entry, p, len, end, mod); if (err < 0) goto out; else c_entries++; } else { - err = dwarf_parse_fde(entry, entry_type, p, len, end); + err = dwarf_parse_fde(entry, entry_type, p, len, + end, mod); if (err < 0) goto out; else @@ -979,6 +976,129 @@ static int __init dwarf_unwinder_init(void) printk(KERN_INFO "DWARF unwinder initialised: read %u CIEs, %u FDEs\n", c_entries, f_entries); + return 0; + +out: + return err; +} + +#ifdef CONFIG_MODULES +int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *me) +{ + unsigned int i, err; + unsigned long start, end; + char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + start = end = 0; + + for (i = 1; i < hdr->e_shnum; i++) { + /* Alloc bit cleared means "ignore it." */ + if ((sechdrs[i].sh_flags & SHF_ALLOC) + && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) { + start = sechdrs[i].sh_addr; + end = start + sechdrs[i].sh_size; + break; + } + } + + /* Did we find the .eh_frame section? */ + if (i != hdr->e_shnum) { + err = dwarf_parse_section((char *)start, (char *)end, me); + if (err) { + printk(KERN_WARNING "%s: failed to parse DWARF info\n", + me->name); + return err; + } + } + + return 0; +} + +/** + * module_dwarf_cleanup - remove FDE/CIEs associated with @mod + * @mod: the module that is being unloaded + * + * Remove any FDEs and CIEs from the global lists that came from + * @mod's .eh_frame section because @mod is being unloaded. 
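A side note on the cleanup helper documented above (its body follows): it restarts its list walk from the head after every deletion. Purely for comparison, and not part of the patch, the same effect could be written with the usual delete-while-iterating idiom:

    /* Hypothetical alternative formulation of the CIE half of the walk. */
    struct dwarf_cie *cie, *tmp;
    unsigned long flags;

    spin_lock_irqsave(&dwarf_cie_lock, flags);
    list_for_each_entry_safe(cie, tmp, &dwarf_cie_list, link) {
            if (cie->mod == mod) {
                    list_del(&cie->link);
                    kfree(cie);
            }
    }
    spin_unlock_irqrestore(&dwarf_cie_lock, flags);

Either form removes every CIE whose ->mod matches the module being unloaded; the FDE list is handled the same way.
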
+ */ +void module_dwarf_cleanup(struct module *mod) +{ + struct dwarf_fde *fde; + struct dwarf_cie *cie; + unsigned long flags; + + spin_lock_irqsave(&dwarf_cie_lock, flags); + +again_cie: + list_for_each_entry(cie, &dwarf_cie_list, link) { + if (cie->mod == mod) + break; + } + + if (&cie->link != &dwarf_cie_list) { + list_del(&cie->link); + kfree(cie); + goto again_cie; + } + + spin_unlock_irqrestore(&dwarf_cie_lock, flags); + + spin_lock_irqsave(&dwarf_fde_lock, flags); + +again_fde: + list_for_each_entry(fde, &dwarf_fde_list, link) { + if (fde->mod == mod) + break; + } + + if (&fde->link != &dwarf_fde_list) { + list_del(&fde->link); + kfree(fde); + goto again_fde; + } + + spin_unlock_irqrestore(&dwarf_fde_lock, flags); +} +#endif /* CONFIG_MODULES */ + +/** + * dwarf_unwinder_init - initialise the dwarf unwinder + * + * Build the data structures describing the .dwarf_frame section to + * make it easier to lookup CIE and FDE entries. Because the + * .eh_frame section is packed as tightly as possible it is not + * easy to lookup the FDE for a given PC, so we build a list of FDE + * and CIE entries that make it easier. + */ +static int __init dwarf_unwinder_init(void) +{ + int err; + INIT_LIST_HEAD(&dwarf_cie_list); + INIT_LIST_HEAD(&dwarf_fde_list); + + dwarf_frame_cachep = kmem_cache_create("dwarf_frames", + sizeof(struct dwarf_frame), 0, + SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL); + + dwarf_reg_cachep = kmem_cache_create("dwarf_regs", + sizeof(struct dwarf_reg), 0, + SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL); + + dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ, + mempool_alloc_slab, + mempool_free_slab, + dwarf_frame_cachep); + + dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ, + mempool_alloc_slab, + mempool_free_slab, + dwarf_reg_cachep); + + err = dwarf_parse_section(__start_eh_frame, __stop_eh_frame, NULL); + if (err) + goto out; + err = unwinder_register(&dwarf_unwinder); if (err) goto out; diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index 3eb84931d2a..f0abd58c3a6 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -133,7 +133,7 @@ work_pending: ! r8: current_thread_info ! t: result of "tst #_TIF_NEED_RESCHED, r0" bf/s work_resched - tst #(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK), r0 + tst #_TIF_SIGPENDING, r0 work_notifysig: bt/s __restore_all mov r15, r4 diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 2c48e267256..b6f41c109be 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -62,6 +62,150 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) return ftrace_replaced_code; } +/* + * Modifying code must take extra care. On an SMP machine, if + * the code being modified is also being executed on another CPU + * that CPU will have undefined results and possibly take a GPF. + * We use kstop_machine to stop other CPUS from exectuing code. + * But this does not stop NMIs from happening. We still need + * to protect against that. We separate out the modification of + * the code to take care of this. + * + * Two buffers are added: An IP buffer and a "code" buffer. + * + * 1) Put the instruction pointer into the IP buffer + * and the new code into the "code" buffer. + * 2) Wait for any running NMIs to finish and set a flag that says + * we are modifying code, it is done in an atomic operation. + * 3) Write the code + * 4) clear the flag. + * 5) Wait for any running NMIs to finish. 
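To make the five steps above concrete, here is one possible interleaving, sketched for illustration only; the remainder of the comment and the code below spell out the same guarantees:

    CPU0: do_ftrace_mod_code()              CPU1: NMI handler
    ----------------------------------      --------------------------------
    fill mod_code_ip / mod_code_newcode
    smp_mb()
    wait_for_nmi_and_set_mod_flag()
    ftrace_mod_code()  /* write bytes */    ftrace_nmi_enter()
                                              sees MOD_CODE_WRITE_FLAG set
                                              ftrace_mod_code() /* same bytes */
    smp_mb()                                ftrace_nmi_exit()
    clear_mod_flag()
    wait_for_nmi()     /* drain CPU1 */

Because both sides write identical bytes to the same address, it does not matter whose store lands last, and the NMI path never runs through a half-patched mcount site.
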
+ * + * If an NMI is executed, the first thing it does is to call + * "ftrace_nmi_enter". This will check if the flag is set to write + * and if it is, it will write what is in the IP and "code" buffers. + * + * The trick is, it does not matter if everyone is writing the same + * content to the code location. Also, if a CPU is executing code + * it is OK to write to that code location if the contents being written + * are the same as what exists. + */ +#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */ +static atomic_t nmi_running = ATOMIC_INIT(0); +static int mod_code_status; /* holds return value of text write */ +static void *mod_code_ip; /* holds the IP to write to */ +static void *mod_code_newcode; /* holds the text to write to the IP */ + +static unsigned nmi_wait_count; +static atomic_t nmi_update_count = ATOMIC_INIT(0); + +int ftrace_arch_read_dyn_info(char *buf, int size) +{ + int r; + + r = snprintf(buf, size, "%u %u", + nmi_wait_count, + atomic_read(&nmi_update_count)); + return r; +} + +static void clear_mod_flag(void) +{ + int old = atomic_read(&nmi_running); + + for (;;) { + int new = old & ~MOD_CODE_WRITE_FLAG; + + if (old == new) + break; + + old = atomic_cmpxchg(&nmi_running, old, new); + } +} + +static void ftrace_mod_code(void) +{ + /* + * Yes, more than one CPU process can be writing to mod_code_status. + * (and the code itself) + * But if one were to fail, then they all should, and if one were + * to succeed, then they all should. + */ + mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, + MCOUNT_INSN_SIZE); + + /* if we fail, then kill any new writers */ + if (mod_code_status) + clear_mod_flag(); +} + +void ftrace_nmi_enter(void) +{ + if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { + smp_rmb(); + ftrace_mod_code(); + atomic_inc(&nmi_update_count); + } + /* Must have previous changes seen before executions */ + smp_mb(); +} + +void ftrace_nmi_exit(void) +{ + /* Finish all executions before clearing nmi_running */ + smp_mb(); + atomic_dec(&nmi_running); +} + +static void wait_for_nmi_and_set_mod_flag(void) +{ + if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)) + return; + + do { + cpu_relax(); + } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)); + + nmi_wait_count++; +} + +static void wait_for_nmi(void) +{ + if (!atomic_read(&nmi_running)) + return; + + do { + cpu_relax(); + } while (atomic_read(&nmi_running)); + + nmi_wait_count++; +} + +static int +do_ftrace_mod_code(unsigned long ip, void *new_code) +{ + mod_code_ip = (void *)ip; + mod_code_newcode = new_code; + + /* The buffers need to be visible before we let NMIs write them */ + smp_mb(); + + wait_for_nmi_and_set_mod_flag(); + + /* Make sure all running NMIs have finished before we write the code */ + smp_mb(); + + ftrace_mod_code(); + + /* Make sure the write happens before clearing the bit */ + smp_mb(); + + clear_mod_flag(); + wait_for_nmi(); + + return mod_code_status; +} + static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { @@ -86,7 +230,7 @@ static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, return -EINVAL; /* replace the text with the new text */ - if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) + if (do_ftrace_mod_code(ip, new_code)) return -EPERM; flush_icache_range(ip, ip + MCOUNT_INSN_SIZE); diff --git a/arch/sh/kernel/gpio.c b/arch/sh/kernel/gpio.c deleted file mode 100644 index d22e5af699f..00000000000 --- a/arch/sh/kernel/gpio.c +++ 
/dev/null @@ -1,584 +0,0 @@ -/* - * Pinmuxed GPIO support for SuperH. - * - * Copyright (C) 2008 Magnus Damm - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/list.h> -#include <linux/module.h> -#include <linux/clk.h> -#include <linux/err.h> -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/bitops.h> -#include <linux/gpio.h> - -static int enum_in_range(pinmux_enum_t enum_id, struct pinmux_range *r) -{ - if (enum_id < r->begin) - return 0; - - if (enum_id > r->end) - return 0; - - return 1; -} - -static unsigned long gpio_read_raw_reg(unsigned long reg, - unsigned long reg_width) -{ - switch (reg_width) { - case 8: - return ctrl_inb(reg); - case 16: - return ctrl_inw(reg); - case 32: - return ctrl_inl(reg); - } - - BUG(); - return 0; -} - -static void gpio_write_raw_reg(unsigned long reg, - unsigned long reg_width, - unsigned long data) -{ - switch (reg_width) { - case 8: - ctrl_outb(data, reg); - return; - case 16: - ctrl_outw(data, reg); - return; - case 32: - ctrl_outl(data, reg); - return; - } - - BUG(); -} - -static void gpio_write_bit(struct pinmux_data_reg *dr, - unsigned long in_pos, unsigned long value) -{ - unsigned long pos; - - pos = dr->reg_width - (in_pos + 1); - -#ifdef DEBUG - pr_info("write_bit addr = %lx, value = %ld, pos = %ld, " - "r_width = %ld\n", - dr->reg, !!value, pos, dr->reg_width); -#endif - - if (value) - set_bit(pos, &dr->reg_shadow); - else - clear_bit(pos, &dr->reg_shadow); - - gpio_write_raw_reg(dr->reg, dr->reg_width, dr->reg_shadow); -} - -static int gpio_read_reg(unsigned long reg, unsigned long reg_width, - unsigned long field_width, unsigned long in_pos) -{ - unsigned long data, mask, pos; - - data = 0; - mask = (1 << field_width) - 1; - pos = reg_width - ((in_pos + 1) * field_width); - -#ifdef DEBUG - pr_info("read_reg: addr = %lx, pos = %ld, " - "r_width = %ld, f_width = %ld\n", - reg, pos, reg_width, field_width); -#endif - - data = gpio_read_raw_reg(reg, reg_width); - return (data >> pos) & mask; -} - -static void gpio_write_reg(unsigned long reg, unsigned long reg_width, - unsigned long field_width, unsigned long in_pos, - unsigned long value) -{ - unsigned long mask, pos; - - mask = (1 << field_width) - 1; - pos = reg_width - ((in_pos + 1) * field_width); - -#ifdef DEBUG - pr_info("write_reg addr = %lx, value = %ld, pos = %ld, " - "r_width = %ld, f_width = %ld\n", - reg, value, pos, reg_width, field_width); -#endif - - mask = ~(mask << pos); - value = value << pos; - - switch (reg_width) { - case 8: - ctrl_outb((ctrl_inb(reg) & mask) | value, reg); - break; - case 16: - ctrl_outw((ctrl_inw(reg) & mask) | value, reg); - break; - case 32: - ctrl_outl((ctrl_inl(reg) & mask) | value, reg); - break; - } -} - -static int setup_data_reg(struct pinmux_info *gpioc, unsigned gpio) -{ - struct pinmux_gpio *gpiop = &gpioc->gpios[gpio]; - struct pinmux_data_reg *data_reg; - int k, n; - - if (!enum_in_range(gpiop->enum_id, &gpioc->data)) - return -1; - - k = 0; - while (1) { - data_reg = gpioc->data_regs + k; - - if (!data_reg->reg_width) - break; - - for (n = 0; n < data_reg->reg_width; n++) { - if (data_reg->enum_ids[n] == gpiop->enum_id) { - gpiop->flags &= ~PINMUX_FLAG_DREG; - gpiop->flags |= (k << PINMUX_FLAG_DREG_SHIFT); - gpiop->flags &= ~PINMUX_FLAG_DBIT; - gpiop->flags |= (n << PINMUX_FLAG_DBIT_SHIFT); - return 0; - } - } - k++; - } - - 
BUG(); - - return -1; -} - -static void setup_data_regs(struct pinmux_info *gpioc) -{ - struct pinmux_data_reg *drp; - int k; - - for (k = gpioc->first_gpio; k <= gpioc->last_gpio; k++) - setup_data_reg(gpioc, k); - - k = 0; - while (1) { - drp = gpioc->data_regs + k; - - if (!drp->reg_width) - break; - - drp->reg_shadow = gpio_read_raw_reg(drp->reg, drp->reg_width); - k++; - } -} - -static int get_data_reg(struct pinmux_info *gpioc, unsigned gpio, - struct pinmux_data_reg **drp, int *bitp) -{ - struct pinmux_gpio *gpiop = &gpioc->gpios[gpio]; - int k, n; - - if (!enum_in_range(gpiop->enum_id, &gpioc->data)) - return -1; - - k = (gpiop->flags & PINMUX_FLAG_DREG) >> PINMUX_FLAG_DREG_SHIFT; - n = (gpiop->flags & PINMUX_FLAG_DBIT) >> PINMUX_FLAG_DBIT_SHIFT; - *drp = gpioc->data_regs + k; - *bitp = n; - return 0; -} - -static int get_config_reg(struct pinmux_info *gpioc, pinmux_enum_t enum_id, - struct pinmux_cfg_reg **crp, int *indexp, - unsigned long **cntp) -{ - struct pinmux_cfg_reg *config_reg; - unsigned long r_width, f_width; - int k, n; - - k = 0; - while (1) { - config_reg = gpioc->cfg_regs + k; - - r_width = config_reg->reg_width; - f_width = config_reg->field_width; - - if (!r_width) - break; - for (n = 0; n < (r_width / f_width) * 1 << f_width; n++) { - if (config_reg->enum_ids[n] == enum_id) { - *crp = config_reg; - *indexp = n; - *cntp = &config_reg->cnt[n / (1 << f_width)]; - return 0; - } - } - k++; - } - - return -1; -} - -static int get_gpio_enum_id(struct pinmux_info *gpioc, unsigned gpio, - int pos, pinmux_enum_t *enum_idp) -{ - pinmux_enum_t enum_id = gpioc->gpios[gpio].enum_id; - pinmux_enum_t *data = gpioc->gpio_data; - int k; - - if (!enum_in_range(enum_id, &gpioc->data)) { - if (!enum_in_range(enum_id, &gpioc->mark)) { - pr_err("non data/mark enum_id for gpio %d\n", gpio); - return -1; - } - } - - if (pos) { - *enum_idp = data[pos + 1]; - return pos + 1; - } - - for (k = 0; k < gpioc->gpio_data_size; k++) { - if (data[k] == enum_id) { - *enum_idp = data[k + 1]; - return k + 1; - } - } - - pr_err("cannot locate data/mark enum_id for gpio %d\n", gpio); - return -1; -} - -static void write_config_reg(struct pinmux_info *gpioc, - struct pinmux_cfg_reg *crp, - int index) -{ - unsigned long ncomb, pos, value; - - ncomb = 1 << crp->field_width; - pos = index / ncomb; - value = index % ncomb; - - gpio_write_reg(crp->reg, crp->reg_width, crp->field_width, pos, value); -} - -static int check_config_reg(struct pinmux_info *gpioc, - struct pinmux_cfg_reg *crp, - int index) -{ - unsigned long ncomb, pos, value; - - ncomb = 1 << crp->field_width; - pos = index / ncomb; - value = index % ncomb; - - if (gpio_read_reg(crp->reg, crp->reg_width, - crp->field_width, pos) == value) - return 0; - - return -1; -} - -enum { GPIO_CFG_DRYRUN, GPIO_CFG_REQ, GPIO_CFG_FREE }; - -static int pinmux_config_gpio(struct pinmux_info *gpioc, unsigned gpio, - int pinmux_type, int cfg_mode) -{ - struct pinmux_cfg_reg *cr = NULL; - pinmux_enum_t enum_id; - struct pinmux_range *range; - int in_range, pos, index; - unsigned long *cntp; - - switch (pinmux_type) { - - case PINMUX_TYPE_FUNCTION: - range = NULL; - break; - - case PINMUX_TYPE_OUTPUT: - range = &gpioc->output; - break; - - case PINMUX_TYPE_INPUT: - range = &gpioc->input; - break; - - case PINMUX_TYPE_INPUT_PULLUP: - range = &gpioc->input_pu; - break; - - case PINMUX_TYPE_INPUT_PULLDOWN: - range = &gpioc->input_pd; - break; - - default: - goto out_err; - } - - pos = 0; - enum_id = 0; - index = 0; - while (1) { - pos = get_gpio_enum_id(gpioc, gpio, 
pos, &enum_id); - if (pos <= 0) - goto out_err; - - if (!enum_id) - break; - - in_range = enum_in_range(enum_id, &gpioc->function); - if (!in_range && range) { - in_range = enum_in_range(enum_id, range); - - if (in_range && enum_id == range->force) - continue; - } - - if (!in_range) - continue; - - if (get_config_reg(gpioc, enum_id, &cr, &index, &cntp) != 0) - goto out_err; - - switch (cfg_mode) { - case GPIO_CFG_DRYRUN: - if (!*cntp || !check_config_reg(gpioc, cr, index)) - continue; - break; - - case GPIO_CFG_REQ: - write_config_reg(gpioc, cr, index); - *cntp = *cntp + 1; - break; - - case GPIO_CFG_FREE: - *cntp = *cntp - 1; - break; - } - } - - return 0; - out_err: - return -1; -} - -static DEFINE_SPINLOCK(gpio_lock); - -static struct pinmux_info *chip_to_pinmux(struct gpio_chip *chip) -{ - return container_of(chip, struct pinmux_info, chip); -} - -static int sh_gpio_request(struct gpio_chip *chip, unsigned offset) -{ - struct pinmux_info *gpioc = chip_to_pinmux(chip); - struct pinmux_data_reg *dummy; - unsigned long flags; - int i, ret, pinmux_type; - - ret = -EINVAL; - - if (!gpioc) - goto err_out; - - spin_lock_irqsave(&gpio_lock, flags); - - if ((gpioc->gpios[offset].flags & PINMUX_FLAG_TYPE) != PINMUX_TYPE_NONE) - goto err_unlock; - - /* setup pin function here if no data is associated with pin */ - - if (get_data_reg(gpioc, offset, &dummy, &i) != 0) - pinmux_type = PINMUX_TYPE_FUNCTION; - else - pinmux_type = PINMUX_TYPE_GPIO; - - if (pinmux_type == PINMUX_TYPE_FUNCTION) { - if (pinmux_config_gpio(gpioc, offset, - pinmux_type, - GPIO_CFG_DRYRUN) != 0) - goto err_unlock; - - if (pinmux_config_gpio(gpioc, offset, - pinmux_type, - GPIO_CFG_REQ) != 0) - BUG(); - } - - gpioc->gpios[offset].flags &= ~PINMUX_FLAG_TYPE; - gpioc->gpios[offset].flags |= pinmux_type; - - ret = 0; - err_unlock: - spin_unlock_irqrestore(&gpio_lock, flags); - err_out: - return ret; -} - -static void sh_gpio_free(struct gpio_chip *chip, unsigned offset) -{ - struct pinmux_info *gpioc = chip_to_pinmux(chip); - unsigned long flags; - int pinmux_type; - - if (!gpioc) - return; - - spin_lock_irqsave(&gpio_lock, flags); - - pinmux_type = gpioc->gpios[offset].flags & PINMUX_FLAG_TYPE; - pinmux_config_gpio(gpioc, offset, pinmux_type, GPIO_CFG_FREE); - gpioc->gpios[offset].flags &= ~PINMUX_FLAG_TYPE; - gpioc->gpios[offset].flags |= PINMUX_TYPE_NONE; - - spin_unlock_irqrestore(&gpio_lock, flags); -} - -static int pinmux_direction(struct pinmux_info *gpioc, - unsigned gpio, int new_pinmux_type) -{ - int pinmux_type; - int ret = -EINVAL; - - if (!gpioc) - goto err_out; - - pinmux_type = gpioc->gpios[gpio].flags & PINMUX_FLAG_TYPE; - - switch (pinmux_type) { - case PINMUX_TYPE_GPIO: - break; - case PINMUX_TYPE_OUTPUT: - case PINMUX_TYPE_INPUT: - case PINMUX_TYPE_INPUT_PULLUP: - case PINMUX_TYPE_INPUT_PULLDOWN: - pinmux_config_gpio(gpioc, gpio, pinmux_type, GPIO_CFG_FREE); - break; - default: - goto err_out; - } - - if (pinmux_config_gpio(gpioc, gpio, - new_pinmux_type, - GPIO_CFG_DRYRUN) != 0) - goto err_out; - - if (pinmux_config_gpio(gpioc, gpio, - new_pinmux_type, - GPIO_CFG_REQ) != 0) - BUG(); - - gpioc->gpios[gpio].flags &= ~PINMUX_FLAG_TYPE; - gpioc->gpios[gpio].flags |= new_pinmux_type; - - ret = 0; - err_out: - return ret; -} - -static int sh_gpio_direction_input(struct gpio_chip *chip, unsigned offset) -{ - struct pinmux_info *gpioc = chip_to_pinmux(chip); - unsigned long flags; - int ret; - - spin_lock_irqsave(&gpio_lock, flags); - ret = pinmux_direction(gpioc, offset, PINMUX_TYPE_INPUT); - 
spin_unlock_irqrestore(&gpio_lock, flags); - - return ret; -} - -static void sh_gpio_set_value(struct pinmux_info *gpioc, - unsigned gpio, int value) -{ - struct pinmux_data_reg *dr = NULL; - int bit = 0; - - if (!gpioc || get_data_reg(gpioc, gpio, &dr, &bit) != 0) - BUG(); - else - gpio_write_bit(dr, bit, value); -} - -static int sh_gpio_direction_output(struct gpio_chip *chip, unsigned offset, - int value) -{ - struct pinmux_info *gpioc = chip_to_pinmux(chip); - unsigned long flags; - int ret; - - sh_gpio_set_value(gpioc, offset, value); - spin_lock_irqsave(&gpio_lock, flags); - ret = pinmux_direction(gpioc, offset, PINMUX_TYPE_OUTPUT); - spin_unlock_irqrestore(&gpio_lock, flags); - - return ret; -} - -static int sh_gpio_get_value(struct pinmux_info *gpioc, unsigned gpio) -{ - struct pinmux_data_reg *dr = NULL; - int bit = 0; - - if (!gpioc || get_data_reg(gpioc, gpio, &dr, &bit) != 0) { - BUG(); - return 0; - } - - return gpio_read_reg(dr->reg, dr->reg_width, 1, bit); -} - -static int sh_gpio_get(struct gpio_chip *chip, unsigned offset) -{ - return sh_gpio_get_value(chip_to_pinmux(chip), offset); -} - -static void sh_gpio_set(struct gpio_chip *chip, unsigned offset, int value) -{ - sh_gpio_set_value(chip_to_pinmux(chip), offset, value); -} - -int register_pinmux(struct pinmux_info *pip) -{ - struct gpio_chip *chip = &pip->chip; - - pr_info("sh pinmux: %s handling gpio %d -> %d\n", - pip->name, pip->first_gpio, pip->last_gpio); - - setup_data_regs(pip); - - chip->request = sh_gpio_request; - chip->free = sh_gpio_free; - chip->direction_input = sh_gpio_direction_input; - chip->get = sh_gpio_get; - chip->direction_output = sh_gpio_direction_output; - chip->set = sh_gpio_set; - - WARN_ON(pip->first_gpio != 0); /* needs testing */ - - chip->label = pip->name; - chip->owner = THIS_MODULE; - chip->base = pip->first_gpio; - chip->ngpio = (pip->last_gpio - pip->first_gpio) + 1; - - return gpiochip_add(chip); -} diff --git a/arch/sh/kernel/head_32.S b/arch/sh/kernel/head_32.S index a78be74b8d3..1151ecdffa7 100644 --- a/arch/sh/kernel/head_32.S +++ b/arch/sh/kernel/head_32.S @@ -33,7 +33,7 @@ ENTRY(empty_zero_page) .long 1 /* LOADER_TYPE */ .long 0x00000000 /* INITRD_START */ .long 0x00000000 /* INITRD_SIZE */ -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_PMB_FIXED) .long 0x53453f00 + 32 /* "SE?" = 32 bit */ #else .long 0x53453f00 + 29 /* "SE?" = 29 bit */ diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 27ff2dc093c..aaff0037fcd 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -21,7 +21,7 @@ #include <asm/atomic.h> static int hlt_counter; -void (*pm_idle)(void); +void (*pm_idle)(void) = NULL; void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); @@ -39,48 +39,92 @@ static int __init hlt_setup(char *__unused) } __setup("hlt", hlt_setup); +static inline int hlt_works(void) +{ + return !hlt_counter; +} + +/* + * On SMP it's slightly faster (but much more power-consuming!) + * to poll the ->work.need_resched flag instead of waiting for the + * cross-CPU IPI to arrive. Use this option with caution. 
+ */ +static void poll_idle(void) +{ + local_irq_enable(); + while (!need_resched()) + cpu_relax(); +} + void default_idle(void) { - if (!hlt_counter) { + if (hlt_works()) { clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); - set_bl_bit(); - stop_critical_timings(); - while (!need_resched()) + if (!need_resched()) { + local_irq_enable(); cpu_sleep(); + } else + local_irq_enable(); - start_critical_timings(); - clear_bl_bit(); set_thread_flag(TIF_POLLING_NRFLAG); } else - while (!need_resched()) - cpu_relax(); + poll_idle(); } +/* + * The idle thread. There's no useful work to be done, so just try to conserve + * power and have a low exit latency (ie sit in a loop waiting for somebody to + * say that they'd like to reschedule) + */ void cpu_idle(void) { + unsigned int cpu = smp_processor_id(); + set_thread_flag(TIF_POLLING_NRFLAG); /* endless idle loop with no priority at all */ while (1) { - void (*idle)(void) = pm_idle; + tick_nohz_stop_sched_tick(1); - if (!idle) - idle = default_idle; + while (!need_resched() && cpu_online(cpu)) { + check_pgt_cache(); + rmb(); - tick_nohz_stop_sched_tick(1); - while (!need_resched()) - idle(); - tick_nohz_restart_sched_tick(); + local_irq_disable(); + /* Don't trace irqs off for idle */ + stop_critical_timings(); + pm_idle(); + /* + * Sanity check to ensure that pm_idle() returns + * with IRQs enabled + */ + WARN_ON(irqs_disabled()); + start_critical_timings(); + } + tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); schedule(); preempt_disable(); - check_pgt_cache(); } } +void __cpuinit select_idle_routine(void) +{ + /* + * If a platform has set its own idle routine, leave it alone. + */ + if (pm_idle) + return; + + if (hlt_works()) + pm_idle = default_idle; + else + pm_idle = poll_idle; +} + static void do_nothing(void *unused) { } diff --git a/arch/sh/kernel/io_generic.c b/arch/sh/kernel/io_generic.c index b8fa6524760..e1e1dbd1955 100644 --- a/arch/sh/kernel/io_generic.c +++ b/arch/sh/kernel/io_generic.c @@ -24,7 +24,7 @@ #define dummy_read() #endif -unsigned long generic_io_base; +unsigned long generic_io_base = 0; u8 generic_inb(unsigned long port) { @@ -147,8 +147,10 @@ void generic_outsl(unsigned long port, const void *src, unsigned long count) void __iomem *generic_ioport_map(unsigned long addr, unsigned int size) { +#ifdef P1SEG if (PXSEG(addr) >= P1SEG) return (void __iomem *)addr; +#endif return (void __iomem *)(addr + generic_io_base); } diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index eac7da772fc..e1913f28f41 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -37,7 +37,15 @@ void ack_bad_irq(unsigned int irq) */ static int show_other_interrupts(struct seq_file *p, int prec) { + int j; + + seq_printf(p, "%*s: ", prec, "NMI"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stat[j].__nmi_count); + seq_printf(p, " Non-maskable interrupts\n"); + seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); + return 0; } @@ -255,6 +263,12 @@ void __init init_IRQ(void) { plat_irq_setup(); + /* + * Pin any of the legacy IRQ vectors that haven't already been + * grabbed by the platform + */ + reserve_irq_legacy(); + /* Perform the machine specific initialisation */ if (sh_mv.mv_init_irq) sh_mv.mv_init_irq(); diff --git a/arch/sh/kernel/irq_32.c b/arch/sh/kernel/irq_32.c new file mode 100644 index 00000000000..e33ab15831f --- /dev/null +++ b/arch/sh/kernel/irq_32.c @@ -0,0 +1,57 @@ +/* + * SHcompact irqflags support + * + * Copyright (C) 2006 - 2009 Paul Mundt + * + * This file is 
subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/irqflags.h> +#include <linux/module.h> + +void notrace raw_local_irq_restore(unsigned long flags) +{ + unsigned long __dummy0, __dummy1; + + if (flags == RAW_IRQ_DISABLED) { + __asm__ __volatile__ ( + "stc sr, %0\n\t" + "or #0xf0, %0\n\t" + "ldc %0, sr\n\t" + : "=&z" (__dummy0) + : /* no inputs */ + : "memory" + ); + } else { + __asm__ __volatile__ ( + "stc sr, %0\n\t" + "and %1, %0\n\t" +#ifdef CONFIG_CPU_HAS_SR_RB + "stc r6_bank, %1\n\t" + "or %1, %0\n\t" +#endif + "ldc %0, sr\n\t" + : "=&r" (__dummy0), "=r" (__dummy1) + : "1" (~RAW_IRQ_DISABLED) + : "memory" + ); + } +} +EXPORT_SYMBOL(raw_local_irq_restore); + +unsigned long notrace __raw_local_save_flags(void) +{ + unsigned long flags; + + __asm__ __volatile__ ( + "stc sr, %0\n\t" + "and #0xf0, %0\n\t" + : "=&z" (flags) + : /* no inputs */ + : "memory" + ); + + return flags; +} +EXPORT_SYMBOL(__raw_local_save_flags); diff --git a/arch/sh/kernel/irq_64.c b/arch/sh/kernel/irq_64.c new file mode 100644 index 00000000000..32365ba0e03 --- /dev/null +++ b/arch/sh/kernel/irq_64.c @@ -0,0 +1,51 @@ +/* + * SHmedia irqflags support + * + * Copyright (C) 2006 - 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/irqflags.h> +#include <linux/module.h> +#include <cpu/registers.h> + +void notrace raw_local_irq_restore(unsigned long flags) +{ + unsigned long long __dummy; + + if (flags == RAW_IRQ_DISABLED) { + __asm__ __volatile__ ( + "getcon " __SR ", %0\n\t" + "or %0, %1, %0\n\t" + "putcon %0, " __SR "\n\t" + : "=&r" (__dummy) + : "r" (RAW_IRQ_DISABLED) + ); + } else { + __asm__ __volatile__ ( + "getcon " __SR ", %0\n\t" + "and %0, %1, %0\n\t" + "putcon %0, " __SR "\n\t" + : "=&r" (__dummy) + : "r" (~RAW_IRQ_DISABLED) + ); + } +} +EXPORT_SYMBOL(raw_local_irq_restore); + +unsigned long notrace __raw_local_save_flags(void) +{ + unsigned long flags; + + __asm__ __volatile__ ( + "getcon " __SR ", %0\n\t" + "and %0, %1, %0" + : "=&r" (flags) + : "r" (RAW_IRQ_DISABLED) + ); + + return flags; +} +EXPORT_SYMBOL(__raw_local_save_flags); diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c index 7ea2704ea03..76f280223eb 100644 --- a/arch/sh/kernel/machine_kexec.c +++ b/arch/sh/kernel/machine_kexec.c @@ -46,12 +46,6 @@ void machine_crash_shutdown(struct pt_regs *regs) */ int machine_kexec_prepare(struct kimage *image) { - /* older versions of kexec-tools are passing - * the zImage entry point as a virtual address. 
- */ - if (image->start != PHYSADDR(image->start)) - return -EINVAL; /* upgrade your kexec-tools */ - return 0; } diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c index cbce639b108..1652340ba3f 100644 --- a/arch/sh/kernel/machvec.c +++ b/arch/sh/kernel/machvec.c @@ -135,5 +135,9 @@ void __init sh_mv_setup(void) if (!sh_mv.mv_nr_irqs) sh_mv.mv_nr_irqs = NR_IRQS; +#ifdef P2SEG __set_io_port_base(P2SEG); +#else + __set_io_port_base(0); +#endif } diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index c2efdcde266..43adddfe4c0 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c @@ -32,6 +32,7 @@ #include <linux/string.h> #include <linux/kernel.h> #include <asm/unaligned.h> +#include <asm/dwarf.h> void *module_alloc(unsigned long size) { @@ -145,10 +146,16 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - return module_bug_finalize(hdr, sechdrs, me); + int ret = 0; + + ret |= module_dwarf_finalize(hdr, sechdrs, me); + ret |= module_bug_finalize(hdr, sechdrs, me); + + return ret; } void module_arch_cleanup(struct module *mod) { module_bug_cleanup(mod); + module_dwarf_cleanup(mod); } diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c new file mode 100644 index 00000000000..24ea837eac5 --- /dev/null +++ b/arch/sh/kernel/perf_callchain.c @@ -0,0 +1,98 @@ +/* + * Performance event callchain support - SuperH architecture code + * + * Copyright (C) 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <asm/unwinder.h> +#include <asm/ptrace.h> + +static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} + +static void callchain_warning(void *data, char *msg) +{ +} + +static void +callchain_warning_symbol(void *data, char *msg, unsigned long symbol) +{ +} + +static int callchain_stack(void *data, char *name) +{ + return 0; +} + +static void callchain_address(void *data, unsigned long addr, int reliable) +{ + struct perf_callchain_entry *entry = data; + + if (reliable) + callchain_store(entry, addr); +} + +static const struct stacktrace_ops callchain_ops = { + .warning = callchain_warning, + .warning_symbol = callchain_warning_symbol, + .stack = callchain_stack, + .address = callchain_address, +}; + +static void +perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + callchain_store(entry, PERF_CONTEXT_KERNEL); + callchain_store(entry, regs->pc); + + unwind_stack(NULL, regs, NULL, &callchain_ops, entry); +} + +static void +perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + int is_user; + + if (!regs) + return; + + is_user = user_mode(regs); + + if (!current || current->pid == 0) + return; + + if (is_user && current->state != TASK_RUNNING) + return; + + /* + * Only the kernel side is implemented for now. + */ + if (!is_user) + perf_callchain_kernel(regs, entry); +} + +/* + * No need for separate IRQ and NMI entries. 
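An illustrative aside on what the callchain code above produces (not part of the patch): for a kernel-mode sample, the per-cpu perf_callchain_entry ends up laid out roughly as

    entry->ip[0] = PERF_CONTEXT_KERNEL;  /* context marker for the tooling   */
    entry->ip[1] = regs->pc;             /* the interrupted instruction      */
    entry->ip[2...]                      /* return addresses, appended by
                                            callchain_address() as the
                                            unwinder walks the stack         */

and only frames the unwinder flags as reliable are recorded, so a frame with missing DWARF annotations truncates the chain rather than polluting it.
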
+ */ +static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); + +struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + struct perf_callchain_entry *entry = &__get_cpu_var(callchain); + + entry->nr = 0; + + perf_do_callchain(regs, entry); + + return entry; +} diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c new file mode 100644 index 00000000000..7ff0943e7a0 --- /dev/null +++ b/arch/sh/kernel/perf_event.c @@ -0,0 +1,312 @@ +/* + * Performance event support framework for SuperH hardware counters. + * + * Copyright (C) 2009 Paul Mundt + * + * Heavily based on the x86 and PowerPC implementations. + * + * x86: + * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> + * + * ppc: + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/perf_event.h> +#include <asm/processor.h> + +struct cpu_hw_events { + struct perf_event *events[MAX_HWEVENTS]; + unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; + unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; +}; + +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +static struct sh_pmu *sh_pmu __read_mostly; + +/* Number of perf_events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_pmc_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * Stub these out for now, do something more profound later. + */ +int reserve_pmc_hardware(void) +{ + return 0; +} + +void release_pmc_hardware(void) +{ +} + +static inline int sh_pmu_initialized(void) +{ + return !!sh_pmu; +} + +/* + * Release the PMU if this is the last perf_event. + */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +static int hw_perf_cache_event(int config, int *evp) +{ + unsigned long type, op, result; + int ev; + + if (!sh_pmu->cache_events) + return -EINVAL; + + /* unpack config */ + type = config & 0xff; + op = (config >> 8) & 0xff; + result = (config >> 16) & 0xff; + + if (type >= PERF_COUNT_HW_CACHE_MAX || + op >= PERF_COUNT_HW_CACHE_OP_MAX || + result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ev = (*sh_pmu->cache_events)[type][op][result]; + if (ev == 0) + return -EOPNOTSUPP; + if (ev == -1) + return -EINVAL; + *evp = ev; + return 0; +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + int config = -1; + int err; + + if (!sh_pmu_initialized()) + return -ENODEV; + + /* + * All of the on-chip counters are "limited", in that they have + * no interrupts, and are therefore unable to do sampling without + * further work and timer assistance. 
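hw_perf_cache_event() above unpacks the standard PERF_TYPE_HW_CACHE encoding: cache id in bits 0-7, operation in bits 8-15, result in bits 16-23. A worked example, using the generic enum values from <linux/perf_event.h> (illustrative, not taken from the patch):

    /* L1 data cache, read, miss */
    u64 config = PERF_COUNT_HW_CACHE_L1D |                   /* 0       */
                 (PERF_COUNT_HW_CACHE_OP_READ << 8) |         /* 0 << 8  */
                 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);     /* 1 << 16 */

    /* config == 0x10000; the lookup then becomes
     * (*sh_pmu->cache_events)[L1D][OP_READ][RESULT_MISS],
     * where 0 means "unsupported" and -1 means "invalid". */
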
+ */ + if (hwc->sample_period) + return -EINVAL; + + /* + * See if we need to reserve the counter. + * + * If no events are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * reserve_pmc_hardware or release_pmc_hardware. + */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && + reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + + if (err) + return err; + + event->destroy = hw_perf_event_destroy; + + switch (attr->type) { + case PERF_TYPE_RAW: + config = attr->config & sh_pmu->raw_event_mask; + break; + case PERF_TYPE_HW_CACHE: + err = hw_perf_cache_event(attr->config, &config); + if (err) + return err; + break; + case PERF_TYPE_HARDWARE: + if (attr->config >= sh_pmu->max_events) + return -EINVAL; + + config = sh_pmu->event_map(attr->config); + break; + } + + if (config == -1) + return -EINVAL; + + hwc->config |= config; + + return 0; +} + +static void sh_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + u64 prev_raw_count, new_raw_count; + s64 delta; + int shift = 0; + + /* + * Depending on the counter configuration, they may or may not + * be chained, in which case the previous counter value can be + * updated underneath us if the lower-half overflows. + * + * Our tactic to handle this is to first atomically read and + * exchange a new raw count - then add that new-prev delta + * count to the generic counter atomically. + * + * As there is no interrupt associated with the overflow events, + * this is the simplest approach for maintaining consistency. + */ +again: + prev_raw_count = atomic64_read(&hwc->prev_count); + new_raw_count = sh_pmu->read(idx); + + if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (counter-)time and add that to the generic counter. + * + * Careful, not all hw sign-extends above the physical width + * of the count. 
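The shift mentioned just above is a no-op in this file (shift == 0, the SH counters are read back as full 64-bit values), but the pattern is inherited from the x86/powerpc code this is modelled on. For a hypothetical PMU with, say, 48-bit counters it would look like:

    int shift = 64 - 48;    /* 64 minus the physical counter width; the
                               48-bit width is invented for this example */
    s64 delta;

    delta = (new_raw_count << shift) - (prev_raw_count << shift);
    delta >>= shift;        /* the arithmetic shift back down sign-extends
                               bit 47, so a counter that wrapped still yields
                               the small positive delta actually counted */
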
+ */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + atomic64_add(delta, &event->count); +} + +static void sh_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + clear_bit(idx, cpuc->active_mask); + sh_pmu->disable(hwc, idx); + + barrier(); + + sh_perf_event_update(event, &event->hw, idx); + + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + +static int sh_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (test_and_set_bit(idx, cpuc->used_mask)) { + idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); + if (idx == sh_pmu->num_events) + return -EAGAIN; + + set_bit(idx, cpuc->used_mask); + hwc->idx = idx; + } + + sh_pmu->disable(hwc, idx); + + cpuc->events[idx] = event; + set_bit(idx, cpuc->active_mask); + + sh_pmu->enable(hwc, idx); + + perf_event_update_userpage(event); + + return 0; +} + +static void sh_pmu_read(struct perf_event *event) +{ + sh_perf_event_update(event, &event->hw, event->hw.idx); +} + +static const struct pmu pmu = { + .enable = sh_pmu_enable, + .disable = sh_pmu_disable, + .read = sh_pmu_read, +}; + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + int err = __hw_perf_event_init(event); + if (unlikely(err)) { + if (event->destroy) + event->destroy(event); + return ERR_PTR(err); + } + + return &pmu; +} + +void hw_perf_event_setup(int cpu) +{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + + memset(cpuhw, 0, sizeof(struct cpu_hw_events)); +} + +void hw_perf_enable(void) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->enable_all(); +} + +void hw_perf_disable(void) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->disable_all(); +} + +int register_sh_pmu(struct sh_pmu *pmu) +{ + if (sh_pmu) + return -EBUSY; + sh_pmu = pmu; + + pr_info("Performance Events: %s support registered\n", pmu->name); + + WARN_ON(pmu->num_events > MAX_HWEVENTS); + + return 0; +} diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 0673c4746be..d8af889366a 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -134,7 +134,10 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) regs.regs[5] = (unsigned long)fn; regs.pc = (unsigned long)kernel_thread_helper; - regs.sr = (1 << 30); + regs.sr = SR_MD; +#if defined(CONFIG_SH_FPU) + regs.sr |= SR_FD; +#endif /* Ok, create the new process.. */ pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, @@ -142,6 +145,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) return pid; } +EXPORT_SYMBOL(kernel_thread); /* * Free current thread data structures etc.. @@ -186,6 +190,16 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) return fpvalid; } +EXPORT_SYMBOL(dump_fpu); + +/* + * This gets called before we allocate a new thread and copy + * the current task into it. 
+ */ +void prepare_to_copy(struct task_struct *tsk) +{ + unlazy_fpu(tsk, task_pt_regs(tsk)); +} asmlinkage void ret_from_fork(void); @@ -195,16 +209,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, { struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs; -#if defined(CONFIG_SH_FPU) || defined(CONFIG_SH_DSP) +#if defined(CONFIG_SH_DSP) struct task_struct *tsk = current; #endif -#if defined(CONFIG_SH_FPU) - unlazy_fpu(tsk, regs); - p->thread.fpu = tsk->thread.fpu; - copy_to_stopped_child_used_math(p); -#endif - #if defined(CONFIG_SH_DSP) if (is_dsp_enabled(tsk)) { /* We can use the __save_dsp or just copy the struct: @@ -224,6 +232,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, } else { childregs->regs[15] = (unsigned long)childregs; ti->addr_limit = KERNEL_DS; + ti->status &= ~TS_USEDFPU; + p->fpu_counter = 0; } if (clone_flags & CLONE_SETTLS) @@ -288,9 +298,13 @@ static void ubc_set_tracing(int asid, unsigned long pc) __notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev, struct task_struct *next) { -#if defined(CONFIG_SH_FPU) + struct thread_struct *next_t = &next->thread; + unlazy_fpu(prev, task_pt_regs(prev)); -#endif + + /* we're going to use this soon, after a few expensive things */ + if (next->fpu_counter > 5) + prefetch(&next_t->fpu.hard); #ifdef CONFIG_MMU /* @@ -321,6 +335,14 @@ __switch_to(struct task_struct *prev, struct task_struct *next) #endif } + /* + * If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now + */ + if (next->fpu_counter > 5) + fpu_state_restore(task_pt_regs(next)); + return prev; } diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 1192398ef58..359b8a2f4d2 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -335,6 +335,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); } +EXPORT_SYMBOL(kernel_thread); /* * Free current thread data structures etc.. @@ -417,6 +418,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) return 0; /* Task didn't use the fpu at all. */ #endif } +EXPORT_SYMBOL(dump_fpu); asmlinkage void ret_from_fork(void); diff --git a/arch/sh/kernel/return_address.c b/arch/sh/kernel/return_address.c new file mode 100644 index 00000000000..df3ab581107 --- /dev/null +++ b/arch/sh/kernel/return_address.c @@ -0,0 +1,54 @@ +/* + * arch/sh/kernel/return_address.c + * + * Copyright (C) 2009 Matt Fleming + * Copyright (C) 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/kernel.h> +#include <asm/dwarf.h> + +#ifdef CONFIG_DWARF_UNWINDER + +void *return_address(unsigned int depth) +{ + struct dwarf_frame *frame; + unsigned long ra; + int i; + + for (i = 0, frame = NULL, ra = 0; i <= depth; i++) { + struct dwarf_frame *tmp; + + tmp = dwarf_unwind_stack(ra, frame); + + if (frame) + dwarf_free_frame(frame); + + frame = tmp; + + if (!frame || !frame->return_addr) + break; + + ra = frame->return_addr; + } + + /* Failed to unwind the stack to the specified depth. 
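A quick usage sketch for the new helper being defined here (invented call site, not part of the patch): return_address(n) walks n+1 frames with the DWARF unwinder and hands back a code address suitable for %pS, e.g.

    void *ra = return_address(1);
    pr_info("reached via %pS\n", ra);

Judging by the loop above, depth 0 corresponds to the immediate caller, with each extra level one frame further up. On kernels without CONFIG_DWARF_UNWINDER the stub below simply returns NULL, so callers have to tolerate that.
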
*/ + WARN_ON(i != depth + 1); + + if (frame) + dwarf_free_frame(frame); + + return (void *)ra; +} + +#else + +void *return_address(unsigned int depth) +{ + return NULL; +} + +#endif diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 99b4fb553bf..5a947a2567e 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -453,6 +453,10 @@ void __init setup_arch(char **cmdline_p) paging_init(); +#ifdef CONFIG_PMB_ENABLE + pmb_init(); +#endif + #ifdef CONFIG_SMP plat_smp_setup(); #endif diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c index 444cce3ae92..3896f26efa4 100644 --- a/arch/sh/kernel/sh_ksyms_32.c +++ b/arch/sh/kernel/sh_ksyms_32.c @@ -1,37 +1,11 @@ #include <linux/module.h> -#include <linux/smp.h> -#include <linux/user.h> -#include <linux/elfcore.h> -#include <linux/sched.h> -#include <linux/in6.h> -#include <linux/interrupt.h> -#include <linux/vmalloc.h> -#include <linux/pci.h> -#include <linux/irq.h> -#include <asm/sections.h> -#include <asm/processor.h> -#include <asm/uaccess.h> +#include <linux/string.h> +#include <linux/uaccess.h> +#include <linux/delay.h> +#include <linux/mm.h> #include <asm/checksum.h> -#include <asm/io.h> -#include <asm/delay.h> -#include <asm/tlbflush.h> -#include <asm/cacheflush.h> -#include <asm/ftrace.h> - -extern int dump_fpu(struct pt_regs *, elf_fpregset_t *); - -/* platform dependent support */ -EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(strlen); - -/* PCI exports */ -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_alloc_consistent); -EXPORT_SYMBOL(pci_free_consistent); -#endif +#include <asm/sections.h> -/* mem exports */ EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); @@ -40,6 +14,13 @@ EXPORT_SYMBOL(__copy_user); EXPORT_SYMBOL(__udelay); EXPORT_SYMBOL(__ndelay); EXPORT_SYMBOL(__const_udelay); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(copy_page); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(_ebss); +EXPORT_SYMBOL(empty_zero_page); #define DECLARE_EXPORT(name) \ extern void name(void);EXPORT_SYMBOL(name) @@ -107,30 +88,6 @@ DECLARE_EXPORT(__sdivsi3_i4); DECLARE_EXPORT(__udivsi3_i4); DECLARE_EXPORT(__sdivsi3_i4i); DECLARE_EXPORT(__udivsi3_i4i); - -#if !defined(CONFIG_CACHE_OFF) && (defined(CONFIG_CPU_SH4) || \ - defined(CONFIG_SH7705_CACHE_32KB)) -/* needed by some modules */ -EXPORT_SYMBOL(flush_cache_all); -EXPORT_SYMBOL(flush_cache_range); -EXPORT_SYMBOL(flush_dcache_page); -#endif - #ifdef CONFIG_MCOUNT DECLARE_EXPORT(mcount); #endif -EXPORT_SYMBOL(csum_partial); -EXPORT_SYMBOL(csum_partial_copy_generic); -#ifdef CONFIG_IPV6 -EXPORT_SYMBOL(csum_ipv6_magic); -#endif -EXPORT_SYMBOL(copy_page); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(_ebss); -EXPORT_SYMBOL(empty_zero_page); - -#ifndef CONFIG_CACHE_OFF -EXPORT_SYMBOL(__flush_purge_region); -EXPORT_SYMBOL(__flush_wback_region); -EXPORT_SYMBOL(__flush_invalidate_region); -#endif diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c index d008e17eb25..45afa5c51f6 100644 --- a/arch/sh/kernel/sh_ksyms_64.c +++ b/arch/sh/kernel/sh_ksyms_64.c @@ -24,16 +24,6 @@ #include <asm/delay.h> #include <asm/irq.h> -extern int dump_fpu(struct pt_regs *, elf_fpregset_t *); - -/* platform dependent support */ -EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(kernel_thread); - -#ifdef CONFIG_VT -EXPORT_SYMBOL(screen_info); -#endif - EXPORT_SYMBOL(__put_user_asm_b); EXPORT_SYMBOL(__put_user_asm_w); EXPORT_SYMBOL(__put_user_asm_l); diff --git 
a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 3db37425210..12815ce01ec 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -67,7 +67,8 @@ sys_sigsuspend(old_sigset_t mask, current->state = TASK_INTERRUPTIBLE; schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); + return -ERESTARTNOHAND; } @@ -590,7 +591,7 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0) if (try_to_freeze()) goto no_signal; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) + if (current_thread_info()->status & TS_RESTORE_SIGMASK) oldset = &current->saved_sigmask; else oldset = &current->blocked; @@ -602,12 +603,13 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0) /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &ka, &info, oldset, regs, save_r0) == 0) { - /* a signal was successfully delivered; the saved + /* + * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, * and will be restored by sigreturn, so we can simply - * clear the TIF_RESTORE_SIGMASK flag */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); + * clear the TS_RESTORE_SIGMASK flag + */ + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; tracehook_signal_handler(signr, &info, &ka, regs, test_thread_flag(TIF_SINGLESTEP)); @@ -631,10 +633,12 @@ no_signal: } } - /* if there's no signal to deliver, we just put the saved sigmask - * back */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); + /* + * If there's no signal to deliver, we just put the saved sigmask + * back. + */ + if (current_thread_info()->status & TS_RESTORE_SIGMASK) { + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); } } diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c index 74793c80a57..feb3dddd319 100644 --- a/arch/sh/kernel/signal_64.c +++ b/arch/sh/kernel/signal_64.c @@ -101,7 +101,7 @@ static int do_signal(struct pt_regs *regs, sigset_t *oldset) if (try_to_freeze()) goto no_signal; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) + if (current_thread_info()->status & TS_RESTORE_SIGMASK) oldset = &current->saved_sigmask; else if (!oldset) oldset = &current->blocked; @@ -115,11 +115,9 @@ static int do_signal(struct pt_regs *regs, sigset_t *oldset) /* * If a signal was successfully delivered, the * saved sigmask is in its frame, and we can - * clear the TIF_RESTORE_SIGMASK flag. + * clear the TS_RESTORE_SIGMASK flag. */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); - + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; tracehook_signal_handler(signr, &info, &ka, regs, 0); return 1; } @@ -146,8 +144,8 @@ no_signal: } /* No signal to deliver -- put the saved sigmask back */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); + if (current_thread_info()->status & TS_RESTORE_SIGMASK) { + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); } @@ -176,6 +174,7 @@ sys_sigsuspend(old_sigset_t mask, while (1) { current->state = TASK_INTERRUPTIBLE; schedule(); + set_restore_sigmask(); regs->pc += 4; /* because sys_sigreturn decrements the pc */ if (do_signal(regs, &saveset)) { /* pc now points at signal handler.
Need to decrement diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 160db1003cf..983e0792d5f 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -122,7 +122,9 @@ int __cpuinit __cpu_up(unsigned int cpu) stack_start.bss_start = 0; /* don't clear bss for secondary cpus */ stack_start.start_kernel_fn = start_secondary; - flush_cache_all(); + flush_icache_range((unsigned long)&stack_start, + (unsigned long)&stack_start + sizeof(stack_start)); + wmb(); plat_start_cpu(cpu, (unsigned long)_stext); diff --git a/arch/sh/kernel/topology.c b/arch/sh/kernel/topology.c index 0838942b708..9b0b633b6c9 100644 --- a/arch/sh/kernel/topology.c +++ b/arch/sh/kernel/topology.c @@ -16,6 +16,32 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); +cpumask_t cpu_core_map[NR_CPUS]; + +static cpumask_t cpu_coregroup_map(unsigned int cpu) +{ + /* + * Presently all SH-X3 SMP cores are multi-cores, so just keep it + * simple until we have a method for determining topology.. + */ + return cpu_possible_map; +} + +const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +{ + return &cpu_core_map[cpu]; +} + +int arch_update_cpu_topology(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + cpu_core_map[cpu] = cpu_coregroup_map(cpu); + + return 0; +} + static int __init topology_init(void) { int i, ret; diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index a8396f36bd1..7b036339dc9 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -9,8 +9,8 @@ #include <asm/unwinder.h> #include <asm/system.h> -#ifdef CONFIG_BUG -void handle_BUG(struct pt_regs *regs) +#ifdef CONFIG_GENERIC_BUG +static void handle_BUG(struct pt_regs *regs) { const struct bug_entry *bug; unsigned long bugaddr = regs->pc; @@ -81,7 +81,7 @@ BUILD_TRAP_HANDLER(bug) SIGTRAP) == NOTIFY_STOP) return; -#ifdef CONFIG_BUG +#ifdef CONFIG_GENERIC_BUG if (__kernel_text_address(instruction_pointer(regs))) { insn_size_t insn = *(insn_size_t *)instruction_pointer(regs); if (insn == TRAPA_BUG_OPCODE) @@ -95,9 +95,11 @@ BUILD_TRAP_HANDLER(bug) BUILD_TRAP_HANDLER(nmi) { + unsigned int cpu = smp_processor_id(); TRAP_HANDLER_DECL; nmi_enter(); + nmi_count(cpu)++; switch (notify_die(DIE_NMI, "NMI", regs, 0, vec & 0xff, SIGINT)) { case NOTIFY_OK: diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index 7a2ee3a6b8e..3da5a125d88 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -25,6 +25,7 @@ #include <linux/kexec.h> #include <linux/limits.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/sysfs.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -68,61 +69,49 @@ static const char *se_usermode_action[] = { "signal+warn" }; -static int -proc_alignment_read(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int alignment_proc_show(struct seq_file *m, void *v) { - char *p = page; - int len; - - p += sprintf(p, "User:\t\t%lu\n", se_user); - p += sprintf(p, "System:\t\t%lu\n", se_sys); - p += sprintf(p, "Half:\t\t%lu\n", se_half); - p += sprintf(p, "Word:\t\t%lu\n", se_word); - p += sprintf(p, "DWord:\t\t%lu\n", se_dword); - p += sprintf(p, "Multi:\t\t%lu\n", se_multi); - p += sprintf(p, "User faults:\t%i (%s)\n", se_usermode, + seq_printf(m, "User:\t\t%lu\n", se_user); + seq_printf(m, "System:\t\t%lu\n", se_sys); + seq_printf(m, "Half:\t\t%lu\n", se_half); + seq_printf(m, "Word:\t\t%lu\n", se_word); + seq_printf(m, "DWord:\t\t%lu\n", se_dword); + seq_printf(m, "Multi:\t\t%lu\n", se_multi); + seq_printf(m, "User faults:\t%i 
(%s)\n", se_usermode, se_usermode_action[se_usermode]); - p += sprintf(p, "Kernel faults:\t%i (fixup%s)\n", se_kernmode_warn, + seq_printf(m, "Kernel faults:\t%i (fixup%s)\n", se_kernmode_warn, se_kernmode_warn ? "+warn" : ""); - - len = (p - page) - off; - if (len < 0) - len = 0; - - *eof = (len <= count) ? 1 : 0; - *start = page + off; - - return len; + return 0; } -static int proc_alignment_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static int alignment_proc_open(struct inode *inode, struct file *file) { - char mode; - - if (count > 0) { - if (get_user(mode, buffer)) - return -EFAULT; - if (mode >= '0' && mode <= '5') - se_usermode = mode - '0'; - } - return count; + return single_open(file, alignment_proc_show, NULL); } -static int proc_alignment_kern_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t alignment_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *pos) { + int *data = PDE(file->f_path.dentry->d_inode)->data; char mode; if (count > 0) { if (get_user(mode, buffer)) return -EFAULT; - if (mode >= '0' && mode <= '1') - se_kernmode_warn = mode - '0'; + if (mode >= '0' && mode <= '5') + *data = mode - '0'; } return count; } + +static const struct file_operations alignment_proc_fops = { + .owner = THIS_MODULE, + .open = alignment_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = alignment_proc_write, +}; #endif static void dump_mem(const char *str, unsigned long bottom, unsigned long top) @@ -945,14 +934,9 @@ void __init trap_init(void) set_exception_table_evt(0x800, do_reserved_inst); set_exception_table_evt(0x820, do_illegal_slot_inst); #elif defined(CONFIG_SH_FPU) -#ifdef CONFIG_CPU_SUBTYPE_SHX3 - set_exception_table_evt(0xd80, fpu_state_restore_trap_handler); - set_exception_table_evt(0xda0, fpu_state_restore_trap_handler); -#else set_exception_table_evt(0x800, fpu_state_restore_trap_handler); set_exception_table_evt(0x820, fpu_state_restore_trap_handler); #endif -#endif #ifdef CONFIG_CPU_SH2 set_exception_table_vec(TRAP_ADDRESS_ERROR, address_error_trap_handler); @@ -1011,20 +995,16 @@ static int __init alignment_init(void) if (!dir) return -ENOMEM; - res = create_proc_entry("alignment", S_IWUSR | S_IRUGO, dir); + res = proc_create_data("alignment", S_IWUSR | S_IRUGO, dir, + &alignment_proc_fops, &se_usermode); if (!res) return -ENOMEM; - res->read_proc = proc_alignment_read; - res->write_proc = proc_alignment_write; - - res = create_proc_entry("kernel_alignment", S_IWUSR | S_IRUGO, dir); + res = proc_create_data("kernel_alignment", S_IWUSR | S_IRUGO, dir, + &alignment_proc_fops, &se_kernmode_warn); if (!res) return -ENOMEM; - res->read_proc = proc_alignment_read; - res->write_proc = proc_alignment_kern_write; - return 0; } |