diff options
Diffstat (limited to 'arch/sh/kernel/cpu/sh4')
-rw-r--r-- | arch/sh/kernel/cpu/sh4/Makefile | 8 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/fpu.c | 28 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/perf_event.c | 253 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/setup-sh4-202.c | 23 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/setup-sh7750.c | 47 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/setup-sh7760.c | 89 |
6 files changed, 374 insertions, 74 deletions
diff --git a/arch/sh/kernel/cpu/sh4/Makefile b/arch/sh/kernel/cpu/sh4/Makefile index 203b18347b8..3a1dbc70983 100644 --- a/arch/sh/kernel/cpu/sh4/Makefile +++ b/arch/sh/kernel/cpu/sh4/Makefile @@ -9,6 +9,11 @@ obj-$(CONFIG_HIBERNATION) += $(addprefix ../sh3/, swsusp.o) obj-$(CONFIG_SH_FPU) += fpu.o softfloat.o obj-$(CONFIG_SH_STORE_QUEUES) += sq.o +# Perf events +perf-$(CONFIG_CPU_SUBTYPE_SH7750) := perf_event.o +perf-$(CONFIG_CPU_SUBTYPE_SH7750S) := perf_event.o +perf-$(CONFIG_CPU_SUBTYPE_SH7091) := perf_event.o + # CPU subtype setup obj-$(CONFIG_CPU_SUBTYPE_SH7750) += setup-sh7750.o obj-$(CONFIG_CPU_SUBTYPE_SH7750R) += setup-sh7750.o @@ -27,4 +32,5 @@ endif # Additional clocks by subtype clock-$(CONFIG_CPU_SUBTYPE_SH4_202) += clock-sh4-202.o -obj-y += $(clock-y) +obj-y += $(clock-y) +obj-$(CONFIG_PERF_EVENTS) += $(perf-y) diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index e3ea5411da6..e97857aec8a 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -41,13 +41,11 @@ static unsigned int fpu_exception_flags; /* * Save FPU registers onto task structure. - * Assume called with FPU enabled (SR.FD=0). */ -void save_fpu(struct task_struct *tsk, struct pt_regs *regs) +void save_fpu(struct task_struct *tsk) { unsigned long dummy; - clear_tsk_thread_flag(tsk, TIF_USEDFPU); enable_fpu(); asm volatile ("sts.l fpul, @-%0\n\t" "sts.l fpscr, @-%0\n\t" @@ -92,7 +90,6 @@ void save_fpu(struct task_struct *tsk, struct pt_regs *regs) :"memory"); disable_fpu(); - release_fpu(regs); } static void restore_fpu(struct task_struct *tsk) @@ -285,7 +282,6 @@ static int ieee_fpe_handler(struct pt_regs *regs) /* fcnvsd */ struct task_struct *tsk = current; - save_fpu(tsk, regs); if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)) /* FPU error */ denormal_to_double(&tsk->thread.fpu.hard, @@ -462,7 +458,7 @@ BUILD_TRAP_HANDLER(fpu_error) struct task_struct *tsk = current; TRAP_HANDLER_DECL; - save_fpu(tsk, regs); + __unlazy_fpu(tsk, regs); fpu_exception_flags = 0; if (ieee_fpe_handler(regs)) { tsk->thread.fpu.hard.fpscr &= @@ -473,7 +469,7 @@ BUILD_TRAP_HANDLER(fpu_error) tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10); grab_fpu(regs); restore_fpu(tsk); - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) & (fpu_exception_flags >> 2)) == 0) { return; @@ -483,18 +479,18 @@ BUILD_TRAP_HANDLER(fpu_error) force_sig(SIGFPE, tsk); } -BUILD_TRAP_HANDLER(fpu_state_restore) +void fpu_state_restore(struct pt_regs *regs) { struct task_struct *tsk = current; - TRAP_HANDLER_DECL; grab_fpu(regs); - if (!user_mode(regs)) { + if (unlikely(!user_mode(regs))) { printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); + BUG(); return; } - if (used_math()) { + if (likely(used_math())) { /* Using the FPU again. */ restore_fpu(tsk); } else { @@ -502,5 +498,13 @@ BUILD_TRAP_HANDLER(fpu_state_restore) fpu_init(); set_used_math(); } - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; + tsk->fpu_counter++; +} + +BUILD_TRAP_HANDLER(fpu_state_restore) +{ + TRAP_HANDLER_DECL; + + fpu_state_restore(regs); } diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c new file mode 100644 index 00000000000..7f9ecc9c2d0 --- /dev/null +++ b/arch/sh/kernel/cpu/sh4/perf_event.c @@ -0,0 +1,253 @@ +/* + * Performance events support for SH7750-style performance counters + * + * Copyright (C) 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/perf_event.h> +#include <asm/processor.h> + +#define PM_CR_BASE 0xff000084 /* 16-bit */ +#define PM_CTR_BASE 0xff100004 /* 32-bit */ + +#define PMCR(n) (PM_CR_BASE + ((n) * 0x04)) +#define PMCTRH(n) (PM_CTR_BASE + 0x00 + ((n) * 0x08)) +#define PMCTRL(n) (PM_CTR_BASE + 0x04 + ((n) * 0x08)) + +#define PMCR_PMM_MASK 0x0000003f + +#define PMCR_CLKF 0x00000100 +#define PMCR_PMCLR 0x00002000 +#define PMCR_PMST 0x00004000 +#define PMCR_PMEN 0x00008000 + +static struct sh_pmu sh7750_pmu; + +/* + * There are a number of events supported by each counter (33 in total). + * Since we have 2 counters, each counter will take the event code as it + * corresponds to the PMCR PMM setting. Each counter can be configured + * independently. + * + * Event Code Description + * ---------- ----------- + * + * 0x01 Operand read access + * 0x02 Operand write access + * 0x03 UTLB miss + * 0x04 Operand cache read miss + * 0x05 Operand cache write miss + * 0x06 Instruction fetch (w/ cache) + * 0x07 Instruction TLB miss + * 0x08 Instruction cache miss + * 0x09 All operand accesses + * 0x0a All instruction accesses + * 0x0b OC RAM operand access + * 0x0d On-chip I/O space access + * 0x0e Operand access (r/w) + * 0x0f Operand cache miss (r/w) + * 0x10 Branch instruction + * 0x11 Branch taken + * 0x12 BSR/BSRF/JSR + * 0x13 Instruction execution + * 0x14 Instruction execution in parallel + * 0x15 FPU Instruction execution + * 0x16 Interrupt + * 0x17 NMI + * 0x18 trapa instruction execution + * 0x19 UBCA match + * 0x1a UBCB match + * 0x21 Instruction cache fill + * 0x22 Operand cache fill + * 0x23 Elapsed time + * 0x24 Pipeline freeze by I-cache miss + * 0x25 Pipeline freeze by D-cache miss + * 0x27 Pipeline freeze by branch instruction + * 0x28 Pipeline freeze by CPU register + * 0x29 Pipeline freeze by FPU + */ + +static const int sh7750_general_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0x0023, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x000a, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0006, /* I-cache */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x0008, /* I-cache */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0010, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +static const int sh7750_cache_events + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0001, + [ C(RESULT_MISS) ] = 0x0004, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0002, + [ C(RESULT_MISS) ] = 0x0005, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(L1I) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0006, + [ C(RESULT_MISS) ] = 0x0008, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(LL) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0003, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0007, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + + [ C(BPU) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static int sh7750_event_map(int event) +{ + return sh7750_general_events[event]; +} + +static u64 sh7750_pmu_read(int idx) +{ + return (u64)((u64)(__raw_readl(PMCTRH(idx)) & 0xffff) << 32) | + __raw_readl(PMCTRL(idx)); +} + +static void sh7750_pmu_disable(struct hw_perf_event *hwc, int idx) +{ + unsigned int tmp; + + tmp = __raw_readw(PMCR(idx)); + tmp &= ~(PMCR_PMM_MASK | PMCR_PMEN); + __raw_writew(tmp, PMCR(idx)); +} + +static void sh7750_pmu_enable(struct hw_perf_event *hwc, int idx) +{ + __raw_writew(__raw_readw(PMCR(idx)) | PMCR_PMCLR, PMCR(idx)); + __raw_writew(hwc->config | PMCR_PMEN | PMCR_PMST, PMCR(idx)); +} + +static void sh7750_pmu_disable_all(void) +{ + int i; + + for (i = 0; i < sh7750_pmu.num_events; i++) + __raw_writew(__raw_readw(PMCR(i)) & ~PMCR_PMEN, PMCR(i)); +} + +static void sh7750_pmu_enable_all(void) +{ + int i; + + for (i = 0; i < sh7750_pmu.num_events; i++) + __raw_writew(__raw_readw(PMCR(i)) | PMCR_PMEN, PMCR(i)); +} + +static struct sh_pmu sh7750_pmu = { + .name = "SH7750", + .num_events = 2, + .event_map = sh7750_event_map, + .max_events = ARRAY_SIZE(sh7750_general_events), + .raw_event_mask = PMCR_PMM_MASK, + .cache_events = &sh7750_cache_events, + .read = sh7750_pmu_read, + .disable = sh7750_pmu_disable, + .enable = sh7750_pmu_enable, + .disable_all = sh7750_pmu_disable_all, + .enable_all = sh7750_pmu_enable_all, +}; + +static int __init sh7750_pmu_init(void) +{ + /* + * Make sure this CPU actually has perf counters. + */ + if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) { + pr_notice("HW perf events unsupported, software events only.\n"); + return -ENODEV; + } + + return register_sh_pmu(&sh7750_pmu); +} +arch_initcall(sh7750_pmu_init); diff --git a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c index de4827df19a..4b733715cdb 100644 --- a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c +++ b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c @@ -15,22 +15,18 @@ #include <linux/sh_timer.h> #include <linux/io.h> -static struct plat_sci_port sci_platform_data[] = { - { - .mapbase = 0xffe80000, - .flags = UPF_BOOT_AUTOCONF, - .type = PORT_SCIF, - .irqs = { 40, 41, 43, 42 }, - }, { - .flags = 0, - } +static struct plat_sci_port scif0_platform_data = { + .mapbase = 0xffe80000, + .flags = UPF_BOOT_AUTOCONF, + .type = PORT_SCIF, + .irqs = { 40, 41, 43, 42 }, }; -static struct platform_device sci_device = { +static struct platform_device scif0_device = { .name = "sh-sci", - .id = -1, + .id = 0, .dev = { - .platform_data = sci_platform_data, + .platform_data = &scif0_platform_data, }, }; @@ -127,7 +123,7 @@ static struct platform_device tmu2_device = { }; static struct platform_device *sh4202_devices[] __initdata = { - &sci_device, + &scif0_device, &tmu0_device, &tmu1_device, &tmu2_device, @@ -141,6 +137,7 @@ static int __init sh4202_devices_setup(void) arch_initcall(sh4202_devices_setup); static struct platform_device *sh4202_early_devices[] __initdata = { + &scif0_device, &tmu0_device, &tmu1_device, &tmu2_device, diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c index 1b8b122e8f3..b2a9df1af64 100644 --- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c +++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c @@ -35,29 +35,33 @@ static struct platform_device rtc_device = { .resource = rtc_resources, }; -static struct plat_sci_port sci_platform_data[] = { - { -#ifndef CONFIG_SH_RTS7751R2D - .mapbase = 0xffe00000, - .flags = UPF_BOOT_AUTOCONF, - .type = PORT_SCI, - .irqs = { 23, 23, 23, 0 }, - }, { -#endif - .mapbase = 0xffe80000, - .flags = UPF_BOOT_AUTOCONF, - .type = PORT_SCIF, - .irqs = { 40, 40, 40, 40 }, - }, { - .flags = 0, - } +static struct plat_sci_port scif0_platform_data = { + .mapbase = 0xffe00000, + .flags = UPF_BOOT_AUTOCONF, + .type = PORT_SCI, + .irqs = { 23, 23, 23, 0 }, }; -static struct platform_device sci_device = { +static struct platform_device scif0_device = { .name = "sh-sci", - .id = -1, + .id = 0, + .dev = { + .platform_data = &scif0_platform_data, + }, +}; + +static struct plat_sci_port scif1_platform_data = { + .mapbase = 0xffe80000, + .flags = UPF_BOOT_AUTOCONF, + .type = PORT_SCIF, + .irqs = { 40, 40, 40, 40 }, +}; + +static struct platform_device scif1_device = { + .name = "sh-sci", + .id = 1, .dev = { - .platform_data = sci_platform_data, + .platform_data = &scif1_platform_data, }, }; @@ -221,8 +225,9 @@ static struct platform_device tmu4_device = { #endif static struct platform_device *sh7750_devices[] __initdata = { + &scif0_device, + &scif1_device, &rtc_device, - &sci_device, &tmu0_device, &tmu1_device, &tmu2_device, @@ -242,6 +247,8 @@ static int __init sh7750_devices_setup(void) arch_initcall(sh7750_devices_setup); static struct platform_device *sh7750_early_devices[] __initdata = { + &scif0_device, + &scif1_device, &tmu0_device, &tmu1_device, &tmu2_device, diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c index 7fbb7be9284..5b74cc0b43d 100644 --- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c +++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c @@ -126,37 +126,63 @@ static struct intc_vect vectors_irq[] __initdata = { static DECLARE_INTC_DESC(intc_desc_irq, "sh7760-irq", vectors_irq, groups, mask_registers, prio_registers, NULL); -static struct plat_sci_port sci_platform_data[] = { - { - .mapbase = 0xfe600000, - .flags = UPF_BOOT_AUTOCONF, - .type = PORT_SCIF, - .irqs = { 52, 53, 55, 54 }, - }, { - .mapbase = 0xfe610000, - .flags = UPF_BOOT_AUTOCONF, - .type = PORT_SCIF, - .irqs = { 72, 73, 75, 74 }, - }, { - .mapbase = 0xfe620000, - .flags = UPF_BOOT_AUTOCONF, - .type = PORT_SCIF, - .irqs = { 76, 77, 79, 78 }, - }, { - .mapbase = 0xfe480000, - .flags = UPF_BOOT_AUTOCONF, - .type = PORT_SCI, - .irqs = { 80, 81, 82, 0 }, - }, { - .flags = 0, - } +static struct plat_sci_port scif0_platform_data = { + .mapbase = 0xfe600000, + .flags = UPF_BOOT_AUTOCONF, + .type = PORT_SCIF, + .irqs = { 52, 53, 55, 54 }, +}; + +static struct platform_device scif0_device = { + .name = "sh-sci", + .id = 0, + .dev = { + .platform_data = &scif0_platform_data, + }, +}; + +static struct plat_sci_port scif1_platform_data = { + .mapbase = 0xfe610000, + .flags = UPF_BOOT_AUTOCONF, + .type = PORT_SCIF, + .irqs = { 72, 73, 75, 74 }, +}; + +static struct platform_device scif1_device = { + .name = "sh-sci", + .id = 1, + .dev = { + .platform_data = &scif1_platform_data, + }, +}; + +static struct plat_sci_port scif2_platform_data = { + .mapbase = 0xfe620000, + .flags = UPF_BOOT_AUTOCONF, + .type = PORT_SCIF, + .irqs = { 76, 77, 79, 78 }, +}; + +static struct platform_device scif2_device = { + .name = "sh-sci", + .id = 2, + .dev = { + .platform_data = &scif2_platform_data, + }, +}; + +static struct plat_sci_port scif3_platform_data = { + .mapbase = 0xfe480000, + .flags = UPF_BOOT_AUTOCONF, + .type = PORT_SCI, + .irqs = { 80, 81, 82, 0 }, }; -static struct platform_device sci_device = { +static struct platform_device scif3_device = { .name = "sh-sci", - .id = -1, + .id = 3, .dev = { - .platform_data = sci_platform_data, + .platform_data = &scif3_platform_data, }, }; @@ -254,7 +280,10 @@ static struct platform_device tmu2_device = { static struct platform_device *sh7760_devices[] __initdata = { - &sci_device, + &scif0_device, + &scif1_device, + &scif2_device, + &scif3_device, &tmu0_device, &tmu1_device, &tmu2_device, @@ -268,6 +297,10 @@ static int __init sh7760_devices_setup(void) arch_initcall(sh7760_devices_setup); static struct platform_device *sh7760_early_devices[] __initdata = { + &scif0_device, + &scif1_device, + &scif2_device, + &scif3_device, &tmu0_device, &tmu1_device, &tmu2_device, |