diff options
Diffstat (limited to 'arch/arm')
181 files changed, 4977 insertions, 3914 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 6344543974f..32cbf3e888f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -10,11 +10,12 @@ config ARM select GENERIC_ATOMIC64 if (!CPU_32v6K || !AEABI) select HAVE_OPROFILE if (HAVE_PERF_EVENTS) select HAVE_ARCH_KGDB - select HAVE_KPROBES if (!XIP_KERNEL) + select HAVE_KPROBES if (!XIP_KERNEL && !THUMB2_KERNEL) select HAVE_KRETPROBES if (HAVE_KPROBES) select HAVE_FUNCTION_TRACER if (!XIP_KERNEL) select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) + select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL) select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO @@ -24,6 +25,7 @@ config ARM select PERF_USE_VMALLOC select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V7)) + select HAVE_C_RECORDMCOUNT help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and @@ -41,6 +43,9 @@ config MIGHT_HAVE_PCI config SYS_SUPPORTS_APM_EMULATION bool +config HAVE_SCHED_CLOCK + bool + config GENERIC_GPIO bool @@ -236,6 +241,7 @@ config ARCH_REALVIEW bool "ARM Ltd. RealView family" select ARM_AMBA select CLKDEV_LOOKUP + select HAVE_SCHED_CLOCK select ICST select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB @@ -250,6 +256,7 @@ config ARCH_VERSATILE select ARM_AMBA select ARM_VIC select CLKDEV_LOOKUP + select HAVE_SCHED_CLOCK select ICST select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB @@ -266,6 +273,7 @@ config ARCH_VEXPRESS select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select HAVE_CLK + select HAVE_SCHED_CLOCK select ICST select PLAT_VERSATILE help @@ -438,6 +446,7 @@ config ARCH_IXP4XX select CPU_XSCALE select GENERIC_GPIO select GENERIC_CLOCKEVENTS + select HAVE_SCHED_CLOCK select MIGHT_HAVE_PCI select DMABOUNCE if PCI help @@ -514,6 +523,7 @@ config ARCH_MMP select ARCH_REQUIRE_GPIOLIB select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS + select HAVE_SCHED_CLOCK select TICK_ONESHOT select PLAT_PXA select SPARSE_IRQ @@ -566,11 +576,12 @@ config ARCH_NUC93X config ARCH_TEGRA bool "NVIDIA Tegra" + select CLKDEV_LOOKUP select GENERIC_TIME select GENERIC_CLOCKEVENTS select GENERIC_GPIO select HAVE_CLK - select CLKDEV_LOOKUP + select HAVE_SCHED_CLOCK select ARCH_HAS_BARRIERS if CACHE_L2X0 select ARCH_HAS_CPUFREQ help @@ -593,6 +604,7 @@ config ARCH_PXA select CLKDEV_LOOKUP select ARCH_REQUIRE_GPIOLIB select GENERIC_CLOCKEVENTS + select HAVE_SCHED_CLOCK select TICK_ONESHOT select PLAT_PXA select SPARSE_IRQ @@ -641,6 +653,7 @@ config ARCH_SA1100 select CPU_FREQ select GENERIC_CLOCKEVENTS select HAVE_CLK + select HAVE_SCHED_CLOCK select TICK_ONESHOT select ARCH_REQUIRE_GPIOLIB help @@ -787,6 +800,7 @@ config ARCH_U300 bool "ST-Ericsson U300 Series" depends on MMU select CPU_ARM926T + select HAVE_SCHED_CLOCK select HAVE_TCM select ARM_AMBA select ARM_VIC @@ -835,6 +849,7 @@ config ARCH_OMAP select ARCH_REQUIRE_GPIOLIB select ARCH_HAS_CPUFREQ select GENERIC_CLOCKEVENTS + select HAVE_SCHED_CLOCK select ARCH_HAS_HOLES_MEMORYMODEL help Support for TI's OMAP platform (OMAP1/2/3/4). @@ -988,9 +1003,11 @@ config ARCH_ACORN config PLAT_IOP bool select GENERIC_CLOCKEVENTS + select HAVE_SCHED_CLOCK config PLAT_ORION bool + select HAVE_SCHED_CLOCK config PLAT_PXA bool @@ -1186,6 +1203,12 @@ config PCI_DOMAINS bool depends on PCI +config PCI_NANOENGINE + bool "BSE nanoEngine PCI support" + depends on SA1100_NANOENGINE + help + Enable PCI on the BSE nanoEngine board. + config PCI_SYSCALL def_bool PCI @@ -1216,10 +1239,11 @@ config SMP depends on EXPERIMENTAL depends on GENERIC_CLOCKEVENTS depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \ - MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\ - ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 + MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \ + ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 || \ + ARCH_MSM_SCORPIONMP select USE_GENERIC_SMP_HELPERS - select HAVE_ARM_SCU + select HAVE_ARM_SCU if !ARCH_MSM_SCORPIONMP help This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If @@ -1295,6 +1319,7 @@ config NR_CPUS config HOTPLUG_CPU bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" depends on SMP && HOTPLUG && EXPERIMENTAL + depends on !ARCH_MSM help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. @@ -1303,7 +1328,7 @@ config LOCAL_TIMERS bool "Use local timer interrupts" depends on SMP default y - select HAVE_ARM_TWD + select HAVE_ARM_TWD if !ARCH_MSM_SCORPIONMP help Enable support for local timers on SMP platforms, rather then the legacy IPI broadcast method. Local timers allows the system @@ -1323,7 +1348,7 @@ config HZ config THUMB2_KERNEL bool "Compile the kernel in Thumb-2 mode (EXPERIMENTAL)" - depends on CPU_V7 && EXPERIMENTAL + depends on CPU_V7 && !CPU_V6 && EXPERIMENTAL select AEABI select ARM_ASM_UNIFIED help @@ -1663,6 +1688,19 @@ config ATAGS_PROC Should the atags used to boot the kernel be exported in an "atags" file in procfs. Useful with kexec. +config CRASH_DUMP + bool "Build kdump crash kernel (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + Generate crash dump after being started by kexec. This should + be normally only set in special crash dump kernels which are + loaded in the main kernel with kexec-tools into a specially + reserved region and then later executed after a crash by + kdump/kexec. The crash dump kernel must be compiled to a + memory address not used by the main kernel + + For more details see Documentation/kdump/kdump.txt + config AUTO_ZRELADDR bool "Auto calculation of the decompressed kernel image address" depends on !ZBOOT_ROM && !ARCH_U300 @@ -1772,7 +1810,7 @@ comment "At least one emulation must be selected" config FPE_NWFPE bool "NWFPE math emulation" - depends on !AEABI || OABI_COMPAT + depends on (!AEABI || OABI_COMPAT) && !THUMB2_KERNEL ---help--- Say Y to include the NWFPE floating point emulator in the kernel. This is necessary to run most binaries. Linux does not currently diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index a5775a56f0b..494224a9b45 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -23,7 +23,7 @@ config STRICT_DEVMEM config FRAME_POINTER bool depends on !THUMB2_KERNEL - default y if !ARM_UNWIND + default y if !ARM_UNWIND || FUNCTION_GRAPH_TRACER help If you say N here, the resulting kernel will be slightly smaller and faster. However, if neither FRAME_POINTER nor ARM_UNWIND are enabled, diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index 4a590f4113e..4d26f2c52a7 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -70,12 +70,7 @@ else $(obj)/uImage: LOADADDR=$(ZRELADDR) endif -ifeq ($(CONFIG_THUMB2_KERNEL),y) -# Set bit 0 to 1 so that "mov pc, rx" switches to Thumb-2 mode -$(obj)/uImage: STARTADDR=$(shell echo $(LOADADDR) | sed -e "s/.$$/1/") -else $(obj)/uImage: STARTADDR=$(LOADADDR) -endif $(obj)/uImage: $(obj)/zImage FORCE $(call if_changed,uimage) diff --git a/arch/arm/boot/bootp/init.S b/arch/arm/boot/bootp/init.S index 8b0de41c3dc..78b50807516 100644 --- a/arch/arm/boot/bootp/init.S +++ b/arch/arm/boot/bootp/init.S @@ -73,6 +73,8 @@ move: ldmia r4!, {r7 - r10} @ move 32-bytes at a time .size _start, . - _start + .align + .type data,#object data: .word initrd_start @ source initrd address .word initrd_phys @ destination initrd address diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 65a7c1c588a..0a8f748e506 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -45,6 +45,10 @@ else endif endif +ifeq ($(CONFIG_ARCH_SHMOBILE),y) +OBJS += head-shmobile.o +endif + # # We now have a PIC decompressor implementation. Decompressors running # from RAM should not define ZTEXTADDR. Decompressors running directly diff --git a/arch/arm/boot/compressed/head-shmobile.S b/arch/arm/boot/compressed/head-shmobile.S new file mode 100644 index 00000000000..30973b76e6a --- /dev/null +++ b/arch/arm/boot/compressed/head-shmobile.S @@ -0,0 +1,53 @@ +/* + * The head-file for SH-Mobile ARM platforms + * + * Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> + * Simon Horman <horms@verge.net.au> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef CONFIG_ZBOOT_ROM + + .section ".start", "ax" + + /* load board-specific initialization code */ +#include <mach/zboot.h> + + b 1f +__atags:@ tag #1 + .long 12 @ tag->hdr.size = tag_size(tag_core); + .long 0x54410001 @ tag->hdr.tag = ATAG_CORE; + .long 0 @ tag->u.core.flags = 0; + .long 0 @ tag->u.core.pagesize = 0; + .long 0 @ tag->u.core.rootdev = 0; + @ tag #2 + .long 8 @ tag->hdr.size = tag_size(tag_mem32); + .long 0x54410002 @ tag->hdr.tag = ATAG_MEM; + .long CONFIG_MEMORY_SIZE @ tag->u.mem.size = CONFIG_MEMORY_SIZE; + .long CONFIG_MEMORY_START @ @ tag->u.mem.start = CONFIG_MEMORY_START; + @ tag #3 + .long 0 @ tag->hdr.size = 0 + .long 0 @ tag->hdr.tag = ATAG_NONE; +1: + + /* Set board ID necessary for boot */ + ldr r7, 1f @ Set machine type register + adr r8, __atags @ Set atag register + b 2f + +1 : .long MACH_TYPE +2 : + +#endif /* CONFIG_ZBOOT_ROM */ diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 9be21ba648c..7193884ed8b 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -125,9 +125,13 @@ wait: mrc p14, 0, pc, c0, c1, 0 * sort out different calling conventions */ .align + .arm @ Always enter in ARM state start: .type start,#function - .rept 8 + THUMB( adr r12, BSYM(1f) ) + THUMB( bx r12 ) + THUMB( .rept 6 ) + ARM( .rept 8 ) mov r0, r0 .endr @@ -135,6 +139,7 @@ start: .word 0x016f2818 @ Magic numbers to help the loader .word start @ absolute load/run zImage address .word _edata @ zImage end address + THUMB( .thumb ) 1: mov r7, r1 @ save architecture ID mov r8, r2 @ save atags pointer @@ -174,7 +179,8 @@ not_angel: ldr sp, [r0, #28] #ifdef CONFIG_AUTO_ZRELADDR @ determine final kernel image address - and r4, pc, #0xf8000000 + mov r4, pc + and r4, r4, #0xf8000000 add r4, r4, #TEXT_OFFSET #else ldr r4, =zreladdr @@ -445,7 +451,8 @@ __setup_mmu: sub r3, r4, #16384 @ Page directory size */ mov r1, #0x1e orr r1, r1, #3 << 10 - mov r2, pc, lsr #20 + mov r2, pc + mov r2, r2, lsr #20 orr r1, r1, r2, lsl #20 add r0, r3, r2, lsl #2 str r1, [r0], #4 diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 799e140274f..e7521bca2c3 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -16,3 +16,5 @@ obj-$(CONFIG_SHARP_SCOOP) += scoop.o obj-$(CONFIG_ARCH_IXP2000) += uengine.o obj-$(CONFIG_ARCH_IXP23XX) += uengine.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o +obj-$(CONFIG_COMMON_CLKDEV) += clkdev.o +obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c index 772f95f1aec..0b89ef00133 100644 --- a/arch/arm/common/gic.c +++ b/arch/arm/common/gic.c @@ -35,6 +35,9 @@ static DEFINE_SPINLOCK(irq_controller_lock); +/* Address of GIC 0 CPU interface */ +void __iomem *gic_cpu_base_addr __read_mostly; + struct gic_chip_data { unsigned int irq_offset; void __iomem *dist_base; @@ -45,7 +48,7 @@ struct gic_chip_data { #define MAX_GIC_NR 1 #endif -static struct gic_chip_data gic_data[MAX_GIC_NR]; +static struct gic_chip_data gic_data[MAX_GIC_NR] __read_mostly; static inline void __iomem *gic_dist_base(unsigned int irq) { @@ -146,9 +149,15 @@ static int gic_set_cpu(unsigned int irq, const struct cpumask *mask_val) unsigned int shift = (irq % 4) * 8; unsigned int cpu = cpumask_first(mask_val); u32 val; + struct irq_desc *desc; spin_lock(&irq_controller_lock); - irq_desc[irq].node = cpu; + desc = irq_to_desc(irq); + if (desc == NULL) { + spin_unlock(&irq_controller_lock); + return -EINVAL; + } + desc->node = cpu; val = readl(reg) & ~(0xff << shift); val |= 1 << (cpu + shift); writel(val, reg); @@ -207,68 +216,65 @@ void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq) set_irq_chained_handler(irq, gic_handle_cascade_irq); } -void __init gic_dist_init(unsigned int gic_nr, void __iomem *base, - unsigned int irq_start) +static void __init gic_dist_init(struct gic_chip_data *gic, + unsigned int irq_start) { - unsigned int max_irq, i; + unsigned int gic_irqs, irq_limit, i; + void __iomem *base = gic->dist_base; u32 cpumask = 1 << smp_processor_id(); - if (gic_nr >= MAX_GIC_NR) - BUG(); - cpumask |= cpumask << 8; cpumask |= cpumask << 16; - gic_data[gic_nr].dist_base = base; - gic_data[gic_nr].irq_offset = (irq_start - 1) & ~31; - writel(0, base + GIC_DIST_CTRL); /* * Find out how many interrupts are supported. - */ - max_irq = readl(base + GIC_DIST_CTR) & 0x1f; - max_irq = (max_irq + 1) * 32; - - /* * The GIC only supports up to 1020 interrupt sources. - * Limit this to either the architected maximum, or the - * platform maximum. */ - if (max_irq > max(1020, NR_IRQS)) - max_irq = max(1020, NR_IRQS); + gic_irqs = readl(base + GIC_DIST_CTR) & 0x1f; + gic_irqs = (gic_irqs + 1) * 32; + if (gic_irqs > 1020) + gic_irqs = 1020; /* * Set all global interrupts to be level triggered, active low. */ - for (i = 32; i < max_irq; i += 16) + for (i = 32; i < gic_irqs; i += 16) writel(0, base + GIC_DIST_CONFIG + i * 4 / 16); /* * Set all global interrupts to this CPU only. */ - for (i = 32; i < max_irq; i += 4) + for (i = 32; i < gic_irqs; i += 4) writel(cpumask, base + GIC_DIST_TARGET + i * 4 / 4); /* * Set priority on all global interrupts. */ - for (i = 32; i < max_irq; i += 4) + for (i = 32; i < gic_irqs; i += 4) writel(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4); /* * Disable all interrupts. Leave the PPI and SGIs alone * as these enables are banked registers. */ - for (i = 32; i < max_irq; i += 32) + for (i = 32; i < gic_irqs; i += 32) writel(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i * 4 / 32); /* + * Limit number of interrupts registered to the platform maximum + */ + irq_limit = gic->irq_offset + gic_irqs; + if (WARN_ON(irq_limit > NR_IRQS)) + irq_limit = NR_IRQS; + + /* * Setup the Linux IRQ subsystem. */ - for (i = irq_start; i < gic_data[gic_nr].irq_offset + max_irq; i++) { + for (i = irq_start; i < irq_limit; i++) { set_irq_chip(i, &gic_chip); - set_irq_chip_data(i, &gic_data[gic_nr]); + set_irq_chip_data(i, gic); set_irq_handler(i, handle_level_irq); set_irq_flags(i, IRQF_VALID | IRQF_PROBE); } @@ -276,19 +282,12 @@ void __init gic_dist_init(unsigned int gic_nr, void __iomem *base, writel(1, base + GIC_DIST_CTRL); } -void __cpuinit gic_cpu_init(unsigned int gic_nr, void __iomem *base) +static void __cpuinit gic_cpu_init(struct gic_chip_data *gic) { - void __iomem *dist_base; + void __iomem *dist_base = gic->dist_base; + void __iomem *base = gic->cpu_base; int i; - if (gic_nr >= MAX_GIC_NR) - BUG(); - - dist_base = gic_data[gic_nr].dist_base; - BUG_ON(!dist_base); - - gic_data[gic_nr].cpu_base = base; - /* * Deal with the banked PPI and SGI interrupts - disable all * PPI interrupts, ensure all SGI interrupts are enabled. @@ -306,6 +305,42 @@ void __cpuinit gic_cpu_init(unsigned int gic_nr, void __iomem *base) writel(1, base + GIC_CPU_CTRL); } +void __init gic_init(unsigned int gic_nr, unsigned int irq_start, + void __iomem *dist_base, void __iomem *cpu_base) +{ + struct gic_chip_data *gic; + + BUG_ON(gic_nr >= MAX_GIC_NR); + + gic = &gic_data[gic_nr]; + gic->dist_base = dist_base; + gic->cpu_base = cpu_base; + gic->irq_offset = (irq_start - 1) & ~31; + + if (gic_nr == 0) + gic_cpu_base_addr = cpu_base; + + gic_dist_init(gic, irq_start); + gic_cpu_init(gic); +} + +void __cpuinit gic_secondary_init(unsigned int gic_nr) +{ + BUG_ON(gic_nr >= MAX_GIC_NR); + + gic_cpu_init(&gic_data[gic_nr]); +} + +void __cpuinit gic_enable_ppi(unsigned int irq) +{ + unsigned long flags; + + local_irq_save(flags); + irq_to_desc(irq)->status |= IRQ_NOPROBE; + gic_unmask_irq(irq); + local_irq_restore(flags); +} + #ifdef CONFIG_SMP void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) { diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c index 1bec96e8519..42ff90b46df 100644 --- a/arch/arm/common/it8152.c +++ b/arch/arm/common/it8152.c @@ -352,3 +352,4 @@ struct pci_bus * __init it8152_pci_scan_bus(int nr, struct pci_sys_data *sys) return pci_scan_bus(nr, &it8152_ops, sys); } +EXPORT_SYMBOL(dma_set_coherent_mask); diff --git a/arch/arm/plat-versatile/timer-sp.c b/arch/arm/common/timer-sp.c index fb0d1c29971..6ef3342153b 100644 --- a/arch/arm/plat-versatile/timer-sp.c +++ b/arch/arm/common/timer-sp.c @@ -1,5 +1,5 @@ /* - * linux/arch/arm/plat-versatile/timer-sp.c + * linux/arch/arm/common/timer-sp.c * * Copyright (C) 1999 - 2003 ARM Limited * Copyright (C) 2000 Deep Blue Solutions Ltd @@ -26,8 +26,6 @@ #include <asm/hardware/arm_timer.h> -#include <plat/timer-sp.h> - /* * These timers are currently always setup to be clocked at 1MHz. */ @@ -46,7 +44,6 @@ static struct clocksource clocksource_sp804 = { .rating = 200, .read = sp804_read, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -63,8 +60,7 @@ void __init sp804_clocksource_init(void __iomem *base) writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, clksrc_base + TIMER_CTRL); - cs->mult = clocksource_khz2mult(TIMER_FREQ_KHZ, cs->shift); - clocksource_register(cs); + clocksource_register_khz(cs, TIMER_FREQ_KHZ); } diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 8bb66bca2e3..c3cd8755e64 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -99,6 +99,8 @@ struct elf32_hdr; extern int elf_check_arch(const struct elf32_hdr *); #define elf_check_arch elf_check_arch +#define vmcore_elf64_check_arch(x) (0) + extern int arm_elf_read_implies_exec(const struct elf32_hdr *, int); #define elf_read_implies_exec(ex,stk) arm_elf_read_implies_exec(&(ex), stk) diff --git a/arch/arm/include/asm/hardware/entry-macro-gic.S b/arch/arm/include/asm/hardware/entry-macro-gic.S new file mode 100644 index 00000000000..c115b82fe80 --- /dev/null +++ b/arch/arm/include/asm/hardware/entry-macro-gic.S @@ -0,0 +1,75 @@ +/* + * arch/arm/include/asm/hardware/entry-macro-gic.S + * + * Low-level IRQ helper macros for GIC + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <asm/hardware/gic.h> + +#ifndef HAVE_GET_IRQNR_PREAMBLE + .macro get_irqnr_preamble, base, tmp + ldr \base, =gic_cpu_base_addr + ldr \base, [\base] + .endm +#endif + +/* + * The interrupt numbering scheme is defined in the + * interrupt controller spec. To wit: + * + * Interrupts 0-15 are IPI + * 16-28 are reserved + * 29-31 are local. We allow 30 to be used for the watchdog. + * 32-1020 are global + * 1021-1022 are reserved + * 1023 is "spurious" (no interrupt) + * + * For now, we ignore all local interrupts so only return an interrupt if it's + * between 30 and 1020. The test_for_ipi routine below will pick up on IPIs. + * + * A simple read from the controller will tell us the number of the highest + * priority enabled interrupt. We then just need to check whether it is in the + * valid range for an IRQ (30-1020 inclusive). + */ + + .macro get_irqnr_and_base, irqnr, irqstat, base, tmp + + ldr \irqstat, [\base, #GIC_CPU_INTACK] + /* bits 12-10 = src CPU, 9-0 = int # */ + + ldr \tmp, =1021 + bic \irqnr, \irqstat, #0x1c00 + cmp \irqnr, #29 + cmpcc \irqnr, \irqnr + cmpne \irqnr, \tmp + cmpcs \irqnr, \irqnr + .endm + +/* We assume that irqstat (the raw value of the IRQ acknowledge + * register) is preserved from the macro above. + * If there is an IPI, we immediately signal end of interrupt on the + * controller, since this requires the original irqstat value which + * we won't easily be able to recreate later. + */ + + .macro test_for_ipi, irqnr, irqstat, base, tmp + bic \irqnr, \irqstat, #0x1c00 + cmp \irqnr, #16 + strcc \irqstat, [\base, #GIC_CPU_EOI] + cmpcs \irqnr, \irqnr + .endm + +/* As above, this assumes that irqstat and base are preserved.. */ + + .macro test_for_ltirq, irqnr, irqstat, base, tmp + bic \irqnr, \irqstat, #0x1c00 + mov \tmp, #0 + cmp \irqnr, #29 + moveq \tmp, #1 + streq \irqstat, [\base, #GIC_CPU_EOI] + cmp \tmp, #0 + .endm diff --git a/arch/arm/include/asm/hardware/gic.h b/arch/arm/include/asm/hardware/gic.h index 7f34333bb54..84557d32100 100644 --- a/arch/arm/include/asm/hardware/gic.h +++ b/arch/arm/include/asm/hardware/gic.h @@ -33,10 +33,13 @@ #define GIC_DIST_SOFTINT 0xf00 #ifndef __ASSEMBLY__ -void gic_dist_init(unsigned int gic_nr, void __iomem *base, unsigned int irq_start); -void gic_cpu_init(unsigned int gic_nr, void __iomem *base); +extern void __iomem *gic_cpu_base_addr; + +void gic_init(unsigned int, unsigned int, void __iomem *, void __iomem *); +void gic_secondary_init(unsigned int); void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); void gic_raise_softirq(const struct cpumask *mask, unsigned int irq); +void gic_enable_ppi(unsigned int); #endif #endif diff --git a/arch/arm/include/asm/hardware/it8152.h b/arch/arm/include/asm/hardware/it8152.h index 21fa272301f..b2f95c72287 100644 --- a/arch/arm/include/asm/hardware/it8152.h +++ b/arch/arm/include/asm/hardware/it8152.h @@ -76,6 +76,7 @@ extern unsigned long it8152_base_address; IT8152_PD_IRQ(0) Audio controller (ACR) */ #define IT8152_IRQ(x) (IRQ_BOARD_START + (x)) +#define IT8152_LAST_IRQ (IRQ_BOARD_START + 40) /* IRQ-sources in 3 groups - local devices, LPC (serial), and external PCI */ #define IT8152_LD_IRQ_COUNT 9 diff --git a/arch/arm/plat-versatile/include/plat/timer-sp.h b/arch/arm/include/asm/hardware/timer-sp.h index 21e75e30d49..21e75e30d49 100644 --- a/arch/arm/plat-versatile/include/plat/timer-sp.h +++ b/arch/arm/include/asm/hardware/timer-sp.h diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h index 1fc684e70ab..7080e2c8fa6 100644 --- a/arch/arm/include/asm/highmem.h +++ b/arch/arm/include/asm/highmem.h @@ -25,9 +25,6 @@ extern void *kmap_high(struct page *page); extern void *kmap_high_get(struct page *page); extern void kunmap_high(struct page *page); -extern void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte); -extern void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte); - /* * The following functions are already defined by <linux/highmem.h> * when CONFIG_HIGHMEM is not set. diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 815efa2d4e0..20e0f7c9e03 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -241,18 +241,15 @@ extern void _memset_io(volatile void __iomem *, int, size_t); * */ #ifndef __arch_ioremap -#define ioremap(cookie,size) __arm_ioremap(cookie, size, MT_DEVICE) -#define ioremap_nocache(cookie,size) __arm_ioremap(cookie, size, MT_DEVICE) -#define ioremap_cached(cookie,size) __arm_ioremap(cookie, size, MT_DEVICE_CACHED) -#define ioremap_wc(cookie,size) __arm_ioremap(cookie, size, MT_DEVICE_WC) -#define iounmap(cookie) __iounmap(cookie) -#else +#define __arch_ioremap __arm_ioremap +#define __arch_iounmap __iounmap +#endif + #define ioremap(cookie,size) __arch_ioremap((cookie), (size), MT_DEVICE) #define ioremap_nocache(cookie,size) __arch_ioremap((cookie), (size), MT_DEVICE) #define ioremap_cached(cookie,size) __arch_ioremap((cookie), (size), MT_DEVICE_CACHED) #define ioremap_wc(cookie,size) __arch_ioremap((cookie), (size), MT_DEVICE_WC) -#define iounmap(cookie) __arch_iounmap(cookie) -#endif +#define iounmap __arch_iounmap /* * io{read,write}{8,16,32} macros diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h index 8ec9ef5c3c7..c0094d8edae 100644 --- a/arch/arm/include/asm/kexec.h +++ b/arch/arm/include/asm/kexec.h @@ -33,10 +33,20 @@ static inline void crash_setup_regs(struct pt_regs *newregs, if (oldregs) { memcpy(newregs, oldregs, sizeof(*newregs)); } else { - __asm__ __volatile__ ("stmia %0, {r0 - r15}" - : : "r" (&newregs->ARM_r0)); - __asm__ __volatile__ ("mrs %0, cpsr" - : "=r" (newregs->ARM_cpsr)); + __asm__ __volatile__ ( + "stmia %[regs_base], {r0-r12}\n\t" + "mov %[_ARM_sp], sp\n\t" + "str lr, %[_ARM_lr]\n\t" + "adr %[_ARM_pc], 1f\n\t" + "mrs %[_ARM_cpsr], cpsr\n\t" + "1:" + : [_ARM_pc] "=r" (newregs->ARM_pc), + [_ARM_cpsr] "=r" (newregs->ARM_cpsr), + [_ARM_sp] "=r" (newregs->ARM_sp), + [_ARM_lr] "=o" (newregs->ARM_lr) + : [regs_base] "r" (&newregs->ARM_r0) + : "memory" + ); } } diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index cbb0bc295d2..12c8e680cbf 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -8,11 +8,6 @@ struct unwind_table; #ifdef CONFIG_ARM_UNWIND -struct arm_unwind_mapping { - Elf_Shdr *unw_sec; - Elf_Shdr *sec_text; - struct unwind_table *unwind; -}; enum { ARM_SEC_INIT, ARM_SEC_DEVINIT, @@ -21,13 +16,13 @@ enum { ARM_SEC_DEVEXIT, ARM_SEC_MAX, }; +#endif + struct mod_arch_specific { - struct arm_unwind_mapping map[ARM_SEC_MAX]; -}; -#else -struct mod_arch_specific { -}; +#ifdef CONFIG_ARM_UNWIND + struct unwind_table *unwind[ARM_SEC_MAX]; #endif +}; /* * Include the ARM architecture version. diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h new file mode 100644 index 00000000000..a84628be1a7 --- /dev/null +++ b/arch/arm/include/asm/sched_clock.h @@ -0,0 +1,118 @@ +/* + * sched_clock.h: support for extending counters to full 64-bit ns counter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef ASM_SCHED_CLOCK +#define ASM_SCHED_CLOCK + +#include <linux/kernel.h> +#include <linux/types.h> + +struct clock_data { + u64 epoch_ns; + u32 epoch_cyc; + u32 epoch_cyc_copy; + u32 mult; + u32 shift; +}; + +#define DEFINE_CLOCK_DATA(name) struct clock_data name + +static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift) +{ + return (cyc * mult) >> shift; +} + +/* + * Atomically update the sched_clock epoch. Your update callback will + * be called from a timer before the counter wraps - read the current + * counter value, and call this function to safely move the epochs + * forward. Only use this from the update callback. + */ +static inline void update_sched_clock(struct clock_data *cd, u32 cyc, u32 mask) +{ + unsigned long flags; + u64 ns = cd->epoch_ns + + cyc_to_ns((cyc - cd->epoch_cyc) & mask, cd->mult, cd->shift); + + /* + * Write epoch_cyc and epoch_ns in a way that the update is + * detectable in cyc_to_fixed_sched_clock(). + */ + raw_local_irq_save(flags); + cd->epoch_cyc = cyc; + smp_wmb(); + cd->epoch_ns = ns; + smp_wmb(); + cd->epoch_cyc_copy = cyc; + raw_local_irq_restore(flags); +} + +/* + * If your clock rate is known at compile time, using this will allow + * you to optimize the mult/shift loads away. This is paired with + * init_fixed_sched_clock() to ensure that your mult/shift are correct. + */ +static inline unsigned long long cyc_to_fixed_sched_clock(struct clock_data *cd, + u32 cyc, u32 mask, u32 mult, u32 shift) +{ + u64 epoch_ns; + u32 epoch_cyc; + + /* + * Load the epoch_cyc and epoch_ns atomically. We do this by + * ensuring that we always write epoch_cyc, epoch_ns and + * epoch_cyc_copy in strict order, and read them in strict order. + * If epoch_cyc and epoch_cyc_copy are not equal, then we're in + * the middle of an update, and we should repeat the load. + */ + do { + epoch_cyc = cd->epoch_cyc; + smp_rmb(); + epoch_ns = cd->epoch_ns; + smp_rmb(); + } while (epoch_cyc != cd->epoch_cyc_copy); + + return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, mult, shift); +} + +/* + * Otherwise, you need to use this, which will obtain the mult/shift + * from the clock_data structure. Use init_sched_clock() with this. + */ +static inline unsigned long long cyc_to_sched_clock(struct clock_data *cd, + u32 cyc, u32 mask) +{ + return cyc_to_fixed_sched_clock(cd, cyc, mask, cd->mult, cd->shift); +} + +/* + * Initialize the clock data - calculate the appropriate multiplier + * and shift. Also setup a timer to ensure that the epoch is refreshed + * at the appropriate time interval, which will call your update + * handler. + */ +void init_sched_clock(struct clock_data *, void (*)(void), + unsigned int, unsigned long); + +/* + * Use this initialization function rather than init_sched_clock() if + * you're using cyc_to_fixed_sched_clock, which will warn if your + * constants are incorrect. + */ +static inline void init_fixed_sched_clock(struct clock_data *cd, + void (*update)(void), unsigned int bits, unsigned long rate, + u32 mult, u32 shift) +{ + init_sched_clock(cd, update, bits, rate); + if (cd->mult != mult || cd->shift != shift) { + pr_crit("sched_clock: wrong multiply/shift: %u>>%u vs calculated %u>>%u\n" + "sched_clock: fix multiply/shift to avoid scheduler hiccups\n", + mult, shift, cd->mult, cd->shift); + } +} + +#endif diff --git a/arch/arm/include/asm/sizes.h b/arch/arm/include/asm/sizes.h index 4fc1565e4f9..316bb2b2be3 100644 --- a/arch/arm/include/asm/sizes.h +++ b/arch/arm/include/asm/sizes.h @@ -13,9 +13,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* DO NOT EDIT!! - this file automatically generated - * from .s file by awk -f s2h.awk - */ /* Size definitions * Copyright (C) ARM Limited 1998. All rights reserved. */ @@ -25,6 +22,9 @@ /* handy sizes */ #define SZ_16 0x00000010 +#define SZ_32 0x00000020 +#define SZ_64 0x00000040 +#define SZ_128 0x00000080 #define SZ_256 0x00000100 #define SZ_512 0x00000200 diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 9ab8d7e6473..97f6d60297d 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -63,6 +63,11 @@ #include <asm/outercache.h> #define __exception __attribute__((section(".exception.text"))) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#define __exception_irq_entry __irq_entry +#else +#define __exception_irq_entry __exception +#endif struct thread_info; struct task_struct; @@ -157,6 +162,7 @@ extern unsigned int user_debug; #define rmb() dmb() #define wmb() mb() #else +#include <asm/memory.h> #define mb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) #define rmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) #define wmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h index af5d5d1388c..1b960d5ef6a 100644 --- a/arch/arm/include/asm/traps.h +++ b/arch/arm/include/asm/traps.h @@ -15,13 +15,32 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static inline int __in_irqentry_text(unsigned long ptr) +{ + extern char __irqentry_text_start[]; + extern char __irqentry_text_end[]; + + return ptr >= (unsigned long)&__irqentry_text_start && + ptr < (unsigned long)&__irqentry_text_end; +} +#else +static inline int __in_irqentry_text(unsigned long ptr) +{ + return 0; +} +#endif + static inline int in_exception_text(unsigned long ptr) { extern char __exception_text_start[]; extern char __exception_text_end[]; + int in; + + in = ptr >= (unsigned long)&__exception_text_start && + ptr < (unsigned long)&__exception_text_end; - return ptr >= (unsigned long)&__exception_text_start && - ptr < (unsigned long)&__exception_text_end; + return in ? : __in_irqentry_text(ptr); } extern void __init early_trap_init(void); diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 0e106795702..7c33e6f29bc 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -5,7 +5,7 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -ifdef CONFIG_DYNAMIC_FTRACE +ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = -pg endif @@ -29,10 +29,12 @@ obj-$(CONFIG_MODULES) += armksyms.o module.o obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o +obj-$(CONFIG_HAVE_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_SMP) += smp.o smp_tlb.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o obj-$(CONFIG_ATAGS_PROC) += atags.o diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 8e2dacdbdcc..27f64489c1c 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -891,7 +891,7 @@ __kuser_cmpxchg: @ 0xffff0fc0 * A special ghost syscall is used for that (see traps.c). */ stmfd sp!, {r7, lr} - ldr r7, =1f @ it's 20 bits + ldr r7, 1f @ it's 20 bits swi __ARM_NR_cmpxchg ldmfd sp!, {r7, pc} 1: .word __ARM_NR_cmpxchg diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 8bfa98757cd..1e7b04a40a3 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -29,6 +29,9 @@ ret_fast_syscall: ldr r1, [tsk, #TI_FLAGS] tst r1, #_TIF_WORK_MASK bne fast_work_pending +#if defined(CONFIG_IRQSOFF_TRACER) + asm_trace_hardirqs_on +#endif /* perform architecture specific actions before user return */ arch_ret_to_user r1, lr @@ -65,6 +68,9 @@ ret_slow_syscall: tst r1, #_TIF_WORK_MASK bne work_pending no_work_pending: +#if defined(CONFIG_IRQSOFF_TRACER) + asm_trace_hardirqs_on +#endif /* perform architecture specific actions before user return */ arch_ret_to_user r1, lr @@ -141,98 +147,170 @@ ENDPROC(ret_from_fork) #endif #endif -#ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(__gnu_mcount_nc) - mov ip, lr - ldmia sp!, {lr} - mov pc, ip -ENDPROC(__gnu_mcount_nc) +.macro __mcount suffix + mcount_enter + ldr r0, =ftrace_trace_function + ldr r2, [r0] + adr r0, .Lftrace_stub + cmp r0, r2 + bne 1f + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ldr r1, =ftrace_graph_return + ldr r2, [r1] + cmp r0, r2 + bne ftrace_graph_caller\suffix + + ldr r1, =ftrace_graph_entry + ldr r2, [r1] + ldr r0, =ftrace_graph_entry_stub + cmp r0, r2 + bne ftrace_graph_caller\suffix +#endif -ENTRY(ftrace_caller) - stmdb sp!, {r0-r3, lr} - mov r0, lr + mcount_exit + +1: mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function sub r0, r0, #MCOUNT_INSN_SIZE - ldr r1, [sp, #20] + adr lr, BSYM(2f) + mov pc, r2 +2: mcount_exit +.endm + +.macro __ftrace_caller suffix + mcount_enter - .global ftrace_call -ftrace_call: + mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function + sub r0, r0, #MCOUNT_INSN_SIZE + + .globl ftrace_call\suffix +ftrace_call\suffix: bl ftrace_stub - ldmia sp!, {r0-r3, ip, lr} - mov pc, ip -ENDPROC(ftrace_caller) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_call\suffix +ftrace_graph_call\suffix: + mov r0, r0 +#endif + + mcount_exit +.endm + +.macro __ftrace_graph_caller + sub r0, fp, #4 @ &lr of instrumented routine (&parent) +#ifdef CONFIG_DYNAMIC_FTRACE + @ called from __ftrace_caller, saved in mcount_enter + ldr r1, [sp, #16] @ instrumented routine (func) +#else + @ called from __mcount, untouched in lr + mov r1, lr @ instrumented routine (func) +#endif + sub r1, r1, #MCOUNT_INSN_SIZE + mov r2, fp @ frame pointer + bl prepare_ftrace_return + mcount_exit +.endm #ifdef CONFIG_OLD_MCOUNT +/* + * mcount + */ + +.macro mcount_enter + stmdb sp!, {r0-r3, lr} +.endm + +.macro mcount_get_lr reg + ldr \reg, [fp, #-4] +.endm + +.macro mcount_exit + ldr lr, [fp, #-4] + ldmia sp!, {r0-r3, pc} +.endm + ENTRY(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE stmdb sp!, {lr} ldr lr, [fp, #-4] ldmia sp!, {pc} +#else + __mcount _old +#endif ENDPROC(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE ENTRY(ftrace_caller_old) - stmdb sp!, {r0-r3, lr} - ldr r1, [fp, #-4] - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - - .globl ftrace_call_old -ftrace_call_old: - bl ftrace_stub - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} + __ftrace_caller _old ENDPROC(ftrace_caller_old) #endif -#else +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller_old) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller_old) +#endif -ENTRY(__gnu_mcount_nc) +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit +#endif + +/* + * __gnu_mcount_nc + */ + +.macro mcount_enter stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, .Lftrace_stub - cmp r0, r2 - bne gnu_trace +.endm + +.macro mcount_get_lr reg + ldr \reg, [sp, #20] +.endm + +.macro mcount_exit ldmia sp!, {r0-r3, ip, lr} mov pc, ip +.endm -gnu_trace: - ldr r1, [sp, #20] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - adr lr, BSYM(1f) - mov pc, r2 -1: - ldmia sp!, {r0-r3, ip, lr} +ENTRY(__gnu_mcount_nc) +#ifdef CONFIG_DYNAMIC_FTRACE + mov ip, lr + ldmia sp!, {lr} mov pc, ip +#else + __mcount +#endif ENDPROC(__gnu_mcount_nc) -#ifdef CONFIG_OLD_MCOUNT -/* - * This is under an ifdef in order to force link-time errors for people trying - * to build with !FRAME_POINTER with a GCC which doesn't use the new-style - * mcount. - */ -ENTRY(mcount) - stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, ftrace_stub - cmp r0, r2 - bne trace - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(ftrace_caller) + __ftrace_caller +ENDPROC(ftrace_caller) +#endif -trace: - ldr r1, [fp, #-4] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - mov lr, pc - mov pc, r2 - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} -ENDPROC(mcount) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller) #endif -#endif /* CONFIG_DYNAMIC_FTRACE */ +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl return_to_handler +return_to_handler: + stmdb sp!, {r0-r3} + mov r0, fp @ frame pointer + bl ftrace_return_to_handler + mov lr, r0 @ r0 has real ret addr + ldmia sp!, {r0-r3} + mov pc, lr +#endif ENTRY(ftrace_stub) .Lftrace_stub: diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 971ac8c36ea..c0062ad1e84 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -24,6 +24,7 @@ #define NOP 0xe8bd4000 /* pop {lr} */ #endif +#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_OLD_MCOUNT #define OLD_MCOUNT_ADDR ((unsigned long) mcount) #define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old) @@ -59,9 +60,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) } #endif -/* construct a branch (BL) instruction to addr */ #ifdef CONFIG_THUMB2_KERNEL -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) { unsigned long s, j1, j2, i1, i2, imm10, imm11; unsigned long first, second; @@ -83,15 +84,22 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) j2 = (!i2) ^ s; first = 0xf000 | (s << 10) | imm10; - second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11; + second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11; + if (link) + second |= 1 << 14; return (second << 16) | first; } #else -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) { + unsigned long opcode = 0xea000000; long offset; + if (link) + opcode |= 1 << 24; + offset = (long)addr - (long)(pc + 8); if (unlikely(offset < -33554432 || offset > 33554428)) { /* Can't generate branches that far (from ARM ARM). Ftrace @@ -103,10 +111,15 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) offset = (offset >> 2) & 0x00ffffff; - return 0xeb000000 | offset; + return opcode | offset; } #endif +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +{ + return ftrace_gen_branch(pc, addr, true); +} + static int ftrace_modify_code(unsigned long pc, unsigned long old, unsigned long new) { @@ -193,3 +206,83 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long) &return_to_handler; + struct ftrace_graph_ent trace; + unsigned long old; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + old = *parent; + *parent = return_hooker; + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } + + trace.func = self_addr; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + *parent = old; + } +} + +#ifdef CONFIG_DYNAMIC_FTRACE +extern unsigned long ftrace_graph_call; +extern unsigned long ftrace_graph_call_old; +extern void ftrace_graph_caller_old(void); + +static int __ftrace_modify_caller(unsigned long *callsite, + void (*func) (void), bool enable) +{ + unsigned long caller_fn = (unsigned long) func; + unsigned long pc = (unsigned long) callsite; + unsigned long branch = ftrace_gen_branch(pc, caller_fn, false); + unsigned long nop = 0xe1a00000; /* mov r0, r0 */ + unsigned long old = enable ? nop : branch; + unsigned long new = enable ? branch : nop; + + return ftrace_modify_code(pc, old, new); +} + +static int ftrace_modify_graph_caller(bool enable) +{ + int ret; + + ret = __ftrace_modify_caller(&ftrace_graph_call, + ftrace_graph_caller, + enable); + +#ifdef CONFIG_OLD_MCOUNT + if (!ret) + ret = __ftrace_modify_caller(&ftrace_graph_call_old, + ftrace_graph_caller_old, + enable); +#endif + + return ret; +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 359e54e83bd..f17d9a09e8f 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -85,9 +85,11 @@ ENTRY(stext) mrc p15, 0, r9, c0, c0 @ get processor id bl __lookup_processor_type @ r5=procinfo r9=cpuid movs r10, r5 @ invalid processor (r5=0)? + THUMB( it eq ) @ force fixup-able long branch encoding beq __error_p @ yes, error 'p' bl __lookup_machine_type @ r5=machinfo movs r8, r5 @ invalid machine (r5=0)? + THUMB( it eq ) @ force fixup-able long branch encoding beq __error_a @ yes, error 'a' /* @@ -267,6 +269,7 @@ __create_page_tables: mov pc, lr ENDPROC(__create_page_tables) .ltorg + .align __enable_mmu_loc: .long . .long __enable_mmu @@ -287,6 +290,7 @@ ENTRY(secondary_startup) bl __lookup_processor_type movs r10, r5 @ invalid processor? moveq r0, #'p' @ yes, error 'p' + THUMB( it eq ) @ force fixup-able long branch encoding beq __error_p /* @@ -313,6 +317,8 @@ ENTRY(__secondary_switched) b secondary_start_kernel ENDPROC(__secondary_switched) + .align + .type __secondary_data, %object __secondary_data: .long . @@ -425,6 +431,7 @@ __fixup_smp_on_up: b 2b ENDPROC(__fixup_smp) + .align 1: .word . .word __smpalt_begin .word __smpalt_end diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index c1269944cc5..8135438b881 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -35,6 +35,7 @@ #include <linux/list.h> #include <linux/kallsyms.h> #include <linux/proc_fs.h> +#include <linux/ftrace.h> #include <asm/system.h> #include <asm/mach/arch.h> @@ -115,7 +116,8 @@ unlock: * come via this function. Instead, they should provide their * own 'handler' */ -asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage void __exception_irq_entry +asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 3a8fd5140d7..30ead135ff5 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -23,6 +23,8 @@ extern unsigned long kexec_indirection_page; extern unsigned long kexec_mach_type; extern unsigned long kexec_boot_atags; +static atomic_t waiting_for_crash_ipi; + /* * Provide a dummy crash_notes definition while crash dump arrives to arm. * This prevents breakage of crash_notes attribute in kernel/ksysfs.c. @@ -37,9 +39,37 @@ void machine_kexec_cleanup(struct kimage *image) { } +void machine_crash_nonpanic_core(void *unused) +{ + struct pt_regs regs; + + crash_setup_regs(®s, NULL); + printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n", + smp_processor_id()); + crash_save_cpu(®s, smp_processor_id()); + flush_cache_all(); + + atomic_dec(&waiting_for_crash_ipi); + while (1) + cpu_relax(); +} + void machine_crash_shutdown(struct pt_regs *regs) { + unsigned long msecs; + local_irq_disable(); + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + smp_call_function(machine_crash_nonpanic_core, NULL, false); + msecs = 1000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + if (atomic_read(&waiting_for_crash_ipi) > 0) + printk(KERN_WARNING "Non-crashing CPUs did not react to IPI\n"); + crash_save_cpu(regs, smp_processor_id()); printk(KERN_INFO "Loading crashdump kernel...\n"); diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index d9bd786ce23..0c1bb68ff4a 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -67,35 +67,6 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, char *secstrings, struct module *mod) { -#ifdef CONFIG_ARM_UNWIND - Elf_Shdr *s, *sechdrs_end = sechdrs + hdr->e_shnum; - struct arm_unwind_mapping *maps = mod->arch.map; - - for (s = sechdrs; s < sechdrs_end; s++) { - char const *secname = secstrings + s->sh_name; - - if (strcmp(".ARM.exidx.init.text", secname) == 0) - maps[ARM_SEC_INIT].unw_sec = s; - else if (strcmp(".ARM.exidx.devinit.text", secname) == 0) - maps[ARM_SEC_DEVINIT].unw_sec = s; - else if (strcmp(".ARM.exidx", secname) == 0) - maps[ARM_SEC_CORE].unw_sec = s; - else if (strcmp(".ARM.exidx.exit.text", secname) == 0) - maps[ARM_SEC_EXIT].unw_sec = s; - else if (strcmp(".ARM.exidx.devexit.text", secname) == 0) - maps[ARM_SEC_DEVEXIT].unw_sec = s; - else if (strcmp(".init.text", secname) == 0) - maps[ARM_SEC_INIT].sec_text = s; - else if (strcmp(".devinit.text", secname) == 0) - maps[ARM_SEC_DEVINIT].sec_text = s; - else if (strcmp(".text", secname) == 0) - maps[ARM_SEC_CORE].sec_text = s; - else if (strcmp(".exit.text", secname) == 0) - maps[ARM_SEC_EXIT].sec_text = s; - else if (strcmp(".devexit.text", secname) == 0) - maps[ARM_SEC_DEVEXIT].sec_text = s; - } -#endif return 0; } @@ -300,41 +271,69 @@ apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, return -ENOEXEC; } -#ifdef CONFIG_ARM_UNWIND -static void register_unwind_tables(struct module *mod) +struct mod_unwind_map { + const Elf_Shdr *unw_sec; + const Elf_Shdr *txt_sec; +}; + +int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) { +#ifdef CONFIG_ARM_UNWIND + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + const Elf_Shdr *s, *sechdrs_end = sechdrs + hdr->e_shnum; + struct mod_unwind_map maps[ARM_SEC_MAX]; int i; - for (i = 0; i < ARM_SEC_MAX; ++i) { - struct arm_unwind_mapping *map = &mod->arch.map[i]; - if (map->unw_sec && map->sec_text) - map->unwind = unwind_table_add(map->unw_sec->sh_addr, - map->unw_sec->sh_size, - map->sec_text->sh_addr, - map->sec_text->sh_size); + + memset(maps, 0, sizeof(maps)); + + for (s = sechdrs; s < sechdrs_end; s++) { + const char *secname = secstrs + s->sh_name; + + if (!(s->sh_flags & SHF_ALLOC)) + continue; + + if (strcmp(".ARM.exidx.init.text", secname) == 0) + maps[ARM_SEC_INIT].unw_sec = s; + else if (strcmp(".ARM.exidx.devinit.text", secname) == 0) + maps[ARM_SEC_DEVINIT].unw_sec = s; + else if (strcmp(".ARM.exidx", secname) == 0) + maps[ARM_SEC_CORE].unw_sec = s; + else if (strcmp(".ARM.exidx.exit.text", secname) == 0) + maps[ARM_SEC_EXIT].unw_sec = s; + else if (strcmp(".ARM.exidx.devexit.text", secname) == 0) + maps[ARM_SEC_DEVEXIT].unw_sec = s; + else if (strcmp(".init.text", secname) == 0) + maps[ARM_SEC_INIT].txt_sec = s; + else if (strcmp(".devinit.text", secname) == 0) + maps[ARM_SEC_DEVINIT].txt_sec = s; + else if (strcmp(".text", secname) == 0) + maps[ARM_SEC_CORE].txt_sec = s; + else if (strcmp(".exit.text", secname) == 0) + maps[ARM_SEC_EXIT].txt_sec = s; + else if (strcmp(".devexit.text", secname) == 0) + maps[ARM_SEC_DEVEXIT].txt_sec = s; } -} -static void unregister_unwind_tables(struct module *mod) -{ - int i = ARM_SEC_MAX; - while (--i >= 0) - unwind_table_del(mod->arch.map[i].unwind); -} -#else -static inline void register_unwind_tables(struct module *mod) { } -static inline void unregister_unwind_tables(struct module *mod) { } + for (i = 0; i < ARM_SEC_MAX; i++) + if (maps[i].unw_sec && maps[i].txt_sec) + mod->arch.unwind[i] = + unwind_table_add(maps[i].unw_sec->sh_addr, + maps[i].unw_sec->sh_size, + maps[i].txt_sec->sh_addr, + maps[i].txt_sec->sh_size); #endif - -int -module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, - struct module *module) -{ - register_unwind_tables(module); return 0; } void module_arch_cleanup(struct module *mod) { - unregister_unwind_tables(mod); +#ifdef CONFIG_ARM_UNWIND + int i; + + for (i = 0; i < ARM_SEC_MAX; i++) + if (mod->arch.unwind[i]) + unwind_table_del(mod->arch.unwind[i]); +#endif } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 07a50357492..421a4bb88fe 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -4,9 +4,7 @@ * ARM performance counter support. * * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * - * ARMv7 support: Jean Pihet <jpihet@mvista.com> - * 2010 (c) MontaVista Software, LLC. + * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com> * * This code is based on the sparc64 perf event code, which is in turn based * on the x86 code. Callchain code is based on the ARM OProfile backtrace @@ -69,29 +67,23 @@ struct cpu_hw_events { }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); -/* PMU names. */ -static const char *arm_pmu_names[] = { - [ARM_PERF_PMU_ID_XSCALE1] = "xscale1", - [ARM_PERF_PMU_ID_XSCALE2] = "xscale2", - [ARM_PERF_PMU_ID_V6] = "v6", - [ARM_PERF_PMU_ID_V6MP] = "v6mpcore", - [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8", - [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9", -}; - struct arm_pmu { enum arm_perf_pmu_ids id; + const char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); - int (*event_map)(int evt); - u64 (*raw_event)(u64); int (*get_event_idx)(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc); u32 (*read_counter)(int idx); void (*write_counter)(int idx, u32 val); void (*start)(void); void (*stop)(void); + const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + const unsigned (*event_map)[PERF_COUNT_HW_MAX]; + u32 raw_event_mask; int num_events; u64 max_period; }; @@ -136,10 +128,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters); #define CACHE_OP_UNSUPPORTED 0xFFFF -static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - static int armpmu_map_cache_event(u64 config) { @@ -157,7 +145,7 @@ armpmu_map_cache_event(u64 config) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; - ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result]; + ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; @@ -166,6 +154,19 @@ armpmu_map_cache_event(u64 config) } static int +armpmu_map_event(u64 config) +{ + int mapping = (*armpmu->event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping; +} + +static int +armpmu_map_raw_event(u64 config) +{ + return (int)(config & armpmu->raw_event_mask); +} + +static int armpmu_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, int idx) @@ -458,11 +459,11 @@ __hw_perf_event_init(struct perf_event *event) /* Decode the generic type into an ARM event identifier. */ if (PERF_TYPE_HARDWARE == event->attr.type) { - mapping = armpmu->event_map(event->attr.config); + mapping = armpmu_map_event(event->attr.config); } else if (PERF_TYPE_HW_CACHE == event->attr.type) { mapping = armpmu_map_cache_event(event->attr.config); } else if (PERF_TYPE_RAW == event->attr.type) { - mapping = armpmu->raw_event(event->attr.config); + mapping = armpmu_map_raw_event(event->attr.config); } else { pr_debug("event type %x not supported\n", event->attr.type); return -EOPNOTSUPP; @@ -603,2366 +604,10 @@ static struct pmu pmu = { .read = armpmu_read, }; -/* - * ARMv6 Performance counter handling code. - * - * ARMv6 has 2 configurable performance counters and a single cycle counter. - * They all share a single reset bit but can be written to zero so we can use - * that for a reset. - * - * The counters can't be individually enabled or disabled so when we remove - * one event and replace it with another we could get spurious counts from the - * wrong event. However, we can take advantage of the fact that the - * performance counters can export events to the event bus, and the event bus - * itself can be monitored. This requires that we *don't* export the events to - * the event bus. The procedure for disabling a configurable counter is: - * - change the counter to count the ETMEXTOUT[0] signal (0x20). This - * effectively stops the counter from counting. - * - disable the counter's interrupt generation (each counter has it's - * own interrupt enable bit). - * Once stopped, the counter value can be written as 0 to reset. - * - * To enable a counter: - * - enable the counter's interrupt generation. - * - set the new event type. - * - * Note: the dedicated cycle counter only counts cycles and can't be - * enabled/disabled independently of the others. When we want to disable the - * cycle counter, we have to just disable the interrupt reporting and start - * ignoring that counter. When re-enabling, we have to reset the value and - * enable the interrupt. - */ - -enum armv6_perf_types { - ARMV6_PERFCTR_ICACHE_MISS = 0x0, - ARMV6_PERFCTR_IBUF_STALL = 0x1, - ARMV6_PERFCTR_DDEP_STALL = 0x2, - ARMV6_PERFCTR_ITLB_MISS = 0x3, - ARMV6_PERFCTR_DTLB_MISS = 0x4, - ARMV6_PERFCTR_BR_EXEC = 0x5, - ARMV6_PERFCTR_BR_MISPREDICT = 0x6, - ARMV6_PERFCTR_INSTR_EXEC = 0x7, - ARMV6_PERFCTR_DCACHE_HIT = 0x9, - ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, - ARMV6_PERFCTR_DCACHE_MISS = 0xB, - ARMV6_PERFCTR_DCACHE_WBACK = 0xC, - ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, - ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, - ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, - ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, - ARMV6_PERFCTR_WBUF_DRAINED = 0x12, - ARMV6_PERFCTR_CPU_CYCLES = 0xFF, - ARMV6_PERFCTR_NOP = 0x20, -}; - -enum armv6_counters { - ARMV6_CYCLE_COUNTER = 1, - ARMV6_COUNTER0, - ARMV6_COUNTER1, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -enum armv6mpcore_perf_types { - ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, - ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, - ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, - ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, - ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, - ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, - ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, - ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, - ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, - ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, - ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, - ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, - ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, - ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, - ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, - ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -static inline unsigned long -armv6_pmcr_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); - return val; -} - -static inline void -armv6_pmcr_write(unsigned long val) -{ - asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); -} - -#define ARMV6_PMCR_ENABLE (1 << 0) -#define ARMV6_PMCR_CTR01_RESET (1 << 1) -#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) -#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) -#define ARMV6_PMCR_COUNT0_IEN (1 << 4) -#define ARMV6_PMCR_COUNT1_IEN (1 << 5) -#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) -#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) -#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) -#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) -#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 -#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) -#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 -#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) - -#define ARMV6_PMCR_OVERFLOWED_MASK \ - (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ - ARMV6_PMCR_CCOUNT_OVERFLOW) - -static inline int -armv6_pmcr_has_overflowed(unsigned long pmcr) -{ - return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK); -} - -static inline int -armv6_pmcr_counter_has_overflowed(unsigned long pmcr, - enum armv6_counters counter) -{ - int ret = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; - else if (ARMV6_COUNTER0 == counter) - ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; - else if (ARMV6_COUNTER1 == counter) - ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return ret; -} - -static inline u32 -armv6pmu_read_counter(int counter) -{ - unsigned long value = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return value; -} - -static inline void -armv6pmu_write_counter(int counter, - u32 value) -{ - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); -} - -void -armv6pmu_enable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = 0; - evt = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_EVT_COUNT0_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | - ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_EVT_COUNT1_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | - ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the event - * that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t -armv6pmu_handle_irq(int irq_num, - void *dev) -{ - unsigned long pmcr = armv6_pmcr_read(); - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - if (!armv6_pmcr_has_overflowed(pmcr)) - return IRQ_NONE; - - regs = get_irq_regs(); - - /* - * The interrupts are cleared by writing the overflow flags back to - * the control register. All of the other bits don't have any effect - * if they are rewritten, so write the whole value back. - */ - armv6_pmcr_write(pmcr); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void -armv6pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val |= ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -void -armv6pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline int -armv6pmu_event_map(int config) -{ - int mapping = armv6_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int -armv6mpcore_pmu_event_map(int config) -{ - int mapping = armv6mpcore_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 -armv6pmu_raw_event(u64 config) -{ - return config & 0xff; -} - -static int -armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - /* Always place a cycle counter into the cycle counter. */ - if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { - if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV6_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * counter0 and counter1. - */ - if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) { - return ARMV6_COUNTER1; - } - - if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) { - return ARMV6_COUNTER0; - } - - /* The counters are all in use. */ - return -EAGAIN; - } -} - -static void -armv6pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - evt = 0; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the number - * of ETM bus signal assertion cycles. The external reporting should - * be disabled and so this should never increment. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, flags, evt = 0; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Unlike UP ARMv6, we don't have a way of stopping the counters. We - * simply disable the interrupt reporting. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static const struct arm_pmu armv6pmu = { - .id = ARM_PERF_PMU_ID_V6, - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6pmu_disable_event, - .event_map = armv6pmu_event_map, - .raw_event = armv6pmu_raw_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -/* - * ARMv6mpcore is almost identical to single core ARMv6 with the exception - * that some of the events have different enumerations and that there is no - * *hack* to stop the programmable counters. To stop the counters we simply - * disable the interrupt reporting and update the event. When unthrottling we - * reset the period and enable the interrupt reporting. - */ -static const struct arm_pmu armv6mpcore_pmu = { - .id = ARM_PERF_PMU_ID_V6MP, - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6mpcore_pmu_disable_event, - .event_map = armv6mpcore_pmu_event_map, - .raw_event = armv6pmu_raw_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -/* - * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. - * - * Copied from ARMv6 code, with the low level code inspired - * by the ARMv7 Oprofile code. - * - * Cortex-A8 has up to 4 configurable performance counters and - * a single cycle counter. - * Cortex-A9 has up to 31 configurable performance counters and - * a single cycle counter. - * - * All counters can be enabled/disabled and IRQ masked separately. The cycle - * counter and all 4 performance counters together can be reset separately. - */ - -/* Common ARMv7 event types */ -enum armv7_perf_types { - ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, - ARMV7_PERFCTR_IFETCH_MISS = 0x01, - ARMV7_PERFCTR_ITLB_MISS = 0x02, - ARMV7_PERFCTR_DCACHE_REFILL = 0x03, - ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, - ARMV7_PERFCTR_DTLB_REFILL = 0x05, - ARMV7_PERFCTR_DREAD = 0x06, - ARMV7_PERFCTR_DWRITE = 0x07, - - ARMV7_PERFCTR_EXC_TAKEN = 0x09, - ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, - ARMV7_PERFCTR_CID_WRITE = 0x0B, - /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. - * It counts: - * - all branch instructions, - * - instructions that explicitly write the PC, - * - exception generating instructions. - */ - ARMV7_PERFCTR_PC_WRITE = 0x0C, - ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, - ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, - ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, - ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, - - ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, - - ARMV7_PERFCTR_CPU_CYCLES = 0xFF -}; - -/* ARMv7 Cortex-A8 specific event types */ -enum armv7_a8_perf_types { - ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, - - ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, - - ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, - ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, - ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, - ARMV7_PERFCTR_L2_ACCESS = 0x43, - ARMV7_PERFCTR_L2_CACH_MISS = 0x44, - ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, - ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, - ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, - ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, - ARMV7_PERFCTR_L1_DATA_MISS = 0x49, - ARMV7_PERFCTR_L1_INST_MISS = 0x4A, - ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, - ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, - ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, - ARMV7_PERFCTR_L2_NEON = 0x4E, - ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, - ARMV7_PERFCTR_L1_INST = 0x50, - ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, - ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, - ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, - ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, - ARMV7_PERFCTR_OP_EXECUTED = 0x55, - ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, - ARMV7_PERFCTR_CYCLES_INST = 0x57, - ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, - ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, - ARMV7_PERFCTR_NEON_CYCLES = 0x5A, - - ARMV7_PERFCTR_PMU0_EVENTS = 0x70, - ARMV7_PERFCTR_PMU1_EVENTS = 0x71, - ARMV7_PERFCTR_PMU_EVENTS = 0x72, -}; - -/* ARMv7 Cortex-A9 specific event types */ -enum armv7_a9_perf_types { - ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, - ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, - ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, - - ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, - ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, - - ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, - ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, - ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, - ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, - ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, - ARMV7_PERFCTR_DATA_EVICTION = 0x65, - ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, - ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, - - ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, - - ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, - ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, - ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, - ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, - ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, - - ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, - ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, - ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, - ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, - ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, - ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, - ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, - - ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, - ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, - - ARMV7_PERFCTR_ISB_INST = 0x90, - ARMV7_PERFCTR_DSB_INST = 0x91, - ARMV7_PERFCTR_DMB_INST = 0x92, - ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, - - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, - ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, - ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, - ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, - ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 -}; - -/* - * Cortex-A8 HW events mapping - * - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Cortex-A9 HW events mapping - */ -static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Perf Events counters - */ -enum armv7_counters { - ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ - ARMV7_COUNTER0 = 2, /* First event counter */ -}; - -/* - * The cycle counter is ARMV7_CYCLE_COUNTER. - * The first event counter is ARMV7_COUNTER0. - * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). - */ -#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) - -/* - * ARMv7 low level PMNC access - */ - -/* - * Per-CPU PMNC: config reg - */ -#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ -#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ -#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ -#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ -#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ -#define ARMV7_PMNC_N_MASK 0x1f -#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ - -/* - * Available counters - */ -#define ARMV7_CNT0 0 /* First event counter */ -#define ARMV7_CCNT 31 /* Cycle counter */ - -/* Perf Event to low level counters mapping */ -#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) - -/* - * CNTENS: counters enable reg - */ -#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) - -/* - * CNTENC: counters disable reg - */ -#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) - -/* - * INTENS: counters overflow interrupt enable reg - */ -#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENS_C (1 << ARMV7_CCNT) - -/* - * INTENC: counters overflow interrupt disable reg - */ -#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENC_C (1 << ARMV7_CCNT) - -/* - * EVTSEL: Event selection reg - */ -#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ - -/* - * SELECT: Counter selection reg - */ -#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ - -/* - * FLAG: counters overflow flag status reg - */ -#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_FLAG_C (1 << ARMV7_CCNT) -#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK - -static inline unsigned long armv7_pmnc_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); - return val; -} - -static inline void armv7_pmnc_write(unsigned long val) -{ - val &= ARMV7_PMNC_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); -} - -static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) -{ - return pmnc & ARMV7_OVERFLOWED_MASK; -} - -static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, - enum armv7_counters counter) -{ - int ret = 0; - - if (counter == ARMV7_CYCLE_COUNTER) - ret = pmnc & ARMV7_FLAG_C; - else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) - ret = pmnc & ARMV7_FLAG_P(counter); - else - pr_err("CPU%u checking wrong counter %d overflow status\n", - smp_processor_id(), counter); - - return ret; -} - -static inline int armv7_pmnc_select_counter(unsigned int idx) -{ - u32 val; - - if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { - pr_err("CPU%u selecting wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); - - return idx; -} - -static inline u32 armv7pmu_read_counter(int idx) -{ - unsigned long value = 0; - - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mrc p15, 0, %0, c9, c13, 2" - : "=r" (value)); - } else - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - - return value; -} - -static inline void armv7pmu_write_counter(int idx, u32 value) -{ - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mcr p15, 0, %0, c9, c13, 2" - : : "r" (value)); - } else - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); -} - -static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) -{ - if (armv7_pmnc_select_counter(idx) == idx) { - val &= ARMV7_EVTSEL_MASK; - asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); - } -} - -static inline u32 armv7_pmnc_enable_counter(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENS_C; - else - val = ARMV7_CNTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_counter(unsigned int idx) -{ - u32 val; - - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENC_C; - else - val = ARMV7_CNTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_enable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENS_C; - else - val = ARMV7_INTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENC_C; - else - val = ARMV7_INTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_getreset_flags(void) -{ - u32 val; - - /* Read */ - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - - /* Write to clear flags */ - val &= ARMV7_FLAG_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); - - return val; -} - -#ifdef DEBUG -static void armv7_pmnc_dump_regs(void) -{ - u32 val; - unsigned int cnt; - - printk(KERN_INFO "PMNC registers dump:\n"); - - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); - printk(KERN_INFO "PMNC =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); - printk(KERN_INFO "CNTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); - printk(KERN_INFO "INTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - printk(KERN_INFO "FLAGS =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); - printk(KERN_INFO "SELECT=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); - printk(KERN_INFO "CCNT =0x%08x\n", val); - - for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { - armv7_pmnc_select_counter(cnt); - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); - printk(KERN_INFO "CNT[%d] count =0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); - printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - } -} -#endif - -void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Set event (if destined for PMNx counters) - * We don't need to set the event if it's a cycle count - */ - if (idx != ARMV7_CYCLE_COUNTER) - armv7_pmnc_write_evtsel(idx, hwc->config_base); - - /* - * Enable interrupt for this counter - */ - armv7_pmnc_enable_intens(idx); - - /* - * Enable counter - */ - armv7_pmnc_enable_counter(idx); - - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Disable counter and interrupt - */ - spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Disable interrupt for this counter - */ - armv7_pmnc_disable_intens(idx); - - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * Get and reset the IRQ flags - */ - pmnc = armv7_pmnc_getreset_flags(); - - /* - * Did an overflow occur? - */ - if (!armv7_pmnc_has_overflowed(pmnc)) - return IRQ_NONE; - - /* - * Handle the counter(s) overflow(s) - */ - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void armv7pmu_start(void) -{ - unsigned long flags; - - spin_lock_irqsave(&pmu_lock, flags); - /* Enable all counters */ - armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_stop(void) -{ - unsigned long flags; - - spin_lock_irqsave(&pmu_lock, flags); - /* Disable all counters */ - armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline int armv7_a8_pmu_event_map(int config) -{ - int mapping = armv7_a8_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int armv7_a9_pmu_event_map(int config) -{ - int mapping = armv7_a9_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 armv7pmu_raw_event(u64 config) -{ - return config & 0xff; -} - -static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - int idx; - - /* Always place a cycle counter into the cycle counter. */ - if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV7_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * the events counters - */ - for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - - /* The counters are all in use. */ - return -EAGAIN; - } -} - -static struct arm_pmu armv7pmu = { - .handle_irq = armv7pmu_handle_irq, - .enable = armv7pmu_enable_event, - .disable = armv7pmu_disable_event, - .raw_event = armv7pmu_raw_event, - .read_counter = armv7pmu_read_counter, - .write_counter = armv7pmu_write_counter, - .get_event_idx = armv7pmu_get_event_idx, - .start = armv7pmu_start, - .stop = armv7pmu_stop, - .max_period = (1LLU << 32) - 1, -}; - -static u32 __init armv7_reset_read_pmnc(void) -{ - u32 nb_cnt; - - /* Initialize & Reset PMNC: C and P bits */ - armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); - - /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; - - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; -} - -/* - * ARMv5 [xscale] Performance counter handling code. - * - * Based on xscale OProfile code. - * - * There are two variants of the xscale PMU that we support: - * - xscale1pmu: 2 event counters and a cycle counter - * - xscale2pmu: 4 event counters and a cycle counter - * The two variants share event definitions, but have different - * PMU structures. - */ - -enum xscale_perf_types { - XSCALE_PERFCTR_ICACHE_MISS = 0x00, - XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, - XSCALE_PERFCTR_DATA_STALL = 0x02, - XSCALE_PERFCTR_ITLB_MISS = 0x03, - XSCALE_PERFCTR_DTLB_MISS = 0x04, - XSCALE_PERFCTR_BRANCH = 0x05, - XSCALE_PERFCTR_BRANCH_MISS = 0x06, - XSCALE_PERFCTR_INSTRUCTION = 0x07, - XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, - XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, - XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, - XSCALE_PERFCTR_DCACHE_MISS = 0x0B, - XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, - XSCALE_PERFCTR_PC_CHANGED = 0x0D, - XSCALE_PERFCTR_BCU_REQUEST = 0x10, - XSCALE_PERFCTR_BCU_FULL = 0x11, - XSCALE_PERFCTR_BCU_DRAIN = 0x12, - XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, - XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, - XSCALE_PERFCTR_RMW = 0x16, - /* XSCALE_PERFCTR_CCNT is not hardware defined */ - XSCALE_PERFCTR_CCNT = 0xFE, - XSCALE_PERFCTR_UNUSED = 0xFF, -}; - -enum xscale_counters { - XSCALE_CYCLE_COUNTER = 1, - XSCALE_COUNTER0, - XSCALE_COUNTER1, - XSCALE_COUNTER2, - XSCALE_COUNTER3, -}; - -static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, - [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, - [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -#define XSCALE_PMU_ENABLE 0x001 -#define XSCALE_PMN_RESET 0x002 -#define XSCALE_CCNT_RESET 0x004 -#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) -#define XSCALE_PMU_CNT64 0x008 - -static inline int -xscalepmu_event_map(int config) -{ - int mapping = xscale_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 -xscalepmu_raw_event(u64 config) -{ - return config & 0xff; -} - -#define XSCALE1_OVERFLOWED_MASK 0x700 -#define XSCALE1_CCOUNT_OVERFLOW 0x400 -#define XSCALE1_COUNT0_OVERFLOW 0x100 -#define XSCALE1_COUNT1_OVERFLOW 0x200 -#define XSCALE1_CCOUNT_INT_EN 0x040 -#define XSCALE1_COUNT0_INT_EN 0x010 -#define XSCALE1_COUNT1_INT_EN 0x020 -#define XSCALE1_COUNT0_EVT_SHFT 12 -#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) -#define XSCALE1_COUNT1_EVT_SHFT 20 -#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) - -static inline u32 -xscale1pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); - return val; -} - -static inline void -xscale1pmu_write_pmnc(u32 val) -{ - /* upper 4bits and 7, 11 are write-as-0 */ - val &= 0xffff77f; - asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); -} - -static inline int -xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = pmnc & XSCALE1_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = pmnc & XSCALE1_COUNT1_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale1pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * NOTE: there's an A stepping erratum that states if an overflow - * bit already exists and another occurs, the previous - * Overflow bit gets cleared. There's no workaround. - * Fixed in B stepping or later. - */ - pmnc = xscale1pmu_read_pmnc(); - - /* - * Write the value back to clear the overflow flags. Overflow - * flags remain in pmnc for use below. We also disable the PMU - * while we process the interrupt. - */ - xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) - return IRQ_NONE; - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = 0; - evt = XSCALE1_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | - XSCALE1_COUNT0_INT_EN; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | - XSCALE1_COUNT1_INT_EN; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = XSCALE1_CCOUNT_INT_EN; - evt = 0; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - if (XSCALE_PERFCTR_CCNT == event->config_base) { - if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return XSCALE_CYCLE_COUNTER; - } else { - if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) { - return XSCALE_COUNTER1; - } - - if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) { - return XSCALE_COUNTER0; - } - - return -EAGAIN; - } -} - -static void -xscale1pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val |= XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale1pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale1pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale1pmu = { - .id = ARM_PERF_PMU_ID_XSCALE1, - .handle_irq = xscale1pmu_handle_irq, - .enable = xscale1pmu_enable_event, - .disable = xscale1pmu_disable_event, - .event_map = xscalepmu_event_map, - .raw_event = xscalepmu_raw_event, - .read_counter = xscale1pmu_read_counter, - .write_counter = xscale1pmu_write_counter, - .get_event_idx = xscale1pmu_get_event_idx, - .start = xscale1pmu_start, - .stop = xscale1pmu_stop, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -#define XSCALE2_OVERFLOWED_MASK 0x01f -#define XSCALE2_CCOUNT_OVERFLOW 0x001 -#define XSCALE2_COUNT0_OVERFLOW 0x002 -#define XSCALE2_COUNT1_OVERFLOW 0x004 -#define XSCALE2_COUNT2_OVERFLOW 0x008 -#define XSCALE2_COUNT3_OVERFLOW 0x010 -#define XSCALE2_CCOUNT_INT_EN 0x001 -#define XSCALE2_COUNT0_INT_EN 0x002 -#define XSCALE2_COUNT1_INT_EN 0x004 -#define XSCALE2_COUNT2_INT_EN 0x008 -#define XSCALE2_COUNT3_INT_EN 0x010 -#define XSCALE2_COUNT0_EVT_SHFT 0 -#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) -#define XSCALE2_COUNT1_EVT_SHFT 8 -#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) -#define XSCALE2_COUNT2_EVT_SHFT 16 -#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) -#define XSCALE2_COUNT3_EVT_SHFT 24 -#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) - -static inline u32 -xscale2pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); - /* bits 1-2 and 4-23 are read-unpredictable */ - return val & 0xff000009; -} - -static inline void -xscale2pmu_write_pmnc(u32 val) -{ - /* bits 4-23 are write-as-0, 24-31 are write ignored */ - val &= 0xf; - asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_overflow_flags(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_overflow_flags(u32 val) -{ - asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_event_select(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_event_select(u32 val) -{ - asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); -} - -static inline u32 -xscale2pmu_read_int_enable(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); - return val; -} - -static void -xscale2pmu_write_int_enable(u32 val) -{ - asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); -} - -static inline int -xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = of_flags & XSCALE2_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = of_flags & XSCALE2_COUNT1_OVERFLOW; - break; - case XSCALE_COUNTER2: - ret = of_flags & XSCALE2_COUNT2_OVERFLOW; - break; - case XSCALE_COUNTER3: - ret = of_flags & XSCALE2_COUNT3_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale2pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc, of_flags; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* Disable the PMU. */ - pmnc = xscale2pmu_read_pmnc(); - xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - /* Check the overflow flag register. */ - of_flags = xscale2pmu_read_overflow_flags(); - if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) - return IRQ_NONE; - - /* Clear the overflow bits. */ - xscale2pmu_write_overflow_flags(of_flags); - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien |= XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien |= XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien |= XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien |= XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien |= XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien &= ~XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien &= ~XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien &= ~XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien &= ~XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien &= ~XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - int idx = xscale1pmu_get_event_idx(cpuc, event); - if (idx >= 0) - goto out; - - if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) - idx = XSCALE_COUNTER3; - else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) - idx = XSCALE_COUNTER2; -out: - return idx; -} - -static void -xscale2pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; - val |= XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale2pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale2pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale2pmu = { - .id = ARM_PERF_PMU_ID_XSCALE2, - .handle_irq = xscale2pmu_handle_irq, - .enable = xscale2pmu_enable_event, - .disable = xscale2pmu_disable_event, - .event_map = xscalepmu_event_map, - .raw_event = xscalepmu_raw_event, - .read_counter = xscale2pmu_read_counter, - .write_counter = xscale2pmu_write_counter, - .get_event_idx = xscale2pmu_get_event_idx, - .start = xscale2pmu_start, - .stop = xscale2pmu_stop, - .num_events = 5, - .max_period = (1LLU << 32) - 1, -}; +/* Include the PMU-specific implementations. */ +#include "perf_event_xscale.c" +#include "perf_event_v6.c" +#include "perf_event_v7.c" static int __init init_hw_perf_events(void) @@ -2977,37 +622,16 @@ init_hw_perf_events(void) case 0xB360: /* ARM1136 */ case 0xB560: /* ARM1156 */ case 0xB760: /* ARM1176 */ - armpmu = &armv6pmu; - memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, - sizeof(armv6_perf_cache_map)); + armpmu = armv6pmu_init(); break; case 0xB020: /* ARM11mpcore */ - armpmu = &armv6mpcore_pmu; - memcpy(armpmu_perf_cache_map, - armv6mpcore_perf_cache_map, - sizeof(armv6mpcore_perf_cache_map)); + armpmu = armv6mpcore_pmu_init(); break; case 0xC080: /* Cortex-A8 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA8; - memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map, - sizeof(armv7_a8_perf_cache_map)); - armv7pmu.event_map = armv7_a8_pmu_event_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); + armpmu = armv7_a8_pmu_init(); break; case 0xC090: /* Cortex-A9 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA9; - memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map, - sizeof(armv7_a9_perf_cache_map)); - armv7pmu.event_map = armv7_a9_pmu_event_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); + armpmu = armv7_a9_pmu_init(); break; } /* Intel CPUs [xscale]. */ @@ -3015,21 +639,17 @@ init_hw_perf_events(void) part_number = (cpuid >> 13) & 0x7; switch (part_number) { case 1: - armpmu = &xscale1pmu; - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, - sizeof(xscale_perf_cache_map)); + armpmu = xscale1pmu_init(); break; case 2: - armpmu = &xscale2pmu; - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, - sizeof(xscale_perf_cache_map)); + armpmu = xscale2pmu_init(); break; } } if (armpmu) { pr_info("enabled with %s PMU driver, %d counters available\n", - arm_pmu_names[armpmu->id], armpmu->num_events); + armpmu->name, armpmu->num_events); } else { pr_info("no hardware support available\n"); } diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c new file mode 100644 index 00000000000..7aeb07da907 --- /dev/null +++ b/arch/arm/kernel/perf_event_v6.c @@ -0,0 +1,672 @@ +/* + * ARMv6 Performance counter handling code. + * + * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles + * + * ARMv6 has 2 configurable performance counters and a single cycle counter. + * They all share a single reset bit but can be written to zero so we can use + * that for a reset. + * + * The counters can't be individually enabled or disabled so when we remove + * one event and replace it with another we could get spurious counts from the + * wrong event. However, we can take advantage of the fact that the + * performance counters can export events to the event bus, and the event bus + * itself can be monitored. This requires that we *don't* export the events to + * the event bus. The procedure for disabling a configurable counter is: + * - change the counter to count the ETMEXTOUT[0] signal (0x20). This + * effectively stops the counter from counting. + * - disable the counter's interrupt generation (each counter has it's + * own interrupt enable bit). + * Once stopped, the counter value can be written as 0 to reset. + * + * To enable a counter: + * - enable the counter's interrupt generation. + * - set the new event type. + * + * Note: the dedicated cycle counter only counts cycles and can't be + * enabled/disabled independently of the others. When we want to disable the + * cycle counter, we have to just disable the interrupt reporting and start + * ignoring that counter. When re-enabling, we have to reset the value and + * enable the interrupt. + */ + +#ifdef CONFIG_CPU_V6 +enum armv6_perf_types { + ARMV6_PERFCTR_ICACHE_MISS = 0x0, + ARMV6_PERFCTR_IBUF_STALL = 0x1, + ARMV6_PERFCTR_DDEP_STALL = 0x2, + ARMV6_PERFCTR_ITLB_MISS = 0x3, + ARMV6_PERFCTR_DTLB_MISS = 0x4, + ARMV6_PERFCTR_BR_EXEC = 0x5, + ARMV6_PERFCTR_BR_MISPREDICT = 0x6, + ARMV6_PERFCTR_INSTR_EXEC = 0x7, + ARMV6_PERFCTR_DCACHE_HIT = 0x9, + ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, + ARMV6_PERFCTR_DCACHE_MISS = 0xB, + ARMV6_PERFCTR_DCACHE_WBACK = 0xC, + ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, + ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, + ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, + ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, + ARMV6_PERFCTR_WBUF_DRAINED = 0x12, + ARMV6_PERFCTR_CPU_CYCLES = 0xFF, + ARMV6_PERFCTR_NOP = 0x20, +}; + +enum armv6_counters { + ARMV6_CYCLE_COUNTER = 1, + ARMV6_COUNTER0, + ARMV6_COUNTER1, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +enum armv6mpcore_perf_types { + ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, + ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, + ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, + ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, + ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, + ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, + ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, + ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, + ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, + ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, + ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, + ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, + ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, + ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, + ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, + ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +static inline unsigned long +armv6_pmcr_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); + return val; +} + +static inline void +armv6_pmcr_write(unsigned long val) +{ + asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); +} + +#define ARMV6_PMCR_ENABLE (1 << 0) +#define ARMV6_PMCR_CTR01_RESET (1 << 1) +#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) +#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) +#define ARMV6_PMCR_COUNT0_IEN (1 << 4) +#define ARMV6_PMCR_COUNT1_IEN (1 << 5) +#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) +#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) +#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) +#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) +#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 +#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) +#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 +#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) + +#define ARMV6_PMCR_OVERFLOWED_MASK \ + (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ + ARMV6_PMCR_CCOUNT_OVERFLOW) + +static inline int +armv6_pmcr_has_overflowed(unsigned long pmcr) +{ + return pmcr & ARMV6_PMCR_OVERFLOWED_MASK; +} + +static inline int +armv6_pmcr_counter_has_overflowed(unsigned long pmcr, + enum armv6_counters counter) +{ + int ret = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; + else if (ARMV6_COUNTER0 == counter) + ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; + else if (ARMV6_COUNTER1 == counter) + ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return ret; +} + +static inline u32 +armv6pmu_read_counter(int counter) +{ + unsigned long value = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return value; +} + +static inline void +armv6pmu_write_counter(int counter, + u32 value) +{ + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); +} + +void +armv6pmu_enable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, evt, flags; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = 0; + evt = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_EVT_COUNT0_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | + ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_EVT_COUNT1_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | + ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the event + * that we're interested in. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static irqreturn_t +armv6pmu_handle_irq(int irq_num, + void *dev) +{ + unsigned long pmcr = armv6_pmcr_read(); + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + if (!armv6_pmcr_has_overflowed(pmcr)) + return IRQ_NONE; + + regs = get_irq_regs(); + + /* + * The interrupts are cleared by writing the overflow flags back to + * the control register. All of the other bits don't have any effect + * if they are rewritten, so write the whole value back. + */ + armv6_pmcr_write(pmcr); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void +armv6pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val |= ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +armv6pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + /* Always place a cycle counter into the cycle counter. */ + if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { + if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV6_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * counter0 and counter1. + */ + if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) + return ARMV6_COUNTER1; + + if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) + return ARMV6_COUNTER0; + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static void +armv6pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, evt, flags; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + evt = 0; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the number + * of ETM bus signal assertion cycles. The external reporting should + * be disabled and so this should never increment. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, flags, evt = 0; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Unlike UP ARMv6, we don't have a way of stopping the counters. We + * simply disable the interrupt reporting. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static const struct arm_pmu armv6pmu = { + .id = ARM_PERF_PMU_ID_V6, + .name = "v6", + .handle_irq = armv6pmu_handle_irq, + .enable = armv6pmu_enable_event, + .disable = armv6pmu_disable_event, + .read_counter = armv6pmu_read_counter, + .write_counter = armv6pmu_write_counter, + .get_event_idx = armv6pmu_get_event_idx, + .start = armv6pmu_start, + .stop = armv6pmu_stop, + .cache_map = &armv6_perf_cache_map, + .event_map = &armv6_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init armv6pmu_init(void) +{ + return &armv6pmu; +} + +/* + * ARMv6mpcore is almost identical to single core ARMv6 with the exception + * that some of the events have different enumerations and that there is no + * *hack* to stop the programmable counters. To stop the counters we simply + * disable the interrupt reporting and update the event. When unthrottling we + * reset the period and enable the interrupt reporting. + */ +static const struct arm_pmu armv6mpcore_pmu = { + .id = ARM_PERF_PMU_ID_V6MP, + .name = "v6mpcore", + .handle_irq = armv6pmu_handle_irq, + .enable = armv6pmu_enable_event, + .disable = armv6mpcore_pmu_disable_event, + .read_counter = armv6pmu_read_counter, + .write_counter = armv6pmu_write_counter, + .get_event_idx = armv6pmu_get_event_idx, + .start = armv6pmu_start, + .stop = armv6pmu_stop, + .cache_map = &armv6mpcore_perf_cache_map, + .event_map = &armv6mpcore_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return &armv6mpcore_pmu; +} +#else +const struct arm_pmu *__init armv6pmu_init(void) +{ + return NULL; +} + +const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_V6 */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c new file mode 100644 index 00000000000..4d0423969df --- /dev/null +++ b/arch/arm/kernel/perf_event_v7.c @@ -0,0 +1,906 @@ +/* + * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. + * + * ARMv7 support: Jean Pihet <jpihet@mvista.com> + * 2010 (c) MontaVista Software, LLC. + * + * Copied from ARMv6 code, with the low level code inspired + * by the ARMv7 Oprofile code. + * + * Cortex-A8 has up to 4 configurable performance counters and + * a single cycle counter. + * Cortex-A9 has up to 31 configurable performance counters and + * a single cycle counter. + * + * All counters can be enabled/disabled and IRQ masked separately. The cycle + * counter and all 4 performance counters together can be reset separately. + */ + +#ifdef CONFIG_CPU_V7 +/* Common ARMv7 event types */ +enum armv7_perf_types { + ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, + ARMV7_PERFCTR_IFETCH_MISS = 0x01, + ARMV7_PERFCTR_ITLB_MISS = 0x02, + ARMV7_PERFCTR_DCACHE_REFILL = 0x03, + ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, + ARMV7_PERFCTR_DTLB_REFILL = 0x05, + ARMV7_PERFCTR_DREAD = 0x06, + ARMV7_PERFCTR_DWRITE = 0x07, + + ARMV7_PERFCTR_EXC_TAKEN = 0x09, + ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, + ARMV7_PERFCTR_CID_WRITE = 0x0B, + /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. + * It counts: + * - all branch instructions, + * - instructions that explicitly write the PC, + * - exception generating instructions. + */ + ARMV7_PERFCTR_PC_WRITE = 0x0C, + ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, + ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, + ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, + ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, + + ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, + + ARMV7_PERFCTR_CPU_CYCLES = 0xFF +}; + +/* ARMv7 Cortex-A8 specific event types */ +enum armv7_a8_perf_types { + ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, + + ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, + + ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, + ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, + ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, + ARMV7_PERFCTR_L2_ACCESS = 0x43, + ARMV7_PERFCTR_L2_CACH_MISS = 0x44, + ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, + ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, + ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, + ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, + ARMV7_PERFCTR_L1_DATA_MISS = 0x49, + ARMV7_PERFCTR_L1_INST_MISS = 0x4A, + ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, + ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, + ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, + ARMV7_PERFCTR_L2_NEON = 0x4E, + ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, + ARMV7_PERFCTR_L1_INST = 0x50, + ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, + ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, + ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, + ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, + ARMV7_PERFCTR_OP_EXECUTED = 0x55, + ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, + ARMV7_PERFCTR_CYCLES_INST = 0x57, + ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, + ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, + ARMV7_PERFCTR_NEON_CYCLES = 0x5A, + + ARMV7_PERFCTR_PMU0_EVENTS = 0x70, + ARMV7_PERFCTR_PMU1_EVENTS = 0x71, + ARMV7_PERFCTR_PMU_EVENTS = 0x72, +}; + +/* ARMv7 Cortex-A9 specific event types */ +enum armv7_a9_perf_types { + ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, + ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, + ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, + + ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, + ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, + + ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, + ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, + ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, + ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, + ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, + ARMV7_PERFCTR_DATA_EVICTION = 0x65, + ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, + ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, + + ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, + + ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, + ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, + ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, + ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, + ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, + + ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, + ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, + ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, + ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, + ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, + ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, + ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, + + ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, + ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, + + ARMV7_PERFCTR_ISB_INST = 0x90, + ARMV7_PERFCTR_DSB_INST = 0x91, + ARMV7_PERFCTR_DMB_INST = 0x92, + ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, + + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, + ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, + ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, + ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, + ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 +}; + +/* + * Cortex-A8 HW events mapping + * + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * Only ITLB misses and DTLB refills are supported. + * If users want the DTLB refills misses a raw counter + * must be used. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Cortex-A9 HW events mapping + */ +static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * Only ITLB misses and DTLB refills are supported. + * If users want the DTLB refills misses a raw counter + * must be used. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Perf Events counters + */ +enum armv7_counters { + ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ + ARMV7_COUNTER0 = 2, /* First event counter */ +}; + +/* + * The cycle counter is ARMV7_CYCLE_COUNTER. + * The first event counter is ARMV7_COUNTER0. + * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). + */ +#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) + +/* + * ARMv7 low level PMNC access + */ + +/* + * Per-CPU PMNC: config reg + */ +#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ +#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ +#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ +#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ +#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ +#define ARMV7_PMNC_N_MASK 0x1f +#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ + +/* + * Available counters + */ +#define ARMV7_CNT0 0 /* First event counter */ +#define ARMV7_CCNT 31 /* Cycle counter */ + +/* Perf Event to low level counters mapping */ +#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) + +/* + * CNTENS: counters enable reg + */ +#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) + +/* + * CNTENC: counters disable reg + */ +#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) + +/* + * INTENS: counters overflow interrupt enable reg + */ +#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENS_C (1 << ARMV7_CCNT) + +/* + * INTENC: counters overflow interrupt disable reg + */ +#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENC_C (1 << ARMV7_CCNT) + +/* + * EVTSEL: Event selection reg + */ +#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ + +/* + * SELECT: Counter selection reg + */ +#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ + +/* + * FLAG: counters overflow flag status reg + */ +#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_FLAG_C (1 << ARMV7_CCNT) +#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK + +static inline unsigned long armv7_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); + return val; +} + +static inline void armv7_pmnc_write(unsigned long val) +{ + val &= ARMV7_PMNC_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); +} + +static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) +{ + return pmnc & ARMV7_OVERFLOWED_MASK; +} + +static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, + enum armv7_counters counter) +{ + int ret = 0; + + if (counter == ARMV7_CYCLE_COUNTER) + ret = pmnc & ARMV7_FLAG_C; + else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) + ret = pmnc & ARMV7_FLAG_P(counter); + else + pr_err("CPU%u checking wrong counter %d overflow status\n", + smp_processor_id(), counter); + + return ret; +} + +static inline int armv7_pmnc_select_counter(unsigned int idx) +{ + u32 val; + + if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { + pr_err("CPU%u selecting wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); + + return idx; +} + +static inline u32 armv7pmu_read_counter(int idx) +{ + unsigned long value = 0; + + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mrc p15, 0, %0, c9, c13, 2" + : "=r" (value)); + } else + pr_err("CPU%u reading wrong counter %d\n", + smp_processor_id(), idx); + + return value; +} + +static inline void armv7pmu_write_counter(int idx, u32 value) +{ + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mcr p15, 0, %0, c9, c13, 2" + : : "r" (value)); + } else + pr_err("CPU%u writing wrong counter %d\n", + smp_processor_id(), idx); +} + +static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) +{ + if (armv7_pmnc_select_counter(idx) == idx) { + val &= ARMV7_EVTSEL_MASK; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); + } +} + +static inline u32 armv7_pmnc_enable_counter(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENS_C; + else + val = ARMV7_CNTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_disable_counter(unsigned int idx) +{ + u32 val; + + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENC_C; + else + val = ARMV7_CNTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_enable_intens(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENS_C; + else + val = ARMV7_INTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_disable_intens(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENC_C; + else + val = ARMV7_INTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_getreset_flags(void) +{ + u32 val; + + /* Read */ + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + + /* Write to clear flags */ + val &= ARMV7_FLAG_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); + + return val; +} + +#ifdef DEBUG +static void armv7_pmnc_dump_regs(void) +{ + u32 val; + unsigned int cnt; + + printk(KERN_INFO "PMNC registers dump:\n"); + + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + printk(KERN_INFO "PMNC =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); + printk(KERN_INFO "CNTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); + printk(KERN_INFO "INTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + printk(KERN_INFO "FLAGS =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); + printk(KERN_INFO "SELECT=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); + printk(KERN_INFO "CCNT =0x%08x\n", val); + + for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); + printk(KERN_INFO "CNT[%d] count =0x%08x\n", + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); + asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); + printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); + } +} +#endif + +void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + spin_lock_irqsave(&pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We don't need to set the event if it's a cycle count + */ + if (idx != ARMV7_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* + * Enable interrupt for this counter + */ + armv7_pmnc_enable_intens(idx); + + /* + * Enable counter + */ + armv7_pmnc_enable_counter(idx); + + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + + /* + * Disable counter and interrupt + */ + spin_lock_irqsave(&pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Disable interrupt for this counter + */ + armv7_pmnc_disable_intens(idx); + + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* + * Get and reset the IRQ flags + */ + pmnc = armv7_pmnc_getreset_flags(); + + /* + * Did an overflow occur? + */ + if (!armv7_pmnc_has_overflowed(pmnc)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv7pmu_start(void) +{ + unsigned long flags; + + spin_lock_irqsave(&pmu_lock, flags); + /* Enable all counters */ + armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void armv7pmu_stop(void) +{ + unsigned long flags; + + spin_lock_irqsave(&pmu_lock, flags); + /* Disable all counters */ + armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + int idx; + + /* Always place a cycle counter into the cycle counter. */ + if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { + if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV7_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * the events counters + */ + for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static struct arm_pmu armv7pmu = { + .handle_irq = armv7pmu_handle_irq, + .enable = armv7pmu_enable_event, + .disable = armv7pmu_disable_event, + .read_counter = armv7pmu_read_counter, + .write_counter = armv7pmu_write_counter, + .get_event_idx = armv7pmu_get_event_idx, + .start = armv7pmu_start, + .stop = armv7pmu_stop, + .raw_event_mask = 0xFF, + .max_period = (1LLU << 32) - 1, +}; + +static u32 __init armv7_reset_read_pmnc(void) +{ + u32 nb_cnt; + + /* Initialize & Reset PMNC: C and P bits */ + armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); + + /* Read the nb of CNTx counters supported from PMNC */ + nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + + /* Add the CPU cycles counter and return */ + return nb_cnt + 1; +} + +const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA8; + armv7pmu.name = "ARMv7 Cortex-A8"; + armv7pmu.cache_map = &armv7_a8_perf_cache_map; + armv7pmu.event_map = &armv7_a8_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} + +const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA9; + armv7pmu.name = "ARMv7 Cortex-A9"; + armv7pmu.cache_map = &armv7_a9_perf_cache_map; + armv7pmu.event_map = &armv7_a9_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} +#else +const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + return NULL; +} + +const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c new file mode 100644 index 00000000000..4e9592789d4 --- /dev/null +++ b/arch/arm/kernel/perf_event_xscale.c @@ -0,0 +1,807 @@ +/* + * ARMv5 [xscale] Performance counter handling code. + * + * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com> + * + * Based on the previous xscale OProfile code. + * + * There are two variants of the xscale PMU that we support: + * - xscale1pmu: 2 event counters and a cycle counter + * - xscale2pmu: 4 event counters and a cycle counter + * The two variants share event definitions, but have different + * PMU structures. + */ + +#ifdef CONFIG_CPU_XSCALE +enum xscale_perf_types { + XSCALE_PERFCTR_ICACHE_MISS = 0x00, + XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, + XSCALE_PERFCTR_DATA_STALL = 0x02, + XSCALE_PERFCTR_ITLB_MISS = 0x03, + XSCALE_PERFCTR_DTLB_MISS = 0x04, + XSCALE_PERFCTR_BRANCH = 0x05, + XSCALE_PERFCTR_BRANCH_MISS = 0x06, + XSCALE_PERFCTR_INSTRUCTION = 0x07, + XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, + XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, + XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, + XSCALE_PERFCTR_DCACHE_MISS = 0x0B, + XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, + XSCALE_PERFCTR_PC_CHANGED = 0x0D, + XSCALE_PERFCTR_BCU_REQUEST = 0x10, + XSCALE_PERFCTR_BCU_FULL = 0x11, + XSCALE_PERFCTR_BCU_DRAIN = 0x12, + XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, + XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, + XSCALE_PERFCTR_RMW = 0x16, + /* XSCALE_PERFCTR_CCNT is not hardware defined */ + XSCALE_PERFCTR_CCNT = 0xFE, + XSCALE_PERFCTR_UNUSED = 0xFF, +}; + +enum xscale_counters { + XSCALE_CYCLE_COUNTER = 1, + XSCALE_COUNTER0, + XSCALE_COUNTER1, + XSCALE_COUNTER2, + XSCALE_COUNTER3, +}; + +static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, + [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, + [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +#define XSCALE_PMU_ENABLE 0x001 +#define XSCALE_PMN_RESET 0x002 +#define XSCALE_CCNT_RESET 0x004 +#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) +#define XSCALE_PMU_CNT64 0x008 + +#define XSCALE1_OVERFLOWED_MASK 0x700 +#define XSCALE1_CCOUNT_OVERFLOW 0x400 +#define XSCALE1_COUNT0_OVERFLOW 0x100 +#define XSCALE1_COUNT1_OVERFLOW 0x200 +#define XSCALE1_CCOUNT_INT_EN 0x040 +#define XSCALE1_COUNT0_INT_EN 0x010 +#define XSCALE1_COUNT1_INT_EN 0x020 +#define XSCALE1_COUNT0_EVT_SHFT 12 +#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) +#define XSCALE1_COUNT1_EVT_SHFT 20 +#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) + +static inline u32 +xscale1pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); + return val; +} + +static inline void +xscale1pmu_write_pmnc(u32 val) +{ + /* upper 4bits and 7, 11 are write-as-0 */ + val &= 0xffff77f; + asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); +} + +static inline int +xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = pmnc & XSCALE1_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = pmnc & XSCALE1_COUNT1_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale1pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* + * NOTE: there's an A stepping erratum that states if an overflow + * bit already exists and another occurs, the previous + * Overflow bit gets cleared. There's no workaround. + * Fixed in B stepping or later. + */ + pmnc = xscale1pmu_read_pmnc(); + + /* + * Write the value back to clear the overflow flags. Overflow + * flags remain in pmnc for use below. We also disable the PMU + * while we process the interrupt. + */ + xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) + return IRQ_NONE; + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + irq_work_run(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void +xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = 0; + evt = XSCALE1_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | + XSCALE1_COUNT0_INT_EN; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | + XSCALE1_COUNT1_INT_EN; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = XSCALE1_CCOUNT_INT_EN; + evt = 0; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + if (XSCALE_PERFCTR_CCNT == event->config_base) { + if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return XSCALE_CYCLE_COUNTER; + } else { + if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) + return XSCALE_COUNTER1; + + if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) + return XSCALE_COUNTER0; + + return -EAGAIN; + } +} + +static void +xscale1pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val |= XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline u32 +xscale1pmu_read_counter(int counter) +{ + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void +xscale1pmu_write_counter(int counter, u32 val) +{ + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); + break; + } +} + +static const struct arm_pmu xscale1pmu = { + .id = ARM_PERF_PMU_ID_XSCALE1, + .name = "xscale1", + .handle_irq = xscale1pmu_handle_irq, + .enable = xscale1pmu_enable_event, + .disable = xscale1pmu_disable_event, + .read_counter = xscale1pmu_read_counter, + .write_counter = xscale1pmu_write_counter, + .get_event_idx = xscale1pmu_get_event_idx, + .start = xscale1pmu_start, + .stop = xscale1pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init xscale1pmu_init(void) +{ + return &xscale1pmu; +} + +#define XSCALE2_OVERFLOWED_MASK 0x01f +#define XSCALE2_CCOUNT_OVERFLOW 0x001 +#define XSCALE2_COUNT0_OVERFLOW 0x002 +#define XSCALE2_COUNT1_OVERFLOW 0x004 +#define XSCALE2_COUNT2_OVERFLOW 0x008 +#define XSCALE2_COUNT3_OVERFLOW 0x010 +#define XSCALE2_CCOUNT_INT_EN 0x001 +#define XSCALE2_COUNT0_INT_EN 0x002 +#define XSCALE2_COUNT1_INT_EN 0x004 +#define XSCALE2_COUNT2_INT_EN 0x008 +#define XSCALE2_COUNT3_INT_EN 0x010 +#define XSCALE2_COUNT0_EVT_SHFT 0 +#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) +#define XSCALE2_COUNT1_EVT_SHFT 8 +#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) +#define XSCALE2_COUNT2_EVT_SHFT 16 +#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) +#define XSCALE2_COUNT3_EVT_SHFT 24 +#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) + +static inline u32 +xscale2pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); + /* bits 1-2 and 4-23 are read-unpredictable */ + return val & 0xff000009; +} + +static inline void +xscale2pmu_write_pmnc(u32 val) +{ + /* bits 4-23 are write-as-0, 24-31 are write ignored */ + val &= 0xf; + asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_overflow_flags(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_overflow_flags(u32 val) +{ + asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_event_select(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_event_select(u32 val) +{ + asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); +} + +static inline u32 +xscale2pmu_read_int_enable(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); + return val; +} + +static void +xscale2pmu_write_int_enable(u32 val) +{ + asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); +} + +static inline int +xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = of_flags & XSCALE2_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = of_flags & XSCALE2_COUNT1_OVERFLOW; + break; + case XSCALE_COUNTER2: + ret = of_flags & XSCALE2_COUNT2_OVERFLOW; + break; + case XSCALE_COUNTER3: + ret = of_flags & XSCALE2_COUNT3_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale2pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc, of_flags; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* Disable the PMU. */ + pmnc = xscale2pmu_read_pmnc(); + xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + /* Check the overflow flag register. */ + of_flags = xscale2pmu_read_overflow_flags(); + if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) + return IRQ_NONE; + + /* Clear the overflow bits. */ + xscale2pmu_write_overflow_flags(of_flags); + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + irq_work_run(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void +xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags, ien, evtsel; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien |= XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien |= XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien |= XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien |= XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien |= XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags, ien, evtsel; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien &= ~XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien &= ~XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien &= ~XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien &= ~XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien &= ~XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + int idx = xscale1pmu_get_event_idx(cpuc, event); + if (idx >= 0) + goto out; + + if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) + idx = XSCALE_COUNTER3; + else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) + idx = XSCALE_COUNTER2; +out: + return idx; +} + +static void +xscale2pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; + val |= XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale2pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale2pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline u32 +xscale2pmu_read_counter(int counter) +{ + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void +xscale2pmu_write_counter(int counter, u32 val) +{ + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); + break; + } +} + +static const struct arm_pmu xscale2pmu = { + .id = ARM_PERF_PMU_ID_XSCALE2, + .name = "xscale2", + .handle_irq = xscale2pmu_handle_irq, + .enable = xscale2pmu_enable_event, + .disable = xscale2pmu_disable_event, + .read_counter = xscale2pmu_read_counter, + .write_counter = xscale2pmu_write_counter, + .get_event_idx = xscale2pmu_get_event_idx, + .start = xscale2pmu_start, + .stop = xscale2pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, + .num_events = 5, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init xscale2pmu_init(void) +{ + return &xscale2pmu; +} +#else +const struct arm_pmu *__init xscale1pmu_init(void) +{ + return NULL; +} + +const struct arm_pmu *__init xscale2pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_XSCALE */ diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index fd26f8d6515..9cf4cbf8f95 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -59,6 +59,8 @@ relocate_new_kernel: ldr r2,kexec_boot_atags mov pc,lr + .align + .globl kexec_start_address kexec_start_address: .long 0x0 diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c new file mode 100644 index 00000000000..2cdcc9287c7 --- /dev/null +++ b/arch/arm/kernel/sched_clock.c @@ -0,0 +1,69 @@ +/* + * sched_clock.c: support for extending counters to full 64-bit ns counter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/clocksource.h> +#include <linux/init.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> + +#include <asm/sched_clock.h> + +static void sched_clock_poll(unsigned long wrap_ticks); +static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0); +static void (*sched_clock_update_fn)(void); + +static void sched_clock_poll(unsigned long wrap_ticks) +{ + mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks)); + sched_clock_update_fn(); +} + +void __init init_sched_clock(struct clock_data *cd, void (*update)(void), + unsigned int clock_bits, unsigned long rate) +{ + unsigned long r, w; + u64 res, wrap; + char r_unit; + + sched_clock_update_fn = update; + + /* calculate the mult/shift to convert counter ticks to ns. */ + clocks_calc_mult_shift(&cd->mult, &cd->shift, rate, NSEC_PER_SEC, 60); + + r = rate; + if (r >= 4000000) { + r /= 1000000; + r_unit = 'M'; + } else { + r /= 1000; + r_unit = 'k'; + } + + /* calculate how many ns until we wrap */ + wrap = cyc_to_ns((1ULL << clock_bits) - 1, cd->mult, cd->shift); + do_div(wrap, NSEC_PER_MSEC); + w = wrap; + + /* calculate the ns resolution of this counter */ + res = cyc_to_ns(1ULL, cd->mult, cd->shift); + pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n", + clock_bits, r, r_unit, res, w); + + /* + * Start the timer to keep sched_clock() properly updated and + * sets the initial epoch. + */ + sched_clock_timer.data = msecs_to_jiffies(w - (w / 10)); + sched_clock_poll(sched_clock_timer.data); + + /* + * Ensure that sched_clock() starts off at 0ns + */ + cd->epoch_ns = 0; +} diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5341b0b1970..5ec79b4ff95 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -16,6 +16,7 @@ #include <linux/cache.h> #include <linux/profile.h> #include <linux/errno.h> +#include <linux/ftrace.h> #include <linux/mm.h> #include <linux/err.h> #include <linux/cpu.h> @@ -319,7 +320,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void) * All kernel threads share the same mm context; grab a * reference and switch to it. */ - atomic_inc(&mm->mm_users); atomic_inc(&mm->mm_count); current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); @@ -475,7 +475,7 @@ static void ipi_timer(void) } #ifdef CONFIG_LOCAL_TIMERS -asmlinkage void __exception do_local_timer(struct pt_regs *regs) +asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); int cpu = smp_processor_id(); @@ -583,7 +583,7 @@ static void ipi_cpu_stop(unsigned int cpu) /* * Main handler for inter-processor interrupts */ -asmlinkage void __exception do_IPI(int ipinr, struct pt_regs *regs) +asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 24585d97c10..dd790745b3e 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -127,8 +127,6 @@ static void __cpuinit twd_calibrate_rate(void) */ void __cpuinit twd_timer_setup(struct clock_event_device *clk) { - unsigned long flags; - twd_calibrate_rate(); clk->name = "local_timer"; @@ -143,10 +141,7 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk) clk->min_delta_ns = clockevent_delta2ns(0xf, clk); /* Make sure our local interrupt controller has this enabled */ - local_irq_save(flags); - irq_to_desc(clk->irq)->status |= IRQ_NOPROBE; - get_irq_chip(clk->irq)->unmask(clk->irq); - local_irq_restore(flags); + gic_enable_ppi(clk->irq); clockevents_register_device(clk); } diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 1581f6d18cc..86b66f3f203 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -101,6 +101,7 @@ SECTIONS __exception_text_start = .; *(.exception.text) __exception_text_end = .; + IRQENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile index 62d686f0b42..d13add71f72 100644 --- a/arch/arm/mach-at91/Makefile +++ b/arch/arm/mach-at91/Makefile @@ -65,7 +65,7 @@ obj-$(CONFIG_MACH_AT91SAM9G20EK) += board-sam9g20ek.o obj-$(CONFIG_MACH_CPU9G20) += board-cpu9krea.o obj-$(CONFIG_MACH_STAMP9G20) += board-stamp9g20.o obj-$(CONFIG_MACH_PORTUXG20) += board-stamp9g20.o -obj-$(CONFIG_MACH_PCONTROL_G20) += board-pcontrol-g20.o +obj-$(CONFIG_MACH_PCONTROL_G20) += board-pcontrol-g20.o board-stamp9g20.o # AT91SAM9260/AT91SAM9G20 board-specific support obj-$(CONFIG_MACH_SNAPPER_9260) += board-snapper9260.o diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c index 2500f41d8d2..1dd69c85dfe 100644 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@ -101,7 +101,6 @@ static struct clocksource clk32k = { .rating = 150, .read = read_clk32k, .mask = CLOCKSOURCE_MASK(20), - .shift = 10, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -201,8 +200,7 @@ void __init at91rm9200_timer_init(void) clockevents_register_device(&clkevt); /* register clocksource */ - clk32k.mult = clocksource_hz2mult(AT91_SLOW_CLOCK, clk32k.shift); - clocksource_register(&clk32k); + clocksource_register_hz(&clk32k, AT91_SLOW_CLOCK); } struct sys_timer at91rm9200_timer = { diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c index 608a63240b6..4ba85499fa9 100644 --- a/arch/arm/mach-at91/at91sam926x_time.c +++ b/arch/arm/mach-at91/at91sam926x_time.c @@ -51,7 +51,6 @@ static struct clocksource pit_clk = { .name = "pit", .rating = 175, .read = read_pit_clk, - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -163,10 +162,9 @@ static void __init at91sam926x_pit_init(void) * Register clocksource. The high order bits of PIV are unused, * so this isn't a 32-bit counter unless we get clockevent irqs. */ - pit_clk.mult = clocksource_hz2mult(pit_rate, pit_clk.shift); bits = 12 /* PICNT */ + ilog2(pit_cycle) /* PIV */; pit_clk.mask = CLOCKSOURCE_MASK(bits); - clocksource_register(&pit_clk); + clocksource_register_hz(&pit_clk, pit_rate); /* Set up irq handler */ setup_irq(AT91_ID_SYS, &at91sam926x_pit_irq); diff --git a/arch/arm/mach-at91/board-pcontrol-g20.c b/arch/arm/mach-at91/board-pcontrol-g20.c index bba5a560e02..feb65787c30 100644 --- a/arch/arm/mach-at91/board-pcontrol-g20.c +++ b/arch/arm/mach-at91/board-pcontrol-g20.c @@ -31,6 +31,7 @@ #include <mach/board.h> #include <mach/at91sam9_smc.h> +#include <mach/stamp9g20.h> #include "sam9_smc.h" #include "generic.h" @@ -38,11 +39,7 @@ static void __init pcontrol_g20_map_io(void) { - /* Initialize processor: 18.432 MHz crystal */ - at91sam9260_initialize(18432000); - - /* DGBU on ttyS0. (Rx, Tx) only TTL -> JTAG connector X7 17,19 ) */ - at91_register_uart(0, 0, 0); + stamp9g20_map_io(); /* USART0 on ttyS1. (Rx, Tx, CTS, RTS) piggyback A2 */ at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS @@ -54,9 +51,6 @@ static void __init pcontrol_g20_map_io(void) /* USART2 on ttyS3. (Rx, Tx) 9bit-Bus Multidrop-mode X4 */ at91_register_uart(AT91SAM9260_ID_US4, 3, 0); - - /* set serial console to ttyS0 (ie, DBGU) */ - at91_set_serial_console(0); } @@ -66,38 +60,6 @@ static void __init init_irq(void) } -/* - * NAND flash 512MiB 1,8V 8-bit, sector size 128 KiB - */ -static struct atmel_nand_data __initdata nand_data = { - .ale = 21, - .cle = 22, - .rdy_pin = AT91_PIN_PC13, - .enable_pin = AT91_PIN_PC14, -}; - -/* - * Bus timings; unit = 7.57ns - */ -static struct sam9_smc_config __initdata nand_smc_config = { - .ncs_read_setup = 0, - .nrd_setup = 2, - .ncs_write_setup = 0, - .nwe_setup = 2, - - .ncs_read_pulse = 4, - .nrd_pulse = 4, - .ncs_write_pulse = 4, - .nwe_pulse = 4, - - .read_cycle = 7, - .write_cycle = 7, - - .mode = AT91_SMC_READMODE | AT91_SMC_WRITEMODE - | AT91_SMC_EXNWMODE_DISABLE | AT91_SMC_DBW_8, - .tdf_cycles = 3, -}; - static struct sam9_smc_config __initdata pcontrol_smc_config[2] = { { .ncs_read_setup = 16, .nrd_setup = 18, @@ -138,14 +100,6 @@ static struct sam9_smc_config __initdata pcontrol_smc_config[2] = { { .tdf_cycles = 1, } }; -static void __init add_device_nand(void) -{ - /* configure chip-select 3 (NAND) */ - sam9_smc_configure(3, &nand_smc_config); - at91_add_device_nand(&nand_data); -} - - static void __init add_device_pcontrol(void) { /* configure chip-select 4 (IO compatible to 8051 X4 ) */ @@ -156,23 +110,6 @@ static void __init add_device_pcontrol(void) /* - * MCI (SD/MMC) - * det_pin, wp_pin and vcc_pin are not connected - */ -#if defined(CONFIG_MMC_ATMELMCI) || defined(CONFIG_MMC_ATMELMCI_MODULE) -static struct mci_platform_data __initdata mmc_data = { - .slot[0] = { - .bus_width = 4, - }, -}; -#else -static struct at91_mmc_data __initdata mmc_data = { - .wire4 = 1, -}; -#endif - - -/* * USB Host port */ static struct at91_usbh_data __initdata usbh_data = { @@ -265,42 +202,13 @@ static struct spi_board_info pcontrol_g20_spi_devices[] = { }; -/* - * Dallas 1-Wire DS2431 - */ -static struct w1_gpio_platform_data w1_gpio_pdata = { - .pin = AT91_PIN_PA29, - .is_open_drain = 1, -}; - -static struct platform_device w1_device = { - .name = "w1-gpio", - .id = -1, - .dev.platform_data = &w1_gpio_pdata, -}; - -static void add_wire1(void) -{ - at91_set_GPIO_periph(w1_gpio_pdata.pin, 1); - at91_set_multi_drive(w1_gpio_pdata.pin, 1); - platform_device_register(&w1_device); -} - - static void __init pcontrol_g20_board_init(void) { - at91_add_device_serial(); - add_device_nand(); -#if defined(CONFIG_MMC_ATMELMCI) || defined(CONFIG_MMC_ATMELMCI_MODULE) - at91_add_device_mci(0, &mmc_data); -#else - at91_add_device_mmc(0, &mmc_data); -#endif + stamp9g20_board_init(); at91_add_device_usbh(&usbh_data); at91_add_device_eth(&macb_data); at91_add_device_i2c(pcontrol_g20_i2c_devices, ARRAY_SIZE(pcontrol_g20_i2c_devices)); - add_wire1(); add_device_pcontrol(); at91_add_device_spi(pcontrol_g20_spi_devices, ARRAY_SIZE(pcontrol_g20_spi_devices)); diff --git a/arch/arm/mach-at91/board-stamp9g20.c b/arch/arm/mach-at91/board-stamp9g20.c index 5206eef4a67..f8902b11896 100644 --- a/arch/arm/mach-at91/board-stamp9g20.c +++ b/arch/arm/mach-at91/board-stamp9g20.c @@ -32,7 +32,7 @@ #include "generic.h" -static void __init portuxg20_map_io(void) +void __init stamp9g20_map_io(void) { /* Initialize processor: 18.432 MHz crystal */ at91sam9260_initialize(18432000); @@ -40,6 +40,24 @@ static void __init portuxg20_map_io(void) /* DGBU on ttyS0. (Rx & Tx only) */ at91_register_uart(0, 0, 0); + /* set serial console to ttyS0 (ie, DBGU) */ + at91_set_serial_console(0); +} + +static void __init stamp9g20evb_map_io(void) +{ + stamp9g20_map_io(); + + /* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */ + at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS + | ATMEL_UART_DTR | ATMEL_UART_DSR + | ATMEL_UART_DCD | ATMEL_UART_RI); +} + +static void __init portuxg20_map_io(void) +{ + stamp9g20_map_io(); + /* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */ at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS | ATMEL_UART_DTR | ATMEL_UART_DSR @@ -56,26 +74,6 @@ static void __init portuxg20_map_io(void) /* USART5 on ttyS6. (Rx, Tx only) */ at91_register_uart(AT91SAM9260_ID_US5, 6, 0); - - /* set serial console to ttyS0 (ie, DBGU) */ - at91_set_serial_console(0); -} - -static void __init stamp9g20_map_io(void) -{ - /* Initialize processor: 18.432 MHz crystal */ - at91sam9260_initialize(18432000); - - /* DGBU on ttyS0. (Rx & Tx only) */ - at91_register_uart(0, 0, 0); - - /* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */ - at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS - | ATMEL_UART_DTR | ATMEL_UART_DSR - | ATMEL_UART_DCD | ATMEL_UART_RI); - - /* set serial console to ttyS0 (ie, DBGU) */ - at91_set_serial_console(0); } static void __init init_irq(void) @@ -156,7 +154,7 @@ static struct at91_udc_data __initdata portuxg20_udc_data = { .pullup_pin = 0, /* pull-up driven by UDC */ }; -static struct at91_udc_data __initdata stamp9g20_udc_data = { +static struct at91_udc_data __initdata stamp9g20evb_udc_data = { .vbus_pin = AT91_PIN_PA22, .pullup_pin = 0, /* pull-up driven by UDC */ }; @@ -190,7 +188,7 @@ static struct gpio_led portuxg20_leds[] = { } }; -static struct gpio_led stamp9g20_leds[] = { +static struct gpio_led stamp9g20evb_leds[] = { { .name = "D8", .gpio = AT91_PIN_PB18, @@ -250,7 +248,7 @@ void add_w1(void) } -static void __init generic_board_init(void) +void __init stamp9g20_board_init(void) { /* Serial */ at91_add_device_serial(); @@ -262,34 +260,40 @@ static void __init generic_board_init(void) #else at91_add_device_mmc(0, &mmc_data); #endif - /* USB Host */ - at91_add_device_usbh(&usbh_data); - /* Ethernet */ - at91_add_device_eth(&macb_data); - /* I2C */ - at91_add_device_i2c(NULL, 0); /* W1 */ add_w1(); } static void __init portuxg20_board_init(void) { - generic_board_init(); - /* SPI */ - at91_add_device_spi(portuxg20_spi_devices, ARRAY_SIZE(portuxg20_spi_devices)); + stamp9g20_board_init(); + /* USB Host */ + at91_add_device_usbh(&usbh_data); /* USB Device */ at91_add_device_udc(&portuxg20_udc_data); + /* Ethernet */ + at91_add_device_eth(&macb_data); + /* I2C */ + at91_add_device_i2c(NULL, 0); + /* SPI */ + at91_add_device_spi(portuxg20_spi_devices, ARRAY_SIZE(portuxg20_spi_devices)); /* LEDs */ at91_gpio_leds(portuxg20_leds, ARRAY_SIZE(portuxg20_leds)); } -static void __init stamp9g20_board_init(void) +static void __init stamp9g20evb_board_init(void) { - generic_board_init(); + stamp9g20_board_init(); + /* USB Host */ + at91_add_device_usbh(&usbh_data); /* USB Device */ - at91_add_device_udc(&stamp9g20_udc_data); + at91_add_device_udc(&stamp9g20evb_udc_data); + /* Ethernet */ + at91_add_device_eth(&macb_data); + /* I2C */ + at91_add_device_i2c(NULL, 0); /* LEDs */ - at91_gpio_leds(stamp9g20_leds, ARRAY_SIZE(stamp9g20_leds)); + at91_gpio_leds(stamp9g20evb_leds, ARRAY_SIZE(stamp9g20evb_leds)); } MACHINE_START(PORTUXG20, "taskit PortuxG20") @@ -305,7 +309,7 @@ MACHINE_START(STAMP9G20, "taskit Stamp9G20") /* Maintainer: taskit GmbH */ .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = stamp9g20_map_io, + .map_io = stamp9g20evb_map_io, .init_irq = init_irq, - .init_machine = stamp9g20_board_init, + .init_machine = stamp9g20evb_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c index 7525cee3983..9113da6845f 100644 --- a/arch/arm/mach-at91/clock.c +++ b/arch/arm/mach-at91/clock.c @@ -658,7 +658,7 @@ static void __init at91_upll_usbfs_clock_init(unsigned long main_clock) /* Now set uhpck values */ uhpck.parent = &utmi_clk; uhpck.pmc_mask = AT91SAM926x_PMC_UHP; - uhpck.rate_hz = utmi_clk.parent->rate_hz; + uhpck.rate_hz = utmi_clk.rate_hz; uhpck.rate_hz /= 1 + ((at91_sys_read(AT91_PMC_USB) & AT91_PMC_OHCIUSBDIV) >> 8); } diff --git a/arch/arm/mach-at91/include/mach/at91_mci.h b/arch/arm/mach-at91/include/mach/at91_mci.h index 57f8ee15494..27ac6f550fe 100644 --- a/arch/arm/mach-at91/include/mach/at91_mci.h +++ b/arch/arm/mach-at91/include/mach/at91_mci.h @@ -74,6 +74,8 @@ #define AT91_MCI_TRTYP_BLOCK (0 << 19) #define AT91_MCI_TRTYP_MULTIPLE (1 << 19) #define AT91_MCI_TRTYP_STREAM (2 << 19) +#define AT91_MCI_TRTYP_SDIO_BYTE (4 << 19) +#define AT91_MCI_TRTYP_SDIO_BLOCK (5 << 19) #define AT91_MCI_BLKR 0x18 /* Block Register */ #define AT91_MCI_BLKR_BCNT(n) ((0xffff & (n)) << 0) /* Block count */ diff --git a/arch/arm/mach-at91/include/mach/stamp9g20.h b/arch/arm/mach-at91/include/mach/stamp9g20.h new file mode 100644 index 00000000000..6120f9c46d5 --- /dev/null +++ b/arch/arm/mach-at91/include/mach/stamp9g20.h @@ -0,0 +1,7 @@ +#ifndef __MACH_STAMP9G20_H +#define __MACH_STAMP9G20_H + +void stamp9g20_map_io(void); +void stamp9g20_board_init(void); + +#endif diff --git a/arch/arm/mach-bcmring/core.c b/arch/arm/mach-bcmring/core.c index ed96ef40047..8fc2035759f 100644 --- a/arch/arm/mach-bcmring/core.c +++ b/arch/arm/mach-bcmring/core.c @@ -294,7 +294,6 @@ static struct clocksource clocksource_bcmring_timer1 = { .rating = 200, .read = bcmring_get_cycles_timer1, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -303,7 +302,6 @@ static struct clocksource clocksource_bcmring_timer3 = { .rating = 100, .read = bcmring_get_cycles_timer3, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -316,10 +314,8 @@ static int __init bcmring_clocksource_init(void) writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, TIMER1_VA_BASE + TIMER_CTRL); - clocksource_bcmring_timer1.mult = - clocksource_khz2mult(TIMER1_FREQUENCY_MHZ * 1000, - clocksource_bcmring_timer1.shift); - clocksource_register(&clocksource_bcmring_timer1); + clocksource_register_khz(&clocksource_bcmring_timer1, + TIMER1_FREQUENCY_MHZ * 1000); /* setup timer3 as free-running clocksource */ writel(0, TIMER3_VA_BASE + TIMER_CTRL); @@ -328,10 +324,8 @@ static int __init bcmring_clocksource_init(void) writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, TIMER3_VA_BASE + TIMER_CTRL); - clocksource_bcmring_timer3.mult = - clocksource_khz2mult(TIMER3_FREQUENCY_KHZ, - clocksource_bcmring_timer3.shift); - clocksource_register(&clocksource_bcmring_timer3); + clocksource_register_khz(&clocksource_bcmring_timer3, + TIMER3_FREQUENCY_KHZ); return 0; } diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c index 9ca4d581016..da30078a80c 100644 --- a/arch/arm/mach-cns3xxx/core.c +++ b/arch/arm/mach-cns3xxx/core.c @@ -69,13 +69,10 @@ void __init cns3xxx_map_io(void) } /* used by entry-macro.S */ -void __iomem *gic_cpu_base_addr; - void __init cns3xxx_init_irq(void) { - gic_cpu_base_addr = __io(CNS3XXX_TC11MP_GIC_CPU_BASE_VIRT); - gic_dist_init(0, __io(CNS3XXX_TC11MP_GIC_DIST_BASE_VIRT), 29); - gic_cpu_init(0, gic_cpu_base_addr); + gic_init(0, 29, __io(CNS3XXX_TC11MP_GIC_DIST_BASE_VIRT), + __io(CNS3XXX_TC11MP_GIC_CPU_BASE_VIRT)); } void cns3xxx_power_off(void) diff --git a/arch/arm/mach-cns3xxx/core.h b/arch/arm/mach-cns3xxx/core.h index 6b33ec11346..ef9e5116b1a 100644 --- a/arch/arm/mach-cns3xxx/core.h +++ b/arch/arm/mach-cns3xxx/core.h @@ -11,7 +11,6 @@ #ifndef __CNS3XXX_CORE_H #define __CNS3XXX_CORE_H -extern void __iomem *gic_cpu_base_addr; extern struct sys_timer cns3xxx_timer; void __init cns3xxx_map_io(void); diff --git a/arch/arm/mach-cns3xxx/include/mach/entry-macro.S b/arch/arm/mach-cns3xxx/include/mach/entry-macro.S index 5e1c5545680..6bd83ed90af 100644 --- a/arch/arm/mach-cns3xxx/include/mach/entry-macro.S +++ b/arch/arm/mach-cns3xxx/include/mach/entry-macro.S @@ -9,74 +9,10 @@ */ #include <mach/hardware.h> -#include <asm/hardware/gic.h> +#include <asm/hardware/entry-macro-gic.S> .macro disable_fiq .endm - .macro get_irqnr_preamble, base, tmp - ldr \base, =gic_cpu_base_addr - ldr \base, [\base] - .endm - .macro arch_ret_to_user, tmp1, tmp2 .endm - - /* - * The interrupt numbering scheme is defined in the - * interrupt controller spec. To wit: - * - * Interrupts 0-15 are IPI - * 16-28 are reserved - * 29-31 are local. We allow 30 to be used for the watchdog. - * 32-1020 are global - * 1021-1022 are reserved - * 1023 is "spurious" (no interrupt) - * - * For now, we ignore all local interrupts so only return an interrupt if it's - * between 30 and 1020. The test_for_ipi routine below will pick up on IPIs. - * - * A simple read from the controller will tell us the number of the highest - * priority enabled interrupt. We then just need to check whether it is in the - * valid range for an IRQ (30-1020 inclusive). - */ - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - - ldr \irqstat, [\base, #GIC_CPU_INTACK] /* bits 12-10 = src CPU, 9-0 = int # */ - - ldr \tmp, =1021 - - bic \irqnr, \irqstat, #0x1c00 - - cmp \irqnr, #29 - cmpcc \irqnr, \irqnr - cmpne \irqnr, \tmp - cmpcs \irqnr, \irqnr - - .endm - - /* We assume that irqstat (the raw value of the IRQ acknowledge - * register) is preserved from the macro above. - * If there is an IPI, we immediately signal end of interrupt on the - * controller, since this requires the original irqstat value which - * we won't easily be able to recreate later. - */ - - .macro test_for_ipi, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #16 - strcc \irqstat, [\base, #GIC_CPU_EOI] - cmpcs \irqnr, \irqnr - .endm - - /* As above, this assumes that irqstat and base are preserved.. */ - - .macro test_for_ltirq, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - mov \tmp, #0 - cmp \irqnr, #29 - moveq \tmp, #1 - streq \irqstat, [\base, #GIC_CPU_EOI] - cmp \tmp, #0 - .endm diff --git a/arch/arm/mach-cns3xxx/pcie.c b/arch/arm/mach-cns3xxx/pcie.c index 38088c36936..78defd71a82 100644 --- a/arch/arm/mach-cns3xxx/pcie.c +++ b/arch/arm/mach-cns3xxx/pcie.c @@ -369,7 +369,7 @@ static int __init cns3xxx_pcie_init(void) { int i; - hook_fault_code(16 + 6, cns3xxx_pcie_abort_handler, SIGBUS, + hook_fault_code(16 + 6, cns3xxx_pcie_abort_handler, SIGBUS, 0, "imprecise external abort"); for (i = 0; i < ARRAY_SIZE(cns3xxx_pcie); i++) { diff --git a/arch/arm/mach-davinci/include/mach/io.h b/arch/arm/mach-davinci/include/mach/io.h index 62b0a90309a..d1b954955c1 100644 --- a/arch/arm/mach-davinci/include/mach/io.h +++ b/arch/arm/mach-davinci/include/mach/io.h @@ -22,8 +22,8 @@ #define __mem_isa(a) (a) #ifndef __ASSEMBLER__ -#define __arch_ioremap(p, s, t) davinci_ioremap(p, s, t) -#define __arch_iounmap(v) davinci_iounmap(v) +#define __arch_ioremap davinci_ioremap +#define __arch_iounmap davinci_iounmap void __iomem *davinci_ioremap(unsigned long phys, size_t size, unsigned int type); diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c index 0f21c36e65d..c1486716de7 100644 --- a/arch/arm/mach-davinci/time.c +++ b/arch/arm/mach-davinci/time.c @@ -276,7 +276,6 @@ static struct clocksource clocksource_davinci = { .rating = 300, .read = read_cycles, .mask = CLOCKSOURCE_MASK(32), - .shift = 24, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -378,10 +377,8 @@ static void __init davinci_timer_init(void) /* setup clocksource */ clocksource_davinci.name = id_to_name[clocksource_id]; - clocksource_davinci.mult = - clocksource_khz2mult(davinci_clock_tick_rate/1000, - clocksource_davinci.shift); - if (clocksource_register(&clocksource_davinci)) + if (clocksource_register_hz(&clocksource_davinci, + davinci_clock_tick_rate)) printk(err, clocksource_davinci.name); /* setup clockevent */ diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index 548208f1117..2774df8021d 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -372,7 +372,6 @@ static struct clocksource clocksource_timersp = { .rating = 200, .read = timersp_read, .mask = CLOCKSOURCE_MASK(16), - .shift = 16, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -390,8 +389,7 @@ static void integrator_clocksource_init(u32 khz) writel(ctrl, base + TIMER_CTRL); writel(0xffff, base + TIMER_LOAD); - cs->mult = clocksource_khz2mult(khz, cs->shift); - clocksource_register(cs); + clocksource_register_khz(cs, khz); } static void __iomem * const clkevt_base = (void __iomem *)TIMER1_VA_BASE; diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index 9403d2fa13a..85e48a5f77b 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -40,7 +40,7 @@ #include <asm/mach/map.h> #include <asm/mach/time.h> -#include <plat/timer-sp.h> +#include <asm/hardware/timer-sp.h> #include "common.h" diff --git a/arch/arm/mach-iop13xx/include/mach/io.h b/arch/arm/mach-iop13xx/include/mach/io.h index a6e0f9e6ddc..dffb234bb96 100644 --- a/arch/arm/mach-iop13xx/include/mach/io.h +++ b/arch/arm/mach-iop13xx/include/mach/io.h @@ -35,7 +35,7 @@ extern u32 iop13xx_atux_mem_base; extern size_t iop13xx_atue_mem_size; extern size_t iop13xx_atux_mem_size; -#define __arch_ioremap(a, s, f) __iop13xx_ioremap(a, s, f) -#define __arch_iounmap(a) __iop13xx_iounmap(a) +#define __arch_ioremap __iop13xx_ioremap +#define __arch_iounmap __iop13xx_iounmap #endif diff --git a/arch/arm/mach-iop32x/include/mach/io.h b/arch/arm/mach-iop32x/include/mach/io.h index 339e5854728..059c783ce0b 100644 --- a/arch/arm/mach-iop32x/include/mach/io.h +++ b/arch/arm/mach-iop32x/include/mach/io.h @@ -21,7 +21,7 @@ extern void __iop3xx_iounmap(void __iomem *addr); #define __io(p) ((void __iomem *)IOP3XX_PCI_IO_PHYS_TO_VIRT(p)) #define __mem_pci(a) (a) -#define __arch_ioremap(a, s, f) __iop3xx_ioremap(a, s, f) -#define __arch_iounmap(a) __iop3xx_iounmap(a) +#define __arch_ioremap __iop3xx_ioremap +#define __arch_iounmap __iop3xx_iounmap #endif diff --git a/arch/arm/mach-iop33x/include/mach/io.h b/arch/arm/mach-iop33x/include/mach/io.h index e99a7ed6d05..39e893e97c2 100644 --- a/arch/arm/mach-iop33x/include/mach/io.h +++ b/arch/arm/mach-iop33x/include/mach/io.h @@ -21,7 +21,7 @@ extern void __iop3xx_iounmap(void __iomem *addr); #define __io(p) ((void __iomem *)IOP3XX_PCI_IO_PHYS_TO_VIRT(p)) #define __mem_pci(a) (a) -#define __arch_ioremap(a, s, f) __iop3xx_ioremap(a, s, f) -#define __arch_iounmap(a) __iop3xx_iounmap(a) +#define __arch_ioremap __iop3xx_ioremap +#define __arch_iounmap __iop3xx_iounmap #endif diff --git a/arch/arm/mach-ixp23xx/include/mach/io.h b/arch/arm/mach-ixp23xx/include/mach/io.h index fd9ef8e519f..a1749d0fd89 100644 --- a/arch/arm/mach-ixp23xx/include/mach/io.h +++ b/arch/arm/mach-ixp23xx/include/mach/io.h @@ -45,8 +45,8 @@ ixp23xx_iounmap(void __iomem *addr) __iounmap(addr); } -#define __arch_ioremap(a,s,f) ixp23xx_ioremap(a,s,f) -#define __arch_iounmap(a) ixp23xx_iounmap(a) +#define __arch_ioremap ixp23xx_ioremap +#define __arch_iounmap ixp23xx_iounmap #endif diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c index 24498a932ba..a54b3db8036 100644 --- a/arch/arm/mach-ixp4xx/common-pci.c +++ b/arch/arm/mach-ixp4xx/common-pci.c @@ -513,4 +513,4 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) EXPORT_SYMBOL(ixp4xx_pci_read); EXPORT_SYMBOL(ixp4xx_pci_write); - +EXPORT_SYMBOL(dma_set_coherent_mask); diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 0bce09799d1..4dbfcbb9163 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -35,6 +35,7 @@ #include <asm/pgtable.h> #include <asm/page.h> #include <asm/irq.h> +#include <asm/sched_clock.h> #include <asm/mach/map.h> #include <asm/mach/irq.h> @@ -399,6 +400,23 @@ void __init ixp4xx_sys_init(void) } /* + * sched_clock() + */ +static DEFINE_CLOCK_DATA(cd); + +unsigned long long notrace sched_clock(void) +{ + u32 cyc = *IXP4XX_OSTS; + return cyc_to_sched_clock(&cd, cyc, (u32)~0); +} + +static void notrace ixp4xx_update_sched_clock(void) +{ + u32 cyc = *IXP4XX_OSTS; + update_sched_clock(&cd, cyc, (u32)~0); +} + +/* * clocksource */ static cycle_t ixp4xx_get_cycles(struct clocksource *cs) @@ -411,7 +429,6 @@ static struct clocksource clocksource_ixp4xx = { .rating = 200, .read = ixp4xx_get_cycles, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -419,21 +436,9 @@ unsigned long ixp4xx_timer_freq = FREQ; EXPORT_SYMBOL(ixp4xx_timer_freq); static void __init ixp4xx_clocksource_init(void) { - clocksource_ixp4xx.mult = - clocksource_hz2mult(ixp4xx_timer_freq, - clocksource_ixp4xx.shift); - clocksource_register(&clocksource_ixp4xx); -} - -/* - * sched_clock() - */ -unsigned long long sched_clock(void) -{ - cycle_t cyc = ixp4xx_get_cycles(NULL); - struct clocksource *cs = &clocksource_ixp4xx; + init_sched_clock(&cd, ixp4xx_update_sched_clock, 32, ixp4xx_timer_freq); - return clocksource_cyc2ns(cyc, cs->mult, cs->shift); + clocksource_register_hz(&clocksource_ixp4xx, ixp4xx_timer_freq); } /* diff --git a/arch/arm/mach-ixp4xx/include/mach/io.h b/arch/arm/mach-ixp4xx/include/mach/io.h index de274a1f19d..57b5410c31f 100644 --- a/arch/arm/mach-ixp4xx/include/mach/io.h +++ b/arch/arm/mach-ixp4xx/include/mach/io.h @@ -74,8 +74,8 @@ static inline void __indirect_iounmap(void __iomem *addr) __iounmap(addr); } -#define __arch_ioremap(a, s, f) __indirect_ioremap(a, s, f) -#define __arch_iounmap(a) __indirect_iounmap(a) +#define __arch_ioremap __indirect_ioremap +#define __arch_iounmap __indirect_iounmap #define writeb(v, p) __indirect_writeb(v, p) #define writew(v, p) __indirect_writew(v, p) diff --git a/arch/arm/mach-kirkwood/include/mach/io.h b/arch/arm/mach-kirkwood/include/mach/io.h index 44e8be04f25..1aaddc364f2 100644 --- a/arch/arm/mach-kirkwood/include/mach/io.h +++ b/arch/arm/mach-kirkwood/include/mach/io.h @@ -42,8 +42,8 @@ __arch_iounmap(void __iomem *addr) __iounmap(addr); } -#define __arch_ioremap(p, s, m) __arch_ioremap(p, s, m) -#define __arch_iounmap(a) __arch_iounmap(a) +#define __arch_ioremap __arch_ioremap +#define __arch_iounmap __arch_iounmap #define __io(a) __io(a) #define __mem_pci(a) (a) diff --git a/arch/arm/mach-lpc32xx/timer.c b/arch/arm/mach-lpc32xx/timer.c index 630dd4a74b2..6162ac308c2 100644 --- a/arch/arm/mach-lpc32xx/timer.c +++ b/arch/arm/mach-lpc32xx/timer.c @@ -38,7 +38,6 @@ static cycle_t lpc32xx_clksrc_read(struct clocksource *cs) static struct clocksource lpc32xx_clksrc = { .name = "lpc32xx_clksrc", - .shift = 24, .rating = 300, .read = lpc32xx_clksrc_read, .mask = CLOCKSOURCE_MASK(32), @@ -171,9 +170,7 @@ static void __init lpc32xx_timer_init(void) __raw_writel(0, LCP32XX_TIMER_MCR(LPC32XX_TIMER1_BASE)); __raw_writel(LCP32XX_TIMER_CNTR_TCR_EN, LCP32XX_TIMER_TCR(LPC32XX_TIMER1_BASE)); - lpc32xx_clksrc.mult = clocksource_hz2mult(clkrate, - lpc32xx_clksrc.shift); - clocksource_register(&lpc32xx_clksrc); + clocksource_register_hz(&lpc32xx_clksrc, clkrate); } struct sys_timer lpc32xx_timer = { diff --git a/arch/arm/mach-mmp/mmp2.c b/arch/arm/mach-mmp/mmp2.c index daf3993349f..2e3dd08ccc3 100644 --- a/arch/arm/mach-mmp/mmp2.c +++ b/arch/arm/mach-mmp/mmp2.c @@ -126,7 +126,6 @@ static APBC_CLK(twsi3, MMP2_TWSI3, 0, 26000000); static APBC_CLK(twsi4, MMP2_TWSI4, 0, 26000000); static APBC_CLK(twsi5, MMP2_TWSI5, 0, 26000000); static APBC_CLK(twsi6, MMP2_TWSI6, 0, 26000000); -static APBC_CLK(rtc, MMP2_RTC, 0, 32768); static APMU_CLK(nand, NAND, 0xbf, 100000000); diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c index 66528193f93..aeb9ae23e6c 100644 --- a/arch/arm/mach-mmp/time.c +++ b/arch/arm/mach-mmp/time.c @@ -26,8 +26,8 @@ #include <linux/io.h> #include <linux/irq.h> #include <linux/sched.h> -#include <linux/cnt32_to_63.h> +#include <asm/sched_clock.h> #include <mach/addr-map.h> #include <mach/regs-timers.h> #include <mach/regs-apbc.h> @@ -42,23 +42,7 @@ #define MAX_DELTA (0xfffffffe) #define MIN_DELTA (16) -#define TCR2NS_SCALE_FACTOR 10 - -static unsigned long tcr2ns_scale; - -static void __init set_tcr2ns_scale(unsigned long tcr_rate) -{ - unsigned long long v = 1000000000ULL << TCR2NS_SCALE_FACTOR; - do_div(v, tcr_rate); - tcr2ns_scale = v; - /* - * We want an even value to automatically clear the top bit - * returned by cnt32_to_63() without an additional run time - * instruction. So if the LSB is 1 then round it up. - */ - if (tcr2ns_scale & 1) - tcr2ns_scale++; -} +static DEFINE_CLOCK_DATA(cd); /* * FIXME: the timer needs some delay to stablize the counter capture @@ -75,10 +59,16 @@ static inline uint32_t timer_read(void) return __raw_readl(TIMERS_VIRT_BASE + TMR_CVWR(0)); } -unsigned long long sched_clock(void) +unsigned long long notrace sched_clock(void) { - unsigned long long v = cnt32_to_63(timer_read()); - return (v * tcr2ns_scale) >> TCR2NS_SCALE_FACTOR; + u32 cyc = timer_read(); + return cyc_to_sched_clock(&cd, cyc, (u32)~0); +} + +static void notrace mmp_update_sched_clock(void) +{ + u32 cyc = timer_read(); + update_sched_clock(&cd, cyc, (u32)~0); } static irqreturn_t timer_interrupt(int irq, void *dev_id) @@ -146,7 +136,6 @@ static cycle_t clksrc_read(struct clocksource *cs) static struct clocksource cksrc = { .name = "clocksource", - .shift = 20, .rating = 200, .read = clksrc_read, .mask = CLOCKSOURCE_MASK(32), @@ -186,17 +175,15 @@ void __init timer_init(int irq) { timer_config(); - set_tcr2ns_scale(CLOCK_TICK_RATE); + init_sched_clock(&cd, mmp_update_sched_clock, 32, CLOCK_TICK_RATE); ckevt.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, ckevt.shift); ckevt.max_delta_ns = clockevent_delta2ns(MAX_DELTA, &ckevt); ckevt.min_delta_ns = clockevent_delta2ns(MIN_DELTA, &ckevt); ckevt.cpumask = cpumask_of(0); - cksrc.mult = clocksource_hz2mult(CLOCK_TICK_RATE, cksrc.shift); - setup_irq(irq, &timer_irq); - clocksource_register(&cksrc); + clocksource_register_hz(&cksrc, CLOCK_TICK_RATE); clockevents_register_device(&ckevt); } diff --git a/arch/arm/mach-msm/Kconfig b/arch/arm/mach-msm/Kconfig index dbbcfeb919d..31e5fd63ec9 100644 --- a/arch/arm/mach-msm/Kconfig +++ b/arch/arm/mach-msm/Kconfig @@ -49,6 +49,8 @@ endchoice config MSM_SOC_REV_A bool +config ARCH_MSM_SCORPIONMP + bool config ARCH_MSM_ARM11 bool diff --git a/arch/arm/mach-msm/board-msm8x60.c b/arch/arm/mach-msm/board-msm8x60.c index 7486a681cc7..9b5eb2b4ae1 100644 --- a/arch/arm/mach-msm/board-msm8x60.c +++ b/arch/arm/mach-msm/board-msm8x60.c @@ -28,8 +28,6 @@ #include <mach/board.h> #include <mach/msm_iomap.h> -void __iomem *gic_cpu_base_addr; - unsigned long clk_get_max_axi_khz(void) { return 0; @@ -44,9 +42,8 @@ static void __init msm8x60_init_irq(void) { unsigned int i; - gic_dist_init(0, MSM_QGIC_DIST_BASE, GIC_PPI_START); - gic_cpu_base_addr = (void *)MSM_QGIC_CPU_BASE; - gic_cpu_init(0, MSM_QGIC_CPU_BASE); + gic_init(0, GIC_PPI_START, MSM_QGIC_DIST_BASE, + (void *)MSM_QGIC_CPU_BASE); /* Edge trigger PPIs except AVS_SVICINT and AVS_SVICINTSWDONE */ writel(0xFFFFD7FF, MSM_QGIC_DIST_BASE + GIC_DIST_CONFIG + 4); diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 950100f19d0..595be7fea31 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -137,7 +137,6 @@ static struct msm_clock msm_clocks[] = { .rating = 200, .read = msm_gpt_read, .mask = CLOCKSOURCE_MASK(32), - .shift = 17, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }, .irq = { @@ -164,7 +163,6 @@ static struct msm_clock msm_clocks[] = { .rating = 300, .read = msm_dgt_read, .mask = CLOCKSOURCE_MASK((32 - MSM_DGT_SHIFT)), - .shift = 24 - MSM_DGT_SHIFT, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }, .irq = { @@ -205,8 +203,7 @@ static void __init msm_timer_init(void) ce->min_delta_ns = clockevent_delta2ns(4, ce); ce->cpumask = cpumask_of(0); - cs->mult = clocksource_hz2mult(clock->freq, cs->shift); - res = clocksource_register(cs); + res = clocksource_register_hz(cs, clock->freq); if (res) printk(KERN_ERR "msm_timer_init: clocksource_register " "failed for %s\n", cs->name); diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c index 82801dbf057..f12f22d09b6 100644 --- a/arch/arm/mach-netx/time.c +++ b/arch/arm/mach-netx/time.c @@ -114,7 +114,6 @@ static struct clocksource clocksource_netx = { .rating = 200, .read = netx_get_cycles, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -151,9 +150,7 @@ static void __init netx_timer_init(void) writel(NETX_GPIO_COUNTER_CTRL_RUN, NETX_GPIO_COUNTER_CTRL(TIMER_CLOCKSOURCE)); - clocksource_netx.mult = - clocksource_hz2mult(CLOCK_TICK_RATE, clocksource_netx.shift); - clocksource_register(&clocksource_netx); + clocksource_register_hz(&clocksource_netx, CLOCK_TICK_RATE); netx_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, netx_clockevent.shift); diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c index 77281260358..9ca32f55728 100644 --- a/arch/arm/mach-ns9xxx/time-ns9360.c +++ b/arch/arm/mach-ns9xxx/time-ns9360.c @@ -35,7 +35,6 @@ static struct clocksource ns9360_clocksource = { .rating = 300, .read = ns9360_clocksource_read, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -148,10 +147,7 @@ static void __init ns9360_timer_init(void) __raw_writel(tc, SYS_TC(TIMER_CLOCKSOURCE)); - ns9360_clocksource.mult = clocksource_hz2mult(ns9360_cpuclock(), - ns9360_clocksource.shift); - - clocksource_register(&ns9360_clocksource); + clocksource_register_hz(&ns9360_clocksource, ns9360_cpuclock()); latch = SH_DIV(ns9360_cpuclock(), HZ, 0); diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c index 1be6a214d88..abb34ff2041 100644 --- a/arch/arm/mach-omap1/time.c +++ b/arch/arm/mach-omap1/time.c @@ -208,7 +208,6 @@ static struct clocksource clocksource_mpu = { .rating = 300, .read = mpu_read, .mask = CLOCKSOURCE_MASK(32), - .shift = 24, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -217,13 +216,10 @@ static void __init omap_init_clocksource(unsigned long rate) static char err[] __initdata = KERN_ERR "%s: can't register clocksource!\n"; - clocksource_mpu.mult - = clocksource_khz2mult(rate/1000, clocksource_mpu.shift); - setup_irq(INT_TIMER2, &omap_mpu_timer2_irq); omap_mpu_timer_start(1, ~0, 1); - if (clocksource_register(&clocksource_mpu)) + if (clocksource_register_hz(&clocksource_mpu, rate)) printk(err, clocksource_mpu.name); } diff --git a/arch/arm/mach-omap2/board-zoom-peripherals.c b/arch/arm/mach-omap2/board-zoom-peripherals.c index 86c9b210295..9db9203667d 100644 --- a/arch/arm/mach-omap2/board-zoom-peripherals.c +++ b/arch/arm/mach-omap2/board-zoom-peripherals.c @@ -216,7 +216,7 @@ static struct omap2_hsmmc_info mmc[] __initdata = { { .name = "wl1271", .mmc = 3, - .caps = MMC_CAP_4_BIT_DATA, + .caps = MMC_CAP_4_BIT_DATA | MMC_CAP_POWER_OFF_CARD, .gpio_wp = -EINVAL, .gpio_cd = -EINVAL, .nonremovable = true, diff --git a/arch/arm/mach-omap2/include/mach/entry-macro.S b/arch/arm/mach-omap2/include/mach/entry-macro.S index 06e64e1fc28..d54c4f89a8b 100644 --- a/arch/arm/mach-omap2/include/mach/entry-macro.S +++ b/arch/arm/mach-omap2/include/mach/entry-macro.S @@ -105,6 +105,35 @@ omap_irq_base: .word 0 9999: .endm +#ifdef CONFIG_SMP + /* We assume that irqstat (the raw value of the IRQ acknowledge + * register) is preserved from the macro above. + * If there is an IPI, we immediately signal end of interrupt + * on the controller, since this requires the original irqstat + * value which we won't easily be able to recreate later. + */ + + .macro test_for_ipi, irqnr, irqstat, base, tmp + bic \irqnr, \irqstat, #0x1c00 + cmp \irqnr, #16 + it cc + strcc \irqstat, [\base, #GIC_CPU_EOI] + it cs + cmpcs \irqnr, \irqnr + .endm + + /* As above, this assumes that irqstat and base are preserved */ + + .macro test_for_ltirq, irqnr, irqstat, base, tmp + bic \irqnr, \irqstat, #0x1c00 + mov \tmp, #0 + cmp \irqnr, #29 + itt eq + moveq \tmp, #1 + streq \irqstat, [\base, #GIC_CPU_EOI] + cmp \tmp, #0 + .endm +#endif /* CONFIG_SMP */ #else /* MULTI_OMAP2 */ @@ -141,74 +170,16 @@ omap_irq_base: .word 0 #ifdef CONFIG_ARCH_OMAP4 +#define HAVE_GET_IRQNR_PREAMBLE +#include <asm/hardware/entry-macro-gic.S> .macro get_irqnr_preamble, base, tmp ldr \base, =OMAP4_IRQ_BASE .endm - /* - * The interrupt numbering scheme is defined in the - * interrupt controller spec. To wit: - * - * Interrupts 0-15 are IPI - * 16-28 are reserved - * 29-31 are local. We allow 30 to be used for the watchdog. - * 32-1020 are global - * 1021-1022 are reserved - * 1023 is "spurious" (no interrupt) - * - * For now, we ignore all local interrupts so only return an - * interrupt if it's between 30 and 1020. The test_for_ipi - * routine below will pick up on IPIs. - * A simple read from the controller will tell us the number - * of the highest priority enabled interrupt. - * We then just need to check whether it is in the - * valid range for an IRQ (30-1020 inclusive). - */ - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - ldr \irqstat, [\base, #GIC_CPU_INTACK] - - ldr \tmp, =1021 - - bic \irqnr, \irqstat, #0x1c00 - - cmp \irqnr, #29 - cmpcc \irqnr, \irqnr - cmpne \irqnr, \tmp - cmpcs \irqnr, \irqnr - .endm #endif -#endif /* MULTI_OMAP2 */ - -#ifdef CONFIG_SMP - /* We assume that irqstat (the raw value of the IRQ acknowledge - * register) is preserved from the macro above. - * If there is an IPI, we immediately signal end of interrupt - * on the controller, since this requires the original irqstat - * value which we won't easily be able to recreate later. - */ - .macro test_for_ipi, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #16 - it cc - strcc \irqstat, [\base, #GIC_CPU_EOI] - it cs - cmpcs \irqnr, \irqnr - .endm - - /* As above, this assumes that irqstat and base are preserved */ - - .macro test_for_ltirq, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - mov \tmp, #0 - cmp \irqnr, #29 - itt eq - moveq \tmp, #1 - streq \irqstat, [\base, #GIC_CPU_EOI] - cmp \tmp, #0 - .endm -#endif /* CONFIG_SMP */ +#endif /* MULTI_OMAP2 */ .macro irq_prio_table .endm diff --git a/arch/arm/mach-omap2/include/mach/omap4-common.h b/arch/arm/mach-omap2/include/mach/omap4-common.h index 2744dfee1ff..5b0270b2893 100644 --- a/arch/arm/mach-omap2/include/mach/omap4-common.h +++ b/arch/arm/mach-omap2/include/mach/omap4-common.h @@ -24,7 +24,6 @@ extern void __iomem *l2cache_base; #endif -extern void __iomem *gic_cpu_base_addr; extern void __iomem *gic_dist_base_addr; extern void __init gic_init_irq(void); diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 40562ddd3ee..a1939b1e6f8 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -297,7 +297,7 @@ static int __init _omap2_init_reprogram_sdrc(void) return 0; dpll3_m2_ck = clk_get(NULL, "dpll3_m2_ck"); - if (!dpll3_m2_ck) + if (IS_ERR(dpll3_m2_ck)) return -EINVAL; rate = clk_get_rate(dpll3_m2_ck); diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 9fed631ba04..b66cfe8bc46 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -37,7 +37,7 @@ void __cpuinit platform_secondary_init(unsigned int cpu) * core (e.g. timer irq), then they will not have been enabled * for us: do so */ - gic_cpu_init(0, gic_cpu_base_addr); + gic_secondary_init(0); /* * Synchronise with the boot thread. diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 2f895553e6a..666e852988d 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -26,21 +26,22 @@ void __iomem *l2cache_base; #endif -void __iomem *gic_cpu_base_addr; void __iomem *gic_dist_base_addr; void __init gic_init_irq(void) { + void __iomem *gic_cpu_base; + /* Static mapping, never released */ gic_dist_base_addr = ioremap(OMAP44XX_GIC_DIST_BASE, SZ_4K); BUG_ON(!gic_dist_base_addr); - gic_dist_init(0, gic_dist_base_addr, 29); /* Static mapping, never released */ - gic_cpu_base_addr = ioremap(OMAP44XX_GIC_CPU_BASE, SZ_512); - BUG_ON(!gic_cpu_base_addr); - gic_cpu_init(0, gic_cpu_base_addr); + gic_cpu_base = ioremap(OMAP44XX_GIC_CPU_BASE, SZ_512); + BUG_ON(!gic_cpu_base); + + gic_init(0, 29, gic_dist_base_addr, gic_cpu_base); } #ifdef CONFIG_CACHE_L2X0 diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c index 5e81517a7af..a8afb610c7d 100644 --- a/arch/arm/mach-omap2/pm-debug.c +++ b/arch/arm/mach-omap2/pm-debug.c @@ -161,6 +161,23 @@ void omap2_pm_dump(int mode, int resume, unsigned int us) printk(KERN_INFO "%-20s: 0x%08x\n", regs[i].name, regs[i].val); } +void omap2_pm_wakeup_on_timer(u32 seconds, u32 milliseconds) +{ + u32 tick_rate, cycles; + + if (!seconds && !milliseconds) + return; + + tick_rate = clk_get_rate(omap_dm_timer_get_fclk(gptimer_wakeup)); + cycles = tick_rate * seconds + tick_rate * milliseconds / 1000; + omap_dm_timer_stop(gptimer_wakeup); + omap_dm_timer_set_load_start(gptimer_wakeup, 0, 0xffffffff - cycles); + + pr_info("PM: Resume timer in %u.%03u secs" + " (%d ticks at %d ticks/sec.)\n", + seconds, milliseconds, cycles, tick_rate); +} + #ifdef CONFIG_DEBUG_FS #include <linux/debugfs.h> #include <linux/seq_file.h> @@ -354,23 +371,6 @@ void pm_dbg_update_time(struct powerdomain *pwrdm, int prev) pwrdm->timer = t; } -void omap2_pm_wakeup_on_timer(u32 seconds, u32 milliseconds) -{ - u32 tick_rate, cycles; - - if (!seconds && !milliseconds) - return; - - tick_rate = clk_get_rate(omap_dm_timer_get_fclk(gptimer_wakeup)); - cycles = tick_rate * seconds + tick_rate * milliseconds / 1000; - omap_dm_timer_stop(gptimer_wakeup); - omap_dm_timer_set_load_start(gptimer_wakeup, 0, 0xffffffff - cycles); - - pr_info("PM: Resume timer in %u.%03u secs" - " (%d ticks at %d ticks/sec.)\n", - seconds, milliseconds, cycles, tick_rate); -} - static int clkdm_dbg_show_counter(struct clockdomain *clkdm, void *user) { struct seq_file *s = (struct seq_file *)user; diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c index c85923e56b8..aaeea49b9bd 100644 --- a/arch/arm/mach-omap2/pm24xx.c +++ b/arch/arm/mach-omap2/pm24xx.c @@ -53,6 +53,19 @@ #include <plat/powerdomain.h> #include <plat/clockdomain.h> +#ifdef CONFIG_SUSPEND +static suspend_state_t suspend_state = PM_SUSPEND_ON; +static inline bool is_suspending(void) +{ + return (suspend_state != PM_SUSPEND_ON); +} +#else +static inline bool is_suspending(void) +{ + return false; +} +#endif + static void (*omap2_sram_idle)(void); static void (*omap2_sram_suspend)(u32 dllctrl, void __iomem *sdrc_dlla_ctrl, void __iomem *sdrc_power); @@ -120,8 +133,9 @@ static void omap2_enter_full_retention(void) goto no_sleep; /* Block console output in case it is on one of the OMAP UARTs */ - if (try_acquire_console_sem()) - goto no_sleep; + if (!is_suspending()) + if (try_acquire_console_sem()) + goto no_sleep; omap_uart_prepare_idle(0); omap_uart_prepare_idle(1); @@ -136,7 +150,8 @@ static void omap2_enter_full_retention(void) omap_uart_resume_idle(1); omap_uart_resume_idle(0); - release_console_sem(); + if (!is_suspending()) + release_console_sem(); no_sleep: if (omap2_pm_debug) { @@ -284,6 +299,12 @@ out: local_irq_enable(); } +static int omap2_pm_begin(suspend_state_t state) +{ + suspend_state = state; + return 0; +} + static int omap2_pm_prepare(void) { /* We cannot sleep in idle until we have resumed */ @@ -333,10 +354,17 @@ static void omap2_pm_finish(void) enable_hlt(); } +static void omap2_pm_end(void) +{ + suspend_state = PM_SUSPEND_ON; +} + static struct platform_suspend_ops omap_pm_ops = { + .begin = omap2_pm_begin, .prepare = omap2_pm_prepare, .enter = omap2_pm_enter, .finish = omap2_pm_finish, + .end = omap2_pm_end, .valid = suspend_valid_only_mem, }; diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 0ec8a04b747..648b8c50d02 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -50,6 +50,19 @@ #include "sdrc.h" #include "control.h" +#ifdef CONFIG_SUSPEND +static suspend_state_t suspend_state = PM_SUSPEND_ON; +static inline bool is_suspending(void) +{ + return (suspend_state != PM_SUSPEND_ON); +} +#else +static inline bool is_suspending(void) +{ + return false; +} +#endif + /* Scratchpad offsets */ #define OMAP343X_TABLE_ADDRESS_OFFSET 0xc4 #define OMAP343X_TABLE_VALUE_OFFSET 0xc0 @@ -387,10 +400,11 @@ void omap_sram_idle(void) } /* Block console output in case it is on one of the OMAP UARTs */ - if (per_next_state < PWRDM_POWER_ON || - core_next_state < PWRDM_POWER_ON) - if (try_acquire_console_sem()) - goto console_still_active; + if (!is_suspending()) + if (per_next_state < PWRDM_POWER_ON || + core_next_state < PWRDM_POWER_ON) + if (try_acquire_console_sem()) + goto console_still_active; /* PER */ if (per_next_state < PWRDM_POWER_ON) { @@ -470,7 +484,8 @@ void omap_sram_idle(void) omap_uart_resume_idle(3); } - release_console_sem(); + if (!is_suspending()) + release_console_sem(); console_still_active: /* Disable IO-PAD and IO-CHAIN wakeup */ @@ -514,8 +529,6 @@ out: } #ifdef CONFIG_SUSPEND -static suspend_state_t suspend_state; - static int omap3_pm_prepare(void) { disable_hlt(); diff --git a/arch/arm/mach-omap2/prcm-common.h b/arch/arm/mach-omap2/prcm-common.h index 298a22a754e..f81acee4738 100644 --- a/arch/arm/mach-omap2/prcm-common.h +++ b/arch/arm/mach-omap2/prcm-common.h @@ -243,13 +243,14 @@ #define OMAP24XX_EN_GPT1_MASK (1 << 0) /* PM_WKST_WKUP, CM_IDLEST_WKUP shared bits */ -#define OMAP24XX_ST_GPIOS_SHIFT (1 << 2) -#define OMAP24XX_ST_GPIOS_MASK 2 -#define OMAP24XX_ST_GPT1_SHIFT (1 << 0) -#define OMAP24XX_ST_GPT1_MASK 0 +#define OMAP24XX_ST_GPIOS_SHIFT 2 +#define OMAP24XX_ST_GPIOS_MASK (1 << 2) +#define OMAP24XX_ST_GPT1_SHIFT 0 +#define OMAP24XX_ST_GPT1_MASK (1 << 0) /* CM_IDLEST_MDM and PM_WKST_MDM shared bits */ -#define OMAP2430_ST_MDM_SHIFT (1 << 0) +#define OMAP2430_ST_MDM_SHIFT 0 +#define OMAP2430_ST_MDM_MASK (1 << 0) /* 3430 register bits shared between CM & PRM registers */ diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c index e13c29eecf2..a7816dbdc6b 100644 --- a/arch/arm/mach-omap2/timer-gp.c +++ b/arch/arm/mach-omap2/timer-gp.c @@ -195,7 +195,6 @@ static struct clocksource clocksource_gpt = { .rating = 300, .read = clocksource_read_cycles, .mask = CLOCKSOURCE_MASK(32), - .shift = 24, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -220,9 +219,7 @@ static void __init omap2_gp_clocksource_init(void) omap_dm_timer_set_load_start(gpt, 1, 0); - clocksource_gpt.mult = - clocksource_khz2mult(tick_rate/1000, clocksource_gpt.shift); - if (clocksource_register(&clocksource_gpt)) + if (clocksource_register_hz(&clocksource_gpt, tick_rate)) printk(err2, clocksource_gpt.name); } #endif diff --git a/arch/arm/mach-orion5x/include/mach/io.h b/arch/arm/mach-orion5x/include/mach/io.h index c47b033bd99..c5196101a23 100644 --- a/arch/arm/mach-orion5x/include/mach/io.h +++ b/arch/arm/mach-orion5x/include/mach/io.h @@ -38,8 +38,8 @@ __arch_iounmap(void __iomem *addr) __iounmap(addr); } -#define __arch_ioremap(p, s, m) __arch_ioremap(p, s, m) -#define __arch_iounmap(a) __arch_iounmap(a) +#define __arch_ioremap __arch_ioremap +#define __arch_iounmap __arch_iounmap #define __io(a) __typesafe_io(a) #define __mem_pci(a) (a) diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig index 716a2e159c2..c98d81ff250 100644 --- a/arch/arm/mach-pxa/Kconfig +++ b/arch/arm/mach-pxa/Kconfig @@ -541,6 +541,7 @@ config MACH_ICONTROL config ARCH_PXA_ESERIES bool "PXA based Toshiba e-series PDAs" select PXA25x + select FB_W100 config MACH_E330 bool "Toshiba e330" diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c index d2060a1d1d6..e5c9932b758 100644 --- a/arch/arm/mach-pxa/palmtx.c +++ b/arch/arm/mach-pxa/palmtx.c @@ -241,7 +241,8 @@ static inline void palmtx_keys_init(void) {} /****************************************************************************** * NAND Flash ******************************************************************************/ -#if defined(CONFIG_MTD_NAND_GPIO) || defined(CONFIG_MTD_NAND_GPIO_MODULE) +#if defined(CONFIG_MTD_NAND_PLATFORM) || \ + defined(CONFIG_MTD_NAND_PLATFORM_MODULE) static void palmtx_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { diff --git a/arch/arm/mach-pxa/sleep.S b/arch/arm/mach-pxa/sleep.S index 52c30b01a67..ae008110db4 100644 --- a/arch/arm/mach-pxa/sleep.S +++ b/arch/arm/mach-pxa/sleep.S @@ -353,8 +353,8 @@ resume_turn_on_mmu: @ Let us ensure we jump to resume_after_mmu only when the mcr above @ actually took effect. They call it the "cpwait" operation. - mrc p15, 0, r1, c2, c0, 0 @ queue a dependency on CP15 - sub pc, r2, r1, lsr #32 @ jump to virtual addr + mrc p15, 0, r0, c2, c0, 0 @ queue a dependency on CP15 + sub pc, r2, r0, lsr #32 @ jump to virtual addr nop nop nop diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index 293e40aeaf2..e7f64d9b4f2 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -17,11 +17,11 @@ #include <linux/interrupt.h> #include <linux/clockchips.h> #include <linux/sched.h> -#include <linux/cnt32_to_63.h> #include <asm/div64.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> +#include <asm/sched_clock.h> #include <mach/regs-ost.h> /* @@ -32,29 +32,18 @@ * long as there is always less than 582 seconds between successive * calls to sched_clock() which should always be the case in practice. */ +static DEFINE_CLOCK_DATA(cd); -#define OSCR2NS_SCALE_FACTOR 10 - -static unsigned long oscr2ns_scale; - -static void __init set_oscr2ns_scale(unsigned long oscr_rate) +unsigned long long notrace sched_clock(void) { - unsigned long long v = 1000000000ULL << OSCR2NS_SCALE_FACTOR; - do_div(v, oscr_rate); - oscr2ns_scale = v; - /* - * We want an even value to automatically clear the top bit - * returned by cnt32_to_63() without an additional run time - * instruction. So if the LSB is 1 then round it up. - */ - if (oscr2ns_scale & 1) - oscr2ns_scale++; + u32 cyc = OSCR; + return cyc_to_sched_clock(&cd, cyc, (u32)~0); } -unsigned long long sched_clock(void) +static void notrace pxa_update_sched_clock(void) { - unsigned long long v = cnt32_to_63(OSCR); - return (v * oscr2ns_scale) >> OSCR2NS_SCALE_FACTOR; + u32 cyc = OSCR; + update_sched_clock(&cd, cyc, (u32)~0); } @@ -127,7 +116,6 @@ static struct clocksource cksrc_pxa_oscr0 = { .rating = 200, .read = pxa_read_oscr, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -145,7 +133,7 @@ static void __init pxa_timer_init(void) OIER = 0; OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3; - set_oscr2ns_scale(clock_tick_rate); + init_sched_clock(&cd, pxa_update_sched_clock, 32, clock_tick_rate); ckevt_pxa_osmr0.mult = div_sc(clock_tick_rate, NSEC_PER_SEC, ckevt_pxa_osmr0.shift); @@ -155,12 +143,9 @@ static void __init pxa_timer_init(void) clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1; ckevt_pxa_osmr0.cpumask = cpumask_of(0); - cksrc_pxa_oscr0.mult = - clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift); - setup_irq(IRQ_OST0, &pxa_ost0_irq); - clocksource_register(&cksrc_pxa_oscr0); + clocksource_register_hz(&cksrc_pxa_oscr0, clock_tick_rate); clockevents_register_device(&ckevt_pxa_osmr0); } diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 3d915b1ccdb..1c6602cf50e 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -49,12 +49,11 @@ #include <mach/platform.h> #include <mach/irqs.h> -#include <plat/timer-sp.h> +#include <asm/hardware/timer-sp.h> -#include "core.h" +#include <plat/sched_clock.h> -/* used by entry-macro.S and platsmp.c */ -void __iomem *gic_cpu_base_addr; +#include "core.h" #ifdef CONFIG_ZONE_DMA /* @@ -657,6 +656,12 @@ void realview_leds_event(led_event_t ledevt) #endif /* CONFIG_LEDS */ /* + * The sched_clock counter + */ +#define REFCOUNTER (__io_address(REALVIEW_SYS_BASE) + \ + REALVIEW_SYS_24MHz_OFFSET) + +/* * Where is the timer (VA)? */ void __iomem *timer0_va_base; @@ -671,6 +676,8 @@ void __init realview_timer_init(unsigned int timer_irq) { u32 val; + versatile_sched_clock_init(REFCOUNTER, 24000000); + /* * set clock frequency: * REALVIEW_REFCLK is 32KHz diff --git a/arch/arm/mach-realview/core.h b/arch/arm/mach-realview/core.h index 781bca68a9f..693239ddc39 100644 --- a/arch/arm/mach-realview/core.h +++ b/arch/arm/mach-realview/core.h @@ -53,7 +53,6 @@ extern struct platform_device realview_i2c_device; extern struct mmci_platform_data realview_mmc0_plat_data; extern struct mmci_platform_data realview_mmc1_plat_data; extern struct clcd_board clcd_plat_data; -extern void __iomem *gic_cpu_base_addr; extern void __iomem *timer0_va_base; extern void __iomem *timer1_va_base; extern void __iomem *timer2_va_base; diff --git a/arch/arm/mach-realview/headsmp.S b/arch/arm/mach-realview/headsmp.S index 4075473cf68..b34be4554d4 100644 --- a/arch/arm/mach-realview/headsmp.S +++ b/arch/arm/mach-realview/headsmp.S @@ -35,5 +35,6 @@ pen: ldr r7, [r6] */ b secondary_startup + .align 1: .long . .long pen_release diff --git a/arch/arm/mach-realview/include/mach/entry-macro.S b/arch/arm/mach-realview/include/mach/entry-macro.S index 340a5c27694..4071164aeba 100644 --- a/arch/arm/mach-realview/include/mach/entry-macro.S +++ b/arch/arm/mach-realview/include/mach/entry-macro.S @@ -8,74 +8,11 @@ * warranty of any kind, whether express or implied. */ #include <mach/hardware.h> -#include <asm/hardware/gic.h> +#include <asm/hardware/entry-macro-gic.S> .macro disable_fiq .endm - .macro get_irqnr_preamble, base, tmp - ldr \base, =gic_cpu_base_addr - ldr \base, [\base] - .endm - .macro arch_ret_to_user, tmp1, tmp2 .endm - /* - * The interrupt numbering scheme is defined in the - * interrupt controller spec. To wit: - * - * Interrupts 0-15 are IPI - * 16-28 are reserved - * 29-31 are local. We allow 30 to be used for the watchdog. - * 32-1020 are global - * 1021-1022 are reserved - * 1023 is "spurious" (no interrupt) - * - * For now, we ignore all local interrupts so only return an interrupt if it's - * between 30 and 1020. The test_for_ipi routine below will pick up on IPIs. - * - * A simple read from the controller will tell us the number of the highest - * priority enabled interrupt. We then just need to check whether it is in the - * valid range for an IRQ (30-1020 inclusive). - */ - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - - ldr \irqstat, [\base, #GIC_CPU_INTACK] /* bits 12-10 = src CPU, 9-0 = int # */ - - ldr \tmp, =1021 - - bic \irqnr, \irqstat, #0x1c00 - - cmp \irqnr, #29 - cmpcc \irqnr, \irqnr - cmpne \irqnr, \tmp - cmpcs \irqnr, \irqnr - - .endm - - /* We assume that irqstat (the raw value of the IRQ acknowledge - * register) is preserved from the macro above. - * If there is an IPI, we immediately signal end of interrupt on the - * controller, since this requires the original irqstat value which - * we won't easily be able to recreate later. - */ - - .macro test_for_ipi, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #16 - strcc \irqstat, [\base, #GIC_CPU_EOI] - cmpcs \irqnr, \irqnr - .endm - - /* As above, this assumes that irqstat and base are preserved.. */ - - .macro test_for_ltirq, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - mov \tmp, #0 - cmp \irqnr, #29 - moveq \tmp, #1 - streq \irqstat, [\base, #GIC_CPU_EOI] - cmp \tmp, #0 - .endm diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c index bb8d6c4e431..a22bf67f2f7 100644 --- a/arch/arm/mach-realview/platsmp.c +++ b/arch/arm/mach-realview/platsmp.c @@ -71,7 +71,7 @@ void __cpuinit platform_secondary_init(unsigned int cpu) * core (e.g. timer irq), then they will not have been enabled * for us: do so */ - gic_cpu_init(0, gic_cpu_base_addr); + gic_secondary_init(0); /* * let the primary processor know we're out of the diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c index f2697106f80..6ef5c5e528b 100644 --- a/arch/arm/mach-realview/realview_eb.c +++ b/arch/arm/mach-realview/realview_eb.c @@ -364,21 +364,19 @@ static void __init gic_init_irq(void) writel(0x00000000, __io_address(REALVIEW_SYS_LOCK)); /* core tile GIC, primary */ - gic_cpu_base_addr = __io_address(REALVIEW_EB11MP_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_EB11MP_GIC_DIST_BASE), 29); - gic_cpu_init(0, gic_cpu_base_addr); + gic_init(0, 29, __io_address(REALVIEW_EB11MP_GIC_DIST_BASE), + __io_address(REALVIEW_EB11MP_GIC_CPU_BASE)); #ifndef CONFIG_REALVIEW_EB_ARM11MP_REVB /* board GIC, secondary */ - gic_dist_init(1, __io_address(REALVIEW_EB_GIC_DIST_BASE), 64); - gic_cpu_init(1, __io_address(REALVIEW_EB_GIC_CPU_BASE)); + gic_init(1, 64, __io_address(REALVIEW_EB_GIC_DIST_BASE), + __io_address(REALVIEW_EB_GIC_CPU_BASE)); gic_cascade_irq(1, IRQ_EB11MP_EB_IRQ1); #endif } else { /* board GIC, primary */ - gic_cpu_base_addr = __io_address(REALVIEW_EB_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_EB_GIC_DIST_BASE), 29); - gic_cpu_init(0, gic_cpu_base_addr); + gic_init(0, 29, __io_address(REALVIEW_EB_GIC_DIST_BASE), + __io_address(REALVIEW_EB_GIC_CPU_BASE)); } } diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c index a4125619d71..cbdc97a5685 100644 --- a/arch/arm/mach-realview/realview_pb1176.c +++ b/arch/arm/mach-realview/realview_pb1176.c @@ -304,13 +304,14 @@ static struct platform_device char_lcd_device = { static void __init gic_init_irq(void) { /* ARM1176 DevChip GIC, primary */ - gic_cpu_base_addr = __io_address(REALVIEW_DC1176_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_DC1176_GIC_DIST_BASE), IRQ_DC1176_GIC_START); - gic_cpu_init(0, gic_cpu_base_addr); + gic_init(0, IRQ_DC1176_GIC_START, + __io_address(REALVIEW_DC1176_GIC_DIST_BASE), + __io_address(REALVIEW_DC1176_GIC_CPU_BASE)); /* board GIC, secondary */ - gic_dist_init(1, __io_address(REALVIEW_PB1176_GIC_DIST_BASE), IRQ_PB1176_GIC_START); - gic_cpu_init(1, __io_address(REALVIEW_PB1176_GIC_CPU_BASE)); + gic_init(1, IRQ_PB1176_GIC_START, + __io_address(REALVIEW_PB1176_GIC_DIST_BASE), + __io_address(REALVIEW_PB1176_GIC_CPU_BASE)); gic_cascade_irq(1, IRQ_DC1176_PB_IRQ1); } diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c index 117b95b2ca1..8e8ab7d29a6 100644 --- a/arch/arm/mach-realview/realview_pb11mp.c +++ b/arch/arm/mach-realview/realview_pb11mp.c @@ -309,13 +309,13 @@ static void __init gic_init_irq(void) writel(0x00000000, __io_address(REALVIEW_SYS_LOCK)); /* ARM11MPCore test chip GIC, primary */ - gic_cpu_base_addr = __io_address(REALVIEW_TC11MP_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_TC11MP_GIC_DIST_BASE), 29); - gic_cpu_init(0, gic_cpu_base_addr); + gic_init(0, 29, __io_address(REALVIEW_TC11MP_GIC_DIST_BASE), + __io_address(REALVIEW_TC11MP_GIC_CPU_BASE)); /* board GIC, secondary */ - gic_dist_init(1, __io_address(REALVIEW_PB11MP_GIC_DIST_BASE), IRQ_PB11MP_GIC_START); - gic_cpu_init(1, __io_address(REALVIEW_PB11MP_GIC_CPU_BASE)); + gic_init(1, IRQ_PB11MP_GIC_START, + __io_address(REALVIEW_PB11MP_GIC_DIST_BASE), + __io_address(REALVIEW_PB11MP_GIC_CPU_BASE)); gic_cascade_irq(1, IRQ_TC11MP_PB_IRQ1); } diff --git a/arch/arm/mach-realview/realview_pba8.c b/arch/arm/mach-realview/realview_pba8.c index 929b8dc12e8..841118e3e11 100644 --- a/arch/arm/mach-realview/realview_pba8.c +++ b/arch/arm/mach-realview/realview_pba8.c @@ -273,9 +273,9 @@ static struct platform_device pmu_device = { static void __init gic_init_irq(void) { /* ARM PB-A8 on-board GIC */ - gic_cpu_base_addr = __io_address(REALVIEW_PBA8_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_PBA8_GIC_DIST_BASE), IRQ_PBA8_GIC_START); - gic_cpu_init(0, __io_address(REALVIEW_PBA8_GIC_CPU_BASE)); + gic_init(0, IRQ_PBA8_GIC_START, + __io_address(REALVIEW_PBA8_GIC_DIST_BASE), + __io_address(REALVIEW_PBA8_GIC_CPU_BASE)); } static void __init realview_pba8_timer_init(void) diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c index b9f9e20031a..02b755b009d 100644 --- a/arch/arm/mach-realview/realview_pbx.c +++ b/arch/arm/mach-realview/realview_pbx.c @@ -313,15 +313,12 @@ static void __init gic_init_irq(void) { /* ARM PBX on-board GIC */ if (core_tile_pbx11mp() || core_tile_pbxa9mp()) { - gic_cpu_base_addr = __io_address(REALVIEW_PBX_TILE_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_PBX_TILE_GIC_DIST_BASE), - 29); - gic_cpu_init(0, __io_address(REALVIEW_PBX_TILE_GIC_CPU_BASE)); + gic_init(0, 29, __io_address(REALVIEW_PBX_TILE_GIC_DIST_BASE), + __io_address(REALVIEW_PBX_TILE_GIC_CPU_BASE)); } else { - gic_cpu_base_addr = __io_address(REALVIEW_PBX_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_PBX_GIC_DIST_BASE), - IRQ_PBX_GIC_START); - gic_cpu_init(0, __io_address(REALVIEW_PBX_GIC_CPU_BASE)); + gic_init(0, IRQ_PBX_GIC_START, + __io_address(REALVIEW_PBX_GIC_DIST_BASE), + __io_address(REALVIEW_PBX_GIC_CPU_BASE)); } } diff --git a/arch/arm/mach-s3c2412/Kconfig b/arch/arm/mach-s3c2412/Kconfig index e824a85c89a..e82ab4aa7ab 100644 --- a/arch/arm/mach-s3c2412/Kconfig +++ b/arch/arm/mach-s3c2412/Kconfig @@ -16,7 +16,7 @@ config CPU_S3C2412 config CPU_S3C2412_ONLY bool depends on ARCH_S3C2410 && !CPU_S3C2400 && !CPU_S3C2410 && \ - !CPU_2416 && !CPU_S3C2440 && !CPU_S3C2442 && \ + !CPU_S3C2416 && !CPU_S3C2440 && !CPU_S3C2442 && \ !CPU_S3C2443 && CPU_S3C2412 default y if CPU_S3C2412 @@ -28,9 +28,16 @@ config S3C2412_DMA config S3C2412_PM bool + select S3C2412_PM_SLEEP help Internal config node to apply S3C2412 power management +config S3C2412_PM_SLEEP + bool + help + Internal config node to apply sleep for S3C2412 power management. + Can be selected by another SoCs with similar sleep procedure. + # Note, the S3C2412 IOtiming support is in plat-s3c24xx config S3C2412_CPUFREQ diff --git a/arch/arm/mach-s3c2412/Makefile b/arch/arm/mach-s3c2412/Makefile index 530ec46cbae..6c48a91ea39 100644 --- a/arch/arm/mach-s3c2412/Makefile +++ b/arch/arm/mach-s3c2412/Makefile @@ -14,7 +14,8 @@ obj-$(CONFIG_CPU_S3C2412) += irq.o obj-$(CONFIG_CPU_S3C2412) += clock.o obj-$(CONFIG_CPU_S3C2412) += gpio.o obj-$(CONFIG_S3C2412_DMA) += dma.o -obj-$(CONFIG_S3C2412_PM) += pm.o sleep.o +obj-$(CONFIG_S3C2412_PM) += pm.o +obj-$(CONFIG_S3C2412_PM_SLEEP) += sleep.o obj-$(CONFIG_S3C2412_CPUFREQ) += cpu-freq.o # Machine support diff --git a/arch/arm/mach-s3c2416/Kconfig b/arch/arm/mach-s3c2416/Kconfig index 87b9c9f003b..df8d14974c9 100644 --- a/arch/arm/mach-s3c2416/Kconfig +++ b/arch/arm/mach-s3c2416/Kconfig @@ -27,6 +27,7 @@ config S3C2416_DMA config S3C2416_PM bool + select S3C2412_PM_SLEEP help Internal config node to apply S3C2416 power management @@ -35,9 +36,12 @@ menu "S3C2416 Machines" config MACH_SMDK2416 bool "SMDK2416" select CPU_S3C2416 + select MACH_SMDK select S3C_DEV_FB select S3C_DEV_HSMMC select S3C_DEV_HSMMC1 + select S3C_DEV_NAND + select S3C_DEV_USB_HOST select S3C2416_PM if PM help Say Y here if you are using an SMDK2416 diff --git a/arch/arm/mach-s3c2440/Kconfig b/arch/arm/mach-s3c2440/Kconfig index ff024a6c0f8..a0cb2581894 100644 --- a/arch/arm/mach-s3c2440/Kconfig +++ b/arch/arm/mach-s3c2440/Kconfig @@ -18,6 +18,7 @@ config CPU_S3C2440 config CPU_S3C2442 bool select CPU_ARM920T + select S3C_GPIO_PULL_DOWN select S3C2410_CLOCK select S3C2410_GPIO select S3C2410_PM if PM @@ -178,6 +179,9 @@ config MACH_MINI2440 bool "MINI2440 development board" select CPU_S3C2440 select EEPROM_AT24 + select NEW_LEDS + select LEDS_CLASS + select LEDS_TRIGGER select LEDS_TRIGGER_BACKLIGHT select S3C_DEV_NAND select S3C_DEV_USB_HOST diff --git a/arch/arm/mach-s3c2440/s3c2440.c b/arch/arm/mach-s3c2440/s3c2440.c index d50f3ae6173..f7663f731ea 100644 --- a/arch/arm/mach-s3c2440/s3c2440.c +++ b/arch/arm/mach-s3c2440/s3c2440.c @@ -46,9 +46,6 @@ int __init s3c2440_init(void) { printk("S3C2440: Initialising architecture\n"); - s3c24xx_gpiocfg_default.set_pull = s3c_gpio_setpull_1up; - s3c24xx_gpiocfg_default.get_pull = s3c_gpio_getpull_1up; - /* change irq for watchdog */ s3c_device_wdt.resource[1].start = IRQ_S3C2440_WDT; @@ -58,3 +55,11 @@ int __init s3c2440_init(void) return sysdev_register(&s3c2440_sysdev); } + +void __init s3c2440_map_io(void) +{ + s3c244x_map_io(); + + s3c24xx_gpiocfg_default.set_pull = s3c_gpio_setpull_1up; + s3c24xx_gpiocfg_default.get_pull = s3c_gpio_getpull_1up; +} diff --git a/arch/arm/mach-s3c2440/s3c2442.c b/arch/arm/mach-s3c2440/s3c2442.c index 188ad1e57dc..ecf81354655 100644 --- a/arch/arm/mach-s3c2440/s3c2442.c +++ b/arch/arm/mach-s3c2440/s3c2442.c @@ -32,6 +32,7 @@ #include <linux/interrupt.h> #include <linux/ioport.h> #include <linux/mutex.h> +#include <linux/gpio.h> #include <linux/clk.h> #include <linux/io.h> @@ -43,6 +44,11 @@ #include <plat/clock.h> #include <plat/cpu.h> +#include <plat/s3c244x.h> + +#include <plat/gpio-core.h> +#include <plat/gpio-cfg.h> +#include <plat/gpio-cfg-helpers.h> /* S3C2442 extended clock support */ @@ -163,3 +169,11 @@ int __init s3c2442_init(void) return sysdev_register(&s3c2442_sysdev); } + +void __init s3c2442_map_io(void) +{ + s3c244x_map_io(); + + s3c24xx_gpiocfg_default.set_pull = s3c_gpio_setpull_1down; + s3c24xx_gpiocfg_default.get_pull = s3c_gpio_getpull_1down; +} diff --git a/arch/arm/mach-s3c2443/Kconfig b/arch/arm/mach-s3c2443/Kconfig index 4fef723126f..31babec90ce 100644 --- a/arch/arm/mach-s3c2443/Kconfig +++ b/arch/arm/mach-s3c2443/Kconfig @@ -5,6 +5,7 @@ config CPU_S3C2443 bool depends on ARCH_S3C2410 + select CPU_ARM920T select S3C2443_DMA if S3C2410_DMA select CPU_LLSERIAL_S3C2440 select SAMSUNG_CLKSRC diff --git a/arch/arm/mach-s5pv210/mach-aquila.c b/arch/arm/mach-s5pv210/mach-aquila.c index 28677caf361..461aa035afc 100644 --- a/arch/arm/mach-s5pv210/mach-aquila.c +++ b/arch/arm/mach-s5pv210/mach-aquila.c @@ -378,6 +378,12 @@ static struct max8998_regulator_data aquila_regulators[] = { static struct max8998_platform_data aquila_max8998_pdata = { .num_regulators = ARRAY_SIZE(aquila_regulators), .regulators = aquila_regulators, + .buck1_set1 = S5PV210_GPH0(3), + .buck1_set2 = S5PV210_GPH0(4), + .buck2_set3 = S5PV210_GPH0(5), + .buck1_max_voltage1 = 1200000, + .buck1_max_voltage2 = 1200000, + .buck2_max_voltage = 1200000, }; #endif diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c index b1dcf964a76..e22d5112fd4 100644 --- a/arch/arm/mach-s5pv210/mach-goni.c +++ b/arch/arm/mach-s5pv210/mach-goni.c @@ -518,6 +518,12 @@ static struct max8998_regulator_data goni_regulators[] = { static struct max8998_platform_data goni_max8998_pdata = { .num_regulators = ARRAY_SIZE(goni_regulators), .regulators = goni_regulators, + .buck1_set1 = S5PV210_GPH0(3), + .buck1_set2 = S5PV210_GPH0(4), + .buck2_set3 = S5PV210_GPH0(5), + .buck1_max_voltage1 = 1200000, + .buck1_max_voltage2 = 1200000, + .buck2_max_voltage = 1200000, }; #endif diff --git a/arch/arm/mach-s5pv310/cpu.c b/arch/arm/mach-s5pv310/cpu.c index 82ce4aa6d61..72ab289e781 100644 --- a/arch/arm/mach-s5pv310/cpu.c +++ b/arch/arm/mach-s5pv310/cpu.c @@ -24,8 +24,6 @@ #include <mach/regs-irq.h> -void __iomem *gic_cpu_base_addr; - extern int combiner_init(unsigned int combiner_nr, void __iomem *base, unsigned int irq_start); extern void combiner_cascade_irq(unsigned int combiner_nr, unsigned int irq); @@ -122,9 +120,7 @@ void __init s5pv310_init_irq(void) { int irq; - gic_cpu_base_addr = S5P_VA_GIC_CPU; - gic_dist_init(0, S5P_VA_GIC_DIST, IRQ_LOCALTIMER); - gic_cpu_init(0, S5P_VA_GIC_CPU); + gic_init(0, IRQ_LOCALTIMER, S5P_VA_GIC_DIST, S5P_VA_GIC_CPU); for (irq = 0; irq < MAX_COMBINER_NR; irq++) { combiner_init(irq, (void __iomem *)S5P_VA_COMBINER(irq), diff --git a/arch/arm/mach-s5pv310/include/mach/smp.h b/arch/arm/mach-s5pv310/include/mach/smp.h index 77e5a841f74..393ccbd52c4 100644 --- a/arch/arm/mach-s5pv310/include/mach/smp.h +++ b/arch/arm/mach-s5pv310/include/mach/smp.h @@ -8,8 +8,6 @@ #include <asm/hardware/gic.h> -extern void __iomem *gic_cpu_base_addr; - /* * We use IRQ1 as the IPI */ diff --git a/arch/arm/mach-s5pv310/platsmp.c b/arch/arm/mach-s5pv310/platsmp.c index 98c04748ed8..34093b069f6 100644 --- a/arch/arm/mach-s5pv310/platsmp.c +++ b/arch/arm/mach-s5pv310/platsmp.c @@ -64,7 +64,7 @@ void __cpuinit platform_secondary_init(unsigned int cpu) * core (e.g. timer irq), then they will not have been enabled * for us: do so */ - gic_cpu_init(0, gic_cpu_base_addr); + gic_secondary_init(0); /* * let the primary processor know we're out of the diff --git a/arch/arm/mach-s5pv310/time.c b/arch/arm/mach-s5pv310/time.c index 01b012ad1bf..b262d461533 100644 --- a/arch/arm/mach-s5pv310/time.c +++ b/arch/arm/mach-s5pv310/time.c @@ -211,7 +211,6 @@ struct clocksource pwm_clocksource = { .rating = 250, .read = s5pv310_pwm4_read, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS , }; @@ -230,10 +229,7 @@ static void __init s5pv310_clocksource_init(void) s5pv310_pwm_init(4, ~0); s5pv310_pwm_start(4, 1); - pwm_clocksource.mult = - clocksource_khz2mult(clock_rate/1000, pwm_clocksource.shift); - - if (clocksource_register(&pwm_clocksource)) + if (clocksource_register_hz(&pwm_clocksource, clock_rate)) panic("%s: can't register clocksource\n", pwm_clocksource.name); } diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig index 5da8c35aa0d..42625e4d949 100644 --- a/arch/arm/mach-sa1100/Kconfig +++ b/arch/arm/mach-sa1100/Kconfig @@ -118,6 +118,16 @@ config SA1100_LART (also known as the LART). See <http://www.lartmaker.nl/> for information on the LART. +config SA1100_NANOENGINE + bool "nanoEngine" + select CPU_FREQ_SA1110 + select PCI + select PCI_NANOENGINE + help + Say Y here if you are using the Bright Star Engineering nanoEngine. + See <http://www.brightstareng.com/arm/nanoeng.htm> for information + on the BSE nanoEngine. + config SA1100_PLEB bool "PLEB" select CPU_FREQ_SA1100 diff --git a/arch/arm/mach-sa1100/Makefile b/arch/arm/mach-sa1100/Makefile index 89349c1dd7a..e697691eed2 100644 --- a/arch/arm/mach-sa1100/Makefile +++ b/arch/arm/mach-sa1100/Makefile @@ -37,6 +37,9 @@ obj-$(CONFIG_SA1100_JORNADA720_SSP) += jornada720_ssp.o obj-$(CONFIG_SA1100_LART) += lart.o led-$(CONFIG_SA1100_LART) += leds-lart.o +obj-$(CONFIG_SA1100_NANOENGINE) += nanoengine.o +obj-$(CONFIG_PCI_NANOENGINE) += pci-nanoengine.o + obj-$(CONFIG_SA1100_PLEB) += pleb.o obj-$(CONFIG_SA1100_SHANNON) += shannon.o diff --git a/arch/arm/mach-sa1100/cpu-sa1100.c b/arch/arm/mach-sa1100/cpu-sa1100.c index 96f7dc103b5..07d4e8ba371 100644 --- a/arch/arm/mach-sa1100/cpu-sa1100.c +++ b/arch/arm/mach-sa1100/cpu-sa1100.c @@ -94,48 +94,47 @@ #include "generic.h" -typedef struct { +struct sa1100_dram_regs { int speed; u32 mdcnfg; u32 mdcas0; u32 mdcas1; u32 mdcas2; -} sa1100_dram_regs_t; +}; static struct cpufreq_driver sa1100_driver; -static sa1100_dram_regs_t sa1100_dram_settings[] = -{ - /* speed, mdcnfg, mdcas0, mdcas1, mdcas2 clock frequency */ - { 59000, 0x00dc88a3, 0xcccccccf, 0xfffffffc, 0xffffffff }, /* 59.0 MHz */ - { 73700, 0x011490a3, 0xcccccccf, 0xfffffffc, 0xffffffff }, /* 73.7 MHz */ - { 88500, 0x014e90a3, 0xcccccccf, 0xfffffffc, 0xffffffff }, /* 88.5 MHz */ - { 103200, 0x01889923, 0xcccccccf, 0xfffffffc, 0xffffffff }, /* 103.2 MHz */ - { 118000, 0x01c29923, 0x9999998f, 0xfffffff9, 0xffffffff }, /* 118.0 MHz */ - { 132700, 0x01fb2123, 0x9999998f, 0xfffffff9, 0xffffffff }, /* 132.7 MHz */ - { 147500, 0x02352123, 0x3333330f, 0xfffffff3, 0xffffffff }, /* 147.5 MHz */ - { 162200, 0x026b29a3, 0x38e38e1f, 0xfff8e38e, 0xffffffff }, /* 162.2 MHz */ - { 176900, 0x02a329a3, 0x71c71c1f, 0xfff1c71c, 0xffffffff }, /* 176.9 MHz */ - { 191700, 0x02dd31a3, 0xe38e383f, 0xffe38e38, 0xffffffff }, /* 191.7 MHz */ - { 206400, 0x03153223, 0xc71c703f, 0xffc71c71, 0xffffffff }, /* 206.4 MHz */ - { 221200, 0x034fba23, 0xc71c703f, 0xffc71c71, 0xffffffff }, /* 221.2 MHz */ - { 235900, 0x03853a23, 0xe1e1e07f, 0xe1e1e1e1, 0xffffffe1 }, /* 235.9 MHz */ - { 250700, 0x03bf3aa3, 0xc3c3c07f, 0xc3c3c3c3, 0xffffffc3 }, /* 250.7 MHz */ - { 265400, 0x03f7c2a3, 0xc3c3c07f, 0xc3c3c3c3, 0xffffffc3 }, /* 265.4 MHz */ - { 280200, 0x0431c2a3, 0x878780ff, 0x87878787, 0xffffff87 }, /* 280.2 MHz */ +static struct sa1100_dram_regs sa1100_dram_settings[] = { + /*speed, mdcnfg, mdcas0, mdcas1, mdcas2, clock freq */ + { 59000, 0x00dc88a3, 0xcccccccf, 0xfffffffc, 0xffffffff},/* 59.0 MHz */ + { 73700, 0x011490a3, 0xcccccccf, 0xfffffffc, 0xffffffff},/* 73.7 MHz */ + { 88500, 0x014e90a3, 0xcccccccf, 0xfffffffc, 0xffffffff},/* 88.5 MHz */ + {103200, 0x01889923, 0xcccccccf, 0xfffffffc, 0xffffffff},/* 103.2 MHz */ + {118000, 0x01c29923, 0x9999998f, 0xfffffff9, 0xffffffff},/* 118.0 MHz */ + {132700, 0x01fb2123, 0x9999998f, 0xfffffff9, 0xffffffff},/* 132.7 MHz */ + {147500, 0x02352123, 0x3333330f, 0xfffffff3, 0xffffffff},/* 147.5 MHz */ + {162200, 0x026b29a3, 0x38e38e1f, 0xfff8e38e, 0xffffffff},/* 162.2 MHz */ + {176900, 0x02a329a3, 0x71c71c1f, 0xfff1c71c, 0xffffffff},/* 176.9 MHz */ + {191700, 0x02dd31a3, 0xe38e383f, 0xffe38e38, 0xffffffff},/* 191.7 MHz */ + {206400, 0x03153223, 0xc71c703f, 0xffc71c71, 0xffffffff},/* 206.4 MHz */ + {221200, 0x034fba23, 0xc71c703f, 0xffc71c71, 0xffffffff},/* 221.2 MHz */ + {235900, 0x03853a23, 0xe1e1e07f, 0xe1e1e1e1, 0xffffffe1},/* 235.9 MHz */ + {250700, 0x03bf3aa3, 0xc3c3c07f, 0xc3c3c3c3, 0xffffffc3},/* 250.7 MHz */ + {265400, 0x03f7c2a3, 0xc3c3c07f, 0xc3c3c3c3, 0xffffffc3},/* 265.4 MHz */ + {280200, 0x0431c2a3, 0x878780ff, 0x87878787, 0xffffff87},/* 280.2 MHz */ { 0, 0, 0, 0, 0 } /* last entry */ }; static void sa1100_update_dram_timings(int current_speed, int new_speed) { - sa1100_dram_regs_t *settings = sa1100_dram_settings; + struct sa1100_dram_regs *settings = sa1100_dram_settings; /* find speed */ while (settings->speed != 0) { - if(new_speed == settings->speed) + if (new_speed == settings->speed) break; - + settings++; } @@ -149,7 +148,7 @@ static void sa1100_update_dram_timings(int current_speed, int new_speed) /* We're going FASTER, so first relax the memory * timings before changing the core frequency */ - + /* Half the memory access clock */ MDCNFG |= MDCNFG_CDB2; @@ -187,7 +186,7 @@ static int sa1100_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; new_ppcr = sa11x0_freq_to_ppcr(target_freq); - switch(relation){ + switch (relation) { case CPUFREQ_RELATION_L: if (sa11x0_ppcr_to_freq(new_ppcr) > policy->max) new_ppcr--; diff --git a/arch/arm/mach-sa1100/cpu-sa1110.c b/arch/arm/mach-sa1100/cpu-sa1110.c index 7252874d328..675bf8ef97e 100644 --- a/arch/arm/mach-sa1100/cpu-sa1110.c +++ b/arch/arm/mach-sa1100/cpu-sa1110.c @@ -16,28 +16,24 @@ * * The SDRAM type can be passed on the command line as cpu_sa1110.sdram=type */ -#include <linux/moduleparam.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/sched.h> #include <linux/cpufreq.h> #include <linux/delay.h> #include <linux/init.h> -#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/moduleparam.h> +#include <linux/types.h> -#include <mach/hardware.h> #include <asm/cputype.h> #include <asm/mach-types.h> -#include <asm/system.h> + +#include <mach/hardware.h> #include "generic.h" #undef DEBUG -static struct cpufreq_driver sa1110_driver; - struct sdram_params { - const char name[16]; + const char name[20]; u_char rows; /* bits */ u_char cas_latency; /* cycles */ u_char tck; /* clock cycle time (ns) */ @@ -107,6 +103,15 @@ static struct sdram_params sdram_tbl[] __initdata = { .twr = 8, .refresh = 64000, .cas_latency = 3, + }, { /* Micron MT48LC8M16A2TG-75 */ + .name = "MT48LC8M16A2TG-75", + .rows = 12, + .tck = 8, + .trcd = 20, + .trp = 20, + .twr = 8, + .refresh = 64000, + .cas_latency = 3, }, }; @@ -180,11 +185,13 @@ sdram_calculate_timing(struct sdram_info *sd, u_int cpu_khz, sd->mdrefr |= MDREFR_K1DB2; /* initial number of '1's in MDCAS + 1 */ - set_mdcas(sd->mdcas, sd_khz >= 62000, ns_to_cycles(sdram->trcd, mem_khz)); + set_mdcas(sd->mdcas, sd_khz >= 62000, + ns_to_cycles(sdram->trcd, mem_khz)); #ifdef DEBUG - printk("MDCNFG: %08x MDREFR: %08x MDCAS0: %08x MDCAS1: %08x MDCAS2: %08x\n", - sd->mdcnfg, sd->mdrefr, sd->mdcas[0], sd->mdcas[1], sd->mdcas[2]); + printk(KERN_DEBUG "MDCNFG: %08x MDREFR: %08x MDCAS0: %08x MDCAS1: %08x MDCAS2: %08x\n", + sd->mdcnfg, sd->mdrefr, sd->mdcas[0], sd->mdcas[1], + sd->mdcas[2]); #endif } @@ -213,7 +220,7 @@ sdram_update_refresh(u_int cpu_khz, struct sdram_params *sdram) #ifdef DEBUG mdelay(250); - printk("new dri value = %d\n", dri); + printk(KERN_DEBUG "new dri value = %d\n", dri); #endif sdram_set_refresh(dri); @@ -232,7 +239,7 @@ static int sa1110_target(struct cpufreq_policy *policy, unsigned long flags; unsigned int ppcr, unused; - switch(relation){ + switch (relation) { case CPUFREQ_RELATION_L: ppcr = sa11x0_freq_to_ppcr(target_freq); if (sa11x0_ppcr_to_freq(ppcr) > policy->max) @@ -280,11 +287,10 @@ static int sa1110_target(struct cpufreq_policy *policy, * We wait 20ms to be safe. */ sdram_set_refresh(2); - if (!irqs_disabled()) { + if (!irqs_disabled()) msleep(20); - } else { + else mdelay(20); - } /* * Reprogram the DRAM timings with interrupts disabled, and @@ -295,7 +301,7 @@ static int sa1110_target(struct cpufreq_policy *policy, local_irq_save(flags); asm("mcr p15, 0, %0, c7, c10, 4" : : "r" (0)); udelay(10); - __asm__ __volatile__(" \n\ + __asm__ __volatile__("\n\ b 2f \n\ .align 5 \n\ 1: str %3, [%1, #0] @ MDCNFG \n\ @@ -336,7 +342,9 @@ static int __init sa1110_cpu_init(struct cpufreq_policy *policy) return 0; } -static struct cpufreq_driver sa1110_driver = { +/* sa1110_driver needs __refdata because it must remain after init registers + * it with cpufreq_register_driver() */ +static struct cpufreq_driver sa1110_driver __refdata = { .flags = CPUFREQ_STICKY, .verify = sa11x0_verify_speed, .target = sa1110_target, @@ -349,7 +357,8 @@ static struct sdram_params *sa1110_find_sdram(const char *name) { struct sdram_params *sdram; - for (sdram = sdram_tbl; sdram < sdram_tbl + ARRAY_SIZE(sdram_tbl); sdram++) + for (sdram = sdram_tbl; sdram < sdram_tbl + ARRAY_SIZE(sdram_tbl); + sdram++) if (strcmp(name, sdram->name) == 0) return sdram; @@ -369,14 +378,14 @@ static int __init sa1110_clk_init(void) if (!name[0]) { if (machine_is_assabet()) name = "TC59SM716-CL3"; - if (machine_is_pt_system3()) name = "K4S641632D"; - if (machine_is_h3100()) name = "KM416S4030CT"; if (machine_is_jornada720()) - name = "K4S281632B-1H"; + name = "K4S281632B-1H"; + if (machine_is_nanoengine()) + name = "MT48LC8M16A2TG-75"; } sdram = sa1110_find_sdram(name); diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 3c1fcd69671..59d14f0fdcf 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -16,9 +16,7 @@ #include <linux/pm.h> #include <linux/cpufreq.h> #include <linux/ioport.h> -#include <linux/sched.h> /* just for sched_clock() - funny that */ #include <linux/platform_device.h> -#include <linux/cnt32_to_63.h> #include <asm/div64.h> #include <mach/hardware.h> @@ -110,27 +108,6 @@ unsigned int sa11x0_getspeed(unsigned int cpu) } /* - * This is the SA11x0 sched_clock implementation. This has - * a resolution of 271ns, and a maximum value of 32025597s (370 days). - * - * The return value is guaranteed to be monotonic in that range as - * long as there is always less than 582 seconds between successive - * calls to this function. - * - * ( * 1E9 / 3686400 => * 78125 / 288) - */ -unsigned long long sched_clock(void) -{ - unsigned long long v = cnt32_to_63(OSCR); - - /* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */ - v *= 78125<<1; - do_div(v, 288<<1); - - return v; -} - -/* * Default power-off for SA1100 */ static void sa1100_power_off(void) @@ -163,10 +140,15 @@ static void sa11x0_register_device(struct platform_device *dev, void *data) static struct resource sa11x0udc_resources[] = { [0] = { - .start = 0x80000000, - .end = 0x8000ffff, + .start = __PREG(Ser0UDCCR), + .end = __PREG(Ser0UDCCR) + 0xffff, .flags = IORESOURCE_MEM, }, + [1] = { + .start = IRQ_Ser0UDC, + .end = IRQ_Ser0UDC, + .flags = IORESOURCE_IRQ, + }, }; static u64 sa11x0udc_dma_mask = 0xffffffffUL; @@ -184,10 +166,15 @@ static struct platform_device sa11x0udc_device = { static struct resource sa11x0uart1_resources[] = { [0] = { - .start = 0x80010000, - .end = 0x8001ffff, + .start = __PREG(Ser1UTCR0), + .end = __PREG(Ser1UTCR0) + 0xffff, .flags = IORESOURCE_MEM, }, + [1] = { + .start = IRQ_Ser1UART, + .end = IRQ_Ser1UART, + .flags = IORESOURCE_IRQ, + }, }; static struct platform_device sa11x0uart1_device = { @@ -199,10 +186,15 @@ static struct platform_device sa11x0uart1_device = { static struct resource sa11x0uart3_resources[] = { [0] = { - .start = 0x80050000, - .end = 0x8005ffff, + .start = __PREG(Ser3UTCR0), + .end = __PREG(Ser3UTCR0) + 0xffff, .flags = IORESOURCE_MEM, }, + [1] = { + .start = IRQ_Ser3UART, + .end = IRQ_Ser3UART, + .flags = IORESOURCE_IRQ, + }, }; static struct platform_device sa11x0uart3_device = { @@ -214,10 +206,15 @@ static struct platform_device sa11x0uart3_device = { static struct resource sa11x0mcp_resources[] = { [0] = { - .start = 0x80060000, - .end = 0x8006ffff, + .start = __PREG(Ser4MCCR0), + .end = __PREG(Ser4MCCR0) + 0xffff, .flags = IORESOURCE_MEM, }, + [1] = { + .start = IRQ_Ser4MCP, + .end = IRQ_Ser4MCP, + .flags = IORESOURCE_IRQ, + }, }; static u64 sa11x0mcp_dma_mask = 0xffffffffUL; @@ -244,6 +241,11 @@ static struct resource sa11x0ssp_resources[] = { .end = 0x8007ffff, .flags = IORESOURCE_MEM, }, + [1] = { + .start = IRQ_Ser4SSP, + .end = IRQ_Ser4SSP, + .flags = IORESOURCE_IRQ, + }, }; static u64 sa11x0ssp_dma_mask = 0xffffffffUL; diff --git a/arch/arm/mach-sa1100/include/mach/hardware.h b/arch/arm/mach-sa1100/include/mach/hardware.h index 99f5856d8de..967ae768439 100644 --- a/arch/arm/mach-sa1100/include/mach/hardware.h +++ b/arch/arm/mach-sa1100/include/mach/hardware.h @@ -76,4 +76,12 @@ static inline unsigned long get_clock_tick_rate(void) #include "SA-1101.h" #endif +#if defined(CONFIG_ARCH_SA1100) && defined(CONFIG_PCI) +#define PCIBIOS_MIN_IO 0 +#define PCIBIOS_MIN_MEM 0 +#define pcibios_assign_all_busses() 1 +#define HAVE_ARCH_PCI_SET_DMA_MASK 1 +#endif + + #endif /* _ASM_ARCH_HARDWARE_H */ diff --git a/arch/arm/mach-sa1100/include/mach/nanoengine.h b/arch/arm/mach-sa1100/include/mach/nanoengine.h new file mode 100644 index 00000000000..14f8382d066 --- /dev/null +++ b/arch/arm/mach-sa1100/include/mach/nanoengine.h @@ -0,0 +1,52 @@ +/* + * arch/arm/mach-sa1100/include/mach/nanoengine.h + * + * This file contains the hardware specific definitions for nanoEngine. + * Only include this file from SA1100-specific files. + * + * Copyright (C) 2010 Marcelo Roberto Jimenez <mroberto@cpti.cetuc.puc-rio.br> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#ifndef __ASM_ARCH_NANOENGINE_H +#define __ASM_ARCH_NANOENGINE_H + +#include <mach/irqs.h> + +#define GPIO_PC_READY0 GPIO_GPIO(11) /* ready for socket 0 (active high)*/ +#define GPIO_PC_READY1 GPIO_GPIO(12) /* ready for socket 1 (active high) */ +#define GPIO_PC_CD0 GPIO_GPIO(13) /* detect for socket 0 (active low) */ +#define GPIO_PC_CD1 GPIO_GPIO(14) /* detect for socket 1 (active low) */ +#define GPIO_PC_RESET0 GPIO_GPIO(15) /* reset socket 0 */ +#define GPIO_PC_RESET1 GPIO_GPIO(16) /* reset socket 1 */ + +#define NANOENGINE_IRQ_GPIO_PCI IRQ_GPIO0 +#define NANOENGINE_IRQ_GPIO_PC_READY0 IRQ_GPIO11 +#define NANOENGINE_IRQ_GPIO_PC_READY1 IRQ_GPIO12 +#define NANOENGINE_IRQ_GPIO_PC_CD0 IRQ_GPIO13 +#define NANOENGINE_IRQ_GPIO_PC_CD1 IRQ_GPIO14 + +/* + * nanoEngine Memory Map: + * + * 0000.0000 - 003F.0000 - 4 MB Flash + * C000.0000 - C1FF.FFFF - 32 MB SDRAM + * 1860.0000 - 186F.FFFF - 1 MB Internal PCI Memory Read/Write + * 18A1.0000 - 18A1.FFFF - 64 KB Internal PCI Config Space + * 4000.0000 - 47FF.FFFF - 128 MB External Bus I/O - Multiplexed Mode + * 4800.0000 - 4FFF.FFFF - 128 MB External Bus I/O - Non-Multiplexed Mode + * + */ + +#define NANO_PCI_MEM_RW_PHYS 0x18600000 +#define NANO_PCI_MEM_RW_VIRT 0xf1000000 +#define NANO_PCI_MEM_RW_SIZE SZ_1M +#define NANO_PCI_CONFIG_SPACE_PHYS 0x18A10000 +#define NANO_PCI_CONFIG_SPACE_VIRT 0xf2000000 +#define NANO_PCI_CONFIG_SPACE_SIZE SZ_64K + +#endif + diff --git a/arch/arm/mach-sa1100/nanoengine.c b/arch/arm/mach-sa1100/nanoengine.c new file mode 100644 index 00000000000..72087f0658b --- /dev/null +++ b/arch/arm/mach-sa1100/nanoengine.c @@ -0,0 +1,119 @@ +/* + * linux/arch/arm/mach-sa1100/nanoengine.c + * + * Bright Star Engineering's nanoEngine board init code. + * + * Copyright (C) 2010 Marcelo Roberto Jimenez <mroberto@cpti.cetuc.puc-rio.br> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> +#include <linux/root_dev.h> + +#include <asm/mach-types.h> +#include <asm/setup.h> + +#include <asm/mach/arch.h> +#include <asm/mach/flash.h> +#include <asm/mach/map.h> +#include <asm/mach/serial_sa1100.h> + +#include <mach/hardware.h> +#include <mach/nanoengine.h> + +#include "generic.h" + +/* Flash bank 0 */ +static struct mtd_partition nanoengine_partitions[] = { + { + .name = "nanoEngine boot firmware and parameter table", + .size = 0x00010000, /* 32K */ + .offset = 0, + .mask_flags = MTD_WRITEABLE, + }, { + .name = "kernel/initrd reserved", + .size = 0x002f0000, + .offset = 0x00010000, + .mask_flags = MTD_WRITEABLE, + }, { + .name = "experimental filesystem allocation", + .size = 0x00100000, + .offset = 0x00300000, + .mask_flags = MTD_WRITEABLE, + } +}; + +static struct flash_platform_data nanoengine_flash_data = { + .map_name = "jedec_probe", + .parts = nanoengine_partitions, + .nr_parts = ARRAY_SIZE(nanoengine_partitions), +}; + +static struct resource nanoengine_flash_resources[] = { + { + .start = SA1100_CS0_PHYS, + .end = SA1100_CS0_PHYS + SZ_32M - 1, + .flags = IORESOURCE_MEM, + }, { + .start = SA1100_CS1_PHYS, + .end = SA1100_CS1_PHYS + SZ_32M - 1, + .flags = IORESOURCE_MEM, + } +}; + +static struct map_desc nanoengine_io_desc[] __initdata = { + { + /* System Registers */ + .virtual = 0xf0000000, + .pfn = __phys_to_pfn(0x10000000), + .length = 0x00100000, + .type = MT_DEVICE + }, { + /* Internal PCI Memory Read/Write */ + .virtual = NANO_PCI_MEM_RW_VIRT, + .pfn = __phys_to_pfn(NANO_PCI_MEM_RW_PHYS), + .length = NANO_PCI_MEM_RW_SIZE, + .type = MT_DEVICE + }, { + /* Internal PCI Config Space */ + .virtual = NANO_PCI_CONFIG_SPACE_VIRT, + .pfn = __phys_to_pfn(NANO_PCI_CONFIG_SPACE_PHYS), + .length = NANO_PCI_CONFIG_SPACE_SIZE, + .type = MT_DEVICE + } +}; + +static void __init nanoengine_map_io(void) +{ + sa1100_map_io(); + iotable_init(nanoengine_io_desc, ARRAY_SIZE(nanoengine_io_desc)); + + sa1100_register_uart(0, 1); + sa1100_register_uart(1, 2); + sa1100_register_uart(2, 3); + Ser1SDCR0 |= SDCR0_UART; + /* disable IRDA -- UART2 is used as a normal serial port */ + Ser2UTCR4 = 0; + Ser2HSCR0 = 0; +} + +static void __init nanoengine_init(void) +{ + sa11x0_register_mtd(&nanoengine_flash_data, nanoengine_flash_resources, + ARRAY_SIZE(nanoengine_flash_resources)); +} + +MACHINE_START(NANOENGINE, "BSE nanoEngine") + .boot_params = 0xc0000000, + .map_io = nanoengine_map_io, + .init_irq = sa1100_init_irq, + .timer = &sa1100_timer, + .init_machine = nanoengine_init, +MACHINE_END diff --git a/arch/arm/mach-sa1100/pci-nanoengine.c b/arch/arm/mach-sa1100/pci-nanoengine.c new file mode 100644 index 00000000000..fba7a913f12 --- /dev/null +++ b/arch/arm/mach-sa1100/pci-nanoengine.c @@ -0,0 +1,284 @@ +/* + * linux/arch/arm/mach-sa1100/pci-nanoengine.c + * + * PCI functions for BSE nanoEngine PCI + * + * Copyright (C) 2010 Marcelo Roberto Jimenez <mroberto@cpti.cetuc.puc-rio.br> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/pci.h> +#include <linux/spinlock.h> + +#include <asm/mach/pci.h> +#include <asm/mach-types.h> + +#include <mach/nanoengine.h> + +static DEFINE_SPINLOCK(nano_lock); + +static int nanoengine_get_pci_address(struct pci_bus *bus, + unsigned int devfn, int where, unsigned long *address) +{ + int ret = PCIBIOS_DEVICE_NOT_FOUND; + unsigned int busnr = bus->number; + + *address = NANO_PCI_CONFIG_SPACE_VIRT + + ((bus->number << 16) | (devfn << 8) | (where & ~3)); + + ret = (busnr > 255 || devfn > 255 || where > 255) ? + PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; + + return ret; +} + +static int nanoengine_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *val) +{ + int ret; + unsigned long address; + unsigned long flags; + u32 v; + + /* nanoEngine PCI bridge does not return -1 for a non-existing + * device. We must fake the answer. We know that the only valid + * device is device zero at bus 0, which is the network chip. */ + if (bus->number != 0 || (devfn >> 3) != 0) { + v = -1; + nanoengine_get_pci_address(bus, devfn, where, &address); + goto exit_function; + } + + spin_lock_irqsave(&nano_lock, flags); + + ret = nanoengine_get_pci_address(bus, devfn, where, &address); + if (ret != PCIBIOS_SUCCESSFUL) + return ret; + v = __raw_readl(address); + + spin_unlock_irqrestore(&nano_lock, flags); + + v >>= ((where & 3) * 8); + v &= (unsigned long)(-1) >> ((4 - size) * 8); + +exit_function: + *val = v; + return PCIBIOS_SUCCESSFUL; +} + +static int nanoengine_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 val) +{ + int ret; + unsigned long address; + unsigned long flags; + unsigned shift; + u32 v; + + shift = (where & 3) * 8; + + spin_lock_irqsave(&nano_lock, flags); + + ret = nanoengine_get_pci_address(bus, devfn, where, &address); + if (ret != PCIBIOS_SUCCESSFUL) + return ret; + v = __raw_readl(address); + switch (size) { + case 1: + v &= ~(0xFF << shift); + v |= val << shift; + break; + case 2: + v &= ~(0xFFFF << shift); + v |= val << shift; + break; + case 4: + v = val; + break; + } + __raw_writel(v, address); + + spin_unlock_irqrestore(&nano_lock, flags); + + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops pci_nano_ops = { + .read = nanoengine_read_config, + .write = nanoengine_write_config, +}; + +static int __init pci_nanoengine_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +{ + return NANOENGINE_IRQ_GPIO_PCI; +} + +struct pci_bus * __init pci_nanoengine_scan_bus(int nr, struct pci_sys_data *sys) +{ + return pci_scan_bus(sys->busnr, &pci_nano_ops, sys); +} + +static struct resource pci_io_ports = { + .name = "PCI IO", + .start = 0x400, + .end = 0x7FF, + .flags = IORESOURCE_IO, +}; + +static struct resource pci_non_prefetchable_memory = { + .name = "PCI non-prefetchable", + .start = NANO_PCI_MEM_RW_PHYS, + /* nanoEngine documentation says there is a 1 Megabyte window here, + * but PCI reports just 128 + 8 kbytes. */ + .end = NANO_PCI_MEM_RW_PHYS + NANO_PCI_MEM_RW_SIZE - 1, +/* .end = NANO_PCI_MEM_RW_PHYS + SZ_128K + SZ_8K - 1,*/ + .flags = IORESOURCE_MEM, +}; + +/* + * nanoEngine PCI reports 1 Megabyte of prefetchable memory, but it + * overlaps with previously defined memory. + * + * Here is what happens: + * +# dmesg +... +pci 0000:00:00.0: [8086:1209] type 0 class 0x000200 +pci 0000:00:00.0: reg 10: [mem 0x00021000-0x00021fff] +pci 0000:00:00.0: reg 14: [io 0x0000-0x003f] +pci 0000:00:00.0: reg 18: [mem 0x00000000-0x0001ffff] +pci 0000:00:00.0: reg 30: [mem 0x00000000-0x000fffff pref] +pci 0000:00:00.0: supports D1 D2 +pci 0000:00:00.0: PME# supported from D0 D1 D2 D3hot +pci 0000:00:00.0: PME# disabled +PCI: bus0: Fast back to back transfers enabled +pci 0000:00:00.0: BAR 6: can't assign mem pref (size 0x100000) +pci 0000:00:00.0: BAR 2: assigned [mem 0x18600000-0x1861ffff] +pci 0000:00:00.0: BAR 2: set to [mem 0x18600000-0x1861ffff] (PCI address [0x0-0x1ffff]) +pci 0000:00:00.0: BAR 0: assigned [mem 0x18620000-0x18620fff] +pci 0000:00:00.0: BAR 0: set to [mem 0x18620000-0x18620fff] (PCI address [0x20000-0x20fff]) +pci 0000:00:00.0: BAR 1: assigned [io 0x0400-0x043f] +pci 0000:00:00.0: BAR 1: set to [io 0x0400-0x043f] (PCI address [0x0-0x3f]) + * + * On the other hand, if we do not request the prefetchable memory resource, + * linux will alloc it first and the two non-prefetchable memory areas that + * are our real interest will not be mapped. So we choose to map it to an + * unused area. It gets recognized as expansion ROM, but becomes disabled. + * + * Here is what happens then: + * +# dmesg +... +pci 0000:00:00.0: [8086:1209] type 0 class 0x000200 +pci 0000:00:00.0: reg 10: [mem 0x00021000-0x00021fff] +pci 0000:00:00.0: reg 14: [io 0x0000-0x003f] +pci 0000:00:00.0: reg 18: [mem 0x00000000-0x0001ffff] +pci 0000:00:00.0: reg 30: [mem 0x00000000-0x000fffff pref] +pci 0000:00:00.0: supports D1 D2 +pci 0000:00:00.0: PME# supported from D0 D1 D2 D3hot +pci 0000:00:00.0: PME# disabled +PCI: bus0: Fast back to back transfers enabled +pci 0000:00:00.0: BAR 6: assigned [mem 0x78000000-0x780fffff pref] +pci 0000:00:00.0: BAR 2: assigned [mem 0x18600000-0x1861ffff] +pci 0000:00:00.0: BAR 2: set to [mem 0x18600000-0x1861ffff] (PCI address [0x0-0x1ffff]) +pci 0000:00:00.0: BAR 0: assigned [mem 0x18620000-0x18620fff] +pci 0000:00:00.0: BAR 0: set to [mem 0x18620000-0x18620fff] (PCI address [0x20000-0x20fff]) +pci 0000:00:00.0: BAR 1: assigned [io 0x0400-0x043f] +pci 0000:00:00.0: BAR 1: set to [io 0x0400-0x043f] (PCI address [0x0-0x3f]) + +# lspci -vv -s 0000:00:00.0 +00:00.0 Class 0200: Device 8086:1209 (rev 09) + Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B- DisINTx- + Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR+ <PERR+ INTx- + Latency: 0 (2000ns min, 14000ns max), Cache Line Size: 32 bytes + Interrupt: pin A routed to IRQ 0 + Region 0: Memory at 18620000 (32-bit, non-prefetchable) [size=4K] + Region 1: I/O ports at 0400 [size=64] + Region 2: [virtual] Memory at 18600000 (32-bit, non-prefetchable) [size=128K] + [virtual] Expansion ROM at 78000000 [disabled] [size=1M] + Capabilities: [dc] Power Management version 2 + Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold-) + Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=2 PME- + Kernel driver in use: e100 + Kernel modules: e100 + * + */ +static struct resource pci_prefetchable_memory = { + .name = "PCI prefetchable", + .start = 0x78000000, + .end = 0x78000000 + NANO_PCI_MEM_RW_SIZE - 1, + .flags = IORESOURCE_MEM | IORESOURCE_PREFETCH, +}; + +static int __init pci_nanoengine_setup_resources(struct resource **resource) +{ + if (request_resource(&ioport_resource, &pci_io_ports)) { + printk(KERN_ERR "PCI: unable to allocate io port region\n"); + return -EBUSY; + } + if (request_resource(&iomem_resource, &pci_non_prefetchable_memory)) { + release_resource(&pci_io_ports); + printk(KERN_ERR "PCI: unable to allocate non prefetchable\n"); + return -EBUSY; + } + if (request_resource(&iomem_resource, &pci_prefetchable_memory)) { + release_resource(&pci_io_ports); + release_resource(&pci_non_prefetchable_memory); + printk(KERN_ERR "PCI: unable to allocate prefetchable\n"); + return -EBUSY; + } + resource[0] = &pci_io_ports; + resource[1] = &pci_non_prefetchable_memory; + resource[2] = &pci_prefetchable_memory; + + return 1; +} + +int __init pci_nanoengine_setup(int nr, struct pci_sys_data *sys) +{ + int ret = 0; + + if (nr == 0) { + sys->mem_offset = NANO_PCI_MEM_RW_PHYS; + sys->io_offset = 0x400; + ret = pci_nanoengine_setup_resources(sys->resource); + /* Enable alternate memory bus master mode, see + * "Intel StrongARM SA1110 Developer's Manual", + * section 10.8, "Alternate Memory Bus Master Mode". */ + GPDR = (GPDR & ~GPIO_MBREQ) | GPIO_MBGNT; + GAFR |= GPIO_MBGNT | GPIO_MBREQ; + TUCR |= TUCR_MBGPIO; + } + + return ret; +} + +static struct hw_pci nanoengine_pci __initdata = { + .map_irq = pci_nanoengine_map_irq, + .nr_controllers = 1, + .scan = pci_nanoengine_scan_bus, + .setup = pci_nanoengine_setup, +}; + +static int __init nanoengine_pci_init(void) +{ + if (machine_is_nanoengine()) + pci_common_init(&nanoengine_pci); + return 0; +} + +subsys_initcall(nanoengine_pci_init); diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c index 27692d0ffbe..cfb76077bd2 100644 --- a/arch/arm/mach-sa1100/simpad.c +++ b/arch/arm/mach-sa1100/simpad.c @@ -166,9 +166,6 @@ static void __init simpad_map_io(void) PCFR = 0; PSDR = 0; - sa11x0_register_mtd(&simpad_flash_data, simpad_flash_resources, - ARRAY_SIZE(simpad_flash_resources)); - sa11x0_register_mcp(&simpad_mcp_data); } static void simpad_power_off(void) @@ -216,6 +213,10 @@ static int __init simpad_init(void) pm_power_off = simpad_power_off; + sa11x0_register_mtd(&simpad_flash_data, simpad_flash_resources, + ARRAY_SIZE(simpad_flash_resources)); + sa11x0_register_mcp(&simpad_mcp_data); + ret = platform_add_devices(devices, ARRAY_SIZE(devices)); if(ret) printk(KERN_WARNING "simpad: Unable to register mq200 framebuffer device"); diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c index 74b6e0e570b..ae4f3d80416 100644 --- a/arch/arm/mach-sa1100/time.c +++ b/arch/arm/mach-sa1100/time.c @@ -12,12 +12,39 @@ #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/sched.h> /* just for sched_clock() - funny that */ #include <linux/timex.h> #include <linux/clockchips.h> #include <asm/mach/time.h> +#include <asm/sched_clock.h> #include <mach/hardware.h> +/* + * This is the SA11x0 sched_clock implementation. + */ +static DEFINE_CLOCK_DATA(cd); + +/* + * Constants generated by clocks_calc_mult_shift(m, s, 3.6864MHz, + * NSEC_PER_SEC, 60). + * This gives a resolution of about 271ns and a wrap period of about 19min. + */ +#define SC_MULT 2275555556u +#define SC_SHIFT 23 + +unsigned long long notrace sched_clock(void) +{ + u32 cyc = OSCR; + return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT); +} + +static void notrace sa1100_update_sched_clock(void) +{ + u32 cyc = OSCR; + update_sched_clock(&cd, cyc, (u32)~0); +} + #define MIN_OSCR_DELTA 2 static irqreturn_t sa1100_ost0_interrupt(int irq, void *dev_id) @@ -81,7 +108,6 @@ static struct clocksource cksrc_sa1100_oscr = { .rating = 200, .read = sa1100_read_oscr, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -97,6 +123,9 @@ static void __init sa1100_timer_init(void) OIER = 0; /* disable any timer interrupts */ OSSR = 0xf; /* clear status on all timers */ + init_fixed_sched_clock(&cd, sa1100_update_sched_clock, 32, + 3686400, SC_MULT, SC_SHIFT); + ckevt_sa1100_osmr0.mult = div_sc(3686400, NSEC_PER_SEC, ckevt_sa1100_osmr0.shift); ckevt_sa1100_osmr0.max_delta_ns = @@ -105,12 +134,9 @@ static void __init sa1100_timer_init(void) clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1; ckevt_sa1100_osmr0.cpumask = cpumask_of(0); - cksrc_sa1100_oscr.mult = - clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift); - setup_irq(IRQ_OST0, &sa1100_timer_irq); - clocksource_register(&cksrc_sa1100_oscr); + clocksource_register_hz(&cksrc_sa1100_oscr, CLOCK_TICK_RATE); clockevents_register_device(&ckevt_sa1100_osmr0); } diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index d440e5f456a..ac429ff2c20 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -61,6 +61,7 @@ #include <asm/mach/arch.h> #include <asm/mach/map.h> #include <asm/mach/time.h> +#include <asm/setup.h> /* * Address Interface BusWidth note diff --git a/arch/arm/mach-shmobile/include/mach/entry-macro.S b/arch/arm/mach-shmobile/include/mach/entry-macro.S index a285d13c741..f428c4db2b6 100644 --- a/arch/arm/mach-shmobile/include/mach/entry-macro.S +++ b/arch/arm/mach-shmobile/include/mach/entry-macro.S @@ -1,4 +1,5 @@ /* + * Copyright (C) 2010 Magnus Damm * Copyright (C) 2008 Renesas Solutions Corp. * * This program is free software; you can redistribute it and/or modify @@ -14,24 +15,45 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <mach/hardware.h> #include <mach/irqs.h> +#define INTCA_BASE 0xe6980000 +#define INTFLGA_OFFS 0x00000018 /* accept pending interrupt */ +#define INTEVTA_OFFS 0x00000020 /* vector number of accepted interrupt */ +#define INTLVLA_OFFS 0x00000030 /* priority level of accepted interrupt */ +#define INTLVLB_OFFS 0x00000034 /* previous priority level */ + .macro disable_fiq .endm .macro get_irqnr_preamble, base, tmp - ldr \base, =INTFLGA + ldr \base, =INTCA_BASE .endm .macro arch_ret_to_user, tmp1, tmp2 .endm .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - ldr \irqnr, [\base] + /* The single INTFLGA read access below results in the following: + * + * 1. INTLVLB is updated with old priority value from INTLVLA + * 2. Highest priority interrupt is accepted + * 3. INTLVLA is updated to contain priority of accepted interrupt + * 4. Accepted interrupt vector is stored in INTFLGA and INTEVTA + */ + ldr \irqnr, [\base, #INTFLGA_OFFS] + + /* Restore INTLVLA with the value saved in INTLVLB. + * This is required to support interrupt priorities properly. + */ + ldrb \tmp, [\base, #INTLVLB_OFFS] + strb \tmp, [\base, #INTLVLA_OFFS] + + /* Handle invalid vector number case */ cmp \irqnr, #0 beq 1000f - /* intevt to irq number */ + + /* Convert vector to irq number, same as the evt2irq() macro */ lsr \irqnr, \irqnr, #0x5 subs \irqnr, \irqnr, #16 diff --git a/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt b/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt new file mode 100644 index 00000000000..e3ebfa73956 --- /dev/null +++ b/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt @@ -0,0 +1,87 @@ +LIST "partner-jet-setup.txt" +LIST "(C) Copyright 2010 Renesas Solutions Corp" +LIST "Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>" + +LIST "RWT Setting" +EW 0xE6020004, 0xA500 +EW 0xE6030004, 0xA500 + +DD 0x01001000, 0x01001000 + +LIST "GPIO Setting" +EB 0xE6051013, 0xA2 + +LIST "CPG" +ED 0xE6150080, 0x00000180 +ED 0xE61500C0, 0x00000002 + +WAIT 1, 0xFE40009C + +LIST "FRQCR" +ED 0xE6150000, 0x2D1305C3 +ED 0xE61500E0, 0x9E40358E +ED 0xE6150004, 0x80331050 + +WAIT 1, 0xFE40009C + +ED 0xE61500E4, 0x00002000 + +WAIT 1, 0xFE40009C + +LIST "PLL" +ED 0xE6150028, 0x00004000 + +WAIT 1, 0xFE40009C + +ED 0xE615002C, 0x93000040 + +WAIT 1, 0xFE40009C + +LIST "BSC" +ED 0xFEC10000, 0x00E0001B + +LIST "SBSC1" +ED 0xFE400354, 0x01AD8000 +ED 0xFE400354, 0x01AD8001 + +WAIT 5, 0xFE40009C + +ED 0xFE400008, 0xBCC90151 +ED 0xFE400040, 0x41774113 +ED 0xFE400044, 0x2712E229 +ED 0xFE400048, 0x20C18505 +ED 0xFE40004C, 0x00110209 +ED 0xFE400010, 0x00000087 + +WAIT 10, 0xFE40009C + +ED 0xFE400084, 0x0000003F +EB 0xFE500000, 0x00 + +WAIT 5, 0xFE40009C + +ED 0xFE400084, 0x0000FF0A +EB 0xFE500000, 0x00 + +WAIT 1, 0xFE40009C + +ED 0xFE400084, 0x00002201 +EB 0xFE500000, 0x00 +ED 0xFE400084, 0x00000302 +EB 0xFE500000, 0x00 +EB 0xFE5C0000, 0x00 +ED 0xFE400008, 0xBCC90159 +ED 0xFE40008C, 0x88800004 +ED 0xFE400094, 0x00000004 +ED 0xFE400028, 0xA55A0032 +ED 0xFE40002C, 0xA55A000C +ED 0xFE400020, 0xA55A2048 +ED 0xFE400008, 0xBCC90959 + +LIST "Change CPGA setting" +ED 0xE61500E0, 0x9E40352E +ED 0xE6150004, 0x80331050 + +WAIT 1, 0xFE40009C + +ED 0xE6150354, 0x00000002 diff --git a/arch/arm/mach-shmobile/include/mach/vmalloc.h b/arch/arm/mach-shmobile/include/mach/vmalloc.h index 4aecf6e3a85..2b8fd8b942f 100644 --- a/arch/arm/mach-shmobile/include/mach/vmalloc.h +++ b/arch/arm/mach-shmobile/include/mach/vmalloc.h @@ -2,6 +2,6 @@ #define __ASM_MACH_VMALLOC_H /* Vmalloc at ... - 0xe5ffffff */ -#define VMALLOC_END 0xe6000000 +#define VMALLOC_END 0xe6000000UL #endif /* __ASM_MACH_VMALLOC_H */ diff --git a/arch/arm/mach-shmobile/include/mach/zboot.h b/arch/arm/mach-shmobile/include/mach/zboot.h new file mode 100644 index 00000000000..3ad86b7708e --- /dev/null +++ b/arch/arm/mach-shmobile/include/mach/zboot.h @@ -0,0 +1,20 @@ +#ifndef ZBOOT_H +#define ZBOOT_H + +#include <asm/mach-types.h> +#include <mach/zboot_macros.h> + +/************************************************** + * + * board specific settings + * + **************************************************/ + +#ifdef CONFIG_MACH_AP4EVB +#define MACH_TYPE MACH_TYPE_AP4EVB +#include "mach/head-ap4evb.txt" +#else +#error "unsupported board." +#endif + +#endif /* ZBOOT_H */ diff --git a/arch/arm/mach-shmobile/include/mach/zboot_macros.h b/arch/arm/mach-shmobile/include/mach/zboot_macros.h new file mode 100644 index 00000000000..aa6111fbc98 --- /dev/null +++ b/arch/arm/mach-shmobile/include/mach/zboot_macros.h @@ -0,0 +1,65 @@ +#ifndef __ZBOOT_MACRO_H +#define __ZBOOT_MACRO_H + +/* The LIST command is used to include comments in the script */ +.macro LIST comment +.endm + +/* The ED command is used to write a 32-bit word */ +.macro ED, addr, data + LDR r0, 1f + LDR r1, 2f + STR r1, [r0] + B 3f +1 : .long \addr +2 : .long \data +3 : +.endm + +/* The EW command is used to write a 16-bit word */ +.macro EW, addr, data + LDR r0, 1f + LDR r1, 2f + STRH r1, [r0] + B 3f +1 : .long \addr +2 : .long \data +3 : +.endm + +/* The EB command is used to write an 8-bit word */ +.macro EB, addr, data + LDR r0, 1f + LDR r1, 2f + STRB r1, [r0] + B 3f +1 : .long \addr +2 : .long \data +3 : +.endm + +/* The WAIT command is used to delay the execution */ +.macro WAIT, time, reg + LDR r1, 1f + LDR r0, 2f + STR r0, [r1] +10 : + LDR r0, [r1] + CMP r0, #0x00000000 + BNE 10b + NOP + B 3f +1 : .long \reg +2 : .long \time * 100 +3 : +.endm + +/* The DD command is used to read a 32-bit word */ +.macro DD, start, end + LDR r1, 1f + B 2f +1 : .long \start +2 : +.endm + +#endif /* __ZBOOT_MACRO_H */ diff --git a/arch/arm/mach-tcc8k/time.c b/arch/arm/mach-tcc8k/time.c index 78d06008841..e0a8d609afe 100644 --- a/arch/arm/mach-tcc8k/time.c +++ b/arch/arm/mach-tcc8k/time.c @@ -35,7 +35,6 @@ static struct clocksource clocksource_tcc = { .rating = 200, .read = tcc_get_cycles, .mask = CLOCKSOURCE_MASK(32), - .shift = 28, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -103,9 +102,7 @@ static int __init tcc_clockevent_init(struct clk *clock) { unsigned int c = clk_get_rate(clock); - clocksource_tcc.mult = clocksource_hz2mult(c, - clocksource_tcc.shift); - clocksource_register(&clocksource_tcc); + clocksource_register_hz(&clocksource_tcc, c); clockevent_tcc.mult = div_sc(c, NSEC_PER_SEC, clockevent_tcc.shift); diff --git a/arch/arm/mach-tegra/include/mach/debug-macro.S b/arch/arm/mach-tegra/include/mach/debug-macro.S index 8ea3bffb4e0..a0e7c12868b 100644 --- a/arch/arm/mach-tegra/include/mach/debug-macro.S +++ b/arch/arm/mach-tegra/include/mach/debug-macro.S @@ -21,8 +21,8 @@ #include <mach/io.h> .macro addruart, rp, rv - ldreq \rp, =IO_APB_PHYS @ physical - ldrne \rv, =IO_APB_VIRT @ virtual + ldr \rp, =IO_APB_PHYS @ physical + ldr \rv, =IO_APB_VIRT @ virtual #if defined(CONFIG_TEGRA_DEBUG_UART_NONE) #error "A debug UART must be selected in the kernel config to use DEBUG_LL" #elif defined(CONFIG_TEGRA_DEBUG_UARTA) diff --git a/arch/arm/mach-tegra/include/mach/entry-macro.S b/arch/arm/mach-tegra/include/mach/entry-macro.S index 2ba9e5c9d2f..dd165c53889 100644 --- a/arch/arm/mach-tegra/include/mach/entry-macro.S +++ b/arch/arm/mach-tegra/include/mach/entry-macro.S @@ -16,8 +16,8 @@ #include <mach/io.h> #if defined(CONFIG_ARM_GIC) - -#include <asm/hardware/gic.h> +#define HAVE_GET_IRQNR_PREAMBLE +#include <asm/hardware/entry-macro-gic.S> /* Uses the GIC interrupt controller built into the cpu */ #define ICTRL_BASE (IO_CPU_VIRT + 0x100) @@ -32,68 +32,6 @@ .macro arch_ret_to_user, tmp1, tmp2 .endm - - /* - * The interrupt numbering scheme is defined in the - * interrupt controller spec. To wit: - * - * Interrupts 0-15 are IPI - * 16-28 are reserved - * 29-31 are local. We allow 30 to be used for the watchdog. - * 32-1020 are global - * 1021-1022 are reserved - * 1023 is "spurious" (no interrupt) - * - * For now, we ignore all local interrupts so only return an interrupt - * if it's between 30 and 1020. The test_for_ipi routine below will - * pick up on IPIs. - * - * A simple read from the controller will tell us the number of the - * highest priority enabled interrupt. We then just need to check - * whether it is in the valid range for an IRQ (30-1020 inclusive). - */ - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - - /* bits 12-10 = src CPU, 9-0 = int # */ - ldr \irqstat, [\base, #GIC_CPU_INTACK] - - ldr \tmp, =1021 - - bic \irqnr, \irqstat, #0x1c00 - - cmp \irqnr, #29 - cmpcc \irqnr, \irqnr - cmpne \irqnr, \tmp - cmpcs \irqnr, \irqnr - - .endm - - /* We assume that irqstat (the raw value of the IRQ acknowledge - * register) is preserved from the macro above. - * If there is an IPI, we immediately signal end of interrupt on the - * controller, since this requires the original irqstat value which - * we won't easily be able to recreate later. - */ - - .macro test_for_ipi, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #16 - strcc \irqstat, [\base, #GIC_CPU_EOI] - cmpcs \irqnr, \irqnr - .endm - - /* As above, this assumes that irqstat and base are preserved.. */ - - .macro test_for_ltirq, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - mov \tmp, #0 - cmp \irqnr, #29 - moveq \tmp, #1 - streq \irqstat, [\base, #GIC_CPU_EOI] - cmp \tmp, #0 - .endm - #else /* legacy interrupt controller for AP16 */ .macro disable_fiq diff --git a/arch/arm/mach-tegra/include/mach/io.h b/arch/arm/mach-tegra/include/mach/io.h index f0981b1ac59..4cea2230c8d 100644 --- a/arch/arm/mach-tegra/include/mach/io.h +++ b/arch/arm/mach-tegra/include/mach/io.h @@ -65,8 +65,8 @@ #ifndef __ASSEMBLER__ -#define __arch_ioremap(p, s, t) tegra_ioremap(p, s, t) -#define __arch_iounmap(v) tegra_iounmap(v) +#define __arch_ioremap tegra_ioremap +#define __arch_iounmap tegra_iounmap void __iomem *tegra_ioremap(unsigned long phys, size_t size, unsigned int type); void tegra_iounmap(volatile void __iomem *addr); diff --git a/arch/arm/mach-tegra/irq.c b/arch/arm/mach-tegra/irq.c index 50a8dfb9a0c..5407de01abf 100644 --- a/arch/arm/mach-tegra/irq.c +++ b/arch/arm/mach-tegra/irq.c @@ -94,8 +94,8 @@ void __init tegra_init_irq(void) writel(0, ictlr_to_virt(i) + ICTLR_CPU_IEP_CLASS); } - gic_dist_init(0, IO_ADDRESS(TEGRA_ARM_INT_DIST_BASE), 29); - gic_cpu_init(0, IO_ADDRESS(TEGRA_ARM_PERIF_BASE + 0x100)); + gic_init(0, 29, IO_ADDRESS(TEGRA_ARM_INT_DIST_BASE), + IO_ADDRESS(TEGRA_ARM_PERIF_BASE + 0x100)); gic = get_irq_chip(29); gic_unmask_irq = gic->unmask; diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c index c729cd72cc3..ec1f68924ed 100644 --- a/arch/arm/mach-tegra/platsmp.c +++ b/arch/arm/mach-tegra/platsmp.c @@ -45,7 +45,7 @@ void __cpuinit platform_secondary_init(unsigned int cpu) * core (e.g. timer irq), then they will not have been enabled * for us: do so */ - gic_cpu_init(0, IO_ADDRESS(TEGRA_ARM_PERIF_BASE) + 0x100); + gic_secondary_init(0); /* * Synchronise with the boot thread. diff --git a/arch/arm/mach-tegra/timer.c b/arch/arm/mach-tegra/timer.c index 9057d6fd1d3..7b8ad1f98f4 100644 --- a/arch/arm/mach-tegra/timer.c +++ b/arch/arm/mach-tegra/timer.c @@ -18,6 +18,7 @@ */ #include <linux/init.h> +#include <linux/sched.h> #include <linux/time.h> #include <linux/interrupt.h> #include <linux/irq.h> @@ -25,10 +26,10 @@ #include <linux/clocksource.h> #include <linux/clk.h> #include <linux/io.h> -#include <linux/cnt32_to_63.h> #include <asm/mach/time.h> #include <asm/localtimer.h> +#include <asm/sched_clock.h> #include <mach/iomap.h> #include <mach/irqs.h> @@ -91,7 +92,7 @@ static void tegra_timer_set_mode(enum clock_event_mode mode, static cycle_t tegra_clocksource_read(struct clocksource *cs) { - return cnt32_to_63(timer_readl(TIMERUS_CNTR_1US)); + return timer_readl(TIMERUS_CNTR_1US); } static struct clock_event_device tegra_clockevent = { @@ -106,14 +107,29 @@ static struct clocksource tegra_clocksource = { .name = "timer_us", .rating = 300, .read = tegra_clocksource_read, - .mask = 0x7FFFFFFFFFFFFFFFULL, + .mask = CLOCKSOURCE_MASK(32), .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -unsigned long long sched_clock(void) +static DEFINE_CLOCK_DATA(cd); + +/* + * Constants generated by clocks_calc_mult_shift(m, s, 1MHz, NSEC_PER_SEC, 60). + * This gives a resolution of about 1us and a wrap period of about 1h11min. + */ +#define SC_MULT 4194304000u +#define SC_SHIFT 22 + +unsigned long long notrace sched_clock(void) { - return clocksource_cyc2ns(tegra_clocksource.read(&tegra_clocksource), - tegra_clocksource.mult, tegra_clocksource.shift); + u32 cyc = timer_readl(TIMERUS_CNTR_1US); + return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT); +} + +static void notrace tegra_update_sched_clock(void) +{ + u32 cyc = timer_readl(TIMERUS_CNTR_1US); + update_sched_clock(&cd, cyc, (u32)~0); } static irqreturn_t tegra_timer_interrupt(int irq, void *dev_id) @@ -158,6 +174,9 @@ static void __init tegra_init_timer(void) WARN(1, "Unknown clock rate"); } + init_fixed_sched_clock(&cd, tegra_update_sched_clock, 32, + 1000000, SC_MULT, SC_SHIFT); + if (clocksource_register_hz(&tegra_clocksource, 1000000)) { printk(KERN_ERR "Failed to register clocksource\n"); BUG(); diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c index 3fc4472719b..3ec58bd2d6e 100644 --- a/arch/arm/mach-u300/timer.c +++ b/arch/arm/mach-u300/timer.c @@ -9,6 +9,7 @@ * Author: Linus Walleij <linus.walleij@stericsson.com> */ #include <linux/interrupt.h> +#include <linux/sched.h> #include <linux/time.h> #include <linux/timex.h> #include <linux/clockchips.h> @@ -21,6 +22,7 @@ #include <mach/hardware.h> /* Generic stuff */ +#include <asm/sched_clock.h> #include <asm/mach/map.h> #include <asm/mach/time.h> #include <asm/mach/irq.h> @@ -352,12 +354,18 @@ static struct clocksource clocksource_u300_1mhz = { * this wraps around for now, since it is just a relative time * stamp. (Inspired by OMAP implementation.) */ +static DEFINE_CLOCK_DATA(cd); + unsigned long long notrace sched_clock(void) { - return clocksource_cyc2ns(clocksource_u300_1mhz.read( - &clocksource_u300_1mhz), - clocksource_u300_1mhz.mult, - clocksource_u300_1mhz.shift); + u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC); + return cyc_to_sched_clock(&cd, cyc, (u32)~0); +} + +static void notrace u300_update_sched_clock(void) +{ + u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC); + update_sched_clock(&cd, cyc, (u32)~0); } @@ -375,6 +383,8 @@ static void __init u300_timer_init(void) clk_enable(clk); rate = clk_get_rate(clk); + init_sched_clock(&cd, u300_update_sched_clock, 32, rate); + /* * Disable the "OS" and "DD" timers - these are designed for Symbian! * Example usage in cnh1601578 cpu subsystem pd_timer_app.c @@ -412,9 +422,7 @@ static void __init u300_timer_init(void) writel(U300_TIMER_APP_EGPT2_TIMER_ENABLE, U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT2); - clocksource_calc_mult_shift(&clocksource_u300_1mhz, - rate, APPTIMER_MIN_RANGE); - if (clocksource_register(&clocksource_u300_1mhz)) + if (clocksource_register_hz(&clocksource_u300_1mhz, rate)) printk(KERN_ERR "timer: failed to initialize clock " "source %s\n", clocksource_u300_1mhz.name); diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index 608a1372b17..7328c017976 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -61,8 +61,8 @@ void __init ux500_init_devices(void) void __init ux500_init_irq(void) { - gic_dist_init(0, __io_address(UX500_GIC_DIST_BASE), 29); - gic_cpu_init(0, __io_address(UX500_GIC_CPU_BASE)); + gic_init(0, 29, __io_address(UX500_GIC_DIST_BASE), + __io_address(UX500_GIC_CPU_BASE)); /* * Init clocks here so that they are available for system timer diff --git a/arch/arm/mach-ux500/include/mach/entry-macro.S b/arch/arm/mach-ux500/include/mach/entry-macro.S index 60ea88db828..a37f585a3ec 100644 --- a/arch/arm/mach-ux500/include/mach/entry-macro.S +++ b/arch/arm/mach-ux500/include/mach/entry-macro.S @@ -11,7 +11,8 @@ * warranty of any kind, whether express or implied. */ #include <mach/hardware.h> -#include <asm/hardware/gic.h> +#define HAVE_GET_IRQNR_PREAMBLE +#include <asm/hardware/entry-macro-gic.S> .macro disable_fiq .endm @@ -22,68 +23,3 @@ .macro arch_ret_to_user, tmp1, tmp2 .endm - - /* - * The interrupt numbering scheme is defined in the - * interrupt controller spec. To wit: - * - * Interrupts 0-15 are IPI - * 16-28 are reserved - * 29-31 are local. We allow 30 to be used for the watchdog. - * 32-1020 are global - * 1021-1022 are reserved - * 1023 is "spurious" (no interrupt) - * - * For now, we ignore all local interrupts so only return an - * interrupt if it's between 30 and 1020. The test_for_ipi - * routine below will pick up on IPIs. - * - * A simple read from the controller will tell us the number - * of the highest priority enabled interrupt. We then just - * need to check whether it is in the valid range for an - * IRQ (30-1020 inclusive). - */ - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - - /* bits 12-10 = src CPU, 9-0 = int # */ - ldr \irqstat, [\base, #GIC_CPU_INTACK] - - ldr \tmp, =1021 - - bic \irqnr, \irqstat, #0x1c00 - - cmp \irqnr, #29 - cmpcc \irqnr, \irqnr - cmpne \irqnr, \tmp - cmpcs \irqnr, \irqnr - - .endm - - /* We assume that irqstat (the raw value of the IRQ - * acknowledge register) is preserved from the macro above. - * If there is an IPI, we immediately signal end of - * interrupt on the controller, since this requires the - * original irqstat value which we won't easily be able - * to recreate later. - */ - - .macro test_for_ipi, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #16 - strcc \irqstat, [\base, #GIC_CPU_EOI] - cmpcs \irqnr, \irqnr - .endm - - /* As above, this assumes that irqstat and base - * are preserved.. - */ - - .macro test_for_ltirq, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - mov \tmp, #0 - cmp \irqnr, #29 - moveq \tmp, #1 - streq \irqstat, [\base, #GIC_CPU_EOI] - cmp \tmp, #0 - .endm diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c index f71175a766d..2115a0cf07b 100644 --- a/arch/arm/mach-ux500/platsmp.c +++ b/arch/arm/mach-ux500/platsmp.c @@ -49,7 +49,7 @@ void __cpuinit platform_secondary_init(unsigned int cpu) * core (e.g. timer irq), then they will not have been enabled * for us: do so */ - gic_cpu_init(0, __io_address(UX500_GIC_CPU_BASE)); + gic_secondary_init(0); /* * let the primary processor know we're out of the diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 8c1ca1d6353..13a83e45a33 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -48,7 +48,9 @@ #include <asm/mach/map.h> #include <mach/hardware.h> #include <mach/platform.h> -#include <plat/timer-sp.h> +#include <asm/hardware/timer-sp.h> + +#include <plat/sched_clock.h> #include "core.h" @@ -885,6 +887,12 @@ void __init versatile_init(void) } /* + * The sched_clock counter + */ +#define REFCOUNTER (__io_address(VERSATILE_SYS_BASE) + \ + VERSATILE_SYS_24MHz_OFFSET) + +/* * Where is the timer (VA)? */ #define TIMER0_VA_BASE __io_address(VERSATILE_TIMER0_1_BASE) @@ -899,6 +907,8 @@ static void __init versatile_timer_init(void) { u32 val; + versatile_sched_clock_init(REFCOUNTER, 24000000); + /* * set clock frequency: * VERSATILE_REFCLK is 32KHz diff --git a/arch/arm/mach-vexpress/core.h b/arch/arm/mach-vexpress/core.h index 57dd95ce41f..362780d868d 100644 --- a/arch/arm/mach-vexpress/core.h +++ b/arch/arm/mach-vexpress/core.h @@ -22,5 +22,3 @@ struct map_desc; void v2m_map_io(struct map_desc *tile, size_t num); extern struct sys_timer v2m_timer; - -extern void __iomem *gic_cpu_base_addr; diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c index f4455e3ed6a..e628402b754 100644 --- a/arch/arm/mach-vexpress/ct-ca9x4.c +++ b/arch/arm/mach-vexpress/ct-ca9x4.c @@ -20,7 +20,7 @@ #include <mach/ct-ca9x4.h> -#include <plat/timer-sp.h> +#include <asm/hardware/timer-sp.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -59,13 +59,10 @@ static void __init ct_ca9x4_map_io(void) v2m_map_io(ct_ca9x4_io_desc, ARRAY_SIZE(ct_ca9x4_io_desc)); } -void __iomem *gic_cpu_base_addr; - static void __init ct_ca9x4_init_irq(void) { - gic_cpu_base_addr = MMIO_P2V(A9_MPCORE_GIC_CPU); - gic_dist_init(0, MMIO_P2V(A9_MPCORE_GIC_DIST), 29); - gic_cpu_init(0, gic_cpu_base_addr); + gic_init(0, 29, MMIO_P2V(A9_MPCORE_GIC_DIST), + MMIO_P2V(A9_MPCORE_GIC_CPU)); } #if 0 diff --git a/arch/arm/mach-vexpress/headsmp.S b/arch/arm/mach-vexpress/headsmp.S index 8a78ff68e1e..7a3f0632947 100644 --- a/arch/arm/mach-vexpress/headsmp.S +++ b/arch/arm/mach-vexpress/headsmp.S @@ -35,5 +35,6 @@ pen: ldr r7, [r6] */ b secondary_startup + .align 1: .long . .long pen_release diff --git a/arch/arm/mach-vexpress/include/mach/entry-macro.S b/arch/arm/mach-vexpress/include/mach/entry-macro.S index 20e9fb514f0..73c11297509 100644 --- a/arch/arm/mach-vexpress/include/mach/entry-macro.S +++ b/arch/arm/mach-vexpress/include/mach/entry-macro.S @@ -1,67 +1,7 @@ -#include <asm/hardware/gic.h> +#include <asm/hardware/entry-macro-gic.S> .macro disable_fiq .endm - .macro get_irqnr_preamble, base, tmp - ldr \base, =gic_cpu_base_addr - ldr \base, [\base] - .endm - .macro arch_ret_to_user, tmp1, tmp2 .endm - - /* - * The interrupt numbering scheme is defined in the - * interrupt controller spec. To wit: - * - * Interrupts 0-15 are IPI - * 16-28 are reserved - * 29-31 are local. We allow 30 to be used for the watchdog. - * 32-1020 are global - * 1021-1022 are reserved - * 1023 is "spurious" (no interrupt) - * - * For now, we ignore all local interrupts so only return an interrupt if it's - * between 30 and 1020. The test_for_ipi routine below will pick up on IPIs. - * - * A simple read from the controller will tell us the number of the highest - * priority enabled interrupt. We then just need to check whether it is in the - * valid range for an IRQ (30-1020 inclusive). - */ - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - ldr \irqstat, [\base, #GIC_CPU_INTACK] /* bits 12-10 = src CPU, 9-0 = int # */ - ldr \tmp, =1021 - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #29 - cmpcc \irqnr, \irqnr - cmpne \irqnr, \tmp - cmpcs \irqnr, \irqnr - .endm - - /* We assume that irqstat (the raw value of the IRQ acknowledge - * register) is preserved from the macro above. - * If there is an IPI, we immediately signal end of interrupt on the - * controller, since this requires the original irqstat value which - * we won't easily be able to recreate later. - */ - - .macro test_for_ipi, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #16 - strcc \irqstat, [\base, #GIC_CPU_EOI] - cmpcs \irqnr, \irqnr - .endm - - /* As above, this assumes that irqstat and base are preserved.. */ - - .macro test_for_ltirq, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - mov \tmp, #0 - cmp \irqnr, #29 - moveq \tmp, #1 - streq \irqstat, [\base, #GIC_CPU_EOI] - cmp \tmp, #0 - .endm - diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c index 8ce9fef2955..b1687b6abe6 100644 --- a/arch/arm/mach-vexpress/platsmp.c +++ b/arch/arm/mach-vexpress/platsmp.c @@ -61,7 +61,7 @@ void __cpuinit platform_secondary_init(unsigned int cpu) * core (e.g. timer irq), then they will not have been enabled * for us: do so */ - gic_cpu_init(0, gic_cpu_base_addr); + gic_secondary_init(0); /* * let the primary processor know we're out of the diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index d374a78986e..a9ed3428a2f 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -18,10 +18,11 @@ #include <asm/mach/map.h> #include <asm/mach/time.h> #include <asm/hardware/arm_timer.h> +#include <asm/hardware/timer-sp.h> #include <mach/motherboard.h> -#include <plat/timer-sp.h> +#include <plat/sched_clock.h> #include "core.h" @@ -49,6 +50,8 @@ void __init v2m_map_io(struct map_desc *tile, size_t num) static void __init v2m_timer_init(void) { + versatile_sched_clock_init(MMIO_P2V(V2M_SYS_24MHZ), 24000000); + writel(0, MMIO_P2V(V2M_TIMER0) + TIMER_CTRL); writel(0, MMIO_P2V(V2M_TIMER1) + TIMER_CTRL); diff --git a/arch/arm/mach-w90x900/time.c b/arch/arm/mach-w90x900/time.c index b80f769bc13..4b089cb930d 100644 --- a/arch/arm/mach-w90x900/time.c +++ b/arch/arm/mach-w90x900/time.c @@ -153,7 +153,6 @@ static struct clocksource clocksource_nuc900 = { .rating = 200, .read = nuc900_get_cycles, .mask = CLOCKSOURCE_MASK(TDR_SHIFT), - .shift = 10, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -176,9 +175,7 @@ static void __init nuc900_clocksource_init(void) val |= (COUNTEN | PERIOD | PRESCALE); __raw_writel(val, REG_TCSR1); - clocksource_nuc900.mult = - clocksource_khz2mult((rate / 1000), clocksource_nuc900.shift); - clocksource_register(&clocksource_nuc900); + clocksource_register_hz(&clocksource_nuc900, rate); } static void __init nuc900_timer_init(void) diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c index 6e77c042d8e..e0b0e7a4ec6 100644 --- a/arch/arm/mm/cache-feroceon-l2.c +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -13,13 +13,9 @@ */ #include <linux/init.h> +#include <linux/highmem.h> #include <asm/cacheflush.h> -#include <asm/kmap_types.h> -#include <asm/fixmap.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> #include <plat/cache-feroceon-l2.h> -#include "mm.h" /* * Low-level cache maintenance operations. @@ -39,27 +35,30 @@ * between which we don't want to be preempted. */ -static inline unsigned long l2_start_va(unsigned long paddr) +static inline unsigned long l2_get_va(unsigned long paddr) { #ifdef CONFIG_HIGHMEM /* - * Let's do our own fixmap stuff in a minimal way here. * Because range ops can't be done on physical addresses, * we simply install a virtual mapping for it only for the * TLB lookup to occur, hence no need to flush the untouched - * memory mapping. This is protected with the disabling of - * interrupts by the caller. + * memory mapping afterwards (note: a cache flush may happen + * in some circumstances depending on the path taken in kunmap_atomic). */ - unsigned long idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id(); - unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte_ext(TOP_PTE(vaddr), pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL), 0); - local_flush_tlb_kernel_page(vaddr); - return vaddr + (paddr & ~PAGE_MASK); + void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT); + return (unsigned long)vaddr + (paddr & ~PAGE_MASK); #else return __phys_to_virt(paddr); #endif } +static inline void l2_put_va(unsigned long vaddr) +{ +#ifdef CONFIG_HIGHMEM + kunmap_atomic((void *)vaddr); +#endif +} + static inline void l2_clean_pa(unsigned long addr) { __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr)); @@ -76,13 +75,14 @@ static inline void l2_clean_pa_range(unsigned long start, unsigned long end) */ BUG_ON((start ^ end) >> PAGE_SHIFT); - raw_local_irq_save(flags); - va_start = l2_start_va(start); + va_start = l2_get_va(start); va_end = va_start + (end - start); + raw_local_irq_save(flags); __asm__("mcr p15, 1, %0, c15, c9, 4\n\t" "mcr p15, 1, %1, c15, c9, 5" : : "r" (va_start), "r" (va_end)); raw_local_irq_restore(flags); + l2_put_va(va_start); } static inline void l2_clean_inv_pa(unsigned long addr) @@ -106,13 +106,14 @@ static inline void l2_inv_pa_range(unsigned long start, unsigned long end) */ BUG_ON((start ^ end) >> PAGE_SHIFT); - raw_local_irq_save(flags); - va_start = l2_start_va(start); + va_start = l2_get_va(start); va_end = va_start + (end - start); + raw_local_irq_save(flags); __asm__("mcr p15, 1, %0, c15, c11, 4\n\t" "mcr p15, 1, %1, c15, c11, 5" : : "r" (va_start), "r" (va_end)); raw_local_irq_restore(flags); + l2_put_va(va_start); } static inline void l2_inv_all(void) diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 99fa688dfad..c96fa1b3f49 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -203,6 +203,10 @@ ENTRY(v6_flush_kern_dcache_area) * - end - virtual end address of region */ v6_dma_inv_range: +#ifdef CONFIG_DMA_CACHE_RWFO + ldrb r2, [r0] @ read for ownership + strb r2, [r0] @ write for ownership +#endif tst r0, #D_CACHE_LINE_SIZE - 1 bic r0, r0, #D_CACHE_LINE_SIZE - 1 #ifdef HARVARD_CACHE @@ -211,6 +215,10 @@ v6_dma_inv_range: mcrne p15, 0, r0, c7, c11, 1 @ clean unified line #endif tst r1, #D_CACHE_LINE_SIZE - 1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrneb r2, [r1, #-1] @ read for ownership + strneb r2, [r1, #-1] @ write for ownership +#endif bic r1, r1, #D_CACHE_LINE_SIZE - 1 #ifdef HARVARD_CACHE mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line @@ -218,10 +226,6 @@ v6_dma_inv_range: mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line #endif 1: -#ifdef CONFIG_DMA_CACHE_RWFO - ldr r2, [r0] @ read for ownership - str r2, [r0] @ write for ownership -#endif #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c6, 1 @ invalidate D line #else @@ -229,6 +233,10 @@ v6_dma_inv_range: #endif add r0, r0, #D_CACHE_LINE_SIZE cmp r0, r1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrlo r2, [r0] @ read for ownership + strlo r2, [r0] @ write for ownership +#endif blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer @@ -263,12 +271,12 @@ v6_dma_clean_range: * - end - virtual end address of region */ ENTRY(v6_dma_flush_range) - bic r0, r0, #D_CACHE_LINE_SIZE - 1 -1: #ifdef CONFIG_DMA_CACHE_RWFO - ldr r2, [r0] @ read for ownership - str r2, [r0] @ write for ownership + ldrb r2, [r0] @ read for ownership + strb r2, [r0] @ write for ownership #endif + bic r0, r0, #D_CACHE_LINE_SIZE - 1 +1: #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line #else @@ -276,6 +284,10 @@ ENTRY(v6_dma_flush_range) #endif add r0, r0, #D_CACHE_LINE_SIZE cmp r0, r1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrlob r2, [r0] @ read for ownership + strlob r2, [r0] @ write for ownership +#endif blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index a3ebf7a4f49..6136e68ce95 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -173,15 +173,22 @@ ENTRY(v7_coherent_user_range) UNWIND(.fnstart ) dcache_line_size r2, r3 sub r3, r2, #1 - bic r0, r0, r3 + bic r12, r0, r3 1: - USER( mcr p15, 0, r0, c7, c11, 1 ) @ clean D line to the point of unification + USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification + add r12, r12, r2 + cmp r12, r1 + blo 1b dsb - USER( mcr p15, 0, r0, c7, c5, 1 ) @ invalidate I line - add r0, r0, r2 + icache_line_size r2, r3 + sub r3, r2, #1 + bic r12, r0, r3 2: - cmp r0, r1 - blo 1b + USER( mcr p15, 0, r12, c7, c5, 1 ) @ invalidate I line + add r12, r12, r2 + cmp r12, r1 + blo 2b +3: mov r0, #0 ALT_SMP(mcr p15, 0, r0, c7, c1, 6) @ invalidate BTB Inner Shareable ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB @@ -194,10 +201,10 @@ ENTRY(v7_coherent_user_range) * isn't mapped, just try the next page. */ 9001: - mov r0, r0, lsr #12 - mov r0, r0, lsl #12 - add r0, r0, #4096 - b 2b + mov r12, r12, lsr #12 + mov r12, r12, lsl #12 + add r12, r12, #4096 + b 3b UNWIND(.fnend ) ENDPROC(v7_coherent_kern_range) ENDPROC(v7_coherent_user_range) diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c index c3154928bcc..5a32020471e 100644 --- a/arch/arm/mm/cache-xsc3l2.c +++ b/arch/arm/mm/cache-xsc3l2.c @@ -17,14 +17,10 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/init.h> +#include <linux/highmem.h> #include <asm/system.h> #include <asm/cputype.h> #include <asm/cacheflush.h> -#include <asm/kmap_types.h> -#include <asm/fixmap.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include "mm.h" #define CR_L2 (1 << 26) @@ -71,16 +67,15 @@ static inline void xsc3_l2_inv_all(void) dsb(); } +static inline void l2_unmap_va(unsigned long va) +{ #ifdef CONFIG_HIGHMEM -#define l2_map_save_flags(x) raw_local_save_flags(x) -#define l2_map_restore_flags(x) raw_local_irq_restore(x) -#else -#define l2_map_save_flags(x) ((x) = 0) -#define l2_map_restore_flags(x) ((void)(x)) + if (va != -1) + kunmap_atomic((void *)va); #endif +} -static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va, - unsigned long flags) +static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va) { #ifdef CONFIG_HIGHMEM unsigned long va = prev_va & PAGE_MASK; @@ -89,17 +84,10 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va, /* * Switching to a new page. Because cache ops are * using virtual addresses only, we must put a mapping - * in place for it. We also enable interrupts for a - * short while and disable them again to protect this - * mapping. + * in place for it. */ - unsigned long idx; - raw_local_irq_restore(flags); - idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id(); - va = __fix_to_virt(FIX_KMAP_BEGIN + idx); - raw_local_irq_restore(flags | PSR_I_BIT); - set_pte_ext(TOP_PTE(va), pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL), 0); - local_flush_tlb_kernel_page(va); + l2_unmap_va(prev_va); + va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT); } return va + (pa_offset >> (32 - PAGE_SHIFT)); #else @@ -109,7 +97,7 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va, static void xsc3_l2_inv_range(unsigned long start, unsigned long end) { - unsigned long vaddr, flags; + unsigned long vaddr; if (start == 0 && end == -1ul) { xsc3_l2_inv_all(); @@ -117,13 +105,12 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end) } vaddr = -1; /* to force the first mapping */ - l2_map_save_flags(flags); /* * Clean and invalidate partial first cache line. */ if (start & (CACHE_LINE_SIZE - 1)) { - vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr, flags); + vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr); xsc3_l2_clean_mva(vaddr); xsc3_l2_inv_mva(vaddr); start = (start | (CACHE_LINE_SIZE - 1)) + 1; @@ -133,7 +120,7 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end) * Invalidate all full cache lines between 'start' and 'end'. */ while (start < (end & ~(CACHE_LINE_SIZE - 1))) { - vaddr = l2_map_va(start, vaddr, flags); + vaddr = l2_map_va(start, vaddr); xsc3_l2_inv_mva(vaddr); start += CACHE_LINE_SIZE; } @@ -142,31 +129,30 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end) * Clean and invalidate partial last cache line. */ if (start < end) { - vaddr = l2_map_va(start, vaddr, flags); + vaddr = l2_map_va(start, vaddr); xsc3_l2_clean_mva(vaddr); xsc3_l2_inv_mva(vaddr); } - l2_map_restore_flags(flags); + l2_unmap_va(vaddr); dsb(); } static void xsc3_l2_clean_range(unsigned long start, unsigned long end) { - unsigned long vaddr, flags; + unsigned long vaddr; vaddr = -1; /* to force the first mapping */ - l2_map_save_flags(flags); start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { - vaddr = l2_map_va(start, vaddr, flags); + vaddr = l2_map_va(start, vaddr); xsc3_l2_clean_mva(vaddr); start += CACHE_LINE_SIZE; } - l2_map_restore_flags(flags); + l2_unmap_va(vaddr); dsb(); } @@ -193,7 +179,7 @@ static inline void xsc3_l2_flush_all(void) static void xsc3_l2_flush_range(unsigned long start, unsigned long end) { - unsigned long vaddr, flags; + unsigned long vaddr; if (start == 0 && end == -1ul) { xsc3_l2_flush_all(); @@ -201,17 +187,16 @@ static void xsc3_l2_flush_range(unsigned long start, unsigned long end) } vaddr = -1; /* to force the first mapping */ - l2_map_save_flags(flags); start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { - vaddr = l2_map_va(start, vaddr, flags); + vaddr = l2_map_va(start, vaddr); xsc3_l2_clean_mva(vaddr); xsc3_l2_inv_mva(vaddr); start += CACHE_LINE_SIZE; } - l2_map_restore_flags(flags); + l2_unmap_va(vaddr); dsb(); } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e24fe6d9c71..6b48e0a3d7a 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -17,6 +17,7 @@ #include <linux/init.h> #include <linux/device.h> #include <linux/dma-mapping.h> +#include <linux/highmem.h> #include <asm/memory.h> #include <asm/highmem.h> @@ -480,10 +481,10 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, op(vaddr, len, dir); kunmap_high(page); } else if (cache_is_vipt()) { - pte_t saved_pte; - vaddr = kmap_high_l1_vipt(page, &saved_pte); + /* unmapped pages might still be cached */ + vaddr = kmap_atomic(page); op(vaddr + offset, len, dir); - kunmap_high_l1_vipt(page, saved_pte); + kunmap_atomic(vaddr); } } else { vaddr = page_address(page) + offset; diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 391ffae7509..c29f2839f1d 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/pagemap.h> +#include <linux/highmem.h> #include <asm/cacheflush.h> #include <asm/cachetype.h> @@ -180,10 +181,10 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) __cpuc_flush_dcache_area(addr, PAGE_SIZE); kunmap_high(page); } else if (cache_is_vipt()) { - pte_t saved_pte; - addr = kmap_high_l1_vipt(page, &saved_pte); + /* unmapped pages might still be cached */ + addr = kmap_atomic(page); __cpuc_flush_dcache_area(addr, PAGE_SIZE); - kunmap_high_l1_vipt(page, saved_pte); + kunmap_atomic(addr); } } diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index c435fd9e1da..807c0573abb 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -140,90 +140,3 @@ struct page *kmap_atomic_to_page(const void *ptr) pte = TOP_PTE(vaddr); return pte_page(*pte); } - -#ifdef CONFIG_CPU_CACHE_VIPT - -#include <linux/percpu.h> - -/* - * The VIVT cache of a highmem page is always flushed before the page - * is unmapped. Hence unmapped highmem pages need no cache maintenance - * in that case. - * - * However unmapped pages may still be cached with a VIPT cache, and - * it is not possible to perform cache maintenance on them using physical - * addresses unfortunately. So we have no choice but to set up a temporary - * virtual mapping for that purpose. - * - * Yet this VIPT cache maintenance may be triggered from DMA support - * functions which are possibly called from interrupt context. As we don't - * want to keep interrupt disabled all the time when such maintenance is - * taking place, we therefore allow for some reentrancy by preserving and - * restoring the previous fixmap entry before the interrupted context is - * resumed. If the reentrancy depth is 0 then there is no need to restore - * the previous fixmap, and leaving the current one in place allow it to - * be reused the next time without a TLB flush (common with DMA). - */ - -static DEFINE_PER_CPU(int, kmap_high_l1_vipt_depth); - -void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte) -{ - unsigned int idx, cpu; - int *depth; - unsigned long vaddr, flags; - pte_t pte, *ptep; - - if (!in_interrupt()) - preempt_disable(); - - cpu = smp_processor_id(); - depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); - - idx = KM_L1_CACHE + KM_TYPE_NR * cpu; - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - ptep = TOP_PTE(vaddr); - pte = mk_pte(page, kmap_prot); - - raw_local_irq_save(flags); - (*depth)++; - if (pte_val(*ptep) == pte_val(pte)) { - *saved_pte = pte; - } else { - *saved_pte = *ptep; - set_pte_ext(ptep, pte, 0); - local_flush_tlb_kernel_page(vaddr); - } - raw_local_irq_restore(flags); - - return (void *)vaddr; -} - -void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte) -{ - unsigned int idx, cpu = smp_processor_id(); - int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); - unsigned long vaddr, flags; - pte_t pte, *ptep; - - idx = KM_L1_CACHE + KM_TYPE_NR * cpu; - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - ptep = TOP_PTE(vaddr); - pte = mk_pte(page, kmap_prot); - - BUG_ON(pte_val(*ptep) != pte_val(pte)); - BUG_ON(*depth <= 0); - - raw_local_irq_save(flags); - (*depth)--; - if (*depth != 0 && pte_val(pte) != pte_val(saved_pte)) { - set_pte_ext(ptep, saved_pte, 0); - local_flush_tlb_kernel_page(vaddr); - } - raw_local_irq_restore(flags); - - if (!in_interrupt()) - preempt_enable(); -} - -#endif /* CONFIG_CPU_CACHE_VIPT */ diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 337f10256cd..f8f777df8d7 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -61,17 +61,27 @@ .endm /* - * cache_line_size - get the cache line size from the CSIDR register - * (available on ARMv7+). It assumes that the CSSR register was configured - * to access the L1 data cache CSIDR. + * dcache_line_size - get the minimum D-cache line size from the CTR register + * on ARMv7. */ .macro dcache_line_size, reg, tmp - mrc p15, 1, \tmp, c0, c0, 0 @ read CSIDR - and \tmp, \tmp, #7 @ cache line size encoding - mov \reg, #16 @ size offset + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr + lsr \tmp, \tmp, #16 + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word mov \reg, \reg, lsl \tmp @ actual cache line size .endm +/* + * icache_line_size - get the minimum I-cache line size from the CTR register + * on ARMv7. + */ + .macro icache_line_size, reg, tmp + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word + mov \reg, \reg, lsl \tmp @ actual cache line size + .endm /* * Sanity check the PTE configuration for the code below - which makes diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 2b5b20baf80..7401f4d7e67 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -386,7 +386,7 @@ __v7_ca9mp_proc_info: PMD_SECT_XN | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __v7_ca9mp_setup + W(b) __v7_ca9mp_setup .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS @@ -418,7 +418,7 @@ __v7_proc_info: PMD_SECT_XN | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __v7_setup + W(b) __v7_setup .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c index 85d3e55ca4a..07f23bb42be 100644 --- a/arch/arm/plat-iop/time.c +++ b/arch/arm/plat-iop/time.c @@ -17,12 +17,15 @@ #include <linux/interrupt.h> #include <linux/time.h> #include <linux/init.h> +#include <linux/sched.h> #include <linux/timex.h> +#include <linux/sched.h> #include <linux/io.h> #include <linux/clocksource.h> #include <linux/clockchips.h> #include <mach/hardware.h> #include <asm/irq.h> +#include <asm/sched_clock.h> #include <asm/uaccess.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> @@ -36,7 +39,7 @@ /* * IOP clocksource (free-running timer 1). */ -static cycle_t iop_clocksource_read(struct clocksource *unused) +static cycle_t notrace iop_clocksource_read(struct clocksource *unused) { return 0xffffffffu - read_tcr1(); } @@ -49,15 +52,21 @@ static struct clocksource iop_clocksource = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +static DEFINE_CLOCK_DATA(cd); + /* * IOP sched_clock() implementation via its clocksource. */ -unsigned long long sched_clock(void) +unsigned long long notrace sched_clock(void) { - cycle_t cyc = iop_clocksource_read(NULL); - struct clocksource *cs = &iop_clocksource; + u32 cyc = 0xffffffffu - read_tcr1(); + return cyc_to_sched_clock(&cd, cyc, (u32)~0); +} - return clocksource_cyc2ns(cyc, cs->mult, cs->shift); +static void notrace iop_update_sched_clock(void) +{ + u32 cyc = 0xffffffffu - read_tcr1(); + update_sched_clock(&cd, cyc, (u32)~0); } /* @@ -87,6 +96,7 @@ static void iop_set_mode(enum clock_event_mode mode, case CLOCK_EVT_MODE_PERIODIC: write_tmr0(tmr & ~IOP_TMR_EN); write_tcr0(ticks_per_jiffy - 1); + write_trr0(ticks_per_jiffy - 1); tmr |= (IOP_TMR_RELOAD | IOP_TMR_EN); break; case CLOCK_EVT_MODE_ONESHOT: @@ -142,6 +152,8 @@ void __init iop_init_time(unsigned long tick_rate) { u32 timer_ctl; + init_sched_clock(&cd, iop_update_sched_clock, 32, tick_rate); + ticks_per_jiffy = DIV_ROUND_CLOSEST(tick_rate, HZ); iop_tick_rate = tick_rate; @@ -152,6 +164,7 @@ void __init iop_init_time(unsigned long tick_rate) * Set up interrupting clockevent timer 0. */ write_tmr0(timer_ctl & ~IOP_TMR_EN); + write_tisr(1); setup_irq(IRQ_IOP_TIMER0, &iop_timer_irq); clockevents_calc_mult_shift(&iop_clockevent, tick_rate, IOP_MIN_RANGE); @@ -161,9 +174,6 @@ void __init iop_init_time(unsigned long tick_rate) clockevent_delta2ns(0xf, &iop_clockevent); iop_clockevent.cpumask = cpumask_of(0); clockevents_register_device(&iop_clockevent); - write_trr0(ticks_per_jiffy - 1); - write_tcr0(ticks_per_jiffy - 1); - write_tmr0(timer_ctl); /* * Set up free-running clocksource timer 1. @@ -171,7 +181,5 @@ void __init iop_init_time(unsigned long tick_rate) write_trr1(0xffffffff); write_tcr1(0xffffffff); write_tmr1(timer_ctl); - clocksource_calc_mult_shift(&iop_clocksource, tick_rate, - IOP_MIN_RANGE); - clocksource_register(&iop_clocksource); + clocksource_register_hz(&iop_clocksource, tick_rate); } diff --git a/arch/arm/plat-mxc/epit.c b/arch/arm/plat-mxc/epit.c index ee9582f4972..d69d343ff61 100644 --- a/arch/arm/plat-mxc/epit.c +++ b/arch/arm/plat-mxc/epit.c @@ -93,7 +93,6 @@ static struct clocksource clocksource_epit = { .rating = 200, .read = epit_read, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -101,9 +100,7 @@ static int __init epit_clocksource_init(struct clk *timer_clk) { unsigned int c = clk_get_rate(timer_clk); - clocksource_epit.mult = clocksource_hz2mult(c, - clocksource_epit.shift); - clocksource_register(&clocksource_epit); + clocksource_register_hz(&clocksource_epit, c); return 0; } diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c index f9a1b059a76..9f0c2610595 100644 --- a/arch/arm/plat-mxc/time.c +++ b/arch/arm/plat-mxc/time.c @@ -120,7 +120,6 @@ static struct clocksource clocksource_mxc = { .rating = 200, .read = mx1_2_get_cycles, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -131,9 +130,7 @@ static int __init mxc_clocksource_init(struct clk *timer_clk) if (timer_is_v2()) clocksource_mxc.read = v2_get_cycles; - clocksource_mxc.mult = clocksource_hz2mult(c, - clocksource_mxc.shift); - clocksource_register(&clocksource_mxc); + clocksource_register_hz(&clocksource_mxc, c); return 0; } diff --git a/arch/arm/plat-nomadik/Kconfig b/arch/arm/plat-nomadik/Kconfig index 5da3f97c537..187f4e84bb2 100644 --- a/arch/arm/plat-nomadik/Kconfig +++ b/arch/arm/plat-nomadik/Kconfig @@ -14,6 +14,7 @@ if PLAT_NOMADIK config HAS_MTU bool + select HAVE_SCHED_CLOCK help Support for Multi Timer Unit. MTU provides access to multiple interrupt generating programmable diff --git a/arch/arm/plat-nomadik/timer.c b/arch/arm/plat-nomadik/timer.c index 63cdc6025bd..41723402006 100644 --- a/arch/arm/plat-nomadik/timer.c +++ b/arch/arm/plat-nomadik/timer.c @@ -17,9 +17,9 @@ #include <linux/clk.h> #include <linux/jiffies.h> #include <linux/err.h> -#include <linux/cnt32_to_63.h> -#include <linux/timer.h> +#include <linux/sched.h> #include <asm/mach/time.h> +#include <asm/sched_clock.h> #include <plat/mtu.h> @@ -52,81 +52,24 @@ static struct clocksource nmdk_clksrc = { * Override the global weak sched_clock symbol with this * local implementation which uses the clocksource to get some * better resolution when scheduling the kernel. - * - * Because the hardware timer period may be quite short - * (32.3 secs on the 133 MHz MTU timer selection on ux500) - * and because cnt32_to_63() needs to be called at least once per - * half period to work properly, a kernel keepwarm() timer is set up - * to ensure this requirement is always met. - * - * Also the sched_clock timer will wrap around at some point, - * here we set it to run continously for a year. */ -#define SCHED_CLOCK_MIN_WRAP 3600*24*365 -static struct timer_list cnt32_to_63_keepwarm_timer; -static u32 sched_mult; -static u32 sched_shift; +static DEFINE_CLOCK_DATA(cd); unsigned long long notrace sched_clock(void) { - u64 cycles; + u32 cyc; if (unlikely(!mtu_base)) return 0; - cycles = cnt32_to_63(-readl(mtu_base + MTU_VAL(0))); - /* - * sched_mult is guaranteed to be even so will - * shift out bit 63 - */ - return (cycles * sched_mult) >> sched_shift; + cyc = -readl(mtu_base + MTU_VAL(0)); + return cyc_to_sched_clock(&cd, cyc, (u32)~0); } -/* Just kick sched_clock every so often */ -static void cnt32_to_63_keepwarm(unsigned long data) +static void notrace nomadik_update_sched_clock(void) { - mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data)); - (void) sched_clock(); -} - -/* - * Set up a timer to keep sched_clock():s 32_to_63 algorithm warm - * once in half a 32bit timer wrap interval. - */ -static void __init nmdk_sched_clock_init(unsigned long rate) -{ - u32 v; - unsigned long delta; - u64 days; - - /* Find the apropriate mult and shift factors */ - clocks_calc_mult_shift(&sched_mult, &sched_shift, - rate, NSEC_PER_SEC, SCHED_CLOCK_MIN_WRAP); - /* We need to multiply by an even number to get rid of bit 63 */ - if (sched_mult & 1) - sched_mult++; - - /* Let's see what we get, take max counter and scale it */ - days = (0xFFFFFFFFFFFFFFFFLLU * sched_mult) >> sched_shift; - do_div(days, NSEC_PER_SEC); - do_div(days, (3600*24)); - - pr_info("sched_clock: using %d bits @ %lu Hz wrap in %lu days\n", - (64 - sched_shift), rate, (unsigned long) days); - - /* - * Program a timer to kick us at half 32bit wraparound - * Formula: seconds per wrap = (2^32) / f - */ - v = 0xFFFFFFFFUL / rate; - /* We want half of the wrap time to keep cnt32_to_63 warm */ - v /= 2; - pr_debug("sched_clock: prescaled timer rate: %lu Hz, " - "initialize keepwarm timer every %d seconds\n", rate, v); - /* Convert seconds to jiffies */ - delta = msecs_to_jiffies(v*1000); - setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, delta); - mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + delta)); + u32 cyc = -readl(mtu_base + MTU_VAL(0)); + update_sched_clock(&cd, cyc, (u32)~0); } /* Clockevent device: use one-shot mode */ @@ -222,7 +165,6 @@ void __init nmdk_timer_init(void) } else { cr |= MTU_CRn_PRESCALE_1; } - clocksource_calc_mult_shift(&nmdk_clksrc, rate, MTU_MIN_RANGE); /* Timer 0 is the free running clocksource */ writel(cr, mtu_base + MTU_CR(0)); @@ -233,11 +175,11 @@ void __init nmdk_timer_init(void) /* Now the clock source is ready */ nmdk_clksrc.read = nmdk_read_timer; - if (clocksource_register(&nmdk_clksrc)) + if (clocksource_register_hz(&nmdk_clksrc, rate)) pr_err("timer: failed to initialize clock source %s\n", nmdk_clksrc.name); - nmdk_sched_clock_init(rate); + init_sched_clock(&cd, nomadik_update_sched_clock, 32, rate); /* Timer 1 is used for events */ diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index 155fe43a672..ea4644021fb 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -15,7 +15,11 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/clk.h> +#include <linux/err.h> #include <linux/io.h> +#include <linux/sched.h> + +#include <asm/sched_clock.h> #include <plat/common.h> #include <plat/board.h> @@ -44,7 +48,7 @@ static u32 offset_32k __read_mostly; #ifdef CONFIG_ARCH_OMAP16XX -static cycle_t omap16xx_32k_read(struct clocksource *cs) +static cycle_t notrace omap16xx_32k_read(struct clocksource *cs) { return omap_readl(OMAP16XX_TIMER_32K_SYNCHRONIZED) - offset_32k; } @@ -53,7 +57,7 @@ static cycle_t omap16xx_32k_read(struct clocksource *cs) #endif #ifdef CONFIG_ARCH_OMAP2420 -static cycle_t omap2420_32k_read(struct clocksource *cs) +static cycle_t notrace omap2420_32k_read(struct clocksource *cs) { return omap_readl(OMAP2420_32KSYNCT_BASE + 0x10) - offset_32k; } @@ -62,7 +66,7 @@ static cycle_t omap2420_32k_read(struct clocksource *cs) #endif #ifdef CONFIG_ARCH_OMAP2430 -static cycle_t omap2430_32k_read(struct clocksource *cs) +static cycle_t notrace omap2430_32k_read(struct clocksource *cs) { return omap_readl(OMAP2430_32KSYNCT_BASE + 0x10) - offset_32k; } @@ -71,7 +75,7 @@ static cycle_t omap2430_32k_read(struct clocksource *cs) #endif #ifdef CONFIG_ARCH_OMAP3 -static cycle_t omap34xx_32k_read(struct clocksource *cs) +static cycle_t notrace omap34xx_32k_read(struct clocksource *cs) { return omap_readl(OMAP3430_32KSYNCT_BASE + 0x10) - offset_32k; } @@ -80,7 +84,7 @@ static cycle_t omap34xx_32k_read(struct clocksource *cs) #endif #ifdef CONFIG_ARCH_OMAP4 -static cycle_t omap44xx_32k_read(struct clocksource *cs) +static cycle_t notrace omap44xx_32k_read(struct clocksource *cs) { return omap_readl(OMAP4430_32KSYNCT_BASE + 0x10) - offset_32k; } @@ -92,7 +96,7 @@ static cycle_t omap44xx_32k_read(struct clocksource *cs) * Kernel assumes that sched_clock can be called early but may not have * things ready yet. */ -static cycle_t omap_32k_read_dummy(struct clocksource *cs) +static cycle_t notrace omap_32k_read_dummy(struct clocksource *cs) { return 0; } @@ -102,7 +106,6 @@ static struct clocksource clocksource_32k = { .rating = 250, .read = omap_32k_read_dummy, .mask = CLOCKSOURCE_MASK(32), - .shift = 10, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -110,10 +113,25 @@ static struct clocksource clocksource_32k = { * Returns current time from boot in nsecs. It's OK for this to wrap * around for now, as it's just a relative time stamp. */ -unsigned long long sched_clock(void) +static DEFINE_CLOCK_DATA(cd); + +/* + * Constants generated by clocks_calc_mult_shift(m, s, 32768, NSEC_PER_SEC, 60). + * This gives a resolution of about 30us and a wrap period of about 36hrs. + */ +#define SC_MULT 4000000000u +#define SC_SHIFT 17 + +unsigned long long notrace sched_clock(void) { - return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k), - clocksource_32k.mult, clocksource_32k.shift); + u32 cyc = clocksource_32k.read(&clocksource_32k); + return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT); +} + +static void notrace omap_update_sched_clock(void) +{ + u32 cyc = clocksource_32k.read(&clocksource_32k); + update_sched_clock(&cd, cyc, (u32)~0); } /** @@ -164,16 +182,16 @@ static int __init omap_init_clocksource_32k(void) return -ENODEV; sync_32k_ick = clk_get(NULL, "omap_32ksync_ick"); - if (sync_32k_ick) + if (!IS_ERR(sync_32k_ick)) clk_enable(sync_32k_ick); - clocksource_32k.mult = clocksource_hz2mult(32768, - clocksource_32k.shift); - offset_32k = clocksource_32k.read(&clocksource_32k); - if (clocksource_register(&clocksource_32k)) + if (clocksource_register_hz(&clocksource_32k, 32768)) printk(err, clocksource_32k.name); + + init_fixed_sched_clock(&cd, omap_update_sched_clock, 32, + 32768, SC_MULT, SC_SHIFT); } return 0; } diff --git a/arch/arm/plat-omap/include/plat/io.h b/arch/arm/plat-omap/include/plat/io.h index 128b549c279..204865f91d9 100644 --- a/arch/arm/plat-omap/include/plat/io.h +++ b/arch/arm/plat-omap/include/plat/io.h @@ -294,8 +294,8 @@ static inline void omap44xx_map_common_io(void) extern void omap2_init_common_hw(struct omap_sdrc_params *sdrc_cs0, struct omap_sdrc_params *sdrc_cs1); -#define __arch_ioremap(p,s,t) omap_ioremap(p,s,t) -#define __arch_iounmap(v) omap_iounmap(v) +#define __arch_ioremap omap_ioremap +#define __arch_iounmap omap_iounmap void __iomem *omap_ioremap(unsigned long phys, size_t size, unsigned int type); void omap_iounmap(volatile void __iomem *addr); diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c index e2c8eebe6b3..74dac419d32 100644 --- a/arch/arm/plat-omap/sram.c +++ b/arch/arm/plat-omap/sram.c @@ -166,7 +166,7 @@ static void __init omap_detect_sram(void) cpu_is_omap1710()) omap_sram_size = 0x4000; /* 16K */ else if (cpu_is_omap1611()) - omap_sram_size = 0x3e800; /* 250K */ + omap_sram_size = SZ_256K; else { printk(KERN_ERR "Could not detect SRAM size\n"); omap_sram_size = 0x4000; diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c index 715a30177f2..c3da2478b2a 100644 --- a/arch/arm/plat-orion/time.c +++ b/arch/arm/plat-orion/time.c @@ -13,11 +13,11 @@ #include <linux/kernel.h> #include <linux/sched.h> -#include <linux/cnt32_to_63.h> #include <linux/timer.h> #include <linux/clockchips.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <asm/sched_clock.h> #include <asm/mach/time.h> #include <mach/bridge-regs.h> #include <mach/hardware.h> @@ -44,52 +44,26 @@ static u32 ticks_per_jiffy; /* * Orion's sched_clock implementation. It has a resolution of - * at least 7.5ns (133MHz TCLK) and a maximum value of 834 days. - * - * Because the hardware timer period is quite short (21 secs if - * 200MHz TCLK) and because cnt32_to_63() needs to be called at - * least once per half period to work properly, a kernel timer is - * set up to ensure this requirement is always met. + * at least 7.5ns (133MHz TCLK). */ -#define TCLK2NS_SCALE_FACTOR 8 - -static unsigned long tclk2ns_scale; +static DEFINE_CLOCK_DATA(cd); -unsigned long long sched_clock(void) +unsigned long long notrace sched_clock(void) { - unsigned long long v = cnt32_to_63(0xffffffff - readl(TIMER0_VAL)); - return (v * tclk2ns_scale) >> TCLK2NS_SCALE_FACTOR; + u32 cyc = 0xffffffff - readl(TIMER0_VAL); + return cyc_to_sched_clock(&cd, cyc, (u32)~0); } -static struct timer_list cnt32_to_63_keepwarm_timer; -static void cnt32_to_63_keepwarm(unsigned long data) +static void notrace orion_update_sched_clock(void) { - mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data)); - (void) sched_clock(); + u32 cyc = 0xffffffff - readl(TIMER0_VAL); + update_sched_clock(&cd, cyc, (u32)~0); } static void __init setup_sched_clock(unsigned long tclk) { - unsigned long long v; - unsigned long data; - - v = NSEC_PER_SEC; - v <<= TCLK2NS_SCALE_FACTOR; - v += tclk/2; - do_div(v, tclk); - /* - * We want an even value to automatically clear the top bit - * returned by cnt32_to_63() without an additional run time - * instruction. So if the LSB is 1 then round it up. - */ - if (v & 1) - v++; - tclk2ns_scale = v; - - data = (0xffffffffUL / tclk / 2 - 2) * HZ; - setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, data); - mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data)); + init_sched_clock(&cd, orion_update_sched_clock, 32, tclk); } /* @@ -102,7 +76,6 @@ static cycle_t orion_clksrc_read(struct clocksource *cs) static struct clocksource orion_clksrc = { .name = "orion_clocksource", - .shift = 20, .rating = 300, .read = orion_clksrc_read, .mask = CLOCKSOURCE_MASK(32), @@ -245,8 +218,7 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk) writel(u & ~BRIDGE_INT_TIMER0, BRIDGE_MASK); u = readl(TIMER_CTRL); writel(u | TIMER0_EN | TIMER0_RELOAD_EN, TIMER_CTRL); - orion_clksrc.mult = clocksource_hz2mult(tclk, orion_clksrc.shift); - clocksource_register(&orion_clksrc); + clocksource_register_hz(&orion_clksrc, tclk); /* * Setup clockevent timer (interrupt-driven.) diff --git a/arch/arm/plat-s3c24xx/Kconfig b/arch/arm/plat-s3c24xx/Kconfig index 5a27b1b538f..eb105e61c74 100644 --- a/arch/arm/plat-s3c24xx/Kconfig +++ b/arch/arm/plat-s3c24xx/Kconfig @@ -8,7 +8,7 @@ config PLAT_S3C24XX default y select NO_IOPORT select ARCH_REQUIRE_GPIOLIB - select S3C_DEVICE_NAND + select S3C_DEV_NAND select S3C_GPIO_CFG_S3C24XX help Base platform code for any Samsung S3C24XX device diff --git a/arch/arm/plat-s3c24xx/cpu.c b/arch/arm/plat-s3c24xx/cpu.c index 76d0858c3cb..4a10c0f684b 100644 --- a/arch/arm/plat-s3c24xx/cpu.c +++ b/arch/arm/plat-s3c24xx/cpu.c @@ -88,7 +88,7 @@ static struct cpu_table cpu_ids[] __initdata = { { .idcode = 0x32440000, .idmask = 0xffffffff, - .map_io = s3c244x_map_io, + .map_io = s3c2440_map_io, .init_clocks = s3c244x_init_clocks, .init_uarts = s3c244x_init_uarts, .init = s3c2440_init, @@ -97,7 +97,7 @@ static struct cpu_table cpu_ids[] __initdata = { { .idcode = 0x32440001, .idmask = 0xffffffff, - .map_io = s3c244x_map_io, + .map_io = s3c2440_map_io, .init_clocks = s3c244x_init_clocks, .init_uarts = s3c244x_init_uarts, .init = s3c2440_init, @@ -106,7 +106,7 @@ static struct cpu_table cpu_ids[] __initdata = { { .idcode = 0x32440aaa, .idmask = 0xffffffff, - .map_io = s3c244x_map_io, + .map_io = s3c2442_map_io, .init_clocks = s3c244x_init_clocks, .init_uarts = s3c244x_init_uarts, .init = s3c2442_init, @@ -115,7 +115,7 @@ static struct cpu_table cpu_ids[] __initdata = { { .idcode = 0x32440aab, .idmask = 0xffffffff, - .map_io = s3c244x_map_io, + .map_io = s3c2442_map_io, .init_clocks = s3c244x_init_clocks, .init_uarts = s3c244x_init_uarts, .init = s3c2442_init, diff --git a/arch/arm/plat-s3c24xx/gpiolib.c b/arch/arm/plat-s3c24xx/gpiolib.c index 24c6f5a3059..243b6411050 100644 --- a/arch/arm/plat-s3c24xx/gpiolib.c +++ b/arch/arm/plat-s3c24xx/gpiolib.c @@ -82,8 +82,6 @@ static struct s3c_gpio_cfg s3c24xx_gpiocfg_banka = { struct s3c_gpio_cfg s3c24xx_gpiocfg_default = { .set_config = s3c_gpio_setcfg_s3c24xx, .get_config = s3c_gpio_getcfg_s3c24xx, - .set_pull = s3c_gpio_setpull_1up, - .get_pull = s3c_gpio_getpull_1up, }; struct s3c_gpio_chip s3c24xx_gpios[] = { diff --git a/arch/arm/plat-s3c24xx/include/plat/s3c244x.h b/arch/arm/plat-s3c24xx/include/plat/s3c244x.h index 307248d1ccb..89e8d0a25f8 100644 --- a/arch/arm/plat-s3c24xx/include/plat/s3c244x.h +++ b/arch/arm/plat-s3c24xx/include/plat/s3c244x.h @@ -21,17 +21,22 @@ extern void s3c244x_init_clocks(int xtal); #else #define s3c244x_init_clocks NULL #define s3c244x_init_uarts NULL -#define s3c244x_map_io NULL #endif #ifdef CONFIG_CPU_S3C2440 extern int s3c2440_init(void); + +extern void s3c2440_map_io(void); #else #define s3c2440_init NULL +#define s3c2440_map_io NULL #endif #ifdef CONFIG_CPU_S3C2442 extern int s3c2442_init(void); + +extern void s3c2442_map_io(void); #else #define s3c2442_init NULL +#define s3c2442_map_io NULL #endif diff --git a/arch/arm/plat-samsung/gpio-config.c b/arch/arm/plat-samsung/gpio-config.c index b732b773b9a..0aa32f242ee 100644 --- a/arch/arm/plat-samsung/gpio-config.c +++ b/arch/arm/plat-samsung/gpio-config.c @@ -280,18 +280,17 @@ s3c_gpio_pull_t s3c_gpio_getpull_updown(struct s3c_gpio_chip *chip, } #endif -#ifdef CONFIG_S3C_GPIO_PULL_UP -int s3c_gpio_setpull_1up(struct s3c_gpio_chip *chip, - unsigned int off, s3c_gpio_pull_t pull) +#if defined(CONFIG_S3C_GPIO_PULL_UP) || defined(CONFIG_S3C_GPIO_PULL_DOWN) +static int s3c_gpio_setpull_1(struct s3c_gpio_chip *chip, + unsigned int off, s3c_gpio_pull_t pull, + s3c_gpio_pull_t updown) { void __iomem *reg = chip->base + 0x08; u32 pup = __raw_readl(reg); - pup = __raw_readl(reg); - - if (pup == S3C_GPIO_PULL_UP) + if (pull == updown) pup &= ~(1 << off); - else if (pup == S3C_GPIO_PULL_NONE) + else if (pull == S3C_GPIO_PULL_NONE) pup |= (1 << off); else return -EINVAL; @@ -300,17 +299,45 @@ int s3c_gpio_setpull_1up(struct s3c_gpio_chip *chip, return 0; } -s3c_gpio_pull_t s3c_gpio_getpull_1up(struct s3c_gpio_chip *chip, - unsigned int off) +static s3c_gpio_pull_t s3c_gpio_getpull_1(struct s3c_gpio_chip *chip, + unsigned int off, s3c_gpio_pull_t updown) { void __iomem *reg = chip->base + 0x08; u32 pup = __raw_readl(reg); pup &= (1 << off); - return pup ? S3C_GPIO_PULL_NONE : S3C_GPIO_PULL_UP; + return pup ? S3C_GPIO_PULL_NONE : updown; +} +#endif /* CONFIG_S3C_GPIO_PULL_UP || CONFIG_S3C_GPIO_PULL_DOWN */ + +#ifdef CONFIG_S3C_GPIO_PULL_UP +s3c_gpio_pull_t s3c_gpio_getpull_1up(struct s3c_gpio_chip *chip, + unsigned int off) +{ + return s3c_gpio_getpull_1(chip, off, S3C_GPIO_PULL_UP); +} + +int s3c_gpio_setpull_1up(struct s3c_gpio_chip *chip, + unsigned int off, s3c_gpio_pull_t pull) +{ + return s3c_gpio_setpull_1(chip, off, pull, S3C_GPIO_PULL_UP); } #endif /* CONFIG_S3C_GPIO_PULL_UP */ +#ifdef CONFIG_S3C_GPIO_PULL_DOWN +s3c_gpio_pull_t s3c_gpio_getpull_1down(struct s3c_gpio_chip *chip, + unsigned int off) +{ + return s3c_gpio_getpull_1(chip, off, S3C_GPIO_PULL_DOWN); +} + +int s3c_gpio_setpull_1down(struct s3c_gpio_chip *chip, + unsigned int off, s3c_gpio_pull_t pull) +{ + return s3c_gpio_setpull_1(chip, off, pull, S3C_GPIO_PULL_DOWN); +} +#endif /* CONFIG_S3C_GPIO_PULL_DOWN */ + #ifdef CONFIG_S5P_GPIO_DRVSTR s5p_gpio_drvstr_t s5p_gpio_get_drvstr(unsigned int pin) { diff --git a/arch/arm/plat-samsung/include/plat/gpio-cfg-helpers.h b/arch/arm/plat-samsung/include/plat/gpio-cfg-helpers.h index 8fd65d8b586..0d2c5703f1e 100644 --- a/arch/arm/plat-samsung/include/plat/gpio-cfg-helpers.h +++ b/arch/arm/plat-samsung/include/plat/gpio-cfg-helpers.h @@ -210,6 +210,17 @@ extern s3c_gpio_pull_t s3c_gpio_getpull_1up(struct s3c_gpio_chip *chip, unsigned int off); /** + * s3c_gpio_getpull_1down() - Get configuration for choice of down or none + * @chip: The gpio chip that the GPIO pin belongs to + * @off: The offset to the pin to get the configuration of. + * + * This helper function reads the state of the pull-down resistor for the + * given GPIO in the same case as s3c_gpio_setpull_1down. +*/ +extern s3c_gpio_pull_t s3c_gpio_getpull_1down(struct s3c_gpio_chip *chip, + unsigned int off); + +/** * s3c_gpio_setpull_s3c2443() - Pull configuration for s3c2443. * @chip: The gpio chip that is being configured. * @off: The offset for the GPIO being configured. diff --git a/arch/arm/plat-spear/time.c b/arch/arm/plat-spear/time.c index ab211652e4c..839c88df999 100644 --- a/arch/arm/plat-spear/time.c +++ b/arch/arm/plat-spear/time.c @@ -81,8 +81,6 @@ static struct clocksource clksrc = { .rating = 200, /* its a pretty decent clock */ .read = clocksource_read_cycles, .mask = 0xFFFF, /* 16 bits */ - .mult = 0, /* to be computed */ - .shift = 0, /* to be computed */ .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -105,10 +103,8 @@ static void spear_clocksource_init(void) val |= CTRL_ENABLE ; writew(val, gpt_base + CR(CLKSRC)); - clocksource_calc_mult_shift(&clksrc, tick_rate, SPEAR_MIN_RANGE); - /* register the clocksource */ - clocksource_register(&clksrc); + clocksource_register_hz(&clksrc, tick_rate); } static struct clock_event_device clkevt = { diff --git a/arch/arm/plat-stmp3xxx/timer.c b/arch/arm/plat-stmp3xxx/timer.c index 063c7bc0e74..c395630a6ed 100644 --- a/arch/arm/plat-stmp3xxx/timer.c +++ b/arch/arm/plat-stmp3xxx/timer.c @@ -89,7 +89,6 @@ static struct clocksource cksrc_stmp3xxx = { .rating = 250, .read = stmp3xxx_clock_read, .mask = CLOCKSOURCE_MASK(16), - .shift = 10, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -106,8 +105,6 @@ static struct irqaction stmp3xxx_timer_irq = { */ static void __init stmp3xxx_init_timer(void) { - cksrc_stmp3xxx.mult = clocksource_hz2mult(CLOCK_TICK_RATE, - cksrc_stmp3xxx.shift); ckevt_timrot.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, ckevt_timrot.shift); ckevt_timrot.min_delta_ns = clockevent_delta2ns(2, &ckevt_timrot); @@ -140,7 +137,7 @@ static void __init stmp3xxx_init_timer(void) setup_irq(IRQ_TIMER0, &stmp3xxx_timer_irq); - clocksource_register(&cksrc_stmp3xxx); + clocksource_register_hz(&cksrc_stmp3xxx, CLOCK_TICK_RATE); clockevents_register_device(&ckevt_timrot); } diff --git a/arch/arm/plat-versatile/Makefile b/arch/arm/plat-versatile/Makefile index 5cf88e8427b..16dde081993 100644 --- a/arch/arm/plat-versatile/Makefile +++ b/arch/arm/plat-versatile/Makefile @@ -1,7 +1,7 @@ obj-y := clock.o -obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o -obj-$(CONFIG_ARCH_REALVIEW) += sched-clock.o -obj-$(CONFIG_ARCH_VERSATILE) += sched-clock.o +ifneq ($(CONFIG_ARCH_INTEGRATOR),y) +obj-y += sched-clock.o +endif ifeq ($(CONFIG_LEDS_CLASS),y) obj-$(CONFIG_ARCH_REALVIEW) += leds.o obj-$(CONFIG_ARCH_VERSATILE) += leds.o diff --git a/arch/arm/plat-versatile/include/plat/sched_clock.h b/arch/arm/plat-versatile/include/plat/sched_clock.h new file mode 100644 index 00000000000..5c3e4fc9fa0 --- /dev/null +++ b/arch/arm/plat-versatile/include/plat/sched_clock.h @@ -0,0 +1,6 @@ +#ifndef ARM_PLAT_SCHED_CLOCK_H +#define ARM_PLAT_SCHED_CLOCK_H + +void versatile_sched_clock_init(void __iomem *, unsigned long); + +#endif diff --git a/arch/arm/plat-versatile/sched-clock.c b/arch/arm/plat-versatile/sched-clock.c index 9768cf7e83d..3d6a4c292ca 100644 --- a/arch/arm/plat-versatile/sched-clock.c +++ b/arch/arm/plat-versatile/sched-clock.c @@ -18,36 +18,41 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/cnt32_to_63.h> #include <linux/io.h> -#include <asm/div64.h> +#include <linux/sched.h> -#include <mach/hardware.h> -#include <mach/platform.h> +#include <asm/sched_clock.h> +#include <plat/sched_clock.h> -#ifdef VERSATILE_SYS_BASE -#define REFCOUNTER (__io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_24MHz_OFFSET) -#endif - -#ifdef REALVIEW_SYS_BASE -#define REFCOUNTER (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET) -#endif +static DEFINE_CLOCK_DATA(cd); +static void __iomem *ctr; /* - * This is the Realview and Versatile sched_clock implementation. This - * has a resolution of 41.7ns, and a maximum value of about 35583 days. - * - * The return value is guaranteed to be monotonic in that range as - * long as there is always less than 89 seconds between successive - * calls to this function. + * Constants generated by clocks_calc_mult_shift(m, s, 24MHz, NSEC_PER_SEC, 60). + * This gives a resolution of about 41ns and a wrap period of about 178s. */ -unsigned long long sched_clock(void) +#define SC_MULT 2796202667u +#define SC_SHIFT 26 + +unsigned long long notrace sched_clock(void) { - unsigned long long v = cnt32_to_63(readl(REFCOUNTER)); + if (ctr) { + u32 cyc = readl(ctr); + return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, + SC_MULT, SC_SHIFT); + } else + return 0; +} - /* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */ - v *= 125<<1; - do_div(v, 3<<1); +static void notrace versatile_update_sched_clock(void) +{ + u32 cyc = readl(ctr); + update_sched_clock(&cd, cyc, (u32)~0); +} - return v; +void __init versatile_sched_clock_init(void __iomem *reg, unsigned long rate) +{ + ctr = reg; + init_fixed_sched_clock(&cd, versatile_update_sched_clock, + 32, rate, SC_MULT, SC_SHIFT); } diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types index 55590a4d87c..2fea897ebeb 100644 --- a/arch/arm/tools/mach-types +++ b/arch/arm/tools/mach-types @@ -12,7 +12,7 @@ # # http://www.arm.linux.org.uk/developer/machines/?action=new # -# Last update: Thu Sep 9 22:43:01 2010 +# Last update: Sun Dec 12 23:24:27 2010 # # machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number # @@ -2321,7 +2321,7 @@ mx31txtr MACH_MX31TXTR MX31TXTR 2332 u380 MACH_U380 U380 2333 oamp3_hualu MACH_HUALU_BOARD HUALU_BOARD 2334 npcmx50 MACH_NPCMX50 NPCMX50 2335 -mx51_lange51 MACH_MX51_LANGE51 MX51_LANGE51 2336 +mx51_efikamx MACH_MX51_EFIKAMX MX51_EFIKAMX 2336 mx51_lange52 MACH_MX51_LANGE52 MX51_LANGE52 2337 riom MACH_RIOM RIOM 2338 comcas MACH_COMCAS COMCAS 2339 @@ -2355,7 +2355,7 @@ at91sam9263cs MACH_AT91SAM9263CS AT91SAM9263CS 2366 csb732 MACH_CSB732 CSB732 2367 u8500 MACH_U8500 U8500 2368 huqiu MACH_HUQIU HUQIU 2369 -mx51_kunlun MACH_MX51_KUNLUN MX51_KUNLUN 2370 +mx51_efikasb MACH_MX51_EFIKASB MX51_EFIKASB 2370 pmt1g MACH_PMT1G PMT1G 2371 htcelf MACH_HTCELF HTCELF 2372 armadillo420 MACH_ARMADILLO420 ARMADILLO420 2373 @@ -2971,7 +2971,7 @@ premierwave_en MACH_PREMIERWAVE_EN PREMIERWAVE_EN 2985 wasabi MACH_WASABI WASABI 2986 vivow MACH_VIVOW VIVOW 2987 mx50_rdp MACH_MX50_RDP MX50_RDP 2988 -universal MACH_UNIVERSAL UNIVERSAL 2989 +universal_c210 MACH_UNIVERSAL_C210 UNIVERSAL_C210 2989 real6410 MACH_REAL6410 REAL6410 2990 spx_sakura MACH_SPX_SAKURA SPX_SAKURA 2991 ij3k_2440 MACH_IJ3K_2440 IJ3K_2440 2992 @@ -3044,3 +3044,178 @@ harvest_desoto MACH_HARVEST_DESOTO HARVEST_DESOTO 3059 msm8x60_qrdc MACH_MSM8X60_QRDC MSM8X60_QRDC 3060 spear900 MACH_SPEAR900 SPEAR900 3061 pcontrol_g20 MACH_PCONTROL_G20 PCONTROL_G20 3062 +rdstor MACH_RDSTOR RDSTOR 3063 +usdloader MACH_USDLOADER USDLOADER 3064 +tsoploader MACH_TSOPLOADER TSOPLOADER 3065 +kronos MACH_KRONOS KRONOS 3066 +ffcore MACH_FFCORE FFCORE 3067 +mone MACH_MONE MONE 3068 +unit2s MACH_UNIT2S UNIT2S 3069 +acer_a5 MACH_ACER_A5 ACER_A5 3070 +etherpro_isp MACH_ETHERPRO_ISP ETHERPRO_ISP 3071 +stretchs7000 MACH_STRETCHS7000 STRETCHS7000 3072 +p87_smartsim MACH_P87_SMARTSIM P87_SMARTSIM 3073 +tulip MACH_TULIP TULIP 3074 +sunflower MACH_SUNFLOWER SUNFLOWER 3075 +rib MACH_RIB RIB 3076 +clod MACH_CLOD CLOD 3077 +rump MACH_RUMP RUMP 3078 +tenderloin MACH_TENDERLOIN TENDERLOIN 3079 +shortloin MACH_SHORTLOIN SHORTLOIN 3080 +crespo MACH_CRESPO CRESPO 3081 +antares MACH_ANTARES ANTARES 3082 +wb40n MACH_WB40N WB40N 3083 +herring MACH_HERRING HERRING 3084 +naxy400 MACH_NAXY400 NAXY400 3085 +naxy1200 MACH_NAXY1200 NAXY1200 3086 +vpr200 MACH_VPR200 VPR200 3087 +bug20 MACH_BUG20 BUG20 3088 +goflexnet MACH_GOFLEXNET GOFLEXNET 3089 +torbreck MACH_TORBRECK TORBRECK 3090 +saarb_mg1 MACH_SAARB_MG1 SAARB_MG1 3091 +callisto MACH_CALLISTO CALLISTO 3092 +multhsu MACH_MULTHSU MULTHSU 3093 +saluda MACH_SALUDA SALUDA 3094 +pemp_omap3_apollo MACH_PEMP_OMAP3_APOLLO PEMP_OMAP3_APOLLO 3095 +vc0718 MACH_VC0718 VC0718 3096 +mvblx MACH_MVBLX MVBLX 3097 +inhand_apeiron MACH_INHAND_APEIRON INHAND_APEIRON 3098 +inhand_fury MACH_INHAND_FURY INHAND_FURY 3099 +inhand_siren MACH_INHAND_SIREN INHAND_SIREN 3100 +hdnvp MACH_HDNVP HDNVP 3101 +softwinner MACH_SOFTWINNER SOFTWINNER 3102 +prima2_evb MACH_PRIMA2_EVB PRIMA2_EVB 3103 +nas6210 MACH_NAS6210 NAS6210 3104 +unisdev MACH_UNISDEV UNISDEV 3105 +sbca11 MACH_SBCA11 SBCA11 3106 +saga MACH_SAGA SAGA 3107 +ns_k330 MACH_NS_K330 NS_K330 3108 +tanna MACH_TANNA TANNA 3109 +imate8502 MACH_IMATE8502 IMATE8502 3110 +aspen MACH_ASPEN ASPEN 3111 +daintree_cwac MACH_DAINTREE_CWAC DAINTREE_CWAC 3112 +zmx25 MACH_ZMX25 ZMX25 3113 +maple1 MACH_MAPLE1 MAPLE1 3114 +qsd8x72_surf MACH_QSD8X72_SURF QSD8X72_SURF 3115 +qsd8x72_ffa MACH_QSD8X72_FFA QSD8X72_FFA 3116 +abilene MACH_ABILENE ABILENE 3117 +eigen_ttr MACH_EIGEN_TTR EIGEN_TTR 3118 +iomega_ix2_200 MACH_IOMEGA_IX2_200 IOMEGA_IX2_200 3119 +coretec_vcx7400 MACH_CORETEC_VCX7400 CORETEC_VCX7400 3120 +santiago MACH_SANTIAGO SANTIAGO 3121 +mx257sol MACH_MX257SOL MX257SOL 3122 +strasbourg MACH_STRASBOURG STRASBOURG 3123 +msm8x60_fluid MACH_MSM8X60_FLUID MSM8X60_FLUID 3124 +smartqv5 MACH_SMARTQV5 SMARTQV5 3125 +smartqv3 MACH_SMARTQV3 SMARTQV3 3126 +smartqv7 MACH_SMARTQV7 SMARTQV7 3127 +paz00 MACH_PAZ00 PAZ00 3128 +acmenetusfoxg20 MACH_ACMENETUSFOXG20 ACMENETUSFOXG20 3129 +htcwillow MACH_HTCWILLOW HTCWILLOW 3130 +fwbd_0404 MACH_FWBD_0404 FWBD_0404 3131 +hdgu MACH_HDGU HDGU 3132 +pyramid MACH_PYRAMID PYRAMID 3133 +epiphan MACH_EPIPHAN EPIPHAN 3134 +omap_bender MACH_OMAP_BENDER OMAP_BENDER 3135 +gurnard MACH_GURNARD GURNARD 3136 +gtl_it5100 MACH_GTL_IT5100 GTL_IT5100 3137 +bcm2708 MACH_BCM2708 BCM2708 3138 +mx51_ggc MACH_MX51_GGC MX51_GGC 3139 +sharespace MACH_SHARESPACE SHARESPACE 3140 +haba_knx_explorer MACH_HABA_KNX_EXPLORER HABA_KNX_EXPLORER 3141 +simtec_kirkmod MACH_SIMTEC_KIRKMOD SIMTEC_KIRKMOD 3142 +crux MACH_CRUX CRUX 3143 +mx51_bravo MACH_MX51_BRAVO MX51_BRAVO 3144 +charon MACH_CHARON CHARON 3145 +picocom3 MACH_PICOCOM3 PICOCOM3 3146 +picocom4 MACH_PICOCOM4 PICOCOM4 3147 +serrano MACH_SERRANO SERRANO 3148 +doubleshot MACH_DOUBLESHOT DOUBLESHOT 3149 +evsy MACH_EVSY EVSY 3150 +huashan MACH_HUASHAN HUASHAN 3151 +lausanne MACH_LAUSANNE LAUSANNE 3152 +emerald MACH_EMERALD EMERALD 3153 +tqma35 MACH_TQMA35 TQMA35 3154 +marvel MACH_MARVEL MARVEL 3155 +manuae MACH_MANUAE MANUAE 3156 +chacha MACH_CHACHA CHACHA 3157 +lemon MACH_LEMON LEMON 3158 +csc MACH_CSC CSC 3159 +gira_knxip_router MACH_GIRA_KNXIP_ROUTER GIRA_KNXIP_ROUTER 3160 +t20 MACH_T20 T20 3161 +hdmini MACH_HDMINI HDMINI 3162 +sciphone_g2 MACH_SCIPHONE_G2 SCIPHONE_G2 3163 +express MACH_EXPRESS EXPRESS 3164 +express_kt MACH_EXPRESS_KT EXPRESS_KT 3165 +maximasp MACH_MAXIMASP MAXIMASP 3166 +nitrogen_imx51 MACH_NITROGEN_IMX51 NITROGEN_IMX51 3167 +nitrogen_imx53 MACH_NITROGEN_IMX53 NITROGEN_IMX53 3168 +sunfire MACH_SUNFIRE SUNFIRE 3169 +arowana MACH_AROWANA AROWANA 3170 +tegra_daytona MACH_TEGRA_DAYTONA TEGRA_DAYTONA 3171 +tegra_swordfish MACH_TEGRA_SWORDFISH TEGRA_SWORDFISH 3172 +edison MACH_EDISON EDISON 3173 +svp8500v1 MACH_SVP8500V1 SVP8500V1 3174 +svp8500v2 MACH_SVP8500V2 SVP8500V2 3175 +svp5500 MACH_SVP5500 SVP5500 3176 +b5500 MACH_B5500 B5500 3177 +s5500 MACH_S5500 S5500 3178 +icon MACH_ICON ICON 3179 +elephant MACH_ELEPHANT ELEPHANT 3180 +msm8x60_fusion MACH_MSM8X60_FUSION MSM8X60_FUSION 3181 +shooter MACH_SHOOTER SHOOTER 3182 +spade_lte MACH_SPADE_LTE SPADE_LTE 3183 +philhwani MACH_PHILHWANI PHILHWANI 3184 +gsncomm MACH_GSNCOMM GSNCOMM 3185 +strasbourg_a2 MACH_STRASBOURG_A2 STRASBOURG_A2 3186 +mmm MACH_MMM MMM 3187 +davinci_dm365_bv MACH_DAVINCI_DM365_BV DAVINCI_DM365_BV 3188 +ag5evm MACH_AG5EVM AG5EVM 3189 +sc575plc MACH_SC575PLC SC575PLC 3190 +sc575hmi MACH_SC575IPC SC575IPC 3191 +omap3_tdm3730 MACH_OMAP3_TDM3730 OMAP3_TDM3730 3192 +g7 MACH_G7 G7 3193 +top9000_eval MACH_TOP9000_EVAL TOP9000_EVAL 3194 +top9000_su MACH_TOP9000_SU TOP9000_SU 3195 +utm300 MACH_UTM300 UTM300 3196 +tsunagi MACH_TSUNAGI TSUNAGI 3197 +ts75xx MACH_TS75XX TS75XX 3198 +msm8x60_fusn_ffa MACH_MSM8X60_FUSN_FFA MSM8X60_FUSN_FFA 3199 +ts47xx MACH_TS47XX TS47XX 3200 +da850_k5 MACH_DA850_K5 DA850_K5 3201 +ax502 MACH_AX502 AX502 3202 +igep0032 MACH_IGEP0032 IGEP0032 3203 +antero MACH_ANTERO ANTERO 3204 +synergy MACH_SYNERGY SYNERGY 3205 +ics_if_voip MACH_ICS_IF_VOIP ICS_IF_VOIP 3206 +wlf_cragg_6410 MACH_WLF_CRAGG_6410 WLF_CRAGG_6410 3207 +punica MACH_PUNICA PUNICA 3208 +sbc_nt250 MACH_SBC_NT250 SBC_NT250 3209 +mx27_wmultra MACH_MX27_WMULTRA MX27_WMULTRA 3210 +mackerel MACH_MACKEREL MACKEREL 3211 +fa9x27 MACH_FA9X27 FA9X27 3213 +ns2816tb MACH_NS2816TB NS2816TB 3214 +ns2816_ntpad MACH_NS2816_NTPAD NS2816_NTPAD 3215 +ns2816_ntnb MACH_NS2816_NTNB NS2816_NTNB 3216 +kaen MACH_KAEN KAEN 3217 +nv1000 MACH_NV1000 NV1000 3218 +nuc950ts MACH_NUC950TS NUC950TS 3219 +nokia_rm680 MACH_NOKIA_RM680 NOKIA_RM680 3220 +ast2200 MACH_AST2200 AST2200 3221 +lead MACH_LEAD LEAD 3222 +unino1 MACH_UNINO1 UNINO1 3223 +greeco MACH_GREECO GREECO 3224 +verdi MACH_VERDI VERDI 3225 +dm6446_adbox MACH_DM6446_ADBOX DM6446_ADBOX 3226 +quad_salsa MACH_QUAD_SALSA QUAD_SALSA 3227 +abb_gma_1_1 MACH_ABB_GMA_1_1 ABB_GMA_1_1 3228 +svcid MACH_SVCID SVCID 3229 +msm8960_sim MACH_MSM8960_SIM MSM8960_SIM 3230 +msm8960_rumi3 MACH_MSM8960_RUMI3 MSM8960_RUMI3 3231 +icon_g MACH_ICON_G ICON_G 3232 +mb3 MACH_MB3 MB3 3233 +gsia18s MACH_GSIA18S GSIA18S 3234 +pivicc MACH_PIVICC PIVICC 3235 +pcm048 MACH_PCM048 PCM048 3236 +dds MACH_DDS DDS 3237 +chalten_xa1 MACH_CHALTEN_XA1 CHALTEN_XA1 3238 diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index d66cead97d2..9897dcfc16d 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -206,6 +206,7 @@ ENTRY(vfp_save_state) mov pc, lr ENDPROC(vfp_save_state) + .align last_VFP_context_address: .word last_VFP_context |