diff options
34 files changed, 981 insertions, 723 deletions
diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt new file mode 100644 index 00000000000..f32494dbfe1 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/cpus.txt @@ -0,0 +1,77 @@ +* ARM CPUs binding description + +The device tree allows to describe the layout of CPUs in a system through +the "cpus" node, which in turn contains a number of subnodes (ie "cpu") +defining properties for every cpu. + +Bindings for CPU nodes follow the ePAPR standard, available from: + +http://devicetree.org + +For the ARM architecture every CPU node must contain the following properties: + +- device_type: must be "cpu" +- reg: property matching the CPU MPIDR[23:0] register bits + reg[31:24] bits must be set to 0 +- compatible: should be one of: + "arm,arm1020" + "arm,arm1020e" + "arm,arm1022" + "arm,arm1026" + "arm,arm720" + "arm,arm740" + "arm,arm7tdmi" + "arm,arm920" + "arm,arm922" + "arm,arm925" + "arm,arm926" + "arm,arm940" + "arm,arm946" + "arm,arm9tdmi" + "arm,cortex-a5" + "arm,cortex-a7" + "arm,cortex-a8" + "arm,cortex-a9" + "arm,cortex-a15" + "arm,arm1136" + "arm,arm1156" + "arm,arm1176" + "arm,arm11mpcore" + "faraday,fa526" + "intel,sa110" + "intel,sa1100" + "marvell,feroceon" + "marvell,mohawk" + "marvell,xsc3" + "marvell,xscale" + +Example: + + cpus { + #size-cells = <0>; + #address-cells = <1>; + + CPU0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x0>; + }; + + CPU1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x1>; + }; + + CPU2: cpu@100 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x100>; + }; + + CPU3: cpu@101 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x101>; + }; + }; diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c index aa526998418..36ae03a3f5d 100644 --- a/arch/arm/common/gic.c +++ b/arch/arm/common/gic.c @@ -70,6 +70,14 @@ struct gic_chip_data { static DEFINE_RAW_SPINLOCK(irq_controller_lock); /* + * The GIC mapping of CPU interfaces does not necessarily match + * the logical CPU numbering. Let's use a mapping as returned + * by the GIC itself. + */ +#define NR_GIC_CPU_IF 8 +static u8 gic_cpu_map[NR_GIC_CPU_IF] __read_mostly; + +/* * Supported arch specific GIC irq extension. * Default make them NULL. */ @@ -238,11 +246,11 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask); u32 val, mask, bit; - if (cpu >= 8 || cpu >= nr_cpu_ids) + if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids) return -EINVAL; mask = 0xff << shift; - bit = 1 << (cpu_logical_map(cpu) + shift); + bit = gic_cpu_map[cpu] << shift; raw_spin_lock(&irq_controller_lock); val = readl_relaxed(reg) & ~mask; @@ -349,11 +357,6 @@ static void __init gic_dist_init(struct gic_chip_data *gic) u32 cpumask; unsigned int gic_irqs = gic->gic_irqs; void __iomem *base = gic_data_dist_base(gic); - u32 cpu = cpu_logical_map(smp_processor_id()); - - cpumask = 1 << cpu; - cpumask |= cpumask << 8; - cpumask |= cpumask << 16; writel_relaxed(0, base + GIC_DIST_CTRL); @@ -366,6 +369,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic) /* * Set all global interrupts to this CPU only. */ + cpumask = readl_relaxed(base + GIC_DIST_TARGET + 0); for (i = 32; i < gic_irqs; i += 4) writel_relaxed(cpumask, base + GIC_DIST_TARGET + i * 4 / 4); @@ -389,9 +393,25 @@ static void __cpuinit gic_cpu_init(struct gic_chip_data *gic) { void __iomem *dist_base = gic_data_dist_base(gic); void __iomem *base = gic_data_cpu_base(gic); + unsigned int cpu_mask, cpu = smp_processor_id(); int i; /* + * Get what the GIC says our CPU mask is. + */ + BUG_ON(cpu >= NR_GIC_CPU_IF); + cpu_mask = readl_relaxed(dist_base + GIC_DIST_TARGET + 0); + gic_cpu_map[cpu] = cpu_mask; + + /* + * Clear our mask from the other map entries in case they're + * still undefined. + */ + for (i = 0; i < NR_GIC_CPU_IF; i++) + if (i != cpu) + gic_cpu_map[i] &= ~cpu_mask; + + /* * Deal with the banked PPI and SGI interrupts - disable all * PPI interrupts, ensure all SGI interrupts are enabled. */ @@ -646,7 +666,7 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, { irq_hw_number_t hwirq_base; struct gic_chip_data *gic; - int gic_irqs, irq_base; + int gic_irqs, irq_base, i; BUG_ON(gic_nr >= MAX_GIC_NR); @@ -683,6 +703,13 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, } /* + * Initialize the CPU interface map to all CPUs. + * It will be refined as each CPU probes its ID. + */ + for (i = 0; i < NR_GIC_CPU_IF; i++) + gic_cpu_map[i] = 0xff; + + /* * For primary GICs, skip over SGIs. * For secondary GICs, skip over PPIs, too. */ @@ -737,7 +764,7 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) /* Convert our logical CPU mask into a physical one. */ for_each_cpu(cpu, mask) - map |= 1 << cpu_logical_map(cpu); + map |= gic_cpu_map[cpu]; /* * Ensure that stores to Normal memory are visible to the diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index f70ae175a3d..2ffdaacd461 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -16,7 +16,6 @@ generic-y += local64.h generic-y += msgbuf.h generic-y += param.h generic-y += parport.h -generic-y += percpu.h generic-y += poll.h generic-y += resource.h generic-y += sections.h diff --git a/arch/arm/include/asm/cpu.h b/arch/arm/include/asm/cpu.h index d797223b39d..2744f060255 100644 --- a/arch/arm/include/asm/cpu.h +++ b/arch/arm/include/asm/cpu.h @@ -15,6 +15,7 @@ struct cpuinfo_arm { struct cpu cpu; + u32 cpuid; #ifdef CONFIG_SMP unsigned int loops_per_jiffy; #endif diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index cb47d28cbe1..a59dcb5ab5f 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -25,6 +25,19 @@ #define CPUID_EXT_ISAR4 "c2, 4" #define CPUID_EXT_ISAR5 "c2, 5" +#define MPIDR_SMP_BITMASK (0x3 << 30) +#define MPIDR_SMP_VALUE (0x2 << 30) + +#define MPIDR_MT_BITMASK (0x1 << 24) + +#define MPIDR_HWID_BITMASK 0xFFFFFF + +#define MPIDR_LEVEL_BITS 8 +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1) + +#define MPIDR_AFFINITY_LEVEL(mpidr, level) \ + ((mpidr >> (MPIDR_LEVEL_BITS * level)) & MPIDR_LEVEL_MASK) + extern unsigned int processor_id; #ifdef CONFIG_CPU_CP15 diff --git a/arch/arm/include/asm/cti.h b/arch/arm/include/asm/cti.h index a0ada3ea435..f2e5cad3f30 100644 --- a/arch/arm/include/asm/cti.h +++ b/arch/arm/include/asm/cti.h @@ -146,15 +146,7 @@ static inline void cti_irq_ack(struct cti *cti) */ static inline void cti_unlock(struct cti *cti) { - void __iomem *base = cti->base; - unsigned long val; - - val = __raw_readl(base + LOCKSTATUS); - - if (val & 1) { - val = LOCKCODE; - __raw_writel(val, base + LOCKACCESS); - } + __raw_writel(LOCKCODE, cti->base + LOCKACCESS); } /** @@ -166,14 +158,6 @@ static inline void cti_unlock(struct cti *cti) */ static inline void cti_lock(struct cti *cti) { - void __iomem *base = cti->base; - unsigned long val; - - val = __raw_readl(base + LOCKSTATUS); - - if (!(val & 1)) { - val = ~LOCKCODE; - __raw_writel(val, base + LOCKACCESS); - } + __raw_writel(~LOCKCODE, cti->base + LOCKACCESS); } #endif diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h index c190bc992f0..01169dd723f 100644 --- a/arch/arm/include/asm/hw_breakpoint.h +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -98,12 +98,12 @@ static inline void decode_ctrl_reg(u32 reg, #define ARM_BASE_WCR 112 /* Accessor macros for the debug registers. */ -#define ARM_DBG_READ(M, OP2, VAL) do {\ - asm volatile("mrc p14, 0, %0, c0," #M ", " #OP2 : "=r" (VAL));\ +#define ARM_DBG_READ(N, M, OP2, VAL) do {\ + asm volatile("mrc p14, 0, %0, " #N "," #M ", " #OP2 : "=r" (VAL));\ } while (0) -#define ARM_DBG_WRITE(M, OP2, VAL) do {\ - asm volatile("mcr p14, 0, %0, c0," #M ", " #OP2 : : "r" (VAL));\ +#define ARM_DBG_WRITE(N, M, OP2, VAL) do {\ + asm volatile("mcr p14, 0, %0, " #N "," #M ", " #OP2 : : "r" (VAL));\ } while (0) struct notifier_block; diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 14965658a92..9f77e7804f3 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -5,18 +5,15 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID - unsigned int id; - raw_spinlock_t id_lock; + u64 id; #endif - unsigned int kvm_seq; + unsigned int vmalloc_seq; } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID -#define ASID(mm) ((mm)->context.id & 255) - -/* init_mm.context.id_lock should be initialized. */ -#define INIT_MM_CONTEXT(name) \ - .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock), +#define ASID_BITS 8 +#define ASID_MASK ((~0ULL) << ASID_BITS) +#define ASID(mm) ((mm)->context.id & ~ASID_MASK) #else #define ASID(mm) (0) #endif diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 0306bc642c0..e1f644bc7cc 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -20,88 +20,12 @@ #include <asm/proc-fns.h> #include <asm-generic/mm_hooks.h> -void __check_kvm_seq(struct mm_struct *mm); +void __check_vmalloc_seq(struct mm_struct *mm); #ifdef CONFIG_CPU_HAS_ASID -/* - * On ARMv6, we have the following structure in the Context ID: - * - * 31 7 0 - * +-------------------------+-----------+ - * | process ID | ASID | - * +-------------------------+-----------+ - * | context ID | - * +-------------------------------------+ - * - * The ASID is used to tag entries in the CPU caches and TLBs. - * The context ID is used by debuggers and trace logic, and - * should be unique within all running processes. - */ -#define ASID_BITS 8 -#define ASID_MASK ((~0) << ASID_BITS) -#define ASID_FIRST_VERSION (1 << ASID_BITS) - -extern unsigned int cpu_last_asid; - -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void __new_context(struct mm_struct *mm); -void cpu_set_reserved_ttbr0(void); - -static inline void switch_new_context(struct mm_struct *mm) -{ - unsigned long flags; - - __new_context(mm); - - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); -} - -static inline void check_and_switch_context(struct mm_struct *mm, - struct task_struct *tsk) -{ - if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) - __check_kvm_seq(mm); - - /* - * Required during context switch to avoid speculative page table - * walking with the wrong TTBR. - */ - cpu_set_reserved_ttbr0(); - - if (!((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) - /* - * The ASID is from the current generation, just switch to the - * new pgd. This condition is only true for calls from - * context_switch() and interrupts are already disabled. - */ - cpu_switch_mm(mm->pgd, mm); - else if (irqs_disabled()) - /* - * Defer the new ASID allocation until after the context - * switch critical region since __new_context() cannot be - * called with interrupts disabled (it sends IPIs). - */ - set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); - else - /* - * That is a direct call to switch_mm() or activate_mm() with - * interrupts enabled and a new context. - */ - switch_new_context(mm); -} - -#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0) - -#define finish_arch_post_lock_switch \ - finish_arch_post_lock_switch -static inline void finish_arch_post_lock_switch(void) -{ - if (test_and_clear_thread_flag(TIF_SWITCH_MM)) - switch_new_context(current->mm); -} +void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); +#define init_new_context(tsk,mm) ({ mm->context.id = 0; }) #else /* !CONFIG_CPU_HAS_ASID */ @@ -110,8 +34,8 @@ static inline void finish_arch_post_lock_switch(void) static inline void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) { - if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) - __check_kvm_seq(mm); + if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) + __check_vmalloc_seq(mm); if (irqs_disabled()) /* @@ -143,6 +67,7 @@ static inline void finish_arch_post_lock_switch(void) #endif /* CONFIG_CPU_HAS_ASID */ #define destroy_context(mm) do { } while(0) +#define activate_mm(prev,next) switch_mm(prev, next, NULL) /* * This is called when "tsk" is about to enter lazy TLB mode. @@ -186,6 +111,5 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, } #define deactivate_mm(tsk,mm) do { } while (0) -#define activate_mm(prev,next) switch_mm(prev, next, NULL) #endif diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h new file mode 100644 index 00000000000..968c0a14e0a --- /dev/null +++ b/arch/arm/include/asm/percpu.h @@ -0,0 +1,45 @@ +/* + * Copyright 2012 Calxeda, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef _ASM_ARM_PERCPU_H_ +#define _ASM_ARM_PERCPU_H_ + +/* + * Same as asm-generic/percpu.h, except that we store the per cpu offset + * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7 + */ +#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6) +static inline void set_my_cpu_offset(unsigned long off) +{ + /* Set TPIDRPRW */ + asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory"); +} + +static inline unsigned long __my_cpu_offset(void) +{ + unsigned long off; + /* Read TPIDRPRW */ + asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off) : : "memory"); + return off; +} +#define __my_cpu_offset __my_cpu_offset() +#else +#define set_my_cpu_offset(x) do {} while(0) + +#endif /* CONFIG_SMP */ + +#include <asm-generic/percpu.h> + +#endif /* _ASM_ARM_PERCPU_H_ */ diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 625cd621a43..755877527cf 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -21,4 +21,11 @@ #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xFFFF +#ifdef CONFIG_HW_PERF_EVENTS +struct pt_regs; +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) +#endif + #endif /* __ARM_PERF_EVENT_H__ */ diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 2317a71c8f8..f97ee02386e 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -115,6 +115,7 @@ * The PTE table pointer refers to the hardware entries; the "Linux" * entries are stored 1024 bytes below. */ +#define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */ #define L_PTE_PRESENT (_AT(pteval_t, 1) << 0) #define L_PTE_YOUNG (_AT(pteval_t, 1) << 1) #define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ @@ -123,6 +124,7 @@ #define L_PTE_USER (_AT(pteval_t, 1) << 8) #define L_PTE_XN (_AT(pteval_t, 1) << 9) #define L_PTE_SHARED (_AT(pteval_t, 1) << 10) /* shared(v6), coherent(xsc3) */ +#define L_PTE_NONE (_AT(pteval_t, 1) << 11) /* * These are the memory types, defined to be compatible with diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index b24903549d1..a3f37929940 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -67,7 +67,8 @@ * These bits overlap with the hardware bits but the naming is preserved for * consistency with the classic page table format. */ -#define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Valid */ +#define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */ +#define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Present */ #define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ #define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define L_PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ @@ -76,6 +77,7 @@ #define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */ #define L_PTE_DIRTY (_AT(pteval_t, 1) << 55) /* unused */ #define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */ +#define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */ /* * To be used in assembly code with the upper page attributes. diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 08c12312a1f..9c82f988c0e 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -73,7 +73,7 @@ extern pgprot_t pgprot_kernel; #define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b)) -#define PAGE_NONE _MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY) +#define PAGE_NONE _MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY | L_PTE_NONE) #define PAGE_SHARED _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_XN) #define PAGE_SHARED_EXEC _MOD_PROT(pgprot_user, L_PTE_USER) #define PAGE_COPY _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) @@ -83,7 +83,7 @@ extern pgprot_t pgprot_kernel; #define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN) #define PAGE_KERNEL_EXEC pgprot_kernel -#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN) +#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE) #define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) #define __PAGE_SHARED_EXEC __pgprot(_L_PTE_DEFAULT | L_PTE_USER) #define __PAGE_COPY __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) @@ -203,9 +203,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) #define pte_special(pte) (0) -#define pte_present_user(pte) \ - ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ - (L_PTE_PRESENT | L_PTE_USER)) +#define pte_present_user(pte) (pte_present(pte) && (pte_val(pte) & L_PTE_USER)) #if __LINUX_ARM_ARCH__ < 6 static inline void __sync_icache_dcache(pte_t pteval) @@ -242,7 +240,7 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER; + const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER | L_PTE_NONE; pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index a26170dce02..f24edad26c7 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -67,19 +67,19 @@ struct arm_pmu { cpumask_t active_irqs; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); - void (*enable)(struct hw_perf_event *evt, int idx); - void (*disable)(struct hw_perf_event *evt, int idx); + void (*enable)(struct perf_event *event); + void (*disable)(struct perf_event *event); int (*get_event_idx)(struct pmu_hw_events *hw_events, - struct hw_perf_event *hwc); + struct perf_event *event); int (*set_event_filter)(struct hw_perf_event *evt, struct perf_event_attr *attr); - u32 (*read_counter)(int idx); - void (*write_counter)(int idx, u32 val); - void (*start)(void); - void (*stop)(void); + u32 (*read_counter)(struct perf_event *event); + void (*write_counter)(struct perf_event *event, u32 val); + void (*start)(struct arm_pmu *); + void (*stop)(struct arm_pmu *); void (*reset)(void *); - int (*request_irq)(irq_handler_t handler); - void (*free_irq)(void); + int (*request_irq)(struct arm_pmu *, irq_handler_t handler); + void (*free_irq)(struct arm_pmu *); int (*map_event)(struct perf_event *event); int num_events; atomic_t active_events; @@ -93,15 +93,11 @@ struct arm_pmu { extern const struct dev_pm_ops armpmu_dev_pm_ops; -int armpmu_register(struct arm_pmu *armpmu, char *name, int type); +int armpmu_register(struct arm_pmu *armpmu, int type); -u64 armpmu_event_update(struct perf_event *event, - struct hw_perf_event *hwc, - int idx); +u64 armpmu_event_update(struct perf_event *event); -int armpmu_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, - int idx); +int armpmu_event_set_period(struct perf_event *event); int armpmu_map_event(struct perf_event *event, const unsigned (*event_map)[PERF_COUNT_HW_MAX], diff --git a/arch/arm/include/asm/prom.h b/arch/arm/include/asm/prom.h index aeae9c609df..8dd51dc1a36 100644 --- a/arch/arm/include/asm/prom.h +++ b/arch/arm/include/asm/prom.h @@ -15,6 +15,7 @@ extern struct machine_desc *setup_machine_fdt(unsigned int dt_phys); extern void arm_dt_memblock_reserve(void); +extern void __init arm_dt_init_cpu_maps(void); #else /* CONFIG_OF */ @@ -24,6 +25,7 @@ static inline struct machine_desc *setup_machine_fdt(unsigned int dt_phys) } static inline void arm_dt_memblock_reserve(void) { } +static inline void arm_dt_init_cpu_maps(void) { } #endif /* CONFIG_OF */ #endif /* ASMARM_PROM_H */ diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index 558d6c80aca..aaa61b6f50f 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -5,6 +5,9 @@ #ifndef __ASMARM_SMP_PLAT_H #define __ASMARM_SMP_PLAT_H +#include <linux/cpumask.h> +#include <linux/err.h> + #include <asm/cputype.h> /* @@ -48,5 +51,19 @@ static inline int cache_ops_need_broadcast(void) */ extern int __cpu_logical_map[]; #define cpu_logical_map(cpu) __cpu_logical_map[cpu] +/* + * Retrieve logical cpu index corresponding to a given MPIDR[23:0] + * - mpidr: MPIDR[23:0] to be used for the look-up + * + * Returns the cpu logical index or -EINVAL on look-up error + */ +static inline int get_logical_index(u32 mpidr) +{ + int cpu; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (cpu_logical_map(cpu) == mpidr) + return cpu; + return -EINVAL; +} #endif diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index bee7f9d47f0..70f1bdeb241 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -19,8 +19,10 @@ #include <linux/of_irq.h> #include <linux/of_platform.h> +#include <asm/cputype.h> #include <asm/setup.h> #include <asm/page.h> +#include <asm/smp_plat.h> #include <asm/mach/arch.h> #include <asm/mach-types.h> @@ -61,6 +63,108 @@ void __init arm_dt_memblock_reserve(void) } } +/* + * arm_dt_init_cpu_maps - Function retrieves cpu nodes from the device tree + * and builds the cpu logical map array containing MPIDR values related to + * logical cpus + * + * Updates the cpu possible mask with the number of parsed cpu nodes + */ +void __init arm_dt_init_cpu_maps(void) +{ + /* + * Temp logical map is initialized with UINT_MAX values that are + * considered invalid logical map entries since the logical map must + * contain a list of MPIDR[23:0] values where MPIDR[31:24] must + * read as 0. + */ + struct device_node *cpu, *cpus; + u32 i, j, cpuidx = 1; + u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0; + + u32 tmp_map[NR_CPUS] = { [0 ... NR_CPUS-1] = UINT_MAX }; + bool bootcpu_valid = false; + cpus = of_find_node_by_path("/cpus"); + + if (!cpus) + return; + + for_each_child_of_node(cpus, cpu) { + u32 hwid; + + pr_debug(" * %s...\n", cpu->full_name); + /* + * A device tree containing CPU nodes with missing "reg" + * properties is considered invalid to build the + * cpu_logical_map. + */ + if (of_property_read_u32(cpu, "reg", &hwid)) { + pr_debug(" * %s missing reg property\n", + cpu->full_name); + return; + } + + /* + * 8 MSBs must be set to 0 in the DT since the reg property + * defines the MPIDR[23:0]. + */ + if (hwid & ~MPIDR_HWID_BITMASK) + return; + + /* + * Duplicate MPIDRs are a recipe for disaster. + * Scan all initialized entries and check for + * duplicates. If any is found just bail out. + * temp values were initialized to UINT_MAX + * to avoid matching valid MPIDR[23:0] values. + */ + for (j = 0; j < cpuidx; j++) + if (WARN(tmp_map[j] == hwid, "Duplicate /cpu reg " + "properties in the DT\n")) + return; + + /* + * Build a stashed array of MPIDR values. Numbering scheme + * requires that if detected the boot CPU must be assigned + * logical id 0. Other CPUs get sequential indexes starting + * from 1. If a CPU node with a reg property matching the + * boot CPU MPIDR is detected, this is recorded so that the + * logical map built from DT is validated and can be used + * to override the map created in smp_setup_processor_id(). + */ + if (hwid == mpidr) { + i = 0; + bootcpu_valid = true; + } else { + i = cpuidx++; + } + + if (WARN(cpuidx > nr_cpu_ids, "DT /cpu %u nodes greater than " + "max cores %u, capping them\n", + cpuidx, nr_cpu_ids)) { + cpuidx = nr_cpu_ids; + break; + } + + tmp_map[i] = hwid; + } + + if (WARN(!bootcpu_valid, "DT missing boot CPU MPIDR[23:0], " + "fall back to default cpu_logical_map\n")) + return; + + /* + * Since the boot CPU node contains proper data, and all nodes have + * a reg property, the DT CPU list can be considered valid and the + * logical map created in smp_setup_processor_id() can be overridden + */ + for (i = 0; i < cpuidx; i++) { + set_cpu_possible(i, true); + cpu_logical_map(i) = tmp_map[i]; + pr_debug("cpu logical map 0x%x\n", cpu_logical_map(i)); + } +} + /** * setup_machine_fdt - Machine setup when an dtb was passed to the kernel * @dt_phys: physical address of dt blob diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 281bf330124..5ff2e77782b 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -52,14 +52,14 @@ static u8 debug_arch; /* Maximum supported watchpoint length. */ static u8 max_watchpoint_len; -#define READ_WB_REG_CASE(OP2, M, VAL) \ - case ((OP2 << 4) + M): \ - ARM_DBG_READ(c ## M, OP2, VAL); \ +#define READ_WB_REG_CASE(OP2, M, VAL) \ + case ((OP2 << 4) + M): \ + ARM_DBG_READ(c0, c ## M, OP2, VAL); \ break -#define WRITE_WB_REG_CASE(OP2, M, VAL) \ - case ((OP2 << 4) + M): \ - ARM_DBG_WRITE(c ## M, OP2, VAL);\ +#define WRITE_WB_REG_CASE(OP2, M, VAL) \ + case ((OP2 << 4) + M): \ + ARM_DBG_WRITE(c0, c ## M, OP2, VAL); \ break #define GEN_READ_WB_REG_CASES(OP2, VAL) \ @@ -136,12 +136,12 @@ static u8 get_debug_arch(void) /* Do we implement the extended CPUID interface? */ if (((read_cpuid_id() >> 16) & 0xf) != 0xf) { - pr_warning("CPUID feature registers not supported. " - "Assuming v6 debug is present.\n"); + pr_warn_once("CPUID feature registers not supported. " + "Assuming v6 debug is present.\n"); return ARM_DEBUG_ARCH_V6; } - ARM_DBG_READ(c0, 0, didr); + ARM_DBG_READ(c0, c0, 0, didr); return (didr >> 16) & 0xf; } @@ -169,7 +169,7 @@ static int debug_exception_updates_fsr(void) static int get_num_wrp_resources(void) { u32 didr; - ARM_DBG_READ(c0, 0, didr); + ARM_DBG_READ(c0, c0, 0, didr); return ((didr >> 28) & 0xf) + 1; } @@ -177,7 +177,7 @@ static int get_num_wrp_resources(void) static int get_num_brp_resources(void) { u32 didr; - ARM_DBG_READ(c0, 0, didr); + ARM_DBG_READ(c0, c0, 0, didr); return ((didr >> 24) & 0xf) + 1; } @@ -228,19 +228,17 @@ static int get_num_brps(void) * be put into halting debug mode at any time by an external debugger * but there is nothing we can do to prevent that. */ -static int enable_monitor_mode(void) +static int monitor_mode_enabled(void) { u32 dscr; - int ret = 0; - - ARM_DBG_READ(c1, 0, dscr); + ARM_DBG_READ(c0, c1, 0, dscr); + return !!(dscr & ARM_DSCR_MDBGEN); +} - /* Ensure that halting mode is disabled. */ - if (WARN_ONCE(dscr & ARM_DSCR_HDBGEN, - "halting debug mode enabled. Unable to access hardware resources.\n")) { - ret = -EPERM; - goto out; - } +static int enable_monitor_mode(void) +{ + u32 dscr; + ARM_DBG_READ(c0, c1, 0, dscr); /* If monitor mode is already enabled, just return. */ if (dscr & ARM_DSCR_MDBGEN) @@ -250,24 +248,27 @@ static int enable_monitor_mode(void) switch (get_debug_arch()) { case ARM_DEBUG_ARCH_V6: case ARM_DEBUG_ARCH_V6_1: - ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN)); + ARM_DBG_WRITE(c0, c1, 0, (dscr | ARM_DSCR_MDBGEN)); break; case ARM_DEBUG_ARCH_V7_ECP14: case ARM_DEBUG_ARCH_V7_1: - ARM_DBG_WRITE(c2, 2, (dscr | ARM_DSCR_MDBGEN)); + ARM_DBG_WRITE(c0, c2, 2, (dscr | ARM_DSCR_MDBGEN)); + isb(); break; default: - ret = -ENODEV; - goto out; + return -ENODEV; } /* Check that the write made it through. */ - ARM_DBG_READ(c1, 0, dscr); - if (!(dscr & ARM_DSCR_MDBGEN)) - ret = -EPERM; + ARM_DBG_READ(c0, c1, 0, dscr); + if (!(dscr & ARM_DSCR_MDBGEN)) { + pr_warn_once("Failed to enable monitor mode on CPU %d.\n", + smp_processor_id()); + return -EPERM; + } out: - return ret; + return 0; } int hw_breakpoint_slots(int type) @@ -328,14 +329,9 @@ int arch_install_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); struct perf_event **slot, **slots; - int i, max_slots, ctrl_base, val_base, ret = 0; + int i, max_slots, ctrl_base, val_base; u32 addr, ctrl; - /* Ensure that we are in monitor mode and halting mode is disabled. */ - ret = enable_monitor_mode(); - if (ret) - goto out; - addr = info->address; ctrl = encode_ctrl_reg(info->ctrl) | 0x1; @@ -362,9 +358,9 @@ int arch_install_hw_breakpoint(struct perf_event *bp) } } - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot\n")) { - ret = -EBUSY; - goto out; + if (i == max_slots) { + pr_warning("Can't find any breakpoint slot\n"); + return -EBUSY; } /* Override the breakpoint data with the step data. */ @@ -383,9 +379,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) /* Setup the control register. */ write_wb_reg(ctrl_base + i, ctrl); - -out: - return ret; + return 0; } void arch_uninstall_hw_breakpoint(struct perf_event *bp) @@ -416,8 +410,10 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) } } - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot\n")) + if (i == max_slots) { + pr_warning("Can't find any breakpoint slot\n"); return; + } /* Ensure that we disable the mismatch breakpoint. */ if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE && @@ -596,6 +592,10 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) int ret = 0; u32 offset, alignment_mask = 0x3; + /* Ensure that we are in monitor debug mode. */ + if (!monitor_mode_enabled()) + return -ENODEV; + /* Build the arch_hw_breakpoint. */ ret = arch_build_bp_info(bp); if (ret) @@ -858,7 +858,7 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, local_irq_enable(); /* We only handle watchpoints and hardware breakpoints. */ - ARM_DBG_READ(c1, 0, dscr); + ARM_DBG_READ(c0, c1, 0, dscr); /* Perform perf callbacks. */ switch (ARM_DSCR_MOE(dscr)) { @@ -906,7 +906,7 @@ static struct undef_hook debug_reg_hook = { static void reset_ctrl_regs(void *unused) { int i, raw_num_brps, err = 0, cpu = smp_processor_id(); - u32 dbg_power; + u32 val; /* * v7 debug contains save and restore registers so that debug state @@ -919,23 +919,30 @@ static void reset_ctrl_regs(void *unused) switch (debug_arch) { case ARM_DEBUG_ARCH_V6: case ARM_DEBUG_ARCH_V6_1: - /* ARMv6 cores just need to reset the registers. */ - goto reset_regs; + /* ARMv6 cores clear the registers out of reset. */ + goto out_mdbgen; case ARM_DEBUG_ARCH_V7_ECP14: /* * Ensure sticky power-down is clear (i.e. debug logic is * powered up). */ - asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (dbg_power)); - if ((dbg_power & 0x1) == 0) + ARM_DBG_READ(c1, c5, 4, val); + if ((val & 0x1) == 0) err = -EPERM; + + /* + * Check whether we implement OS save and restore. + */ + ARM_DBG_READ(c1, c1, 4, val); + if ((val & 0x9) == 0) + goto clear_vcr; break; case ARM_DEBUG_ARCH_V7_1: /* * Ensure the OS double lock is clear. */ - asm volatile("mrc p14, 0, %0, c1, c3, 4" : "=r" (dbg_power)); - if ((dbg_power & 0x1) == 1) + ARM_DBG_READ(c1, c3, 4, val); + if ((val & 0x1) == 1) err = -EPERM; break; } @@ -947,24 +954,29 @@ static void reset_ctrl_regs(void *unused) } /* - * Unconditionally clear the lock by writing a value + * Unconditionally clear the OS lock by writing a value * other than 0xC5ACCE55 to the access register. */ - asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0)); + ARM_DBG_WRITE(c1, c0, 4, 0); isb(); /* * Clear any configured vector-catch events before * enabling monitor mode. */ - asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0)); +clear_vcr: + ARM_DBG_WRITE(c0, c7, 0, 0); isb(); -reset_regs: - if (enable_monitor_mode()) + if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { + pr_warning("CPU %d failed to disable vector catch\n", cpu); return; + } - /* We must also reset any reserved registers. */ + /* + * The control/value register pairs are UNKNOWN out of reset so + * clear them to avoid spurious debug events. + */ raw_num_brps = get_num_brp_resources(); for (i = 0; i < raw_num_brps; ++i) { write_wb_reg(ARM_BASE_BCR + i, 0UL); @@ -975,6 +987,19 @@ reset_regs: write_wb_reg(ARM_BASE_WCR + i, 0UL); write_wb_reg(ARM_BASE_WVR + i, 0UL); } + + if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { + pr_warning("CPU %d failed to clear debug register pairs\n", cpu); + return; + } + + /* + * Have a crack at enabling monitor mode. We don't actually need + * it yet, but reporting an error early is useful if it fails. + */ +out_mdbgen: + if (enable_monitor_mode()) + cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu)); } static int __cpuinit dbg_reset_notify(struct notifier_block *self, @@ -992,8 +1017,6 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = { static int __init arch_hw_breakpoint_init(void) { - u32 dscr; - debug_arch = get_debug_arch(); if (!debug_arch_supported()) { @@ -1028,17 +1051,10 @@ static int __init arch_hw_breakpoint_init(void) core_num_brps, core_has_mismatch_brps() ? "(+1 reserved) " : "", core_num_wrps); - ARM_DBG_READ(c1, 0, dscr); - if (dscr & ARM_DSCR_HDBGEN) { - max_watchpoint_len = 4; - pr_warning("halting debug mode enabled. Assuming maximum watchpoint size of %u bytes.\n", - max_watchpoint_len); - } else { - /* Work out the maximum supported watchpoint length. */ - max_watchpoint_len = get_max_wp_len(); - pr_info("maximum watchpoint size is %u bytes.\n", - max_watchpoint_len); - } + /* Work out the maximum supported watchpoint length. */ + max_watchpoint_len = get_max_wp_len(); + pr_info("maximum watchpoint size is %u bytes.\n", + max_watchpoint_len); /* Register debug fault handler. */ hook_fault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP, diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 53c0304b734..f9e8657dd24 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -86,12 +86,10 @@ armpmu_map_event(struct perf_event *event, return -ENOENT; } -int -armpmu_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, - int idx) +int armpmu_event_set_period(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; int ret = 0; @@ -119,24 +117,22 @@ armpmu_event_set_period(struct perf_event *event, local64_set(&hwc->prev_count, (u64)-left); - armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); + armpmu->write_counter(event, (u64)(-left) & 0xffffffff); perf_event_update_userpage(event); return ret; } -u64 -armpmu_event_update(struct perf_event *event, - struct hw_perf_event *hwc, - int idx) +u64 armpmu_event_update(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u64 delta, prev_raw_count, new_raw_count; again: prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = armpmu->read_counter(idx); + new_raw_count = armpmu->read_counter(event); if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) @@ -159,7 +155,7 @@ armpmu_read(struct perf_event *event) if (hwc->idx < 0) return; - armpmu_event_update(event, hwc, hwc->idx); + armpmu_event_update(event); } static void @@ -173,14 +169,13 @@ armpmu_stop(struct perf_event *event, int flags) * PERF_EF_UPDATE, see comments in armpmu_start(). */ if (!(hwc->state & PERF_HES_STOPPED)) { - armpmu->disable(hwc, hwc->idx); - armpmu_event_update(event, hwc, hwc->idx); + armpmu->disable(event); + armpmu_event_update(event); hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; } } -static void -armpmu_start(struct perf_event *event, int flags) +static void armpmu_start(struct perf_event *event, int flags) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -200,8 +195,8 @@ armpmu_start(struct perf_event *event, int flags) * get an interrupt too soon or *way* too late if the overflow has * happened since disabling. */ - armpmu_event_set_period(event, hwc, hwc->idx); - armpmu->enable(hwc, hwc->idx); + armpmu_event_set_period(event); + armpmu->enable(event); } static void @@ -233,7 +228,7 @@ armpmu_add(struct perf_event *event, int flags) perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ - idx = armpmu->get_event_idx(hw_events, hwc); + idx = armpmu->get_event_idx(hw_events, event); if (idx < 0) { err = idx; goto out; @@ -244,7 +239,7 @@ armpmu_add(struct perf_event *event, int flags) * sure it is disabled. */ event->hw.idx = idx; - armpmu->disable(hwc, idx); + armpmu->disable(event); hw_events->events[idx] = event; hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; @@ -264,13 +259,12 @@ validate_event(struct pmu_hw_events *hw_events, struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event fake_event = event->hw; struct pmu *leader_pmu = event->group_leader->pmu; if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) return 1; - return armpmu->get_event_idx(hw_events, &fake_event) >= 0; + return armpmu->get_event_idx(hw_events, event) >= 0; } static int @@ -316,7 +310,7 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) static void armpmu_release_hardware(struct arm_pmu *armpmu) { - armpmu->free_irq(); + armpmu->free_irq(armpmu); pm_runtime_put_sync(&armpmu->plat_device->dev); } @@ -330,7 +324,7 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu) return -ENODEV; pm_runtime_get_sync(&pmu_device->dev); - err = armpmu->request_irq(armpmu_dispatch_irq); + err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); if (err) { armpmu_release_hardware(armpmu); return err; @@ -465,13 +459,13 @@ static void armpmu_enable(struct pmu *pmu) int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); if (enabled) - armpmu->start(); + armpmu->start(armpmu); } static void armpmu_disable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); - armpmu->stop(); + armpmu->stop(armpmu); } #ifdef CONFIG_PM_RUNTIME @@ -517,12 +511,13 @@ static void __init armpmu_init(struct arm_pmu *armpmu) }; } -int armpmu_register(struct arm_pmu *armpmu, char *name, int type) +int armpmu_register(struct arm_pmu *armpmu, int type) { armpmu_init(armpmu); + pm_runtime_enable(&armpmu->plat_device->dev); pr_info("enabled with %s PMU driver, %d counters available\n", armpmu->name, armpmu->num_events); - return perf_pmu_register(&armpmu->pmu, name, type); + return perf_pmu_register(&armpmu->pmu, armpmu->name, type); } /* @@ -576,6 +571,10 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct frame_tail __user *tail; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } tail = (struct frame_tail __user *)regs->ARM_fp - 1; @@ -603,9 +602,41 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stackframe fr; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + fr.fp = regs->ARM_fp; fr.sp = regs->ARM_sp; fr.lr = regs->ARM_lr; fr.pc = regs->ARM_pc; walk_stackframe(&fr, callchain_trace, entry); } + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + return perf_guest_cbs->get_guest_ip(); + + return instruction_pointer(regs); +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + int misc = 0; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_cbs->is_user_mode()) + misc |= PERF_RECORD_MISC_GUEST_USER; + else + misc |= PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; + } + + return misc; +} diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 8d7d8d4de9d..9a4f6307a01 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -23,6 +23,7 @@ #include <linux/kernel.h> #include <linux/of.h> #include <linux/platform_device.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <asm/cputype.h> @@ -45,7 +46,7 @@ const char *perf_pmu_name(void) if (!cpu_pmu) return NULL; - return cpu_pmu->pmu.name; + return cpu_pmu->name; } EXPORT_SYMBOL_GPL(perf_pmu_name); @@ -70,7 +71,7 @@ static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) return &__get_cpu_var(cpu_hw_events); } -static void cpu_pmu_free_irq(void) +static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) { int i, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; @@ -86,7 +87,7 @@ static void cpu_pmu_free_irq(void) } } -static int cpu_pmu_request_irq(irq_handler_t handler) +static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) { int i, err, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; @@ -147,7 +148,7 @@ static void __devinit cpu_pmu_init(struct arm_pmu *cpu_pmu) /* Ensure the PMU has sane values out of reset. */ if (cpu_pmu && cpu_pmu->reset) - on_each_cpu(cpu_pmu->reset, NULL, 1); + on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); } /* @@ -163,7 +164,9 @@ static int __cpuinit cpu_pmu_notify(struct notifier_block *b, return NOTIFY_DONE; if (cpu_pmu && cpu_pmu->reset) - cpu_pmu->reset(NULL); + cpu_pmu->reset(cpu_pmu); + else + return NOTIFY_DONE; return NOTIFY_OK; } @@ -195,13 +198,13 @@ static struct platform_device_id __devinitdata cpu_pmu_plat_device_ids[] = { /* * CPU PMU identification and probing. */ -static struct arm_pmu *__devinit probe_current_pmu(void) +static int __devinit probe_current_pmu(struct arm_pmu *pmu) { - struct arm_pmu *pmu = NULL; int cpu = get_cpu(); unsigned long cpuid = read_cpuid_id(); unsigned long implementor = (cpuid & 0xFF000000) >> 24; unsigned long part_number = (cpuid & 0xFFF0); + int ret = -ENODEV; pr_info("probing PMU on CPU %d\n", cpu); @@ -211,25 +214,25 @@ static struct arm_pmu *__devinit probe_current_pmu(void) case 0xB360: /* ARM1136 */ case 0xB560: /* ARM1156 */ case 0xB760: /* ARM1176 */ - pmu = armv6pmu_init(); + ret = armv6pmu_init(pmu); break; case 0xB020: /* ARM11mpcore */ - pmu = armv6mpcore_pmu_init(); + ret = armv6mpcore_pmu_init(pmu); break; case 0xC080: /* Cortex-A8 */ - pmu = armv7_a8_pmu_init(); + ret = armv7_a8_pmu_init(pmu); break; case 0xC090: /* Cortex-A9 */ - pmu = armv7_a9_pmu_init(); + ret = armv7_a9_pmu_init(pmu); break; case 0xC050: /* Cortex-A5 */ - pmu = armv7_a5_pmu_init(); + ret = armv7_a5_pmu_init(pmu); break; case 0xC0F0: /* Cortex-A15 */ - pmu = armv7_a15_pmu_init(); + ret = armv7_a15_pmu_init(pmu); break; case 0xC070: /* Cortex-A7 */ - pmu = armv7_a7_pmu_init(); + ret = armv7_a7_pmu_init(pmu); break; } /* Intel CPUs [xscale]. */ @@ -237,43 +240,54 @@ static struct arm_pmu *__devinit probe_current_pmu(void) part_number = (cpuid >> 13) & 0x7; switch (part_number) { case 1: - pmu = xscale1pmu_init(); + ret = xscale1pmu_init(pmu); break; case 2: - pmu = xscale2pmu_init(); + ret = xscale2pmu_init(pmu); break; } } put_cpu(); - return pmu; + return ret; } static int __devinit cpu_pmu_device_probe(struct platform_device *pdev) { const struct of_device_id *of_id; - struct arm_pmu *(*init_fn)(void); + int (*init_fn)(struct arm_pmu *); struct device_node *node = pdev->dev.of_node; + struct arm_pmu *pmu; + int ret = -ENODEV; if (cpu_pmu) { pr_info("attempt to register multiple PMU devices!"); return -ENOSPC; } + pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); + if (!pmu) { + pr_info("failed to allocate PMU device!"); + return -ENOMEM; + } + if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { init_fn = of_id->data; - cpu_pmu = init_fn(); + ret = init_fn(pmu); } else { - cpu_pmu = probe_current_pmu(); + ret = probe_current_pmu(pmu); } - if (!cpu_pmu) - return -ENODEV; + if (ret) { + pr_info("failed to register PMU devices!"); + kfree(pmu); + return ret; + } + cpu_pmu = pmu; cpu_pmu->plat_device = pdev; cpu_pmu_init(cpu_pmu); - register_cpu_notifier(&cpu_pmu_hotplug_notifier); - armpmu_register(cpu_pmu, cpu_pmu->name, PERF_TYPE_RAW); + armpmu_register(cpu_pmu, PERF_TYPE_RAW); return 0; } @@ -290,6 +304,16 @@ static struct platform_driver cpu_pmu_driver = { static int __init register_pmu_driver(void) { - return platform_driver_register(&cpu_pmu_driver); + int err; + + err = register_cpu_notifier(&cpu_pmu_hotplug_notifier); + if (err) + return err; + + err = platform_driver_register(&cpu_pmu_driver); + if (err) + unregister_cpu_notifier(&cpu_pmu_hotplug_notifier); + + return err; } device_initcall(register_pmu_driver); diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 6ccc0797174..f3e22ff8b6a 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -401,9 +401,10 @@ armv6_pmcr_counter_has_overflowed(unsigned long pmcr, return ret; } -static inline u32 -armv6pmu_read_counter(int counter) +static inline u32 armv6pmu_read_counter(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; unsigned long value = 0; if (ARMV6_CYCLE_COUNTER == counter) @@ -418,10 +419,11 @@ armv6pmu_read_counter(int counter) return value; } -static inline void -armv6pmu_write_counter(int counter, - u32 value) +static inline void armv6pmu_write_counter(struct perf_event *event, u32 value) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + if (ARMV6_CYCLE_COUNTER == counter) asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); else if (ARMV6_COUNTER0 == counter) @@ -432,12 +434,13 @@ armv6pmu_write_counter(int counter, WARN_ONCE(1, "invalid counter number (%d)\n", counter); } -static void -armv6pmu_enable_event(struct hw_perf_event *hwc, - int idx) +static void armv6pmu_enable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { mask = 0; @@ -473,7 +476,8 @@ armv6pmu_handle_irq(int irq_num, { unsigned long pmcr = armv6_pmcr_read(); struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); struct pt_regs *regs; int idx; @@ -489,7 +493,6 @@ armv6pmu_handle_irq(int irq_num, */ armv6_pmcr_write(pmcr); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -506,13 +509,13 @@ armv6pmu_handle_irq(int irq_num, continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } /* @@ -527,8 +530,7 @@ armv6pmu_handle_irq(int irq_num, return IRQ_HANDLED; } -static void -armv6pmu_start(void) +static void armv6pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -540,8 +542,7 @@ armv6pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -armv6pmu_stop(void) +static void armv6pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -555,10 +556,11 @@ armv6pmu_stop(void) static int armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; /* Always place a cycle counter into the cycle counter. */ - if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { + if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) { if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) return -EAGAIN; @@ -579,12 +581,13 @@ armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, } } -static void -armv6pmu_disable_event(struct hw_perf_event *hwc, - int idx) +static void armv6pmu_disable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { mask = ARMV6_PMCR_CCOUNT_IEN; @@ -613,12 +616,13 @@ armv6pmu_disable_event(struct hw_perf_event *hwc, raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, - int idx) +static void armv6mpcore_pmu_disable_event(struct perf_event *event) { unsigned long val, mask, flags, evt = 0; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { mask = ARMV6_PMCR_CCOUNT_IEN; @@ -649,24 +653,22 @@ static int armv6_map_event(struct perf_event *event) &armv6_perf_cache_map, 0xFF); } -static struct arm_pmu armv6pmu = { - .name = "v6", - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6pmu_disable_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .map_event = armv6_map_event, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -static struct arm_pmu *__devinit armv6pmu_init(void) +static int __devinit armv6pmu_init(struct arm_pmu *cpu_pmu) { - return &armv6pmu; + cpu_pmu->name = "v6"; + cpu_pmu->handle_irq = armv6pmu_handle_irq; + cpu_pmu->enable = armv6pmu_enable_event; + cpu_pmu->disable = armv6pmu_disable_event; + cpu_pmu->read_counter = armv6pmu_read_counter; + cpu_pmu->write_counter = armv6pmu_write_counter; + cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->start = armv6pmu_start; + cpu_pmu->stop = armv6pmu_stop; + cpu_pmu->map_event = armv6_map_event; + cpu_pmu->num_events = 3; + cpu_pmu->max_period = (1LLU << 32) - 1; + + return 0; } /* @@ -683,33 +685,31 @@ static int armv6mpcore_map_event(struct perf_event *event) &armv6mpcore_perf_cache_map, 0xFF); } -static struct arm_pmu armv6mpcore_pmu = { - .name = "v6mpcore", - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6mpcore_pmu_disable_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .map_event = armv6mpcore_map_event, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -static struct arm_pmu *__devinit armv6mpcore_pmu_init(void) +static int __devinit armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) { - return &armv6mpcore_pmu; + cpu_pmu->name = "v6mpcore"; + cpu_pmu->handle_irq = armv6pmu_handle_irq; + cpu_pmu->enable = armv6pmu_enable_event; + cpu_pmu->disable = armv6mpcore_pmu_disable_event; + cpu_pmu->read_counter = armv6pmu_read_counter; + cpu_pmu->write_counter = armv6pmu_write_counter; + cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->start = armv6pmu_start; + cpu_pmu->stop = armv6pmu_stop; + cpu_pmu->map_event = armv6mpcore_map_event; + cpu_pmu->num_events = 3; + cpu_pmu->max_period = (1LLU << 32) - 1; + + return 0; } #else -static struct arm_pmu *__devinit armv6pmu_init(void) +static int armv6pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } -static struct arm_pmu *__devinit armv6mpcore_pmu_init(void) +static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } #endif /* CONFIG_CPU_V6 || CONFIG_CPU_V6K */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index bd4b090ebcf..7d0cce85d17 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -18,8 +18,6 @@ #ifdef CONFIG_CPU_V7 -static struct arm_pmu armv7pmu; - /* * Common ARMv7 event types * @@ -738,7 +736,8 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] */ #define ARMV7_IDX_CYCLE_COUNTER 0 #define ARMV7_IDX_COUNTER0 1 -#define ARMV7_IDX_COUNTER_LAST (ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) +#define ARMV7_IDX_COUNTER_LAST(cpu_pmu) \ + (ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) #define ARMV7_MAX_COUNTERS 32 #define ARMV7_COUNTER_MASK (ARMV7_MAX_COUNTERS - 1) @@ -804,49 +803,34 @@ static inline int armv7_pmnc_has_overflowed(u32 pmnc) return pmnc & ARMV7_OVERFLOWED_MASK; } -static inline int armv7_pmnc_counter_valid(int idx) +static inline int armv7_pmnc_counter_valid(struct arm_pmu *cpu_pmu, int idx) { - return idx >= ARMV7_IDX_CYCLE_COUNTER && idx <= ARMV7_IDX_COUNTER_LAST; + return idx >= ARMV7_IDX_CYCLE_COUNTER && + idx <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); } static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx) { - int ret = 0; - u32 counter; - - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u checking wrong counter %d overflow status\n", - smp_processor_id(), idx); - } else { - counter = ARMV7_IDX_TO_COUNTER(idx); - ret = pmnc & BIT(counter); - } - - return ret; + return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx)); } static inline int armv7_pmnc_select_counter(int idx) { - u32 counter; - - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u selecting wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV7_IDX_TO_COUNTER(idx); + u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter)); isb(); return idx; } -static inline u32 armv7pmu_read_counter(int idx) +static inline u32 armv7pmu_read_counter(struct perf_event *event) { + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; u32 value = 0; - if (!armv7_pmnc_counter_valid(idx)) + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) pr_err("CPU%u reading wrong counter %d\n", smp_processor_id(), idx); else if (idx == ARMV7_IDX_CYCLE_COUNTER) @@ -857,9 +841,13 @@ static inline u32 armv7pmu_read_counter(int idx) return value; } -static inline void armv7pmu_write_counter(int idx, u32 value) +static inline void armv7pmu_write_counter(struct perf_event *event, u32 value) { - if (!armv7_pmnc_counter_valid(idx)) + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) pr_err("CPU%u writing wrong counter %d\n", smp_processor_id(), idx); else if (idx == ARMV7_IDX_CYCLE_COUNTER) @@ -878,60 +866,28 @@ static inline void armv7_pmnc_write_evtsel(int idx, u32 val) static inline int armv7_pmnc_enable_counter(int idx) { - u32 counter; - - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u enabling wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV7_IDX_TO_COUNTER(idx); + u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter))); return idx; } static inline int armv7_pmnc_disable_counter(int idx) { - u32 counter; - - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u disabling wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV7_IDX_TO_COUNTER(idx); + u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter))); return idx; } static inline int armv7_pmnc_enable_intens(int idx) { - u32 counter; - - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV7_IDX_TO_COUNTER(idx); + u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter))); return idx; } static inline int armv7_pmnc_disable_intens(int idx) { - u32 counter; - - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV7_IDX_TO_COUNTER(idx); + u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter))); isb(); /* Clear the overflow flag in case an interrupt is pending. */ @@ -956,7 +912,7 @@ static inline u32 armv7_pmnc_getreset_flags(void) } #ifdef DEBUG -static void armv7_pmnc_dump_regs(void) +static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu) { u32 val; unsigned int cnt; @@ -981,7 +937,8 @@ static void armv7_pmnc_dump_regs(void) asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); printk(KERN_INFO "CCNT =0x%08x\n", val); - for (cnt = ARMV7_IDX_COUNTER0; cnt <= ARMV7_IDX_COUNTER_LAST; cnt++) { + for (cnt = ARMV7_IDX_COUNTER0; + cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { armv7_pmnc_select_counter(cnt); asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); printk(KERN_INFO "CNT[%d] count =0x%08x\n", @@ -993,10 +950,19 @@ static void armv7_pmnc_dump_regs(void) } #endif -static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void armv7pmu_enable_event(struct perf_event *event) { unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", + smp_processor_id(), idx); + return; + } /* * Enable counter and interrupt, and set the counter to count @@ -1014,7 +980,7 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) * We only need to set the event for the cycle counter if we * have the ability to perform event filtering. */ - if (armv7pmu.set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER) + if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER) armv7_pmnc_write_evtsel(idx, hwc->config_base); /* @@ -1030,10 +996,19 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) +static void armv7pmu_disable_event(struct perf_event *event) { unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", + smp_processor_id(), idx); + return; + } /* * Disable counter and interrupt @@ -1057,7 +1032,8 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) { u32 pmnc; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); struct pt_regs *regs; int idx; @@ -1077,7 +1053,6 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) */ regs = get_irq_regs(); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -1094,13 +1069,13 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } /* @@ -1115,7 +1090,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) return IRQ_HANDLED; } -static void armv7pmu_start(void) +static void armv7pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -1126,7 +1101,7 @@ static void armv7pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void armv7pmu_stop(void) +static void armv7pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -1138,10 +1113,12 @@ static void armv7pmu_stop(void) } static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { int idx; - unsigned long evtype = event->config_base & ARMV7_EVTYPE_EVENT; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT; /* Always place a cycle counter into the cycle counter. */ if (evtype == ARMV7_PERFCTR_CPU_CYCLES) { @@ -1192,11 +1169,14 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event, static void armv7pmu_reset(void *info) { + struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; u32 idx, nb_cnt = cpu_pmu->num_events; /* The counter and interrupt enable registers are unknown at reset. */ - for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) - armv7pmu_disable_event(NULL, idx); + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_disable_counter(idx); + armv7_pmnc_disable_intens(idx); + } /* Initialize & Reset PMNC: C and P bits */ armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); @@ -1232,17 +1212,18 @@ static int armv7_a7_map_event(struct perf_event *event) &armv7_a7_perf_cache_map, 0xFF); } -static struct arm_pmu armv7pmu = { - .handle_irq = armv7pmu_handle_irq, - .enable = armv7pmu_enable_event, - .disable = armv7pmu_disable_event, - .read_counter = armv7pmu_read_counter, - .write_counter = armv7pmu_write_counter, - .get_event_idx = armv7pmu_get_event_idx, - .start = armv7pmu_start, - .stop = armv7pmu_stop, - .reset = armv7pmu_reset, - .max_period = (1LLU << 32) - 1, +static void armv7pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->handle_irq = armv7pmu_handle_irq; + cpu_pmu->enable = armv7pmu_enable_event; + cpu_pmu->disable = armv7pmu_disable_event; + cpu_pmu->read_counter = armv7pmu_read_counter; + cpu_pmu->write_counter = armv7pmu_write_counter; + cpu_pmu->get_event_idx = armv7pmu_get_event_idx; + cpu_pmu->start = armv7pmu_start; + cpu_pmu->stop = armv7pmu_stop; + cpu_pmu->reset = armv7pmu_reset; + cpu_pmu->max_period = (1LLU << 32) - 1; }; static u32 __devinit armv7_read_num_pmnc_events(void) @@ -1256,70 +1237,75 @@ static u32 __devinit armv7_read_num_pmnc_events(void) return nb_cnt + 1; } -static struct arm_pmu *__devinit armv7_a8_pmu_init(void) +static int __devinit armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) { - armv7pmu.name = "ARMv7 Cortex-A8"; - armv7pmu.map_event = armv7_a8_map_event; - armv7pmu.num_events = armv7_read_num_pmnc_events(); - return &armv7pmu; + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A8"; + cpu_pmu->map_event = armv7_a8_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + return 0; } -static struct arm_pmu *__devinit armv7_a9_pmu_init(void) +static int __devinit armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) { - armv7pmu.name = "ARMv7 Cortex-A9"; - armv7pmu.map_event = armv7_a9_map_event; - armv7pmu.num_events = armv7_read_num_pmnc_events(); - return &armv7pmu; + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A9"; + cpu_pmu->map_event = armv7_a9_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + return 0; } -static struct arm_pmu *__devinit armv7_a5_pmu_init(void) +static int __devinit armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) { - armv7pmu.name = "ARMv7 Cortex-A5"; - armv7pmu.map_event = armv7_a5_map_event; - armv7pmu.num_events = armv7_read_num_pmnc_events(); - return &armv7pmu; + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A5"; + cpu_pmu->map_event = armv7_a5_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + return 0; } -static struct arm_pmu *__devinit armv7_a15_pmu_init(void) +static int __devinit armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) { - armv7pmu.name = "ARMv7 Cortex-A15"; - armv7pmu.map_event = armv7_a15_map_event; - armv7pmu.num_events = armv7_read_num_pmnc_events(); - armv7pmu.set_event_filter = armv7pmu_set_event_filter; - return &armv7pmu; + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A15"; + cpu_pmu->map_event = armv7_a15_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + return 0; } -static struct arm_pmu *__devinit armv7_a7_pmu_init(void) +static int __devinit armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) { - armv7pmu.name = "ARMv7 Cortex-A7"; - armv7pmu.map_event = armv7_a7_map_event; - armv7pmu.num_events = armv7_read_num_pmnc_events(); - armv7pmu.set_event_filter = armv7pmu_set_event_filter; - return &armv7pmu; + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A7"; + cpu_pmu->map_event = armv7_a7_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + return 0; } #else -static struct arm_pmu *__devinit armv7_a8_pmu_init(void) +static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } -static struct arm_pmu *__devinit armv7_a9_pmu_init(void) +static inline int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } -static struct arm_pmu *__devinit armv7_a5_pmu_init(void) +static inline int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } -static struct arm_pmu *__devinit armv7_a15_pmu_init(void) +static inline int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } -static struct arm_pmu *__devinit armv7_a7_pmu_init(void) +static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } #endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 426e19f380a..0c8265e53d5 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -224,7 +224,8 @@ xscale1pmu_handle_irq(int irq_num, void *dev) { unsigned long pmnc; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); struct pt_regs *regs; int idx; @@ -248,7 +249,6 @@ xscale1pmu_handle_irq(int irq_num, void *dev) regs = get_irq_regs(); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -260,13 +260,13 @@ xscale1pmu_handle_irq(int irq_num, void *dev) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } irq_work_run(); @@ -280,11 +280,13 @@ xscale1pmu_handle_irq(int irq_num, void *dev) return IRQ_HANDLED; } -static void -xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void xscale1pmu_enable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; switch (idx) { case XSCALE_CYCLE_COUNTER: @@ -314,11 +316,13 @@ xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) +static void xscale1pmu_disable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; switch (idx) { case XSCALE_CYCLE_COUNTER: @@ -348,9 +352,10 @@ xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) static int xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { - if (XSCALE_PERFCTR_CCNT == event->config_base) { + struct hw_perf_event *hwc = &event->hw; + if (XSCALE_PERFCTR_CCNT == hwc->config_base) { if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) return -EAGAIN; @@ -366,8 +371,7 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, } } -static void -xscale1pmu_start(void) +static void xscale1pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -379,8 +383,7 @@ xscale1pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -xscale1pmu_stop(void) +static void xscale1pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -392,9 +395,10 @@ xscale1pmu_stop(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline u32 -xscale1pmu_read_counter(int counter) +static inline u32 xscale1pmu_read_counter(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; u32 val = 0; switch (counter) { @@ -412,9 +416,11 @@ xscale1pmu_read_counter(int counter) return val; } -static inline void -xscale1pmu_write_counter(int counter, u32 val) +static inline void xscale1pmu_write_counter(struct perf_event *event, u32 val) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + switch (counter) { case XSCALE_CYCLE_COUNTER: asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); @@ -434,24 +440,22 @@ static int xscale_map_event(struct perf_event *event) &xscale_perf_cache_map, 0xFF); } -static struct arm_pmu xscale1pmu = { - .name = "xscale1", - .handle_irq = xscale1pmu_handle_irq, - .enable = xscale1pmu_enable_event, - .disable = xscale1pmu_disable_event, - .read_counter = xscale1pmu_read_counter, - .write_counter = xscale1pmu_write_counter, - .get_event_idx = xscale1pmu_get_event_idx, - .start = xscale1pmu_start, - .stop = xscale1pmu_stop, - .map_event = xscale_map_event, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -static struct arm_pmu *__devinit xscale1pmu_init(void) +static int __devinit xscale1pmu_init(struct arm_pmu *cpu_pmu) { - return &xscale1pmu; + cpu_pmu->name = "xscale1"; + cpu_pmu->handle_irq = xscale1pmu_handle_irq; + cpu_pmu->enable = xscale1pmu_enable_event; + cpu_pmu->disable = xscale1pmu_disable_event; + cpu_pmu->read_counter = xscale1pmu_read_counter; + cpu_pmu->write_counter = xscale1pmu_write_counter; + cpu_pmu->get_event_idx = xscale1pmu_get_event_idx; + cpu_pmu->start = xscale1pmu_start; + cpu_pmu->stop = xscale1pmu_stop; + cpu_pmu->map_event = xscale_map_event; + cpu_pmu->num_events = 3; + cpu_pmu->max_period = (1LLU << 32) - 1; + + return 0; } #define XSCALE2_OVERFLOWED_MASK 0x01f @@ -567,7 +571,8 @@ xscale2pmu_handle_irq(int irq_num, void *dev) { unsigned long pmnc, of_flags; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); struct pt_regs *regs; int idx; @@ -585,7 +590,6 @@ xscale2pmu_handle_irq(int irq_num, void *dev) regs = get_irq_regs(); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -597,13 +601,13 @@ xscale2pmu_handle_irq(int irq_num, void *dev) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } irq_work_run(); @@ -617,11 +621,13 @@ xscale2pmu_handle_irq(int irq_num, void *dev) return IRQ_HANDLED; } -static void -xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void xscale2pmu_enable_event(struct perf_event *event) { unsigned long flags, ien, evtsel; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; ien = xscale2pmu_read_int_enable(); evtsel = xscale2pmu_read_event_select(); @@ -661,11 +667,13 @@ xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) +static void xscale2pmu_disable_event(struct perf_event *event) { unsigned long flags, ien, evtsel, of_flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; ien = xscale2pmu_read_int_enable(); evtsel = xscale2pmu_read_event_select(); @@ -713,7 +721,7 @@ xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) static int xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { int idx = xscale1pmu_get_event_idx(cpuc, event); if (idx >= 0) @@ -727,8 +735,7 @@ out: return idx; } -static void -xscale2pmu_start(void) +static void xscale2pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -740,8 +747,7 @@ xscale2pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -xscale2pmu_stop(void) +static void xscale2pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -753,9 +759,10 @@ xscale2pmu_stop(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline u32 -xscale2pmu_read_counter(int counter) +static inline u32 xscale2pmu_read_counter(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; u32 val = 0; switch (counter) { @@ -779,9 +786,11 @@ xscale2pmu_read_counter(int counter) return val; } -static inline void -xscale2pmu_write_counter(int counter, u32 val) +static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + switch (counter) { case XSCALE_CYCLE_COUNTER: asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); @@ -801,33 +810,31 @@ xscale2pmu_write_counter(int counter, u32 val) } } -static struct arm_pmu xscale2pmu = { - .name = "xscale2", - .handle_irq = xscale2pmu_handle_irq, - .enable = xscale2pmu_enable_event, - .disable = xscale2pmu_disable_event, - .read_counter = xscale2pmu_read_counter, - .write_counter = xscale2pmu_write_counter, - .get_event_idx = xscale2pmu_get_event_idx, - .start = xscale2pmu_start, - .stop = xscale2pmu_stop, - .map_event = xscale_map_event, - .num_events = 5, - .max_period = (1LLU << 32) - 1, -}; - -static struct arm_pmu *__devinit xscale2pmu_init(void) +static int __devinit xscale2pmu_init(struct arm_pmu *cpu_pmu) { - return &xscale2pmu; + cpu_pmu->name = "xscale2"; + cpu_pmu->handle_irq = xscale2pmu_handle_irq; + cpu_pmu->enable = xscale2pmu_enable_event; + cpu_pmu->disable = xscale2pmu_disable_event; + cpu_pmu->read_counter = xscale2pmu_read_counter; + cpu_pmu->write_counter = xscale2pmu_write_counter; + cpu_pmu->get_event_idx = xscale2pmu_get_event_idx; + cpu_pmu->start = xscale2pmu_start; + cpu_pmu->stop = xscale2pmu_stop; + cpu_pmu->map_event = xscale_map_event; + cpu_pmu->num_events = 5; + cpu_pmu->max_period = (1LLU << 32) - 1; + + return 0; } #else -static struct arm_pmu *__devinit xscale1pmu_init(void) +static inline int xscale1pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } -static struct arm_pmu *__devinit xscale2pmu_init(void) +static inline int xscale2pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } #endif /* CONFIG_CPU_XSCALE */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index da1d1aa20ad..9a89bf4aefe 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -383,6 +383,12 @@ void cpu_init(void) BUG(); } + /* + * This only works on resume and secondary cores. For booting on the + * boot cpu, smp_prepare_boot_cpu is called after percpu area setup. + */ + set_my_cpu_offset(per_cpu_offset(cpu)); + cpu_proc_init(); /* @@ -426,13 +432,14 @@ int __cpu_logical_map[NR_CPUS]; void __init smp_setup_processor_id(void) { int i; - u32 cpu = is_smp() ? read_cpuid_mpidr() & 0xff : 0; + u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0; + u32 cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); cpu_logical_map(0) = cpu; - for (i = 1; i < NR_CPUS; ++i) + for (i = 1; i < nr_cpu_ids; ++i) cpu_logical_map(i) = i == cpu ? 0 : i; - printk(KERN_INFO "Booting Linux on physical CPU %d\n", cpu); + printk(KERN_INFO "Booting Linux on physical CPU 0x%x\n", mpidr); } static void __init setup_processor(void) @@ -758,6 +765,7 @@ void __init setup_arch(char **cmdline_p) unflatten_device_tree(); + arm_dt_init_cpu_maps(); #ifdef CONFIG_SMP if (is_smp()) { smp_set_ops(mdesc->smp); @@ -841,12 +849,9 @@ static const char *hwcap_str[] = { static int c_show(struct seq_file *m, void *v) { - int i; + int i, j; + u32 cpuid; - seq_printf(m, "Processor\t: %s rev %d (%s)\n", - cpu_name, read_cpuid_id() & 15, elf_platform); - -#if defined(CONFIG_SMP) for_each_online_cpu(i) { /* * glibc reads /proc/cpuinfo to determine the number of @@ -854,45 +859,48 @@ static int c_show(struct seq_file *m, void *v) * "processor". Give glibc what it expects. */ seq_printf(m, "processor\t: %d\n", i); - seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n", + cpuid = is_smp() ? per_cpu(cpu_data, i).cpuid : read_cpuid_id(); + seq_printf(m, "model name\t: %s rev %d (%s)\n", + cpu_name, cpuid & 15, elf_platform); + +#if defined(CONFIG_SMP) + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", per_cpu(cpu_data, i).loops_per_jiffy / (500000UL/HZ), (per_cpu(cpu_data, i).loops_per_jiffy / (5000UL/HZ)) % 100); - } -#else /* CONFIG_SMP */ - seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", - loops_per_jiffy / (500000/HZ), - (loops_per_jiffy / (5000/HZ)) % 100); +#else + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", + loops_per_jiffy / (500000/HZ), + (loops_per_jiffy / (5000/HZ)) % 100); #endif + /* dump out the processor features */ + seq_puts(m, "Features\t: "); - /* dump out the processor features */ - seq_puts(m, "Features\t: "); - - for (i = 0; hwcap_str[i]; i++) - if (elf_hwcap & (1 << i)) - seq_printf(m, "%s ", hwcap_str[i]); + for (j = 0; hwcap_str[j]; j++) + if (elf_hwcap & (1 << j)) + seq_printf(m, "%s ", hwcap_str[j]); - seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24); - seq_printf(m, "CPU architecture: %s\n", proc_arch[cpu_architecture()]); + seq_printf(m, "\nCPU implementer\t: 0x%02x\n", cpuid >> 24); + seq_printf(m, "CPU architecture: %s\n", + proc_arch[cpu_architecture()]); - if ((read_cpuid_id() & 0x0008f000) == 0x00000000) { - /* pre-ARM7 */ - seq_printf(m, "CPU part\t: %07x\n", read_cpuid_id() >> 4); - } else { - if ((read_cpuid_id() & 0x0008f000) == 0x00007000) { - /* ARM7 */ - seq_printf(m, "CPU variant\t: 0x%02x\n", - (read_cpuid_id() >> 16) & 127); + if ((cpuid & 0x0008f000) == 0x00000000) { + /* pre-ARM7 */ + seq_printf(m, "CPU part\t: %07x\n", cpuid >> 4); } else { - /* post-ARM7 */ - seq_printf(m, "CPU variant\t: 0x%x\n", - (read_cpuid_id() >> 20) & 15); + if ((cpuid & 0x0008f000) == 0x00007000) { + /* ARM7 */ + seq_printf(m, "CPU variant\t: 0x%02x\n", + (cpuid >> 16) & 127); + } else { + /* post-ARM7 */ + seq_printf(m, "CPU variant\t: 0x%x\n", + (cpuid >> 20) & 15); + } + seq_printf(m, "CPU part\t: 0x%03x\n", + (cpuid >> 4) & 0xfff); } - seq_printf(m, "CPU part\t: 0x%03x\n", - (read_cpuid_id() >> 4) & 0xfff); + seq_printf(m, "CPU revision\t: %d\n\n", cpuid & 15); } - seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15); - - seq_puts(m, "\n"); seq_printf(m, "Hardware\t: %s\n", machine_name); seq_printf(m, "Revision\t: %04x\n", system_rev); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 57f53773197..84f4cbf652e 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -281,6 +281,7 @@ static void __cpuinit smp_store_cpu_info(unsigned int cpuid) struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid); cpu_info->loops_per_jiffy = loops_per_jiffy; + cpu_info->cpuid = read_cpuid_id(); store_cpu_topology(cpuid); } @@ -313,9 +314,10 @@ asmlinkage void __cpuinit secondary_start_kernel(void) current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); + cpu_init(); + printk("CPU%u: Booted secondary processor\n", cpu); - cpu_init(); preempt_disable(); trace_hardirqs_off(); @@ -371,6 +373,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } void __init smp_prepare_cpus(unsigned int max_cpus) diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 26c12c6440f..79282ebcd93 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -196,32 +196,7 @@ static inline void parse_dt_topology(void) {} static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {} #endif - -/* - * cpu topology management - */ - -#define MPIDR_SMP_BITMASK (0x3 << 30) -#define MPIDR_SMP_VALUE (0x2 << 30) - -#define MPIDR_MT_BITMASK (0x1 << 24) - -/* - * These masks reflect the current use of the affinity levels. - * The affinity level can be up to 16 bits according to ARM ARM - */ -#define MPIDR_HWID_BITMASK 0xFFFFFF - -#define MPIDR_LEVEL0_MASK 0x3 -#define MPIDR_LEVEL0_SHIFT 0 - -#define MPIDR_LEVEL1_MASK 0xF -#define MPIDR_LEVEL1_SHIFT 8 - -#define MPIDR_LEVEL2_MASK 0xFF -#define MPIDR_LEVEL2_SHIFT 16 - -/* + /* * cpu topology table */ struct cputopo_arm cpu_topology[NR_CPUS]; @@ -282,19 +257,14 @@ void store_cpu_topology(unsigned int cpuid) if (mpidr & MPIDR_MT_BITMASK) { /* core performance interdependency */ - cpuid_topo->thread_id = (mpidr >> MPIDR_LEVEL0_SHIFT) - & MPIDR_LEVEL0_MASK; - cpuid_topo->core_id = (mpidr >> MPIDR_LEVEL1_SHIFT) - & MPIDR_LEVEL1_MASK; - cpuid_topo->socket_id = (mpidr >> MPIDR_LEVEL2_SHIFT) - & MPIDR_LEVEL2_MASK; + cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); } else { /* largely independent cores */ cpuid_topo->thread_id = -1; - cpuid_topo->core_id = (mpidr >> MPIDR_LEVEL0_SHIFT) - & MPIDR_LEVEL0_MASK; - cpuid_topo->socket_id = (mpidr >> MPIDR_LEVEL1_SHIFT) - & MPIDR_LEVEL1_MASK; + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); } } else { /* diff --git a/arch/arm/mach-omap2/pmu.c b/arch/arm/mach-omap2/pmu.c index 2a791766283..031e2fbd0e1 100644 --- a/arch/arm/mach-omap2/pmu.c +++ b/arch/arm/mach-omap2/pmu.c @@ -57,8 +57,6 @@ static int __init omap2_init_pmu(unsigned oh_num, char *oh_names[]) if (IS_ERR(omap_pmu_dev)) return PTR_ERR(omap_pmu_dev); - pm_runtime_enable(&omap_pmu_dev->dev); - return 0; } diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 4e07eec1270..bc4a5e9ebb7 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -2,6 +2,9 @@ * linux/arch/arm/mm/context.c * * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,14 +17,40 @@ #include <linux/percpu.h> #include <asm/mmu_context.h> +#include <asm/smp_plat.h> #include <asm/thread_notify.h> #include <asm/tlbflush.h> +/* + * On ARMv6, we have the following structure in the Context ID: + * + * 31 7 0 + * +-------------------------+-----------+ + * | process ID | ASID | + * +-------------------------+-----------+ + * | context ID | + * +-------------------------------------+ + * + * The ASID is used to tag entries in the CPU caches and TLBs. + * The context ID is used by debuggers and trace logic, and + * should be unique within all running processes. + */ +#define ASID_FIRST_VERSION (1ULL << ASID_BITS) +#define NUM_USER_ASIDS (ASID_FIRST_VERSION - 1) + +#define ASID_TO_IDX(asid) ((asid & ~ASID_MASK) - 1) +#define IDX_TO_ASID(idx) ((idx + 1) & ~ASID_MASK) + static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -unsigned int cpu_last_asid = ASID_FIRST_VERSION; +static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); +static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); + +static DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); +static cpumask_t tlb_flush_pending; #ifdef CONFIG_ARM_LPAE -void cpu_set_reserved_ttbr0(void) +static void cpu_set_reserved_ttbr0(void) { unsigned long ttbl = __pa(swapper_pg_dir); unsigned long ttbh = 0; @@ -37,7 +66,7 @@ void cpu_set_reserved_ttbr0(void) isb(); } #else -void cpu_set_reserved_ttbr0(void) +static void cpu_set_reserved_ttbr0(void) { u32 ttb; /* Copy TTBR1 into TTBR0 */ @@ -84,124 +113,104 @@ static int __init contextidr_notifier_init(void) arch_initcall(contextidr_notifier_init); #endif -/* - * We fork()ed a process, and we need a new context for the child - * to run in. - */ -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) +static void flush_context(unsigned int cpu) { - mm->context.id = 0; - raw_spin_lock_init(&mm->context.id_lock); -} + int i; + u64 asid; + + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); + for_each_possible_cpu(i) { + if (i == cpu) { + asid = 0; + } else { + asid = atomic64_xchg(&per_cpu(active_asids, i), 0); + __set_bit(ASID_TO_IDX(asid), asid_map); + } + per_cpu(reserved_asids, i) = asid; + } -static void flush_context(void) -{ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - if (icache_is_vivt_asid_tagged()) { + /* Queue a TLB invalidate and flush the I-cache if necessary. */ + if (!tlb_ops_need_broadcast()) + cpumask_set_cpu(cpu, &tlb_flush_pending); + else + cpumask_setall(&tlb_flush_pending); + + if (icache_is_vivt_asid_tagged()) __flush_icache_all(); - dsb(); - } } -#ifdef CONFIG_SMP +static int is_reserved_asid(u64 asid) +{ + int cpu; + for_each_possible_cpu(cpu) + if (per_cpu(reserved_asids, cpu) == asid) + return 1; + return 0; +} -static void set_mm_context(struct mm_struct *mm, unsigned int asid) +static void new_context(struct mm_struct *mm, unsigned int cpu) { - unsigned long flags; + u64 asid = mm->context.id; + u64 generation = atomic64_read(&asid_generation); - /* - * Locking needed for multi-threaded applications where the - * same mm->context.id could be set from different CPUs during - * the broadcast. This function is also called via IPI so the - * mm->context.id_lock has to be IRQ-safe. - */ - raw_spin_lock_irqsave(&mm->context.id_lock, flags); - if (likely((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) { + if (asid != 0 && is_reserved_asid(asid)) { /* - * Old version of ASID found. Set the new one and - * reset mm_cpumask(mm). + * Our current ASID was active during a rollover, we can + * continue to use it and this was just a false alarm. */ - mm->context.id = asid; + asid = generation | (asid & ~ASID_MASK); + } else { + /* + * Allocate a free ASID. If we can't find one, take a + * note of the currently active ASIDs and mark the TLBs + * as requiring flushes. + */ + asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + if (asid == NUM_USER_ASIDS) { + generation = atomic64_add_return(ASID_FIRST_VERSION, + &asid_generation); + flush_context(cpu); + asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + } + __set_bit(asid, asid_map); + asid = generation | IDX_TO_ASID(asid); cpumask_clear(mm_cpumask(mm)); } - raw_spin_unlock_irqrestore(&mm->context.id_lock, flags); - /* - * Set the mm_cpumask(mm) bit for the current CPU. - */ - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + mm->context.id = asid; } -/* - * Reset the ASID on the current CPU. This function call is broadcast - * from the CPU handling the ASID rollover and holding cpu_asid_lock. - */ -static void reset_context(void *info) +void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) { - unsigned int asid; + unsigned long flags; unsigned int cpu = smp_processor_id(); - struct mm_struct *mm = current->active_mm; - smp_rmb(); - asid = cpu_last_asid + cpu + 1; + if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) + __check_vmalloc_seq(mm); - flush_context(); - set_mm_context(mm, asid); - - /* set the new ASID */ - cpu_switch_mm(mm->pgd, mm); -} + /* + * Required during context switch to avoid speculative page table + * walking with the wrong TTBR. + */ + cpu_set_reserved_ttbr0(); -#else + if (!((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS) + && atomic64_xchg(&per_cpu(active_asids, cpu), mm->context.id)) + goto switch_mm_fastpath; -static inline void set_mm_context(struct mm_struct *mm, unsigned int asid) -{ - mm->context.id = asid; - cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); -} + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + /* Check that our ASID belongs to the current generation. */ + if ((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS) + new_context(mm, cpu); -#endif + atomic64_set(&per_cpu(active_asids, cpu), mm->context.id); + cpumask_set_cpu(cpu, mm_cpumask(mm)); -void __new_context(struct mm_struct *mm) -{ - unsigned int asid; + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) + local_flush_tlb_all(); + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); - raw_spin_lock(&cpu_asid_lock); -#ifdef CONFIG_SMP - /* - * Check the ASID again, in case the change was broadcast from - * another CPU before we acquired the lock. - */ - if (unlikely(((mm->context.id ^ cpu_last_asid) >> ASID_BITS) == 0)) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - raw_spin_unlock(&cpu_asid_lock); - return; - } -#endif - /* - * At this point, it is guaranteed that the current mm (with - * an old ASID) isn't active on any other CPU since the ASIDs - * are changed simultaneously via IPI. - */ - asid = ++cpu_last_asid; - if (asid == 0) - asid = cpu_last_asid = ASID_FIRST_VERSION; - - /* - * If we've used up all our ASIDs, we need - * to start a new version and flush the TLB. - */ - if (unlikely((asid & ~ASID_MASK) == 0)) { - asid = cpu_last_asid + smp_processor_id() + 1; - flush_context(); -#ifdef CONFIG_SMP - smp_wmb(); - smp_call_function(reset_context, NULL, 1); -#endif - cpu_last_asid += NR_CPUS; - } - - set_mm_context(mm, asid); - raw_spin_unlock(&cpu_asid_lock); +switch_mm_fastpath: + cpu_switch_mm(mm->pgd, mm); } diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 5dcc2fd46c4..88fd86cf3d9 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -47,18 +47,18 @@ int ioremap_page(unsigned long virt, unsigned long phys, } EXPORT_SYMBOL(ioremap_page); -void __check_kvm_seq(struct mm_struct *mm) +void __check_vmalloc_seq(struct mm_struct *mm) { unsigned int seq; do { - seq = init_mm.context.kvm_seq; + seq = init_mm.context.vmalloc_seq; memcpy(pgd_offset(mm, VMALLOC_START), pgd_offset_k(VMALLOC_START), sizeof(pgd_t) * (pgd_index(VMALLOC_END) - pgd_index(VMALLOC_START))); - mm->context.kvm_seq = seq; - } while (seq != init_mm.context.kvm_seq); + mm->context.vmalloc_seq = seq; + } while (seq != init_mm.context.vmalloc_seq); } #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) @@ -89,13 +89,13 @@ static void unmap_area_sections(unsigned long virt, unsigned long size) if (!pmd_none(pmd)) { /* * Clear the PMD from the page table, and - * increment the kvm sequence so others + * increment the vmalloc sequence so others * notice this change. * * Note: this is still racy on SMP machines. */ pmd_clear(pmdp); - init_mm.context.kvm_seq++; + init_mm.context.vmalloc_seq++; /* * Free the page table, if there was one. @@ -112,8 +112,8 @@ static void unmap_area_sections(unsigned long virt, unsigned long size) * Ensure that the active_mm is up to date - we want to * catch any use-after-iounmap cases. */ - if (current->active_mm->context.kvm_seq != init_mm.context.kvm_seq) - __check_kvm_seq(current->active_mm); + if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq) + __check_vmalloc_seq(current->active_mm); flush_tlb_kernel_range(virt, end); } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 941dfb9e9a7..99b47b950ef 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -488,7 +488,7 @@ static void __init build_mem_type_table(void) #endif for (i = 0; i < 16; i++) { - unsigned long v = pgprot_val(protection_map[i]); + pteval_t v = pgprot_val(protection_map[i]); protection_map[i] = __pgprot(v | user_pgprot); } diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index b29a2265af0..eb6aa73bc8b 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -167,6 +167,10 @@ tst r1, #L_PTE_YOUNG tstne r1, #L_PTE_PRESENT moveq r3, #0 +#ifndef CONFIG_CPU_USE_DOMAINS + tstne r1, #L_PTE_NONE + movne r3, #0 +#endif str r3, [r0] mcr p15, 0, r0, c7, c10, 1 @ flush_pte diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index fd045e70639..6d98c13ab82 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -100,7 +100,11 @@ ENTRY(cpu_v7_set_pte_ext) orrne r3, r3, #PTE_EXT_XN tst r1, #L_PTE_YOUNG - tstne r1, #L_PTE_PRESENT + tstne r1, #L_PTE_VALID +#ifndef CONFIG_CPU_USE_DOMAINS + eorne r1, r1, #L_PTE_NONE + tstne r1, #L_PTE_NONE +#endif moveq r3, #0 ARM( str r3, [r0, #2048]! ) @@ -161,11 +165,11 @@ ENDPROC(cpu_v7_set_pte_ext) * TFR EV X F I D LR S * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 1 0 110 0011 1100 .111 1101 < we want + * 01 0 110 0011 1100 .111 1101 < we want */ .align 2 .type v7_crval, #object v7_crval: - crval clear=0x0120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c + crval clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c .previous diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 8de0f1dd154..7b56386f949 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -65,8 +65,11 @@ ENDPROC(cpu_v7_switch_mm) */ ENTRY(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU - tst r2, #L_PTE_PRESENT + tst r2, #L_PTE_VALID beq 1f + tst r3, #1 << (57 - 32) @ L_PTE_NONE + bicne r2, #L_PTE_VALID + bne 1f tst r3, #1 << (55 - 32) @ L_PTE_DIRTY orreq r2, #L_PTE_RDONLY 1: strd r2, r3, [r0] |