From b5466f8728527a05a493cc4abe9e6f034a1bbaab Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 Jun 2012 14:47:31 +0100 Subject: ARM: mm: remove IPI broadcasting on ASID rollover ASIDs are allocated to MMU contexts based on a rolling counter. This means that after 255 allocations we must invalidate all existing ASIDs via an expensive IPI mechanism to synchronise all of the online CPUs and ensure that all tasks execute with an ASID from the new generation. This patch changes the rollover behaviour so that we rely instead on the hardware broadcasting of the TLB invalidation to avoid the IPI calls. This works by keeping track of the active ASID on each core, which is then reserved in the case of a rollover so that currently scheduled tasks can continue to run. For cores without hardware TLB broadcasting, we keep track of pending flushes in a cpumask, so cores can flush their local TLB before scheduling a new mm. Reviewed-by: Catalin Marinas Tested-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/include/asm/mmu.h | 11 ++--- arch/arm/include/asm/mmu_context.h | 82 ++------------------------------------ 2 files changed, 7 insertions(+), 86 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 14965658a92..5b53b53ab5c 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -5,18 +5,15 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID - unsigned int id; - raw_spinlock_t id_lock; + u64 id; #endif unsigned int kvm_seq; } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID -#define ASID(mm) ((mm)->context.id & 255) - -/* init_mm.context.id_lock should be initialized. */ -#define INIT_MM_CONTEXT(name) \ - .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock), +#define ASID_BITS 8 +#define ASID_MASK ((~0ULL) << ASID_BITS) +#define ASID(mm) ((mm)->context.id & ~ASID_MASK) #else #define ASID(mm) (0) #endif diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 0306bc642c0..a64f61cb23d 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -24,84 +24,8 @@ void __check_kvm_seq(struct mm_struct *mm); #ifdef CONFIG_CPU_HAS_ASID -/* - * On ARMv6, we have the following structure in the Context ID: - * - * 31 7 0 - * +-------------------------+-----------+ - * | process ID | ASID | - * +-------------------------+-----------+ - * | context ID | - * +-------------------------------------+ - * - * The ASID is used to tag entries in the CPU caches and TLBs. - * The context ID is used by debuggers and trace logic, and - * should be unique within all running processes. - */ -#define ASID_BITS 8 -#define ASID_MASK ((~0) << ASID_BITS) -#define ASID_FIRST_VERSION (1 << ASID_BITS) - -extern unsigned int cpu_last_asid; - -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void __new_context(struct mm_struct *mm); -void cpu_set_reserved_ttbr0(void); - -static inline void switch_new_context(struct mm_struct *mm) -{ - unsigned long flags; - - __new_context(mm); - - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); -} - -static inline void check_and_switch_context(struct mm_struct *mm, - struct task_struct *tsk) -{ - if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) - __check_kvm_seq(mm); - - /* - * Required during context switch to avoid speculative page table - * walking with the wrong TTBR. - */ - cpu_set_reserved_ttbr0(); - - if (!((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) - /* - * The ASID is from the current generation, just switch to the - * new pgd. This condition is only true for calls from - * context_switch() and interrupts are already disabled. - */ - cpu_switch_mm(mm->pgd, mm); - else if (irqs_disabled()) - /* - * Defer the new ASID allocation until after the context - * switch critical region since __new_context() cannot be - * called with interrupts disabled (it sends IPIs). - */ - set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); - else - /* - * That is a direct call to switch_mm() or activate_mm() with - * interrupts enabled and a new context. - */ - switch_new_context(mm); -} - -#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0) - -#define finish_arch_post_lock_switch \ - finish_arch_post_lock_switch -static inline void finish_arch_post_lock_switch(void) -{ - if (test_and_clear_thread_flag(TIF_SWITCH_MM)) - switch_new_context(current->mm); -} +void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); +#define init_new_context(tsk,mm) ({ mm->context.id = 0; }) #else /* !CONFIG_CPU_HAS_ASID */ @@ -143,6 +67,7 @@ static inline void finish_arch_post_lock_switch(void) #endif /* CONFIG_CPU_HAS_ASID */ #define destroy_context(mm) do { } while(0) +#define activate_mm(prev,next) switch_mm(prev, next, NULL) /* * This is called when "tsk" is about to enter lazy TLB mode. @@ -186,6 +111,5 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, } #define deactivate_mm(tsk,mm) do { } while (0) -#define activate_mm(prev,next) switch_mm(prev, next, NULL) #endif -- cgit v1.2.3-70-g09d2 From e50c54189f7c6211a99539156e3978474f0b1a0b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 13 Sep 2012 16:40:46 +0100 Subject: ARM: perf: add guest vs host discrimination Add minimal guest support to perf, so it can distinguish whether the PMU interrupt was in the host or the guest, as well as collecting some very basic information (guest PC, user vs kernel mode). This is not feature complete though, as it doesn't support backtracing in the guest. Based on the x86 implementation, tested with KVM/ARM. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/include/asm/perf_event.h | 5 +++++ arch/arm/kernel/perf_event.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 625cd621a43..00416edecea 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -21,4 +21,9 @@ #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xFFFF +struct pt_regs; +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) + #endif /* __ARM_PERF_EVENT_H__ */ diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 53c0304b734..f8406af0327 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -576,6 +576,10 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct frame_tail __user *tail; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } tail = (struct frame_tail __user *)regs->ARM_fp - 1; @@ -603,9 +607,41 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stackframe fr; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + fr.fp = regs->ARM_fp; fr.sp = regs->ARM_sp; fr.lr = regs->ARM_lr; fr.pc = regs->ARM_pc; walk_stackframe(&fr, callchain_trace, entry); } + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + return perf_guest_cbs->get_guest_ip(); + + return instruction_pointer(regs); +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + int misc = 0; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_cbs->is_user_mode()) + misc |= PERF_RECORD_MISC_GUEST_USER; + else + misc |= PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; + } + + return misc; +} -- cgit v1.2.3-70-g09d2 From ed6f2a522398c26559f4da23a80aa6195e6284c7 Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Mon, 30 Jul 2012 12:00:02 +0100 Subject: ARM: perf: consistently use struct perf_event in arm_pmu functions The arm_pmu functions have wildly varied parameters which can often be derived from struct perf_event. This patch changes the arm_pmu function prototypes so that struct perf_event pointers are passed in preference to fields that can be derived from the event. Signed-off-by: Sudeep KarkadaNagesha Signed-off-by: Will Deacon --- arch/arm/include/asm/pmu.h | 26 +++++------- arch/arm/kernel/perf_event.c | 44 +++++++++---------- arch/arm/kernel/perf_event_cpu.c | 8 ++-- arch/arm/kernel/perf_event_v6.c | 54 ++++++++++++----------- arch/arm/kernel/perf_event_v7.c | 46 +++++++++++++------- arch/arm/kernel/perf_event_xscale.c | 85 +++++++++++++++++++++---------------- 6 files changed, 142 insertions(+), 121 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index a26170dce02..a209a384dbc 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -67,19 +67,19 @@ struct arm_pmu { cpumask_t active_irqs; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); - void (*enable)(struct hw_perf_event *evt, int idx); - void (*disable)(struct hw_perf_event *evt, int idx); + void (*enable)(struct perf_event *event); + void (*disable)(struct perf_event *event); int (*get_event_idx)(struct pmu_hw_events *hw_events, - struct hw_perf_event *hwc); + struct perf_event *event); int (*set_event_filter)(struct hw_perf_event *evt, struct perf_event_attr *attr); - u32 (*read_counter)(int idx); - void (*write_counter)(int idx, u32 val); - void (*start)(void); - void (*stop)(void); + u32 (*read_counter)(struct perf_event *event); + void (*write_counter)(struct perf_event *event, u32 val); + void (*start)(struct arm_pmu *); + void (*stop)(struct arm_pmu *); void (*reset)(void *); - int (*request_irq)(irq_handler_t handler); - void (*free_irq)(void); + int (*request_irq)(struct arm_pmu *, irq_handler_t handler); + void (*free_irq)(struct arm_pmu *); int (*map_event)(struct perf_event *event); int num_events; atomic_t active_events; @@ -95,13 +95,9 @@ extern const struct dev_pm_ops armpmu_dev_pm_ops; int armpmu_register(struct arm_pmu *armpmu, char *name, int type); -u64 armpmu_event_update(struct perf_event *event, - struct hw_perf_event *hwc, - int idx); +u64 armpmu_event_update(struct perf_event *event); -int armpmu_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, - int idx); +int armpmu_event_set_period(struct perf_event *event); int armpmu_map_event(struct perf_event *event, const unsigned (*event_map)[PERF_COUNT_HW_MAX], diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index f8406af0327..1cfa3f35713 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -86,12 +86,10 @@ armpmu_map_event(struct perf_event *event, return -ENOENT; } -int -armpmu_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, - int idx) +int armpmu_event_set_period(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; int ret = 0; @@ -119,24 +117,22 @@ armpmu_event_set_period(struct perf_event *event, local64_set(&hwc->prev_count, (u64)-left); - armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); + armpmu->write_counter(event, (u64)(-left) & 0xffffffff); perf_event_update_userpage(event); return ret; } -u64 -armpmu_event_update(struct perf_event *event, - struct hw_perf_event *hwc, - int idx) +u64 armpmu_event_update(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u64 delta, prev_raw_count, new_raw_count; again: prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = armpmu->read_counter(idx); + new_raw_count = armpmu->read_counter(event); if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) @@ -159,7 +155,7 @@ armpmu_read(struct perf_event *event) if (hwc->idx < 0) return; - armpmu_event_update(event, hwc, hwc->idx); + armpmu_event_update(event); } static void @@ -173,14 +169,13 @@ armpmu_stop(struct perf_event *event, int flags) * PERF_EF_UPDATE, see comments in armpmu_start(). */ if (!(hwc->state & PERF_HES_STOPPED)) { - armpmu->disable(hwc, hwc->idx); - armpmu_event_update(event, hwc, hwc->idx); + armpmu->disable(event); + armpmu_event_update(event); hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; } } -static void -armpmu_start(struct perf_event *event, int flags) +static void armpmu_start(struct perf_event *event, int flags) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -200,8 +195,8 @@ armpmu_start(struct perf_event *event, int flags) * get an interrupt too soon or *way* too late if the overflow has * happened since disabling. */ - armpmu_event_set_period(event, hwc, hwc->idx); - armpmu->enable(hwc, hwc->idx); + armpmu_event_set_period(event); + armpmu->enable(event); } static void @@ -233,7 +228,7 @@ armpmu_add(struct perf_event *event, int flags) perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ - idx = armpmu->get_event_idx(hw_events, hwc); + idx = armpmu->get_event_idx(hw_events, event); if (idx < 0) { err = idx; goto out; @@ -244,7 +239,7 @@ armpmu_add(struct perf_event *event, int flags) * sure it is disabled. */ event->hw.idx = idx; - armpmu->disable(hwc, idx); + armpmu->disable(event); hw_events->events[idx] = event; hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; @@ -264,13 +259,12 @@ validate_event(struct pmu_hw_events *hw_events, struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event fake_event = event->hw; struct pmu *leader_pmu = event->group_leader->pmu; if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) return 1; - return armpmu->get_event_idx(hw_events, &fake_event) >= 0; + return armpmu->get_event_idx(hw_events, event) >= 0; } static int @@ -316,7 +310,7 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) static void armpmu_release_hardware(struct arm_pmu *armpmu) { - armpmu->free_irq(); + armpmu->free_irq(armpmu); pm_runtime_put_sync(&armpmu->plat_device->dev); } @@ -330,7 +324,7 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu) return -ENODEV; pm_runtime_get_sync(&pmu_device->dev); - err = armpmu->request_irq(armpmu_dispatch_irq); + err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); if (err) { armpmu_release_hardware(armpmu); return err; @@ -465,13 +459,13 @@ static void armpmu_enable(struct pmu *pmu) int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); if (enabled) - armpmu->start(); + armpmu->start(armpmu); } static void armpmu_disable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); - armpmu->stop(); + armpmu->stop(armpmu); } #ifdef CONFIG_PM_RUNTIME diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 3863fd405fa..02244faa539 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -71,7 +71,7 @@ static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) return &__get_cpu_var(cpu_hw_events); } -static void cpu_pmu_free_irq(void) +static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) { int i, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; @@ -87,7 +87,7 @@ static void cpu_pmu_free_irq(void) } } -static int cpu_pmu_request_irq(irq_handler_t handler) +static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) { int i, err, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; @@ -148,7 +148,7 @@ static void __devinit cpu_pmu_init(struct arm_pmu *cpu_pmu) /* Ensure the PMU has sane values out of reset. */ if (cpu_pmu && cpu_pmu->reset) - on_each_cpu(cpu_pmu->reset, NULL, 1); + on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); } /* @@ -164,7 +164,7 @@ static int __cpuinit cpu_pmu_notify(struct notifier_block *b, return NOTIFY_DONE; if (cpu_pmu && cpu_pmu->reset) - cpu_pmu->reset(NULL); + cpu_pmu->reset(cpu_pmu); return NOTIFY_OK; } diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 3908cb4e556..f3e22ff8b6a 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -401,9 +401,10 @@ armv6_pmcr_counter_has_overflowed(unsigned long pmcr, return ret; } -static inline u32 -armv6pmu_read_counter(int counter) +static inline u32 armv6pmu_read_counter(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; unsigned long value = 0; if (ARMV6_CYCLE_COUNTER == counter) @@ -418,10 +419,11 @@ armv6pmu_read_counter(int counter) return value; } -static inline void -armv6pmu_write_counter(int counter, - u32 value) +static inline void armv6pmu_write_counter(struct perf_event *event, u32 value) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + if (ARMV6_CYCLE_COUNTER == counter) asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); else if (ARMV6_COUNTER0 == counter) @@ -432,12 +434,13 @@ armv6pmu_write_counter(int counter, WARN_ONCE(1, "invalid counter number (%d)\n", counter); } -static void -armv6pmu_enable_event(struct hw_perf_event *hwc, - int idx) +static void armv6pmu_enable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { mask = 0; @@ -473,7 +476,8 @@ armv6pmu_handle_irq(int irq_num, { unsigned long pmcr = armv6_pmcr_read(); struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); struct pt_regs *regs; int idx; @@ -489,7 +493,6 @@ armv6pmu_handle_irq(int irq_num, */ armv6_pmcr_write(pmcr); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -506,13 +509,13 @@ armv6pmu_handle_irq(int irq_num, continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } /* @@ -527,8 +530,7 @@ armv6pmu_handle_irq(int irq_num, return IRQ_HANDLED; } -static void -armv6pmu_start(void) +static void armv6pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -540,8 +542,7 @@ armv6pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -armv6pmu_stop(void) +static void armv6pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -555,10 +556,11 @@ armv6pmu_stop(void) static int armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; /* Always place a cycle counter into the cycle counter. */ - if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { + if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) { if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) return -EAGAIN; @@ -579,12 +581,13 @@ armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, } } -static void -armv6pmu_disable_event(struct hw_perf_event *hwc, - int idx) +static void armv6pmu_disable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { mask = ARMV6_PMCR_CCOUNT_IEN; @@ -613,12 +616,13 @@ armv6pmu_disable_event(struct hw_perf_event *hwc, raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, - int idx) +static void armv6mpcore_pmu_disable_event(struct perf_event *event) { unsigned long val, mask, flags, evt = 0; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { mask = ARMV6_PMCR_CCOUNT_IEN; diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index b189403f30e..1183c81087b 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -840,8 +840,10 @@ static inline int armv7_pmnc_select_counter(int idx) return idx; } -static inline u32 armv7pmu_read_counter(int idx) +static inline u32 armv7pmu_read_counter(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; u32 value = 0; if (!armv7_pmnc_counter_valid(idx)) @@ -855,8 +857,11 @@ static inline u32 armv7pmu_read_counter(int idx) return value; } -static inline void armv7pmu_write_counter(int idx, u32 value) +static inline void armv7pmu_write_counter(struct perf_event *event, u32 value) { + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + if (!armv7_pmnc_counter_valid(idx)) pr_err("CPU%u writing wrong counter %d\n", smp_processor_id(), idx); @@ -991,10 +996,13 @@ static void armv7_pmnc_dump_regs(void) } #endif -static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void armv7pmu_enable_event(struct perf_event *event) { unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; /* * Enable counter and interrupt, and set the counter to count @@ -1028,10 +1036,13 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) +static void armv7pmu_disable_event(struct perf_event *event) { unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; /* * Disable counter and interrupt @@ -1055,7 +1066,8 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) { u32 pmnc; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); struct pt_regs *regs; int idx; @@ -1075,7 +1087,6 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) */ regs = get_irq_regs(); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -1092,13 +1103,13 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } /* @@ -1113,7 +1124,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) return IRQ_HANDLED; } -static void armv7pmu_start(void) +static void armv7pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -1124,7 +1135,7 @@ static void armv7pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void armv7pmu_stop(void) +static void armv7pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -1136,10 +1147,12 @@ static void armv7pmu_stop(void) } static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { int idx; - unsigned long evtype = event->config_base & ARMV7_EVTYPE_EVENT; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT; /* Always place a cycle counter into the cycle counter. */ if (evtype == ARMV7_PERFCTR_CPU_CYCLES) { @@ -1190,11 +1203,14 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event, static void armv7pmu_reset(void *info) { + struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; u32 idx, nb_cnt = cpu_pmu->num_events; /* The counter and interrupt enable registers are unknown at reset. */ - for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) - armv7pmu_disable_event(NULL, idx); + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_disable_counter(idx); + armv7_pmnc_disable_intens(idx); + } /* Initialize & Reset PMNC: C and P bits */ armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 131ede6c2fd..0c8265e53d5 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -224,7 +224,8 @@ xscale1pmu_handle_irq(int irq_num, void *dev) { unsigned long pmnc; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); struct pt_regs *regs; int idx; @@ -248,7 +249,6 @@ xscale1pmu_handle_irq(int irq_num, void *dev) regs = get_irq_regs(); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -260,13 +260,13 @@ xscale1pmu_handle_irq(int irq_num, void *dev) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } irq_work_run(); @@ -280,11 +280,13 @@ xscale1pmu_handle_irq(int irq_num, void *dev) return IRQ_HANDLED; } -static void -xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void xscale1pmu_enable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; switch (idx) { case XSCALE_CYCLE_COUNTER: @@ -314,11 +316,13 @@ xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) +static void xscale1pmu_disable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; switch (idx) { case XSCALE_CYCLE_COUNTER: @@ -348,9 +352,10 @@ xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) static int xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { - if (XSCALE_PERFCTR_CCNT == event->config_base) { + struct hw_perf_event *hwc = &event->hw; + if (XSCALE_PERFCTR_CCNT == hwc->config_base) { if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) return -EAGAIN; @@ -366,8 +371,7 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, } } -static void -xscale1pmu_start(void) +static void xscale1pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -379,8 +383,7 @@ xscale1pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -xscale1pmu_stop(void) +static void xscale1pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -392,9 +395,10 @@ xscale1pmu_stop(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline u32 -xscale1pmu_read_counter(int counter) +static inline u32 xscale1pmu_read_counter(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; u32 val = 0; switch (counter) { @@ -412,9 +416,11 @@ xscale1pmu_read_counter(int counter) return val; } -static inline void -xscale1pmu_write_counter(int counter, u32 val) +static inline void xscale1pmu_write_counter(struct perf_event *event, u32 val) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + switch (counter) { case XSCALE_CYCLE_COUNTER: asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); @@ -565,7 +571,8 @@ xscale2pmu_handle_irq(int irq_num, void *dev) { unsigned long pmnc, of_flags; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); struct pt_regs *regs; int idx; @@ -583,7 +590,6 @@ xscale2pmu_handle_irq(int irq_num, void *dev) regs = get_irq_regs(); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -595,13 +601,13 @@ xscale2pmu_handle_irq(int irq_num, void *dev) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } irq_work_run(); @@ -615,11 +621,13 @@ xscale2pmu_handle_irq(int irq_num, void *dev) return IRQ_HANDLED; } -static void -xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void xscale2pmu_enable_event(struct perf_event *event) { unsigned long flags, ien, evtsel; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; ien = xscale2pmu_read_int_enable(); evtsel = xscale2pmu_read_event_select(); @@ -659,11 +667,13 @@ xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) +static void xscale2pmu_disable_event(struct perf_event *event) { unsigned long flags, ien, evtsel, of_flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; ien = xscale2pmu_read_int_enable(); evtsel = xscale2pmu_read_event_select(); @@ -711,7 +721,7 @@ xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) static int xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { int idx = xscale1pmu_get_event_idx(cpuc, event); if (idx >= 0) @@ -725,8 +735,7 @@ out: return idx; } -static void -xscale2pmu_start(void) +static void xscale2pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -738,8 +747,7 @@ xscale2pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -xscale2pmu_stop(void) +static void xscale2pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -751,9 +759,10 @@ xscale2pmu_stop(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline u32 -xscale2pmu_read_counter(int counter) +static inline u32 xscale2pmu_read_counter(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; u32 val = 0; switch (counter) { @@ -777,9 +786,11 @@ xscale2pmu_read_counter(int counter) return val; } -static inline void -xscale2pmu_write_counter(int counter, u32 val) +static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + switch (counter) { case XSCALE_CYCLE_COUNTER: asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); -- cgit v1.2.3-70-g09d2 From 0305230a3d92d6829db89c9e0c096d4d8733f317 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 21 Sep 2012 14:23:47 +0100 Subject: ARM: perf: consistently use arm_pmu->name for PMU name Perf has three ways to name a PMU: either by passing an explicit char *, reading arm_pmu->name or accessing arm_pmu->pmu.name. Just use arm_pmu->name consistently in the ARM backend. Signed-off-by: Will Deacon --- arch/arm/include/asm/pmu.h | 2 +- arch/arm/kernel/perf_event.c | 4 ++-- arch/arm/kernel/perf_event_cpu.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index a209a384dbc..f24edad26c7 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -93,7 +93,7 @@ struct arm_pmu { extern const struct dev_pm_ops armpmu_dev_pm_ops; -int armpmu_register(struct arm_pmu *armpmu, char *name, int type); +int armpmu_register(struct arm_pmu *armpmu, int type); u64 armpmu_event_update(struct perf_event *event); diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 1cfa3f35713..1243deda5bb 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -511,12 +511,12 @@ static void __init armpmu_init(struct arm_pmu *armpmu) }; } -int armpmu_register(struct arm_pmu *armpmu, char *name, int type) +int armpmu_register(struct arm_pmu *armpmu, int type) { armpmu_init(armpmu); pr_info("enabled with %s PMU driver, %d counters available\n", armpmu->name, armpmu->num_events); - return perf_pmu_register(&armpmu->pmu, name, type); + return perf_pmu_register(&armpmu->pmu, armpmu->name, type); } /* diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index db9c6b530f3..9a4f6307a01 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -46,7 +46,7 @@ const char *perf_pmu_name(void) if (!cpu_pmu) return NULL; - return cpu_pmu->pmu.name; + return cpu_pmu->name; } EXPORT_SYMBOL_GPL(perf_pmu_name); @@ -287,7 +287,7 @@ static int __devinit cpu_pmu_device_probe(struct platform_device *pdev) cpu_pmu = pmu; cpu_pmu->plat_device = pdev; cpu_pmu_init(cpu_pmu); - armpmu_register(cpu_pmu, cpu_pmu->name, PERF_TYPE_RAW); + armpmu_register(cpu_pmu, PERF_TYPE_RAW); return 0; } -- cgit v1.2.3-70-g09d2 From 9e962f76602dbd293a57030f4ce5a4b57853e2ea Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Wed, 26 Sep 2012 17:28:47 +0100 Subject: ARM: hw_breakpoint: use CRn as argument for debug reg accessor macros The coprocessor register CRn for accesses to the debug register can be a different one than C0. Take this into account for the ARM_DBG_READ and the ARM_DBG_WRITE macro. The inline assembler calls which used a coprocessor register CRn other than C0 are replaced by the ARM_DBG_READ or ARM_DBG_WRITE macro. Tested-by: Stephen Boyd Signed-off-by: Dietmar Eggemann Signed-off-by: Will Deacon --- arch/arm/include/asm/hw_breakpoint.h | 8 ++++---- arch/arm/kernel/hw_breakpoint.c | 40 ++++++++++++++++++------------------ 2 files changed, 24 insertions(+), 24 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h index c190bc992f0..01169dd723f 100644 --- a/arch/arm/include/asm/hw_breakpoint.h +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -98,12 +98,12 @@ static inline void decode_ctrl_reg(u32 reg, #define ARM_BASE_WCR 112 /* Accessor macros for the debug registers. */ -#define ARM_DBG_READ(M, OP2, VAL) do {\ - asm volatile("mrc p14, 0, %0, c0," #M ", " #OP2 : "=r" (VAL));\ +#define ARM_DBG_READ(N, M, OP2, VAL) do {\ + asm volatile("mrc p14, 0, %0, " #N "," #M ", " #OP2 : "=r" (VAL));\ } while (0) -#define ARM_DBG_WRITE(M, OP2, VAL) do {\ - asm volatile("mcr p14, 0, %0, c0," #M ", " #OP2 : : "r" (VAL));\ +#define ARM_DBG_WRITE(N, M, OP2, VAL) do {\ + asm volatile("mcr p14, 0, %0, " #N "," #M ", " #OP2 : : "r" (VAL));\ } while (0) struct notifier_block; diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 8e9532152bd..05febbaecb4 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -52,14 +52,14 @@ static u8 debug_arch; /* Maximum supported watchpoint length. */ static u8 max_watchpoint_len; -#define READ_WB_REG_CASE(OP2, M, VAL) \ - case ((OP2 << 4) + M): \ - ARM_DBG_READ(c ## M, OP2, VAL); \ +#define READ_WB_REG_CASE(OP2, M, VAL) \ + case ((OP2 << 4) + M): \ + ARM_DBG_READ(c0, c ## M, OP2, VAL); \ break -#define WRITE_WB_REG_CASE(OP2, M, VAL) \ - case ((OP2 << 4) + M): \ - ARM_DBG_WRITE(c ## M, OP2, VAL);\ +#define WRITE_WB_REG_CASE(OP2, M, VAL) \ + case ((OP2 << 4) + M): \ + ARM_DBG_WRITE(c0, c ## M, OP2, VAL); \ break #define GEN_READ_WB_REG_CASES(OP2, VAL) \ @@ -141,7 +141,7 @@ static u8 get_debug_arch(void) return ARM_DEBUG_ARCH_V6; } - ARM_DBG_READ(c0, 0, didr); + ARM_DBG_READ(c0, c0, 0, didr); return (didr >> 16) & 0xf; } @@ -169,7 +169,7 @@ static int debug_exception_updates_fsr(void) static int get_num_wrp_resources(void) { u32 didr; - ARM_DBG_READ(c0, 0, didr); + ARM_DBG_READ(c0, c0, 0, didr); return ((didr >> 28) & 0xf) + 1; } @@ -177,7 +177,7 @@ static int get_num_wrp_resources(void) static int get_num_brp_resources(void) { u32 didr; - ARM_DBG_READ(c0, 0, didr); + ARM_DBG_READ(c0, c0, 0, didr); return ((didr >> 24) & 0xf) + 1; } @@ -231,14 +231,14 @@ static int get_num_brps(void) static int monitor_mode_enabled(void) { u32 dscr; - ARM_DBG_READ(c1, 0, dscr); + ARM_DBG_READ(c0, c1, 0, dscr); return !!(dscr & ARM_DSCR_MDBGEN); } static int enable_monitor_mode(void) { u32 dscr; - ARM_DBG_READ(c1, 0, dscr); + ARM_DBG_READ(c0, c1, 0, dscr); /* If monitor mode is already enabled, just return. */ if (dscr & ARM_DSCR_MDBGEN) @@ -248,11 +248,11 @@ static int enable_monitor_mode(void) switch (get_debug_arch()) { case ARM_DEBUG_ARCH_V6: case ARM_DEBUG_ARCH_V6_1: - ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN)); + ARM_DBG_WRITE(c0, c1, 0, (dscr | ARM_DSCR_MDBGEN)); break; case ARM_DEBUG_ARCH_V7_ECP14: case ARM_DEBUG_ARCH_V7_1: - ARM_DBG_WRITE(c2, 2, (dscr | ARM_DSCR_MDBGEN)); + ARM_DBG_WRITE(c0, c2, 2, (dscr | ARM_DSCR_MDBGEN)); isb(); break; default: @@ -260,7 +260,7 @@ static int enable_monitor_mode(void) } /* Check that the write made it through. */ - ARM_DBG_READ(c1, 0, dscr); + ARM_DBG_READ(c0, c1, 0, dscr); if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN), "Failed to enable monitor mode on CPU %d.\n", smp_processor_id())) @@ -853,7 +853,7 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, local_irq_enable(); /* We only handle watchpoints and hardware breakpoints. */ - ARM_DBG_READ(c1, 0, dscr); + ARM_DBG_READ(c0, c1, 0, dscr); /* Perform perf callbacks. */ switch (ARM_DSCR_MOE(dscr)) { @@ -921,14 +921,14 @@ static void reset_ctrl_regs(void *unused) * Ensure sticky power-down is clear (i.e. debug logic is * powered up). */ - asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (val)); + ARM_DBG_READ(c1, c5, 4, val); if ((val & 0x1) == 0) err = -EPERM; /* * Check whether we implement OS save and restore. */ - asm volatile("mrc p14, 0, %0, c1, c1, 4" : "=r" (val)); + ARM_DBG_READ(c1, c1, 4, val); if ((val & 0x9) == 0) goto clear_vcr; break; @@ -936,7 +936,7 @@ static void reset_ctrl_regs(void *unused) /* * Ensure the OS double lock is clear. */ - asm volatile("mrc p14, 0, %0, c1, c3, 4" : "=r" (val)); + ARM_DBG_READ(c1, c3, 4, val); if ((val & 0x1) == 1) err = -EPERM; break; @@ -952,7 +952,7 @@ static void reset_ctrl_regs(void *unused) * Unconditionally clear the OS lock by writing a value * other than 0xC5ACCE55 to the access register. */ - asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0)); + ARM_DBG_WRITE(c1, c0, 4, 0); isb(); /* @@ -960,7 +960,7 @@ static void reset_ctrl_regs(void *unused) * enabling monitor mode. */ clear_vcr: - asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0)); + ARM_DBG_WRITE(c0, c7, 0, 0); isb(); if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { -- cgit v1.2.3-70-g09d2 From dbf62d50067e55a782583fe53c3d2a3d98b1f6f3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 19 Jul 2012 11:51:05 +0100 Subject: ARM: mm: introduce L_PTE_VALID for page table entries For long-descriptor translation table formats, the ARMv7 architecture defines the last two bits of the second- and third-level descriptors to be: x0b - Invalid 01b - Block (second-level), Reserved (third-level) 11b - Table (second-level), Page (third-level) This allows us to define L_PTE_PRESENT as (3 << 0) and use this value to create ptes directly. However, when determining whether a given pte value is present in the low-level page table accessors, we only need to check the least significant bit of the descriptor, allowing us to write faulting, present entries which are required for PROT_NONE mappings. This patch introduces L_PTE_VALID, which can be used to test whether a pte should fault, and updates the low-level page table accessors accordingly. Signed-off-by: Will Deacon --- arch/arm/include/asm/pgtable-2level.h | 1 + arch/arm/include/asm/pgtable-3level.h | 3 ++- arch/arm/include/asm/pgtable.h | 4 +--- arch/arm/mm/proc-v7-2level.S | 2 +- arch/arm/mm/proc-v7-3level.S | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 2317a71c8f8..c44a1ecfc28 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -115,6 +115,7 @@ * The PTE table pointer refers to the hardware entries; the "Linux" * entries are stored 1024 bytes below. */ +#define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */ #define L_PTE_PRESENT (_AT(pteval_t, 1) << 0) #define L_PTE_YOUNG (_AT(pteval_t, 1) << 1) #define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index b24903549d1..e32311a9abc 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -67,7 +67,8 @@ * These bits overlap with the hardware bits but the naming is preserved for * consistency with the classic page table format. */ -#define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Valid */ +#define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */ +#define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Present */ #define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ #define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define L_PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 08c12312a1f..ccf34b6e990 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -203,9 +203,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) #define pte_special(pte) (0) -#define pte_present_user(pte) \ - ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ - (L_PTE_PRESENT | L_PTE_USER)) +#define pte_present_user(pte) (pte_present(pte) && (pte_val(pte) & L_PTE_USER)) #if __LINUX_ARM_ARCH__ < 6 static inline void __sync_icache_dcache(pte_t pteval) diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index e37600b91b2..e755e9f8d1b 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -100,7 +100,7 @@ ENTRY(cpu_v7_set_pte_ext) orrne r3, r3, #PTE_EXT_XN tst r1, #L_PTE_YOUNG - tstne r1, #L_PTE_PRESENT + tstne r1, #L_PTE_VALID moveq r3, #0 ARM( str r3, [r0, #2048]! ) diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 8de0f1dd154..d23d067e190 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -65,7 +65,7 @@ ENDPROC(cpu_v7_switch_mm) */ ENTRY(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU - tst r2, #L_PTE_PRESENT + tst r2, #L_PTE_VALID beq 1f tst r3, #1 << (55 - 32) @ L_PTE_DIRTY orreq r2, #L_PTE_RDONLY -- cgit v1.2.3-70-g09d2 From 26ffd0d43b186b0d5186354da8714a1c2d360df0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 1 Sep 2012 05:22:12 +0100 Subject: ARM: mm: introduce present, faulting entries for PAGE_NONE PROT_NONE mappings apply the page protection attributes defined by _P000 which translate to PAGE_NONE for ARM. These attributes specify an XN, RDONLY pte that is inaccessible to userspace. However, on kernels configured without support for domains, such a pte *is* accessible to the kernel and can be read via get_user, allowing tasks to read PROT_NONE pages via syscalls such as read/write over a pipe. This patch introduces a new software pte flag, L_PTE_NONE, that is set to identify faulting, present entries. Signed-off-by: Will Deacon --- arch/arm/include/asm/pgtable-2level.h | 1 + arch/arm/include/asm/pgtable-3level.h | 1 + arch/arm/include/asm/pgtable.h | 6 +++--- arch/arm/mm/proc-macros.S | 4 ++++ arch/arm/mm/proc-v7-2level.S | 4 ++++ arch/arm/mm/proc-v7-3level.S | 3 +++ 6 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index c44a1ecfc28..f97ee02386e 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -124,6 +124,7 @@ #define L_PTE_USER (_AT(pteval_t, 1) << 8) #define L_PTE_XN (_AT(pteval_t, 1) << 9) #define L_PTE_SHARED (_AT(pteval_t, 1) << 10) /* shared(v6), coherent(xsc3) */ +#define L_PTE_NONE (_AT(pteval_t, 1) << 11) /* * These are the memory types, defined to be compatible with diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index e32311a9abc..a3f37929940 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -77,6 +77,7 @@ #define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */ #define L_PTE_DIRTY (_AT(pteval_t, 1) << 55) /* unused */ #define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */ +#define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */ /* * To be used in assembly code with the upper page attributes. diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index ccf34b6e990..9c82f988c0e 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -73,7 +73,7 @@ extern pgprot_t pgprot_kernel; #define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b)) -#define PAGE_NONE _MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY) +#define PAGE_NONE _MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY | L_PTE_NONE) #define PAGE_SHARED _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_XN) #define PAGE_SHARED_EXEC _MOD_PROT(pgprot_user, L_PTE_USER) #define PAGE_COPY _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) @@ -83,7 +83,7 @@ extern pgprot_t pgprot_kernel; #define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN) #define PAGE_KERNEL_EXEC pgprot_kernel -#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN) +#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE) #define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) #define __PAGE_SHARED_EXEC __pgprot(_L_PTE_DEFAULT | L_PTE_USER) #define __PAGE_COPY __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) @@ -240,7 +240,7 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER; + const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER | L_PTE_NONE; pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index b29a2265af0..eb6aa73bc8b 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -167,6 +167,10 @@ tst r1, #L_PTE_YOUNG tstne r1, #L_PTE_PRESENT moveq r3, #0 +#ifndef CONFIG_CPU_USE_DOMAINS + tstne r1, #L_PTE_NONE + movne r3, #0 +#endif str r3, [r0] mcr p15, 0, r0, c7, c10, 1 @ flush_pte diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index e755e9f8d1b..6d98c13ab82 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -101,6 +101,10 @@ ENTRY(cpu_v7_set_pte_ext) tst r1, #L_PTE_YOUNG tstne r1, #L_PTE_VALID +#ifndef CONFIG_CPU_USE_DOMAINS + eorne r1, r1, #L_PTE_NONE + tstne r1, #L_PTE_NONE +#endif moveq r3, #0 ARM( str r3, [r0, #2048]! ) diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index d23d067e190..7b56386f949 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -67,6 +67,9 @@ ENTRY(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU tst r2, #L_PTE_VALID beq 1f + tst r3, #1 << (57 - 32) @ L_PTE_NONE + bicne r2, #L_PTE_VALID + bne 1f tst r3, #1 << (55 - 32) @ L_PTE_DIRTY orreq r2, #L_PTE_RDONLY 1: strd r2, r3, [r0] -- cgit v1.2.3-70-g09d2 From f600b9fcd2bcb8ee0adb235f54ccdd93c729c442 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 15 Nov 2012 21:28:43 +0000 Subject: ARM: cti: fix manipulation of debug lock registers The LOCKSTATUS register for memory-mapped coresight devices indicates whether or not the device in question implements hardware locking. If not, locking is not present (i.e. LSR.SLI == 0) and LAR is write-ignore, so software doesn't actually need to check the status register at all. This patch removes the broken LSR checks. Cc: Ming Lei Reported-by: Mike Williams Signed-off-by: Will Deacon --- arch/arm/include/asm/cti.h | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/cti.h b/arch/arm/include/asm/cti.h index a0ada3ea435..f2e5cad3f30 100644 --- a/arch/arm/include/asm/cti.h +++ b/arch/arm/include/asm/cti.h @@ -146,15 +146,7 @@ static inline void cti_irq_ack(struct cti *cti) */ static inline void cti_unlock(struct cti *cti) { - void __iomem *base = cti->base; - unsigned long val; - - val = __raw_readl(base + LOCKSTATUS); - - if (val & 1) { - val = LOCKCODE; - __raw_writel(val, base + LOCKACCESS); - } + __raw_writel(LOCKCODE, cti->base + LOCKACCESS); } /** @@ -166,14 +158,6 @@ static inline void cti_unlock(struct cti *cti) */ static inline void cti_lock(struct cti *cti) { - void __iomem *base = cti->base; - unsigned long val; - - val = __raw_readl(base + LOCKSTATUS); - - if (!(val & 1)) { - val = ~LOCKCODE; - __raw_writel(val, base + LOCKACCESS); - } + __raw_writel(~LOCKCODE, cti->base + LOCKACCESS); } #endif -- cgit v1.2.3-70-g09d2 From e8d432c9cf0a3d569b2d00877d12c9ffe9e55286 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 6 Nov 2012 11:57:43 +0000 Subject: ARM: kernel: add MIDR to per-CPU information data The advent of big.LITTLE ARM platforms requires the kernel to be able to identify the MIDRs of all online CPUs upon request. MIDRs are stashed at boot time so that kernel subsystems can detect the MIDR of online CPUs by simply retrieving per-CPU data updated by all booted CPUs. Signed-off-by: Lorenzo Pieralisi Acked-by: Nicolas Pitre --- arch/arm/include/asm/cpu.h | 1 + arch/arm/kernel/smp.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/cpu.h b/arch/arm/include/asm/cpu.h index d797223b39d..2744f060255 100644 --- a/arch/arm/include/asm/cpu.h +++ b/arch/arm/include/asm/cpu.h @@ -15,6 +15,7 @@ struct cpuinfo_arm { struct cpu cpu; + u32 cpuid; #ifdef CONFIG_SMP unsigned int loops_per_jiffy; #endif diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index fbc8b2623d8..7eacd84cdc9 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -281,6 +281,7 @@ static void __cpuinit smp_store_cpu_info(unsigned int cpuid) struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid); cpu_info->loops_per_jiffy = loops_per_jiffy; + cpu_info->cpuid = read_cpuid_id(); store_cpu_topology(cpuid); } -- cgit v1.2.3-70-g09d2 From dca463daa0151c5bbbd8ec8fd42882a3966d3c44 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 15 Nov 2012 17:30:32 +0000 Subject: ARM: kernel: enhance MPIDR macro definitions Kernel subsystems other than the topology layer need the MPIDR mask definitions to access the MPIDR without relying on hardcoded masks. This patch moves the MPIDR register masks definition to a header file and defines a macro to simplify access to MPIDR bit fields representing affinity levels. Signed-off-by: Lorenzo Pieralisi Acked-by: Will Deacon Acked-by: Nicolas Pitre --- arch/arm/include/asm/cputype.h | 13 +++++++++++++ arch/arm/kernel/topology.c | 27 +-------------------------- 2 files changed, 14 insertions(+), 26 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index cb47d28cbe1..a59dcb5ab5f 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -25,6 +25,19 @@ #define CPUID_EXT_ISAR4 "c2, 4" #define CPUID_EXT_ISAR5 "c2, 5" +#define MPIDR_SMP_BITMASK (0x3 << 30) +#define MPIDR_SMP_VALUE (0x2 << 30) + +#define MPIDR_MT_BITMASK (0x1 << 24) + +#define MPIDR_HWID_BITMASK 0xFFFFFF + +#define MPIDR_LEVEL_BITS 8 +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1) + +#define MPIDR_AFFINITY_LEVEL(mpidr, level) \ + ((mpidr >> (MPIDR_LEVEL_BITS * level)) & MPIDR_LEVEL_MASK) + extern unsigned int processor_id; #ifdef CONFIG_CPU_CP15 diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 26c12c6440f..cd68d1aa9c3 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -196,32 +196,7 @@ static inline void parse_dt_topology(void) {} static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {} #endif - -/* - * cpu topology management - */ - -#define MPIDR_SMP_BITMASK (0x3 << 30) -#define MPIDR_SMP_VALUE (0x2 << 30) - -#define MPIDR_MT_BITMASK (0x1 << 24) - -/* - * These masks reflect the current use of the affinity levels. - * The affinity level can be up to 16 bits according to ARM ARM - */ -#define MPIDR_HWID_BITMASK 0xFFFFFF - -#define MPIDR_LEVEL0_MASK 0x3 -#define MPIDR_LEVEL0_SHIFT 0 - -#define MPIDR_LEVEL1_MASK 0xF -#define MPIDR_LEVEL1_SHIFT 8 - -#define MPIDR_LEVEL2_MASK 0xFF -#define MPIDR_LEVEL2_SHIFT 16 - -/* + /* * cpu topology table */ struct cputopo_arm cpu_topology[NR_CPUS]; -- cgit v1.2.3-70-g09d2 From a0ae02405076ac32bd17ece976e914b5b6075bb0 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 17 Nov 2011 17:31:51 +0000 Subject: ARM: kernel: add device tree init map function When booting through a device tree, the kernel cpu logical id map can be initialized using device tree data passed by FW or through an embedded blob. This patch adds a function that parses device tree "cpu" nodes and retrieves the corresponding CPUs hardware identifiers (MPIDR). It sets the possible cpus and the cpu logical map values according to the number of CPUs defined in the device tree and respective properties. The device tree HW identifiers are considered valid if all CPU nodes contain a "reg" property, there are no duplicate "reg" entries and the DT defines a CPU node whose "reg" property matches the MPIDR[23:0] of the boot CPU. The primary CPU is assigned cpu logical number 0 to keep the current convention valid. Current bindings documentation is included in the patch: Documentation/devicetree/bindings/arm/cpus.txt Signed-off-by: Lorenzo Pieralisi Acked-by: Nicolas Pitre --- Documentation/devicetree/bindings/arm/cpus.txt | 77 +++++++++++++++++++ arch/arm/include/asm/prom.h | 2 + arch/arm/kernel/devtree.c | 100 +++++++++++++++++++++++++ 3 files changed, 179 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/cpus.txt (limited to 'arch/arm/include/asm') diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt new file mode 100644 index 00000000000..f32494dbfe1 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/cpus.txt @@ -0,0 +1,77 @@ +* ARM CPUs binding description + +The device tree allows to describe the layout of CPUs in a system through +the "cpus" node, which in turn contains a number of subnodes (ie "cpu") +defining properties for every cpu. + +Bindings for CPU nodes follow the ePAPR standard, available from: + +http://devicetree.org + +For the ARM architecture every CPU node must contain the following properties: + +- device_type: must be "cpu" +- reg: property matching the CPU MPIDR[23:0] register bits + reg[31:24] bits must be set to 0 +- compatible: should be one of: + "arm,arm1020" + "arm,arm1020e" + "arm,arm1022" + "arm,arm1026" + "arm,arm720" + "arm,arm740" + "arm,arm7tdmi" + "arm,arm920" + "arm,arm922" + "arm,arm925" + "arm,arm926" + "arm,arm940" + "arm,arm946" + "arm,arm9tdmi" + "arm,cortex-a5" + "arm,cortex-a7" + "arm,cortex-a8" + "arm,cortex-a9" + "arm,cortex-a15" + "arm,arm1136" + "arm,arm1156" + "arm,arm1176" + "arm,arm11mpcore" + "faraday,fa526" + "intel,sa110" + "intel,sa1100" + "marvell,feroceon" + "marvell,mohawk" + "marvell,xsc3" + "marvell,xscale" + +Example: + + cpus { + #size-cells = <0>; + #address-cells = <1>; + + CPU0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x0>; + }; + + CPU1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x1>; + }; + + CPU2: cpu@100 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x100>; + }; + + CPU3: cpu@101 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x101>; + }; + }; diff --git a/arch/arm/include/asm/prom.h b/arch/arm/include/asm/prom.h index aeae9c609df..8dd51dc1a36 100644 --- a/arch/arm/include/asm/prom.h +++ b/arch/arm/include/asm/prom.h @@ -15,6 +15,7 @@ extern struct machine_desc *setup_machine_fdt(unsigned int dt_phys); extern void arm_dt_memblock_reserve(void); +extern void __init arm_dt_init_cpu_maps(void); #else /* CONFIG_OF */ @@ -24,6 +25,7 @@ static inline struct machine_desc *setup_machine_fdt(unsigned int dt_phys) } static inline void arm_dt_memblock_reserve(void) { } +static inline void arm_dt_init_cpu_maps(void) { } #endif /* CONFIG_OF */ #endif /* ASMARM_PROM_H */ diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index bee7f9d47f0..aaf9add497f 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -19,8 +19,10 @@ #include #include +#include #include #include +#include #include #include @@ -61,6 +63,104 @@ void __init arm_dt_memblock_reserve(void) } } +/* + * arm_dt_init_cpu_maps - Function retrieves cpu nodes from the device tree + * and builds the cpu logical map array containing MPIDR values related to + * logical cpus + * + * Updates the cpu possible mask with the number of parsed cpu nodes + */ +void __init arm_dt_init_cpu_maps(void) +{ + /* + * Temp logical map is initialized with UINT_MAX values that are + * considered invalid logical map entries since the logical map must + * contain a list of MPIDR[23:0] values where MPIDR[31:24] must + * read as 0. + */ + struct device_node *cpu, *cpus; + u32 i, j, cpuidx = 1; + u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0; + + u32 tmp_map[NR_CPUS] = { [0 ... NR_CPUS-1] = UINT_MAX }; + bool bootcpu_valid = false; + cpus = of_find_node_by_path("/cpus"); + + if (!cpus) + return; + + for_each_child_of_node(cpus, cpu) { + u32 hwid; + + pr_debug(" * %s...\n", cpu->full_name); + /* + * A device tree containing CPU nodes with missing "reg" + * properties is considered invalid to build the + * cpu_logical_map. + */ + if (of_property_read_u32(cpu, "reg", &hwid)) { + pr_debug(" * %s missing reg property\n", + cpu->full_name); + return; + } + + /* + * 8 MSBs must be set to 0 in the DT since the reg property + * defines the MPIDR[23:0]. + */ + if (hwid & ~MPIDR_HWID_BITMASK) + return; + + /* + * Duplicate MPIDRs are a recipe for disaster. + * Scan all initialized entries and check for + * duplicates. If any is found just bail out. + * temp values were initialized to UINT_MAX + * to avoid matching valid MPIDR[23:0] values. + */ + for (j = 0; j < cpuidx; j++) + if (WARN(tmp_map[j] == hwid, "Duplicate /cpu reg " + "properties in the DT\n")) + return; + + /* + * Build a stashed array of MPIDR values. Numbering scheme + * requires that if detected the boot CPU must be assigned + * logical id 0. Other CPUs get sequential indexes starting + * from 1. If a CPU node with a reg property matching the + * boot CPU MPIDR is detected, this is recorded so that the + * logical map built from DT is validated and can be used + * to override the map created in smp_setup_processor_id(). + */ + if (hwid == mpidr) { + i = 0; + bootcpu_valid = true; + } else { + i = cpuidx++; + } + + tmp_map[i] = hwid; + + if (cpuidx > nr_cpu_ids) + break; + } + + if (WARN(!bootcpu_valid, "DT missing boot CPU MPIDR[23:0], " + "fall back to default cpu_logical_map\n")) + return; + + /* + * Since the boot CPU node contains proper data, and all nodes have + * a reg property, the DT CPU list can be considered valid and the + * logical map created in smp_setup_processor_id() can be overridden + */ + for (i = 0; i < cpuidx; i++) { + set_cpu_possible(i, true); + cpu_logical_map(i) = tmp_map[i]; + pr_debug("cpu logical map 0x%x\n", cpu_logical_map(i)); + } +} + /** * setup_machine_fdt - Machine setup when an dtb was passed to the kernel * @dt_phys: physical address of dt blob -- cgit v1.2.3-70-g09d2 From 7f124aaf01439d2fa54283f3c375ce3b9fc776d4 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 17 Nov 2011 17:36:24 +0000 Subject: ARM: kernel: add logical mappings look-up In ARM SMP systems the MPIDR register ([23:0] bits) is used to uniquely identify CPUs. In order to retrieve the logical CPU index corresponding to a given MPIDR value and guarantee a consistent translation throughout the kernel, this patch adds a look-up based on the MPIDR[23:0] so that kernel subsystems can use it whenever the logical cpu index corresponding to a given MPIDR value is needed. Signed-off-by: Lorenzo Pieralisi Acked-by: Will Deacon Acked-by: Nicolas Pitre --- arch/arm/include/asm/smp_plat.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index 558d6c80aca..aaa61b6f50f 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -5,6 +5,9 @@ #ifndef __ASMARM_SMP_PLAT_H #define __ASMARM_SMP_PLAT_H +#include +#include + #include /* @@ -48,5 +51,19 @@ static inline int cache_ops_need_broadcast(void) */ extern int __cpu_logical_map[]; #define cpu_logical_map(cpu) __cpu_logical_map[cpu] +/* + * Retrieve logical cpu index corresponding to a given MPIDR[23:0] + * - mpidr: MPIDR[23:0] to be used for the look-up + * + * Returns the cpu logical index or -EINVAL on look-up error + */ +static inline int get_logical_index(u32 mpidr) +{ + int cpu; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (cpu_logical_map(cpu) == mpidr) + return cpu; + return -EINVAL; +} #endif -- cgit v1.2.3-70-g09d2 From c7cc504bc351e41e871e317ca7f032f4562f34ad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 22 Nov 2012 13:05:55 +0100 Subject: ARM: 7584/1: perf: fix link error when CONFIG_HW_PERF_EVENTS is not selected Commit e50c541 (ARM: perf: add guest vs host discrimination) broken the link as perf_instruction_pointer and perf_misc_flags are not defined when CONFIG_HW_PERF_EVENTS is not selected. As it make little sense to try and profile a guest without any HW event, just fallback to the original code when this config option is not selected. Reported-by: Russell King Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Russell King --- arch/arm/include/asm/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 00416edecea..755877527cf 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -21,9 +21,11 @@ #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xFFFF +#ifdef CONFIG_HW_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +#endif #endif /* __ARM_PERF_EVENT_H__ */ -- cgit v1.2.3-70-g09d2 From 3e99675af1b25a191c467700499b1cbe5585a778 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sun, 25 Nov 2012 03:24:32 +0100 Subject: ARM: 7582/2: rename kvm_seq to vmalloc_seq so to avoid confusion with KVM The kvm_seq value has nothing to do what so ever with this other KVM. Given that KVM support on ARM is imminent, it's best to rename kvm_seq into something else to clearly identify what it is about i.e. a sequence number for vmalloc section mappings. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/include/asm/mmu.h | 2 +- arch/arm/include/asm/mmu_context.h | 6 +++--- arch/arm/mm/context.c | 4 ++-- arch/arm/mm/ioremap.c | 16 ++++++++-------- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 5b53b53ab5c..9f77e7804f3 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -7,7 +7,7 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID u64 id; #endif - unsigned int kvm_seq; + unsigned int vmalloc_seq; } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index a64f61cb23d..e1f644bc7cc 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -20,7 +20,7 @@ #include #include -void __check_kvm_seq(struct mm_struct *mm); +void __check_vmalloc_seq(struct mm_struct *mm); #ifdef CONFIG_CPU_HAS_ASID @@ -34,8 +34,8 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); static inline void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) { - if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) - __check_kvm_seq(mm); + if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) + __check_vmalloc_seq(mm); if (irqs_disabled()) /* diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 7a27d7363be..bc4a5e9ebb7 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -186,8 +186,8 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) unsigned long flags; unsigned int cpu = smp_processor_id(); - if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) - __check_kvm_seq(mm); + if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) + __check_vmalloc_seq(mm); /* * Required during context switch to avoid speculative page table diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 5dcc2fd46c4..88fd86cf3d9 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -47,18 +47,18 @@ int ioremap_page(unsigned long virt, unsigned long phys, } EXPORT_SYMBOL(ioremap_page); -void __check_kvm_seq(struct mm_struct *mm) +void __check_vmalloc_seq(struct mm_struct *mm) { unsigned int seq; do { - seq = init_mm.context.kvm_seq; + seq = init_mm.context.vmalloc_seq; memcpy(pgd_offset(mm, VMALLOC_START), pgd_offset_k(VMALLOC_START), sizeof(pgd_t) * (pgd_index(VMALLOC_END) - pgd_index(VMALLOC_START))); - mm->context.kvm_seq = seq; - } while (seq != init_mm.context.kvm_seq); + mm->context.vmalloc_seq = seq; + } while (seq != init_mm.context.vmalloc_seq); } #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) @@ -89,13 +89,13 @@ static void unmap_area_sections(unsigned long virt, unsigned long size) if (!pmd_none(pmd)) { /* * Clear the PMD from the page table, and - * increment the kvm sequence so others + * increment the vmalloc sequence so others * notice this change. * * Note: this is still racy on SMP machines. */ pmd_clear(pmdp); - init_mm.context.kvm_seq++; + init_mm.context.vmalloc_seq++; /* * Free the page table, if there was one. @@ -112,8 +112,8 @@ static void unmap_area_sections(unsigned long virt, unsigned long size) * Ensure that the active_mm is up to date - we want to * catch any use-after-iounmap cases. */ - if (current->active_mm->context.kvm_seq != init_mm.context.kvm_seq) - __check_kvm_seq(current->active_mm); + if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq) + __check_vmalloc_seq(current->active_mm); flush_tlb_kernel_range(virt, end); } -- cgit v1.2.3-70-g09d2 From 14318efb322e2fe1a034c69463d725209eb9d548 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 29 Nov 2012 20:39:54 +0100 Subject: ARM: 7587/1: implement optimized percpu variable access Use the previously unused TPIDRPRW register to store percpu offsets. TPIDRPRW is only accessible in PL1, so it can only be used in the kernel. This replaces 2 loads with a mrc instruction for each percpu variable access. With hackbench, the performance improvement is 1.4% on Cortex-A9 (highbank). Taking an average of 30 runs of "hackbench -l 1000" yields: Before: 6.2191 After: 6.1348 Will Deacon reported similar delta on v6 with 11MPCore. The asm "memory clobber" are needed here to ensure the percpu offset gets reloaded. Testing by Will found that this would not happen in __schedule() which is a bit of a special case as preemption is disabled but the execution can move cores. Signed-off-by: Rob Herring Acked-by: Will Deacon Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/include/asm/Kbuild | 1 - arch/arm/include/asm/percpu.h | 45 +++++++++++++++++++++++++++++++++++++++++++ arch/arm/kernel/setup.c | 6 ++++++ arch/arm/kernel/smp.c | 4 +++- 4 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 arch/arm/include/asm/percpu.h (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index f70ae175a3d..2ffdaacd461 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -16,7 +16,6 @@ generic-y += local64.h generic-y += msgbuf.h generic-y += param.h generic-y += parport.h -generic-y += percpu.h generic-y += poll.h generic-y += resource.h generic-y += sections.h diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h new file mode 100644 index 00000000000..968c0a14e0a --- /dev/null +++ b/arch/arm/include/asm/percpu.h @@ -0,0 +1,45 @@ +/* + * Copyright 2012 Calxeda, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +#ifndef _ASM_ARM_PERCPU_H_ +#define _ASM_ARM_PERCPU_H_ + +/* + * Same as asm-generic/percpu.h, except that we store the per cpu offset + * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7 + */ +#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6) +static inline void set_my_cpu_offset(unsigned long off) +{ + /* Set TPIDRPRW */ + asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory"); +} + +static inline unsigned long __my_cpu_offset(void) +{ + unsigned long off; + /* Read TPIDRPRW */ + asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off) : : "memory"); + return off; +} +#define __my_cpu_offset __my_cpu_offset() +#else +#define set_my_cpu_offset(x) do {} while(0) + +#endif /* CONFIG_SMP */ + +#include + +#endif /* _ASM_ARM_PERCPU_H_ */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index f739fb1d217..9a89bf4aefe 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -383,6 +383,12 @@ void cpu_init(void) BUG(); } + /* + * This only works on resume and secondary cores. For booting on the + * boot cpu, smp_prepare_boot_cpu is called after percpu area setup. + */ + set_my_cpu_offset(per_cpu_offset(cpu)); + cpu_proc_init(); /* diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 7eacd84cdc9..f3a2be5837a 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -314,9 +314,10 @@ asmlinkage void __cpuinit secondary_start_kernel(void) current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); + cpu_init(); + printk("CPU%u: Booted secondary processor\n", cpu); - cpu_init(); preempt_disable(); trace_hardirqs_off(); @@ -372,6 +373,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } void __init smp_prepare_cpus(unsigned int max_cpus) -- cgit v1.2.3-70-g09d2