From 158e8bfe802f730f9ea7cde32eee8b43285bdd4a Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Sun, 24 Jun 2012 12:46:26 +0100 Subject: ARM: 7432/1: use the new linux/sizes.h Signed-off-by: Alessandro Rubini Acked-by: Giancarlo Asnaghi Acked-by: Linus Walleij Cc: Alan Cox Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index fcb575747e5..e965f1b560f 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #ifdef CONFIG_NEED_MACH_MEMORY_H #include -- cgit v1.2.3-70-g09d2 From a5203c4ce6750730b1d95a8bc1e8214765450f7e Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sun, 8 Jul 2012 22:51:44 +0100 Subject: ARM: 7460/1: remove asm/locks.h Commit 64ac24e738823161693bf791f87adc802cf529ff ("Generic semaphore implementation") removed the last include of this header. Apparently it was just an oversight to keep this header. It can safely be removed now. Acked-by: Will Deacon Signed-off-by: Paul Bolle Signed-off-by: Russell King --- arch/arm/include/asm/locks.h | 274 ------------------------------------------- 1 file changed, 274 deletions(-) delete mode 100644 arch/arm/include/asm/locks.h (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/locks.h b/arch/arm/include/asm/locks.h deleted file mode 100644 index ef4c897772d..00000000000 --- a/arch/arm/include/asm/locks.h +++ /dev/null @@ -1,274 +0,0 @@ -/* - * arch/arm/include/asm/locks.h - * - * Copyright (C) 2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Interrupt safe locking assembler. 
- */ -#ifndef __ASM_PROC_LOCKS_H -#define __ASM_PROC_LOCKS_H - -#if __LINUX_ARM_ARCH__ >= 6 - -#define __down_op(ptr,fail) \ - ({ \ - __asm__ __volatile__( \ - "@ down_op\n" \ -"1: ldrex lr, [%0]\n" \ -" sub lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" teq lr, #0\n" \ -" movmi ip, %0\n" \ -" blmi " #fail \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __down_op_ret(ptr,fail) \ - ({ \ - unsigned int ret; \ - __asm__ __volatile__( \ - "@ down_op_ret\n" \ -"1: ldrex lr, [%1]\n" \ -" sub lr, lr, %2\n" \ -" strex ip, lr, [%1]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" teq lr, #0\n" \ -" movmi ip, %1\n" \ -" movpl ip, #0\n" \ -" blmi " #fail "\n" \ -" mov %0, ip" \ - : "=&r" (ret) \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - ret; \ - }) - -#define __up_op(ptr,wake) \ - ({ \ - smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op\n" \ -"1: ldrex lr, [%0]\n" \ -" add lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" cmp lr, #0\n" \ -" movle ip, %0\n" \ -" blle " #wake \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - }) - -/* - * The value 0x01000000 supports up to 128 processors and - * lots of processes. BIAS must be chosen such that sub'ing - * BIAS once per CPU will result in the long remaining - * negative. 
- */ -#define RW_LOCK_BIAS 0x01000000 -#define RW_LOCK_BIAS_STR "0x01000000" - -#define __down_op_write(ptr,fail) \ - ({ \ - __asm__ __volatile__( \ - "@ down_op_write\n" \ -"1: ldrex lr, [%0]\n" \ -" sub lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" teq lr, #0\n" \ -" movne ip, %0\n" \ -" blne " #fail \ - : \ - : "r" (ptr), "I" (RW_LOCK_BIAS) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __up_op_write(ptr,wake) \ - ({ \ - smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op_write\n" \ -"1: ldrex lr, [%0]\n" \ -" adds lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" movcs ip, %0\n" \ -" blcs " #wake \ - : \ - : "r" (ptr), "I" (RW_LOCK_BIAS) \ - : "ip", "lr", "cc"); \ - }) - -#define __down_op_read(ptr,fail) \ - __down_op(ptr, fail) - -#define __up_op_read(ptr,wake) \ - ({ \ - smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op_read\n" \ -"1: ldrex lr, [%0]\n" \ -" add lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" teq lr, #0\n" \ -" moveq ip, %0\n" \ -" bleq " #wake \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - }) - -#else - -#define __down_op(ptr,fail) \ - ({ \ - __asm__ __volatile__( \ - "@ down_op\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" subs lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" movmi ip, %0\n" \ -" blmi " #fail \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __down_op_ret(ptr,fail) \ - ({ \ - unsigned int ret; \ - __asm__ __volatile__( \ - "@ down_op_ret\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%1]\n" \ -" subs lr, lr, %2\n" \ -" str lr, [%1]\n" \ -" msr cpsr_c, ip\n" \ -" movmi ip, %1\n" \ -" movpl ip, #0\n" \ -" blmi " #fail "\n" \ -" mov %0, ip" \ - : "=&r" (ret) \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - ret; \ - }) - -#define __up_op(ptr,wake) \ - ({ \ 
- smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" adds lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" movle ip, %0\n" \ -" blle " #wake \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - }) - -/* - * The value 0x01000000 supports up to 128 processors and - * lots of processes. BIAS must be chosen such that sub'ing - * BIAS once per CPU will result in the long remaining - * negative. - */ -#define RW_LOCK_BIAS 0x01000000 -#define RW_LOCK_BIAS_STR "0x01000000" - -#define __down_op_write(ptr,fail) \ - ({ \ - __asm__ __volatile__( \ - "@ down_op_write\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" subs lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" movne ip, %0\n" \ -" blne " #fail \ - : \ - : "r" (ptr), "I" (RW_LOCK_BIAS) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __up_op_write(ptr,wake) \ - ({ \ - __asm__ __volatile__( \ - "@ up_op_write\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" adds lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" movcs ip, %0\n" \ -" blcs " #wake \ - : \ - : "r" (ptr), "I" (RW_LOCK_BIAS) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __down_op_read(ptr,fail) \ - __down_op(ptr, fail) - -#define __up_op_read(ptr,wake) \ - ({ \ - smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op_read\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" adds lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" moveq ip, %0\n" \ -" bleq " #wake \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - }) - -#endif - -#endif -- cgit v1.2.3-70-g09d2 From 546c2896a42202dbc7d02f7c6ec9948ac1bf511b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:43:41 +0100 Subject: ARM: 7446/1: spinlock: use ticket algorithm for ARMv6+ 
locking implementation Ticket spinlocks ensure locking fairness by introducing a FIFO-like nature to the granting of lock acquisitions and also reducing the thundering herd effect when spinning on a lock by allowing the cacheline to remain in a shared state amongst the waiting CPUs. This is especially important on systems where memory-access times are not necessarily uniform when accessing the lock structure (for example, on a multi-cluster platform where the lock is allocated into L1 when a CPU releases it). This patch implements the ticket spinlock algorithm for ARM, replacing the simpler implementation for ARMv6+ processors. Reviewed-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/spinlock.h | 72 +++++++++++++++++++++++------------ arch/arm/include/asm/spinlock_types.h | 17 ++++++++- 2 files changed, 63 insertions(+), 26 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 65fa3c88095..0da2effd4b3 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -59,18 +59,13 @@ static inline void dsb_sev(void) } /* - * ARMv6 Spin-locking. + * ARMv6 ticket-based spin-locking. * - * We exclusively read the old value. If it is zero, we may have - * won the lock, so we try exclusively storing it. A memory barrier - * is required after we get a lock, and before we release it, because - * V6 CPUs are assumed to have weakly ordered memory. - * - * Unlocked value: 0 - * Locked value: 1 + * A memory barrier is required after we get a lock, and before we + * release it, because V6 CPUs are assumed to have weakly ordered + * memory. 
*/ -#define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_spin_unlock_wait(lock) \ do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) @@ -79,31 +74,39 @@ static inline void dsb_sev(void) static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; + u32 newval; + arch_spinlock_t lockval; __asm__ __volatile__( -"1: ldrex %0, [%1]\n" -" teq %0, #0\n" - WFE("ne") -" strexeq %0, %2, [%1]\n" -" teqeq %0, #0\n" +"1: ldrex %0, [%3]\n" +" add %1, %0, %4\n" +" strex %2, %1, [%3]\n" +" teq %2, #0\n" " bne 1b" - : "=&r" (tmp) - : "r" (&lock->lock), "r" (1) + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp) + : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) : "cc"); + while (lockval.tickets.next != lockval.tickets.owner) { + wfe(); + lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner); + } + smp_mb(); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned long tmp; + u32 slock; __asm__ __volatile__( -" ldrex %0, [%1]\n" -" teq %0, #0\n" -" strexeq %0, %2, [%1]" - : "=&r" (tmp) - : "r" (&lock->lock), "r" (1) +" ldrex %0, [%2]\n" +" subs %1, %0, %0, ror #16\n" +" addeq %0, %0, %3\n" +" strexeq %1, %0, [%2]" + : "=&r" (slock), "=&r" (tmp) + : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) : "cc"); if (tmp == 0) { @@ -116,17 +119,38 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { + unsigned long tmp; + u32 slock; + smp_mb(); __asm__ __volatile__( -" str %1, [%0]\n" - : - : "r" (&lock->lock), "r" (0) +" mov %1, #1\n" +"1: ldrex %0, [%2]\n" +" uadd16 %0, %0, %1\n" +" strex %1, %0, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (slock), "=&r" (tmp) + : "r" (&lock->slock) : "cc"); dsb_sev(); } +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); + return tickets.owner != tickets.next; +} + +static inline int arch_spin_is_contended(arch_spinlock_t *lock) +{ + struct __raw_tickets tickets = 
ACCESS_ONCE(lock->tickets); + return (tickets.next - tickets.owner) > 1; +} +#define arch_spin_is_contended arch_spin_is_contended + /* * RWLOCKS * diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h index d14d197ae04..b262d2f8b47 100644 --- a/arch/arm/include/asm/spinlock_types.h +++ b/arch/arm/include/asm/spinlock_types.h @@ -5,11 +5,24 @@ # error "please don't include this file directly" #endif +#define TICKET_SHIFT 16 + typedef struct { - volatile unsigned int lock; + union { + u32 slock; + struct __raw_tickets { +#ifdef __ARMEB__ + u16 next; + u16 owner; +#else + u16 owner; + u16 next; +#endif + } tickets; + }; } arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } typedef struct { volatile unsigned int lock; -- cgit v1.2.3-70-g09d2 From 881ccccb6bd3f0e1fff8b9addbe0de90e0b16166 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:44:13 +0100 Subject: ARM: 7447/1: rwlocks: remove unused branch labels from trylock routines The ARM arch_{read,write}_trylock implementations include unused backwards branch labels, since we don't retry the locking operation if the exclusive store fails. This patch removes the labels. 
Acked-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/spinlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 0da2effd4b3..b4ca707d0a6 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -182,7 +182,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) unsigned long tmp; __asm__ __volatile__( -"1: ldrex %0, [%1]\n" +" ldrex %0, [%1]\n" " teq %0, #0\n" " strexeq %0, %2, [%1]" : "=&r" (tmp) @@ -268,7 +268,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) unsigned long tmp, tmp2 = 1; __asm__ __volatile__( -"1: ldrex %0, [%2]\n" +" ldrex %0, [%2]\n" " adds %0, %0, #1\n" " strexpl %1, %0, [%2]\n" : "=&r" (tmp), "+r" (tmp2) -- cgit v1.2.3-70-g09d2 From 4295b898f5a5c7e62ae68e7a4ecc4b414622ffe6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:45:00 +0100 Subject: ARM: 7448/1: perf: remove arm_perf_pmu_ids global enumeration In order to provide PMU name strings compatible with the OProfile user ABI, an enumeration of all PMUs is currently used by perf to identify each PMU uniquely. Unfortunately, this does not scale well in the presence of multiple PMUs and creates a single, global namespace across all PMUs in the system. This patch removes the enumeration and instead uses the name string for the PMU to map onto the OProfile variant. perf_pmu_name is implemented for CPU PMUs, which is all that OProfile cares about anyway. 
Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/perf_event.h | 17 +------------- arch/arm/include/asm/pmu.h | 3 +-- arch/arm/kernel/perf_event.c | 15 +++++-------- arch/arm/kernel/perf_event_v6.c | 2 -- arch/arm/kernel/perf_event_v7.c | 5 ----- arch/arm/kernel/perf_event_xscale.c | 2 -- arch/arm/oprofile/common.c | 45 +++++++++++++++++++++++-------------- 7 files changed, 36 insertions(+), 53 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 00cbe10a50e..e074948d814 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -12,21 +12,6 @@ #ifndef __ARM_PERF_EVENT_H__ #define __ARM_PERF_EVENT_H__ -/* ARM perf PMU IDs for use by internal perf clients. */ -enum arm_perf_pmu_ids { - ARM_PERF_PMU_ID_XSCALE1 = 0, - ARM_PERF_PMU_ID_XSCALE2, - ARM_PERF_PMU_ID_V6, - ARM_PERF_PMU_ID_V6MP, - ARM_PERF_PMU_ID_CA8, - ARM_PERF_PMU_ID_CA9, - ARM_PERF_PMU_ID_CA5, - ARM_PERF_PMU_ID_CA15, - ARM_PERF_PMU_ID_CA7, - ARM_NUM_PMU_IDS, -}; - -extern enum arm_perf_pmu_ids -armpmu_get_pmu_id(void); +/* Nothing to see here... 
*/ #endif /* __ARM_PERF_EVENT_H__ */ diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index 90114faa9f3..4432305f4a2 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -103,10 +103,9 @@ struct pmu_hw_events { struct arm_pmu { struct pmu pmu; - enum arm_perf_pmu_ids id; enum arm_pmu_type type; cpumask_t active_irqs; - const char *name; + char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 186c8cb982c..df85eda3add 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -47,17 +47,14 @@ static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); /* Set at runtime when we know what CPU type we are. */ static struct arm_pmu *cpu_pmu; -enum arm_perf_pmu_ids -armpmu_get_pmu_id(void) +const char *perf_pmu_name(void) { - int id = -ENODEV; - - if (cpu_pmu != NULL) - id = cpu_pmu->id; + if (!cpu_pmu) + return NULL; - return id; + return cpu_pmu->pmu.name; } -EXPORT_SYMBOL_GPL(armpmu_get_pmu_id); +EXPORT_SYMBOL_GPL(perf_pmu_name); int perf_num_counters(void) { @@ -760,7 +757,7 @@ init_hw_perf_events(void) cpu_pmu->name, cpu_pmu->num_events); cpu_pmu_init(cpu_pmu); register_cpu_notifier(&pmu_cpu_notifier); - armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW); + armpmu_register(cpu_pmu, cpu_pmu->name, PERF_TYPE_RAW); } else { pr_info("no hardware support available\n"); } diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index ab627a740fa..c90fcb2b696 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -650,7 +650,6 @@ static int armv6_map_event(struct perf_event *event) } static struct arm_pmu armv6pmu = { - .id = ARM_PERF_PMU_ID_V6, .name = "v6", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, @@ -685,7 +684,6 @@ static int 
armv6mpcore_map_event(struct perf_event *event) } static struct arm_pmu armv6mpcore_pmu = { - .id = ARM_PERF_PMU_ID_V6MP, .name = "v6mpcore", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index d3c53606816..f04070bd218 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1258,7 +1258,6 @@ static u32 __init armv7_read_num_pmnc_events(void) static struct arm_pmu *__init armv7_a8_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA8; armv7pmu.name = "ARMv7 Cortex-A8"; armv7pmu.map_event = armv7_a8_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1267,7 +1266,6 @@ static struct arm_pmu *__init armv7_a8_pmu_init(void) static struct arm_pmu *__init armv7_a9_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA9; armv7pmu.name = "ARMv7 Cortex-A9"; armv7pmu.map_event = armv7_a9_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1276,7 +1274,6 @@ static struct arm_pmu *__init armv7_a9_pmu_init(void) static struct arm_pmu *__init armv7_a5_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA5; armv7pmu.name = "ARMv7 Cortex-A5"; armv7pmu.map_event = armv7_a5_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1285,7 +1282,6 @@ static struct arm_pmu *__init armv7_a5_pmu_init(void) static struct arm_pmu *__init armv7_a15_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA15; armv7pmu.name = "ARMv7 Cortex-A15"; armv7pmu.map_event = armv7_a15_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1295,7 +1291,6 @@ static struct arm_pmu *__init armv7_a15_pmu_init(void) static struct arm_pmu *__init armv7_a7_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA7; armv7pmu.name = "ARMv7 Cortex-A7"; armv7pmu.map_event = armv7_a7_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 
e34e7254e65..f759fe0bab6 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -435,7 +435,6 @@ static int xscale_map_event(struct perf_event *event) } static struct arm_pmu xscale1pmu = { - .id = ARM_PERF_PMU_ID_XSCALE1, .name = "xscale1", .handle_irq = xscale1pmu_handle_irq, .enable = xscale1pmu_enable_event, @@ -803,7 +802,6 @@ xscale2pmu_write_counter(int counter, u32 val) } static struct arm_pmu xscale2pmu = { - .id = ARM_PERF_PMU_ID_XSCALE2, .name = "xscale2", .handle_irq = xscale2pmu_handle_irq, .enable = xscale2pmu_enable_event, diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index 4e0a371630b..99c63d4b6af 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c @@ -23,26 +23,37 @@ #include #ifdef CONFIG_HW_PERF_EVENTS + +/* + * OProfile has a curious naming scheme for the ARM PMUs, but they are + * part of the user ABI so we need to map from the perf PMU name for + * supported PMUs. + */ +static struct op_perf_name { + char *perf_name; + char *op_name; +} op_perf_name_map[] = { + { "xscale1", "arm/xscale1" }, + { "xscale2", "arm/xscale2" }, + { "v6", "arm/armv6" }, + { "v6mpcore", "arm/mpcore" }, + { "ARMv7 Cortex-A8", "arm/armv7" }, + { "ARMv7 Cortex-A9", "arm/armv7-ca9" }, +}; + char *op_name_from_perf_id(void) { - enum arm_perf_pmu_ids id = armpmu_get_pmu_id(); - - switch (id) { - case ARM_PERF_PMU_ID_XSCALE1: - return "arm/xscale1"; - case ARM_PERF_PMU_ID_XSCALE2: - return "arm/xscale2"; - case ARM_PERF_PMU_ID_V6: - return "arm/armv6"; - case ARM_PERF_PMU_ID_V6MP: - return "arm/mpcore"; - case ARM_PERF_PMU_ID_CA8: - return "arm/armv7"; - case ARM_PERF_PMU_ID_CA9: - return "arm/armv7-ca9"; - default: - return NULL; + int i; + struct op_perf_name names; + const char *perf_name = perf_pmu_name(); + + for (i = 0; i < ARRAY_SIZE(op_perf_name_map); ++i) { + names = op_perf_name_map[i]; + if (!strcmp(names.perf_name, perf_name)) + return names.op_name; } + + return NULL; } #endif -- 
cgit v1.2.3-70-g09d2 From 8c56cc8be5b38e3684eba96dc9b3f7ca7e495755 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:45:39 +0100 Subject: ARM: 7449/1: use generic strnlen_user and strncpy_from_user functions This patch implements the word-at-a-time interface for ARM using the same algorithm as x86. We use the fls macro from ARMv5 onwards, where we have a clz instruction available which saves us a mov instruction when targeting Thumb-2. For older CPUs, we use the magic 0x0ff0001 constant. Big-endian configurations make use of the implementation from asm-generic. With this implemented, we can replace our byte-at-a-time strnlen_user and strncpy_from_user functions with the optimised generic versions. Reviewed-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 2 ++ arch/arm/include/asm/uaccess.h | 27 ++++------------- arch/arm/include/asm/word-at-a-time.h | 55 +++++++++++++++++++++++++++++++++++ arch/arm/kernel/armksyms.c | 4 --- arch/arm/lib/Makefile | 1 - arch/arm/lib/strncpy_from_user.S | 43 --------------------------- arch/arm/lib/strnlen_user.S | 40 ------------------------- 7 files changed, 63 insertions(+), 109 deletions(-) create mode 100644 arch/arm/include/asm/word-at-a-time.h delete mode 100644 arch/arm/lib/strncpy_from_user.S delete mode 100644 arch/arm/lib/strnlen_user.S (limited to 'arch/arm/include') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8c9d264f210..574561a66d7 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -46,6 +46,8 @@ config ARM select GENERIC_SMP_IDLE_THREAD select KTIME_SCALAR select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 71f6536d17a..479a6352e0b 100644 --- 
a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -189,6 +189,9 @@ static inline void set_fs(mm_segment_t fs) #define access_ok(type,addr,size) (__range_ok(addr,size) == 0) +#define user_addr_max() \ + (segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL) + /* * The "__xxx" versions of the user access functions do not verify the * address space - it must have been done previously with a separate @@ -398,9 +401,6 @@ extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned l #define __clear_user(addr,n) (memset((void __force *)addr, 0, n), 0) #endif -extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count); -extern unsigned long __must_check __strnlen_user(const char __user *s, long n); - static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) @@ -427,24 +427,9 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return n; } -static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - res = __strncpy_from_user(dst, src, count); - return res; -} - -#define strlen_user(s) strnlen_user(s, ~0UL >> 1) +extern long strncpy_from_user(char *dest, const char __user *src, long count); -static inline long __must_check strnlen_user(const char __user *s, long n) -{ - unsigned long res = 0; - - if (__addr_ok(s)) - res = __strnlen_user(s, n); - - return res; -} +extern __must_check long strlen_user(const char __user *str); +extern __must_check long strnlen_user(const char __user *str, long n); #endif /* _ASMARM_UACCESS_H */ diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h new file mode 100644 index 00000000000..74b2d457857 --- /dev/null +++ b/arch/arm/include/asm/word-at-a-time.h @@ -0,0 +1,55 @@ +#ifndef 
__ASM_ARM_WORD_AT_A_TIME_H +#define __ASM_ARM_WORD_AT_A_TIME_H + +#ifndef __ARMEB__ + +/* + * Little-endian word-at-a-time zero byte handling. + * Heavily based on the x86 algorithm. + */ +#include + +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, + const struct word_at_a_time *c) +{ + unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; + *bits = mask; + return mask; +} + +#define prep_zero_mask(a, bits, c) (bits) + +static inline unsigned long create_zero_mask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +static inline unsigned long find_zero(unsigned long mask) +{ + unsigned long ret; + +#if __LINUX_ARM_ARCH__ >= 5 + /* We have clz available. */ + ret = fls(mask) >> 3; +#else + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + ret = (0x0ff0001 + mask) >> 23; + /* Fix the 1 for 00 case */ + ret &= mask; +#endif + + return ret; +} + +#else /* __ARMEB__ */ +#include +#endif + +#endif /* __ASM_ARM_WORD_AT_A_TIME_H */ diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index b57c75e0b01..c3dff6abc89 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -87,10 +87,6 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(__memzero); - /* user mem (segment) */ -EXPORT_SYMBOL(__strnlen_user); -EXPORT_SYMBOL(__strncpy_from_user); - #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 992769ae259..d5060dab6e5 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -8,7 +8,6 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ delay.o findbit.o memchr.o memcpy.o \ memmove.o memset.o memzero.o setbit.o \ - strncpy_from_user.o strnlen_user.o \ strchr.o strrchr.o \ testchangebit.o 
testclearbit.o testsetbit.o \ ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S deleted file mode 100644 index f202d7bd164..00000000000 --- a/arch/arm/lib/strncpy_from_user.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - * linux/arch/arm/lib/strncpy_from_user.S - * - * Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include - - .text - .align 5 - -/* - * Copy a string from user space to kernel space. - * r0 = dst, r1 = src, r2 = byte length - * returns the number of characters copied (strlen of copied string), - * -EFAULT on exception, or "len" if we fill the whole buffer - */ -ENTRY(__strncpy_from_user) - mov ip, r1 -1: subs r2, r2, #1 - ldrusr r3, r1, 1, pl - bmi 2f - strb r3, [r0], #1 - teq r3, #0 - bne 1b - sub r1, r1, #1 @ take NUL character out of count -2: sub r0, r1, ip - mov pc, lr -ENDPROC(__strncpy_from_user) - - .pushsection .fixup,"ax" - .align 0 -9001: mov r3, #0 - strb r3, [r0, #0] @ null terminate - mov r0, #-EFAULT - mov pc, lr - .popsection - diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S deleted file mode 100644 index 0ecbb459c4f..00000000000 --- a/arch/arm/lib/strnlen_user.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - * linux/arch/arm/lib/strnlen_user.S - * - * Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#include -#include -#include - - .text - .align 5 - -/* Prototype: unsigned long __strnlen_user(const char *str, long n) - * Purpose : get length of a string in user memory - * Params : str - address of string in user memory - * Returns : length of string *including terminator* - * or zero on exception, or n + 1 if too long - */ -ENTRY(__strnlen_user) - mov r2, r0 -1: - ldrusr r3, r0, 1 - teq r3, #0 - beq 2f - subs r1, r1, #1 - bne 1b - add r0, r0, #1 -2: sub r0, r0, r2 - mov pc, lr -ENDPROC(__strnlen_user) - - .pushsection .fixup,"ax" - .align 0 -9001: mov r0, #0 - mov pc, lr - .popsection -- cgit v1.2.3-70-g09d2 From b9a50f74905ad9126c91b495ece8a5f45434c643 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:46:08 +0100 Subject: ARM: 7450/1: dcache: select DCACHE_WORD_ACCESS for little-endian ARMv6+ CPUs DCACHE_WORD_ACCESS uses the word-at-a-time API for optimised string comparisons in the vfs layer. This patch implements support for load_unaligned_zeropad for ARM CPUs with native support for unaligned memory accesses (v6+) when running little-endian. 
Reviewed-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/include/asm/word-at-a-time.h | 41 +++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'arch/arm/include') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 574561a66d7..acd12efe6f3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -48,6 +48,7 @@ config ARM select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER + select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h index 74b2d457857..4d52f92967a 100644 --- a/arch/arm/include/asm/word-at-a-time.h +++ b/arch/arm/include/asm/word-at-a-time.h @@ -48,6 +48,47 @@ static inline unsigned long find_zero(unsigned long mask) return ret; } +#ifdef CONFIG_DCACHE_WORD_ACCESS + +#define zero_bytemask(mask) (mask) + +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. 
+ */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, offset; + + /* Load word from unaligned pointer addr */ + asm( + "1: ldr %0, [%2]\n" + "2:\n" + " .pushsection .fixup,\"ax\"\n" + " .align 2\n" + "3: and %1, %2, #0x3\n" + " bic %2, %2, #0x3\n" + " ldr %0, [%2]\n" + " lsl %1, %1, #0x3\n" + " lsr %0, %0, %1\n" + " b 2b\n" + " .popsection\n" + " .pushsection __ex_table,\"a\"\n" + " .align 3\n" + " .long 1b, 3b\n" + " .popsection" + : "=&r" (ret), "=&r" (offset) + : "r" (addr), "Qo" (*(unsigned long *)addr)); + + return ret; +} + + +#endif /* DCACHE_WORD_ACCESS */ + #else /* __ARMEB__ */ #include #endif -- cgit v1.2.3-70-g09d2 From 923df96b9f31b7d08d8438ff9677326d9537accf Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:46:45 +0100 Subject: ARM: 7451/1: arch timer: implement read_current_timer and get_cycles This patch implements read_current_timer using the architected timers when they are selected via CONFIG_ARM_ARCH_TIMER. If they are detected not to be usable at runtime, we return -ENXIO to the caller. Furthermore, if read_current_timer is exported then we can implement get_cycles in terms of it for use as both an entropy source and for implementing __udelay and friends. 
Tested-by: Shinya Kuribayashi Reviewed-by: Stephen Boyd Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/arch_timer.h | 3 +++ arch/arm/include/asm/timex.h | 10 ++++++---- arch/arm/kernel/arch_timer.c | 8 ++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index ed2e95d46e2..62e75475e57 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -1,7 +1,10 @@ #ifndef __ASMARM_ARCH_TIMER_H #define __ASMARM_ARCH_TIMER_H +#include + #ifdef CONFIG_ARM_ARCH_TIMER +#define ARCH_HAS_READ_CURRENT_TIMER int arch_timer_of_register(void); int arch_timer_sched_clock_init(void); #else diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h index 3be8de3adab..ce119442277 100644 --- a/arch/arm/include/asm/timex.h +++ b/arch/arm/include/asm/timex.h @@ -12,13 +12,15 @@ #ifndef _ASMARM_TIMEX_H #define _ASMARM_TIMEX_H +#include #include typedef unsigned long cycles_t; -static inline cycles_t get_cycles (void) -{ - return 0; -} +#ifdef ARCH_HAS_READ_CURRENT_TIMER +#define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 
0 : c; }) +#else +#define get_cycles() (0) +#endif #endif diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index dd58035621f..dbbeec4f06e 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -223,6 +223,14 @@ static cycle_t arch_counter_read(struct clocksource *cs) return arch_counter_get_cntpct(); } +int read_current_timer(unsigned long *timer_val) +{ + if (!arch_timer_rate) + return -ENXIO; + *timer_val = arch_counter_get_cntpct(); + return 0; +} + static struct clocksource clocksource_counter = { .name = "arch_sys_counter", .rating = 400, -- cgit v1.2.3-70-g09d2 From d0a533b18235d36206b9b422efadb7cee444dfdb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:47:17 +0100 Subject: ARM: 7452/1: delay: allow timer-based delay implementation to be selected This patch allows a timer-based delay implementation to be selected by switching the delay routines over to use get_cycles, which is implemented in terms of read_current_timer. This further allows us to skip the loop calibration and have a consistent delay function in the face of core frequency scaling. To avoid the pain of dealing with memory-mapped counters, this implementation uses the co-processor interface to the architected timers when they are available. The previous loop-based implementation is kept around for CPUs without the architected timers and we retain both the maximum delay (2ms) and the corresponding conversion factors for determining the number of loops required for a given interval. Since the indirection of the timer routines will only work when called from C, the sa1100 sleep routines are modified to branch to the loop-based delay functions directly. 
Tested-by: Shinya Kuribayashi Reviewed-by: Stephen Boyd Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/delay.h | 32 +++++++++++++++----- arch/arm/kernel/arch_timer.c | 3 ++ arch/arm/kernel/armksyms.c | 3 +- arch/arm/lib/Makefile | 2 +- arch/arm/lib/delay-loop.S | 67 +++++++++++++++++++++++++++++++++++++++++ arch/arm/lib/delay.S | 69 ------------------------------------------ arch/arm/lib/delay.c | 71 ++++++++++++++++++++++++++++++++++++++++++++ arch/arm/mach-sa1100/sleep.S | 8 ++--- 8 files changed, 172 insertions(+), 83 deletions(-) create mode 100644 arch/arm/lib/delay-loop.S delete mode 100644 arch/arm/lib/delay.S create mode 100644 arch/arm/lib/delay.c (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h index b2deda18154..dc6145120de 100644 --- a/arch/arm/include/asm/delay.h +++ b/arch/arm/include/asm/delay.h @@ -6,9 +6,22 @@ #ifndef __ASM_ARM_DELAY_H #define __ASM_ARM_DELAY_H +#include #include /* HZ */ -extern void __delay(int loops); +#define MAX_UDELAY_MS 2 +#define UDELAY_MULT ((UL(2199023) * HZ) >> 11) +#define UDELAY_SHIFT 30 + +#ifndef __ASSEMBLY__ + +extern struct arm_delay_ops { + void (*delay)(unsigned long); + void (*const_udelay)(unsigned long); + void (*udelay)(unsigned long); +} arm_delay_ops; + +#define __delay(n) arm_delay_ops.delay(n) /* * This function intentionally does not exist; if you see references to @@ -23,22 +36,27 @@ extern void __bad_udelay(void); * division by multiplication: you don't have to worry about * loss of precision. * - * Use only for very small delays ( < 1 msec). Should probably use a + * Use only for very small delays ( < 2 msec). Should probably use a * lookup table, really, as the multiplications take much too long with * short delays. 
This is a "reasonable" implementation, though (and the * first constant multiplications gets optimized away if the delay is * a constant) */ -extern void __udelay(unsigned long usecs); -extern void __const_udelay(unsigned long); - -#define MAX_UDELAY_MS 2 +#define __udelay(n) arm_delay_ops.udelay(n) +#define __const_udelay(n) arm_delay_ops.const_udelay(n) #define udelay(n) \ (__builtin_constant_p(n) ? \ ((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() : \ - __const_udelay((n) * ((2199023U*HZ)>>11))) : \ + __const_udelay((n) * UDELAY_MULT)) : \ __udelay(n)) +/* Loop-based definitions for assembly code. */ +extern void __loop_delay(unsigned long loops); +extern void __loop_udelay(unsigned long usecs); +extern void __loop_const_udelay(unsigned long); + +#endif /* __ASSEMBLY__ */ + #endif /* defined(_ARM_DELAY_H) */ diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index dbbeec4f06e..675cee09c01 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -32,6 +32,8 @@ static int arch_timer_ppi2; static struct clock_event_device __percpu **arch_timer_evt; +extern void init_current_timer_delay(unsigned long freq); + /* * Architected system timer support. 
*/ @@ -304,6 +306,7 @@ static int __init arch_timer_register(void) if (err) goto out_free_irq; + init_current_timer_delay(arch_timer_rate); return 0; out_free_irq: diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index b57c75e0b01..71962284d28 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -49,8 +49,7 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); /* platform dependent support */ -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__const_udelay); +EXPORT_SYMBOL(arm_delay_ops); /* networking */ EXPORT_SYMBOL(csum_partial); diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 992769ae259..b621114644f 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -6,7 +6,7 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ - delay.o findbit.o memchr.o memcpy.o \ + delay.o delay-loop.o findbit.o memchr.o memcpy.o \ memmove.o memset.o memzero.o setbit.o \ strncpy_from_user.o strnlen_user.o \ strchr.o strrchr.o \ diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S new file mode 100644 index 00000000000..36b668d8e12 --- /dev/null +++ b/arch/arm/lib/delay-loop.S @@ -0,0 +1,67 @@ +/* + * linux/arch/arm/lib/delay.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + .text + +.LC0: .word loops_per_jiffy +.LC1: .word UDELAY_MULT + +/* + * r0 <= 2000 + * lpj <= 0x01ffffff (max. 
3355 bogomips) + * HZ <= 1000 + */ + +ENTRY(__loop_udelay) + ldr r2, .LC1 + mul r0, r2, r0 +ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06 + mov r1, #-1 + ldr r2, .LC0 + ldr r2, [r2] @ max = 0x01ffffff + add r0, r0, r1, lsr #32-14 + mov r0, r0, lsr #14 @ max = 0x0001ffff + add r2, r2, r1, lsr #32-10 + mov r2, r2, lsr #10 @ max = 0x00007fff + mul r0, r2, r0 @ max = 2^32-1 + add r0, r0, r1, lsr #32-6 + movs r0, r0, lsr #6 + moveq pc, lr + +/* + * loops = r0 * HZ * loops_per_jiffy / 1000000 + */ + +@ Delay routine +ENTRY(__loop_delay) + subs r0, r0, #1 +#if 0 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 +#endif + bhi __loop_delay + mov pc, lr +ENDPROC(__loop_udelay) +ENDPROC(__loop_const_udelay) +ENDPROC(__loop_delay) diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay.S deleted file mode 100644 index 3c9a05c8d20..00000000000 --- a/arch/arm/lib/delay.S +++ /dev/null @@ -1,69 +0,0 @@ -/* - * linux/arch/arm/lib/delay.S - * - * Copyright (C) 1995, 1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include - .text - -.LC0: .word loops_per_jiffy -.LC1: .word (2199023*HZ)>>11 - -/* - * r0 <= 2000 - * lpj <= 0x01ffffff (max. 
3355 bogomips) - * HZ <= 1000 - */ - -ENTRY(__udelay) - ldr r2, .LC1 - mul r0, r2, r0 -ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06 - mov r1, #-1 - ldr r2, .LC0 - ldr r2, [r2] @ max = 0x01ffffff - add r0, r0, r1, lsr #32-14 - mov r0, r0, lsr #14 @ max = 0x0001ffff - add r2, r2, r1, lsr #32-10 - mov r2, r2, lsr #10 @ max = 0x00007fff - mul r0, r2, r0 @ max = 2^32-1 - add r0, r0, r1, lsr #32-6 - movs r0, r0, lsr #6 - moveq pc, lr - -/* - * loops = r0 * HZ * loops_per_jiffy / 1000000 - * - * Oh, if only we had a cycle counter... - */ - -@ Delay routine -ENTRY(__delay) - subs r0, r0, #1 -#if 0 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 -#endif - bhi __delay - mov pc, lr -ENDPROC(__udelay) -ENDPROC(__const_udelay) -ENDPROC(__delay) diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c new file mode 100644 index 00000000000..d6dacc69254 --- /dev/null +++ b/arch/arm/lib/delay.c @@ -0,0 +1,71 @@ +/* + * Delay loops based on the OpenRISC implementation. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Will Deacon + */ + +#include +#include +#include +#include +#include + +/* + * Default to the loop-based delay implementation. 
+ */ +struct arm_delay_ops arm_delay_ops = { + .delay = __loop_delay, + .const_udelay = __loop_const_udelay, + .udelay = __loop_udelay, +}; + +#ifdef ARCH_HAS_READ_CURRENT_TIMER +static void __timer_delay(unsigned long cycles) +{ + cycles_t start = get_cycles(); + + while ((get_cycles() - start) < cycles) + cpu_relax(); +} + +static void __timer_const_udelay(unsigned long xloops) +{ + unsigned long long loops = xloops; + loops *= loops_per_jiffy; + __timer_delay(loops >> UDELAY_SHIFT); +} + +static void __timer_udelay(unsigned long usecs) +{ + __timer_const_udelay(usecs * UDELAY_MULT); +} + +void __init init_current_timer_delay(unsigned long freq) +{ + pr_info("Switching to timer-based delay loop\n"); + lpj_fine = freq / HZ; + arm_delay_ops.delay = __timer_delay; + arm_delay_ops.const_udelay = __timer_const_udelay; + arm_delay_ops.udelay = __timer_udelay; +} + +unsigned long __cpuinit calibrate_delay_is_known(void) +{ + return lpj_fine; +} +#endif diff --git a/arch/arm/mach-sa1100/sleep.S b/arch/arm/mach-sa1100/sleep.S index 30cc6721665..85863741ef8 100644 --- a/arch/arm/mach-sa1100/sleep.S +++ b/arch/arm/mach-sa1100/sleep.S @@ -38,9 +38,9 @@ ENTRY(sa1100_finish_suspend) orr r4, r4, #MDREFR_K1DB2 ldr r5, =PPCR - @ Pre-load __udelay into the I-cache + @ Pre-load __loop_udelay into the I-cache mov r0, #1 - bl __udelay + bl __loop_udelay mov r0, r0 @ The following must all exist in a single cache line to @@ -53,11 +53,11 @@ ENTRY(sa1100_finish_suspend) @ delay 90us and set CPU PLL to lowest speed @ fixes resume problem on high speed SA1110 mov r0, #90 - bl __udelay + bl __loop_udelay mov r1, #0 str r1, [r5] mov r0, #90 - bl __udelay + bl __loop_udelay /* * SA1110 SDRAM controller workaround. register values: -- cgit v1.2.3-70-g09d2