From 1fc84ae84b5153e32a4b6ace507f9663e10b0cb2 Mon Sep 17 00:00:00 2001 From: Cyril Chemparathy Date: Mon, 16 Jul 2012 17:20:17 -0400 Subject: ARM: LPAE: use 64-bit accessors for TTBR registers This patch adds TTBR accessor macros, and modifies cpu_get_pgd() and the LPAE version of cpu_set_reserved_ttbr0() to use these instead. In the process, we also fix these functions to correctly handle cases where the physical address lies beyond the 4G limit of 32-bit addressing. Signed-off-by: Cyril Chemparathy Signed-off-by: Vitaly Andrianov Acked-by: Nicolas Pitre Reviewed-by: Catalin Marinas Tested-by: Santosh Shilimkar Tested-by: Subash Patel Signed-off-by: Will Deacon --- arch/arm/mm/context.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'arch/arm/mm/context.c') diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 2ac37372ef5..3675e31473e 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -20,6 +20,7 @@ #include #include #include +#include /* * On ARMv6, we have the following structure in the Context ID: @@ -55,17 +56,11 @@ static cpumask_t tlb_flush_pending; #ifdef CONFIG_ARM_LPAE static void cpu_set_reserved_ttbr0(void) { - unsigned long ttbl = __pa(swapper_pg_dir); - unsigned long ttbh = 0; - /* * Set TTBR0 to swapper_pg_dir which contains only global entries. The * ASID is set to 0. */ - asm volatile( - " mcrr p15, 0, %0, %1, c2 @ set TTBR0\n" - : - : "r" (ttbl), "r" (ttbh)); + cpu_set_ttbr(0, __pa(swapper_pg_dir)); isb(); } #else -- cgit v1.2.3-70-g09d2 From ae120d9edfe96628f03d87634acda0bfa7110632 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 12:06:19 +0100 Subject: ARM: 7767/1: let the ASID allocator handle suspended animation When a CPU is running a process, the ASID for that process is held in a per-CPU variable (the "active ASIDs" array). When the ASID allocator handles a rollover, it copies the active ASIDs into a "reserved ASIDs" array to ensure that a process currently running on another CPU will continue to run unaffected. The active array is zero-ed to indicate that a rollover occurred. Because of this mechanism, a reserved ASID is only remembered for a single rollover. A subsequent rollover will completely refill the reserved ASIDs array. In a severely oversubscribed environment where a CPU can be prevented from running for extended periods of time (think virtual machines), the above has a horrible side effect: [P{a} denotes process P running with ASID a] CPU-0 CPU-1 A{x} [active = ] [suspended] runs B{y} [active = ] [rollover: active = <0 0> reserved = ] runs B{y} [active = <0 y> reserved = ] [rollover: active = <0 0> reserved = <0 y>] runs C{x} [active = <0 x>] [resumes] runs A{x} At that stage, both A and C have the same ASID, with deadly consequences. The fix is to preserve reserved ASIDs across rollovers if the CPU doesn't have an active ASID when the rollover occurs. Cc: # 3.9 Acked-by: Will Deacon Acked-by: Catalin Carinas Signed-off-by: Marc Zyngier Signed-off-by: Russell King --- arch/arm/mm/context.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/arm/mm/context.c') diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 2ac37372ef5..8e12fcbb2c6 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -128,6 +128,15 @@ static void flush_context(unsigned int cpu) asid = 0; } else { asid = atomic64_xchg(&per_cpu(active_asids, i), 0); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = per_cpu(reserved_asids, i); __set_bit(ASID_TO_IDX(asid), asid_map); } per_cpu(reserved_asids, i) = asid; -- cgit v1.2.3-70-g09d2 From b8e4a4740fa2b17c0a447b3ab783b3dc10702e27 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 12:06:55 +0100 Subject: ARM: 7768/1: prevent risks of out-of-bound access in ASID allocator On a CPU that never ran anything, both the active and reserved ASID fields are set to zero. In this case the ASID_TO_IDX() macro will return -1, which is not a very useful value to index a bitmap. Instead of trying to offset the ASID so that ASID #1 is actually bit 0 in the asid_map bitmap, just always ignore bit 0 and start the search from bit 1. This makes the code a bit more readable, and without risk of OoB access. Cc: # 3.9 Acked-by: Will Deacon Acked-by: Catalin Marinas Reported-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Russell King --- arch/arm/mm/context.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/arm/mm/context.c') diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 8e12fcbb2c6..83e09058f96 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -39,10 +39,7 @@ * non 64-bit operations. */ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) -#define NUM_USER_ASIDS (ASID_FIRST_VERSION - 1) - -#define ASID_TO_IDX(asid) ((asid & ~ASID_MASK) - 1) -#define IDX_TO_ASID(idx) ((idx + 1) & ~ASID_MASK) +#define NUM_USER_ASIDS ASID_FIRST_VERSION static DEFINE_RAW_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); @@ -137,7 +134,7 @@ static void flush_context(unsigned int cpu) */ if (asid == 0) asid = per_cpu(reserved_asids, i); - __set_bit(ASID_TO_IDX(asid), asid_map); + __set_bit(asid & ~ASID_MASK, asid_map); } per_cpu(reserved_asids, i) = asid; } @@ -176,17 +173,19 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) /* * Allocate a free ASID. If we can't find one, take a * note of the currently active ASIDs and mark the TLBs - * as requiring flushes. + * as requiring flushes. We always count from ASID #1, + * as we reserve ASID #0 to switch via TTBR0 and indicate + * rollover events. */ - asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); if (asid == NUM_USER_ASIDS) { generation = atomic64_add_return(ASID_FIRST_VERSION, &asid_generation); flush_context(cpu); - asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); } __set_bit(asid, asid_map); - asid = generation | IDX_TO_ASID(asid); + asid |= generation; cpumask_clear(mm_cpumask(mm)); } -- cgit v1.2.3-70-g09d2 From 0d0752bca1f9a91fb646647aa4abbb21156f316c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 12:07:27 +0100 Subject: ARM: 7769/1: Cortex-A15: fix erratum 798181 implementation Looking into the active_asids array is not enough, as we also need to look into the reserved_asids array (they both represent processes that are currently running). Also, not holding the ASID allocator lock is racy, as another CPU could schedule that process and trigger a rollover, making the erratum workaround miss an IPI. Exposing this outside of context.c is a little ugly on the side, so let's define a new entry point that the erratum workaround can call to obtain the cpumask. Cc: # 3.9 Acked-by: Will Deacon Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Russell King --- arch/arm/include/asm/mmu_context.h | 10 +++++++++- arch/arm/kernel/smp_tlb.c | 18 ++---------------- arch/arm/mm/context.c | 29 ++++++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 18 deletions(-) (limited to 'arch/arm/mm/context.c') diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 2a45c33ebdc..b5792b7fd8d 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -28,7 +28,15 @@ void __check_vmalloc_seq(struct mm_struct *mm); void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); #define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) -DECLARE_PER_CPU(atomic64_t, active_asids); +#ifdef CONFIG_ARM_ERRATA_798181 +void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask); +#else /* !CONFIG_ARM_ERRATA_798181 */ +static inline void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask) +{ +} +#endif /* CONFIG_ARM_ERRATA_798181 */ #else /* !CONFIG_CPU_HAS_ASID */ diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index 9a52a07aa40..a98b62dca2f 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -103,7 +103,7 @@ static void broadcast_tlb_a15_erratum(void) static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) { - int cpu, this_cpu; + int this_cpu; cpumask_t mask = { CPU_BITS_NONE }; if (!erratum_a15_798181()) @@ -111,21 +111,7 @@ static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) dummy_flush_tlb_a15_erratum(); this_cpu = get_cpu(); - for_each_online_cpu(cpu) { - if (cpu == this_cpu) - continue; - /* - * We only need to send an IPI if the other CPUs are running - * the same ASID as the one being invalidated. There is no - * need for locking around the active_asids check since the - * switch_mm() function has at least one dmb() (as required by - * this workaround) in case a context switch happens on - * another CPU after the condition below. - */ - if (atomic64_read(&mm->context.id) == - atomic64_read(&per_cpu(active_asids, cpu))) - cpumask_set_cpu(cpu, &mask); - } + a15_erratum_get_cpumask(this_cpu, mm, &mask); smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1); put_cpu(); } diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 83e09058f96..eeab06ebd06 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -45,10 +45,37 @@ static DEFINE_RAW_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); -DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; +#ifdef CONFIG_ARM_ERRATA_798181 +void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask) +{ + int cpu; + unsigned long flags; + u64 context_id, asid; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + context_id = mm->context.id.counter; + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + /* + * We only need to send an IPI if the other CPUs are + * running the same ASID as the one being invalidated. + */ + asid = per_cpu(active_asids, cpu).counter; + if (asid == 0) + asid = per_cpu(reserved_asids, cpu); + if (context_id == asid) + cpumask_set_cpu(cpu, mask); + } + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +} +#endif + #ifdef CONFIG_ARM_LPAE static void cpu_set_reserved_ttbr0(void) { -- cgit v1.2.3-70-g09d2 From 1f49856bb029779d8f1b63517a3a3b34ffe672c7 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 23 Jul 2013 15:13:06 +0100 Subject: ARM: 7789/1: Do not run dummy_flush_tlb_a15_erratum() on non-Cortex-A15 Commit 93dc688 (ARM: 7684/1: errata: Workaround for Cortex-A15 erratum 798181 (TLBI/DSB operations)) causes the following undefined instruction error on a mx53 (Cortex-A8): Internal error: Oops - undefined instruction: 0 [#1] SMP ARM CPU: 0 PID: 275 Comm: modprobe Not tainted 3.11.0-rc2-next-20130722-00009-g9b0f371 #881 task: df46cc00 ti: df48e000 task.ti: df48e000 PC is at check_and_switch_context+0x17c/0x4d0 LR is at check_and_switch_context+0xdc/0x4d0 This problem happens because check_and_switch_context() calls dummy_flush_tlb_a15_erratum() without checking if we are really running on a Cortex-A15 or not. To avoid this issue, only call dummy_flush_tlb_a15_erratum() inside check_and_switch_context() if erratum_a15_798181() returns true, which means that we are really running on a Cortex-A15. Signed-off-by: Fabio Estevam Acked-by: Catalin Marinas Reviewed-by: Roger Quadros Signed-off-by: Russell King --- arch/arm/include/asm/tlbflush.h | 16 ++++++++++++++++ arch/arm/kernel/smp_tlb.c | 17 ----------------- arch/arm/mm/context.c | 3 ++- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/arm/mm/context.c') diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index fdbb9e36974..f467e9b3f8d 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -443,7 +443,18 @@ static inline void local_flush_bp_all(void) isb(); } +#include #ifdef CONFIG_ARM_ERRATA_798181 +static inline int erratum_a15_798181(void) +{ + unsigned int midr = read_cpuid_id(); + + /* Cortex-A15 r0p0..r3p2 affected */ + if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2) + return 0; + return 1; +} + static inline void dummy_flush_tlb_a15_erratum(void) { /* @@ -453,6 +464,11 @@ static inline void dummy_flush_tlb_a15_erratum(void) dsb(); } #else +static inline int erratum_a15_798181(void) +{ + return 0; +} + static inline void dummy_flush_tlb_a15_erratum(void) { } diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index a98b62dca2f..c2edfff573c 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -70,23 +70,6 @@ static inline void ipi_flush_bp_all(void *ignored) local_flush_bp_all(); } -#ifdef CONFIG_ARM_ERRATA_798181 -static int erratum_a15_798181(void) -{ - unsigned int midr = read_cpuid_id(); - - /* Cortex-A15 r0p0..r3p2 affected */ - if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2) - return 0; - return 1; -} -#else -static int erratum_a15_798181(void) -{ - return 0; -} -#endif - static void ipi_flush_tlb_a15_erratum(void *arg) { dmb(); diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index b55b1015724..4a0544492f1 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -245,7 +245,8 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { local_flush_bp_all(); local_flush_tlb_all(); - dummy_flush_tlb_a15_erratum(); + if (erratum_a15_798181()) + dummy_flush_tlb_a15_erratum(); } atomic64_set(&per_cpu(active_asids, cpu), asid); -- cgit v1.2.3-70-g09d2