Diffstat (limited to 'arch')
33 files changed, 972 insertions, 746 deletions
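Orientation before the raw diff: this series replaces the old paca->hard_enabled byte with paca->irq_happened, so that soft-disabled sections merely latch incoming interrupts (external, decrementer, doorbell) and replay them via __check_irq_replay when interrupts are soft-enabled again in arch_local_irq_restore. The following is a minimal, hypothetical C sketch of that flow, not the kernel's actual implementation: the struct, helper names and main() driver are illustrative, and the real kernel replays one interrupt at a time through a re-used exception frame rather than a loop.

/*
 * Self-contained sketch (NOT kernel code) of the lazy interrupt-disable
 * scheme: local_irq_disable() only clears a per-CPU "soft enabled" flag;
 * an interrupt arriving while soft-disabled is latched in irq_happened
 * and the CPU stays hard-disabled; re-enabling replays latched events.
 * Flag names mirror the patch (PACA_IRQ_*), the logic is simplified.
 */
#include <stdio.h>

#define PACA_IRQ_HARD_DIS 0x01
#define PACA_IRQ_DBELL    0x02
#define PACA_IRQ_EE       0x04
#define PACA_IRQ_DEC      0x08

struct paca {
	unsigned char soft_enabled;   /* 1 = interrupts logically on */
	unsigned char irq_happened;   /* events latched while soft-disabled */
} paca = { .soft_enabled = 1 };

static void handle_external(void)    { puts("replaying external interrupt (0x500)"); }
static void handle_decrementer(void) { puts("replaying decrementer (0x900)"); }

/* Fast path: just flip the software flag, no MSR access needed. */
static void local_irq_disable(void) { paca.soft_enabled = 0; }

/* What the masked 0x500/0x900 entry code does when it finds
 * soft_enabled == 0: record the event and stay hard-disabled. */
static void interrupt_while_soft_disabled(unsigned char bit)
{
	paca.irq_happened |= bit | PACA_IRQ_HARD_DIS;
}

/* Analogue of __check_irq_replay(): consume one latched event. */
static unsigned int check_irq_replay(void)
{
	paca.irq_happened &= ~PACA_IRQ_HARD_DIS;
	if (paca.irq_happened & PACA_IRQ_EE) {
		paca.irq_happened &= ~PACA_IRQ_EE;
		return 0x500;
	}
	if (paca.irq_happened & PACA_IRQ_DEC) {
		paca.irq_happened &= ~PACA_IRQ_DEC;
		return 0x900;
	}
	return 0;
}

/* Analogue of arch_local_irq_restore(1): replay, then soft-enable. */
static void local_irq_enable(void)
{
	unsigned int vec;

	while ((vec = check_irq_replay()) != 0) {
		if (vec == 0x500)
			handle_external();
		else if (vec == 0x900)
			handle_decrementer();
	}
	paca.soft_enabled = 1;   /* hard-enable follows as a side effect */
}

int main(void)
{
	local_irq_disable();
	interrupt_while_soft_disabled(PACA_IRQ_EE);   /* device raises an IRQ */
	interrupt_while_soft_disabled(PACA_IRQ_DEC);  /* timer fires too */
	local_irq_enable();                           /* both get replayed */
	return 0;
}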
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 8057f4f6980..548da3aa0a3 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -232,23 +232,30 @@ label##_hv: \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ EXC_HV, KVMTEST, vec) -#define __SOFTEN_TEST(h) \ +/* This associate vector numbers with bits in paca->irq_happened */ +#define SOFTEN_VALUE_0x500 PACA_IRQ_EE +#define SOFTEN_VALUE_0x502 PACA_IRQ_EE +#define SOFTEN_VALUE_0x900 PACA_IRQ_DEC +#define SOFTEN_VALUE_0x982 PACA_IRQ_DEC + +#define __SOFTEN_TEST(h, vec) \ lbz r10,PACASOFTIRQEN(r13); \ cmpwi r10,0; \ + li r10,SOFTEN_VALUE_##vec; \ beq masked_##h##interrupt -#define _SOFTEN_TEST(h) __SOFTEN_TEST(h) +#define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec) #define SOFTEN_TEST_PR(vec) \ KVMTEST_PR(vec); \ - _SOFTEN_TEST(EXC_STD) + _SOFTEN_TEST(EXC_STD, vec) #define SOFTEN_TEST_HV(vec) \ KVMTEST(vec); \ - _SOFTEN_TEST(EXC_HV) + _SOFTEN_TEST(EXC_HV, vec) #define SOFTEN_TEST_HV_201(vec) \ KVMTEST(vec); \ - _SOFTEN_TEST(EXC_STD) + _SOFTEN_TEST(EXC_STD, vec) #define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ HMT_MEDIUM; \ @@ -272,73 +279,55 @@ label##_hv: \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ EXC_HV, SOFTEN_TEST_HV) -#ifdef CONFIG_PPC_ISERIES -#define DISABLE_INTS \ - li r11,0; \ - stb r11,PACASOFTIRQEN(r13); \ -BEGIN_FW_FTR_SECTION; \ - stb r11,PACAHARDIRQEN(r13); \ -END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES); \ - TRACE_DISABLE_INTS; \ -BEGIN_FW_FTR_SECTION; \ - mfmsr r10; \ - ori r10,r10,MSR_EE; \ - mtmsrd r10,1; \ -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#else -#define DISABLE_INTS \ - li r11,0; \ - stb r11,PACASOFTIRQEN(r13); \ - stb r11,PACAHARDIRQEN(r13); \ - TRACE_DISABLE_INTS -#endif /* CONFIG_PPC_ISERIES */ +/* + * Our exception common code can be passed various "additions" + * to specify the behaviour of interrupts, whether to kick the + * runlatch, etc... + */ + +/* Exception addition: Hard disable interrupts */ +#define DISABLE_INTS SOFT_DISABLE_INTS(r10,r11) +/* Exception addition: Keep interrupt state */ #define ENABLE_INTS \ + ld r11,PACAKMSR(r13); \ ld r12,_MSR(r1); \ - mfmsr r11; \ rlwimi r11,r12,0,MSR_EE; \ mtmsrd r11,1 -#define STD_EXCEPTION_COMMON(trap, label, hdlr) \ - .align 7; \ - .globl label##_common; \ -label##_common: \ - EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ - DISABLE_INTS; \ - bl .save_nvgprs; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b .ret_from_except +#define ADD_NVGPRS \ + bl .save_nvgprs + +#define RUNLATCH_ON \ +BEGIN_FTR_SECTION \ + clrrdi r3,r1,THREAD_SHIFT; \ + ld r4,TI_LOCAL_FLAGS(r3); \ + andi. r0,r4,_TLF_RUNLATCH; \ + beql ppc64_runlatch_on_trampoline; \ +END_FTR_SECTION_IFSET(CPU_FTR_CTRL) + +#define EXCEPTION_COMMON(trap, label, hdlr, ret, additions) \ + .align 7; \ + .globl label##_common; \ +label##_common: \ + EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + additions; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b ret + +#define STD_EXCEPTION_COMMON(trap, label, hdlr) \ + EXCEPTION_COMMON(trap, label, hdlr, ret_from_except, \ + ADD_NVGPRS;DISABLE_INTS) /* * Like STD_EXCEPTION_COMMON, but for exceptions that can occur - * in the idle task and therefore need the special idle handling. 
+ * in the idle task and therefore need the special idle handling + * (finish nap and runlatch) */ -#define STD_EXCEPTION_COMMON_IDLE(trap, label, hdlr) \ - .align 7; \ - .globl label##_common; \ -label##_common: \ - EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ - FINISH_NAP; \ - DISABLE_INTS; \ - bl .save_nvgprs; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b .ret_from_except - -#define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr) \ - .align 7; \ - .globl label##_common; \ -label##_common: \ - EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ - FINISH_NAP; \ - DISABLE_INTS; \ -BEGIN_FTR_SECTION \ - bl .ppc64_runlatch_on; \ -END_FTR_SECTION_IFSET(CPU_FTR_CTRL) \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b .ret_from_except_lite +#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ + EXCEPTION_COMMON(trap, label, hdlr, ret_from_except_lite, \ + FINISH_NAP;RUNLATCH_ON;DISABLE_INTS) /* * When the idle code in power4_idle puts the CPU into NAP mode, diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index bb712c9488b..51010bfc792 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -11,6 +11,27 @@ #include <asm/ptrace.h> #include <asm/processor.h> +#ifdef CONFIG_PPC64 + +/* + * PACA flags in paca->irq_happened. + * + * This bits are set when interrupts occur while soft-disabled + * and allow a proper replay. Additionally, PACA_IRQ_HARD_DIS + * is set whenever we manually hard disable. + */ +#define PACA_IRQ_HARD_DIS 0x01 +#define PACA_IRQ_DBELL 0x02 +#define PACA_IRQ_EE 0x04 +#define PACA_IRQ_DEC 0x08 /* Or FIT */ +#define PACA_IRQ_EE_EDGE 0x10 /* BookE only */ + +#endif /* CONFIG_PPC64 */ + +#ifndef __ASSEMBLY__ + +extern void __replay_interrupt(unsigned int vector); + extern void timer_interrupt(struct pt_regs *); #ifdef CONFIG_PPC64 @@ -42,7 +63,6 @@ static inline unsigned long arch_local_irq_disable(void) } extern void arch_local_irq_restore(unsigned long); -extern void iseries_handle_interrupts(void); static inline void arch_local_irq_enable(void) { @@ -68,16 +88,33 @@ static inline bool arch_irqs_disabled(void) #define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory"); #define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory"); #else -#define __hard_irq_enable() __mtmsrd(mfmsr() | MSR_EE, 1) -#define __hard_irq_disable() __mtmsrd(mfmsr() & ~MSR_EE, 1) +#define __hard_irq_enable() __mtmsrd(local_paca->kernel_msr | MSR_EE, 1) +#define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1) #endif -#define hard_irq_disable() \ - do { \ - __hard_irq_disable(); \ - get_paca()->soft_enabled = 0; \ - get_paca()->hard_enabled = 0; \ - } while(0) +static inline void hard_irq_disable(void) +{ + __hard_irq_disable(); + get_paca()->soft_enabled = 0; + get_paca()->irq_happened |= PACA_IRQ_HARD_DIS; +} + +/* + * This is called by asynchronous interrupts to conditionally + * re-enable hard interrupts when soft-disabled after having + * cleared the source of the interrupt + */ +static inline void may_hard_irq_enable(void) +{ + get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; + if (!(get_paca()->irq_happened & PACA_IRQ_EE)) + __hard_irq_enable(); +} + +static inline bool arch_irq_disabled_regs(struct pt_regs *regs) +{ + return !regs->softe; +} #else /* CONFIG_PPC64 */ @@ -139,6 +176,13 @@ static inline bool arch_irqs_disabled(void) #define hard_irq_disable() arch_local_irq_disable() +static inline bool arch_irq_disabled_regs(struct pt_regs *regs) +{ + return !(regs->msr & MSR_EE); +} + +static inline 
void may_hard_irq_enable(void) { } + #endif /* CONFIG_PPC64 */ #define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST @@ -149,5 +193,6 @@ static inline bool arch_irqs_disabled(void) */ struct irq_chip; +#endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HW_IRQ_H */ diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index b0b06d85788..6f9b6e23dc5 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -39,24 +39,31 @@ #define TRACE_ENABLE_INTS TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on) #define TRACE_DISABLE_INTS TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off) -#define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip) \ - cmpdi en,0; \ - bne 95f; \ - stb en,PACASOFTIRQEN(r13); \ - TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off) \ - b skip; \ -95: TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on) \ - li en,1; -#define TRACE_AND_RESTORE_IRQ(en) \ - TRACE_AND_RESTORE_IRQ_PARTIAL(en,96f); \ - stb en,PACASOFTIRQEN(r13); \ -96: +/* + * This is used by assembly code to soft-disable interrupts + */ +#define SOFT_DISABLE_INTS(__rA, __rB) \ + lbz __rA,PACASOFTIRQEN(r13); \ + lbz __rB,PACAIRQHAPPENED(r13); \ + cmpwi cr0,__rA,0; \ + li __rA,0; \ + ori __rB,__rB,PACA_IRQ_HARD_DIS; \ + stb __rB,PACAIRQHAPPENED(r13); \ + beq 44f; \ + stb __rA,PACASOFTIRQEN(r13); \ + TRACE_DISABLE_INTS; \ +44: + #else #define TRACE_ENABLE_INTS #define TRACE_DISABLE_INTS -#define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip) -#define TRACE_AND_RESTORE_IRQ(en) \ - stb en,PACASOFTIRQEN(r13) + +#define SOFT_DISABLE_INTS(__rA, __rB) \ + lbz __rA,PACAIRQHAPPENED(r13); \ + li __rB,0; \ + ori __rA,__rA,PACA_IRQ_HARD_DIS; \ + stb __rB,PACASOFTIRQEN(r13); \ + stb __rA,PACAIRQHAPPENED(r13) #endif #endif diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 269c05a36d9..daf813fea91 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -132,7 +132,7 @@ struct paca_struct { u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ u8 soft_enabled; /* irq soft-enable flag */ - u8 hard_enabled; /* set if irqs are enabled in MSR */ + u8 irq_happened; /* irq happened while soft-disabled */ u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ u8 nap_state_lost; /* NV GPR values lost in power7_idle */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 368f72f7980..50f73aa2ba2 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -60,6 +60,8 @@ BEGIN_FW_FTR_SECTION; \ cmpd cr1,r11,r10; \ beq+ cr1,33f; \ bl .accumulate_stolen_time; \ + ld r12,_MSR(r1); \ + andi. 
r10,r12,MSR_PR; /* Restore cr0 (coming from user) */ \ 33: \ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7fdc2c0b7fa..b1a215eabef 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1079,30 +1079,12 @@ #define proc_trap() asm volatile("trap") -#ifdef CONFIG_PPC64 - -extern void ppc64_runlatch_on(void); -extern void __ppc64_runlatch_off(void); - -#define ppc64_runlatch_off() \ - do { \ - if (cpu_has_feature(CPU_FTR_CTRL) && \ - test_thread_flag(TIF_RUNLATCH)) \ - __ppc64_runlatch_off(); \ - } while (0) +#define __get_SP() ({unsigned long sp; \ + asm volatile("mr %0,1": "=r" (sp)); sp;}) extern unsigned long scom970_read(unsigned int address); extern void scom970_write(unsigned int address, unsigned long value); -#else -#define ppc64_runlatch_on() -#define ppc64_runlatch_off() - -#endif /* CONFIG_PPC64 */ - -#define __get_SP() ({unsigned long sp; \ - asm volatile("mr %0,1": "=r" (sp)); sp;}) - struct pt_regs; extern void ppc_save_regs(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index c377457d1b8..a02883d5af4 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -550,5 +550,43 @@ extern void reloc_got2(unsigned long); extern struct dentry *powerpc_debugfs_root; +#ifdef CONFIG_PPC64 + +extern void __ppc64_runlatch_on(void); +extern void __ppc64_runlatch_off(void); + +/* + * We manually hard enable-disable, this is called + * in the idle loop and we don't want to mess up + * with soft-disable/enable & interrupt replay. + */ +#define ppc64_runlatch_off() \ + do { \ + if (cpu_has_feature(CPU_FTR_CTRL) && \ + test_thread_local_flags(_TLF_RUNLATCH)) { \ + unsigned long msr = mfmsr(); \ + __hard_irq_disable(); \ + __ppc64_runlatch_off(); \ + if (msr & MSR_EE) \ + __hard_irq_enable(); \ + } \ + } while (0) + +#define ppc64_runlatch_on() \ + do { \ + if (cpu_has_feature(CPU_FTR_CTRL) && \ + !test_thread_local_flags(_TLF_RUNLATCH)) { \ + unsigned long msr = mfmsr(); \ + __hard_irq_disable(); \ + __ppc64_runlatch_on(); \ + if (msr & MSR_EE) \ + __hard_irq_enable(); \ + } \ + } while (0) +#else +#define ppc64_runlatch_on() +#define ppc64_runlatch_off() +#endif /* CONFIG_PPC64 */ + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SYSTEM_H */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 96471494096..4a741c7efd0 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -110,7 +110,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NOERROR 12 /* Force successful syscall return */ #define TIF_NOTIFY_RESUME 13 /* callback before returning to user */ #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ -#define TIF_RUNLATCH 16 /* Is the runlatch enabled? */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -141,11 +140,13 @@ static inline struct thread_info *current_thread_info(void) #define TLF_SLEEPING 1 /* suspend code enabled SLEEP mode */ #define TLF_RESTORE_SIGMASK 2 /* Restore signal mask in do_signal */ #define TLF_LAZY_MMU 3 /* tlb_batch is active */ +#define TLF_RUNLATCH 4 /* Is the runlatch enabled? 
*/ #define _TLF_NAPPING (1 << TLF_NAPPING) #define _TLF_SLEEPING (1 << TLF_SLEEPING) #define _TLF_RESTORE_SIGMASK (1 << TLF_RESTORE_SIGMASK) #define _TLF_LAZY_MMU (1 << TLF_LAZY_MMU) +#define _TLF_RUNLATCH (1 << TLF_RUNLATCH) #ifndef __ASSEMBLY__ #define HAVE_SET_RESTORE_SIGMASK 1 @@ -156,6 +157,12 @@ static inline void set_restore_sigmask(void) set_bit(TIF_SIGPENDING, &ti->flags); } +static inline bool test_thread_local_flags(unsigned int flags) +{ + struct thread_info *ti = current_thread_info(); + return (ti->local_flags & flags) != 0; +} + #ifdef CONFIG_PPC64 #define is_32bit_task() (test_thread_flag(TIF_32BIT)) #else diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 04caee7d9bc..cdd0d264415 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -147,7 +147,7 @@ int main(void) DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase)); DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); - DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); + DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 2cc451aaaca..5b25c8060fd 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -37,6 +37,8 @@ void doorbell_exception(struct pt_regs *regs) irq_enter(); + may_hard_irq_enable(); + smp_ipi_demux(); irq_exit(); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 866462cbe2d..f8a7a1a1a9f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -32,6 +32,7 @@ #include <asm/ptrace.h> #include <asm/irqflags.h> #include <asm/ftrace.h> +#include <asm/hw_irq.h> /* * System calls. @@ -115,39 +116,33 @@ BEGIN_FW_FTR_SECTION END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ -#ifdef CONFIG_TRACE_IRQFLAGS - bl .trace_hardirqs_on - REST_GPR(0,r1) - REST_4GPRS(3,r1) - REST_2GPRS(7,r1) - addi r9,r1,STACK_FRAME_OVERHEAD - ld r12,_MSR(r1) -#endif /* CONFIG_TRACE_IRQFLAGS */ - li r10,1 - stb r10,PACASOFTIRQEN(r13) - stb r10,PACAHARDIRQEN(r13) - std r10,SOFTE(r1) -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - /* Hack for handling interrupts when soft-enabling on iSeries */ - cmpdi cr1,r0,0x5555 /* syscall 0x5555 */ - andi. r10,r12,MSR_PR /* from kernel */ - crand 4*cr0+eq,4*cr1+eq,4*cr0+eq - bne 2f - b hardware_interrupt_entry -2: -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif /* CONFIG_PPC_ISERIES */ + /* + * A syscall should always be called with interrupts enabled + * so we just unconditionally hard-enable here. 
When some kind + * of irq tracing is used, we additionally check that condition + * is correct + */ +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) + lbz r10,PACASOFTIRQEN(r13) + xori r10,r10,1 +1: tdnei r10,0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING +#endif - /* Hard enable interrupts */ #ifdef CONFIG_PPC_BOOK3E wrteei 1 #else - mfmsr r11 + ld r11,PACAKMSR(r13) ori r11,r11,MSR_EE mtmsrd r11,1 #endif /* CONFIG_PPC_BOOK3E */ + /* We do need to set SOFTE in the stack frame or the return + * from interrupt will be painful + */ + li r10,1 + std r10,SOFTE(r1) + #ifdef SHOW_SYSCALLS bl .do_show_syscall REST_GPR(0,r1) @@ -198,16 +193,14 @@ syscall_exit: andi. r10,r8,MSR_RI beq- unrecov_restore #endif - - /* Disable interrupts so current_thread_info()->flags can't change, + /* + * Disable interrupts so current_thread_info()->flags can't change, * and so that we don't get interrupted after loading SRR0/1. */ #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - mfmsr r10 - rldicl r10,r10,48,1 - rotldi r10,r10,16 + ld r10,PACAKMSR(r13) mtmsrd r10,1 #endif /* CONFIG_PPC_BOOK3E */ @@ -319,7 +312,7 @@ syscall_exit_work: #ifdef CONFIG_PPC_BOOK3E wrteei 1 #else - mfmsr r10 + ld r10,PACAKMSR(r13) ori r10,r10,MSR_EE mtmsrd r10,1 #endif /* CONFIG_PPC_BOOK3E */ @@ -565,10 +558,8 @@ _GLOBAL(ret_from_except_lite) #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - mfmsr r10 /* Get current interrupt state */ - rldicl r9,r10,48,1 /* clear MSR_EE */ - rotldi r9,r9,16 - mtmsrd r9,1 /* Update machine state */ + ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ + mtmsrd r10,1 /* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PREEMPT @@ -591,25 +582,74 @@ _GLOBAL(ret_from_except_lite) ld r4,TI_FLAGS(r9) andi. r0,r4,_TIF_USER_WORK_MASK bne do_work -#endif +#endif /* !CONFIG_PREEMPT */ + .globl fast_exc_return_irq +fast_exc_return_irq: restore: -BEGIN_FW_FTR_SECTION + /* + * This is the main kernel exit path, we first check if we + * have to change our interrupt state. + */ ld r5,SOFTE(r1) -FW_FTR_SECTION_ELSE - b .Liseries_check_pending_irqs -ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) -2: - TRACE_AND_RESTORE_IRQ(r5); + lbz r6,PACASOFTIRQEN(r13) + cmpwi cr1,r5,0 + cmpw cr0,r5,r6 + beq cr0,4f + + /* We do, handle disable first, which is easy */ + bne cr1,3f; + li r0,0 + stb r0,PACASOFTIRQEN(r13); + TRACE_DISABLE_INTS + b 4f - /* extract EE bit and use it to restore paca->hard_enabled */ - ld r3,_MSR(r1) - rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ - stb r4,PACAHARDIRQEN(r13) +3: /* + * We are about to soft-enable interrupts (we are hard disabled + * at this point). We check if there's anything that needs to + * be replayed first. + */ + lbz r0,PACAIRQHAPPENED(r13) + cmpwi cr0,r0,0 + bne- restore_check_irq_replay + /* + * Get here when nothing happened while soft-disabled, just + * soft-enable and move-on. We will hard-enable as a side + * effect of rfi + */ +restore_no_replay: + TRACE_ENABLE_INTS + li r0,1 + stb r0,PACASOFTIRQEN(r13); + + /* + * Final return path. BookE is handled in a different file + */ +4: #ifdef CONFIG_PPC_BOOK3E b .exception_return_book3e #else + /* + * Clear the reservation. If we know the CPU tracks the address of + * the reservation then we can potentially save some cycles and use + * a larx. On POWER6 and POWER7 this is significantly faster. + */ +BEGIN_FTR_SECTION + stdcx. 
r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r4,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + + /* + * Some code path such as load_up_fpu or altivec return directly + * here. They run entirely hard disabled and do not alter the + * interrupt state. They also don't use lwarx/stwcx. and thus + * are known not to leave dangling reservations. + */ + .globl fast_exception_return +fast_exception_return: + ld r3,_MSR(r1) ld r4,_CTR(r1) ld r0,_LINK(r1) mtctr r4 @@ -623,28 +663,18 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) beq- unrecov_restore /* - * Clear the reservation. If we know the CPU tracks the address of - * the reservation then we can potentially save some cycles and use - * a larx. On POWER6 and POWER7 this is significantly faster. - */ -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -FTR_SECTION_ELSE - ldarx r4,0,r1 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - - /* * Clear RI before restoring r13. If we are returning to * userspace and we take an exception after restoring r13, * we end up corrupting the userspace r13 value. */ - mfmsr r4 - andc r4,r4,r0 /* r0 contains MSR_RI here */ + ld r4,PACAKMSR(r13) /* Get kernel MSR without EE */ + andc r4,r4,r0 /* r0 contains MSR_RI here */ mtmsrd r4,1 /* * r13 is our per cpu area, only restore it if we are returning to - * userspace + * userspace the value stored in the stack frame may belong to + * another CPU. */ andi. r0,r3,MSR_PR beq 1f @@ -669,30 +699,55 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) #endif /* CONFIG_PPC_BOOK3E */ -.Liseries_check_pending_irqs: -#ifdef CONFIG_PPC_ISERIES - ld r5,SOFTE(r1) - cmpdi 0,r5,0 - beq 2b - /* Check for pending interrupts (iSeries) */ - ld r3,PACALPPACAPTR(r13) - ld r3,LPPACAANYINT(r3) - cmpdi r3,0 - beq+ 2b /* skip do_IRQ if no interrupts */ - - li r3,0 - stb r3,PACASOFTIRQEN(r13) /* ensure we are soft-disabled */ -#ifdef CONFIG_TRACE_IRQFLAGS - bl .trace_hardirqs_off - mfmsr r10 -#endif - ori r10,r10,MSR_EE - mtmsrd r10 /* hard-enable again */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_IRQ - b .ret_from_except_lite /* loop back and handle more */ -#endif + /* + * Something did happen, check if a re-emit is needed + * (this also clears paca->irq_happened) + */ +restore_check_irq_replay: + /* XXX: We could implement a fast path here where we check + * for irq_happened being just 0x01, in which case we can + * clear it and return. That means that we would potentially + * miss a decrementer having wrapped all the way around. + * + * Still, this might be useful for things like hash_page + */ + bl .__check_irq_replay + cmpwi cr0,r3,0 + beq restore_no_replay + + /* + * We need to re-emit an interrupt. We do so by re-using our + * existing exception frame. We first change the trap value, + * but we need to ensure we preserve the low nibble of it + */ + ld r4,_TRAP(r1) + clrldi r4,r4,60 + or r4,r4,r3 + std r4,_TRAP(r1) + /* + * Then find the right handler and call it. Interrupts are + * still soft-disabled and we keep them that way. + */ + cmpwi cr0,r3,0x500 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl .do_IRQ + b .ret_from_except +1: cmpwi cr0,r3,0x900 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl .timer_interrupt + b .ret_from_except +#ifdef CONFIG_PPC_BOOK3E +1: cmpwi cr0,r3,0x280 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl .doorbell_exception + b .ret_from_except +#endif /* CONFIG_PPC_BOOK3E */ +1: b .ret_from_except /* What else to do here ? */ + do_work: #ifdef CONFIG_PREEMPT andi. 
r0,r3,MSR_PR /* Returning to user mode? */ @@ -705,31 +760,22 @@ do_work: crandc eq,cr1*4+eq,eq bne restore - /* Here we are preempting the current task. - * - * Ensure interrupts are soft-disabled. We also properly mark - * the PACA to reflect the fact that they are hard-disabled - * and trace the change + /* + * Here we are preempting the current task. We want to make + * sure we are soft-disabled first */ - li r0,0 - stb r0,PACASOFTIRQEN(r13) - stb r0,PACAHARDIRQEN(r13) - TRACE_DISABLE_INTS - - /* Call the scheduler with soft IRQs off */ + SOFT_DISABLE_INTS(r3,r4) 1: bl .preempt_schedule_irq /* Hard-disable interrupts again (and update PACA) */ #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - mfmsr r10 - rldicl r10,r10,48,1 - rotldi r10,r10,16 + ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ mtmsrd r10,1 #endif /* CONFIG_PPC_BOOK3E */ - li r0,0 - stb r0,PACAHARDIRQEN(r13) + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) /* Re-test flags and eventually loop */ clrrdi r9,r1,THREAD_SHIFT @@ -751,14 +797,12 @@ user_work: andi. r0,r4,_TIF_NEED_RESCHED beq 1f - li r5,1 - TRACE_AND_RESTORE_IRQ(r5); + bl .restore_interrupts bl .schedule b .ret_from_except_lite 1: bl .save_nvgprs - li r5,1 - TRACE_AND_RESTORE_IRQ(r5); + bl .restore_interrupts addi r3,r1,STACK_FRAME_OVERHEAD bl .do_notify_resume b .ret_from_except diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 429983c06f9..7215cc2495d 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -24,6 +24,7 @@ #include <asm/ptrace.h> #include <asm/ppc-opcode.h> #include <asm/mmu.h> +#include <asm/hw_irq.h> /* XXX This will ultimately add space for a special exception save * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc... @@ -77,59 +78,55 @@ #define SPRN_MC_SRR1 SPRN_MCSRR1 #define NORMAL_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, GEN, addition##_GEN) + EXCEPTION_PROLOG(n, GEN, addition##_GEN(n)) #define CRIT_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, CRIT, addition##_CRIT) + EXCEPTION_PROLOG(n, CRIT, addition##_CRIT(n)) #define DBG_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, DBG, addition##_DBG) + EXCEPTION_PROLOG(n, DBG, addition##_DBG(n)) #define MC_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, MC, addition##_MC) + EXCEPTION_PROLOG(n, MC, addition##_MC(n)) /* Variants of the "addition" argument for the prolog */ -#define PROLOG_ADDITION_NONE_GEN -#define PROLOG_ADDITION_NONE_CRIT -#define PROLOG_ADDITION_NONE_DBG -#define PROLOG_ADDITION_NONE_MC +#define PROLOG_ADDITION_NONE_GEN(n) +#define PROLOG_ADDITION_NONE_CRIT(n) +#define PROLOG_ADDITION_NONE_DBG(n) +#define PROLOG_ADDITION_NONE_MC(n) -#define PROLOG_ADDITION_MASKABLE_GEN \ +#define PROLOG_ADDITION_MASKABLE_GEN(n) \ lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? 
*/ \ cmpwi cr0,r11,0; /* yes -> go out of line */ \ - beq masked_interrupt_book3e; + beq masked_interrupt_book3e_##n -#define PROLOG_ADDITION_2REGS_GEN \ +#define PROLOG_ADDITION_2REGS_GEN(n) \ std r14,PACA_EXGEN+EX_R14(r13); \ std r15,PACA_EXGEN+EX_R15(r13) -#define PROLOG_ADDITION_1REG_GEN \ +#define PROLOG_ADDITION_1REG_GEN(n) \ std r14,PACA_EXGEN+EX_R14(r13); -#define PROLOG_ADDITION_2REGS_CRIT \ +#define PROLOG_ADDITION_2REGS_CRIT(n) \ std r14,PACA_EXCRIT+EX_R14(r13); \ std r15,PACA_EXCRIT+EX_R15(r13) -#define PROLOG_ADDITION_2REGS_DBG \ +#define PROLOG_ADDITION_2REGS_DBG(n) \ std r14,PACA_EXDBG+EX_R14(r13); \ std r15,PACA_EXDBG+EX_R15(r13) -#define PROLOG_ADDITION_2REGS_MC \ +#define PROLOG_ADDITION_2REGS_MC(n) \ std r14,PACA_EXMC+EX_R14(r13); \ std r15,PACA_EXMC+EX_R15(r13) -#define PROLOG_ADDITION_DOORBELL_GEN \ - lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ - cmpwi cr0,r11,0; /* yes -> go out of line */ \ - beq masked_doorbell_book3e - /* Core exception code for all exceptions except TLB misses. * XXX: Needs to make SPRN_SPRG_GEN depend on exception type */ #define EXCEPTION_COMMON(n, excf, ints) \ +exc_##n##_common: \ std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ @@ -167,20 +164,21 @@ std r0,RESULT(r1); /* clear regs->result */ \ ints; -/* Variants for the "ints" argument */ +/* Variants for the "ints" argument. This one does nothing when we want + * to keep interrupts in their original state + */ #define INTS_KEEP -#define INTS_DISABLE_SOFT \ - stb r0,PACASOFTIRQEN(r13); /* mark interrupts soft-disabled */ \ - TRACE_DISABLE_INTS; -#define INTS_DISABLE_HARD \ - stb r0,PACAHARDIRQEN(r13); /* and hard disabled */ -#define INTS_DISABLE_ALL \ - INTS_DISABLE_SOFT \ - INTS_DISABLE_HARD - -/* This is called by exceptions that used INTS_KEEP (that is did not clear - * neither soft nor hard IRQ indicators in the PACA. This will restore MSR:EE - * to it's previous value + +/* This second version is meant for exceptions that don't immediately + * hard-enable. We set a bit in paca->irq_happened to ensure that + * a subsequent call to arch_local_irq_restore() will properly + * hard-enable and avoid the fast-path + */ +#define INTS_DISABLE SOFT_DISABLE_INTS(r3,r4) + +/* This is called by exceptions that used INTS_KEEP (that did not touch + * irq indicators in the PACA). 
This will restore MSR:EE to it's previous + * value * * XXX In the long run, we may want to open-code it in order to separate the * load from the wrtee, thus limiting the latency caused by the dependency @@ -238,7 +236,7 @@ exc_##n##_bad_stack: \ #define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE) \ - EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE_ALL) \ + EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE) \ ack(r8); \ CHECK_NAPPING(); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -289,7 +287,7 @@ interrupt_end_book3e: /* Critical Input Interrupt */ START_EXCEPTION(critical_input); CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE_ALL) +// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE) // bl special_reg_save_crit // CHECK_NAPPING(); // addi r3,r1,STACK_FRAME_OVERHEAD @@ -300,7 +298,7 @@ interrupt_end_book3e: /* Machine Check Interrupt */ START_EXCEPTION(machine_check); CRIT_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE_ALL) +// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE) // bl special_reg_save_mc // addi r3,r1,STACK_FRAME_OVERHEAD // CHECK_NAPPING(); @@ -313,7 +311,7 @@ interrupt_end_book3e: NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR - EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_KEEP) + EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE) b storage_fault_common /* Instruction Storage Interrupt */ @@ -321,7 +319,7 @@ interrupt_end_book3e: NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS) li r15,0 mr r14,r10 - EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_KEEP) + EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE) b storage_fault_common /* External Input Interrupt */ @@ -339,12 +337,11 @@ interrupt_end_book3e: START_EXCEPTION(program); NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG) mfspr r14,SPRN_ESR - EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE_SOFT) + EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE) std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) bl .save_nvgprs - INTS_RESTORE_HARD bl .program_check_exception b .ret_from_except @@ -353,15 +350,16 @@ interrupt_end_book3e: NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE) /* we can probably do a shorter exception entry for that one... */ EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP) - bne 1f /* if from user, just load it up */ + ld r12,_MSR(r1) + andi. 
r0,r12,MSR_PR; + beq- 1f + bl .load_up_fpu + b fast_exception_return +1: INTS_DISABLE bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD - INTS_RESTORE_HARD bl .kernel_fp_unavailable_exception - BUG_OPCODE -1: ld r12,_MSR(r1) - bl .load_up_fpu - b fast_exception_return + b .ret_from_except /* Decrementer Interrupt */ MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, ACK_DEC) @@ -372,7 +370,7 @@ interrupt_end_book3e: /* Watchdog Timer Interrupt */ START_EXCEPTION(watchdog); CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE_ALL) +// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE) // bl special_reg_save_crit // CHECK_NAPPING(); // addi r3,r1,STACK_FRAME_OVERHEAD @@ -391,10 +389,9 @@ interrupt_end_book3e: /* Auxiliary Processor Unavailable Interrupt */ START_EXCEPTION(ap_unavailable); NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE) - EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_KEEP) - addi r3,r1,STACK_FRAME_OVERHEAD + EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE) bl .save_nvgprs - INTS_RESTORE_HARD + addi r3,r1,STACK_FRAME_OVERHEAD bl .unknown_exception b .ret_from_except @@ -450,7 +447,7 @@ interrupt_end_book3e: mfspr r15,SPRN_SPRG_CRIT_SCRATCH mtspr SPRN_SPRG_GEN_SCRATCH,r15 mfspr r14,SPRN_DBSR - EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE_ALL) + EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE) std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD mr r4,r14 @@ -465,7 +462,7 @@ kernel_dbg_exc: /* Debug exception as a debug interrupt*/ START_EXCEPTION(debug_debug); - DBG_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS) + DBG_EXCEPTION_PROLOG(0xd08, PROLOG_ADDITION_2REGS) /* * If there is a single step or branch-taken exception in an @@ -515,7 +512,7 @@ kernel_dbg_exc: mfspr r15,SPRN_SPRG_DBG_SCRATCH mtspr SPRN_SPRG_GEN_SCRATCH,r15 mfspr r14,SPRN_DBSR - EXCEPTION_COMMON(0xd00, PACA_EXDBG, INTS_DISABLE_ALL) + EXCEPTION_COMMON(0xd08, PACA_EXDBG, INTS_DISABLE) std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD mr r4,r14 @@ -525,21 +522,20 @@ kernel_dbg_exc: bl .DebugException b .ret_from_except - MASKABLE_EXCEPTION(0x260, perfmon, .performance_monitor_exception, ACK_NONE) - -/* Doorbell interrupt */ - START_EXCEPTION(doorbell) - NORMAL_EXCEPTION_PROLOG(0x2070, PROLOG_ADDITION_DOORBELL) - EXCEPTION_COMMON(0x2070, PACA_EXGEN, INTS_DISABLE_ALL) - CHECK_NAPPING() + START_EXCEPTION(perfmon); + NORMAL_EXCEPTION_PROLOG(0x260, PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE) addi r3,r1,STACK_FRAME_OVERHEAD - bl .doorbell_exception + bl .performance_monitor_exception b .ret_from_except_lite +/* Doorbell interrupt */ + MASKABLE_EXCEPTION(0x280, doorbell, .doorbell_exception, ACK_NONE) + /* Doorbell critical Interrupt */ START_EXCEPTION(doorbell_crit); - CRIT_EXCEPTION_PROLOG(0x2080, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x2080, PACA_EXCRIT, INTS_DISABLE_ALL) + CRIT_EXCEPTION_PROLOG(0x2a0, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE) // bl special_reg_save_crit // CHECK_NAPPING(); // addi r3,r1,STACK_FRAME_OVERHEAD @@ -547,36 +543,114 @@ kernel_dbg_exc: // b ret_from_crit_except b . 
+/* Guest Doorbell */ MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE) - MASKABLE_EXCEPTION(0x2e0, guest_doorbell_crit, .unknown_exception, ACK_NONE) - MASKABLE_EXCEPTION(0x310, hypercall, .unknown_exception, ACK_NONE) - MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE) +/* Guest Doorbell critical Interrupt */ + START_EXCEPTION(guest_doorbell_crit); + CRIT_EXCEPTION_PROLOG(0x2e0, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE) +// bl special_reg_save_crit +// CHECK_NAPPING(); +// addi r3,r1,STACK_FRAME_OVERHEAD +// bl .guest_doorbell_critical_exception +// b ret_from_crit_except + b . + +/* Hypervisor call */ + START_EXCEPTION(hypercall); + NORMAL_EXCEPTION_PROLOG(0x310, PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs + INTS_RESTORE_HARD + bl .unknown_exception + b .ret_from_except + +/* Embedded Hypervisor priviledged */ + START_EXCEPTION(ehpriv); + NORMAL_EXCEPTION_PROLOG(0x320, PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs + INTS_RESTORE_HARD + bl .unknown_exception + b .ret_from_except /* - * An interrupt came in while soft-disabled; clear EE in SRR1, - * clear paca->hard_enabled and return. + * An interrupt came in while soft-disabled; We mark paca->irq_happened + * accordingly and if the interrupt is level sensitive, we hard disable */ -masked_doorbell_book3e: - mtcr r10 - /* Resend the doorbell to fire again when ints enabled */ - mfspr r10,SPRN_PIR - PPC_MSGSND(r10) - b masked_interrupt_book3e_common -masked_interrupt_book3e: +masked_interrupt_book3e_0x500: + /* XXX When adding support for EPR, use PACA_IRQ_EE_EDGE */ + li r11,PACA_IRQ_EE + b masked_interrupt_book3e_full_mask + +masked_interrupt_book3e_0x900: + ACK_DEC(r11); + li r11,PACA_IRQ_DEC + b masked_interrupt_book3e_no_mask +masked_interrupt_book3e_0x980: + ACK_FIT(r11); + li r11,PACA_IRQ_DEC + b masked_interrupt_book3e_no_mask +masked_interrupt_book3e_0x280: +masked_interrupt_book3e_0x2c0: + li r11,PACA_IRQ_DBELL + b masked_interrupt_book3e_no_mask + +masked_interrupt_book3e_no_mask: + mtcr r10 + lbz r10,PACAIRQHAPPENED(r13) + or r10,r10,r11 + stb r10,PACAIRQHAPPENED(r13) + b 1f +masked_interrupt_book3e_full_mask: mtcr r10 -masked_interrupt_book3e_common: - stb r11,PACAHARDIRQEN(r13) + lbz r10,PACAIRQHAPPENED(r13) + or r10,r10,r11 + stb r10,PACAIRQHAPPENED(r13) mfspr r10,SPRN_SRR1 rldicl r11,r10,48,1 /* clear MSR_EE */ rotldi r10,r11,16 mtspr SPRN_SRR1,r10 - ld r10,PACA_EXGEN+EX_R10(r13); /* restore registers */ +1: ld r10,PACA_EXGEN+EX_R10(r13); ld r11,PACA_EXGEN+EX_R11(r13); mfspr r13,SPRN_SPRG_GEN_SCRATCH; rfi b . +/* + * Called from arch_local_irq_enable when an interrupt needs + * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280 + * to indicate the kind of interrupt. MSR:EE is already off. + * We generate a stackframe like if a real interrupt had happened. + * + * Note: While MSR:EE is off, we need to make sure that _MSR + * in the generated frame has EE set to 1 or the exception + * handler will not properly re-enable them. + */ +_GLOBAL(__replay_interrupt) + /* We are going to jump to the exception common code which + * will retrieve various register values from the PACA which + * we don't give a damn about. 
+ */ + mflr r10 + mfmsr r11 + mfcr r4 + mtspr SPRN_SPRG_GEN_SCRATCH,r13; + std r1,PACA_EXGEN+EX_R1(r13); + stw r4,PACA_EXGEN+EX_CR(r13); + ori r11,r11,MSR_EE + subi r1,r1,INT_FRAME_SIZE; + cmpwi cr0,r3,0x500 + beq exc_0x500_common + cmpwi cr0,r3,0x900 + beq exc_0x900_common + cmpwi cr0,r3,0x280 + beq exc_0x280_common + blr + /* * This is called from 0x300 and 0x400 handlers after the prologs with @@ -591,7 +665,6 @@ storage_fault_common: mr r5,r15 ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) - INTS_RESTORE_HARD bl .do_page_fault cmpdi r3,0 bne- 1f @@ -680,6 +753,8 @@ BAD_STACK_TRAMPOLINE(0x000) BAD_STACK_TRAMPOLINE(0x100) BAD_STACK_TRAMPOLINE(0x200) BAD_STACK_TRAMPOLINE(0x260) +BAD_STACK_TRAMPOLINE(0x280) +BAD_STACK_TRAMPOLINE(0x2a0) BAD_STACK_TRAMPOLINE(0x2c0) BAD_STACK_TRAMPOLINE(0x2e0) BAD_STACK_TRAMPOLINE(0x300) @@ -697,11 +772,10 @@ BAD_STACK_TRAMPOLINE(0xa00) BAD_STACK_TRAMPOLINE(0xb00) BAD_STACK_TRAMPOLINE(0xc00) BAD_STACK_TRAMPOLINE(0xd00) +BAD_STACK_TRAMPOLINE(0xd08) BAD_STACK_TRAMPOLINE(0xe00) BAD_STACK_TRAMPOLINE(0xf00) BAD_STACK_TRAMPOLINE(0xf20) -BAD_STACK_TRAMPOLINE(0x2070) -BAD_STACK_TRAMPOLINE(0x2080) .globl bad_stack_book3e bad_stack_book3e: diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 15c5a4f6de0..2d0868a4e2f 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -12,6 +12,7 @@ * */ +#include <asm/hw_irq.h> #include <asm/exception-64s.h> #include <asm/ptrace.h> @@ -19,7 +20,7 @@ * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code * 0x0100 - 0x2fff : pSeries Interrupt prologs - * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs + * 0x3000 - 0x5fff : interrupt support common interrupt prologs * 0x6000 - 0x6fff : Initial (CPU0) segment table * 0x7000 - 0x7fff : FWNMI data area * 0x8000 - : Early init and support code @@ -356,34 +357,60 @@ do_stab_bolted_pSeries: KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) /* - * An interrupt came in while soft-disabled; clear EE in SRR1, - * clear paca->hard_enabled and return. + * An interrupt came in while soft-disabled. We set paca->irq_happened, + * then, if it was a decrementer interrupt, we bump the dec to max and + * and return, else we hard disable and return. This is called with + * r10 containing the value to OR to the paca field. */ -masked_interrupt: - stb r10,PACAHARDIRQEN(r13) - mtcrf 0x80,r9 - ld r9,PACA_EXGEN+EX_R9(r13) - mfspr r10,SPRN_SRR1 - rldicl r10,r10,48,1 /* clear MSR_EE */ - rotldi r10,r10,16 - mtspr SPRN_SRR1,r10 - ld r10,PACA_EXGEN+EX_R10(r13) - GET_SCRATCH0(r13) - rfid +#define MASKED_INTERRUPT(_H) \ +masked_##_H##interrupt: \ + std r11,PACA_EXGEN+EX_R11(r13); \ + lbz r11,PACAIRQHAPPENED(r13); \ + or r11,r11,r10; \ + stb r11,PACAIRQHAPPENED(r13); \ + andi. r10,r10,PACA_IRQ_DEC; \ + beq 1f; \ + lis r10,0x7fff; \ + ori r10,r10,0xffff; \ + mtspr SPRN_DEC,r10; \ + b 2f; \ +1: mfspr r10,SPRN_##_H##SRR1; \ + rldicl r10,r10,48,1; /* clear MSR_EE */ \ + rotldi r10,r10,16; \ + mtspr SPRN_##_H##SRR1,r10; \ +2: mtcrf 0x80,r9; \ + ld r9,PACA_EXGEN+EX_R9(r13); \ + ld r10,PACA_EXGEN+EX_R10(r13); \ + ld r11,PACA_EXGEN+EX_R11(r13); \ + GET_SCRATCH0(r13); \ + ##_H##rfid; \ b . 
+ + MASKED_INTERRUPT() + MASKED_INTERRUPT(H) -masked_Hinterrupt: - stb r10,PACAHARDIRQEN(r13) - mtcrf 0x80,r9 - ld r9,PACA_EXGEN+EX_R9(r13) - mfspr r10,SPRN_HSRR1 - rldicl r10,r10,48,1 /* clear MSR_EE */ - rotldi r10,r10,16 - mtspr SPRN_HSRR1,r10 - ld r10,PACA_EXGEN+EX_R10(r13) - GET_SCRATCH0(r13) - hrfid - b . +/* + * Called from arch_local_irq_enable when an interrupt needs + * to be resent. r3 contains 0x500 or 0x900 to indicate which + * kind of interrupt. MSR:EE is already off. We generate a + * stackframe like if a real interrupt had happened. + * + * Note: While MSR:EE is off, we need to make sure that _MSR + * in the generated frame has EE set to 1 or the exception + * handler will not properly re-enable them. + */ +_GLOBAL(__replay_interrupt) + /* We are going to jump to the exception common code which + * will retrieve various register values from the PACA which + * we don't give a damn about, so we don't bother storing them. + */ + mfmsr r12 + mflr r11 + mfcr r9 + ori r12,r12,MSR_EE + andi. r3,r3,0x0800 + bne decrementer_common + b hardware_interrupt_common #ifdef CONFIG_PPC_PSERIES /* @@ -458,14 +485,15 @@ machine_check_common: bl .machine_check_exception b .ret_from_except - STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt) + STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ) + STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt) STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception) STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception) STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) - STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception) + STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception) STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) #ifdef CONFIG_ALTIVEC STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception) @@ -482,6 +510,9 @@ machine_check_common: system_call_entry: b system_call_common +ppc64_runlatch_on_trampoline: + b .__ppc64_runlatch_on + /* * Here we have detected that the kernel stack pointer is bad. * R9 contains the saved CR, r13 points to the paca, @@ -555,6 +586,8 @@ data_access_common: mfspr r10,SPRN_DSISR stw r10,PACA_EXGEN+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) + DISABLE_INTS + ld r12,_MSR(r1) ld r3,PACA_EXGEN+EX_DAR(r13) lwz r4,PACA_EXGEN+EX_DSISR(r13) li r5,0x300 @@ -569,6 +602,7 @@ h_data_storage_common: stw r10,PACA_EXGEN+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) bl .save_nvgprs + DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD bl .unknown_exception b .ret_from_except @@ -577,6 +611,8 @@ h_data_storage_common: .globl instruction_access_common instruction_access_common: EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) + DISABLE_INTS + ld r12,_MSR(r1) ld r3,_NIP(r1) andis. 
r4,r12,0x5820 li r5,0x400 @@ -672,12 +708,6 @@ _GLOBAL(slb_miss_realmode) ld r10,PACA_EXSLB+EX_LR(r13) ld r3,PACA_EXSLB+EX_R3(r13) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - ld r11,PACALPPACAPTR(r13) - ld r11,LPPACASRR0(r11) /* get SRR0 value */ -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif /* CONFIG_PPC_ISERIES */ mtlr r10 @@ -690,12 +720,6 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ .machine pop -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif /* CONFIG_PPC_ISERIES */ ld r9,PACA_EXSLB+EX_R9(r13) ld r10,PACA_EXSLB+EX_R10(r13) ld r11,PACA_EXSLB+EX_R11(r13) @@ -704,13 +728,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) rfid b . /* prevent speculative execution */ -2: -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - b unrecov_slb -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif /* CONFIG_PPC_ISERIES */ - mfspr r11,SPRN_SRR0 +2: mfspr r11,SPRN_SRR0 ld r10,PACAKBASE(r13) LOAD_HANDLER(r10,unrecov_slb) mtspr SPRN_SRR0,r10 @@ -727,20 +745,6 @@ unrecov_slb: bl .unrecoverable_exception b 1b - .align 7 - .globl hardware_interrupt_common - .globl hardware_interrupt_entry -hardware_interrupt_common: - EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN) - FINISH_NAP -hardware_interrupt_entry: - DISABLE_INTS -BEGIN_FTR_SECTION - bl .ppc64_runlatch_on -END_FTR_SECTION_IFSET(CPU_FTR_CTRL) - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_IRQ - b .ret_from_except_lite #ifdef CONFIG_PPC_970_NAP power4_fixup_nap: @@ -785,8 +789,8 @@ fp_unavailable_common: EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) bne 1f /* if from user, just load it up */ bl .save_nvgprs + DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - ENABLE_INTS bl .kernel_fp_unavailable_exception BUG_OPCODE 1: bl .load_up_fpu @@ -805,8 +809,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif bl .save_nvgprs + DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - ENABLE_INTS bl .altivec_unavailable_exception b .ret_from_except @@ -816,13 +820,14 @@ vsx_unavailable_common: EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) #ifdef CONFIG_VSX BEGIN_FTR_SECTION - bne .load_up_vsx + beq 1f + b .load_up_vsx 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif bl .save_nvgprs + DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - ENABLE_INTS bl .vsx_unavailable_exception b .ret_from_except @@ -831,66 +836,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) __end_handlers: /* - * Return from an exception with minimal checks. - * The caller is assumed to have done EXCEPTION_PROLOG_COMMON. - * If interrupts have been enabled, or anything has been - * done that might have changed the scheduling status of - * any task or sent any task a signal, you should use - * ret_from_except or ret_from_except_lite instead of this. - */ -fast_exc_return_irq: /* restores irq state too */ - ld r3,SOFTE(r1) - TRACE_AND_RESTORE_IRQ(r3); - ld r12,_MSR(r1) - rldicl r4,r12,49,63 /* get MSR_EE to LSB */ - stb r4,PACAHARDIRQEN(r13) /* restore paca->hard_enabled */ - b 1f - - .globl fast_exception_return -fast_exception_return: - ld r12,_MSR(r1) -1: ld r11,_NIP(r1) - andi. r3,r12,MSR_RI /* check if RI is set */ - beq- unrecov_fer - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - andi. 
r3,r12,MSR_PR - beq 2f - ACCOUNT_CPU_USER_EXIT(r3, r4) -2: -#endif - - ld r3,_CCR(r1) - ld r4,_LINK(r1) - ld r5,_CTR(r1) - ld r6,_XER(r1) - mtcr r3 - mtlr r4 - mtctr r5 - mtxer r6 - REST_GPR(0, r1) - REST_8GPRS(2, r1) - - mfmsr r10 - rldicl r10,r10,48,1 /* clear EE */ - rldicr r10,r10,16,61 /* clear RI (LE is 0 already) */ - mtmsrd r10,1 - - mtspr SPRN_SRR1,r12 - mtspr SPRN_SRR0,r11 - REST_4GPRS(10, r1) - ld r1,GPR1(r1) - rfid - b . /* prevent speculative execution */ - -unrecov_fer: - bl .save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b - - -/* * Hash table stuff */ .align 7 @@ -912,28 +857,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ bne 77f /* then don't call hash_page now */ - - /* - * On iSeries, we soft-disable interrupts here, then - * hard-enable interrupts so that the hash_page code can spin on - * the hash_table_lock without problems on a shared processor. - */ - DISABLE_INTS - - /* - * Currently, trace_hardirqs_off() will be called by DISABLE_INTS - * and will clobber volatile registers when irq tracing is enabled - * so we need to reload them. It may be possible to be smarter here - * and move the irq tracing elsewhere but let's keep it simple for - * now - */ -#ifdef CONFIG_TRACE_IRQFLAGS - ld r3,_DAR(r1) - ld r4,_DSISR(r1) - ld r5,_TRAP(r1) - ld r12,_MSR(r1) - clrrdi r5,r5,4 -#endif /* CONFIG_TRACE_IRQFLAGS */ /* * We need to set the _PAGE_USER bit if MSR_PR is set or if we are * accessing a userspace segment (even from the kernel). We assume @@ -951,62 +874,25 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) * r4 contains the required access permissions * r5 contains the trap number * - * at return r3 = 0 for success + * at return r3 = 0 for success, 1 for page fault, negative for error */ bl .hash_page /* build HPTE if possible */ cmpdi r3,0 /* see if hash_page succeeded */ -BEGIN_FW_FTR_SECTION - /* - * If we had interrupts soft-enabled at the point where the - * DSI/ISI occurred, and an interrupt came in during hash_page, - * handle it now. - * We jump to ret_from_except_lite rather than fast_exception_return - * because ret_from_except_lite will check for and handle pending - * interrupts if necessary. - */ - beq 13f -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) - -BEGIN_FW_FTR_SECTION - /* - * Here we have interrupts hard-disabled, so it is sufficient - * to restore paca->{soft,hard}_enable and get out. - */ + /* Success */ beq fast_exc_return_irq /* Return from exception on success */ -END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) - - /* For a hash failure, we don't bother re-enabling interrupts */ - ble- 12f - - /* - * hash_page couldn't handle it, set soft interrupt enable back - * to what it was before the trap. Note that .arch_local_irq_restore - * handles any interrupts pending at this point. - */ - ld r3,SOFTE(r1) - TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f) - bl .arch_local_irq_restore - b 11f -/* We have a data breakpoint exception - handle it */ -handle_dabr_fault: - bl .save_nvgprs - ld r4,_DAR(r1) - ld r5,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_dabr - b .ret_from_except_lite + /* Error */ + blt- 13f /* Here we have a page fault that hash_page can't handle. 
*/ handle_page_fault: - ENABLE_INTS 11: ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl .do_page_fault cmpdi r3,0 - beq+ 13f + beq+ 12f bl .save_nvgprs mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD @@ -1014,12 +900,20 @@ handle_page_fault: bl .bad_page_fault b .ret_from_except -13: b .ret_from_except_lite +/* We have a data breakpoint exception - handle it */ +handle_dabr_fault: + bl .save_nvgprs + ld r4,_DAR(r1) + ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_dabr +12: b .ret_from_except_lite + /* We have a page fault that hash_page could handle but HV refused * the PTE insertion */ -12: bl .save_nvgprs +13: bl .save_nvgprs mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) @@ -1141,51 +1035,19 @@ _GLOBAL(do_stab_bolted) .= 0x7000 .globl fwnmi_data_area fwnmi_data_area: -#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ - /* iSeries does not use the FWNMI stuff, so it is safe to put - * this here, even if we later allow kernels that will boot on - * both pSeries and iSeries */ -#ifdef CONFIG_PPC_ISERIES - . = LPARMAP_PHYS - .globl xLparMap -xLparMap: - .quad HvEsidsToMap /* xNumberEsids */ - .quad HvRangesToMap /* xNumberRanges */ - .quad STAB0_PAGE /* xSegmentTableOffs */ - .zero 40 /* xRsvd */ - /* xEsids (HvEsidsToMap entries of 2 quads) */ - .quad PAGE_OFFSET_ESID /* xKernelEsid */ - .quad PAGE_OFFSET_VSID /* xKernelVsid */ - .quad VMALLOC_START_ESID /* xKernelEsid */ - .quad VMALLOC_START_VSID /* xKernelVsid */ - /* xRanges (HvRangesToMap entries of 3 quads) */ - .quad HvPagesToMap /* xPages */ - .quad 0 /* xOffset */ - .quad PAGE_OFFSET_VSID << (SID_SHIFT - HW_PAGE_SHIFT) /* xVPN */ - -#endif /* CONFIG_PPC_ISERIES */ - -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* pseries and powernv need to keep the whole page from * 0x7000 to 0x8000 free for use by the firmware */ . = 0x8000 #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ -/* - * Space for CPU0's segment table. - * - * On iSeries, the hypervisor must fill in at least one entry before - * we get control (with relocate on). The address is given to the hv - * as a page number (see xLparMap above), so this must be at a - * fixed address (the linker can't compute (u64)&initial_stab >> - * PAGE_SHIFT). - */ - . = STAB0_OFFSET /* 0x8000 */ +/* Space for CPU0's segment table */ + .balign 4096 .globl initial_stab initial_stab: .space 4096 + #ifdef CONFIG_PPC_POWERNV _GLOBAL(opal_mc_secondary_handler) HMT_MEDIUM diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 0654dba2c1f..dc0488b6f6e 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -395,7 +395,7 @@ DataAccess: bl hash_page 1: lwz r5,_DSISR(r11) /* get DSISR value */ mfspr r4,SPRN_DAR - EXC_XFER_EE_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, handle_page_fault) /* Instruction access exception. 
*/ @@ -410,7 +410,7 @@ InstructionAccess: bl hash_page 1: mr r4,r12 mr r5,r9 - EXC_XFER_EE_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 872a6af83ba..4989661b710 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -394,7 +394,7 @@ label: NORMAL_EXCEPTION_PROLOG mr r4,r12 /* Pass SRR0 as arg2 */ li r5,0 /* Pass zero as arg3 */ - EXC_XFER_EE_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, handle_page_fault) /* 0x0500 - External Interrupt Exception */ EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) @@ -747,7 +747,7 @@ DataAccess: mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ - EXC_XFER_EE_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, handle_page_fault) /* Other PowerPC processors, namely those derived from the 6xx-series * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 06c7251c1bf..58bddee8e1e 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -32,13 +32,13 @@ #include <asm/cputable.h> #include <asm/setup.h> #include <asm/hvcall.h> -#include <asm/iseries/lpar_map.h> #include <asm/thread_info.h> #include <asm/firmware.h> #include <asm/page_64.h> #include <asm/irqflags.h> #include <asm/kvm_book3s_asm.h> #include <asm/ptrace.h> +#include <asm/hw_irq.h> /* The physical memory is laid out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -57,10 +57,6 @@ * entry in r9 for debugging purposes * 2. Secondary processors enter at 0x60 with PIR in gpr3 * - * For iSeries: - * 1. The MMU is on (as it always is for iSeries) - * 2. The kernel is entered at system_reset_iSeries - * * For Book3E processors: * 1. The MMU is on running in AS0 in a state defined in ePAPR * 2. The kernel is entered at __start @@ -93,15 +89,6 @@ __secondary_hold_spinloop: __secondary_hold_acknowledge: .llong 0x0 -#ifdef CONFIG_PPC_ISERIES - /* - * At offset 0x20, there is a pointer to iSeries LPAR data. - * This is required by the hypervisor - */ - . = 0x20 - .llong hvReleaseData-KERNELBASE -#endif /* CONFIG_PPC_ISERIES */ - #ifdef CONFIG_RELOCATABLE /* This flag is set to 1 by a loader if the kernel should run * at the loaded address instead of the linked address. This @@ -564,7 +551,8 @@ _GLOBAL(pmac_secondary_start) */ li r0,0 stb r0,PACASOFTIRQEN(r13) - stb r0,PACAHARDIRQEN(r13) + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) @@ -582,7 +570,7 @@ _GLOBAL(pmac_secondary_start) * 1. Processor number * 2. Segment table pointer (virtual address) * On entry the following are set: - * r1 = stack pointer. vaddr for iSeries, raddr (temp stack) for pSeries + * r1 = stack pointer (real addr of temp stack) * r24 = cpu# (in Linux terms) * r13 = paca virtual address * SPRG_PACA = paca virtual address @@ -595,7 +583,7 @@ __secondary_start: /* Set thread priority to MEDIUM */ HMT_MEDIUM - /* Initialize the kernel stack. Just a repeat for iSeries. 
*/ + /* Initialize the kernel stack */ LOAD_REG_ADDR(r3, current_set) sldi r28,r24,3 /* get current_set[cpu#] */ ldx r14,r3,r28 @@ -615,20 +603,16 @@ __secondary_start: li r7,0 mtlr r7 + /* Mark interrupts soft and hard disabled (they might be enabled + * in the PACA when doing hotplug) + */ + stb r7,PACASOFTIRQEN(r13) + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) + /* enable MMU and jump to start_secondary */ LOAD_REG_ADDR(r3, .start_secondary_prolog) LOAD_REG_IMMEDIATE(r4, MSR_KERNEL) -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - ori r4,r4,MSR_EE - li r8,1 - stb r8,PACAHARDIRQEN(r13) -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif -BEGIN_FW_FTR_SECTION - stb r7,PACAHARDIRQEN(r13) -END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) - stb r7,PACASOFTIRQEN(r13) mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 @@ -771,22 +755,18 @@ _INIT_GLOBAL(start_here_common) /* Load the TOC (virtual address) */ ld r2,PACATOC(r13) + /* Do more system initializations in virtual mode */ bl .setup_system - /* Load up the kernel context */ -5: - li r5,0 - stb r5,PACASOFTIRQEN(r13) /* Soft Disabled */ -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - mfmsr r5 - ori r5,r5,MSR_EE /* Hard Enabled on iSeries*/ - mtmsrd r5 - li r5,1 -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif - stb r5,PACAHARDIRQEN(r13) /* Hard Disabled on others */ + /* Mark interrupts soft and hard disabled (they might be enabled + * in the PACA when doing hotplug) + */ + li r0,0 + stb r0,PACASOFTIRQEN(r13) + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) + /* Generic kernel entry */ bl .start_kernel /* Not reached */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index b68cb173ba2..b2a5860accf 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -220,7 +220,7 @@ DataAccess: mfspr r4,SPRN_DAR li r10,0x00f0 mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ - EXC_XFER_EE_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, handle_page_fault) /* Instruction access exception. * This is "never generated" by the MPC8xx. We jump to it for other @@ -231,7 +231,7 @@ InstructionAccess: EXCEPTION_PROLOG mr r4,r12 mr r5,r9 - EXC_XFER_EE_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index fc921bf62e1..0e4175388f4 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -359,7 +359,7 @@ label: mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ - EXC_XFER_EE_LITE(0x0300, handle_page_fault) + EXC_XFER_LITE(0x0300, handle_page_fault) #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ @@ -368,7 +368,7 @@ label: stw r5,_ESR(r11); \ mr r4,r12; /* Pass SRR0 as arg2 */ \ li r5,0; /* Pass zero as arg3 */ \ - EXC_XFER_EE_LITE(0x0400, handle_page_fault) + EXC_XFER_LITE(0x0400, handle_page_fault) #define ALIGNMENT_EXCEPTION \ START_EXCEPTION(Alignment) \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index d5d78c4ceef..28e62598d0e 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -319,7 +319,7 @@ interrupt_base: mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ andis. 
r10,r5,(ESR_ILK|ESR_DLK)@h bne 1f - EXC_XFER_EE_LITE(0x0300, handle_page_fault) + EXC_XFER_LITE(0x0300, handle_page_fault) 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x0300, CacheLockingException) diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 0a48bf5db6c..8f7a2b62863 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -84,7 +84,11 @@ void cpu_idle(void) start_critical_timings(); - local_irq_enable(); + /* Some power_save functions return with + * interrupts enabled, some don't. + */ + if (irqs_disabled()) + local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); } else { diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index 16c002d6bdf..ff007b59448 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S @@ -29,43 +29,30 @@ _GLOBAL(book3e_idle) wrteei 0 /* Now check if an interrupt came in while we were soft disabled - * since we may otherwise lose it (doorbells etc...). We know - * that since PACAHARDIRQEN will have been cleared in that case. + * since we may otherwise lose it (doorbells etc...). */ - lbz r3,PACAHARDIRQEN(r13) + lbz r3,PACAIRQHAPPENED(r13) cmpwi cr0,r3,0 - beqlr + bnelr - /* Now we are going to mark ourselves as soft and hard enables in + /* Now we are going to mark ourselves as soft and hard enabled in * order to be able to take interrupts while asleep. We inform lockdep * of that. We don't actually turn interrupts on just yet tho. */ #ifdef CONFIG_TRACE_IRQFLAGS stdu r1,-128(r1) bl .trace_hardirqs_on + addi r1,r1,128 #endif li r0,1 stb r0,PACASOFTIRQEN(r13) - stb r0,PACAHARDIRQEN(r13) /* Interrupts will make use return to LR, so get something we want * in there */ bl 1f - /* Hard disable interrupts again */ - wrteei 0 - - /* Mark them off again in the PACA as well */ - li r0,0 - stb r0,PACASOFTIRQEN(r13) - stb r0,PACAHARDIRQEN(r13) - - /* Tell lockdep about it */ -#ifdef CONFIG_TRACE_IRQFLAGS - bl .trace_hardirqs_off - addi r1,r1,128 -#endif + /* And return (interrupts are on) */ ld r0,16(r1) mtlr r0 blr diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index ba319547860..d8cdba4c28b 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -14,6 +14,7 @@ #include <asm/thread_info.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> +#include <asm/irqflags.h> #undef DEBUG @@ -29,14 +30,31 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) cmpwi 0,r4,0 beqlr - /* Go to NAP now */ + /* Hard disable interrupts */ mfmsr r7 rldicl r0,r7,48,1 rotldi r0,r0,16 - mtmsrd r0,1 /* hard-disable interrupts */ + mtmsrd r0,1 + + /* Check if something happened while soft-disabled */ + lbz r0,PACAIRQHAPPENED(r13) + cmpwi cr0,r0,0 + bnelr + + /* Soft-enable interrupts */ +#ifdef CONFIG_TRACE_IRQFLAGS + mflr r0 + std r0,16(r1) + stdu r1,-128(r1) + bl .trace_hardirqs_on + addi r1,r1,128 + ld r0,16(r1) + mtlr r0 +#endif /* CONFIG_TRACE_IRQFLAGS */ + + TRACE_ENABLE_INTS li r0,1 stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ - stb r0,PACAHARDIRQEN(r13) BEGIN_FTR_SECTION DSSALL sync diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index fcdff198da4..0cdc9a39283 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -1,5 +1,5 @@ /* - * This file contains the power_save function for 970-family CPUs. + * This file contains the power_save function for Power7 CPUs. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -15,6 +15,7 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ppc-opcode.h> +#include <asm/hw_irq.h> #undef DEBUG @@ -51,9 +52,25 @@ _GLOBAL(power7_idle) rldicl r9,r9,48,1 rotldi r9,r9,16 mtmsrd r9,1 /* hard-disable interrupts */ + + /* Check if something happened while soft-disabled */ + lbz r0,PACAIRQHAPPENED(r13) + cmpwi cr0,r0,0 + beq 1f + addi r1,r1,INT_FRAME_SIZE + ld r0,16(r1) + mtlr r0 + blr + +1: /* We mark irqs hard disabled as this is the state we'll + * be in when returning and we need to tell arch_local_irq_restore() + * about it + */ + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) + + /* We haven't lost state ... yet */ li r0,0 - stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ - stb r0,PACAHARDIRQEN(r13) stb r0,PACA_NAPSTATELOST(r13) /* Continue saving state */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 9b6e80668cf..eb804e15b29 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -95,14 +95,14 @@ extern int tau_interrupts(int); int distribute_irqs = 1; -static inline notrace unsigned long get_hard_enabled(void) +static inline notrace unsigned long get_irq_happened(void) { - unsigned long enabled; + unsigned long happened; __asm__ __volatile__("lbz %0,%1(13)" - : "=r" (enabled) : "i" (offsetof(struct paca_struct, hard_enabled))); + : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened))); - return enabled; + return happened; } static inline notrace void set_soft_enabled(unsigned long enable) @@ -111,88 +111,167 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -static inline notrace void decrementer_check_overflow(void) +static inline notrace int decrementer_check_overflow(void) { - u64 now = get_tb_or_rtc(); - u64 *next_tb; - - preempt_disable(); - next_tb = &__get_cpu_var(decrementers_next_tb); - + u64 now = get_tb_or_rtc(); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + if (now >= *next_tb) set_dec(1); - preempt_enable(); + return now >= *next_tb; } -notrace void arch_local_irq_restore(unsigned long en) +/* This is called whenever we are re-enabling interrupts + * and returns either 0 (nothing to do) or 500/900 if there's + * either an EE or a DEC to generate. + * + * This is called in two contexts: From arch_local_irq_restore() + * before soft-enabling interrupts, and from the exception exit + * path when returning from an interrupt from a soft-disabled to + * a soft enabled context. In both case we have interrupts hard + * disabled. + * + * We take care of only clearing the bits we handled in the + * PACA irq_happened field since we can only re-emit one at a + * time and we don't want to "lose" one. + */ +notrace unsigned int __check_irq_replay(void) { /* - * get_paca()->soft_enabled = en; - * Is it ever valid to use local_irq_restore(0) when soft_enabled is 1? - * That was allowed before, and in such a case we do need to take care - * that gcc will set soft_enabled directly via r13, not choose to use - * an intermediate register, lest we're preempted to a different cpu. 
+ * We use local_paca rather than get_paca() to avoid all + * the debug_smp_processor_id() business in this low level + * function */ - set_soft_enabled(en); - if (!en) - return; + unsigned char happened = local_paca->irq_happened; -#ifdef CONFIG_PPC_STD_MMU_64 - if (firmware_has_feature(FW_FEATURE_ISERIES)) { - /* - * Do we need to disable preemption here? Not really: in the - * unlikely event that we're preempted to a different cpu in - * between getting r13, loading its lppaca_ptr, and loading - * its any_int, we might call iseries_handle_interrupts without - * an interrupt pending on the new cpu, but that's no disaster, - * is it? And the business of preempting us off the old cpu - * would itself involve a local_irq_restore which handles the - * interrupt to that cpu. - * - * But use "local_paca->lppaca_ptr" instead of "get_lppaca()" - * to avoid any preemption checking added into get_paca(). - */ - if (local_paca->lppaca_ptr->int_dword.any_int) - iseries_handle_interrupts(); + /* Clear bit 0 which we wouldn't clear otherwise */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + + /* + * Force the delivery of pending soft-disabled interrupts on PS3. + * Any HV call will have this side effect. + */ + if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { + u64 tmp, tmp2; + lv1_get_version_info(&tmp, &tmp2); } -#endif /* CONFIG_PPC_STD_MMU_64 */ /* - * if (get_paca()->hard_enabled) return; - * But again we need to take care that gcc gets hard_enabled directly - * via r13, not choose to use an intermediate register, lest we're - * preempted to a different cpu in between the two instructions. + * We may have missed a decrementer interrupt. We check the + * decrementer itself rather than the paca irq_happened field + * in case we also had a rollover while hard disabled + */ + local_paca->irq_happened &= ~PACA_IRQ_DEC; + if (decrementer_check_overflow()) + return 0x900; + + /* Finally check if an external interrupt happened */ + local_paca->irq_happened &= ~PACA_IRQ_EE; + if (happened & PACA_IRQ_EE) + return 0x500; + +#ifdef CONFIG_PPC_BOOK3E + /* Finally check if an EPR external interrupt happened + * this bit is typically set if we need to handle another + * "edge" interrupt from within the MPIC "EPR" handler + */ + local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; + if (happened & PACA_IRQ_EE_EDGE) + return 0x500; + + local_paca->irq_happened &= ~PACA_IRQ_DBELL; + if (happened & PACA_IRQ_DBELL) + return 0x280; +#endif /* CONFIG_PPC_BOOK3E */ + + /* There should be nothing left ! */ + BUG_ON(local_paca->irq_happened != 0); + + return 0; +} + +notrace void arch_local_irq_restore(unsigned long en) +{ + unsigned char irq_happened; + unsigned int replay; + + /* Write the new soft-enabled value */ + set_soft_enabled(en); + if (!en) + return; + /* + * From this point onward, we can take interrupts, preempt, + * etc... unless we got hard-disabled. We check if an event + * happened. If none happened, we know we can just return. + * + * We may have preempted before the check below, in which case + * we are checking the "new" CPU instead of the old one. This + * is only a problem if an event happened on the "old" CPU. + * + * External interrupt events on non-iseries will have caused + * interrupts to be hard-disabled, so there is no problem, we + * cannot have preempted. + * + * That leaves us with EEs on iSeries or decrementer interrupts, + * which I decided to safely ignore. 
The preemption would have + * itself been the result of an interrupt, upon which return we + * will have checked for pending events on the old CPU. */ - if (get_hard_enabled()) + irq_happened = get_irq_happened(); + if (!irq_happened) return; /* - * Need to hard-enable interrupts here. Since currently disabled, - * no need to take further asm precautions against preemption; but - * use local_paca instead of get_paca() to avoid preemption checking. + * We need to hard disable to get a trusted value from + * __check_irq_replay(). We also need to soft-disable + * again to avoid warnings in there due to the use of + * per-cpu variables. + * + * We know that if the value in irq_happened is exactly 0x01 + * then we are already hard disabled (there are other less + * common cases that we'll ignore for now), so we skip the + * (expensive) mtmsrd. */ - local_paca->hard_enabled = en; + if (unlikely(irq_happened != PACA_IRQ_HARD_DIS)) + __hard_irq_disable(); + set_soft_enabled(0); /* - * Trigger the decrementer if we have a pending event. Some processors - * only trigger on edge transitions of the sign bit. We might also - * have disabled interrupts long enough that the decrementer wrapped - * to positive. + * Check if anything needs to be re-emitted. We haven't + * soft-enabled yet to avoid warnings in decrementer_check_overflow + * accessing per-cpu variables */ - decrementer_check_overflow(); + replay = __check_irq_replay(); + + /* We can soft-enable now */ + set_soft_enabled(1); /* - * Force the delivery of pending soft-disabled interrupts on PS3. - * Any HV call will have this side effect. + * And replay if we have to. This will return with interrupts + * hard-enabled. */ - if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { - u64 tmp, tmp2; - lv1_get_version_info(&tmp, &tmp2); + if (replay) { + __replay_interrupt(replay); + return; } + /* Finally, let's ensure we are hard enabled */ __hard_irq_enable(); } EXPORT_SYMBOL(arch_local_irq_restore); + +/* + * This is specifically called by assembly code to re-enable interrupts + * if they are currently disabled. This is typically called before + * schedule() or do_signal() when returning to userspace. We do it + * in C to avoid the burden of dealing with lockdep etc... + */ +void restore_interrupts(void) +{ + if (irqs_disabled()) + local_irq_enable(); +} + #endif /* CONFIG_PPC64 */ int arch_show_interrupts(struct seq_file *p, int prec) @@ -360,8 +439,17 @@ void do_IRQ(struct pt_regs *regs) check_stack_overflow(); + /* + * Query the platform PIC for the interrupt & ack it. + * + * This will typically lower the interrupt line to the CPU + */ irq = ppc_md.get_irq(); + /* We can hard enable interrupts now */ + may_hard_irq_enable(); + + /* And finally process it */ if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) handle_one_irq(irq); else if (irq != NO_IRQ_IGNORE) diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index b69463ec201..ba16874fe29 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -5,7 +5,6 @@ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) * and Paul Mackerras. * - * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) * * setjmp/longjmp code by Paul Mackerras. 
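
The irq.c hunks above are the core of the lazy-interrupt rework: arch_local_irq_restore() no longer maintains a separate hard_enabled byte but consults irq_happened, hard-disables while it decides what to do, and asks __check_irq_replay() which vector (if any) to re-emit. As a reading aid, here is a small stand-alone C model of that bookkeeping; the names soft_enabled and irq_happened deliberately mirror the PACA fields from the patch, but everything else (the helper functions, the simplified replay step) is an illustrative sketch, not kernel code.

#include <stdio.h>

#define IRQ_HARD_DIS 0x01
#define IRQ_EE       0x04
#define IRQ_DEC      0x08

static int soft_enabled = 1;   /* models paca->soft_enabled  */
static int irq_happened;       /* models paca->irq_happened  */

static void handle_external(void)    { puts("replayed external (0x500)"); }
static void handle_decrementer(void) { puts("replayed decrementer (0x900)"); }

/* A hardware event arriving: if we are soft-disabled, just latch it
 * in irq_happened and stay hard-disabled, which is what the masked
 * interrupt stubs do in the real code. */
static void hardware_event(int bit)
{
	if (!soft_enabled) {
		irq_happened |= bit | IRQ_HARD_DIS;
		return;
	}
	if (bit == IRQ_DEC)
		handle_decrementer();
	else
		handle_external();
}

/* Re-enabling: replay whatever was latched, decrementer before
 * external, mirroring the order __check_irq_replay() uses. */
static void soft_irq_enable(void)
{
	soft_enabled = 1;
	if (irq_happened & IRQ_DEC)
		handle_decrementer();
	if (irq_happened & IRQ_EE)
		handle_external();
	irq_happened = 0;   /* nothing pending, hard-enable again */
}

int main(void)
{
	soft_enabled = 0;          /* local_irq_disable()           */
	hardware_event(IRQ_EE);    /* arrives while soft-disabled,  */
	hardware_event(IRQ_DEC);   /* so it is only latched         */
	soft_irq_enable();         /* local_irq_enable(): replay    */
	return 0;
}

The real code differs in that __check_irq_replay() hands back one vector number at a time and the replay happens by branching into the actual exception vectors via __replay_interrupt(), but the bookkeeping of the two flags follows the same shape.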
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d817ab01848..e40707032ac 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -647,6 +647,9 @@ void show_regs(struct pt_regs * regs) printk("MSR: "REG" ", regs->msr); printbits(regs->msr, msr_bits); printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); +#ifdef CONFIG_PPC64 + printk("SOFTE: %ld\n", regs->softe); +#endif trap = TRAP(regs); if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) printk("CFAR: "REG"\n", regs->orig_gpr3); @@ -1220,34 +1223,32 @@ void dump_stack(void) EXPORT_SYMBOL(dump_stack); #ifdef CONFIG_PPC64 -void ppc64_runlatch_on(void) +/* Called with hard IRQs off */ +void __ppc64_runlatch_on(void) { + struct thread_info *ti = current_thread_info(); unsigned long ctrl; - if (cpu_has_feature(CPU_FTR_CTRL) && !test_thread_flag(TIF_RUNLATCH)) { - HMT_medium(); - - ctrl = mfspr(SPRN_CTRLF); - ctrl |= CTRL_RUNLATCH; - mtspr(SPRN_CTRLT, ctrl); + ctrl = mfspr(SPRN_CTRLF); + ctrl |= CTRL_RUNLATCH; + mtspr(SPRN_CTRLT, ctrl); - set_thread_flag(TIF_RUNLATCH); - } + ti->local_flags |= TLF_RUNLATCH; } +/* Called with hard IRQs off */ void __ppc64_runlatch_off(void) { + struct thread_info *ti = current_thread_info(); unsigned long ctrl; - HMT_medium(); - - clear_thread_flag(TIF_RUNLATCH); + ti->local_flags &= ~TLF_RUNLATCH; ctrl = mfspr(SPRN_CTRLF); ctrl &= ~CTRL_RUNLATCH; mtspr(SPRN_CTRLT, ctrl); } -#endif +#endif /* CONFIG_PPC64 */ #if THREAD_SHIFT < PAGE_SHIFT diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 567dd7c3ac2..f81c81b92f0 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -259,7 +259,6 @@ void accumulate_stolen_time(void) u64 sst, ust; u8 save_soft_enabled = local_paca->soft_enabled; - u8 save_hard_enabled = local_paca->hard_enabled; /* We are called early in the exception entry, before * soft/hard_enabled are sync'ed to the expected state @@ -268,7 +267,6 @@ void accumulate_stolen_time(void) * complain */ local_paca->soft_enabled = 0; - local_paca->hard_enabled = 0; sst = scan_dispatch_log(local_paca->starttime_user); ust = scan_dispatch_log(local_paca->starttime); @@ -277,7 +275,6 @@ void accumulate_stolen_time(void) local_paca->stolen_time += ust + sst; local_paca->soft_enabled = save_soft_enabled; - local_paca->hard_enabled = save_hard_enabled; } static inline u64 calculate_stolen_time(u64 stop_tb) @@ -580,6 +577,11 @@ void timer_interrupt(struct pt_regs * regs) if (!cpu_online(smp_processor_id())) return; + /* Conditionally hard-enable interrupts now that the DEC has been + * bumped to its maximum value + */ + may_hard_irq_enable(); + trace_timer_interrupt_entry(regs); __get_cpu_var(irq_stat).timer_irqs++; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5d40e592ffc..a750409ccc4 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -247,6 +247,9 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) addr, regs->nip, regs->link, code); } + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 710a54005df..65d1c08cf09 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -109,11 +109,6 @@ SECTIONS __ptov_table_begin = .; *(.ptov_fixup); __ptov_table_end = .; -#ifdef CONFIG_PPC_ISERIES - __dt_strings_start = .; - 
*(.dt_strings); - __dt_strings_end = .; -#endif } .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 2f0d1b032a8..19f2f9498b2 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -105,6 +105,82 @@ static int store_updates_sp(struct pt_regs *regs) } return 0; } +/* + * do_page_fault error handling helpers + */ + +#define MM_FAULT_RETURN 0 +#define MM_FAULT_CONTINUE -1 +#define MM_FAULT_ERR(sig) (sig) + +static int out_of_memory(struct pt_regs *regs) +{ + /* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ + up_read(&current->mm->mmap_sem); + if (!user_mode(regs)) + return MM_FAULT_ERR(SIGKILL); + pagefault_out_of_memory(); + return MM_FAULT_RETURN; +} + +static int do_sigbus(struct pt_regs *regs, unsigned long address) +{ + siginfo_t info; + + up_read(&current->mm->mmap_sem); + + if (user_mode(regs)) { + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); + return MM_FAULT_RETURN; + } + return MM_FAULT_ERR(SIGBUS); +} + +static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) +{ + /* + * Pagefault was interrupted by SIGKILL. We have no reason to + * continue the pagefault. + */ + if (fatal_signal_pending(current)) { + /* + * If we have retry set, the mmap semaphore will have + * already been released in __lock_page_or_retry(). Else + * we release it now. + */ + if (!(fault & VM_FAULT_RETRY)) + up_read(&current->mm->mmap_sem); + /* Coming from kernel, we need to deal with uaccess fixups */ + if (user_mode(regs)) + return MM_FAULT_RETURN; + return MM_FAULT_ERR(SIGKILL); + } + + /* No fault: be happy */ + if (!(fault & VM_FAULT_ERROR)) + return MM_FAULT_CONTINUE; + + /* Out of memory */ + if (fault & VM_FAULT_OOM) + return out_of_memory(regs); + + /* Bus error. 
x86 handles HWPOISON here, we'll add this if/when + * we support the feature in HW + */ + if (fault & VM_FAULT_SIGBUS) + return do_sigbus(regs, addr); + + /* We don't understand the fault code, this is fatal */ + BUG(); + return MM_FAULT_CONTINUE; +} /* * For 600- and 800-family processors, the error_code parameter is DSISR @@ -124,11 +200,12 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, { struct vm_area_struct * vma; struct mm_struct *mm = current->mm; - siginfo_t info; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; int code = SEGV_MAPERR; - int is_write = 0, ret; + int is_write = 0; int trap = TRAP(regs); int is_exec = trap == 0x400; + int fault; #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) /* @@ -145,6 +222,9 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, is_write = error_code & ESR_DST; #endif /* CONFIG_4xx || CONFIG_BOOKE */ + if (is_write) + flags |= FAULT_FLAG_WRITE; + #ifdef CONFIG_PPC_ICSWX /* * we need to do this early because this "data storage @@ -152,13 +232,11 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, * look at it */ if (error_code & ICSWX_DSI_UCT) { - int ret; - - ret = acop_handle_fault(regs, address, error_code); - if (ret) - return ret; + int rc = acop_handle_fault(regs, address, error_code); + if (rc) + return rc; } -#endif +#endif /* CONFIG_PPC_ICSWX */ if (notify_page_fault(regs)) return 0; @@ -179,6 +257,10 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, } #endif + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + if (in_atomic() || mm == NULL) { if (!user_mode(regs)) return SIGSEGV; @@ -212,7 +294,15 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (!user_mode(regs) && !search_exception_tables(regs->nip)) goto bad_area_nosemaphore; +retry: down_read(&mm->mmap_sem); + } else { + /* + * The above down_read_trylock() might have succeeded in + * which case we'll have missed the might_sleep() from + * down_read(): + */ + might_sleep(); } vma = find_vma(mm, address); @@ -327,30 +417,43 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0); - if (unlikely(ret & VM_FAULT_ERROR)) { - if (ret & VM_FAULT_OOM) - goto out_of_memory; - else if (ret & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); + fault = handle_mm_fault(mm, vma, address, flags); + if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { + int rc = mm_fault_error(regs, address, fault); + if (rc >= MM_FAULT_RETURN) + return rc; } - if (ret & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); + + /* + * Major/minor page fault accounting is only done on the + * initial attempt. If we go through a retry, it is extremely + * likely that the page will be found in page cache at that point. 
+ */ + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + current->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); #ifdef CONFIG_PPC_SMLPAR - if (firmware_has_feature(FW_FEATURE_CMO)) { - preempt_disable(); - get_lppaca()->page_ins += (1 << PAGE_FACTOR); - preempt_enable(); + if (firmware_has_feature(FW_FEATURE_CMO)) { + preempt_disable(); + get_lppaca()->page_ins += (1 << PAGE_FACTOR); + preempt_enable(); + } +#endif /* CONFIG_PPC_SMLPAR */ + } else { + current->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); + } + if (fault & VM_FAULT_RETRY) { + /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk + * of starvation. */ + flags &= ~FAULT_FLAG_ALLOW_RETRY; + goto retry; } -#endif - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); } + up_read(&mm->mmap_sem); return 0; @@ -371,28 +474,6 @@ bad_area_nosemaphore: return SIGSEGV; -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ -out_of_memory: - up_read(&mm->mmap_sem); - if (!user_mode(regs)) - return SIGKILL; - pagefault_out_of_memory(); - return 0; - -do_sigbus: - up_read(&mm->mmap_sem); - if (user_mode(regs)) { - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); - return 0; - } - return SIGBUS; } /* diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index ef653dc95b6..b9ee79ce220 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -217,21 +217,6 @@ slb_finish_load: * free slot first but that took too long. Unfortunately we * dont have any LRU information to help us choose a slot. */ -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - /* - * On iSeries, the "bolted" stack segment can be cast out on - * shared processor switch so we need to check for a miss on - * it and restore it to the right slot. - */ - ld r9,PACAKSAVE(r13) - clrrdi r9,r9,28 - clrrdi r3,r3,28 - li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */ - cmpld r9,r3 - beq 3f -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif /* CONFIG_PPC_ISERIES */ 7: ld r10,PACASTABRR(r13) addi r10,r10,1 @@ -282,7 +267,6 @@ _GLOBAL(slb_compare_rr_to_size) /* * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return. - * We assume legacy iSeries will never have 1T segments. * * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9 */ diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c index 085fd3f45ad..a12e95af693 100644 --- a/arch/powerpc/platforms/pseries/processor_idle.c +++ b/arch/powerpc/platforms/pseries/processor_idle.c @@ -96,6 +96,20 @@ out: return index; } +static void check_and_cede_processor(void) +{ + /* + * Interrupts are soft-disabled at this point, + * but not hard disabled. So an interrupt might have + * occurred before entering NAP, and would be potentially + * lost (edge events, decrementer events, etc...) unless + * we first hard disable then check. 
+ */ + hard_irq_disable(); + if (get_paca()->irq_happened == 0) + cede_processor(); +} + static int dedicated_cede_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -108,7 +122,7 @@ static int dedicated_cede_loop(struct cpuidle_device *dev, ppc64_runlatch_off(); HMT_medium(); - cede_processor(); + check_and_cede_processor(); get_lppaca()->donate_dedicated_cpu = 0; dev->last_residency = @@ -132,7 +146,7 @@ static int shared_cede_loop(struct cpuidle_device *dev, * processor. When returning here, external interrupts * are enabled. */ - cede_processor(); + check_and_cede_processor(); dev->last_residency = (int)idle_loop_epilog(in_purr, kt_before); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index cb95eea74d3..974a47b3c9b 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1437,7 +1437,8 @@ static void excprint(struct pt_regs *fp) printf(" current = 0x%lx\n", current); #ifdef CONFIG_PPC64 - printf(" paca = 0x%lx\n", get_paca()); + printf(" paca = 0x%lx\t softe: %d\t irq_happened: 0x%02x\n", + local_paca, local_paca->soft_enabled, local_paca->irq_happened); #endif if (current) { printf(" pid = %ld, comm = %s\n", @@ -1641,7 +1642,7 @@ static void super_regs(void) /* Dump out relevant Paca data areas. */ printf("Paca: \n"); - ptrPaca = get_paca(); + ptrPaca = local_paca; printf(" Local Processor Control Area (LpPaca): \n"); ptrLpPaca = ptrPaca->lppaca_ptr; @@ -2644,7 +2645,7 @@ static void dump_slb(void) static void dump_stab(void) { int i; - unsigned long *tmp = (unsigned long *)get_paca()->stab_addr; + unsigned long *tmp = (unsigned long *)local_paca->stab_addr; printf("Segment table contents of cpu %x\n", smp_processor_id());
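
The mm/fault.c rework earlier in this series switches do_page_fault() to the FAULT_FLAG_ALLOW_RETRY protocol: the first call to handle_mm_fault() may return VM_FAULT_RETRY (after the core MM has dropped mmap_sem), in which case the fault is retried exactly once with the retry flag cleared. The following stand-alone sketch shows just that control flow; try_fault() is a made-up stand-in for handle_mm_fault() and the flag values are illustrative, not the kernel's.

#include <stdio.h>

#define FLAG_ALLOW_RETRY 0x1
#define FAULT_RETRY      0x2
#define FAULT_MAJOR      0x4

/* Hypothetical stand-in for handle_mm_fault(): pretend the first
 * attempt has to wait for page I/O and asks the caller to retry. */
static int try_fault(unsigned int flags)
{
	static int first_attempt = 1;

	if ((flags & FLAG_ALLOW_RETRY) && first_attempt) {
		first_attempt = 0;
		return FAULT_RETRY;
	}
	return FAULT_MAJOR;
}

int main(void)
{
	unsigned int flags = FLAG_ALLOW_RETRY;
	int fault;

retry:
	fault = try_fault(flags);
	if (fault & FAULT_RETRY) {
		/* Retry at most once: clearing ALLOW_RETRY avoids any
		 * risk of starvation, as the patch comment notes. */
		flags &= ~FLAG_ALLOW_RETRY;
		goto retry;
	}

	/* In the patch, major/minor fault accounting is only done on
	 * the pass where ALLOW_RETRY is still set, i.e. the initial
	 * attempt. */
	printf("fault handled after %s\n",
	       (flags & FLAG_ALLOW_RETRY) ? "one attempt" : "a retry");
	return 0;
}

The same single-retry shape is what lets the accounting comment in the diff claim that a retried fault will almost always hit the page cache on the second pass.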