diff options
Diffstat (limited to 'arch/powerpc')
112 files changed, 1476 insertions, 2505 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c33e3ad2c8f..508e3fe934d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -341,7 +341,7 @@ config SWIOTLB config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" - depends on SMP && HOTPLUG && (PPC_PSERIES || \ + depends on SMP && (PPC_PSERIES || \ PPC_PMAC || PPC_POWERNV || (PPC_85xx && !PPC_E500MC)) ---help--- Say Y here to be able to disable and re-enable individual diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 5416e28a753..863d877e0b5 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -262,8 +262,31 @@ config PPC_EARLY_DEBUG_OPAL_HVSI Select this to enable early debugging for the PowerNV platform using an "hvsi" console +config PPC_EARLY_DEBUG_MEMCONS + bool "In memory console" + help + Select this to enable early debugging using an in memory console. + This console provides input and output buffers stored within the + kernel BSS and should be safe to select on any system. A debugger + can then be used to read kernel output or send input to the console. endchoice +config PPC_MEMCONS_OUTPUT_SIZE + int "In memory console output buffer size" + depends on PPC_EARLY_DEBUG_MEMCONS + default 4096 + help + Selects the size of the output buffer (in bytes) of the in memory + console. + +config PPC_MEMCONS_INPUT_SIZE + int "In memory console input buffer size" + depends on PPC_EARLY_DEBUG_MEMCONS + default 128 + help + Selects the size of the input buffer (in bytes) of the in memory + console. + config PPC_EARLY_DEBUG_OPAL def_bool y depends on PPC_EARLY_DEBUG_OPAL_RAW || PPC_EARLY_DEBUG_OPAL_HVSI diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index f7919623291..139a8308070 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -136,7 +136,6 @@ CONFIG_HID_SMARTJOYPLUS=m CONFIG_USB_HIDDEV=y CONFIG_USB=m CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_SUSPEND=y CONFIG_USB_MON=m CONFIG_USB_EHCI_HCD=m # CONFIG_USB_EHCI_HCD_PPC_OF is not set diff --git a/arch/powerpc/include/asm/context_tracking.h b/arch/powerpc/include/asm/context_tracking.h new file mode 100644 index 00000000000..b6f5a33b8ee --- /dev/null +++ b/arch/powerpc/include/asm/context_tracking.h @@ -0,0 +1,10 @@ +#ifndef _ASM_POWERPC_CONTEXT_TRACKING_H +#define _ASM_POWERPC_CONTEXT_TRACKING_H + +#ifdef CONFIG_CONTEXT_TRACKING +#define SCHEDULE_USER bl .schedule_user +#else +#define SCHEDULE_USER bl .schedule +#endif + +#endif diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 26807e5aff5..6f3887d884d 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -176,6 +176,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000) #define CPU_FTR_HAS_PPR LONG_ASM_CONST(0x0200000000000000) #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) +#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) #ifndef __ASSEMBLY__ @@ -394,19 +395,20 @@ extern const char *powerpc_base_platform; CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_201 | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \ CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS | \ - CPU_FTR_HVMODE) + CPU_FTR_HVMODE | CPU_FTR_DABRX) #define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \ - CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB) + CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_DABRX) #define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \ - CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR) + CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR | \ + CPU_FTR_DABRX) #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -415,7 +417,7 @@ extern const char *powerpc_base_platform; CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | \ - CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR) + CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX) #define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -430,14 +432,15 @@ extern const char *powerpc_base_platform; CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_PAUSE_ZERO | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \ - CPU_FTR_UNALIGNED_LD_STD) + CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_DABRX) #define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \ - CPU_FTR_PURR | CPU_FTR_REAL_LE) + CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX) #define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2) #define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \ - CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN | CPU_FTR_ICSWX) + CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN | \ + CPU_FTR_ICSWX | CPU_FTR_DABRX ) #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 8e5fae8beaf..46793b58a76 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -513,7 +513,7 @@ label##_common: \ */ #define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ EXCEPTION_COMMON(trap, label, hdlr, ret_from_except_lite, \ - FINISH_NAP;RUNLATCH_ON;DISABLE_INTS) + FINISH_NAP;DISABLE_INTS;RUNLATCH_ON) /* * When the idle code in power4_idle puts the CPU into NAP mode, diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 0df54646f96..681bc0314b6 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -52,6 +52,7 @@ #define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000) #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000) #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) +#define FW_FEATURE_OPALv3 ASM_CONST(0x0000000400000000) #ifndef __ASSEMBLY__ @@ -69,7 +70,8 @@ enum { FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN, FW_FEATURE_PSERIES_ALWAYS = 0, - FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2, + FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2 | + FW_FEATURE_OPALv3, FW_FEATURE_POWERNV_ALWAYS = 0, FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index cf4df8e2139..0c7f2bfcf13 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -264,6 +264,7 @@ #define H_GET_MPP 0x2D4 #define H_HOME_NODE_ASSOCIATIVITY 0x2EC #define H_BEST_ENERGY 0x2F4 +#define H_XIRR_X 0x2FC #define H_RANDOM 0x300 #define H_COP 0x304 #define H_GET_MPP_X 0x314 diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index d615b28dda8..ba713f166fa 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -96,11 +96,12 @@ static inline bool arch_irqs_disabled(void) #endif #define hard_irq_disable() do { \ + u8 _was_enabled = get_paca()->soft_enabled; \ __hard_irq_disable(); \ - if (local_paca->soft_enabled) \ - trace_hardirqs_off(); \ get_paca()->soft_enabled = 0; \ get_paca()->irq_happened |= PACA_IRQ_HARD_DIS; \ + if (_was_enabled) \ + trace_hardirqs_off(); \ } while(0) static inline bool lazy_irq_pending(void) diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index b9dd382cb34..851bac7afa4 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -54,8 +54,16 @@ #define BOOKE_INTERRUPT_DEBUG 15 /* E500 */ -#define BOOKE_INTERRUPT_SPE_UNAVAIL 32 -#define BOOKE_INTERRUPT_SPE_FP_DATA 33 +#define BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL 32 +#define BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST 33 +/* + * TODO: Unify 32-bit and 64-bit kernel exception handlers to use same defines + */ +#define BOOKE_INTERRUPT_SPE_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL +#define BOOKE_INTERRUPT_SPE_FP_DATA BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST +#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL +#define BOOKE_INTERRUPT_ALTIVEC_ASSIST \ + BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST #define BOOKE_INTERRUPT_SPE_FP_ROUND 34 #define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35 #define BOOKE_INTERRUPT_DOORBELL 36 @@ -67,10 +75,6 @@ #define BOOKE_INTERRUPT_HV_SYSCALL 40 #define BOOKE_INTERRUPT_HV_PRIV 41 -/* altivec */ -#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL 42 -#define BOOKE_INTERRUPT_ALTIVEC_ASSIST 43 - /* book3s */ #define BOOK3S_INTERRUPT_SYSTEM_RESET 0x100 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 349ed85c7d6..08891d07aeb 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -107,8 +107,9 @@ struct kvmppc_vcpu_book3s { #define CONTEXT_GUEST 1 #define CONTEXT_GUEST_END 2 -#define VSID_REAL 0x1fffffffffc00000ULL -#define VSID_BAT 0x1fffffffffb00000ULL +#define VSID_REAL 0x0fffffffffc00000ULL +#define VSID_BAT 0x0fffffffffb00000ULL +#define VSID_1T 0x1000000000000000ULL #define VSID_REAL_DR 0x2000000000000000ULL #define VSID_REAL_IR 0x4000000000000000ULL #define VSID_PR 0x8000000000000000ULL @@ -123,6 +124,7 @@ extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu); extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); +extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size); extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long addr, diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index a73668a5f30..b467530e248 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -38,7 +38,7 @@ extern void drop_cop(unsigned long acop, struct mm_struct *mm); /* * switch_mm is the entry point called from the architecture independent - * code in kernel/sched.c + * code in kernel/sched/core.c */ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) diff --git a/arch/powerpc/include/asm/mpc52xx_psc.h b/arch/powerpc/include/asm/mpc52xx_psc.h index 2966df60422..d0ece257d31 100644 --- a/arch/powerpc/include/asm/mpc52xx_psc.h +++ b/arch/powerpc/include/asm/mpc52xx_psc.h @@ -299,4 +299,53 @@ struct mpc512x_psc_fifo { #define rxdata_32 rxdata.rxdata_32 }; +struct mpc5125_psc { + u8 mr1; /* PSC + 0x00 */ + u8 reserved0[3]; + u8 mr2; /* PSC + 0x04 */ + u8 reserved1[3]; + struct { + u16 status; /* PSC + 0x08 */ + u8 reserved2[2]; + u8 clock_select; /* PSC + 0x0c */ + u8 reserved3[3]; + } sr_csr; + u8 command; /* PSC + 0x10 */ + u8 reserved4[3]; + union { /* PSC + 0x14 */ + u8 buffer_8; + u16 buffer_16; + u32 buffer_32; + } buffer; + struct { + u8 ipcr; /* PSC + 0x18 */ + u8 reserved5[3]; + u8 acr; /* PSC + 0x1c */ + u8 reserved6[3]; + } ipcr_acr; + struct { + u16 isr; /* PSC + 0x20 */ + u8 reserved7[2]; + u16 imr; /* PSC + 0x24 */ + u8 reserved8[2]; + } isr_imr; + u8 ctur; /* PSC + 0x28 */ + u8 reserved9[3]; + u8 ctlr; /* PSC + 0x2c */ + u8 reserved10[3]; + u32 ccr; /* PSC + 0x30 */ + u32 ac97slots; /* PSC + 0x34 */ + u32 ac97cmd; /* PSC + 0x38 */ + u32 ac97data; /* PSC + 0x3c */ + u8 reserved11[4]; + u8 ip; /* PSC + 0x44 */ + u8 reserved12[3]; + u8 op1; /* PSC + 0x48 */ + u8 reserved13[3]; + u8 op0; /* PSC + 0x4c */ + u8 reserved14[3]; + u32 sicr; /* PSC + 0x50 */ + u8 reserved15[4]; /* make eq. sizeof(mpc52xx_psc) */ +}; + #endif /* __ASM_MPC52xx_PSC_H__ */ diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h index 5399f7e1810..127ab23e1f6 100644 --- a/arch/powerpc/include/asm/mutex.h +++ b/arch/powerpc/include/asm/mutex.h @@ -82,17 +82,15 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) * __mutex_fastpath_lock_retval - try to take the lock by moving the count * from 1 to a 0 value * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 * - * Change the count from 1 to a value lower than 1, and call <fail_fn> if - * it wasn't 1 originally. This function returns 0 if the fastpath succeeds, - * or anything the slow path function returns. + * Change the count from 1 to a value lower than 1. This function returns 0 + * if the fastpath succeeds, or -1 otherwise. */ static inline int -__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) +__mutex_fastpath_lock_retval(atomic_t *count) { if (unlikely(__mutex_dec_return_lock(count) < 0)) - return fail_fn(count); + return -1; return 0; } diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index b6c8b58b1d7..cbb9305ab15 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -243,7 +243,8 @@ enum OpalMCE_TlbErrorType { enum OpalThreadStatus { OPAL_THREAD_INACTIVE = 0x0, - OPAL_THREAD_STARTED = 0x1 + OPAL_THREAD_STARTED = 0x1, + OPAL_THREAD_UNAVAILABLE = 0x2 /* opal-v3 */ }; enum OpalPciBusCompare { @@ -563,6 +564,8 @@ extern void opal_nvram_init(void); extern int opal_machine_check(struct pt_regs *regs); +extern void opal_shutdown(void); + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_H */ diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 8b11b5bd993..2c1d8cb9b26 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -174,6 +174,8 @@ struct pci_dn { /* Get the pointer to a device_node's pci_dn */ #define PCI_DN(dn) ((struct pci_dn *) (dn)->data) +extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev); + extern void * update_dn_pci_info(struct device_node *dn, void *data); static inline int pci_device_from_OF_node(struct device_node *np, diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index 91acb12bac9..b66ae722a8e 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -186,7 +186,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pgtable_t pmd_pgtable(pmd_t pmd) { - return (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE); + return (pgtable_t)(pmd_val(pmd) & ~PMD_MASKED_BITS); } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 7aeb9555f6e..b6293d26bd3 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -198,9 +198,6 @@ extern void paging_init(void); */ #define kern_addr_valid(addr) (1) -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - #include <asm-generic/pgtable.h> diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index cea8496091f..2f1b6c5f817 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -523,6 +523,17 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,946) #define PPC440EP_ERR42 #endif +/* The following stops all load and store data streams associated with stream + * ID (ie. streams created explicitly). The embedded and server mnemonics for + * dcbt are different so we use machine "power4" here explicitly. + */ +#define DCBT_STOP_ALL_STREAM_IDS(scratch) \ +.machine push ; \ +.machine "power4" ; \ + lis scratch,0x60000000@h; \ + dcbt r0,scratch,0b01010; \ +.machine pop + /* * toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them * keep the address intact to be compatible with code shared with diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index d7e67ca8b4a..14a65836369 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -284,6 +284,12 @@ struct thread_struct { unsigned long ebbrr; unsigned long ebbhr; unsigned long bescr; + unsigned long siar; + unsigned long sdar; + unsigned long sier; + unsigned long mmcr0; + unsigned long mmcr2; + unsigned long mmcra; #endif }; @@ -403,21 +409,16 @@ static inline void prefetchw(const void *x) #endif #ifdef CONFIG_PPC64 -static inline unsigned long get_clean_sp(struct pt_regs *regs, int is_32) +static inline unsigned long get_clean_sp(unsigned long sp, int is_32) { - unsigned long sp; - if (is_32) - sp = regs->gpr[1] & 0x0ffffffffUL; - else - sp = regs->gpr[1]; - + return sp & 0x0ffffffffUL; return sp; } #else -static inline unsigned long get_clean_sp(struct pt_regs *regs, int is_32) +static inline unsigned long get_clean_sp(unsigned long sp, int is_32) { - return regs->gpr[1]; + return sp; } #endif diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h index 3e13e23e4fd..d836d945068 100644 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ b/arch/powerpc/include/asm/pte-hash64-64k.h @@ -47,7 +47,7 @@ * generic accessors and iterators here */ #define __real_pte(e,p) ((real_pte_t) { \ - (e), ((e) & _PAGE_COMBO) ? \ + (e), (pte_val(e) & _PAGE_COMBO) ? \ (pte_val(*((p) + PTRS_PER_PTE))) : 0 }) #define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \ (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf)) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index a6136515c7f..4a9e408644f 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -111,17 +111,6 @@ #define MSR_TM_TRANSACTIONAL(x) (((x) & MSR_TS_MASK) == MSR_TS_T) #define MSR_TM_SUSPENDED(x) (((x) & MSR_TS_MASK) == MSR_TS_S) -/* Reason codes describing kernel causes for transaction aborts. By - convention, bit0 is copied to TEXASR[56] (IBM bit 7) which is set if - the failure is persistent. -*/ -#define TM_CAUSE_RESCHED 0xfe -#define TM_CAUSE_TLBI 0xfc -#define TM_CAUSE_FAC_UNAV 0xfa -#define TM_CAUSE_SYSCALL 0xf9 /* Persistent */ -#define TM_CAUSE_MISC 0xf6 -#define TM_CAUSE_SIGNAL 0xf4 - #if defined(CONFIG_PPC_BOOK3S_64) #define MSR_64BIT MSR_SF diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index a8bc2bb4adc..34fd70488d8 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -264,6 +264,8 @@ extern void rtas_progress(char *s, unsigned short hex); extern void rtas_initialize(void); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); +extern int rtas_online_cpus_mask(cpumask_var_t cpus); +extern int rtas_offline_cpus_mask(cpumask_var_t cpus); extern int rtas_ibm_suspend_me(struct rtas_args *); struct rtc_time; diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h index fbe66c46389..9322c28aebd 100644 --- a/arch/powerpc/include/asm/signal.h +++ b/arch/powerpc/include/asm/signal.h @@ -3,5 +3,8 @@ #define __ARCH_HAS_SA_RESTORER #include <uapi/asm/signal.h> +#include <uapi/asm/ptrace.h> + +extern unsigned long get_tm_stackpointer(struct pt_regs *regs); #endif /* _ASM_POWERPC_SIGNAL_H */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 8ceea14d6fe..ba7b1973866 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -97,7 +97,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_PERFMON_CTXSW 6 /* perfmon needs ctxsw calls */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ -#define TIF_MEMDIE 9 /* is terminating due to OOM killer */ +#define TIF_NOHZ 9 /* in adaptive nohz mode */ #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ @@ -106,6 +106,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ #define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation for stack store? */ +#define TIF_MEMDIE 17 /* is terminating due to OOM killer */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -124,8 +125,10 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_UPROBE (1<<TIF_UPROBE) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) +#define _TIF_NOHZ (1<<TIF_NOHZ) #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT) + _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE) diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h index 4b4449abf3f..9dfbc34bdbf 100644 --- a/arch/powerpc/include/asm/tm.h +++ b/arch/powerpc/include/asm/tm.h @@ -5,6 +5,8 @@ * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation. */ +#include <uapi/asm/tm.h> + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM extern void do_load_up_transact_fpu(struct thread_struct *thread); extern void do_load_up_transact_altivec(struct thread_struct *thread); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 4db49590acf..9485b43a7c0 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -178,7 +178,7 @@ do { \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ if (!is_kernel_addr((unsigned long)__pu_addr)) \ - might_sleep(); \ + might_fault(); \ __chk_user_ptr(ptr); \ __put_user_size((x), __pu_addr, (size), __pu_err); \ __pu_err; \ @@ -188,7 +188,7 @@ do { \ ({ \ long __pu_err = -EFAULT; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - might_sleep(); \ + might_fault(); \ if (access_ok(VERIFY_WRITE, __pu_addr, size)) \ __put_user_size((x), __pu_addr, (size), __pu_err); \ __pu_err; \ @@ -268,7 +268,7 @@ do { \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ - might_sleep(); \ + might_fault(); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ @@ -282,7 +282,7 @@ do { \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ - might_sleep(); \ + might_fault(); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ @@ -294,7 +294,7 @@ do { \ long __gu_err = -EFAULT; \ unsigned long __gu_val = 0; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - might_sleep(); \ + might_fault(); \ if (access_ok(VERIFY_READ, __gu_addr, (size))) \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ @@ -419,14 +419,14 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to, static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long size) { - might_sleep(); + might_fault(); return __copy_from_user_inatomic(to, from, size); } static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long size) { - might_sleep(); + might_fault(); return __copy_to_user_inatomic(to, from, size); } @@ -434,7 +434,7 @@ extern unsigned long __clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) { - might_sleep(); + might_fault(); if (likely(access_ok(VERIFY_WRITE, addr, size))) return __clear_user(addr, size); if ((unsigned long)addr < TASK_SIZE) { diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index 5a7510e9d09..dc590919f8e 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -52,6 +52,7 @@ extern void __init udbg_init_40x_realmode(void); extern void __init udbg_init_cpm(void); extern void __init udbg_init_usbgecko(void); extern void __init udbg_init_wsp(void); +extern void __init udbg_init_memcons(void); extern void __init udbg_init_ehv_bc(void); extern void __init udbg_init_ps3gelic(void); extern void __init udbg_init_debug_opal_raw(void); diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index f7bca637074..5182c8622b5 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -40,6 +40,7 @@ header-y += statfs.h header-y += swab.h header-y += termbits.h header-y += termios.h +header-y += tm.h header-y += types.h header-y += ucontext.h header-y += unistd.h diff --git a/arch/powerpc/include/uapi/asm/tm.h b/arch/powerpc/include/uapi/asm/tm.h new file mode 100644 index 00000000000..85059a00f56 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/tm.h @@ -0,0 +1,18 @@ +#ifndef _ASM_POWERPC_TM_H +#define _ASM_POWERPC_TM_H + +/* Reason codes describing kernel causes for transaction aborts. By + * convention, bit0 is copied to TEXASR[56] (IBM bit 7) which is set if + * the failure is persistent. PAPR saves 0xff-0xe0 for the hypervisor. + */ +#define TM_CAUSE_PERSISTENT 0x01 +#define TM_CAUSE_RESCHED 0xde +#define TM_CAUSE_TLBI 0xdc +#define TM_CAUSE_FAC_UNAV 0xda +#define TM_CAUSE_SYSCALL 0xd8 /* future use */ +#define TM_CAUSE_MISC 0xd6 /* future use */ +#define TM_CAUSE_SIGNAL 0xd4 +#define TM_CAUSE_ALIGNMENT 0xd2 +#define TM_CAUSE_EMULATE 0xd0 + +#endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b51a97cfedf..6f16ffafa6f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -127,6 +127,12 @@ int main(void) DEFINE(THREAD_BESCR, offsetof(struct thread_struct, bescr)); DEFINE(THREAD_EBBHR, offsetof(struct thread_struct, ebbhr)); DEFINE(THREAD_EBBRR, offsetof(struct thread_struct, ebbrr)); + DEFINE(THREAD_SIAR, offsetof(struct thread_struct, siar)); + DEFINE(THREAD_SDAR, offsetof(struct thread_struct, sdar)); + DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier)); + DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0)); + DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2)); + DEFINE(THREAD_MMCRA, offsetof(struct thread_struct, mmcra)); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch)); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index a283b6442b2..18b5b9cf8e3 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -135,8 +135,12 @@ __init_HFSCR: blr __init_TLB: - /* Clear the TLB */ - li r6,128 + /* + * Clear the TLB using the "IS 3" form of tlbiel instruction + * (invalidate by congruence class). P7 has 128 CCs, P8 has 512 + * so we just always do 512 + */ + li r6,512 mtctr r6 li r7,0xc00 /* IS field = 0b11 */ ptesync diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c60bbec25c1..2a45d0f0438 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -452,7 +452,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER8, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_type = PPC_OPROFILE_POWER4, + .oprofile_type = PPC_OPROFILE_INVALID, .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, @@ -482,7 +482,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7+ (raw)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .cpu_user_features = COMMON_USER2_POWER7, + .cpu_user_features2 = COMMON_USER2_POWER7, .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, @@ -507,7 +507,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power8", - .oprofile_type = PPC_OPROFILE_POWER4, + .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .platform = "power8", diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 9ec3fe174cb..779a78c2643 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -69,16 +69,6 @@ void __init setup_kdump_trampoline(void) } #endif /* CONFIG_NONSTATIC_KERNEL */ -static int __init parse_savemaxmem(char *p) -{ - if (p) - saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1; - - return 1; -} -__setup("savemaxmem=", parse_savemaxmem); - - static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize, unsigned long offset, int userbuf) { diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e514de57a12..22b45a4955c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -439,8 +439,6 @@ ret_from_fork: ret_from_kernel_thread: REST_NVGPRS(r1) bl schedule_tail - li r3,0 - stw r3,0(r1) mtlr r14 mr r3,r15 PPC440EP_ERR42 @@ -851,7 +849,7 @@ resume_kernel: /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ CURRENT_THREAD_INFO(r9, r1) lwz r8,TI_FLAGS(r9) - andis. r8,r8,_TIF_EMULATE_STACK_STORE@h + andis. r0,r8,_TIF_EMULATE_STACK_STORE@h beq+ 1f addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 915fbb4fc2f..8741c854e03 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -33,6 +33,7 @@ #include <asm/irqflags.h> #include <asm/ftrace.h> #include <asm/hw_irq.h> +#include <asm/context_tracking.h> /* * System calls. @@ -376,8 +377,6 @@ _GLOBAL(ret_from_fork) _GLOBAL(ret_from_kernel_thread) bl .schedule_tail REST_NVGPRS(r1) - li r3,0 - std r3,0(r1) ld r14, 0(r14) mtlr r14 mr r3,r15 @@ -488,6 +487,13 @@ BEGIN_FTR_SECTION ldarx r6,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS) +#ifdef CONFIG_PPC_BOOK3S +/* Cancel all explict user streams as they will have no use after context + * switch and will stop the HW from creating streams itself + */ + DCBT_STOP_ALL_STREAM_IDS(r6) +#endif + addi r6,r4,-THREAD /* Convert THREAD to 'current' */ std r6,PACACURRENT(r13) /* Set new 'current' */ @@ -634,7 +640,7 @@ _GLOBAL(ret_from_except_lite) andi. r0,r4,_TIF_NEED_RESCHED beq 1f bl .restore_interrupts - bl .schedule + SCHEDULE_USER b .ret_from_except_lite 1: bl .save_nvgprs diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 42a756eec9f..645170a07ad 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -489,7 +489,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) */ mfspr r14,SPRN_DBSR /* check single-step/branch taken */ - andis. r15,r14,DBSR_IC@h + andis. r15,r14,(DBSR_IC|DBSR_BT)@h beq+ 1f LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) @@ -500,7 +500,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) bge+ cr1,1f /* here it looks like we got an inappropriate debug exception. */ - lis r14,DBSR_IC@h /* clear the IC event */ + lis r14,(DBSR_IC|DBSR_BT)@h /* clear the event */ rlwinm r11,r11,0,~MSR_DE /* clear DE in the CSRR1 value */ mtspr SPRN_DBSR,r14 mtspr SPRN_CSRR1,r11 @@ -555,7 +555,7 @@ kernel_dbg_exc: */ mfspr r14,SPRN_DBSR /* check single-step/branch taken */ - andis. r15,r14,DBSR_IC@h + andis. r15,r14,(DBSR_IC|DBSR_BT)@h beq+ 1f LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) @@ -566,7 +566,7 @@ kernel_dbg_exc: bge+ cr1,1f /* here it looks like we got an inappropriate debug exception. */ - lis r14,DBSR_IC@h /* clear the IC event */ + lis r14,(DBSR_IC|DBSR_BT)@h /* clear the event */ rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value */ mtspr SPRN_DBSR,r14 mtspr SPRN_DSRR1,r11 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e6eba1bf61a..40e4a17c8ba 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -454,38 +454,14 @@ BEGIN_FTR_SECTION xori r10,r10,(MSR_FE0|MSR_FE1) mtmsrd r10 sync - fmr 0,0 - fmr 1,1 - fmr 2,2 - fmr 3,3 - fmr 4,4 - fmr 5,5 - fmr 6,6 - fmr 7,7 - fmr 8,8 - fmr 9,9 - fmr 10,10 - fmr 11,11 - fmr 12,12 - fmr 13,13 - fmr 14,14 - fmr 15,15 - fmr 16,16 - fmr 17,17 - fmr 18,18 - fmr 19,19 - fmr 20,20 - fmr 21,21 - fmr 22,22 - fmr 23,23 - fmr 24,24 - fmr 25,25 - fmr 26,26 - fmr 27,27 - fmr 28,28 - fmr 29,29 - fmr 30,30 - fmr 31,31 + +#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1 +#define FMR4(n) FMR2(n) ; FMR2(n+2) +#define FMR8(n) FMR4(n) ; FMR4(n+4) +#define FMR16(n) FMR8(n) ; FMR8(n+8) +#define FMR32(n) FMR16(n) ; FMR16(n+16) + FMR32(0) + FTR_SECTION_ELSE /* * To denormalise we need to move a copy of the register to itself. @@ -495,39 +471,25 @@ FTR_SECTION_ELSE oris r10,r10,MSR_VSX@h mtmsrd r10 sync - XVCPSGNDP(0,0,0) - XVCPSGNDP(1,1,1) - XVCPSGNDP(2,2,2) - XVCPSGNDP(3,3,3) - XVCPSGNDP(4,4,4) - XVCPSGNDP(5,5,5) - XVCPSGNDP(6,6,6) - XVCPSGNDP(7,7,7) - XVCPSGNDP(8,8,8) - XVCPSGNDP(9,9,9) - XVCPSGNDP(10,10,10) - XVCPSGNDP(11,11,11) - XVCPSGNDP(12,12,12) - XVCPSGNDP(13,13,13) - XVCPSGNDP(14,14,14) - XVCPSGNDP(15,15,15) - XVCPSGNDP(16,16,16) - XVCPSGNDP(17,17,17) - XVCPSGNDP(18,18,18) - XVCPSGNDP(19,19,19) - XVCPSGNDP(20,20,20) - XVCPSGNDP(21,21,21) - XVCPSGNDP(22,22,22) - XVCPSGNDP(23,23,23) - XVCPSGNDP(24,24,24) - XVCPSGNDP(25,25,25) - XVCPSGNDP(26,26,26) - XVCPSGNDP(27,27,27) - XVCPSGNDP(28,28,28) - XVCPSGNDP(29,29,29) - XVCPSGNDP(30,30,30) - XVCPSGNDP(31,31,31) + +#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1) +#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2) +#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4) +#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8) +#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16) + XVCPSGNDP32(0) + ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206) + +BEGIN_FTR_SECTION + b denorm_done +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) +/* + * To denormalise we need to move a copy of the register to itself. + * For POWER8 we need to do that for all 64 VSX registers + */ + XVCPSGNDP32(32) +denorm_done: mtspr SPRN_HSRR0,r11 mtcrf 0x80,r9 ld r9,PACA_EXGEN+EX_R9(r13) @@ -721,7 +683,7 @@ machine_check_common: STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) - STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception) + STD_EXCEPTION_COMMON(0xe40, emulation_assist, .emulation_assist_interrupt) STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) #ifdef CONFIG_PPC_DOORBELL STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, .doorbell_exception) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5cbcf4d5a80..ea185e0b3ca 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -162,7 +162,7 @@ notrace unsigned int __check_irq_replay(void) * in case we also had a rollover while hard disabled */ local_paca->irq_happened &= ~PACA_IRQ_DEC; - if (decrementer_check_overflow()) + if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow()) return 0x900; /* Finally check if an external interrupt happened */ diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 6782221d49b..db28032e320 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -750,13 +750,8 @@ EXPORT_SYMBOL_GPL(kvm_hypercall); static __init void kvm_free_tmp(void) { - unsigned long start, end; - - start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK; - end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK; - - /* Free the tmp space we don't need */ - free_reserved_area(start, end, 0, NULL); + free_reserved_area(&kvm_tmp[kvm_tmp_index], + &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL); } static int __init kvm_guest_init(void) diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 466a2908bb6..611acdf3009 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -17,6 +17,7 @@ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/cpu.h> +#include <linux/hardirq.h> #include <asm/page.h> #include <asm/current.h> @@ -335,10 +336,13 @@ void default_machine_kexec(struct kimage *image) pr_debug("kexec: Starting switchover sequence.\n"); /* switch to a staticly allocated stack. Based on irq stack code. + * We setup preempt_count to avoid using VMX in memcpy. * XXX: the task struct will likely be invalid once we do the copy! */ kexec_stack.thread_info.task = current_thread_info()->task; kexec_stack.thread_info.flags = 0; + kexec_stack.thread_info.preempt_count = HARDIRQ_OFFSET; + kexec_stack.thread_info.cpu = current_thread_info()->cpu; /* We need a static PACA, too; copy this CPU's PACA over and switch to * it. Also poison per_cpu_offset to catch anyone using non-static diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 19e096bd0e7..e469f30e6ee 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -657,6 +657,17 @@ _GLOBAL(__ucmpdi2) li r3,2 blr +_GLOBAL(__bswapdi2) + rotlwi r9,r4,8 + rotlwi r10,r3,8 + rlwimi r9,r4,24,0,7 + rlwimi r10,r3,24,0,7 + rlwimi r9,r4,24,16,23 + rlwimi r10,r3,24,16,23 + mr r3,r9 + mr r4,r10 + blr + _GLOBAL(abs) srawi r4,r3,31 xor r3,r3,r4 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 5cfa8008693..6820e45f557 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -234,6 +234,17 @@ _GLOBAL(__flush_dcache_icache) isync blr +_GLOBAL(__bswapdi2) + srdi r8,r3,32 + rlwinm r7,r3,8,0xffffffff + rlwimi r7,r3,24,0,7 + rlwinm r9,r8,8,0xffffffff + rlwimi r7,r3,24,16,23 + rlwimi r9,r8,24,0,7 + rlwimi r9,r8,24,16,23 + sldi r7,r7,32 + or r3,r7,r9 + blr #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) /* diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index f5c5c90799a..f46914a0f33 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -359,7 +359,6 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, enum pci_mmap_state mmap_state, int write_combine) { - unsigned long prot = pgprot_val(protection); /* Write combine is always 0 on non-memory space mappings. On * memory space, if the user didn't pass 1, we check for a @@ -376,9 +375,9 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, /* XXX would be nice to have a way to ask for write-through */ if (write_combine) - return pgprot_noncached_wc(prot); + return pgprot_noncached_wc(protection); else - return pgprot_noncached(prot); + return pgprot_noncached(protection); } /* @@ -658,15 +657,6 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar, * ranges. However, some machines (thanks Apple !) tend to split their * space into lots of small contiguous ranges. So we have to coalesce. * - * - We can only cope with all memory ranges having the same offset - * between CPU addresses and PCI addresses. Unfortunately, some bridges - * are setup for a large 1:1 mapping along with a small "window" which - * maps PCI address 0 to some arbitrary high address of the CPU space in - * order to give access to the ISA memory hole. - * The way out of here that I've chosen for now is to always set the - * offset based on the first resource found, then override it if we - * have a different offset and the previous was set by an ISA hole. - * * - Some busses have IO space not starting at 0, which causes trouble with * the way we do our IO resource renumbering. The code somewhat deals with * it for 64 bits but I would expect problems on 32 bits. @@ -681,10 +671,9 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, int rlen; int pna = of_n_addr_cells(dev); int np = pna + 5; - int memno = 0, isa_hole = -1; + int memno = 0; u32 pci_space; unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size; - unsigned long long isa_mb = 0; struct resource *res; printk(KERN_INFO "PCI host bridge %s %s ranges:\n", @@ -778,8 +767,6 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, } /* Handles ISA memory hole space here */ if (pci_addr == 0) { - isa_mb = cpu_addr; - isa_hole = memno; if (primary || isa_mem_base == 0) isa_mem_base = cpu_addr; hose->isa_mem_phys = cpu_addr; @@ -840,6 +827,7 @@ static void pcibios_fixup_resources(struct pci_dev *dev) } for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { struct resource *res = dev->resource + i; + struct pci_bus_region reg; if (!res->flags) continue; @@ -848,8 +836,9 @@ static void pcibios_fixup_resources(struct pci_dev *dev) * at 0 as unset as well, except if PCI_PROBE_ONLY is also set * since in that case, we don't want to re-assign anything */ + pcibios_resource_to_bus(dev, ®, res); if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) || - (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) { + (reg.start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) { /* Only print message if not re-assigning */ if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] " @@ -1005,7 +994,7 @@ void pcibios_setup_bus_self(struct pci_bus *bus) ppc_md.pci_dma_bus_setup(bus); } -void pcibios_setup_device(struct pci_dev *dev) +static void pcibios_setup_device(struct pci_dev *dev) { /* Fixup NUMA node as it may not be setup yet by the generic * code and is needed by the DMA init @@ -1026,6 +1015,17 @@ void pcibios_setup_device(struct pci_dev *dev) ppc_md.pci_irq_fixup(dev); } +int pcibios_add_device(struct pci_dev *dev) +{ + /* + * We can only call pcibios_setup_device() after bus setup is complete, + * since some of the platform specific DMA setup code depends on it. + */ + if (dev->bus->is_added) + pcibios_setup_device(dev); + return 0; +} + void pcibios_setup_bus_devices(struct pci_bus *bus) { struct pci_dev *dev; @@ -1480,10 +1480,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) if (ppc_md.pcibios_enable_device_hook(dev)) return -EINVAL; - /* avoid pcie irq fix up impact on cardbus */ - if (dev->hdr_type != PCI_HEADER_TYPE_CARDBUS) - pcibios_setup_device(dev); - return pci_enable_resources(dev, mask); } @@ -1521,9 +1517,10 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose, for (i = 0; i < 3; ++i) { res = &hose->mem_resources[i]; if (!res->flags) { - printk(KERN_ERR "PCI: Memory resource 0 not set for " - "host bridge %s (domain %d)\n", - hose->dn->full_name, hose->global_number); + if (i == 0) + printk(KERN_ERR "PCI: Memory resource 0 not set for " + "host bridge %s (domain %d)\n", + hose->dn->full_name, hose->global_number); continue; } offset = hose->mem_offset[i]; diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 873050d2684..2e8629654ca 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -266,3 +266,13 @@ int pcibus_to_node(struct pci_bus *bus) } EXPORT_SYMBOL(pcibus_to_node); #endif + +static void quirk_radeon_32bit_msi(struct pci_dev *dev) +{ + struct pci_dn *pdn = pci_get_pdn(dev); + + if (pdn) + pdn->force_32bit_msi = 1; +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x68f2, quirk_radeon_32bit_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0xaa68, quirk_radeon_32bit_msi); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index e7af165f8b9..df038442548 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -32,6 +32,14 @@ #include <asm/ppc-pci.h> #include <asm/firmware.h> +struct pci_dn *pci_get_pdn(struct pci_dev *pdev) +{ + struct device_node *dn = pci_device_to_OF_node(pdev); + if (!dn) + return NULL; + return PCI_DN(dn); +} + /* * Traverse_func that inits the PCI fields of the device node. * NOTE: this *must* be done before read/write config to the device. diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 2a67e9baa59..6b0ba5854d9 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -128,7 +128,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, const char *type; struct pci_slot *slot; - dev = alloc_pci_dev(); + dev = pci_alloc_dev(bus); if (!dev) return NULL; type = of_get_property(node, "device_type", NULL); @@ -137,7 +137,6 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, pr_debug(" create device, devfn: %x, type: %s\n", devfn, type); - dev->bus = bus; dev->dev.of_node = of_node_get(node); dev->dev.parent = bus->bridge; dev->dev.bus = &pci_bus_type; @@ -165,7 +164,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, pr_debug(" class: 0x%x\n", dev->class); pr_debug(" revision: 0x%x\n", dev->revision); - dev->current_state = 4; /* unknown power state */ + dev->current_state = PCI_UNKNOWN; /* unknown power state */ dev->error_state = pci_channel_io_normal; dev->dma_mask = 0xffffffff; diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 78b8766fd79..c2966658699 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -143,7 +143,8 @@ EXPORT_SYMBOL(__lshrdi3); int __ucmpdi2(unsigned long long, unsigned long long); EXPORT_SYMBOL(__ucmpdi2); #endif - +long long __bswapdi2(long long); +EXPORT_SYMBOL(__bswapdi2); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memmove); diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index feb8580fdc8..c30612aad68 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -29,25 +29,9 @@ #ifdef CONFIG_PPC64 -static loff_t page_map_seek( struct file *file, loff_t off, int whence) +static loff_t page_map_seek(struct file *file, loff_t off, int whence) { - loff_t new; - switch(whence) { - case 0: - new = off; - break; - case 1: - new = file->f_pos + off; - break; - case 2: - new = PAGE_SIZE + off; - break; - default: - return -EINVAL; - } - if ( new < 0 || new > PAGE_SIZE ) - return -EINVAL; - return (file->f_pos = new); + return fixed_size_llseek(file, off, whence, PAGE_SIZE); } static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ceb4e7b62cf..076d1242507 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -339,6 +339,13 @@ static void set_debug_reg_defaults(struct thread_struct *thread) static void prime_debug_regs(struct thread_struct *thread) { + /* + * We could have inherited MSR_DE from userspace, since + * it doesn't get cleared on exception entry. Make sure + * MSR_DE is clear before we enable any debug events. + */ + mtmsr(mfmsr() & ~MSR_DE); + mtspr(SPRN_IAC1, thread->iac1); mtspr(SPRN_IAC2, thread->iac2); #if CONFIG_PPC_ADV_DEBUG_IACS > 2 @@ -392,7 +399,8 @@ static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) { mtspr(SPRN_DABR, dabr); - mtspr(SPRN_DABRX, dabrx); + if (cpu_has_feature(CPU_FTR_DABRX)) + mtspr(SPRN_DABRX, dabrx); return 0; } #else @@ -971,6 +979,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, * do some house keeping and then return from the fork or clone * system call, using the stack frame created above. */ + ((unsigned long *)sp)[0] = 0; sp -= sizeof(struct pt_regs); kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; @@ -1360,7 +1369,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) #ifdef CONFIG_PPC64 /* Called with hard IRQs off */ -void __ppc64_runlatch_on(void) +void notrace __ppc64_runlatch_on(void) { struct thread_info *ti = current_thread_info(); unsigned long ctrl; @@ -1373,7 +1382,7 @@ void __ppc64_runlatch_on(void) } /* Called with hard IRQs off */ -void __ppc64_runlatch_off(void) +void notrace __ppc64_runlatch_off(void) { struct thread_info *ti = current_thread_info(); unsigned long ctrl; diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 3b14d320e69..98c2fc19871 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -32,6 +32,7 @@ #include <trace/syscall.h> #include <linux/hw_breakpoint.h> #include <linux/perf_event.h> +#include <linux/context_tracking.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -1788,6 +1789,8 @@ long do_syscall_trace_enter(struct pt_regs *regs) { long ret = 0; + user_exit(); + secure_computing_strict(regs->gpr[0]); if (test_thread_flag(TIF_SYSCALL_TRACE) && @@ -1832,4 +1835,6 @@ void do_syscall_trace_leave(struct pt_regs *regs) step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); + + user_enter(); } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 1fd6e7b2f39..52add6f3e20 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -19,6 +19,7 @@ #include <linux/init.h> #include <linux/capability.h> #include <linux/delay.h> +#include <linux/cpu.h> #include <linux/smp.h> #include <linux/completion.h> #include <linux/cpumask.h> @@ -807,6 +808,95 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } +enum rtas_cpu_state { + DOWN, + UP, +}; + +#ifndef CONFIG_SMP +static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, + cpumask_var_t cpus) +{ + if (!cpumask_empty(cpus)) { + cpumask_clear(cpus); + return -EINVAL; + } else + return 0; +} +#else +/* On return cpumask will be altered to indicate CPUs changed. + * CPUs with states changed will be set in the mask, + * CPUs with status unchanged will be unset in the mask. */ +static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, + cpumask_var_t cpus) +{ + int cpu; + int cpuret = 0; + int ret = 0; + + if (cpumask_empty(cpus)) + return 0; + + for_each_cpu(cpu, cpus) { + switch (state) { + case DOWN: + cpuret = cpu_down(cpu); + break; + case UP: + cpuret = cpu_up(cpu); + break; + } + if (cpuret) { + pr_debug("%s: cpu_%s for cpu#%d returned %d.\n", + __func__, + ((state == UP) ? "up" : "down"), + cpu, cpuret); + if (!ret) + ret = cpuret; + if (state == UP) { + /* clear bits for unchanged cpus, return */ + cpumask_shift_right(cpus, cpus, cpu); + cpumask_shift_left(cpus, cpus, cpu); + break; + } else { + /* clear bit for unchanged cpu, continue */ + cpumask_clear_cpu(cpu, cpus); + } + } + } + + return ret; +} +#endif + +int rtas_online_cpus_mask(cpumask_var_t cpus) +{ + int ret; + + ret = rtas_cpu_state_change_mask(UP, cpus); + + if (ret) { + cpumask_var_t tmp_mask; + + if (!alloc_cpumask_var(&tmp_mask, GFP_TEMPORARY)) + return ret; + + /* Use tmp_mask to preserve cpus mask from first failure */ + cpumask_copy(tmp_mask, cpus); + rtas_offline_cpus_mask(tmp_mask); + free_cpumask_var(tmp_mask); + } + + return ret; +} +EXPORT_SYMBOL(rtas_online_cpus_mask); + +int rtas_offline_cpus_mask(cpumask_var_t cpus) +{ + return rtas_cpu_state_change_mask(DOWN, cpus); +} +EXPORT_SYMBOL(rtas_offline_cpus_mask); + int rtas_ibm_suspend_me(struct rtas_args *args) { long state; @@ -814,6 +904,8 @@ int rtas_ibm_suspend_me(struct rtas_args *args) unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; struct rtas_suspend_me_data data; DECLARE_COMPLETION_ONSTACK(done); + cpumask_var_t offline_mask; + int cpuret; if (!rtas_service_present("ibm,suspend-me")) return -ENOSYS; @@ -837,11 +929,24 @@ int rtas_ibm_suspend_me(struct rtas_args *args) return 0; } + if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY)) + return -ENOMEM; + atomic_set(&data.working, 0); atomic_set(&data.done, 0); atomic_set(&data.error, 0); data.token = rtas_token("ibm,suspend-me"); data.complete = &done; + + /* All present CPUs must be online */ + cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask); + cpuret = rtas_online_cpus_mask(offline_mask); + if (cpuret) { + pr_err("%s: Could not bring present CPUs online.\n", __func__); + atomic_set(&data.error, cpuret); + goto out; + } + stop_topology_update(); /* Call function on all CPUs. One of us will make the @@ -857,6 +962,14 @@ int rtas_ibm_suspend_me(struct rtas_args *args) start_topology_update(); + /* Take down CPUs not online prior to suspend */ + cpuret = rtas_offline_cpus_mask(offline_mask); + if (cpuret) + pr_warn("%s: Could not restore CPUs to offline state.\n", + __func__); + +out: + free_cpumask_var(offline_mask); return atomic_read(&data.error); } #else /* CONFIG_PPC_PSERIES */ diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 5b302247012..2f3cdb01506 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -89,6 +89,7 @@ /* Array sizes */ #define VALIDATE_BUF_SIZE 4096 +#define VALIDATE_MSG_LEN 256 #define RTAS_MSG_MAXLEN 64 /* Quirk - RTAS requires 4k list length and block size */ @@ -466,7 +467,7 @@ static void validate_flash(struct rtas_validate_flash_t *args_buf) } static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf, - char *msg) + char *msg, int msglen) { int n; @@ -474,7 +475,8 @@ static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf, n = sprintf(msg, "%d\n", args_buf->update_results); if ((args_buf->update_results >= VALIDATE_CUR_UNKNOWN) || (args_buf->update_results == VALIDATE_TMP_UPDATE)) - n += sprintf(msg + n, "%s\n", args_buf->buf); + n += snprintf(msg + n, msglen - n, "%s\n", + args_buf->buf); } else { n = sprintf(msg, "%d\n", args_buf->status); } @@ -486,11 +488,11 @@ static ssize_t validate_flash_read(struct file *file, char __user *buf, { struct rtas_validate_flash_t *const args_buf = &rtas_validate_flash_data; - char msg[RTAS_MSG_MAXLEN]; + char msg[VALIDATE_MSG_LEN]; int msglen; mutex_lock(&rtas_validate_flash_mutex); - msglen = get_validate_flash_msg(args_buf, msg); + msglen = get_validate_flash_msg(args_buf, msg, VALIDATE_MSG_LEN); mutex_unlock(&rtas_validate_flash_mutex); return simple_read_from_buffer(buf, count, ppos, msg, msglen); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index cf12eae02de..457e97aa294 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -13,10 +13,12 @@ #include <linux/signal.h> #include <linux/uprobes.h> #include <linux/key.h> +#include <linux/context_tracking.h> #include <asm/hw_breakpoint.h> #include <asm/uaccess.h> #include <asm/unistd.h> #include <asm/debug.h> +#include <asm/tm.h> #include "signal.h" @@ -24,18 +26,18 @@ * through debug.exception-trace sysctl. */ -int show_unhandled_signals = 0; +int show_unhandled_signals = 1; /* * Allocate space for the signal frame */ -void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, +void __user * get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size, int is_32) { unsigned long oldsp, newsp; /* Default to using normal stack */ - oldsp = get_clean_sp(regs, is_32); + oldsp = get_clean_sp(sp, is_32); /* Check for alt stack */ if ((ka->sa.sa_flags & SA_ONSTACK) && @@ -159,6 +161,8 @@ static int do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) { + user_exit(); + if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); @@ -169,4 +173,41 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); } + + user_enter(); +} + +unsigned long get_tm_stackpointer(struct pt_regs *regs) +{ + /* When in an active transaction that takes a signal, we need to be + * careful with the stack. It's possible that the stack has moved back + * up after the tbegin. The obvious case here is when the tbegin is + * called inside a function that returns before a tend. In this case, + * the stack is part of the checkpointed transactional memory state. + * If we write over this non transactionally or in suspend, we are in + * trouble because if we get a tm abort, the program counter and stack + * pointer will be back at the tbegin but our in memory stack won't be + * valid anymore. + * + * To avoid this, when taking a signal in an active transaction, we + * need to use the stack pointer from the checkpointed state, rather + * than the speculated state. This ensures that the signal context + * (written tm suspended) will be written below the stack required for + * the rollback. The transaction is aborted becuase of the treclaim, + * so any memory written between the tbegin and the signal will be + * rolled back anyway. + * + * For signals taken in non-TM or suspended mode, we use the + * normal/non-checkpointed stack pointer. + */ + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) { + tm_enable(); + tm_reclaim(¤t->thread, regs->msr, TM_CAUSE_SIGNAL); + if (MSR_TM_TRANSACTIONAL(regs->msr)) + return current->thread.ckpt_regs.gpr[1]; + } +#endif + return regs->gpr[1]; } diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index ec84c901cea..c69b9aeb9f2 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -12,7 +12,7 @@ extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); -extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, +extern void __user * get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size, int is_32); extern int handle_signal32(unsigned long sig, struct k_sigaction *ka, diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 95068bf569a..201385c3a1a 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -503,12 +503,6 @@ static int save_tm_user_regs(struct pt_regs *regs, { unsigned long msr = regs->msr; - /* tm_reclaim rolls back all reg states, updating thread.ckpt_regs, - * thread.transact_fpr[], thread.transact_vr[], etc. - */ - tm_enable(); - tm_reclaim(¤t->thread, msr, TM_CAUSE_SIGNAL); - /* Make sure floating point registers are stored in regs */ flush_fp_to_thread(current); @@ -965,7 +959,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ - rt_sf = get_sigframe(ka, regs, sizeof(*rt_sf), 1); + rt_sf = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*rt_sf), 1); addr = rt_sf; if (unlikely(rt_sf == NULL)) goto badframe; @@ -1403,7 +1397,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, unsigned long tramp; /* Set up Signal Frame */ - frame = get_sigframe(ka, regs, sizeof(*frame), 1); + frame = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*frame), 1); if (unlikely(frame == NULL)) goto badframe; sc = (struct sigcontext __user *) &frame->sctx; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index c1794286098..345947367ec 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -154,11 +154,12 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, * As above, but Transactional Memory is in use, so deliver sigcontexts * containing checkpointed and transactional register states. * - * To do this, we treclaim to gather both sets of registers and set up the - * 'normal' sigcontext registers with rolled-back register values such that a - * simple signal handler sees a correct checkpointed register state. - * If interested, a TM-aware sighandler can examine the transactional registers - * in the 2nd sigcontext to determine the real origin of the signal. + * To do this, we treclaim (done before entering here) to gather both sets of + * registers and set up the 'normal' sigcontext registers with rolled-back + * register values such that a simple signal handler sees a correct + * checkpointed register state. If interested, a TM-aware sighandler can + * examine the transactional registers in the 2nd sigcontext to determine the + * real origin of the signal. */ static long setup_tm_sigcontexts(struct sigcontext __user *sc, struct sigcontext __user *tm_sc, @@ -184,16 +185,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, BUG_ON(!MSR_TM_ACTIVE(regs->msr)); - /* tm_reclaim rolls back all reg states, saving checkpointed (older) - * GPRs to thread.ckpt_regs and (if used) FPRs to (newer) - * thread.transact_fp and/or VRs to (newer) thread.transact_vr. - * THEN we save out FP/VRs, if necessary, to the checkpointed (older) - * thread.fr[]/vr[]s. The transactional (newer) GPRs are on the - * stack, in *regs. - */ - tm_enable(); - tm_reclaim(¤t->thread, msr, TM_CAUSE_SIGNAL); - flush_fp_to_thread(current); #ifdef CONFIG_ALTIVEC @@ -711,7 +702,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, unsigned long newsp = 0; long err = 0; - frame = get_sigframe(ka, regs, sizeof(*frame), 0); + frame = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*frame), 0); if (unlikely(frame == NULL)) goto badframe; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 83efa2f7d92..c0e5caf8ccc 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -35,6 +35,7 @@ #include <linux/kdebug.h> #include <linux/debugfs.h> #include <linux/ratelimit.h> +#include <linux/context_tracking.h> #include <asm/emulated_ops.h> #include <asm/pgtable.h> @@ -52,6 +53,7 @@ #ifdef CONFIG_PPC64 #include <asm/firmware.h> #include <asm/processor.h> +#include <asm/tm.h> #endif #include <asm/kexec.h> #include <asm/ppc-opcode.h> @@ -667,6 +669,7 @@ int machine_check_generic(struct pt_regs *regs) void machine_check_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); int recover = 0; __get_cpu_var(irq_stat).mce_exceptions++; @@ -683,7 +686,7 @@ void machine_check_exception(struct pt_regs *regs) recover = cur_cpu_spec->machine_check(regs); if (recover > 0) - return; + goto bail; #if defined(CONFIG_8xx) && defined(CONFIG_PCI) /* the qspan pci read routines can cause machine checks -- Cort @@ -693,20 +696,23 @@ void machine_check_exception(struct pt_regs *regs) * -- BenH */ bad_page_fault(regs, regs->dar, SIGBUS); - return; + goto bail; #endif if (debugger_fault_handler(regs)) - return; + goto bail; if (check_io_access(regs)) - return; + goto bail; die("Machine check", regs, SIGBUS); /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) panic("Unrecoverable Machine check"); + +bail: + exception_exit(prev_state); } void SMIException(struct pt_regs *regs) @@ -716,20 +722,29 @@ void SMIException(struct pt_regs *regs) void unknown_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); _exception(SIGTRAP, regs, 0, 0); + + exception_exit(prev_state); } void instruction_breakpoint_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - return; + goto bail; if (debugger_iabr_match(regs)) - return; + goto bail; _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + +bail: + exception_exit(prev_state); } void RunModeException(struct pt_regs *regs) @@ -739,15 +754,20 @@ void RunModeException(struct pt_regs *regs) void __kprobes single_step_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + clear_single_step(regs); if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - return; + goto bail; if (debugger_sstep(regs)) - return; + goto bail; _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); + +bail: + exception_exit(prev_state); } /* @@ -913,6 +933,28 @@ static int emulate_isel(struct pt_regs *regs, u32 instword) return 0; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static inline bool tm_abort_check(struct pt_regs *regs, int cause) +{ + /* If we're emulating a load/store in an active transaction, we cannot + * emulate it as the kernel operates in transaction suspended context. + * We need to abort the transaction. This creates a persistent TM + * abort so tell the user what caused it with a new code. + */ + if (MSR_TM_TRANSACTIONAL(regs->msr)) { + tm_enable(); + tm_abort(cause); + return true; + } + return false; +} +#else +static inline bool tm_abort_check(struct pt_regs *regs, int reason) +{ + return false; +} +#endif + static int emulate_instruction(struct pt_regs *regs) { u32 instword; @@ -952,6 +994,9 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulate load/store string insn. */ if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) { + if (tm_abort_check(regs, + TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT)) + return -EINVAL; PPC_WARN_EMULATED(string, regs); return emulate_string_inst(regs, instword); } @@ -1005,6 +1050,7 @@ int is_valid_bugaddr(unsigned long addr) void __kprobes program_check_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); unsigned int reason = get_reason(regs); extern int do_mathemu(struct pt_regs *regs); @@ -1014,26 +1060,26 @@ void __kprobes program_check_exception(struct pt_regs *regs) if (reason & REASON_FP) { /* IEEE FP exception */ parse_fpe(regs); - return; + goto bail; } if (reason & REASON_TRAP) { /* Debugger is first in line to stop recursive faults in * rcu_lock, notify_die, or atomic_notifier_call_chain */ if (debugger_bpt(regs)) - return; + goto bail; /* trap exception */ if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - return; + goto bail; if (!(regs->msr & MSR_PR) && /* not user-mode */ report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) { regs->nip += 4; - return; + goto bail; } _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - return; + goto bail; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (reason & REASON_TM) { @@ -1049,7 +1095,7 @@ void __kprobes program_check_exception(struct pt_regs *regs) if (!user_mode(regs) && report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) { regs->nip += 4; - return; + goto bail; } /* If usermode caused this, it's done something illegal and * gets a SIGILL slap on the wrist. We call it an illegal @@ -1059,7 +1105,7 @@ void __kprobes program_check_exception(struct pt_regs *regs) */ if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); - return; + goto bail; } else { printk(KERN_EMERG "Unexpected TM Bad Thing exception " "at %lx (msr 0x%x)\n", regs->nip, reason); @@ -1083,16 +1129,16 @@ void __kprobes program_check_exception(struct pt_regs *regs) switch (do_mathemu(regs)) { case 0: emulate_single_step(regs); - return; + goto bail; case 1: { int code = 0; code = __parse_fpscr(current->thread.fpscr.val); _exception(SIGFPE, regs, code, regs->nip); - return; + goto bail; } case -EFAULT: _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; + goto bail; } /* fall through on any other errors */ #endif /* CONFIG_MATH_EMULATION */ @@ -1103,10 +1149,10 @@ void __kprobes program_check_exception(struct pt_regs *regs) case 0: regs->nip += 4; emulate_single_step(regs); - return; + goto bail; case -EFAULT: _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; + goto bail; } } @@ -1114,16 +1160,33 @@ void __kprobes program_check_exception(struct pt_regs *regs) _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); else _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + +bail: + exception_exit(prev_state); +} + +/* + * This occurs when running in hypervisor mode on POWER6 or later + * and an illegal instruction is encountered. + */ +void __kprobes emulation_assist_interrupt(struct pt_regs *regs) +{ + regs->msr |= REASON_ILLEGAL; + program_check_exception(regs); } void alignment_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); int sig, code, fixed = 0; /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) local_irq_enable(); + if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT)) + goto bail; + /* we don't implement logging of alignment exceptions */ if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS)) fixed = fix_alignment(regs); @@ -1131,7 +1194,7 @@ void alignment_exception(struct pt_regs *regs) if (fixed == 1) { regs->nip += 4; /* skip over emulated instruction */ emulate_single_step(regs); - return; + goto bail; } /* Operand address was bad */ @@ -1146,6 +1209,9 @@ void alignment_exception(struct pt_regs *regs) _exception(sig, regs, code, regs->dar); else bad_page_fault(regs, regs->dar, sig); + +bail: + exception_exit(prev_state); } void StackOverflow(struct pt_regs *regs) @@ -1174,23 +1240,32 @@ void trace_syscall(struct pt_regs *regs) void kernel_fp_unavailable_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); + + exception_exit(prev_state); } void altivec_unavailable_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + if (user_mode(regs)) { /* A user program has executed an altivec instruction, but this kernel doesn't support altivec. */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; + goto bail; } printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); + +bail: + exception_exit(prev_state); } void vsx_unavailable_exception(struct pt_regs *regs) diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 13b86709349..9d3fdcd6629 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -64,6 +64,9 @@ void __init udbg_early_init(void) udbg_init_usbgecko(); #elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) udbg_init_wsp(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS) + /* In memory console */ + udbg_init_memcons(); #elif defined(CONFIG_PPC_EARLY_DEBUG_EHV_BC) udbg_init_ehv_bc(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PS3GELIC) diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 5dd3ab46997..ed038544814 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -441,6 +441,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); struct kvmppc_44x_tlbe *tlbe; unsigned int gtlb_index; + int idx; gtlb_index = kvmppc_get_gpr(vcpu, ra); if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) { @@ -473,6 +474,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) return EMULATE_FAIL; } + idx = srcu_read_lock(&vcpu->kvm->srcu); + if (tlbe_is_host_safe(vcpu, tlbe)) { gva_t eaddr; gpa_t gpaddr; @@ -489,6 +492,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2); diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 422de3f4d46..008cd856c5b 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -5,9 +5,10 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm +KVM := ../../../virt/kvm -common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \ - eventfd.o) +common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ + $(KVM)/eventfd.o CFLAGS_44x_tlb.o := -I. CFLAGS_e500_mmu.o := -I. @@ -53,7 +54,7 @@ kvm-e500mc-objs := \ kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ - ../../../virt/kvm/coalesced_mmio.o \ + $(KVM)/coalesced_mmio.o \ fpu.o \ book3s_paired_singles.o \ book3s_pr.o \ @@ -86,8 +87,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ book3s_xics.o kvm-book3s_64-module-objs := \ - ../../../virt/kvm/kvm_main.o \ - ../../../virt/kvm/eventfd.o \ + $(KVM)/kvm_main.o \ + $(KVM)/eventfd.o \ powerpc.o \ emulate.o \ book3s.o \ @@ -111,7 +112,7 @@ kvm-book3s_32-objs := \ kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o -kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o) +kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o kvm-objs := $(kvm-objs-m) $(kvm-objs-y) diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index b871721c005..739bfbadb85 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -26,6 +26,7 @@ #include <asm/tlbflush.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> +#include <asm/mmu-hash64.h> /* #define DEBUG_MMU */ @@ -76,6 +77,24 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( return NULL; } +static int kvmppc_slb_sid_shift(struct kvmppc_slb *slbe) +{ + return slbe->tb ? SID_SHIFT_1T : SID_SHIFT; +} + +static u64 kvmppc_slb_offset_mask(struct kvmppc_slb *slbe) +{ + return (1ul << kvmppc_slb_sid_shift(slbe)) - 1; +} + +static u64 kvmppc_slb_calc_vpn(struct kvmppc_slb *slb, gva_t eaddr) +{ + eaddr &= kvmppc_slb_offset_mask(slb); + + return (eaddr >> VPN_SHIFT) | + ((slb->vsid) << (kvmppc_slb_sid_shift(slb) - VPN_SHIFT)); +} + static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, bool data) { @@ -85,11 +104,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, if (!slb) return 0; - if (slb->tb) - return (((u64)eaddr >> 12) & 0xfffffff) | - (((u64)slb->vsid) << 28); - - return (((u64)eaddr >> 12) & 0xffff) | (((u64)slb->vsid) << 16); + return kvmppc_slb_calc_vpn(slb, eaddr); } static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe) @@ -100,7 +115,8 @@ static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe) static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr) { int p = kvmppc_mmu_book3s_64_get_pagesize(slbe); - return ((eaddr & 0xfffffff) >> p); + + return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p); } static hva_t kvmppc_mmu_book3s_64_get_pteg( @@ -109,13 +125,15 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg( bool second) { u64 hash, pteg, htabsize; - u32 page; + u32 ssize; hva_t r; + u64 vpn; - page = kvmppc_mmu_book3s_64_get_page(slbe, eaddr); htabsize = ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1); - hash = slbe->vsid ^ page; + vpn = kvmppc_slb_calc_vpn(slbe, eaddr); + ssize = slbe->tb ? MMU_SEGSIZE_1T : MMU_SEGSIZE_256M; + hash = hpt_hash(vpn, kvmppc_mmu_book3s_64_get_pagesize(slbe), ssize); if (second) hash = ~hash; hash &= ((1ULL << 39ULL) - 1ULL); @@ -146,7 +164,7 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr) u64 avpn; avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr); - avpn |= slbe->vsid << (28 - p); + avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p); if (p < 24) avpn >>= ((80 - p) - 56) - 8; @@ -167,7 +185,6 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, int i; u8 key = 0; bool found = false; - bool perm_err = false; int second = 0; ulong mp_ea = vcpu->arch.magic_page_ea; @@ -190,13 +207,15 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, if (!slbe) goto no_seg_found; + avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); + if (slbe->tb) + avpn |= SLB_VSID_B_1T; + do_second: ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); if (kvm_is_error_hva(ptegp)) goto no_page_found; - avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); - if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) { printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp); goto no_page_found; @@ -219,7 +238,7 @@ do_second: continue; /* AVPN compare */ - if (HPTE_V_AVPN_VAL(avpn) == HPTE_V_AVPN_VAL(v)) { + if (HPTE_V_COMPARE(avpn, v)) { u8 pp = (r & HPTE_R_PP) | key; int eaddr_mask = 0xFFF; @@ -248,11 +267,6 @@ do_second: break; } - if (!gpte->may_read) { - perm_err = true; - continue; - } - dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " "-> 0x%lx\n", eaddr, avpn, gpte->vpage, gpte->raddr); @@ -281,6 +295,8 @@ do_second: if (pteg[i+1] != oldr) copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); + if (!gpte->may_read) + return -EPERM; return 0; } else { dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx " @@ -296,13 +312,7 @@ do_second: } } - no_page_found: - - - if (perm_err) - return -EPERM; - return -ENOENT; no_seg_found: @@ -334,7 +344,7 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb) slbe->large = (rs & SLB_VSID_L) ? 1 : 0; slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0; slbe->esid = slbe->tb ? esid_1t : esid; - slbe->vsid = rs >> 12; + slbe->vsid = (rs & ~SLB_VSID_B) >> (kvmppc_slb_sid_shift(slbe) - 16); slbe->valid = (rb & SLB_ESID_V) ? 1 : 0; slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0; slbe->Kp = (rs & SLB_VSID_KP) ? 1 : 0; @@ -375,6 +385,7 @@ static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr) static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea) { struct kvmppc_slb *slbe; + u64 seg_size; dprintk("KVM MMU: slbie(0x%llx)\n", ea); @@ -386,8 +397,11 @@ static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea) dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, slbe->esid); slbe->valid = false; + slbe->orige = 0; + slbe->origv = 0; - kvmppc_mmu_map_segment(vcpu, ea); + seg_size = 1ull << kvmppc_slb_sid_shift(slbe); + kvmppc_mmu_flush_segment(vcpu, ea & ~(seg_size - 1), seg_size); } static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) @@ -396,8 +410,11 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) dprintk("KVM MMU: slbia()\n"); - for (i = 1; i < vcpu->arch.slb_nr; i++) + for (i = 1; i < vcpu->arch.slb_nr; i++) { vcpu->arch.slb[i].valid = false; + vcpu->arch.slb[i].orige = 0; + vcpu->arch.slb[i].origv = 0; + } if (vcpu->arch.shared->msr & MSR_IR) { kvmppc_mmu_flush_segments(vcpu); @@ -467,8 +484,14 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); - if (slb) + if (slb) { gvsid = slb->vsid; + if (slb->tb) { + gvsid <<= SID_SHIFT_1T - SID_SHIFT; + gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1); + gvsid |= VSID_1T; + } + } } switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 3a9a1aceb14..b350d9494b2 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -301,6 +301,23 @@ out: return r; } +void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size) +{ + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + ulong seg_mask = -seg_size; + int i; + + for (i = 1; i < svcpu->slb_max; i++) { + if ((svcpu->slb[i].esid & SLB_ESID_V) && + (svcpu->slb[i].esid & seg_mask) == ea) { + /* Invalidate this entry */ + svcpu->slb[i].esid = 0; + } + } + + svcpu_put(svcpu); +} + void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) { struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); @@ -325,9 +342,9 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) return -1; vcpu3s->context_id[0] = err; - vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1) + vcpu3s->proto_vsid_max = ((u64)(vcpu3s->context_id[0] + 1) << ESID_BITS) - 1; - vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << ESID_BITS; + vcpu3s->proto_vsid_first = (u64)vcpu3s->context_id[0] << ESID_BITS; vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first; kvmppc_mmu_hpte_init(vcpu); diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index 56b983e7b73..4f0caecc0f9 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S @@ -66,10 +66,6 @@ slb_exit_skip_ ## num: ld r12, PACA_SLBSHADOWPTR(r13) - /* Save off the first entry so we can slbie it later */ - ld r10, SHADOW_SLB_ESID(0)(r12) - ld r11, SHADOW_SLB_VSID(0)(r12) - /* Remove bolted entries */ UNBOLT_SLB_ENTRY(0) UNBOLT_SLB_ENTRY(1) @@ -81,15 +77,10 @@ slb_exit_skip_ ## num: /* Flush SLB */ + li r10, 0 + slbmte r10, r10 slbia - /* r0 = esid & ESID_MASK */ - rldicr r10, r10, 0, 35 - /* r0 |= CLASS_BIT(VSID) */ - rldic r12, r11, 56 - 36, 36 - or r10, r10, r12 - slbie r10 - /* Fill SLB with our shadow */ lbz r12, SVCPU_SLB_MAX(r3) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9de24f8e03c..2efa9dde741 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -562,6 +562,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) case H_CPPR: case H_EOI: case H_IPI: + case H_IPOLL: + case H_XIRR_X: if (kvmppc_xics_enabled(vcpu)) { ret = kvmppc_xics_hcall(vcpu, req); break; @@ -1862,7 +1864,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) up_out: up_read(¤t->mm->mmap_sem); - goto out; + goto out_srcu; } int kvmppc_core_init_vm(struct kvm *kvm) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index bdc40b8e77d..19498a567a8 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1239,8 +1239,7 @@ out: #ifdef CONFIG_PPC64 int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) { - /* No flags */ - info->flags = 0; + info->flags = KVM_PPC_1T_SEGMENTS; /* SLB is always 64 entries */ info->slb_size = 64; diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index b24309c6c2d..da0e0bc268b 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -257,6 +257,8 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) case H_CPPR: case H_EOI: case H_IPI: + case H_IPOLL: + case H_XIRR_X: if (kvmppc_xics_enabled(vcpu)) return kvmppc_h_pr_xics_hcall(vcpu, cmd); break; diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index f7a10375661..94c1dd46b83 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -650,6 +650,23 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, return H_SUCCESS; } +static int kvmppc_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server) +{ + union kvmppc_icp_state state; + struct kvmppc_icp *icp; + + icp = vcpu->arch.icp; + if (icp->server_num != server) { + icp = kvmppc_xics_find_server(vcpu->kvm, server); + if (!icp) + return H_PARAMETER; + } + state = ACCESS_ONCE(icp->state); + kvmppc_set_gpr(vcpu, 4, ((u32)state.cppr << 24) | state.xisr); + kvmppc_set_gpr(vcpu, 5, state.mfrr); + return H_SUCCESS; +} + static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) { union kvmppc_icp_state old_state, new_state; @@ -787,6 +804,18 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) if (!xics || !vcpu->arch.icp) return H_HARDWARE; + /* These requests don't have real-mode implementations at present */ + switch (req) { + case H_XIRR_X: + res = kvmppc_h_xirr(vcpu); + kvmppc_set_gpr(vcpu, 4, res); + kvmppc_set_gpr(vcpu, 5, get_tb()); + return rc; + case H_IPOLL: + rc = kvmppc_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4)); + return rc; + } + /* Check for real mode returning too hard */ if (xics->real_mode) return kvmppc_xics_rm_complete(vcpu, req); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1020119226d..dcc94f01600 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -673,7 +673,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ret = s; goto out; } - kvmppc_lazy_ee_enable(); kvm_guest_enter(); @@ -699,6 +698,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_load_guest_fp(vcpu); #endif + kvmppc_lazy_ee_enable(); + ret = __kvmppc_vcpu_run(kvm_run, vcpu); /* No need for kvm_guest_exit. It's done in handle_exit. @@ -795,7 +796,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, kvmppc_fill_pt_regs(®s); timer_interrupt(®s); break; -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64) +#if defined(CONFIG_PPC_DOORBELL) case BOOKE_INTERRUPT_DOORBELL: kvmppc_fill_pt_regs(®s); doorbell_exception(®s); @@ -832,6 +833,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, { int r = RESUME_HOST; int s; + int idx; + +#ifdef CONFIG_PPC64 + WARN_ON(local_paca->irq_happened != 0); +#endif + + /* + * We enter with interrupts disabled in hardware, but + * we need to call hard_irq_disable anyway to ensure that + * the software state is kept in sync. + */ + hard_irq_disable(); /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); @@ -1053,6 +1066,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; } + idx = srcu_read_lock(&vcpu->kvm->srcu); + gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); gfn = gpaddr >> PAGE_SHIFT; @@ -1075,6 +1090,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_account_exit(vcpu, MMIO_EXITS); } + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } @@ -1098,6 +1114,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS); + idx = srcu_read_lock(&vcpu->kvm->srcu); + gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); gfn = gpaddr >> PAGE_SHIFT; @@ -1114,6 +1132,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK); } + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index c41a5a96b55..6d6f153b6c1 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -396,6 +396,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) struct kvm_book3e_206_tlb_entry *gtlbe; int tlbsel, esel; int recal = 0; + int idx; tlbsel = get_tlb_tlbsel(vcpu); esel = get_tlb_esel(vcpu, tlbsel); @@ -430,6 +431,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) kvmppc_set_tlb1map_range(vcpu, gtlbe); } + idx = srcu_read_lock(&vcpu->kvm->srcu); + /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ if (tlbe_is_host_safe(vcpu, gtlbe)) { u64 eaddr = get_tlb_eaddr(gtlbe); @@ -444,6 +447,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) kvmppc_mmu_map(vcpu, eaddr, raddr, index_of(tlbsel, esel)); } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); return EMULATE_DONE; } diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 753cc99eff2..19c8379575f 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -177,8 +177,6 @@ int kvmppc_core_check_processor_compat(void) r = 0; else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) r = 0; - else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0) - r = 0; else r = -ENOTSUPP; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 631a2650e4e..2c52ada3077 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -169,6 +169,9 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) vcpu->arch.shared->sprg3 = spr_val; break; + /* PIR can legally be written, but we ignore it */ + case SPRN_PIR: break; + default: emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, spr_val); diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index 0ef75bf0695..395c594722a 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -28,13 +28,14 @@ _GLOBAL(copypage_power7) * aligned we don't need to clear the bottom 7 bits of either * address. */ - ori r9,r3,1 /* stream=1 */ + ori r9,r3,1 /* stream=1 => to */ #ifdef CONFIG_PPC_64K_PAGES - lis r7,0x0E01 /* depth=7, units=512 */ + lis r7,0x0E01 /* depth=7 + * units/cachelines=512 */ #else lis r7,0x0E00 /* depth=7 */ - ori r7,r7,0x1000 /* units=32 */ + ori r7,r7,0x1000 /* units/cachelines=32 */ #endif ori r10,r7,1 /* stream=1 */ @@ -43,12 +44,14 @@ _GLOBAL(copypage_power7) .machine push .machine "power4" - dcbt r0,r4,0b01000 - dcbt r0,r7,0b01010 - dcbtst r0,r9,0b01000 - dcbtst r0,r10,0b01010 + /* setup read stream 0 */ + dcbt r0,r4,0b01000 /* addr from */ + dcbt r0,r7,0b01010 /* length and depth from */ + /* setup write stream 1 */ + dcbtst r0,r9,0b01000 /* addr to */ + dcbtst r0,r10,0b01010 /* length and depth to */ eieio - dcbt r0,r8,0b01010 /* GO */ + dcbt r0,r8,0b01010 /* all streams GO */ .machine pop #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 0d24ff15f5f..d1f11795a7a 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -318,12 +318,14 @@ err1; stb r0,0(r3) .machine push .machine "power4" - dcbt r0,r6,0b01000 - dcbt r0,r7,0b01010 - dcbtst r0,r9,0b01000 - dcbtst r0,r10,0b01010 + /* setup read stream 0 */ + dcbt r0,r6,0b01000 /* addr from */ + dcbt r0,r7,0b01010 /* length and depth from */ + /* setup write stream 1 */ + dcbtst r0,r9,0b01000 /* addr to */ + dcbtst r0,r10,0b01010 /* length and depth to */ eieio - dcbt r0,r8,0b01010 /* GO */ + dcbt r0,r8,0b01010 /* all streams GO */ .machine pop beq cr1,.Lunwind_stack_nonvmx_copy diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 229951ffc35..8726779e140 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -32,6 +32,7 @@ #include <linux/perf_event.h> #include <linux/magic.h> #include <linux/ratelimit.h> +#include <linux/context_tracking.h> #include <asm/firmware.h> #include <asm/page.h> @@ -196,6 +197,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { + enum ctx_state prev_state = exception_enter(); struct vm_area_struct * vma; struct mm_struct *mm = current->mm; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -204,6 +206,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, int trap = TRAP(regs); int is_exec = trap == 0x400; int fault; + int rc = 0; #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) /* @@ -230,28 +233,30 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, * look at it */ if (error_code & ICSWX_DSI_UCT) { - int rc = acop_handle_fault(regs, address, error_code); + rc = acop_handle_fault(regs, address, error_code); if (rc) - return rc; + goto bail; } #endif /* CONFIG_PPC_ICSWX */ if (notify_page_fault(regs)) - return 0; + goto bail; if (unlikely(debugger_fault_handler(regs))) - return 0; + goto bail; /* On a kernel SLB miss we can only check for a valid exception entry */ - if (!user_mode(regs) && (address >= TASK_SIZE)) - return SIGSEGV; + if (!user_mode(regs) && (address >= TASK_SIZE)) { + rc = SIGSEGV; + goto bail; + } #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \ defined(CONFIG_PPC_BOOK3S_64)) if (error_code & DSISR_DABRMATCH) { /* breakpoint match */ do_break(regs, address, error_code); - return 0; + goto bail; } #endif @@ -260,8 +265,10 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, local_irq_enable(); if (in_atomic() || mm == NULL) { - if (!user_mode(regs)) - return SIGSEGV; + if (!user_mode(regs)) { + rc = SIGSEGV; + goto bail; + } /* in_atomic() in user mode is really bad, as is current->mm == NULL. */ printk(KERN_EMERG "Page fault in user mode with " @@ -417,9 +424,11 @@ good_area: */ fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { - int rc = mm_fault_error(regs, address, fault); + rc = mm_fault_error(regs, address, fault); if (rc >= MM_FAULT_RETURN) - return rc; + goto bail; + else + rc = 0; } /* @@ -454,7 +463,7 @@ good_area: } up_read(&mm->mmap_sem); - return 0; + goto bail; bad_area: up_read(&mm->mmap_sem); @@ -463,7 +472,7 @@ bad_area_nosemaphore: /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { _exception(SIGSEGV, regs, code, address); - return 0; + goto bail; } if (is_exec && (error_code & DSISR_PROTFAULT)) @@ -471,7 +480,11 @@ bad_area_nosemaphore: " page (%lx) - exploit attempt? (uid: %d)\n", address, from_kuid(&init_user_ns, current_uid())); - return SIGSEGV; + rc = SIGSEGV; + +bail: + exception_exit(prev_state); + return rc; } diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 6a2aead5b0e..4c122c3f162 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -336,11 +336,18 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, hpte_v = hptep->v; actual_psize = hpte_actual_psize(hptep, psize); + /* + * We need to invalidate the TLB always because hpte_remove doesn't do + * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less + * random entry from it. When we do that we don't invalidate the TLB + * (hpte_remove) because we assume the old translation is still + * technically "valid". + */ if (actual_psize < 0) { - native_unlock_hpte(hptep); - return -1; + actual_psize = psize; + ret = -1; + goto err_out; } - /* Even if we miss, we need to invalidate the TLB */ if (!HPTE_V_COMPARE(hpte_v, want_v)) { DBG_LOW(" -> miss\n"); ret = -1; @@ -350,6 +357,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); } +err_out: native_unlock_hpte(hptep); /* Ensure it is out of the tlb too. */ @@ -409,7 +417,7 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, hptep = htab_address + slot; actual_psize = hpte_actual_psize(hptep, psize); if (actual_psize < 0) - return; + actual_psize = psize; /* Update the HPTE */ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | @@ -437,21 +445,27 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, hpte_v = hptep->v; actual_psize = hpte_actual_psize(hptep, psize); + /* + * We need to invalidate the TLB always because hpte_remove doesn't do + * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less + * random entry from it. When we do that we don't invalidate the TLB + * (hpte_remove) because we assume the old translation is still + * technically "valid". + */ if (actual_psize < 0) { + actual_psize = psize; native_unlock_hpte(hptep); - local_irq_restore(flags); - return; + goto err_out; } - /* Even if we miss, we need to invalidate the TLB */ if (!HPTE_V_COMPARE(hpte_v, want_v)) native_unlock_hpte(hptep); else /* Invalidate the hpte. NOTE: this also unlocks it */ hptep->v = 0; +err_out: /* Invalidate the TLB */ tlbie(vpn, psize, actual_psize, ssize, local); - local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 88ac0eeaadd..e303a6d74e3 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -33,6 +33,7 @@ #include <linux/init.h> #include <linux/signal.h> #include <linux/memblock.h> +#include <linux/context_tracking.h> #include <asm/processor.h> #include <asm/pgtable.h> @@ -954,6 +955,7 @@ void hash_failure_debug(unsigned long ea, unsigned long access, */ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) { + enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; unsigned long vsid; struct mm_struct *mm; @@ -973,7 +975,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) mm = current->mm; if (! mm) { DBG_LOW(" user region with no mm !\n"); - return 1; + rc = 1; + goto bail; } psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); @@ -992,19 +995,23 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) /* Not a valid range * Send the problem up to do_page_fault */ - return 1; + rc = 1; + goto bail; } DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid); /* Bad address. */ if (!vsid) { DBG_LOW("Bad address!\n"); - return 1; + rc = 1; + goto bail; } /* Get pgdir */ pgdir = mm->pgd; - if (pgdir == NULL) - return 1; + if (pgdir == NULL) { + rc = 1; + goto bail; + } /* Check CPU locality */ tmp = cpumask_of(smp_processor_id()); @@ -1027,7 +1034,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift); if (ptep == NULL || !pte_present(*ptep)) { DBG_LOW(" no PTE !\n"); - return 1; + rc = 1; + goto bail; } /* Add _PAGE_PRESENT to the required access perm */ @@ -1038,13 +1046,16 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) */ if (access & ~pte_val(*ptep)) { DBG_LOW(" no access !\n"); - return 1; + rc = 1; + goto bail; } #ifdef CONFIG_HUGETLB_PAGE - if (hugeshift) - return __hash_page_huge(ea, access, vsid, ptep, trap, local, + if (hugeshift) { + rc = __hash_page_huge(ea, access, vsid, ptep, trap, local, ssize, hugeshift, psize); + goto bail; + } #endif /* CONFIG_HUGETLB_PAGE */ #ifndef CONFIG_PPC_64K_PAGES @@ -1124,6 +1135,9 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) pte_val(*(ptep + PTRS_PER_PTE))); #endif DBG_LOW(" -> rc=%d\n", rc); + +bail: + exception_exit(prev_state); return rc; } EXPORT_SYMBOL_GPL(hash_page); @@ -1259,6 +1273,8 @@ void flush_hash_range(unsigned long number, int local) */ void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) { + enum ctx_state prev_state = exception_enter(); + if (user_mode(regs)) { #ifdef CONFIG_PPC_SUBPAGE_PROT if (rc == -2) @@ -1268,6 +1284,8 @@ void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) _exception(SIGBUS, regs, BUS_ADRERR, address); } else bad_page_fault(regs, address, SIGBUS); + + exception_exit(prev_state); } long hpte_insert_repeating(unsigned long hash, unsigned long vpn, diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 237c8e5f264..4210549ac95 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -357,7 +357,7 @@ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) int alloc_bootmem_huge_page(struct hstate *hstate) { struct huge_bootmem_page *m; - int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT); + int idx = shift_to_mmu_psize(huge_page_shift(hstate)); int nr_gpages = gpage_freearray[idx].nr_gpages; if (nr_gpages == 0) @@ -592,8 +592,14 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, do { pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); - if (pmd_none_or_clear_bad(pmd)) + if (!is_hugepd(pmd)) { + /* + * if it is not hugepd pointer, we should already find + * it cleared. + */ + WARN_ON(!pmd_none_or_clear_bad(pmd)); continue; + } #ifdef CONFIG_PPC_FSL_BOOK3E /* * Increment next by the size of the huge mapping since diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index c2787bf779c..a90b9c45899 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -215,7 +215,8 @@ static void __meminit vmemmap_create_mapping(unsigned long start, unsigned long phys) { int mapped = htab_bolt_mapping(start, start + page_size, phys, - PAGE_KERNEL, mmu_vmemmap_psize, + pgprot_val(PAGE_KERNEL), + mmu_vmemmap_psize, mmu_kernel_ssize); BUG_ON(mapped < 0); } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 0988a26e041..1cb1ea133a2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -299,47 +299,13 @@ void __init paging_init(void) void __init mem_init(void) { -#ifdef CONFIG_NEED_MULTIPLE_NODES - int nid; -#endif - pg_data_t *pgdat; - unsigned long i; - struct page *page; - unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; - #ifdef CONFIG_SWIOTLB swiotlb_init(0); #endif - num_physpages = memblock_phys_mem_size() >> PAGE_SHIFT; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - -#ifdef CONFIG_NEED_MULTIPLE_NODES - for_each_online_node(nid) { - if (NODE_DATA(nid)->node_spanned_pages != 0) { - printk("freeing bootmem node %d\n", nid); - totalram_pages += - free_all_bootmem_node(NODE_DATA(nid)); - } - } -#else - max_mapnr = max_pfn; - totalram_pages += free_all_bootmem(); -#endif - for_each_online_pgdat(pgdat) { - for (i = 0; i < pgdat->node_spanned_pages; i++) { - if (!pfn_valid(pgdat->node_start_pfn + i)) - continue; - page = pgdat_page_nr(pgdat, i); - if (PageReserved(page)) - reservedpages++; - } - } - - codesize = (unsigned long)&_sdata - (unsigned long)&_stext; - datasize = (unsigned long)&_edata - (unsigned long)&_sdata; - initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; - bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; + set_max_mapnr(max_pfn); + free_all_bootmem(); #ifdef CONFIG_HIGHMEM { @@ -349,13 +315,9 @@ void __init mem_init(void) for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT; struct page *page = pfn_to_page(pfn); - if (memblock_is_reserved(paddr)) - continue; - free_highmem_page(page); - reservedpages--; + if (!memblock_is_reserved(paddr)) + free_highmem_page(page); } - printk(KERN_DEBUG "High memory: %luk\n", - totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* CONFIG_HIGHMEM */ @@ -368,16 +330,7 @@ void __init mem_init(void) (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1; #endif - printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " - "%luk reserved, %luk data, %luk bss, %luk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - bsssize >> 10, - initsize >> 10); - + mem_init_print_info(NULL); #ifdef CONFIG_PPC32 pr_info("Kernel virtual memory layout:\n"); pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); @@ -407,7 +360,7 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 023ec8a13f3..7df1c5edda8 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -183,8 +183,8 @@ void tlb_flush(struct mmu_gather *tlb) * since 64K pages may overlap with other bridges when using 64K pages * with 4K HW pages on IO space. * - * Because of that usage pattern, it's only available with CONFIG_HOTPLUG - * and is implemented for small size rather than speed. + * Because of that usage pattern, it is implemented for small size rather + * than speed. */ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, unsigned long end) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index c627843c5b2..29c6482890c 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -13,11 +13,13 @@ #include <linux/perf_event.h> #include <linux/percpu.h> #include <linux/hardirq.h> +#include <linux/uaccess.h> #include <asm/reg.h> #include <asm/pmc.h> #include <asm/machdep.h> #include <asm/firmware.h> #include <asm/ptrace.h> +#include <asm/code-patching.h> #define BHRB_MAX_ENTRIES 32 #define BHRB_TARGET 0x0000000000000002 @@ -100,11 +102,15 @@ static inline int siar_valid(struct pt_regs *regs) return 1; } +static inline void power_pmu_bhrb_enable(struct perf_event *event) {} +static inline void power_pmu_bhrb_disable(struct perf_event *event) {} +void power_pmu_flush_branch_stack(void) {} +static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} #endif /* CONFIG_PPC32 */ static bool regs_use_siar(struct pt_regs *regs) { - return !!(regs->result & 1); + return !!regs->result; } /* @@ -130,22 +136,30 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) * If we're not doing instruction sampling, give them the SDAR * (sampled data address). If we are doing instruction sampling, then * only give them the SDAR if it corresponds to the instruction - * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC or - * the [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA. + * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the + * [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER. */ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { unsigned long mmcra = regs->dsisr; - unsigned long sdsync; + bool sdar_valid; - if (ppmu->flags & PPMU_SIAR_VALID) - sdsync = POWER7P_MMCRA_SDAR_VALID; - else if (ppmu->flags & PPMU_ALT_SIPR) - sdsync = POWER6_MMCRA_SDSYNC; - else - sdsync = MMCRA_SDSYNC; + if (ppmu->flags & PPMU_HAS_SIER) + sdar_valid = regs->dar & SIER_SDAR_VALID; + else { + unsigned long sdsync; + + if (ppmu->flags & PPMU_SIAR_VALID) + sdsync = POWER7P_MMCRA_SDAR_VALID; + else if (ppmu->flags & PPMU_ALT_SIPR) + sdsync = POWER6_MMCRA_SDSYNC; + else + sdsync = MMCRA_SDSYNC; - if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) + sdar_valid = mmcra & sdsync; + } + + if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid) *addrp = mfspr(SPRN_SDAR); } @@ -175,11 +189,6 @@ static bool regs_sipr(struct pt_regs *regs) return !!(regs->dsisr & sipr); } -static bool regs_no_sipr(struct pt_regs *regs) -{ - return !!(regs->result & 2); -} - static inline u32 perf_flags_from_msr(struct pt_regs *regs) { if (regs->msr & MSR_PR) @@ -202,7 +211,7 @@ static inline u32 perf_get_misc_flags(struct pt_regs *regs) * SIAR which should give slightly more reliable * results */ - if (regs_no_sipr(regs)) { + if (ppmu->flags & PPMU_NO_SIPR) { unsigned long siar = mfspr(SPRN_SIAR); if (siar >= PAGE_OFFSET) return PERF_RECORD_MISC_KERNEL; @@ -233,22 +242,9 @@ static inline void perf_read_regs(struct pt_regs *regs) int use_siar; regs->dsisr = mmcra; - regs->result = 0; - - if (ppmu->flags & PPMU_NO_SIPR) - regs->result |= 2; - - /* - * On power8 if we're in random sampling mode, the SIER is updated. - * If we're in continuous sampling mode, we don't have SIPR. - */ - if (ppmu->flags & PPMU_HAS_SIER) { - if (marked) - regs->dar = mfspr(SPRN_SIER); - else - regs->result |= 2; - } + if (ppmu->flags & PPMU_HAS_SIER) + regs->dar = mfspr(SPRN_SIER); /* * If this isn't a PMU exception (eg a software event) the SIAR is @@ -273,12 +269,12 @@ static inline void perf_read_regs(struct pt_regs *regs) use_siar = 1; else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING)) use_siar = 0; - else if (!regs_no_sipr(regs) && regs_sipr(regs)) + else if (!(ppmu->flags & PPMU_NO_SIPR) && regs_sipr(regs)) use_siar = 0; else use_siar = 1; - regs->result |= use_siar; + regs->result = use_siar; } /* @@ -302,12 +298,170 @@ static inline int siar_valid(struct pt_regs *regs) unsigned long mmcra = regs->dsisr; int marked = mmcra & MMCRA_SAMPLE_ENABLE; - if ((ppmu->flags & PPMU_SIAR_VALID) && marked) - return mmcra & POWER7P_MMCRA_SIAR_VALID; + if (marked) { + if (ppmu->flags & PPMU_HAS_SIER) + return regs->dar & SIER_SIAR_VALID; + + if (ppmu->flags & PPMU_SIAR_VALID) + return mmcra & POWER7P_MMCRA_SIAR_VALID; + } return 1; } + +/* Reset all possible BHRB entries */ +static void power_pmu_bhrb_reset(void) +{ + asm volatile(PPC_CLRBHRB); +} + +static void power_pmu_bhrb_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!ppmu->bhrb_nr) + return; + + /* Clear BHRB if we changed task context to avoid data leaks */ + if (event->ctx->task && cpuhw->bhrb_context != event->ctx) { + power_pmu_bhrb_reset(); + cpuhw->bhrb_context = event->ctx; + } + cpuhw->bhrb_users++; +} + +static void power_pmu_bhrb_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!ppmu->bhrb_nr) + return; + + cpuhw->bhrb_users--; + WARN_ON_ONCE(cpuhw->bhrb_users < 0); + + if (!cpuhw->disabled && !cpuhw->bhrb_users) { + /* BHRB cannot be turned off when other + * events are active on the PMU. + */ + + /* avoid stale pointer */ + cpuhw->bhrb_context = NULL; + } +} + +/* Called from ctxsw to prevent one process's branch entries to + * mingle with the other process's entries during context switch. + */ +void power_pmu_flush_branch_stack(void) +{ + if (ppmu->bhrb_nr) + power_pmu_bhrb_reset(); +} +/* Calculate the to address for a branch */ +static __u64 power_pmu_bhrb_to(u64 addr) +{ + unsigned int instr; + int ret; + __u64 target; + + if (is_kernel_addr(addr)) + return branch_target((unsigned int *)addr); + + /* Userspace: need copy instruction here then translate it */ + pagefault_disable(); + ret = __get_user_inatomic(instr, (unsigned int __user *)addr); + if (ret) { + pagefault_enable(); + return 0; + } + pagefault_enable(); + + target = branch_target(&instr); + if ((!target) || (instr & BRANCH_ABSOLUTE)) + return target; + + /* Translate relative branch target from kernel to user address */ + return target - (unsigned long)&instr + addr; +} + +/* Processing BHRB entries */ +void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) +{ + u64 val; + u64 addr; + int r_index, u_index, pred; + + r_index = 0; + u_index = 0; + while (r_index < ppmu->bhrb_nr) { + /* Assembly read function */ + val = read_bhrb(r_index++); + if (!val) + /* Terminal marker: End of valid BHRB entries */ + break; + else { + addr = val & BHRB_EA; + pred = val & BHRB_PREDICTION; + + if (!addr) + /* invalid entry */ + continue; + + /* Branches are read most recent first (ie. mfbhrb 0 is + * the most recent branch). + * There are two types of valid entries: + * 1) a target entry which is the to address of a + * computed goto like a blr,bctr,btar. The next + * entry read from the bhrb will be branch + * corresponding to this target (ie. the actual + * blr/bctr/btar instruction). + * 2) a from address which is an actual branch. If a + * target entry proceeds this, then this is the + * matching branch for that target. If this is not + * following a target entry, then this is a branch + * where the target is given as an immediate field + * in the instruction (ie. an i or b form branch). + * In this case we need to read the instruction from + * memory to determine the target/to address. + */ + + if (val & BHRB_TARGET) { + /* Target branches use two entries + * (ie. computed gotos/XL form) + */ + cpuhw->bhrb_entries[u_index].to = addr; + cpuhw->bhrb_entries[u_index].mispred = pred; + cpuhw->bhrb_entries[u_index].predicted = ~pred; + + /* Get from address in next entry */ + val = read_bhrb(r_index++); + addr = val & BHRB_EA; + if (val & BHRB_TARGET) { + /* Shouldn't have two targets in a + row.. Reset index and try again */ + r_index--; + addr = 0; + } + cpuhw->bhrb_entries[u_index].from = addr; + } else { + /* Branches to immediate field + (ie I or B form) */ + cpuhw->bhrb_entries[u_index].from = addr; + cpuhw->bhrb_entries[u_index].to = + power_pmu_bhrb_to(addr); + cpuhw->bhrb_entries[u_index].mispred = pred; + cpuhw->bhrb_entries[u_index].predicted = ~pred; + } + u_index++; + + } + } + cpuhw->bhrb_stack.nr = u_index; + return; +} + #endif /* CONFIG_PPC64 */ static void perf_event_interrupt(struct pt_regs *regs); @@ -904,47 +1058,6 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -/* Reset all possible BHRB entries */ -static void power_pmu_bhrb_reset(void) -{ - asm volatile(PPC_CLRBHRB); -} - -void power_pmu_bhrb_enable(struct perf_event *event) -{ - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - - if (!ppmu->bhrb_nr) - return; - - /* Clear BHRB if we changed task context to avoid data leaks */ - if (event->ctx->task && cpuhw->bhrb_context != event->ctx) { - power_pmu_bhrb_reset(); - cpuhw->bhrb_context = event->ctx; - } - cpuhw->bhrb_users++; -} - -void power_pmu_bhrb_disable(struct perf_event *event) -{ - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - - if (!ppmu->bhrb_nr) - return; - - cpuhw->bhrb_users--; - WARN_ON_ONCE(cpuhw->bhrb_users < 0); - - if (!cpuhw->disabled && !cpuhw->bhrb_users) { - /* BHRB cannot be turned off when other - * events are active on the PMU. - */ - - /* avoid stale pointer */ - cpuhw->bhrb_context = NULL; - } -} - /* * Add a event to the PMU. * If all events are not already frozen, then we disable and @@ -1180,15 +1293,6 @@ int power_pmu_commit_txn(struct pmu *pmu) return 0; } -/* Called from ctxsw to prevent one process's branch entries to - * mingle with the other process's entries during context switch. - */ -void power_pmu_flush_branch_stack(void) -{ - if (ppmu->bhrb_nr) - power_pmu_bhrb_reset(); -} - /* * Return 1 if we might be able to put event on a limited PMC, * or 0 if not. @@ -1458,77 +1562,6 @@ struct pmu power_pmu = { .flush_branch_stack = power_pmu_flush_branch_stack, }; -/* Processing BHRB entries */ -void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) -{ - u64 val; - u64 addr; - int r_index, u_index, target, pred; - - r_index = 0; - u_index = 0; - while (r_index < ppmu->bhrb_nr) { - /* Assembly read function */ - val = read_bhrb(r_index); - - /* Terminal marker: End of valid BHRB entries */ - if (val == 0) { - break; - } else { - /* BHRB field break up */ - addr = val & BHRB_EA; - pred = val & BHRB_PREDICTION; - target = val & BHRB_TARGET; - - /* Probable Missed entry: Not applicable for POWER8 */ - if ((addr == 0) && (target == 0) && (pred == 1)) { - r_index++; - continue; - } - - /* Real Missed entry: Power8 based missed entry */ - if ((addr == 0) && (target == 1) && (pred == 1)) { - r_index++; - continue; - } - - /* Reserved condition: Not a valid entry */ - if ((addr == 0) && (target == 1) && (pred == 0)) { - r_index++; - continue; - } - - /* Is a target address */ - if (val & BHRB_TARGET) { - /* First address cannot be a target address */ - if (r_index == 0) { - r_index++; - continue; - } - - /* Update target address for the previous entry */ - cpuhw->bhrb_entries[u_index - 1].to = addr; - cpuhw->bhrb_entries[u_index - 1].mispred = pred; - cpuhw->bhrb_entries[u_index - 1].predicted = ~pred; - - /* Dont increment u_index */ - r_index++; - } else { - /* Update address, flags for current entry */ - cpuhw->bhrb_entries[u_index].from = addr; - cpuhw->bhrb_entries[u_index].mispred = pred; - cpuhw->bhrb_entries[u_index].predicted = ~pred; - - /* Successfully popullated one entry */ - u_index++; - r_index++; - } - } - } - cpuhw->bhrb_stack.nr = u_index; - return; -} - /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -1725,7 +1758,7 @@ static void perf_event_interrupt(struct pt_regs *regs) } } } - if ((!found) && printk_ratelimit()) + if (!found && !nmi && printk_ratelimit()) printk(KERN_WARNING "Can't find PMC that caused IRQ\n"); /* diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 3c475d6267c..13c3f0e547a 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -62,6 +62,29 @@ #define PME_PM_BRU_FIN 0x10068 #define PME_PM_BRU_MPRED 0x400f6 +#define PME_PM_CMPLU_STALL_FXU 0x20014 +#define PME_PM_CMPLU_STALL_DIV 0x40014 +#define PME_PM_CMPLU_STALL_SCALAR 0x40012 +#define PME_PM_CMPLU_STALL_SCALAR_LONG 0x20018 +#define PME_PM_CMPLU_STALL_VECTOR 0x2001c +#define PME_PM_CMPLU_STALL_VECTOR_LONG 0x4004a +#define PME_PM_CMPLU_STALL_LSU 0x20012 +#define PME_PM_CMPLU_STALL_REJECT 0x40016 +#define PME_PM_CMPLU_STALL_ERAT_MISS 0x40018 +#define PME_PM_CMPLU_STALL_DCACHE_MISS 0x20016 +#define PME_PM_CMPLU_STALL_STORE 0x2004a +#define PME_PM_CMPLU_STALL_THRD 0x1001c +#define PME_PM_CMPLU_STALL_IFU 0x4004c +#define PME_PM_CMPLU_STALL_BRU 0x4004e +#define PME_PM_GCT_NOSLOT_IC_MISS 0x2001a +#define PME_PM_GCT_NOSLOT_BR_MPRED 0x4001a +#define PME_PM_GCT_NOSLOT_BR_MPRED_IC_MISS 0x4001c +#define PME_PM_GRP_CMPL 0x30004 +#define PME_PM_1PLUS_PPC_CMPL 0x100f2 +#define PME_PM_CMPLU_STALL_DFU 0x2003c +#define PME_PM_RUN_CYC 0x200f4 +#define PME_PM_RUN_INST_CMPL 0x400fa + /* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 @@ -393,6 +416,31 @@ POWER_EVENT_ATTR(LD_MISS_L1, LD_MISS_L1); POWER_EVENT_ATTR(BRU_FIN, BRU_FIN) POWER_EVENT_ATTR(BRU_MPRED, BRU_MPRED); +POWER_EVENT_ATTR(CMPLU_STALL_FXU, CMPLU_STALL_FXU); +POWER_EVENT_ATTR(CMPLU_STALL_DIV, CMPLU_STALL_DIV); +POWER_EVENT_ATTR(CMPLU_STALL_SCALAR, CMPLU_STALL_SCALAR); +POWER_EVENT_ATTR(CMPLU_STALL_SCALAR_LONG, CMPLU_STALL_SCALAR_LONG); +POWER_EVENT_ATTR(CMPLU_STALL_VECTOR, CMPLU_STALL_VECTOR); +POWER_EVENT_ATTR(CMPLU_STALL_VECTOR_LONG, CMPLU_STALL_VECTOR_LONG); +POWER_EVENT_ATTR(CMPLU_STALL_LSU, CMPLU_STALL_LSU); +POWER_EVENT_ATTR(CMPLU_STALL_REJECT, CMPLU_STALL_REJECT); + +POWER_EVENT_ATTR(CMPLU_STALL_ERAT_MISS, CMPLU_STALL_ERAT_MISS); +POWER_EVENT_ATTR(CMPLU_STALL_DCACHE_MISS, CMPLU_STALL_DCACHE_MISS); +POWER_EVENT_ATTR(CMPLU_STALL_STORE, CMPLU_STALL_STORE); +POWER_EVENT_ATTR(CMPLU_STALL_THRD, CMPLU_STALL_THRD); +POWER_EVENT_ATTR(CMPLU_STALL_IFU, CMPLU_STALL_IFU); +POWER_EVENT_ATTR(CMPLU_STALL_BRU, CMPLU_STALL_BRU); +POWER_EVENT_ATTR(GCT_NOSLOT_IC_MISS, GCT_NOSLOT_IC_MISS); + +POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED, GCT_NOSLOT_BR_MPRED); +POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED_IC_MISS, GCT_NOSLOT_BR_MPRED_IC_MISS); +POWER_EVENT_ATTR(GRP_CMPL, GRP_CMPL); +POWER_EVENT_ATTR(1PLUS_PPC_CMPL, 1PLUS_PPC_CMPL); +POWER_EVENT_ATTR(CMPLU_STALL_DFU, CMPLU_STALL_DFU); +POWER_EVENT_ATTR(RUN_CYC, RUN_CYC); +POWER_EVENT_ATTR(RUN_INST_CMPL, RUN_INST_CMPL); + static struct attribute *power7_events_attr[] = { GENERIC_EVENT_PTR(CYC), GENERIC_EVENT_PTR(GCT_NOSLOT_CYC), @@ -411,6 +459,31 @@ static struct attribute *power7_events_attr[] = { POWER_EVENT_PTR(LD_MISS_L1), POWER_EVENT_PTR(BRU_FIN), POWER_EVENT_PTR(BRU_MPRED), + + POWER_EVENT_PTR(CMPLU_STALL_FXU), + POWER_EVENT_PTR(CMPLU_STALL_DIV), + POWER_EVENT_PTR(CMPLU_STALL_SCALAR), + POWER_EVENT_PTR(CMPLU_STALL_SCALAR_LONG), + POWER_EVENT_PTR(CMPLU_STALL_VECTOR), + POWER_EVENT_PTR(CMPLU_STALL_VECTOR_LONG), + POWER_EVENT_PTR(CMPLU_STALL_LSU), + POWER_EVENT_PTR(CMPLU_STALL_REJECT), + + POWER_EVENT_PTR(CMPLU_STALL_ERAT_MISS), + POWER_EVENT_PTR(CMPLU_STALL_DCACHE_MISS), + POWER_EVENT_PTR(CMPLU_STALL_STORE), + POWER_EVENT_PTR(CMPLU_STALL_THRD), + POWER_EVENT_PTR(CMPLU_STALL_IFU), + POWER_EVENT_PTR(CMPLU_STALL_BRU), + POWER_EVENT_PTR(GCT_NOSLOT_IC_MISS), + POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED), + + POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED_IC_MISS), + POWER_EVENT_PTR(GRP_CMPL), + POWER_EVENT_PTR(1PLUS_PPC_CMPL), + POWER_EVENT_PTR(CMPLU_STALL_DFU), + POWER_EVENT_PTR(RUN_CYC), + POWER_EVENT_PTR(RUN_INST_CMPL), NULL }; diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index a881232a3cc..e17cdfc5ba4 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -128,7 +128,7 @@ config PPC_RTAS_DAEMON config RTAS_PROC bool "Proc interface to RTAS" - depends on PPC_RTAS + depends on PPC_RTAS && PROC_FS default y config RTAS_FLASH @@ -193,37 +193,6 @@ config PPC_IO_WORKAROUNDS source "drivers/cpufreq/Kconfig" -menu "CPU Frequency drivers" - depends on CPU_FREQ - -config CPU_FREQ_PMAC - bool "Support for Apple PowerBooks" - depends on ADB_PMU && PPC32 - select CPU_FREQ_TABLE - help - This adds support for frequency switching on Apple PowerBooks, - this currently includes some models of iBook & Titanium - PowerBook. - -config CPU_FREQ_PMAC64 - bool "Support for some Apple G5s" - depends on PPC_PMAC && PPC64 - select CPU_FREQ_TABLE - help - This adds support for frequency switching on Apple iMac G5, - and some of the more recent desktop G5 machines as well. - -config PPC_PASEMI_CPUFREQ - bool "Support for PA Semi PWRficient" - depends on PPC_PASEMI - default y - select CPU_FREQ_TABLE - help - This adds the support for frequency switching on PA Semi - PWRficient processors. - -endmenu - menu "CPUIdle driver" source "drivers/cpuidle/Kconfig" diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 54f3936001a..7819c40a6bc 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -158,6 +158,7 @@ config E500 config PPC_E500MC bool "e500mc Support" select PPC_FPU + select COMMON_CLK depends on E500 help This must be enabled for running on e500mc (and derivatives diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 35f77a42bed..f3900427ffa 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -238,7 +238,7 @@ const struct file_operations spufs_context_fops = { .release = spufs_dir_close, .llseek = dcache_dir_lseek, .read = generic_read_dir, - .readdir = dcache_readdir, + .iterate = dcache_readdir, .fsync = noop_fsync, }; EXPORT_SYMBOL_GPL(spufs_context_fops); diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile index ce6d789e074..8e8d4cae5eb 100644 --- a/arch/powerpc/platforms/pasemi/Makefile +++ b/arch/powerpc/platforms/pasemi/Makefile @@ -1,3 +1,2 @@ obj-y += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o obj-$(CONFIG_PPC_PASEMI_MDIO) += gpio_mdio.o -obj-$(CONFIG_PPC_PASEMI_CPUFREQ) += cpufreq.o diff --git a/arch/powerpc/platforms/pasemi/cpufreq.c b/arch/powerpc/platforms/pasemi/cpufreq.c deleted file mode 100644 index be1e7958909..00000000000 --- a/arch/powerpc/platforms/pasemi/cpufreq.c +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Copyright (C) 2007 PA Semi, Inc - * - * Authors: Egor Martovetsky <egor@pasemi.com> - * Olof Johansson <olof@lixom.net> - * - * Maintained by: Olof Johansson <olof@lixom.net> - * - * Based on arch/powerpc/platforms/cell/cbe_cpufreq.c: - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <linux/cpufreq.h> -#include <linux/timer.h> -#include <linux/module.h> - -#include <asm/hw_irq.h> -#include <asm/io.h> -#include <asm/prom.h> -#include <asm/time.h> -#include <asm/smp.h> - -#define SDCASR_REG 0x0100 -#define SDCASR_REG_STRIDE 0x1000 -#define SDCPWR_CFGA0_REG 0x0100 -#define SDCPWR_PWST0_REG 0x0000 -#define SDCPWR_GIZTIME_REG 0x0440 - -/* SDCPWR_GIZTIME_REG fields */ -#define SDCPWR_GIZTIME_GR 0x80000000 -#define SDCPWR_GIZTIME_LONGLOCK 0x000000ff - -/* Offset of ASR registers from SDC base */ -#define SDCASR_OFFSET 0x120000 - -static void __iomem *sdcpwr_mapbase; -static void __iomem *sdcasr_mapbase; - -static DEFINE_MUTEX(pas_switch_mutex); - -/* Current astate, is used when waking up from power savings on - * one core, in case the other core has switched states during - * the idle time. - */ -static int current_astate; - -/* We support 5(A0-A4) power states excluding turbo(A5-A6) modes */ -static struct cpufreq_frequency_table pas_freqs[] = { - {0, 0}, - {1, 0}, - {2, 0}, - {3, 0}, - {4, 0}, - {0, CPUFREQ_TABLE_END}, -}; - -static struct freq_attr *pas_cpu_freqs_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -/* - * hardware specific functions - */ - -static int get_astate_freq(int astate) -{ - u32 ret; - ret = in_le32(sdcpwr_mapbase + SDCPWR_CFGA0_REG + (astate * 0x10)); - - return ret & 0x3f; -} - -static int get_cur_astate(int cpu) -{ - u32 ret; - - ret = in_le32(sdcpwr_mapbase + SDCPWR_PWST0_REG); - ret = (ret >> (cpu * 4)) & 0x7; - - return ret; -} - -static int get_gizmo_latency(void) -{ - u32 giztime, ret; - - giztime = in_le32(sdcpwr_mapbase + SDCPWR_GIZTIME_REG); - - /* just provide the upper bound */ - if (giztime & SDCPWR_GIZTIME_GR) - ret = (giztime & SDCPWR_GIZTIME_LONGLOCK) * 128000; - else - ret = (giztime & SDCPWR_GIZTIME_LONGLOCK) * 1000; - - return ret; -} - -static void set_astate(int cpu, unsigned int astate) -{ - unsigned long flags; - - /* Return if called before init has run */ - if (unlikely(!sdcasr_mapbase)) - return; - - local_irq_save(flags); - - out_le32(sdcasr_mapbase + SDCASR_REG + SDCASR_REG_STRIDE*cpu, astate); - - local_irq_restore(flags); -} - -int check_astate(void) -{ - return get_cur_astate(hard_smp_processor_id()); -} - -void restore_astate(int cpu) -{ - set_astate(cpu, current_astate); -} - -/* - * cpufreq functions - */ - -static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - const u32 *max_freqp; - u32 max_freq; - int i, cur_astate; - struct resource res; - struct device_node *cpu, *dn; - int err = -ENODEV; - - cpu = of_get_cpu_node(policy->cpu, NULL); - - if (!cpu) - goto out; - - dn = of_find_compatible_node(NULL, NULL, "1682m-sdc"); - if (!dn) - dn = of_find_compatible_node(NULL, NULL, - "pasemi,pwrficient-sdc"); - if (!dn) - goto out; - err = of_address_to_resource(dn, 0, &res); - of_node_put(dn); - if (err) - goto out; - sdcasr_mapbase = ioremap(res.start + SDCASR_OFFSET, 0x2000); - if (!sdcasr_mapbase) { - err = -EINVAL; - goto out; - } - - dn = of_find_compatible_node(NULL, NULL, "1682m-gizmo"); - if (!dn) - dn = of_find_compatible_node(NULL, NULL, - "pasemi,pwrficient-gizmo"); - if (!dn) { - err = -ENODEV; - goto out_unmap_sdcasr; - } - err = of_address_to_resource(dn, 0, &res); - of_node_put(dn); - if (err) - goto out_unmap_sdcasr; - sdcpwr_mapbase = ioremap(res.start, 0x1000); - if (!sdcpwr_mapbase) { - err = -EINVAL; - goto out_unmap_sdcasr; - } - - pr_debug("init cpufreq on CPU %d\n", policy->cpu); - - max_freqp = of_get_property(cpu, "clock-frequency", NULL); - if (!max_freqp) { - err = -EINVAL; - goto out_unmap_sdcpwr; - } - - /* we need the freq in kHz */ - max_freq = *max_freqp / 1000; - - pr_debug("max clock-frequency is at %u kHz\n", max_freq); - pr_debug("initializing frequency table\n"); - - /* initialize frequency table */ - for (i=0; pas_freqs[i].frequency!=CPUFREQ_TABLE_END; i++) { - pas_freqs[i].frequency = get_astate_freq(pas_freqs[i].index) * 100000; - pr_debug("%d: %d\n", i, pas_freqs[i].frequency); - } - - policy->cpuinfo.transition_latency = get_gizmo_latency(); - - cur_astate = get_cur_astate(policy->cpu); - pr_debug("current astate is at %d\n",cur_astate); - - policy->cur = pas_freqs[cur_astate].frequency; - cpumask_copy(policy->cpus, cpu_online_mask); - - ppc_proc_freq = policy->cur * 1000ul; - - cpufreq_frequency_table_get_attr(pas_freqs, policy->cpu); - - /* this ensures that policy->cpuinfo_min and policy->cpuinfo_max - * are set correctly - */ - return cpufreq_frequency_table_cpuinfo(policy, pas_freqs); - -out_unmap_sdcpwr: - iounmap(sdcpwr_mapbase); - -out_unmap_sdcasr: - iounmap(sdcasr_mapbase); -out: - return err; -} - -static int pas_cpufreq_cpu_exit(struct cpufreq_policy *policy) -{ - /* - * We don't support CPU hotplug. Don't unmap after the system - * has already made it to a running state. - */ - if (system_state != SYSTEM_BOOTING) - return 0; - - if (sdcasr_mapbase) - iounmap(sdcasr_mapbase); - if (sdcpwr_mapbase) - iounmap(sdcpwr_mapbase); - - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static int pas_cpufreq_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, pas_freqs); -} - -static int pas_cpufreq_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - struct cpufreq_freqs freqs; - int pas_astate_new; - int i; - - cpufreq_frequency_table_target(policy, - pas_freqs, - target_freq, - relation, - &pas_astate_new); - - freqs.old = policy->cur; - freqs.new = pas_freqs[pas_astate_new].frequency; - - mutex_lock(&pas_switch_mutex); - cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - - pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n", - policy->cpu, - pas_freqs[pas_astate_new].frequency, - pas_freqs[pas_astate_new].index); - - current_astate = pas_astate_new; - - for_each_online_cpu(i) - set_astate(i, pas_astate_new); - - cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); - mutex_unlock(&pas_switch_mutex); - - ppc_proc_freq = freqs.new * 1000ul; - return 0; -} - -static struct cpufreq_driver pas_cpufreq_driver = { - .name = "pas-cpufreq", - .owner = THIS_MODULE, - .flags = CPUFREQ_CONST_LOOPS, - .init = pas_cpufreq_cpu_init, - .exit = pas_cpufreq_cpu_exit, - .verify = pas_cpufreq_verify, - .target = pas_cpufreq_target, - .attr = pas_cpu_freqs_attr, -}; - -/* - * module init and destoy - */ - -static int __init pas_cpufreq_init(void) -{ - if (!of_machine_is_compatible("PA6T-1682M") && - !of_machine_is_compatible("pasemi,pwrficient")) - return -ENODEV; - - return cpufreq_register_driver(&pas_cpufreq_driver); -} - -static void __exit pas_cpufreq_exit(void) -{ - cpufreq_unregister_driver(&pas_cpufreq_driver); -} - -module_init(pas_cpufreq_init); -module_exit(pas_cpufreq_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>, Olof Johansson <olof@lixom.net>"); diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index ea47df66fee..52c6ce1cc98 100644 --- a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -9,8 +9,6 @@ obj-y += pic.o setup.o time.o feature.o pci.o \ sleep.o low_i2c.o cache.o pfunc_core.o \ pfunc_base.o udbg_scc.o udbg_adb.o obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o -obj-$(CONFIG_CPU_FREQ_PMAC) += cpufreq_32.o -obj-$(CONFIG_CPU_FREQ_PMAC64) += cpufreq_64.o # CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really # need this to be a bool. Cheat here and pretend CONFIG_NVRAM=m is really # CONFIG_NVRAM=y diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c deleted file mode 100644 index 3104fad8248..00000000000 --- a/arch/powerpc/platforms/powermac/cpufreq_32.c +++ /dev/null @@ -1,721 +0,0 @@ -/* - * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> - * Copyright (C) 2004 John Steele Scott <toojays@toojays.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * TODO: Need a big cleanup here. Basically, we need to have different - * cpufreq_driver structures for the different type of HW instead of the - * current mess. We also need to better deal with the detection of the - * type of machine. - * - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/adb.h> -#include <linux/pmu.h> -#include <linux/cpufreq.h> -#include <linux/init.h> -#include <linux/device.h> -#include <linux/hardirq.h> -#include <asm/prom.h> -#include <asm/machdep.h> -#include <asm/irq.h> -#include <asm/pmac_feature.h> -#include <asm/mmu_context.h> -#include <asm/sections.h> -#include <asm/cputable.h> -#include <asm/time.h> -#include <asm/mpic.h> -#include <asm/keylargo.h> -#include <asm/switch_to.h> - -/* WARNING !!! This will cause calibrate_delay() to be called, - * but this is an __init function ! So you MUST go edit - * init/main.c to make it non-init before enabling DEBUG_FREQ - */ -#undef DEBUG_FREQ - -extern void low_choose_7447a_dfs(int dfs); -extern void low_choose_750fx_pll(int pll); -extern void low_sleep_handler(void); - -/* - * Currently, PowerMac cpufreq supports only high & low frequencies - * that are set by the firmware - */ -static unsigned int low_freq; -static unsigned int hi_freq; -static unsigned int cur_freq; -static unsigned int sleep_freq; -static unsigned long transition_latency; - -/* - * Different models uses different mechanisms to switch the frequency - */ -static int (*set_speed_proc)(int low_speed); -static unsigned int (*get_speed_proc)(void); - -/* - * Some definitions used by the various speedprocs - */ -static u32 voltage_gpio; -static u32 frequency_gpio; -static u32 slew_done_gpio; -static int no_schedule; -static int has_cpu_l2lve; -static int is_pmu_based; - -/* There are only two frequency states for each processor. Values - * are in kHz for the time being. - */ -#define CPUFREQ_HIGH 0 -#define CPUFREQ_LOW 1 - -static struct cpufreq_frequency_table pmac_cpu_freqs[] = { - {CPUFREQ_HIGH, 0}, - {CPUFREQ_LOW, 0}, - {0, CPUFREQ_TABLE_END}, -}; - -static struct freq_attr* pmac_cpu_freqs_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static inline void local_delay(unsigned long ms) -{ - if (no_schedule) - mdelay(ms); - else - msleep(ms); -} - -#ifdef DEBUG_FREQ -static inline void debug_calc_bogomips(void) -{ - /* This will cause a recalc of bogomips and display the - * result. We backup/restore the value to avoid affecting the - * core cpufreq framework's own calculation. - */ - unsigned long save_lpj = loops_per_jiffy; - calibrate_delay(); - loops_per_jiffy = save_lpj; -} -#endif /* DEBUG_FREQ */ - -/* Switch CPU speed under 750FX CPU control - */ -static int cpu_750fx_cpu_speed(int low_speed) -{ - u32 hid2; - - if (low_speed == 0) { - /* ramping up, set voltage first */ - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); - /* Make sure we sleep for at least 1ms */ - local_delay(10); - - /* tweak L2 for high voltage */ - if (has_cpu_l2lve) { - hid2 = mfspr(SPRN_HID2); - hid2 &= ~0x2000; - mtspr(SPRN_HID2, hid2); - } - } -#ifdef CONFIG_6xx - low_choose_750fx_pll(low_speed); -#endif - if (low_speed == 1) { - /* tweak L2 for low voltage */ - if (has_cpu_l2lve) { - hid2 = mfspr(SPRN_HID2); - hid2 |= 0x2000; - mtspr(SPRN_HID2, hid2); - } - - /* ramping down, set voltage last */ - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); - local_delay(10); - } - - return 0; -} - -static unsigned int cpu_750fx_get_cpu_speed(void) -{ - if (mfspr(SPRN_HID1) & HID1_PS) - return low_freq; - else - return hi_freq; -} - -/* Switch CPU speed using DFS */ -static int dfs_set_cpu_speed(int low_speed) -{ - if (low_speed == 0) { - /* ramping up, set voltage first */ - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); - /* Make sure we sleep for at least 1ms */ - local_delay(1); - } - - /* set frequency */ -#ifdef CONFIG_6xx - low_choose_7447a_dfs(low_speed); -#endif - udelay(100); - - if (low_speed == 1) { - /* ramping down, set voltage last */ - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); - local_delay(1); - } - - return 0; -} - -static unsigned int dfs_get_cpu_speed(void) -{ - if (mfspr(SPRN_HID1) & HID1_DFS) - return low_freq; - else - return hi_freq; -} - - -/* Switch CPU speed using slewing GPIOs - */ -static int gpios_set_cpu_speed(int low_speed) -{ - int gpio, timeout = 0; - - /* If ramping up, set voltage first */ - if (low_speed == 0) { - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); - /* Delay is way too big but it's ok, we schedule */ - local_delay(10); - } - - /* Set frequency */ - gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0); - if (low_speed == ((gpio & 0x01) == 0)) - goto skip; - - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, frequency_gpio, - low_speed ? 0x04 : 0x05); - udelay(200); - do { - if (++timeout > 100) - break; - local_delay(1); - gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, slew_done_gpio, 0); - } while((gpio & 0x02) == 0); - skip: - /* If ramping down, set voltage last */ - if (low_speed == 1) { - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); - /* Delay is way too big but it's ok, we schedule */ - local_delay(10); - } - -#ifdef DEBUG_FREQ - debug_calc_bogomips(); -#endif - - return 0; -} - -/* Switch CPU speed under PMU control - */ -static int pmu_set_cpu_speed(int low_speed) -{ - struct adb_request req; - unsigned long save_l2cr; - unsigned long save_l3cr; - unsigned int pic_prio; - unsigned long flags; - - preempt_disable(); - -#ifdef DEBUG_FREQ - printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1)); -#endif - pmu_suspend(); - - /* Disable all interrupt sources on openpic */ - pic_prio = mpic_cpu_get_priority(); - mpic_cpu_set_priority(0xf); - - /* Make sure the decrementer won't interrupt us */ - asm volatile("mtdec %0" : : "r" (0x7fffffff)); - /* Make sure any pending DEC interrupt occurring while we did - * the above didn't re-enable the DEC */ - mb(); - asm volatile("mtdec %0" : : "r" (0x7fffffff)); - - /* We can now disable MSR_EE */ - local_irq_save(flags); - - /* Giveup the FPU & vec */ - enable_kernel_fp(); - -#ifdef CONFIG_ALTIVEC - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - enable_kernel_altivec(); -#endif /* CONFIG_ALTIVEC */ - - /* Save & disable L2 and L3 caches */ - save_l3cr = _get_L3CR(); /* (returns -1 if not available) */ - save_l2cr = _get_L2CR(); /* (returns -1 if not available) */ - - /* Send the new speed command. My assumption is that this command - * will cause PLL_CFG[0..3] to be changed next time CPU goes to sleep - */ - pmu_request(&req, NULL, 6, PMU_CPU_SPEED, 'W', 'O', 'O', 'F', low_speed); - while (!req.complete) - pmu_poll(); - - /* Prepare the northbridge for the speed transition */ - pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,1); - - /* Call low level code to backup CPU state and recover from - * hardware reset - */ - low_sleep_handler(); - - /* Restore the northbridge */ - pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,0); - - /* Restore L2 cache */ - if (save_l2cr != 0xffffffff && (save_l2cr & L2CR_L2E) != 0) - _set_L2CR(save_l2cr); - /* Restore L3 cache */ - if (save_l3cr != 0xffffffff && (save_l3cr & L3CR_L3E) != 0) - _set_L3CR(save_l3cr); - - /* Restore userland MMU context */ - switch_mmu_context(NULL, current->active_mm); - -#ifdef DEBUG_FREQ - printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1)); -#endif - - /* Restore low level PMU operations */ - pmu_unlock(); - - /* - * Restore decrementer; we'll take a decrementer interrupt - * as soon as interrupts are re-enabled and the generic - * clockevents code will reprogram it with the right value. - */ - set_dec(1); - - /* Restore interrupts */ - mpic_cpu_set_priority(pic_prio); - - /* Let interrupts flow again ... */ - local_irq_restore(flags); - -#ifdef DEBUG_FREQ - debug_calc_bogomips(); -#endif - - pmu_resume(); - - preempt_enable(); - - return 0; -} - -static int do_set_cpu_speed(struct cpufreq_policy *policy, int speed_mode, - int notify) -{ - struct cpufreq_freqs freqs; - unsigned long l3cr; - static unsigned long prev_l3cr; - - freqs.old = cur_freq; - freqs.new = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; - - if (freqs.old == freqs.new) - return 0; - - if (notify) - cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - if (speed_mode == CPUFREQ_LOW && - cpu_has_feature(CPU_FTR_L3CR)) { - l3cr = _get_L3CR(); - if (l3cr & L3CR_L3E) { - prev_l3cr = l3cr; - _set_L3CR(0); - } - } - set_speed_proc(speed_mode == CPUFREQ_LOW); - if (speed_mode == CPUFREQ_HIGH && - cpu_has_feature(CPU_FTR_L3CR)) { - l3cr = _get_L3CR(); - if ((prev_l3cr & L3CR_L3E) && l3cr != prev_l3cr) - _set_L3CR(prev_l3cr); - } - if (notify) - cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); - cur_freq = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; - - return 0; -} - -static unsigned int pmac_cpufreq_get_speed(unsigned int cpu) -{ - return cur_freq; -} - -static int pmac_cpufreq_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, pmac_cpu_freqs); -} - -static int pmac_cpufreq_target( struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - int rc; - - if (cpufreq_frequency_table_target(policy, pmac_cpu_freqs, - target_freq, relation, &newstate)) - return -EINVAL; - - rc = do_set_cpu_speed(policy, newstate, 1); - - ppc_proc_freq = cur_freq * 1000ul; - return rc; -} - -static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - if (policy->cpu != 0) - return -ENODEV; - - policy->cpuinfo.transition_latency = transition_latency; - policy->cur = cur_freq; - - cpufreq_frequency_table_get_attr(pmac_cpu_freqs, policy->cpu); - return cpufreq_frequency_table_cpuinfo(policy, pmac_cpu_freqs); -} - -static u32 read_gpio(struct device_node *np) -{ - const u32 *reg = of_get_property(np, "reg", NULL); - u32 offset; - - if (reg == NULL) - return 0; - /* That works for all keylargos but shall be fixed properly - * some day... The problem is that it seems we can't rely - * on the "reg" property of the GPIO nodes, they are either - * relative to the base of KeyLargo or to the base of the - * GPIO space, and the device-tree doesn't help. - */ - offset = *reg; - if (offset < KEYLARGO_GPIO_LEVELS0) - offset += KEYLARGO_GPIO_LEVELS0; - return offset; -} - -static int pmac_cpufreq_suspend(struct cpufreq_policy *policy) -{ - /* Ok, this could be made a bit smarter, but let's be robust for now. We - * always force a speed change to high speed before sleep, to make sure - * we have appropriate voltage and/or bus speed for the wakeup process, - * and to make sure our loops_per_jiffies are "good enough", that is will - * not cause too short delays if we sleep in low speed and wake in high - * speed.. - */ - no_schedule = 1; - sleep_freq = cur_freq; - if (cur_freq == low_freq && !is_pmu_based) - do_set_cpu_speed(policy, CPUFREQ_HIGH, 0); - return 0; -} - -static int pmac_cpufreq_resume(struct cpufreq_policy *policy) -{ - /* If we resume, first check if we have a get() function */ - if (get_speed_proc) - cur_freq = get_speed_proc(); - else - cur_freq = 0; - - /* We don't, hrm... we don't really know our speed here, best - * is that we force a switch to whatever it was, which is - * probably high speed due to our suspend() routine - */ - do_set_cpu_speed(policy, sleep_freq == low_freq ? - CPUFREQ_LOW : CPUFREQ_HIGH, 0); - - ppc_proc_freq = cur_freq * 1000ul; - - no_schedule = 0; - return 0; -} - -static struct cpufreq_driver pmac_cpufreq_driver = { - .verify = pmac_cpufreq_verify, - .target = pmac_cpufreq_target, - .get = pmac_cpufreq_get_speed, - .init = pmac_cpufreq_cpu_init, - .suspend = pmac_cpufreq_suspend, - .resume = pmac_cpufreq_resume, - .flags = CPUFREQ_PM_NO_WARN, - .attr = pmac_cpu_freqs_attr, - .name = "powermac", - .owner = THIS_MODULE, -}; - - -static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) -{ - struct device_node *volt_gpio_np = of_find_node_by_name(NULL, - "voltage-gpio"); - struct device_node *freq_gpio_np = of_find_node_by_name(NULL, - "frequency-gpio"); - struct device_node *slew_done_gpio_np = of_find_node_by_name(NULL, - "slewing-done"); - const u32 *value; - - /* - * Check to see if it's GPIO driven or PMU only - * - * The way we extract the GPIO address is slightly hackish, but it - * works well enough for now. We need to abstract the whole GPIO - * stuff sooner or later anyway - */ - - if (volt_gpio_np) - voltage_gpio = read_gpio(volt_gpio_np); - if (freq_gpio_np) - frequency_gpio = read_gpio(freq_gpio_np); - if (slew_done_gpio_np) - slew_done_gpio = read_gpio(slew_done_gpio_np); - - /* If we use the frequency GPIOs, calculate the min/max speeds based - * on the bus frequencies - */ - if (frequency_gpio && slew_done_gpio) { - int lenp, rc; - const u32 *freqs, *ratio; - - freqs = of_get_property(cpunode, "bus-frequencies", &lenp); - lenp /= sizeof(u32); - if (freqs == NULL || lenp != 2) { - printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n"); - return 1; - } - ratio = of_get_property(cpunode, "processor-to-bus-ratio*2", - NULL); - if (ratio == NULL) { - printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n"); - return 1; - } - - /* Get the min/max bus frequencies */ - low_freq = min(freqs[0], freqs[1]); - hi_freq = max(freqs[0], freqs[1]); - - /* Grrrr.. It _seems_ that the device-tree is lying on the low bus - * frequency, it claims it to be around 84Mhz on some models while - * it appears to be approx. 101Mhz on all. Let's hack around here... - * fortunately, we don't need to be too precise - */ - if (low_freq < 98000000) - low_freq = 101000000; - - /* Convert those to CPU core clocks */ - low_freq = (low_freq * (*ratio)) / 2000; - hi_freq = (hi_freq * (*ratio)) / 2000; - - /* Now we get the frequencies, we read the GPIO to see what is out current - * speed - */ - rc = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0); - cur_freq = (rc & 0x01) ? hi_freq : low_freq; - - set_speed_proc = gpios_set_cpu_speed; - return 1; - } - - /* If we use the PMU, look for the min & max frequencies in the - * device-tree - */ - value = of_get_property(cpunode, "min-clock-frequency", NULL); - if (!value) - return 1; - low_freq = (*value) / 1000; - /* The PowerBook G4 12" (PowerBook6,1) has an error in the device-tree - * here */ - if (low_freq < 100000) - low_freq *= 10; - - value = of_get_property(cpunode, "max-clock-frequency", NULL); - if (!value) - return 1; - hi_freq = (*value) / 1000; - set_speed_proc = pmu_set_cpu_speed; - is_pmu_based = 1; - - return 0; -} - -static int pmac_cpufreq_init_7447A(struct device_node *cpunode) -{ - struct device_node *volt_gpio_np; - - if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL) - return 1; - - volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); - if (volt_gpio_np) - voltage_gpio = read_gpio(volt_gpio_np); - if (!voltage_gpio){ - printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n"); - return 1; - } - - /* OF only reports the high frequency */ - hi_freq = cur_freq; - low_freq = cur_freq/2; - - /* Read actual frequency from CPU */ - cur_freq = dfs_get_cpu_speed(); - set_speed_proc = dfs_set_cpu_speed; - get_speed_proc = dfs_get_cpu_speed; - - return 0; -} - -static int pmac_cpufreq_init_750FX(struct device_node *cpunode) -{ - struct device_node *volt_gpio_np; - u32 pvr; - const u32 *value; - - if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL) - return 1; - - hi_freq = cur_freq; - value = of_get_property(cpunode, "reduced-clock-frequency", NULL); - if (!value) - return 1; - low_freq = (*value) / 1000; - - volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); - if (volt_gpio_np) - voltage_gpio = read_gpio(volt_gpio_np); - - pvr = mfspr(SPRN_PVR); - has_cpu_l2lve = !((pvr & 0xf00) == 0x100); - - set_speed_proc = cpu_750fx_cpu_speed; - get_speed_proc = cpu_750fx_get_cpu_speed; - cur_freq = cpu_750fx_get_cpu_speed(); - - return 0; -} - -/* Currently, we support the following machines: - * - * - Titanium PowerBook 1Ghz (PMU based, 667Mhz & 1Ghz) - * - Titanium PowerBook 800 (PMU based, 667Mhz & 800Mhz) - * - Titanium PowerBook 400 (PMU based, 300Mhz & 400Mhz) - * - Titanium PowerBook 500 (PMU based, 300Mhz & 500Mhz) - * - iBook2 500/600 (PMU based, 400Mhz & 500/600Mhz) - * - iBook2 700 (CPU based, 400Mhz & 700Mhz, support low voltage) - * - Recent MacRISC3 laptops - * - All new machines with 7447A CPUs - */ -static int __init pmac_cpufreq_setup(void) -{ - struct device_node *cpunode; - const u32 *value; - - if (strstr(cmd_line, "nocpufreq")) - return 0; - - /* Assume only one CPU */ - cpunode = of_find_node_by_type(NULL, "cpu"); - if (!cpunode) - goto out; - - /* Get current cpu clock freq */ - value = of_get_property(cpunode, "clock-frequency", NULL); - if (!value) - goto out; - cur_freq = (*value) / 1000; - transition_latency = CPUFREQ_ETERNAL; - - /* Check for 7447A based MacRISC3 */ - if (of_machine_is_compatible("MacRISC3") && - of_get_property(cpunode, "dynamic-power-step", NULL) && - PVR_VER(mfspr(SPRN_PVR)) == 0x8003) { - pmac_cpufreq_init_7447A(cpunode); - transition_latency = 8000000; - /* Check for other MacRISC3 machines */ - } else if (of_machine_is_compatible("PowerBook3,4") || - of_machine_is_compatible("PowerBook3,5") || - of_machine_is_compatible("MacRISC3")) { - pmac_cpufreq_init_MacRISC3(cpunode); - /* Else check for iBook2 500/600 */ - } else if (of_machine_is_compatible("PowerBook4,1")) { - hi_freq = cur_freq; - low_freq = 400000; - set_speed_proc = pmu_set_cpu_speed; - is_pmu_based = 1; - } - /* Else check for TiPb 550 */ - else if (of_machine_is_compatible("PowerBook3,3") && cur_freq == 550000) { - hi_freq = cur_freq; - low_freq = 500000; - set_speed_proc = pmu_set_cpu_speed; - is_pmu_based = 1; - } - /* Else check for TiPb 400 & 500 */ - else if (of_machine_is_compatible("PowerBook3,2")) { - /* We only know about the 400 MHz and the 500Mhz model - * they both have 300 MHz as low frequency - */ - if (cur_freq < 350000 || cur_freq > 550000) - goto out; - hi_freq = cur_freq; - low_freq = 300000; - set_speed_proc = pmu_set_cpu_speed; - is_pmu_based = 1; - } - /* Else check for 750FX */ - else if (PVR_VER(mfspr(SPRN_PVR)) == 0x7000) - pmac_cpufreq_init_750FX(cpunode); -out: - of_node_put(cpunode); - if (set_speed_proc == NULL) - return -ENODEV; - - pmac_cpu_freqs[CPUFREQ_LOW].frequency = low_freq; - pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq; - ppc_proc_freq = cur_freq * 1000ul; - - printk(KERN_INFO "Registering PowerMac CPU frequency driver\n"); - printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n", - low_freq/1000, hi_freq/1000, cur_freq/1000); - - return cpufreq_register_driver(&pmac_cpufreq_driver); -} - -module_init(pmac_cpufreq_setup); - diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c deleted file mode 100644 index 7ba423431cf..00000000000 --- a/arch/powerpc/platforms/powermac/cpufreq_64.c +++ /dev/null @@ -1,746 +0,0 @@ -/* - * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> - * and Markus Demleitner <msdemlei@cl.uni-heidelberg.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This driver adds basic cpufreq support for SMU & 970FX based G5 Macs, - * that is iMac G5 and latest single CPU desktop. - */ - -#undef DEBUG - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/cpufreq.h> -#include <linux/init.h> -#include <linux/completion.h> -#include <linux/mutex.h> -#include <asm/prom.h> -#include <asm/machdep.h> -#include <asm/irq.h> -#include <asm/sections.h> -#include <asm/cputable.h> -#include <asm/time.h> -#include <asm/smu.h> -#include <asm/pmac_pfunc.h> - -#define DBG(fmt...) pr_debug(fmt) - -/* see 970FX user manual */ - -#define SCOM_PCR 0x0aa001 /* PCR scom addr */ - -#define PCR_HILO_SELECT 0x80000000U /* 1 = PCR, 0 = PCRH */ -#define PCR_SPEED_FULL 0x00000000U /* 1:1 speed value */ -#define PCR_SPEED_HALF 0x00020000U /* 1:2 speed value */ -#define PCR_SPEED_QUARTER 0x00040000U /* 1:4 speed value */ -#define PCR_SPEED_MASK 0x000e0000U /* speed mask */ -#define PCR_SPEED_SHIFT 17 -#define PCR_FREQ_REQ_VALID 0x00010000U /* freq request valid */ -#define PCR_VOLT_REQ_VALID 0x00008000U /* volt request valid */ -#define PCR_TARGET_TIME_MASK 0x00006000U /* target time */ -#define PCR_STATLAT_MASK 0x00001f00U /* STATLAT value */ -#define PCR_SNOOPLAT_MASK 0x000000f0U /* SNOOPLAT value */ -#define PCR_SNOOPACC_MASK 0x0000000fU /* SNOOPACC value */ - -#define SCOM_PSR 0x408001 /* PSR scom addr */ -/* warning: PSR is a 64 bits register */ -#define PSR_CMD_RECEIVED 0x2000000000000000U /* command received */ -#define PSR_CMD_COMPLETED 0x1000000000000000U /* command completed */ -#define PSR_CUR_SPEED_MASK 0x0300000000000000U /* current speed */ -#define PSR_CUR_SPEED_SHIFT (56) - -/* - * The G5 only supports two frequencies (Quarter speed is not supported) - */ -#define CPUFREQ_HIGH 0 -#define CPUFREQ_LOW 1 - -static struct cpufreq_frequency_table g5_cpu_freqs[] = { - {CPUFREQ_HIGH, 0}, - {CPUFREQ_LOW, 0}, - {0, CPUFREQ_TABLE_END}, -}; - -static struct freq_attr* g5_cpu_freqs_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -/* Power mode data is an array of the 32 bits PCR values to use for - * the various frequencies, retrieved from the device-tree - */ -static int g5_pmode_cur; - -static void (*g5_switch_volt)(int speed_mode); -static int (*g5_switch_freq)(int speed_mode); -static int (*g5_query_freq)(void); - -static DEFINE_MUTEX(g5_switch_mutex); - -static unsigned long transition_latency; - -#ifdef CONFIG_PMAC_SMU - -static const u32 *g5_pmode_data; -static int g5_pmode_max; - -static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */ -static int g5_fvt_count; /* number of op. points */ -static int g5_fvt_cur; /* current op. point */ - -/* - * SMU based voltage switching for Neo2 platforms - */ - -static void g5_smu_switch_volt(int speed_mode) -{ - struct smu_simple_cmd cmd; - - DECLARE_COMPLETION_ONSTACK(comp); - smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, smu_done_complete, - &comp, 'V', 'S', 'L', 'E', 'W', - 0xff, g5_fvt_cur+1, speed_mode); - wait_for_completion(&comp); -} - -/* - * Platform function based voltage/vdnap switching for Neo2 - */ - -static struct pmf_function *pfunc_set_vdnap0; -static struct pmf_function *pfunc_vdnap0_complete; - -static void g5_vdnap_switch_volt(int speed_mode) -{ - struct pmf_args args; - u32 slew, done = 0; - unsigned long timeout; - - slew = (speed_mode == CPUFREQ_LOW) ? 1 : 0; - args.count = 1; - args.u[0].p = &slew; - - pmf_call_one(pfunc_set_vdnap0, &args); - - /* It's an irq GPIO so we should be able to just block here, - * I'll do that later after I've properly tested the IRQ code for - * platform functions - */ - timeout = jiffies + HZ/10; - while(!time_after(jiffies, timeout)) { - args.count = 1; - args.u[0].p = &done; - pmf_call_one(pfunc_vdnap0_complete, &args); - if (done) - break; - msleep(1); - } - if (done == 0) - printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n"); -} - - -/* - * SCOM based frequency switching for 970FX rev3 - */ -static int g5_scom_switch_freq(int speed_mode) -{ - unsigned long flags; - int to; - - /* If frequency is going up, first ramp up the voltage */ - if (speed_mode < g5_pmode_cur) - g5_switch_volt(speed_mode); - - local_irq_save(flags); - - /* Clear PCR high */ - scom970_write(SCOM_PCR, 0); - /* Clear PCR low */ - scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0); - /* Set PCR low */ - scom970_write(SCOM_PCR, PCR_HILO_SELECT | - g5_pmode_data[speed_mode]); - - /* Wait for completion */ - for (to = 0; to < 10; to++) { - unsigned long psr = scom970_read(SCOM_PSR); - - if ((psr & PSR_CMD_RECEIVED) == 0 && - (((psr >> PSR_CUR_SPEED_SHIFT) ^ - (g5_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3) - == 0) - break; - if (psr & PSR_CMD_COMPLETED) - break; - udelay(100); - } - - local_irq_restore(flags); - - /* If frequency is going down, last ramp the voltage */ - if (speed_mode > g5_pmode_cur) - g5_switch_volt(speed_mode); - - g5_pmode_cur = speed_mode; - ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul; - - return 0; -} - -static int g5_scom_query_freq(void) -{ - unsigned long psr = scom970_read(SCOM_PSR); - int i; - - for (i = 0; i <= g5_pmode_max; i++) - if ((((psr >> PSR_CUR_SPEED_SHIFT) ^ - (g5_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0) - break; - return i; -} - -/* - * Fake voltage switching for platforms with missing support - */ - -static void g5_dummy_switch_volt(int speed_mode) -{ -} - -#endif /* CONFIG_PMAC_SMU */ - -/* - * Platform function based voltage switching for PowerMac7,2 & 7,3 - */ - -static struct pmf_function *pfunc_cpu0_volt_high; -static struct pmf_function *pfunc_cpu0_volt_low; -static struct pmf_function *pfunc_cpu1_volt_high; -static struct pmf_function *pfunc_cpu1_volt_low; - -static void g5_pfunc_switch_volt(int speed_mode) -{ - if (speed_mode == CPUFREQ_HIGH) { - if (pfunc_cpu0_volt_high) - pmf_call_one(pfunc_cpu0_volt_high, NULL); - if (pfunc_cpu1_volt_high) - pmf_call_one(pfunc_cpu1_volt_high, NULL); - } else { - if (pfunc_cpu0_volt_low) - pmf_call_one(pfunc_cpu0_volt_low, NULL); - if (pfunc_cpu1_volt_low) - pmf_call_one(pfunc_cpu1_volt_low, NULL); - } - msleep(10); /* should be faster , to fix */ -} - -/* - * Platform function based frequency switching for PowerMac7,2 & 7,3 - */ - -static struct pmf_function *pfunc_cpu_setfreq_high; -static struct pmf_function *pfunc_cpu_setfreq_low; -static struct pmf_function *pfunc_cpu_getfreq; -static struct pmf_function *pfunc_slewing_done; - -static int g5_pfunc_switch_freq(int speed_mode) -{ - struct pmf_args args; - u32 done = 0; - unsigned long timeout; - int rc; - - DBG("g5_pfunc_switch_freq(%d)\n", speed_mode); - - /* If frequency is going up, first ramp up the voltage */ - if (speed_mode < g5_pmode_cur) - g5_switch_volt(speed_mode); - - /* Do it */ - if (speed_mode == CPUFREQ_HIGH) - rc = pmf_call_one(pfunc_cpu_setfreq_high, NULL); - else - rc = pmf_call_one(pfunc_cpu_setfreq_low, NULL); - - if (rc) - printk(KERN_WARNING "cpufreq: pfunc switch error %d\n", rc); - - /* It's an irq GPIO so we should be able to just block here, - * I'll do that later after I've properly tested the IRQ code for - * platform functions - */ - timeout = jiffies + HZ/10; - while(!time_after(jiffies, timeout)) { - args.count = 1; - args.u[0].p = &done; - pmf_call_one(pfunc_slewing_done, &args); - if (done) - break; - msleep(1); - } - if (done == 0) - printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n"); - - /* If frequency is going down, last ramp the voltage */ - if (speed_mode > g5_pmode_cur) - g5_switch_volt(speed_mode); - - g5_pmode_cur = speed_mode; - ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul; - - return 0; -} - -static int g5_pfunc_query_freq(void) -{ - struct pmf_args args; - u32 val = 0; - - args.count = 1; - args.u[0].p = &val; - pmf_call_one(pfunc_cpu_getfreq, &args); - return val ? CPUFREQ_HIGH : CPUFREQ_LOW; -} - - -/* - * Common interface to the cpufreq core - */ - -static int g5_cpufreq_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, g5_cpu_freqs); -} - -static int g5_cpufreq_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ - unsigned int newstate = 0; - struct cpufreq_freqs freqs; - int rc; - - if (cpufreq_frequency_table_target(policy, g5_cpu_freqs, - target_freq, relation, &newstate)) - return -EINVAL; - - if (g5_pmode_cur == newstate) - return 0; - - mutex_lock(&g5_switch_mutex); - - freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency; - freqs.new = g5_cpu_freqs[newstate].frequency; - - cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - rc = g5_switch_freq(newstate); - cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); - - mutex_unlock(&g5_switch_mutex); - - return rc; -} - -static unsigned int g5_cpufreq_get_speed(unsigned int cpu) -{ - return g5_cpu_freqs[g5_pmode_cur].frequency; -} - -static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - policy->cpuinfo.transition_latency = transition_latency; - policy->cur = g5_cpu_freqs[g5_query_freq()].frequency; - /* secondary CPUs are tied to the primary one by the - * cpufreq core if in the secondary policy we tell it that - * it actually must be one policy together with all others. */ - cpumask_copy(policy->cpus, cpu_online_mask); - cpufreq_frequency_table_get_attr(g5_cpu_freqs, policy->cpu); - - return cpufreq_frequency_table_cpuinfo(policy, - g5_cpu_freqs); -} - - -static struct cpufreq_driver g5_cpufreq_driver = { - .name = "powermac", - .owner = THIS_MODULE, - .flags = CPUFREQ_CONST_LOOPS, - .init = g5_cpufreq_cpu_init, - .verify = g5_cpufreq_verify, - .target = g5_cpufreq_target, - .get = g5_cpufreq_get_speed, - .attr = g5_cpu_freqs_attr, -}; - - -#ifdef CONFIG_PMAC_SMU - -static int __init g5_neo2_cpufreq_init(struct device_node *cpus) -{ - struct device_node *cpunode; - unsigned int psize, ssize; - unsigned long max_freq; - char *freq_method, *volt_method; - const u32 *valp; - u32 pvr_hi; - int use_volts_vdnap = 0; - int use_volts_smu = 0; - int rc = -ENODEV; - - /* Check supported platforms */ - if (of_machine_is_compatible("PowerMac8,1") || - of_machine_is_compatible("PowerMac8,2") || - of_machine_is_compatible("PowerMac9,1")) - use_volts_smu = 1; - else if (of_machine_is_compatible("PowerMac11,2")) - use_volts_vdnap = 1; - else - return -ENODEV; - - /* Get first CPU node */ - for (cpunode = NULL; - (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) { - const u32 *reg = of_get_property(cpunode, "reg", NULL); - if (reg == NULL || (*reg) != 0) - continue; - if (!strcmp(cpunode->type, "cpu")) - break; - } - if (cpunode == NULL) { - printk(KERN_ERR "cpufreq: Can't find any CPU 0 node\n"); - return -ENODEV; - } - - /* Check 970FX for now */ - valp = of_get_property(cpunode, "cpu-version", NULL); - if (!valp) { - DBG("No cpu-version property !\n"); - goto bail_noprops; - } - pvr_hi = (*valp) >> 16; - if (pvr_hi != 0x3c && pvr_hi != 0x44) { - printk(KERN_ERR "cpufreq: Unsupported CPU version\n"); - goto bail_noprops; - } - - /* Look for the powertune data in the device-tree */ - g5_pmode_data = of_get_property(cpunode, "power-mode-data",&psize); - if (!g5_pmode_data) { - DBG("No power-mode-data !\n"); - goto bail_noprops; - } - g5_pmode_max = psize / sizeof(u32) - 1; - - if (use_volts_smu) { - const struct smu_sdbp_header *shdr; - - /* Look for the FVT table */ - shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL); - if (!shdr) - goto bail_noprops; - g5_fvt_table = (struct smu_sdbp_fvt *)&shdr[1]; - ssize = (shdr->len * sizeof(u32)) - - sizeof(struct smu_sdbp_header); - g5_fvt_count = ssize / sizeof(struct smu_sdbp_fvt); - g5_fvt_cur = 0; - - /* Sanity checking */ - if (g5_fvt_count < 1 || g5_pmode_max < 1) - goto bail_noprops; - - g5_switch_volt = g5_smu_switch_volt; - volt_method = "SMU"; - } else if (use_volts_vdnap) { - struct device_node *root; - - root = of_find_node_by_path("/"); - if (root == NULL) { - printk(KERN_ERR "cpufreq: Can't find root of " - "device tree\n"); - goto bail_noprops; - } - pfunc_set_vdnap0 = pmf_find_function(root, "set-vdnap0"); - pfunc_vdnap0_complete = - pmf_find_function(root, "slewing-done"); - if (pfunc_set_vdnap0 == NULL || - pfunc_vdnap0_complete == NULL) { - printk(KERN_ERR "cpufreq: Can't find required " - "platform function\n"); - goto bail_noprops; - } - - g5_switch_volt = g5_vdnap_switch_volt; - volt_method = "GPIO"; - } else { - g5_switch_volt = g5_dummy_switch_volt; - volt_method = "none"; - } - - /* - * From what I see, clock-frequency is always the maximal frequency. - * The current driver can not slew sysclk yet, so we really only deal - * with powertune steps for now. We also only implement full freq and - * half freq in this version. So far, I haven't yet seen a machine - * supporting anything else. - */ - valp = of_get_property(cpunode, "clock-frequency", NULL); - if (!valp) - return -ENODEV; - max_freq = (*valp)/1000; - g5_cpu_freqs[0].frequency = max_freq; - g5_cpu_freqs[1].frequency = max_freq/2; - - /* Set callbacks */ - transition_latency = 12000; - g5_switch_freq = g5_scom_switch_freq; - g5_query_freq = g5_scom_query_freq; - freq_method = "SCOM"; - - /* Force apply current frequency to make sure everything is in - * sync (voltage is right for example). Firmware may leave us with - * a strange setting ... - */ - g5_switch_volt(CPUFREQ_HIGH); - msleep(10); - g5_pmode_cur = -1; - g5_switch_freq(g5_query_freq()); - - printk(KERN_INFO "Registering G5 CPU frequency driver\n"); - printk(KERN_INFO "Frequency method: %s, Voltage method: %s\n", - freq_method, volt_method); - printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", - g5_cpu_freqs[1].frequency/1000, - g5_cpu_freqs[0].frequency/1000, - g5_cpu_freqs[g5_pmode_cur].frequency/1000); - - rc = cpufreq_register_driver(&g5_cpufreq_driver); - - /* We keep the CPU node on hold... hopefully, Apple G5 don't have - * hotplug CPU with a dynamic device-tree ... - */ - return rc; - - bail_noprops: - of_node_put(cpunode); - - return rc; -} - -#endif /* CONFIG_PMAC_SMU */ - - -static int __init g5_pm72_cpufreq_init(struct device_node *cpus) -{ - struct device_node *cpuid = NULL, *hwclock = NULL, *cpunode = NULL; - const u8 *eeprom = NULL; - const u32 *valp; - u64 max_freq, min_freq, ih, il; - int has_volt = 1, rc = 0; - - DBG("cpufreq: Initializing for PowerMac7,2, PowerMac7,3 and" - " RackMac3,1...\n"); - - /* Get first CPU node */ - for (cpunode = NULL; - (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) { - if (!strcmp(cpunode->type, "cpu")) - break; - } - if (cpunode == NULL) { - printk(KERN_ERR "cpufreq: Can't find any CPU node\n"); - return -ENODEV; - } - - /* Lookup the cpuid eeprom node */ - cpuid = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/cpuid@a0"); - if (cpuid != NULL) - eeprom = of_get_property(cpuid, "cpuid", NULL); - if (eeprom == NULL) { - printk(KERN_ERR "cpufreq: Can't find cpuid EEPROM !\n"); - rc = -ENODEV; - goto bail; - } - - /* Lookup the i2c hwclock */ - for (hwclock = NULL; - (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){ - const char *loc = of_get_property(hwclock, - "hwctrl-location", NULL); - if (loc == NULL) - continue; - if (strcmp(loc, "CPU CLOCK")) - continue; - if (!of_get_property(hwclock, "platform-get-frequency", NULL)) - continue; - break; - } - if (hwclock == NULL) { - printk(KERN_ERR "cpufreq: Can't find i2c clock chip !\n"); - rc = -ENODEV; - goto bail; - } - - DBG("cpufreq: i2c clock chip found: %s\n", hwclock->full_name); - - /* Now get all the platform functions */ - pfunc_cpu_getfreq = - pmf_find_function(hwclock, "get-frequency"); - pfunc_cpu_setfreq_high = - pmf_find_function(hwclock, "set-frequency-high"); - pfunc_cpu_setfreq_low = - pmf_find_function(hwclock, "set-frequency-low"); - pfunc_slewing_done = - pmf_find_function(hwclock, "slewing-done"); - pfunc_cpu0_volt_high = - pmf_find_function(hwclock, "set-voltage-high-0"); - pfunc_cpu0_volt_low = - pmf_find_function(hwclock, "set-voltage-low-0"); - pfunc_cpu1_volt_high = - pmf_find_function(hwclock, "set-voltage-high-1"); - pfunc_cpu1_volt_low = - pmf_find_function(hwclock, "set-voltage-low-1"); - - /* Check we have minimum requirements */ - if (pfunc_cpu_getfreq == NULL || pfunc_cpu_setfreq_high == NULL || - pfunc_cpu_setfreq_low == NULL || pfunc_slewing_done == NULL) { - printk(KERN_ERR "cpufreq: Can't find platform functions !\n"); - rc = -ENODEV; - goto bail; - } - - /* Check that we have complete sets */ - if (pfunc_cpu0_volt_high == NULL || pfunc_cpu0_volt_low == NULL) { - pmf_put_function(pfunc_cpu0_volt_high); - pmf_put_function(pfunc_cpu0_volt_low); - pfunc_cpu0_volt_high = pfunc_cpu0_volt_low = NULL; - has_volt = 0; - } - if (!has_volt || - pfunc_cpu1_volt_high == NULL || pfunc_cpu1_volt_low == NULL) { - pmf_put_function(pfunc_cpu1_volt_high); - pmf_put_function(pfunc_cpu1_volt_low); - pfunc_cpu1_volt_high = pfunc_cpu1_volt_low = NULL; - } - - /* Note: The device tree also contains a "platform-set-values" - * function for which I haven't quite figured out the usage. It - * might have to be called on init and/or wakeup, I'm not too sure - * but things seem to work fine without it so far ... - */ - - /* Get max frequency from device-tree */ - valp = of_get_property(cpunode, "clock-frequency", NULL); - if (!valp) { - printk(KERN_ERR "cpufreq: Can't find CPU frequency !\n"); - rc = -ENODEV; - goto bail; - } - - max_freq = (*valp)/1000; - - /* Now calculate reduced frequency by using the cpuid input freq - * ratio. This requires 64 bits math unless we are willing to lose - * some precision - */ - ih = *((u32 *)(eeprom + 0x10)); - il = *((u32 *)(eeprom + 0x20)); - - /* Check for machines with no useful settings */ - if (il == ih) { - printk(KERN_WARNING "cpufreq: No low frequency mode available" - " on this model !\n"); - rc = -ENODEV; - goto bail; - } - - min_freq = 0; - if (ih != 0 && il != 0) - min_freq = (max_freq * il) / ih; - - /* Sanity check */ - if (min_freq >= max_freq || min_freq < 1000) { - printk(KERN_ERR "cpufreq: Can't calculate low frequency !\n"); - rc = -ENXIO; - goto bail; - } - g5_cpu_freqs[0].frequency = max_freq; - g5_cpu_freqs[1].frequency = min_freq; - - /* Set callbacks */ - transition_latency = CPUFREQ_ETERNAL; - g5_switch_volt = g5_pfunc_switch_volt; - g5_switch_freq = g5_pfunc_switch_freq; - g5_query_freq = g5_pfunc_query_freq; - - /* Force apply current frequency to make sure everything is in - * sync (voltage is right for example). Firmware may leave us with - * a strange setting ... - */ - g5_switch_volt(CPUFREQ_HIGH); - msleep(10); - g5_pmode_cur = -1; - g5_switch_freq(g5_query_freq()); - - printk(KERN_INFO "Registering G5 CPU frequency driver\n"); - printk(KERN_INFO "Frequency method: i2c/pfunc, " - "Voltage method: %s\n", has_volt ? "i2c/pfunc" : "none"); - printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", - g5_cpu_freqs[1].frequency/1000, - g5_cpu_freqs[0].frequency/1000, - g5_cpu_freqs[g5_pmode_cur].frequency/1000); - - rc = cpufreq_register_driver(&g5_cpufreq_driver); - bail: - if (rc != 0) { - pmf_put_function(pfunc_cpu_getfreq); - pmf_put_function(pfunc_cpu_setfreq_high); - pmf_put_function(pfunc_cpu_setfreq_low); - pmf_put_function(pfunc_slewing_done); - pmf_put_function(pfunc_cpu0_volt_high); - pmf_put_function(pfunc_cpu0_volt_low); - pmf_put_function(pfunc_cpu1_volt_high); - pmf_put_function(pfunc_cpu1_volt_low); - } - of_node_put(hwclock); - of_node_put(cpuid); - of_node_put(cpunode); - - return rc; -} - -static int __init g5_cpufreq_init(void) -{ - struct device_node *cpus; - int rc = 0; - - cpus = of_find_node_by_path("/cpus"); - if (cpus == NULL) { - DBG("No /cpus node !\n"); - return -ENODEV; - } - - if (of_machine_is_compatible("PowerMac7,2") || - of_machine_is_compatible("PowerMac7,3") || - of_machine_is_compatible("RackMac3,1")) - rc = g5_pm72_cpufreq_init(cpus); -#ifdef CONFIG_PMAC_SMU - else - rc = g5_neo2_cpufreq_init(cpus); -#endif /* CONFIG_PMAC_SMU */ - - of_node_put(cpus); - return rc; -} - -module_init(g5_cpufreq_init); - - -MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index d3e840d643a..c24684c818a 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -6,6 +6,7 @@ config PPC_POWERNV select PPC_ICP_NATIVE select PPC_P7_NAP select PPC_PCI_CHOICE if EMBEDDED + select EPAPR_BOOT default y config POWERNV_MSI diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index ade4463226c..628c564cead 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -15,6 +15,7 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/interrupt.h> +#include <linux/slab.h> #include <asm/opal.h> #include <asm/firmware.h> @@ -28,6 +29,8 @@ struct opal { static struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); extern u64 opal_mc_secondary_handler[]; +static unsigned int *opal_irqs; +static unsigned int opal_irq_count; int __init early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data) @@ -53,7 +56,11 @@ int __init early_init_dt_scan_opal(unsigned long node, opal.entry, entryp, entrysz); powerpc_firmware_features |= FW_FEATURE_OPAL; - if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) { + if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { + powerpc_firmware_features |= FW_FEATURE_OPALv2; + powerpc_firmware_features |= FW_FEATURE_OPALv3; + printk("OPAL V3 detected !\n"); + } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) { powerpc_firmware_features |= FW_FEATURE_OPALv2; printk("OPAL V2 detected !\n"); } else { @@ -144,6 +151,13 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) { len = total_len; rc = opal_console_write(vtermno, &len, data); + + /* Closed or other error drop */ + if (rc != OPAL_SUCCESS && rc != OPAL_BUSY && + rc != OPAL_BUSY_EVENT) { + written = total_len; + break; + } if (rc == OPAL_SUCCESS) { total_len -= len; data += len; @@ -316,6 +330,8 @@ static int __init opal_init(void) irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); pr_debug("opal: Found %d interrupts reserved for OPAL\n", irqs ? (irqlen / 4) : 0); + opal_irq_count = irqlen / 4; + opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL); for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) { unsigned int hwirq = be32_to_cpup(irqs); unsigned int irq = irq_create_mapping(NULL, hwirq); @@ -327,7 +343,19 @@ static int __init opal_init(void) if (rc) pr_warning("opal: Error %d requesting irq %d" " (0x%x)\n", rc, irq, hwirq); + opal_irqs[i] = irq; } return 0; } subsys_initcall(opal_init); + +void opal_shutdown(void) +{ + unsigned int i; + + for (i = 0; i < opal_irq_count; i++) { + if (opal_irqs[i]) + free_irq(opal_irqs[i], 0); + opal_irqs[i] = 0; + } +} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 1da578b7c1b..9c9d15e4cdf 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -68,16 +68,6 @@ define_pe_printk_level(pe_err, KERN_ERR); define_pe_printk_level(pe_warn, KERN_WARNING); define_pe_printk_level(pe_info, KERN_INFO); -static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev) -{ - struct device_node *np; - - np = pci_device_to_OF_node(dev); - if (!np) - return NULL; - return PCI_DN(np); -} - static int pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; @@ -110,7 +100,7 @@ static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; - struct pci_dn *pdn = pnv_ioda_get_pdn(dev); + struct pci_dn *pdn = pci_get_pdn(dev); if (!pdn) return NULL; @@ -173,7 +163,7 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) /* Add to all parents PELT-V */ while (parent) { - struct pci_dn *pdn = pnv_ioda_get_pdn(parent); + struct pci_dn *pdn = pci_get_pdn(parent); if (pdn && pdn->pe_number != IODA_INVALID_PE) { rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); @@ -252,7 +242,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; - struct pci_dn *pdn = pnv_ioda_get_pdn(dev); + struct pci_dn *pdn = pci_get_pdn(dev); struct pnv_ioda_pe *pe; int pe_num; @@ -323,7 +313,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - struct pci_dn *pdn = pnv_ioda_get_pdn(dev); + struct pci_dn *pdn = pci_get_pdn(dev); if (pdn == NULL) { pr_warn("%s: No device node associated with device !\n", @@ -436,7 +426,7 @@ static void pnv_pci_ioda_setup_PEs(void) static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) { - struct pci_dn *pdn = pnv_ioda_get_pdn(pdev); + struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; /* @@ -768,6 +758,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int is_64, struct msi_msg *msg) { struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); + struct pci_dn *pdn = pci_get_pdn(dev); struct irq_data *idata; struct irq_chip *ichip; unsigned int xive_num = hwirq - phb->msi_base; @@ -783,6 +774,10 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, if (pe->mve_number < 0) return -ENXIO; + /* Force 32-bit MSI on some broken devices */ + if (pdn && pdn->force_32bit_msi) + is_64 = 0; + /* Assign XIVE to PE */ rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); if (rc) { @@ -1035,7 +1030,7 @@ static int pnv_pci_enable_device_hook(struct pci_dev *dev) if (!phb->initialized) return 0; - pdn = pnv_ioda_get_pdn(dev); + pdn = pci_get_pdn(dev); if (!pdn || pdn->pe_number == IODA_INVALID_PE) return -EINVAL; @@ -1048,6 +1043,12 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; } +static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) +{ + opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET, + OPAL_ASSERT_RESET); +} + void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) { struct pci_controller *hose; @@ -1178,6 +1179,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; + /* Setup shutdown function for kexec */ + phb->shutdown = pnv_pci_ioda_shutdown; + /* Setup MSI support */ pnv_pci_init_ioda_msis(phb); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 55dfca844dd..277343cc6a3 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -47,6 +47,10 @@ static int pnv_msi_check_device(struct pci_dev* pdev, int nvec, int type) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (pdn && pdn->force_32bit_msi && !phb->msi32_support) + return -ENODEV; return (phb && phb->msi_bmp.bitmap) ? 0 : -ENODEV; } @@ -367,7 +371,7 @@ static void pnv_tce_free(struct iommu_table *tbl, long index, long npages) while (npages--) *(tcep++) = 0; - if (tbl->it_type & TCE_PCI_SWINV_CREATE) + if (tbl->it_type & TCE_PCI_SWINV_FREE) pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1); } @@ -450,6 +454,18 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev) pnv_pci_dma_fallback_setup(hose, pdev); } +void pnv_pci_shutdown(void) +{ + struct pci_controller *hose; + + list_for_each_entry(hose, &hose_list, list_node) { + struct pnv_phb *phb = hose->private_data; + + if (phb && phb->shutdown) + phb->shutdown(phb); + } +} + /* Fixup wrong class code in p7ioc and p8 root complex */ static void pnv_p7ioc_rc_quirk(struct pci_dev *dev) { diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 48dc4bb856a..25d76c4df50 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -86,6 +86,7 @@ struct pnv_phb { void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); + void (*shutdown)(struct pnv_phb *phb); union { struct { @@ -158,4 +159,5 @@ extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, u64 *startp, u64 *endp); + #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 8a9df7f9667..a1c6f83fc39 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -9,8 +9,10 @@ static inline void pnv_smp_init(void) { } #ifdef CONFIG_PCI extern void pnv_pci_init(void); +extern void pnv_pci_shutdown(void); #else static inline void pnv_pci_init(void) { } +static inline void pnv_pci_shutdown(void) { } #endif #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index db1ad1c8f68..d4459bfc92f 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -78,7 +78,9 @@ static void pnv_show_cpuinfo(struct seq_file *m) if (root) model = of_get_property(root, "model", NULL); seq_printf(m, "machine\t\t: PowerNV %s\n", model); - if (firmware_has_feature(FW_FEATURE_OPALv2)) + if (firmware_has_feature(FW_FEATURE_OPALv3)) + seq_printf(m, "firmware\t: OPAL v3\n"); + else if (firmware_has_feature(FW_FEATURE_OPALv2)) seq_printf(m, "firmware\t: OPAL v2\n"); else if (firmware_has_feature(FW_FEATURE_OPAL)) seq_printf(m, "firmware\t: OPAL v1\n"); @@ -126,6 +128,17 @@ static void pnv_progress(char *s, unsigned short hex) { } +static void pnv_shutdown(void) +{ + /* Let the PCI code clear up IODA tables */ + pnv_pci_shutdown(); + + /* And unregister all OPAL interrupts so they don't fire + * up while we kexec + */ + opal_shutdown(); +} + #ifdef CONFIG_KEXEC static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) { @@ -187,6 +200,7 @@ define_machine(powernv) { .init_IRQ = pnv_init_IRQ, .show_cpuinfo = pnv_show_cpuinfo, .progress = pnv_progress, + .machine_shutdown = pnv_shutdown, .power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, #ifdef CONFIG_KEXEC diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 6a3ecca5b72..88c9459c3e0 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -71,18 +71,68 @@ int pnv_smp_kick_cpu(int nr) BUG_ON(nr < 0 || nr >= NR_CPUS); - /* On OPAL v2 the CPU are still spinning inside OPAL itself, - * get them back now + /* + * If we already started or OPALv2 is not supported, we just + * kick the CPU via the PACA */ - if (!paca[nr].cpu_start && firmware_has_feature(FW_FEATURE_OPALv2)) { - pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu); - rc = opal_start_cpu(pcpu, start_here); + if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPALv2)) + goto kick; + + /* + * At this point, the CPU can either be spinning on the way in + * from kexec or be inside OPAL waiting to be started for the + * first time. OPAL v3 allows us to query OPAL to know if it + * has the CPUs, so we do that + */ + if (firmware_has_feature(FW_FEATURE_OPALv3)) { + uint8_t status; + + rc = opal_query_cpu_status(pcpu, &status); if (rc != OPAL_SUCCESS) { - pr_warn("OPAL Error %ld starting CPU %d\n", + pr_warn("OPAL Error %ld querying CPU %d state\n", rc, nr); return -ENODEV; } + + /* + * Already started, just kick it, probably coming from + * kexec and spinning + */ + if (status == OPAL_THREAD_STARTED) + goto kick; + + /* + * Available/inactive, let's kick it + */ + if (status == OPAL_THREAD_INACTIVE) { + pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", + nr, pcpu); + rc = opal_start_cpu(pcpu, start_here); + if (rc != OPAL_SUCCESS) { + pr_warn("OPAL Error %ld starting CPU %d\n", + rc, nr); + return -ENODEV; + } + } else { + /* + * An unavailable CPU (or any other unknown status) + * shouldn't be started. It should also + * not be in the possible map but currently it can + * happen + */ + pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable" + " (status %d)...\n", nr, pcpu, status); + return -ENODEV; + } + } else { + /* + * On OPAL v2, we just kick it and hope for the best, + * we must not test the error from opal_start_cpu() or + * we would fail to get CPUs from kexec. + */ + opal_start_cpu(pcpu, start_here); } + kick: return smp_generic_kick_cpu(nr); } diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 9a0941bc4d3..4459eff7a75 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -18,6 +18,9 @@ config PPC_PSERIES select PPC_PCI_CHOICE if EXPERT select ZLIB_DEFLATE select PPC_DOORBELL + select HAVE_CONTEXT_TRACKING + select HOTPLUG if SMP + select HOTPLUG_CPU if SMP default y config PPC_SPLPAR diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c index 5a4c8790305..5ce3ba7ad13 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -294,8 +294,6 @@ void __init eeh_addr_cache_build(void) spin_lock_init(&pci_io_addr_cache_root.piar_lock); for_each_pci_dev(dev) { - eeh_addr_cache_insert_dev(dev); - dn = pci_device_to_OF_node(dev); if (!dn) continue; @@ -308,6 +306,8 @@ void __init eeh_addr_cache_build(void) dev->dev.archdata.edev = edev; edev->pdev = dev; + eeh_addr_cache_insert_dev(dev); + eeh_sysfs_add_device(dev); } diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c index fe43d1aa2cf..9d4a9e8562b 100644 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ b/arch/powerpc/platforms/pseries/eeh_pe.c @@ -639,7 +639,8 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) if (pe->type & EEH_PE_PHB) { bus = pe->phb->bus; - } else if (pe->type & EEH_PE_BUS) { + } else if (pe->type & EEH_PE_BUS || + pe->type & EEH_PE_DEVICE) { edev = list_first_entry(&pe->edevs, struct eeh_dev, list); pdev = eeh_dev_to_pci_dev(edev); if (pdev) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 19506f93573..b456b157d33 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -83,7 +83,11 @@ static int pseries_eeh_init(void) ibm_configure_pe = rtas_token("ibm,configure-pe"); ibm_configure_bridge = rtas_token("ibm,configure-bridge"); - /* necessary sanity check */ + /* + * Necessary sanity check. We needn't check "get-config-addr-info" + * and its variant since the old firmware probably support address + * of domain/bus/slot/function for EEH RTAS operations. + */ if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) { pr_warning("%s: RTAS service <ibm,set-eeh-option> invalid\n", __func__); @@ -102,12 +106,6 @@ static int pseries_eeh_init(void) pr_warning("%s: RTAS service <ibm,slot-error-detail> invalid\n", __func__); return -EINVAL; - } else if (ibm_get_config_addr_info2 == RTAS_UNKNOWN_SERVICE && - ibm_get_config_addr_info == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,get-config-addr-info2> and " - "<ibm,get-config-addr-info> invalid\n", - __func__); - return -EINVAL; } else if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE && ibm_configure_bridge == RTAS_UNKNOWN_SERVICE) { pr_warning("%s: RTAS service <ibm,configure-pe> and " diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 420524e6f8c..6d2f0abce6f 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -26,26 +26,6 @@ static int query_token, change_token; #define RTAS_CHANGE_MSIX_FN 4 #define RTAS_CHANGE_32MSI_FN 5 -static struct pci_dn *get_pdn(struct pci_dev *pdev) -{ - struct device_node *dn; - struct pci_dn *pdn; - - dn = pci_device_to_OF_node(pdev); - if (!dn) { - dev_dbg(&pdev->dev, "rtas_msi: No OF device node\n"); - return NULL; - } - - pdn = PCI_DN(dn); - if (!pdn) { - dev_dbg(&pdev->dev, "rtas_msi: No PCI DN\n"); - return NULL; - } - - return pdn; -} - /* RTAS Helpers */ static int rtas_change_msi(struct pci_dn *pdn, u32 func, u32 num_irqs) @@ -91,7 +71,7 @@ static void rtas_disable_msi(struct pci_dev *pdev) { struct pci_dn *pdn; - pdn = get_pdn(pdev); + pdn = pci_get_pdn(pdev); if (!pdn) return; @@ -152,7 +132,7 @@ static int check_req(struct pci_dev *pdev, int nvec, char *prop_name) struct pci_dn *pdn; const u32 *req_msi; - pdn = get_pdn(pdev); + pdn = pci_get_pdn(pdev); if (!pdn) return -ENODEV; @@ -394,6 +374,23 @@ static int check_msix_entries(struct pci_dev *pdev) return 0; } +static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev) +{ + u32 addr_hi, addr_lo; + + /* + * We should only get in here for IODA1 configs. This is based on the + * fact that we using RTAS for MSIs, we don't have the 32 bit MSI RTAS + * support, and we are in a PCIe Gen2 slot. + */ + dev_info(&pdev->dev, + "rtas_msi: No 32 bit MSI firmware support, forcing 32 bit MSI\n"); + pci_read_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, &addr_hi); + addr_lo = 0xffff0000 | ((addr_hi >> (48 - 32)) << 4); + pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, addr_lo); + pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, 0); +} + static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) { struct pci_dn *pdn; @@ -401,8 +398,9 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) struct msi_desc *entry; struct msi_msg msg; int nvec = nvec_in; + int use_32bit_msi_hack = 0; - pdn = get_pdn(pdev); + pdn = pci_get_pdn(pdev); if (!pdn) return -ENODEV; @@ -428,15 +426,31 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) */ again: if (type == PCI_CAP_ID_MSI) { - if (pdn->force_32bit_msi) + if (pdn->force_32bit_msi) { rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec); - else + if (rc < 0) { + /* + * We only want to run the 32 bit MSI hack below if + * the max bus speed is Gen2 speed + */ + if (pdev->bus->max_bus_speed != PCIE_SPEED_5_0GT) + return rc; + + use_32bit_msi_hack = 1; + } + } else + rc = -1; + + if (rc < 0) rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec); - if (rc < 0 && !pdn->force_32bit_msi) { + if (rc < 0) { pr_debug("rtas_msi: trying the old firmware call.\n"); rc = rtas_change_msi(pdn, RTAS_CHANGE_FN, nvec); } + + if (use_32bit_msi_hack && rc > 0) + rtas_hack_32bit_msi_gen2(pdev); } else rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec); @@ -518,12 +532,3 @@ static int rtas_msi_init(void) } arch_initcall(rtas_msi_init); -static void quirk_radeon(struct pci_dev *dev) -{ - struct pci_dn *pdn = get_pdn(dev); - - if (pdn) - pdn->force_32bit_msi = 1; -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x68f2, quirk_radeon); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0xaa68, quirk_radeon); diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 47226e04126..5f997e79d57 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -16,6 +16,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/cpu.h> #include <linux/delay.h> #include <linux/suspend.h> #include <linux/stat.h> @@ -126,11 +127,15 @@ static ssize_t store_hibernate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + cpumask_var_t offline_mask; int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY)) + return -ENOMEM; + stream_id = simple_strtoul(buf, NULL, 16); do { @@ -140,15 +145,32 @@ static ssize_t store_hibernate(struct device *dev, } while (rc == -EAGAIN); if (!rc) { + /* All present CPUs must be online */ + cpumask_andnot(offline_mask, cpu_present_mask, + cpu_online_mask); + rc = rtas_online_cpus_mask(offline_mask); + if (rc) { + pr_err("%s: Could not bring present CPUs online.\n", + __func__); + goto out; + } + stop_topology_update(); rc = pm_suspend(PM_SUSPEND_MEM); start_topology_update(); + + /* Take down CPUs not online prior to suspend */ + if (!rtas_offline_cpus_mask(offline_mask)) + pr_warn("%s: Could not restore CPUs to offline " + "state.\n", __func__); } stream_id = 0; if (!rc) rc = count; +out: + free_cpumask_var(offline_mask); return rc; } diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c index 97fe82ee863..2d3b1dd9571 100644 --- a/arch/powerpc/platforms/wsp/ics.c +++ b/arch/powerpc/platforms/wsp/ics.c @@ -361,7 +361,7 @@ static int wsp_chip_set_affinity(struct irq_data *d, xive = xive_set_server(xive, get_irq_server(ics, hw_irq)); wsp_ics_set_xive(ics, hw_irq, xive); - return 0; + return IRQ_SET_MASK_OK; } static struct irq_chip wsp_irq_chip = { diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index b0a518e9759..99464a7bdb3 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -64,6 +64,8 @@ endif obj-$(CONFIG_PPC_SCOM) += scom.o +obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS) += udbg_memcons.o + subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror obj-$(CONFIG_PPC_XICS) += xics/ diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c index 6e0e1005227..9cd0e60716f 100644 --- a/arch/powerpc/sysdev/ehv_pic.c +++ b/arch/powerpc/sysdev/ehv_pic.c @@ -81,7 +81,7 @@ int ehv_pic_set_affinity(struct irq_data *d, const struct cpumask *dest, ev_int_set_config(src, config, prio, cpuid); spin_unlock_irqrestore(&ehv_pic_lock, flags); - return 0; + return IRQ_SET_MASK_OK; } static unsigned int ehv_pic_type_to_vecpri(unsigned int type) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 028ac1f71b5..46ac1ddea68 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -97,22 +97,14 @@ static int fsl_indirect_read_config(struct pci_bus *bus, unsigned int devfn, return indirect_read_config(bus, devfn, offset, len, val); } -static struct pci_ops fsl_indirect_pci_ops = +#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx) + +static struct pci_ops fsl_indirect_pcie_ops = { .read = fsl_indirect_read_config, .write = indirect_write_config, }; -static void __init fsl_setup_indirect_pci(struct pci_controller* hose, - resource_size_t cfg_addr, - resource_size_t cfg_data, u32 flags) -{ - setup_indirect_pci(hose, cfg_addr, cfg_data, flags); - hose->ops = &fsl_indirect_pci_ops; -} - -#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx) - #define MAX_PHYS_ADDR_BITS 40 static u64 pci64_dma_offset = 1ull << MAX_PHYS_ADDR_BITS; @@ -504,13 +496,15 @@ int __init fsl_add_bridge(struct platform_device *pdev, int is_primary) if (!hose->private_data) goto no_bridge; - fsl_setup_indirect_pci(hose, rsrc.start, rsrc.start + 0x4, - PPC_INDIRECT_TYPE_BIG_ENDIAN); + setup_indirect_pci(hose, rsrc.start, rsrc.start + 0x4, + PPC_INDIRECT_TYPE_BIG_ENDIAN); if (in_be32(&pci->block_rev1) < PCIE_IP_REV_3_0) hose->indirect_type |= PPC_INDIRECT_TYPE_FSL_CFG_REG_LINK; if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) { + /* use fsl_indirect_read_config for PCIe */ + hose->ops = &fsl_indirect_pcie_ops; /* For PCIE read HEADER_TYPE to identify controler mode */ early_read_config_byte(hose, 0, 0, PCI_HEADER_TYPE, &hdr_type); if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE) @@ -814,8 +808,8 @@ int __init mpc83xx_add_bridge(struct device_node *dev) if (ret) goto err0; } else { - fsl_setup_indirect_pci(hose, rsrc_cfg.start, - rsrc_cfg.start + 4, 0); + setup_indirect_pci(hose, rsrc_cfg.start, + rsrc_cfg.start + 4, 0); } printk(KERN_INFO "Found FSL PCI host bridge at 0x%016llx. " diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index ee21b5e71ae..3cc2f9159ab 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -54,7 +54,7 @@ static DEFINE_RAW_SPINLOCK(mpic_lock); #ifdef CONFIG_PPC32 /* XXX for now */ #ifdef CONFIG_IRQ_ALL_CPUS -#define distribute_irqs (!(mpic->flags & MPIC_SINGLE_DEST_CPU)) +#define distribute_irqs (1) #else #define distribute_irqs (0) #endif @@ -836,7 +836,7 @@ int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask, mpic_physmask(mask)); } - return 0; + return IRQ_SET_MASK_OK; } static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type) @@ -1703,7 +1703,7 @@ void mpic_setup_this_cpu(void) * it differently, then we should make sure we also change the default * values of irq_desc[].affinity in irq.c. */ - if (distribute_irqs) { + if (distribute_irqs && !(mpic->flags & MPIC_SINGLE_DEST_CPU)) { for (i = 0; i < mpic->num_sources ; i++) mpic_irq_write(i, MPIC_INFO(IRQ_DESTINATION), mpic_irq_read(i, MPIC_INFO(IRQ_DESTINATION)) | msk); diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c new file mode 100644 index 00000000000..ce5a7b489e4 --- /dev/null +++ b/arch/powerpc/sysdev/udbg_memcons.c @@ -0,0 +1,105 @@ +/* + * A udbg backend which logs messages and reads input from in memory + * buffers. + * + * The console output can be read from memcons_output which is a + * circular buffer whose next write position is stored in memcons.output_pos. + * + * Input may be passed by writing into the memcons_input buffer when it is + * empty. The input buffer is empty when both input_pos == input_start and + * *input_start == '\0'. + * + * Copyright (C) 2003-2005 Anton Blanchard and Milton Miller, IBM Corp + * Copyright (C) 2013 Alistair Popple, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <asm/barrier.h> +#include <asm/page.h> +#include <asm/processor.h> +#include <asm/udbg.h> + +struct memcons { + char *output_start; + char *output_pos; + char *output_end; + char *input_start; + char *input_pos; + char *input_end; +}; + +static char memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE]; +static char memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE]; + +struct memcons memcons = { + .output_start = memcons_output, + .output_pos = memcons_output, + .output_end = &memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE], + .input_start = memcons_input, + .input_pos = memcons_input, + .input_end = &memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE], +}; + +void memcons_putc(char c) +{ + char *new_output_pos; + + *memcons.output_pos = c; + wmb(); + new_output_pos = memcons.output_pos + 1; + if (new_output_pos >= memcons.output_end) + new_output_pos = memcons.output_start; + + memcons.output_pos = new_output_pos; +} + +int memcons_getc_poll(void) +{ + char c; + char *new_input_pos; + + if (*memcons.input_pos) { + c = *memcons.input_pos; + + new_input_pos = memcons.input_pos + 1; + if (new_input_pos >= memcons.input_end) + new_input_pos = memcons.input_start; + else if (*new_input_pos == '\0') + new_input_pos = memcons.input_start; + + *memcons.input_pos = '\0'; + wmb(); + memcons.input_pos = new_input_pos; + return c; + } + + return -1; +} + +int memcons_getc(void) +{ + int c; + + while (1) { + c = memcons_getc_poll(); + if (c == -1) + cpu_relax(); + else + break; + } + + return c; +} + +void udbg_init_memcons(void) +{ + udbg_putc = memcons_putc; + udbg_getc = memcons_getc; + udbg_getc_poll = memcons_getc_poll; +} diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index f7e8609df0d..39d72212655 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -148,7 +148,7 @@ static int ics_opal_set_affinity(struct irq_data *d, __func__, d->irq, hw_irq, server, rc); return -1; } - return 0; + return IRQ_SET_MASK_OK; } static struct irq_chip ics_opal_irq_chip = { |