diff options
Diffstat (limited to 'arch/powerpc/kernel')
54 files changed, 3404 insertions, 821 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 80e9fe2632b..0cc0995b81b 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -12,12 +12,12 @@ endif obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ - init_task.o process.o systbl.o + init_task.o process.o systbl.o idle.o obj-y += vdso32/ obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o cpu_setup_power4.o \ - firmware.o sysfs.o idle_64.o + firmware.o sysfs.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o obj-$(CONFIG_POWER4) += idle_power4.o @@ -34,6 +34,11 @@ obj-$(CONFIG_IBMEBUS) += ibmebus.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj64-$(CONFIG_PPC_MULTIPLATFORM) += nvram_64.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o +obj-$(CONFIG_TAU) += tau_6xx.o +obj32-$(CONFIG_SOFTWARE_SUSPEND) += swsusp_32.o +obj32-$(CONFIG_MODULES) += module_32.o +obj-$(CONFIG_E500) += perfmon_fsl_booke.o ifeq ($(CONFIG_PPC_MERGE),y) @@ -51,7 +56,6 @@ obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o obj-$(CONFIG_PPC_MULTIPLATFORM) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o -obj-$(CONFIG_6xx) += idle_6xx.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o @@ -77,6 +81,7 @@ smpobj-$(CONFIG_SMP) += smp.o endif +obj-$(CONFIG_PPC32) += $(obj32-y) obj-$(CONFIG_PPC64) += $(obj64-y) extra-$(CONFIG_PPC_FPU) += fpu.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c9a660e4c2d..54b48f33005 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -105,8 +105,6 @@ int main(void) DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); - DEFINE(PLATFORM_LPAR, PLATFORM_LPAR); - /* paca */ DEFINE(PACA_SIZE, sizeof(struct paca_struct)); DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); @@ -136,6 +134,9 @@ int main(void) DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr)); DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); + DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr)); + DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); + DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S new file mode 100644 index 00000000000..55ed7716636 --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -0,0 +1,474 @@ +/* + * This file contains low level CPU setup functions. + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cache.h> + +_GLOBAL(__setup_cpu_603) + b setup_common_caches +_GLOBAL(__setup_cpu_604) + mflr r4 + bl setup_common_caches + bl setup_604_hid0 + mtlr r4 + blr +_GLOBAL(__setup_cpu_750) + mflr r4 + bl __init_fpu_registers + bl setup_common_caches + bl setup_750_7400_hid0 + mtlr r4 + blr +_GLOBAL(__setup_cpu_750cx) + mflr r4 + bl __init_fpu_registers + bl setup_common_caches + bl setup_750_7400_hid0 + bl setup_750cx + mtlr r4 + blr +_GLOBAL(__setup_cpu_750fx) + mflr r4 + bl __init_fpu_registers + bl setup_common_caches + bl setup_750_7400_hid0 + bl setup_750fx + mtlr r4 + blr +_GLOBAL(__setup_cpu_7400) + mflr r4 + bl __init_fpu_registers + bl setup_7400_workarounds + bl setup_common_caches + bl setup_750_7400_hid0 + mtlr r4 + blr +_GLOBAL(__setup_cpu_7410) + mflr r4 + bl __init_fpu_registers + bl setup_7410_workarounds + bl setup_common_caches + bl setup_750_7400_hid0 + li r3,0 + mtspr SPRN_L2CR2,r3 + mtlr r4 + blr +_GLOBAL(__setup_cpu_745x) + mflr r4 + bl setup_common_caches + bl setup_745x_specifics + mtlr r4 + blr + +/* Enable caches for 603's, 604, 750 & 7400 */ +setup_common_caches: + mfspr r11,SPRN_HID0 + andi. r0,r11,HID0_DCE + ori r11,r11,HID0_ICE|HID0_DCE + ori r8,r11,HID0_ICFI + bne 1f /* don't invalidate the D-cache */ + ori r8,r8,HID0_DCI /* unless it wasn't enabled */ +1: sync + mtspr SPRN_HID0,r8 /* enable and invalidate caches */ + sync + mtspr SPRN_HID0,r11 /* enable caches */ + sync + isync + blr + +/* 604, 604e, 604ev, ... + * Enable superscalar execution & branch history table + */ +setup_604_hid0: + mfspr r11,SPRN_HID0 + ori r11,r11,HID0_SIED|HID0_BHTE + ori r8,r11,HID0_BTCD + sync + mtspr SPRN_HID0,r8 /* flush branch target address cache */ + sync /* on 604e/604r */ + mtspr SPRN_HID0,r11 + sync + isync + blr + +/* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some + * erratas we work around here. + * Moto MPC710CE.pdf describes them, those are errata + * #3, #4 and #5 + * Note that we assume the firmware didn't choose to + * apply other workarounds (there are other ones documented + * in the .pdf). It appear that Apple firmware only works + * around #3 and with the same fix we use. We may want to + * check if the CPU is using 60x bus mode in which case + * the workaround for errata #4 is useless. Also, we may + * want to explicitely clear HID0_NOPDST as this is not + * needed once we have applied workaround #5 (though it's + * not set by Apple's firmware at least). + */ +setup_7400_workarounds: + mfpvr r3 + rlwinm r3,r3,0,20,31 + cmpwi 0,r3,0x0207 + ble 1f + blr +setup_7410_workarounds: + mfpvr r3 + rlwinm r3,r3,0,20,31 + cmpwi 0,r3,0x0100 + bnelr +1: + mfspr r11,SPRN_MSSSR0 + /* Errata #3: Set L1OPQ_SIZE to 0x10 */ + rlwinm r11,r11,0,9,6 + oris r11,r11,0x0100 + /* Errata #4: Set L2MQ_SIZE to 1 (check for MPX mode first ?) */ + oris r11,r11,0x0002 + /* Errata #5: Set DRLT_SIZE to 0x01 */ + rlwinm r11,r11,0,5,2 + oris r11,r11,0x0800 + sync + mtspr SPRN_MSSSR0,r11 + sync + isync + blr + +/* 740/750/7400/7410 + * Enable Store Gathering (SGE), Address Brodcast (ABE), + * Branch History Table (BHTE), Branch Target ICache (BTIC) + * Dynamic Power Management (DPM), Speculative (SPD) + * Clear Instruction cache throttling (ICTC) + */ +setup_750_7400_hid0: + mfspr r11,SPRN_HID0 + ori r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC + oris r11,r11,HID0_DPM@h +BEGIN_FTR_SECTION + xori r11,r11,HID0_BTIC +END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC) +BEGIN_FTR_SECTION + xoris r11,r11,HID0_DPM@h /* disable dynamic power mgmt */ +END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) + li r3,HID0_SPD + andc r11,r11,r3 /* clear SPD: enable speculative */ + li r3,0 + mtspr SPRN_ICTC,r3 /* Instruction Cache Throttling off */ + isync + mtspr SPRN_HID0,r11 + sync + isync + blr + +/* 750cx specific + * Looks like we have to disable NAP feature for some PLL settings... + * (waiting for confirmation) + */ +setup_750cx: + mfspr r10, SPRN_HID1 + rlwinm r10,r10,4,28,31 + cmpwi cr0,r10,7 + cmpwi cr1,r10,9 + cmpwi cr2,r10,11 + cror 4*cr0+eq,4*cr0+eq,4*cr1+eq + cror 4*cr0+eq,4*cr0+eq,4*cr2+eq + bnelr + lwz r6,CPU_SPEC_FEATURES(r5) + li r7,CPU_FTR_CAN_NAP + andc r6,r6,r7 + stw r6,CPU_SPEC_FEATURES(r5) + blr + +/* 750fx specific + */ +setup_750fx: + blr + +/* MPC 745x + * Enable Store Gathering (SGE), Branch Folding (FOLD) + * Branch History Table (BHTE), Branch Target ICache (BTIC) + * Dynamic Power Management (DPM), Speculative (SPD) + * Ensure our data cache instructions really operate. + * Timebase has to be running or we wouldn't have made it here, + * just ensure we don't disable it. + * Clear Instruction cache throttling (ICTC) + * Enable L2 HW prefetch + */ +setup_745x_specifics: + /* We check for the presence of an L3 cache setup by + * the firmware. If any, we disable NAP capability as + * it's known to be bogus on rev 2.1 and earlier + */ + mfspr r11,SPRN_L3CR + andis. r11,r11,L3CR_L3E@h + beq 1f + lwz r6,CPU_SPEC_FEATURES(r5) + andi. r0,r6,CPU_FTR_L3_DISABLE_NAP + beq 1f + li r7,CPU_FTR_CAN_NAP + andc r6,r6,r7 + stw r6,CPU_SPEC_FEATURES(r5) +1: + mfspr r11,SPRN_HID0 + + /* All of the bits we have to set..... + */ + ori r11,r11,HID0_SGE | HID0_FOLD | HID0_BHTE + ori r11,r11,HID0_LRSTK | HID0_BTIC + oris r11,r11,HID0_DPM@h +BEGIN_FTR_SECTION + xori r11,r11,HID0_BTIC +END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC) +BEGIN_FTR_SECTION + xoris r11,r11,HID0_DPM@h /* disable dynamic power mgmt */ +END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) + + /* All of the bits we have to clear.... + */ + li r3,HID0_SPD | HID0_NOPDST | HID0_NOPTI + andc r11,r11,r3 /* clear SPD: enable speculative */ + li r3,0 + + mtspr SPRN_ICTC,r3 /* Instruction Cache Throttling off */ + isync + mtspr SPRN_HID0,r11 + sync + isync + + /* Enable L2 HW prefetch, if L2 is enabled + */ + mfspr r3,SPRN_L2CR + andis. r3,r3,L2CR_L2E@h + beqlr + mfspr r3,SPRN_MSSCR0 + ori r3,r3,3 + sync + mtspr SPRN_MSSCR0,r3 + sync + isync + blr + +/* + * Initialize the FPU registers. This is needed to work around an errata + * in some 750 cpus where using a not yet initialized FPU register after + * power on reset may hang the CPU + */ +_GLOBAL(__init_fpu_registers) + mfmsr r10 + ori r11,r10,MSR_FP + mtmsr r11 + isync + addis r9,r3,empty_zero_page@ha + addi r9,r9,empty_zero_page@l + REST_32FPRS(0,r9) + sync + mtmsr r10 + isync + blr + + +/* Definitions for the table use to save CPU states */ +#define CS_HID0 0 +#define CS_HID1 4 +#define CS_HID2 8 +#define CS_MSSCR0 12 +#define CS_MSSSR0 16 +#define CS_ICTRL 20 +#define CS_LDSTCR 24 +#define CS_LDSTDB 28 +#define CS_SIZE 32 + + .data + .balign L1_CACHE_BYTES +cpu_state_storage: + .space CS_SIZE + .balign L1_CACHE_BYTES,0 + .text + +/* Called in normal context to backup CPU 0 state. This + * does not include cache settings. This function is also + * called for machine sleep. This does not include the MMU + * setup, BATs, etc... but rather the "special" registers + * like HID0, HID1, MSSCR0, etc... + */ +_GLOBAL(__save_cpu_setup) + /* Some CR fields are volatile, we back it up all */ + mfcr r7 + + /* Get storage ptr */ + lis r5,cpu_state_storage@h + ori r5,r5,cpu_state_storage@l + + /* Save HID0 (common to all CONFIG_6xx cpus) */ + mfspr r3,SPRN_HID0 + stw r3,CS_HID0(r5) + + /* Now deal with CPU type dependent registers */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmplwi cr0,r3,0x8000 /* 7450 */ + cmplwi cr1,r3,0x000c /* 7400 */ + cmplwi cr2,r3,0x800c /* 7410 */ + cmplwi cr3,r3,0x8001 /* 7455 */ + cmplwi cr4,r3,0x8002 /* 7457 */ + cmplwi cr5,r3,0x8003 /* 7447A */ + cmplwi cr6,r3,0x7000 /* 750FX */ + cmplwi cr7,r3,0x8004 /* 7448 */ + /* cr1 is 7400 || 7410 */ + cror 4*cr1+eq,4*cr1+eq,4*cr2+eq + /* cr0 is 74xx */ + cror 4*cr0+eq,4*cr0+eq,4*cr3+eq + cror 4*cr0+eq,4*cr0+eq,4*cr4+eq + cror 4*cr0+eq,4*cr0+eq,4*cr1+eq + cror 4*cr0+eq,4*cr0+eq,4*cr5+eq + cror 4*cr0+eq,4*cr0+eq,4*cr7+eq + bne 1f + /* Backup 74xx specific regs */ + mfspr r4,SPRN_MSSCR0 + stw r4,CS_MSSCR0(r5) + mfspr r4,SPRN_MSSSR0 + stw r4,CS_MSSSR0(r5) + beq cr1,1f + /* Backup 745x specific registers */ + mfspr r4,SPRN_HID1 + stw r4,CS_HID1(r5) + mfspr r4,SPRN_ICTRL + stw r4,CS_ICTRL(r5) + mfspr r4,SPRN_LDSTCR + stw r4,CS_LDSTCR(r5) + mfspr r4,SPRN_LDSTDB + stw r4,CS_LDSTDB(r5) +1: + bne cr6,1f + /* Backup 750FX specific registers */ + mfspr r4,SPRN_HID1 + stw r4,CS_HID1(r5) + /* If rev 2.x, backup HID2 */ + mfspr r3,SPRN_PVR + andi. r3,r3,0xff00 + cmpwi cr0,r3,0x0200 + bne 1f + mfspr r4,SPRN_HID2 + stw r4,CS_HID2(r5) +1: + mtcr r7 + blr + +/* Called with no MMU context (typically MSR:IR/DR off) to + * restore CPU state as backed up by the previous + * function. This does not include cache setting + */ +_GLOBAL(__restore_cpu_setup) + /* Some CR fields are volatile, we back it up all */ + mfcr r7 + + /* Get storage ptr */ + lis r5,(cpu_state_storage-KERNELBASE)@h + ori r5,r5,cpu_state_storage@l + + /* Restore HID0 */ + lwz r3,CS_HID0(r5) + sync + isync + mtspr SPRN_HID0,r3 + sync + isync + + /* Now deal with CPU type dependent registers */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmplwi cr0,r3,0x8000 /* 7450 */ + cmplwi cr1,r3,0x000c /* 7400 */ + cmplwi cr2,r3,0x800c /* 7410 */ + cmplwi cr3,r3,0x8001 /* 7455 */ + cmplwi cr4,r3,0x8002 /* 7457 */ + cmplwi cr5,r3,0x8003 /* 7447A */ + cmplwi cr6,r3,0x7000 /* 750FX */ + cmplwi cr7,r3,0x8004 /* 7448 */ + /* cr1 is 7400 || 7410 */ + cror 4*cr1+eq,4*cr1+eq,4*cr2+eq + /* cr0 is 74xx */ + cror 4*cr0+eq,4*cr0+eq,4*cr3+eq + cror 4*cr0+eq,4*cr0+eq,4*cr4+eq + cror 4*cr0+eq,4*cr0+eq,4*cr1+eq + cror 4*cr0+eq,4*cr0+eq,4*cr5+eq + cror 4*cr0+eq,4*cr0+eq,4*cr7+eq + bne 2f + /* Restore 74xx specific regs */ + lwz r4,CS_MSSCR0(r5) + sync + mtspr SPRN_MSSCR0,r4 + sync + isync + lwz r4,CS_MSSSR0(r5) + sync + mtspr SPRN_MSSSR0,r4 + sync + isync + bne cr2,1f + /* Clear 7410 L2CR2 */ + li r4,0 + mtspr SPRN_L2CR2,r4 +1: beq cr1,2f + /* Restore 745x specific registers */ + lwz r4,CS_HID1(r5) + sync + mtspr SPRN_HID1,r4 + isync + sync + lwz r4,CS_ICTRL(r5) + sync + mtspr SPRN_ICTRL,r4 + isync + sync + lwz r4,CS_LDSTCR(r5) + sync + mtspr SPRN_LDSTCR,r4 + isync + sync + lwz r4,CS_LDSTDB(r5) + sync + mtspr SPRN_LDSTDB,r4 + isync + sync +2: bne cr6,1f + /* Restore 750FX specific registers + * that is restore HID2 on rev 2.x and PLL config & switch + * to PLL 0 on all + */ + /* If rev 2.x, restore HID2 with low voltage bit cleared */ + mfspr r3,SPRN_PVR + andi. r3,r3,0xff00 + cmpwi cr0,r3,0x0200 + bne 4f + lwz r4,CS_HID2(r5) + rlwinm r4,r4,0,19,17 + mtspr SPRN_HID2,r4 + sync +4: + lwz r4,CS_HID1(r5) + rlwinm r5,r4,0,16,14 + mtspr SPRN_HID1,r5 + /* Wait for PLL to stabilize */ + mftbl r5 +3: mftbl r6 + sub r6,r6,r5 + cmplwi cr0,r6,10000 + ble 3b + /* Setup final PLL */ + mtspr SPRN_HID1,r4 +1: + mtcr r7 + blr + diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index e4e81374cb9..39e348a3ade 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -894,7 +894,7 @@ struct cpu_spec cpu_specs[] = { .platform = "ppc405", }, { /* Xilinx Virtex-II Pro */ - .pvr_mask = 0xffff0000, + .pvr_mask = 0xfffff000, .pvr_value = 0x20010000, .cpu_name = "Virtex-II Pro", .cpu_features = CPU_FTRS_40X, @@ -904,6 +904,16 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 32, .platform = "ppc405", }, + { /* Xilinx Virtex-4 FX */ + .pvr_mask = 0xfffff000, + .pvr_value = 0x20011000, + .cpu_name = "Virtex-4 FX", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .icache_bsize = 32, + .dcache_bsize = 32, + }, { /* 405EP */ .pvr_mask = 0xffff0000, .pvr_value = 0x51210000, diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 4827ca1ec89..b3a97946722 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -135,10 +135,10 @@ transfer_to_handler: mfspr r11,SPRN_HID0 mtcr r11 BEGIN_FTR_SECTION - bt- 8,power_save_6xx_restore /* Check DOZE */ + bt- 8,4f /* Check DOZE */ END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) BEGIN_FTR_SECTION - bt- 9,power_save_6xx_restore /* Check NAP */ + bt- 9,4f /* Check NAP */ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #endif /* CONFIG_6xx */ .globl transfer_to_handler_cont @@ -157,6 +157,10 @@ transfer_to_handler_cont: SYNC RFI /* jump to handler, enable MMU */ +#ifdef CONFIG_6xx +4: b power_save_6xx_restore +#endif + /* * On kernel stack overflow, load up an initial stack pointer * and call StackOverflow(regs), which should not return. diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 24be0cf86d7..19ad5c6b181 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1,6 +1,4 @@ /* - * arch/ppc64/kernel/entry.S - * * PowerPC version * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP @@ -63,6 +61,7 @@ system_call_common: std r12,_MSR(r1) std r0,GPR0(r1) std r10,GPR1(r1) + ACCOUNT_CPU_USER_ENTRY(r10, r11) std r2,GPR2(r1) std r3,GPR3(r1) std r4,GPR4(r1) @@ -170,8 +169,9 @@ syscall_error_cont: stdcx. r0,0,r1 /* to clear the reservation */ andi. r6,r8,MSR_PR ld r4,_LINK(r1) - beq- 1f /* only restore r13 if */ - ld r13,GPR13(r1) /* returning to usermode */ + beq- 1f + ACCOUNT_CPU_USER_EXIT(r11, r12) + ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ 1: ld r2,GPR2(r1) li r12,MSR_RI andc r11,r10,r12 @@ -322,7 +322,7 @@ _GLOBAL(ret_from_fork) * the fork code also. * * The code which creates the new task context is in 'copy_thread' - * in arch/ppc64/kernel/process.c + * in arch/powerpc/kernel/process.c */ .align 7 _GLOBAL(_switch) @@ -486,6 +486,7 @@ restore: * userspace */ beq 1f + ACCOUNT_CPU_USER_EXIT(r3, r4) REST_GPR(13, r1) 1: ld r3,_CTR(r1) @@ -616,6 +617,12 @@ _GLOBAL(enter_rtas) mfsrr1 r10 std r10,_SRR1(r1) + /* Temporary workaround to clear CR until RTAS can be modified to + * ignore all bits. + */ + li r0,0 + mtcr r0 + /* There is no way it is acceptable to get here with interrupts enabled, * check it with the asm equivalent of WARN_ON */ diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index 65eae752a52..0bfe9061720 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -14,32 +14,9 @@ */ #include <linux/config.h> +#include <linux/module.h> #include <asm/firmware.h> -unsigned long ppc64_firmware_features; - -#ifdef CONFIG_PPC_PSERIES -firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { - {FW_FEATURE_PFT, "hcall-pft"}, - {FW_FEATURE_TCE, "hcall-tce"}, - {FW_FEATURE_SPRG0, "hcall-sprg0"}, - {FW_FEATURE_DABR, "hcall-dabr"}, - {FW_FEATURE_COPY, "hcall-copy"}, - {FW_FEATURE_ASR, "hcall-asr"}, - {FW_FEATURE_DEBUG, "hcall-debug"}, - {FW_FEATURE_PERF, "hcall-perf"}, - {FW_FEATURE_DUMP, "hcall-dump"}, - {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, - {FW_FEATURE_MIGRATE, "hcall-migrate"}, - {FW_FEATURE_PERFMON, "hcall-perfmon"}, - {FW_FEATURE_CRQ, "hcall-crq"}, - {FW_FEATURE_VIO, "hcall-vio"}, - {FW_FEATURE_RDMA, "hcall-rdma"}, - {FW_FEATURE_LLAN, "hcall-lLAN"}, - {FW_FEATURE_BULK, "hcall-bulk"}, - {FW_FEATURE_XDABR, "hcall-xdabr"}, - {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, - {FW_FEATURE_SPLPAR, "hcall-splpar"}, -}; -#endif +unsigned long powerpc_firmware_features; +EXPORT_SYMBOL_GPL(powerpc_firmware_features); diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 8b49679fad5..47c7fa148c9 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -1,6 +1,4 @@ /* - * arch/ppc/kernel/head_44x.S - * * Kernel execution entry point code. * * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org> diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 9b65029dd2a..a5ae04a57c7 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -1,6 +1,4 @@ /* - * arch/ppc64/kernel/head.S - * * PowerPC version * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) * @@ -279,6 +277,7 @@ exception_marker: std r10,0(r1); /* make stack chain pointer */ \ std r0,GPR0(r1); /* save r0 in stackframe */ \ std r10,GPR1(r1); /* save r1 in stackframe */ \ + ACCOUNT_CPU_USER_ENTRY(r9, r10); \ std r2,GPR2(r1); /* save r2 in stackframe */ \ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ @@ -846,6 +845,14 @@ fast_exception_return: ld r11,_NIP(r1) andi. r3,r12,MSR_RI /* check if RI is set */ beq- unrecov_fer + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + andi. r3,r12,MSR_PR + beq 2f + ACCOUNT_CPU_USER_EXIT(r3, r4) +2: +#endif + ld r3,_CCR(r1) ld r4,_LINK(r1) ld r5,_CTR(r1) @@ -1537,7 +1544,11 @@ _STATIC(__boot_from_prom) mr r28,r6 mr r27,r7 - /* Align the stack to 16-byte boundary for broken yaboot */ + /* + * Align the stack to 16-byte boundary + * Depending on the size and layout of the ELF sections in the initial + * boot binary, the stack pointer will be unalignet on PowerMac + */ rldicr r1,r1,0,59 /* Make sure we are running in 64 bits mode */ @@ -1840,21 +1851,6 @@ _STATIC(start_here_multiplatform) bl .__save_cpu_setup sync - /* Setup a valid physical PACA pointer in SPRG3 for early_setup - * note that boot_cpuid can always be 0 nowadays since there is - * nowhere it can be initialized differently before we reach this - * code - */ - LOAD_REG_IMMEDIATE(r27, boot_cpuid) - add r27,r27,r26 - lwz r27,0(r27) - - LOAD_REG_IMMEDIATE(r24, paca) /* Get base vaddr of paca array */ - mulli r13,r27,PACA_SIZE /* Calculate vaddr of right paca */ - add r13,r13,r24 /* for this processor. */ - add r13,r13,r26 /* convert to physical addr */ - mtspr SPRN_SPRG3,r13 - /* Do very early kernel initializations, including initial hash table, * stab and slb setup before we turn on relocation. */ @@ -1923,6 +1919,17 @@ _STATIC(start_here_common) /* Not reached */ BUG_OPCODE +/* Put the paca pointer into r13 and SPRG3 */ +_GLOBAL(setup_boot_paca) + LOAD_REG_IMMEDIATE(r3, boot_cpuid) + lwz r3,0(r3) + LOAD_REG_IMMEDIATE(r4, paca) /* Get base vaddr of paca array */ + mulli r3,r3,PACA_SIZE /* Calculate vaddr of right paca */ + add r13,r3,r4 /* for this processor. */ + mtspr SPRN_SPRG3,r13 + + blr + /* * We put a few things here that have to be page-aligned. * This stuff goes at the beginning of the bss, which is page-aligned. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index bc6d1ac5523..28941f5ce67 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -1,6 +1,4 @@ /* - * arch/ppc/kernel/except_8xx.S - * * PowerPC version * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h new file mode 100644 index 00000000000..8536e767616 --- /dev/null +++ b/arch/powerpc/kernel/head_booke.h @@ -0,0 +1,363 @@ +#ifndef __HEAD_BOOKE_H__ +#define __HEAD_BOOKE_H__ + +/* + * Macros used for common Book-e exception handling + */ + +#define SET_IVOR(vector_number, vector_label) \ + li r26,vector_label@l; \ + mtspr SPRN_IVOR##vector_number,r26; \ + sync + +#define NORMAL_EXCEPTION_PROLOG \ + mtspr SPRN_SPRG0,r10; /* save two registers to work with */\ + mtspr SPRN_SPRG1,r11; \ + mtspr SPRN_SPRG4W,r1; \ + mfcr r10; /* save CR in r10 for now */\ + mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ + andi. r11,r11,MSR_PR; \ + beq 1f; \ + mfspr r1,SPRN_SPRG3; /* if from user, start at top of */\ + lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\ + addi r1,r1,THREAD_SIZE; \ +1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\ + mr r11,r1; \ + stw r10,_CCR(r11); /* save various registers */\ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mfspr r10,SPRN_SPRG0; \ + stw r10,GPR10(r11); \ + mfspr r12,SPRN_SPRG1; \ + stw r12,GPR11(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r10,SPRN_SPRG4R; \ + mfspr r12,SPRN_SRR0; \ + stw r10,GPR1(r11); \ + mfspr r9,SPRN_SRR1; \ + stw r10,0(r11); \ + rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +/* To handle the additional exception priority levels on 40x and Book-E + * processors we allocate a 4k stack per additional priority level. The various + * head_xxx.S files allocate space (exception_stack_top) for each priority's + * stack times the number of CPUs + * + * On 40x critical is the only additional level + * On 44x/e500 we have critical and machine check + * On e200 we have critical and debug (machine check occurs via critical) + * + * Additionally we reserve a SPRG for each priority level so we can free up a + * GPR to use as the base for indirect access to the exception stacks. This + * is necessary since the MMU is always on, for Book-E parts, and the stacks + * are offset from KERNELBASE. + * + */ +#define BOOKE_EXCEPTION_STACK_SIZE (8192) + +/* CRIT_SPRG only used in critical exception handling */ +#define CRIT_SPRG SPRN_SPRG2 +/* MCHECK_SPRG only used in machine check exception handling */ +#define MCHECK_SPRG SPRN_SPRG6W + +#define MCHECK_STACK_TOP (exception_stack_top - 4096) +#define CRIT_STACK_TOP (exception_stack_top) + +/* only on e200 for now */ +#define DEBUG_STACK_TOP (exception_stack_top - 4096) +#define DEBUG_SPRG SPRN_SPRG6W + +#ifdef CONFIG_SMP +#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ + mfspr r8,SPRN_PIR; \ + mulli r8,r8,BOOKE_EXCEPTION_STACK_SIZE; \ + neg r8,r8; \ + addis r8,r8,level##_STACK_TOP@ha; \ + addi r8,r8,level##_STACK_TOP@l +#else +#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ + lis r8,level##_STACK_TOP@h; \ + ori r8,r8,level##_STACK_TOP@l +#endif + +/* + * Exception prolog for critical/machine check exceptions. This is a + * little different from the normal exception prolog above since a + * critical/machine check exception can potentially occur at any point + * during normal exception processing. Thus we cannot use the same SPRG + * registers as the normal prolog above. Instead we use a portion of the + * critical/machine check exception stack at low physical addresses. + */ +#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \ + mtspr exc_level##_SPRG,r8; \ + BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \ + stw r10,GPR10-INT_FRAME_SIZE(r8); \ + stw r11,GPR11-INT_FRAME_SIZE(r8); \ + mfcr r10; /* save CR in r10 for now */\ + mfspr r11,exc_level_srr1; /* check whether user or kernel */\ + andi. r11,r11,MSR_PR; \ + mr r11,r8; \ + mfspr r8,exc_level##_SPRG; \ + beq 1f; \ + /* COMING FROM USER MODE */ \ + mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\ + lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ + addi r11,r11,THREAD_SIZE; \ +1: subi r11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\ + stw r10,_CCR(r11); /* save various registers */\ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\ + stw r12,_DEAR(r11); /* since they may have had stuff */\ + mfspr r9,SPRN_ESR; /* in them at the point where the */\ + stw r9,_ESR(r11); /* exception was taken */\ + mfspr r12,exc_level_srr0; \ + stw r1,GPR1(r11); \ + mfspr r9,exc_level_srr1; \ + stw r1,0(r11); \ + mr r1,r11; \ + rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +#define CRITICAL_EXCEPTION_PROLOG \ + EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1) +#define DEBUG_EXCEPTION_PROLOG \ + EXC_LEVEL_EXCEPTION_PROLOG(DEBUG, SPRN_DSRR0, SPRN_DSRR1) +#define MCHECK_EXCEPTION_PROLOG \ + EXC_LEVEL_EXCEPTION_PROLOG(MCHECK, SPRN_MCSRR0, SPRN_MCSRR1) + +/* + * Exception vectors. + */ +#define START_EXCEPTION(label) \ + .align 5; \ +label: + +#define FINISH_EXCEPTION(func) \ + bl transfer_to_handler_full; \ + .long func; \ + .long ret_from_except_full + +#define EXCEPTION(n, label, hdlr, xfer) \ + START_EXCEPTION(label); \ + NORMAL_EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define CRITICAL_EXCEPTION(n, label, hdlr) \ + START_EXCEPTION(label); \ + CRITICAL_EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ + NOCOPY, crit_transfer_to_handler, \ + ret_from_crit_exc) + +#define MCHECK_EXCEPTION(n, label, hdlr) \ + START_EXCEPTION(label); \ + MCHECK_EXCEPTION_PROLOG; \ + mfspr r5,SPRN_ESR; \ + stw r5,_ESR(r11); \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ + NOCOPY, mcheck_transfer_to_handler, \ + ret_from_mcheck_exc) + +#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ + li r10,trap; \ + stw r10,_TRAP(r11); \ + lis r10,msr@h; \ + ori r10,r10,msr@l; \ + copyee(r10, r9); \ + bl tfer; \ + .long hdlr; \ + .long ret + +#define COPY_EE(d, s) rlwimi d,s,0,16,16 +#define NOCOPY(d, s) + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ + ret_from_except) + +#define EXC_XFER_EE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_EE_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \ + ret_from_except) + +/* Check for a single step debug exception while in an exception + * handler before state has been saved. This is to catch the case + * where an instruction that we are trying to single step causes + * an exception (eg ITLB/DTLB miss) and thus the first instruction of + * the exception handler generates a single step debug exception. + * + * If we get a debug trap on the first instruction of an exception handler, + * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is + * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR). + * The exception handler was handling a non-critical interrupt, so it will + * save (and later restore) the MSR via SPRN_CSRR1, which will still have + * the MSR_DE bit set. + */ +#ifdef CONFIG_E200 +#define DEBUG_EXCEPTION \ + START_EXCEPTION(Debug); \ + DEBUG_EXCEPTION_PROLOG; \ + \ + /* \ + * If there is a single step or branch-taken exception in an \ + * exception entry sequence, it was probably meant to apply to \ + * the code where the exception occurred (since exception entry \ + * doesn't turn off DE automatically). We simulate the effect \ + * of turning off DE on entry to an exception handler by turning \ + * off DE in the CSRR1 value and clearing the debug status. \ + */ \ + mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \ + andis. r10,r10,DBSR_IC@h; \ + beq+ 2f; \ + \ + lis r10,KERNELBASE@h; /* check if exception in vectors */ \ + ori r10,r10,KERNELBASE@l; \ + cmplw r12,r10; \ + blt+ 2f; /* addr below exception vectors */ \ + \ + lis r10,Debug@h; \ + ori r10,r10,Debug@l; \ + cmplw r12,r10; \ + bgt+ 2f; /* addr above exception vectors */ \ + \ + /* here it looks like we got an inappropriate debug exception. */ \ +1: rlwinm r9,r9,0,~MSR_DE; /* clear DE in the CDRR1 value */ \ + lis r10,DBSR_IC@h; /* clear the IC event */ \ + mtspr SPRN_DBSR,r10; \ + /* restore state and get out */ \ + lwz r10,_CCR(r11); \ + lwz r0,GPR0(r11); \ + lwz r1,GPR1(r11); \ + mtcrf 0x80,r10; \ + mtspr SPRN_DSRR0,r12; \ + mtspr SPRN_DSRR1,r9; \ + lwz r9,GPR9(r11); \ + lwz r12,GPR12(r11); \ + mtspr DEBUG_SPRG,r8; \ + BOOKE_LOAD_EXC_LEVEL_STACK(DEBUG); /* r8 points to the debug stack */ \ + lwz r10,GPR10-INT_FRAME_SIZE(r8); \ + lwz r11,GPR11-INT_FRAME_SIZE(r8); \ + mfspr r8,DEBUG_SPRG; \ + \ + RFDI; \ + b .; \ + \ + /* continue normal handling for a critical exception... */ \ +2: mfspr r4,SPRN_DBSR; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc) +#else +#define DEBUG_EXCEPTION \ + START_EXCEPTION(Debug); \ + CRITICAL_EXCEPTION_PROLOG; \ + \ + /* \ + * If there is a single step or branch-taken exception in an \ + * exception entry sequence, it was probably meant to apply to \ + * the code where the exception occurred (since exception entry \ + * doesn't turn off DE automatically). We simulate the effect \ + * of turning off DE on entry to an exception handler by turning \ + * off DE in the CSRR1 value and clearing the debug status. \ + */ \ + mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \ + andis. r10,r10,DBSR_IC@h; \ + beq+ 2f; \ + \ + lis r10,KERNELBASE@h; /* check if exception in vectors */ \ + ori r10,r10,KERNELBASE@l; \ + cmplw r12,r10; \ + blt+ 2f; /* addr below exception vectors */ \ + \ + lis r10,Debug@h; \ + ori r10,r10,Debug@l; \ + cmplw r12,r10; \ + bgt+ 2f; /* addr above exception vectors */ \ + \ + /* here it looks like we got an inappropriate debug exception. */ \ +1: rlwinm r9,r9,0,~MSR_DE; /* clear DE in the CSRR1 value */ \ + lis r10,DBSR_IC@h; /* clear the IC event */ \ + mtspr SPRN_DBSR,r10; \ + /* restore state and get out */ \ + lwz r10,_CCR(r11); \ + lwz r0,GPR0(r11); \ + lwz r1,GPR1(r11); \ + mtcrf 0x80,r10; \ + mtspr SPRN_CSRR0,r12; \ + mtspr SPRN_CSRR1,r9; \ + lwz r9,GPR9(r11); \ + lwz r12,GPR12(r11); \ + mtspr CRIT_SPRG,r8; \ + BOOKE_LOAD_EXC_LEVEL_STACK(CRIT); /* r8 points to the debug stack */ \ + lwz r10,GPR10-INT_FRAME_SIZE(r8); \ + lwz r11,GPR11-INT_FRAME_SIZE(r8); \ + mfspr r8,CRIT_SPRG; \ + \ + rfci; \ + b .; \ + \ + /* continue normal handling for a critical exception... */ \ +2: mfspr r4,SPRN_DBSR; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) +#endif + +#define INSTRUCTION_STORAGE_EXCEPTION \ + START_EXCEPTION(InstructionStorage) \ + NORMAL_EXCEPTION_PROLOG; \ + mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ + stw r5,_ESR(r11); \ + mr r4,r12; /* Pass SRR0 as arg2 */ \ + li r5,0; /* Pass zero as arg3 */ \ + EXC_XFER_EE_LITE(0x0400, handle_page_fault) + +#define ALIGNMENT_EXCEPTION \ + START_EXCEPTION(Alignment) \ + NORMAL_EXCEPTION_PROLOG; \ + mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ + stw r4,_DEAR(r11); \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_EE(0x0600, alignment_exception) + +#define PROGRAM_EXCEPTION \ + START_EXCEPTION(Program) \ + NORMAL_EXCEPTION_PROLOG; \ + mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ + stw r4,_ESR(r11); \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_STD(0x0700, program_check_exception) + +#define DECREMENTER_EXCEPTION \ + START_EXCEPTION(Decrementer) \ + NORMAL_EXCEPTION_PROLOG; \ + lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \ + mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_LITE(0x0900, timer_interrupt) + +#define FP_UNAVAILABLE_EXCEPTION \ + START_EXCEPTION(FloatingPointUnavailable) \ + NORMAL_EXCEPTION_PROLOG; \ + bne load_up_fpu; /* if from user, just load it up */ \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) + +#endif /* __HEAD_BOOKE_H__ */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 8d60fa99fc4..dd86bbed762 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1,6 +1,4 @@ /* - * arch/ppc/kernel/head_fsl_booke.S - * * Kernel execution entry point code. * * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org> @@ -316,6 +314,7 @@ skpinv: addi r6,r6,1 /* Increment */ */ lis r2,DBCR0_IDM@h mtspr SPRN_DBCR0,r2 + isync /* clear any residual debug events */ li r2,-1 mtspr SPRN_DBSR,r2 @@ -1002,12 +1001,15 @@ _GLOBAL(giveup_fpu) _GLOBAL(abort) li r13,0 mtspr SPRN_DBCR0,r13 /* disable all debug events */ + isync mfmsr r13 ori r13,r13,MSR_DE@l /* Enable Debug Events */ mtmsr r13 + isync mfspr r13,SPRN_DBCR0 lis r13,(DBCR0_IDM|DBCR0_RST_CHIP)@h mtspr SPRN_DBCR0,r13 + isync _GLOBAL(set_context) diff --git a/arch/powerpc/kernel/idle_64.c b/arch/powerpc/kernel/idle.c index b879d3057ef..e9f321d74d8 100644 --- a/arch/powerpc/kernel/idle_64.c +++ b/arch/powerpc/kernel/idle.c @@ -2,13 +2,17 @@ * Idle daemon for PowerPC. Idle daemon will handle any action * that needs to be taken when the system becomes idle. * - * Originally Written by Cort Dougan (cort@cs.nmt.edu) + * Originally written by Cort Dougan (cort@cs.nmt.edu). + * Subsequent 32-bit hacking by Tom Rini, Armin Kuster, + * Paul Mackerras and others. * * iSeries supported added by Mike Corrigan <mikejc@us.ibm.com> * * Additional shared processor, SMT, and firmware support * Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com> * + * 32-bit and 64-bit versions merged by Paul Mackerras <paulus@samba.org> + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -29,18 +33,43 @@ #include <asm/machdep.h> #include <asm/smp.h> -extern void power4_idle(void); +#ifdef CONFIG_HOTPLUG_CPU +#define cpu_should_die() (cpu_is_offline(smp_processor_id()) && \ + system_state == SYSTEM_RUNNING) +#else +#define cpu_should_die() 0 +#endif -void default_idle(void) +/* + * The body of the idle task. + */ +void cpu_idle(void) { - unsigned int cpu = smp_processor_id(); - set_thread_flag(TIF_POLLING_NRFLAG); + if (ppc_md.idle_loop) + ppc_md.idle_loop(); /* doesn't return */ + set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - if (!need_resched()) { - while (!need_resched() && !cpu_is_offline(cpu)) { - ppc64_runlatch_off(); + ppc64_runlatch_off(); + while (!need_resched() && !cpu_should_die()) { + if (ppc_md.power_save) { + clear_thread_flag(TIF_POLLING_NRFLAG); + /* + * smp_mb is so clearing of TIF_POLLING_NRFLAG + * is ordered w.r.t. need_resched() test. + */ + smp_mb(); + local_irq_disable(); + + /* check again after disabling irqs */ + if (!need_resched() && !cpu_should_die()) + ppc_md.power_save(); + + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); + + } else { /* * Go into low thread priority and possibly * low power mode. @@ -48,46 +77,18 @@ void default_idle(void) HMT_low(); HMT_very_low(); } - - HMT_medium(); } + HMT_medium(); ppc64_runlatch_on(); + if (cpu_should_die()) + cpu_die(); preempt_enable_no_resched(); schedule(); preempt_disable(); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); } } -void native_idle(void) -{ - while (1) { - ppc64_runlatch_off(); - - if (!need_resched()) - power4_idle(); - - if (need_resched()) { - ppc64_runlatch_on(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } - - if (cpu_is_offline(smp_processor_id()) && - system_state == SYSTEM_RUNNING) - cpu_die(); - } -} - -void cpu_idle(void) -{ - BUG_ON(NULL == ppc_md.idle_loop); - ppc_md.idle_loop(); -} - int powersave_nap; #ifdef CONFIG_SYSCTL diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 444fdcc769f..12a4efbaa08 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -87,19 +87,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) cmpwi 0,r3,0 beqlr - /* Clear MSR:EE */ - mfmsr r7 - rlwinm r0,r7,0,17,15 - mtmsr r0 - - /* Check current_thread_info()->flags */ - rlwinm r4,r1,0,0,18 - lwz r4,TI_FLAGS(r4) - andi. r0,r4,_TIF_NEED_RESCHED - beq 1f - mtmsr r7 /* out of line this ? */ - blr -1: /* Some pre-nap cleanups needed on some CPUs */ andis. r0,r3,HID0_NAP@h beq 2f @@ -157,7 +144,8 @@ BEGIN_FTR_SECTION DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - ori r7,r7,MSR_EE /* Could be ommited (already set) */ + mfmsr r7 + ori r7,r7,MSR_EE oris r7,r7,MSR_POW@h sync isync @@ -220,8 +208,6 @@ _GLOBAL(nap_save_msscr0) _GLOBAL(nap_save_hid1) .space 4*NR_CPUS -_GLOBAL(powersave_nap) - .long 0 _GLOBAL(powersave_lowspeed) .long 0 diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index c16b4afab58..6dad1c02496 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -1,11 +1,5 @@ /* - * This file contains the power_save function for 6xx & 7xxx CPUs - * rewritten in assembler - * - * Warning ! This code assumes that if your machine has a 750fx - * it will have PLL 1 set to low speed mode (used during NAP/DOZE). - * if this is not the case some additional changes will have to - * be done to check a runtime var (a bit like powersave-nap) + * This file contains the power_save function for 970-family CPUs. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -26,49 +20,23 @@ .text -/* - * Here is the power_save_6xx function. This could eventually be - * split into several functions & changing the function pointer - * depending on the various features. - */ _GLOBAL(power4_idle) BEGIN_FTR_SECTION blr END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) - /* We must dynamically check for the NAP feature as it - * can be cleared by CPU init after the fixups are done - */ - LOAD_REG_ADDRBASE(r3,cur_cpu_spec) - ld r4,ADDROFF(cur_cpu_spec)(r3) - ld r4,CPU_SPEC_FEATURES(r4) - andi. r0,r4,CPU_FTR_CAN_NAP - beqlr /* Now check if user or arch enabled NAP mode */ LOAD_REG_ADDRBASE(r3,powersave_nap) lwz r4,ADDROFF(powersave_nap)(r3) cmpwi 0,r4,0 beqlr - /* Clear MSR:EE */ - mfmsr r7 - li r4,0 - ori r4,r4,MSR_EE - andc r0,r7,r4 - mtmsrd r0 - - /* Check current_thread_info()->flags */ - clrrdi r4,r1,THREAD_SHIFT - ld r4,TI_FLAGS(r4) - andi. r0,r4,_TIF_NEED_RESCHED - beq 1f - mtmsrd r7 /* out of line this ? */ - blr -1: /* Go to NAP now */ BEGIN_FTR_SECTION DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + mfmsr r7 + ori r7,r7,MSR_EE oris r7,r7,MSR_POW@h sync isync diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 6160c8dbb7c..fd8214caede 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -1,6 +1,4 @@ /* - * arch/ppc64/kernel/iomap.c - * * ppc64 "iomap" interface implementation. * * (C) Copyright 2004 Linus Torvalds diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 946f3219fd2..d9a7fdef59b 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1,5 +1,4 @@ /* - * arch/ppc64/kernel/iommu.c * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation * * Rewrite, cleanup, new allocation schemes, virtual merging: diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index d1fffce86df..bb5c9501234 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -1,6 +1,4 @@ /* - * arch/ppc/kernel/irq.c - * * Derived from arch/i386/kernel/irq.c * Copyright (C) 1992 Linus Torvalds * Adapted from arch/i386 by Gary Thomas @@ -137,9 +135,8 @@ skip: #ifdef CONFIG_TAU_INT if (tau_initialized){ seq_puts(p, "TAU: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", tau_interrupts(j)); + for_each_online_cpu(j) + seq_printf(p, "%10u ", tau_interrupts(j)); seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); } #endif @@ -371,6 +368,7 @@ unsigned int real_irq_to_virt_slowpath(unsigned int real_irq) return NO_IRQ; } +#endif /* CONFIG_PPC64 */ #ifdef CONFIG_IRQSTACKS struct thread_info *softirq_ctx[NR_CPUS]; @@ -381,7 +379,7 @@ void irq_ctx_init(void) struct thread_info *tp; int i; - for_each_cpu(i) { + for_each_possible_cpu(i) { memset((void *)softirq_ctx[i], 0, THREAD_SIZE); tp = softirq_ctx[i]; tp->cpu = i; @@ -394,10 +392,24 @@ void irq_ctx_init(void) } } +static inline void do_softirq_onstack(void) +{ + struct thread_info *curtp, *irqtp; + + curtp = current_thread_info(); + irqtp = softirq_ctx[smp_processor_id()]; + irqtp->task = curtp->task; + call_do_softirq(irqtp); + irqtp->task = NULL; +} + +#else +#define do_softirq_onstack() __do_softirq() +#endif /* CONFIG_IRQSTACKS */ + void do_softirq(void) { unsigned long flags; - struct thread_info *curtp, *irqtp; if (in_interrupt()) return; @@ -405,19 +417,18 @@ void do_softirq(void) local_irq_save(flags); if (local_softirq_pending()) { - curtp = current_thread_info(); - irqtp = softirq_ctx[smp_processor_id()]; - irqtp->task = curtp->task; - call_do_softirq(irqtp); - irqtp->task = NULL; + account_system_vtime(current); + local_bh_disable(); + do_softirq_onstack(); + account_system_vtime(current); + __local_bh_enable(); } local_irq_restore(flags); } EXPORT_SYMBOL(do_softirq); -#endif /* CONFIG_IRQSTACKS */ - +#ifdef CONFIG_PPC64 static int __init setup_noirqdistrib(char *str) { distribute_irqs = 0; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index cfab48566db..ad7a9021220 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -1,6 +1,5 @@ /* * Kernel Probes (KProbes) - * arch/ppc64/kernel/kprobes.c * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -31,9 +30,11 @@ #include <linux/kprobes.h> #include <linux/ptrace.h> #include <linux/preempt.h> +#include <linux/module.h> #include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/sstep.h> +#include <asm/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -82,9 +83,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - down(&kprobe_mutex); + mutex_lock(&kprobe_mutex); free_insn_slot(p->ainsn.insn); - up(&kprobe_mutex); + mutex_unlock(&kprobe_mutex); } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -373,17 +374,62 @@ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - if (kcb->kprobe_status & KPROBE_HIT_SS) { - resume_execution(cur, regs); + const struct exception_table_entry *entry; + + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the nip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->nip = (unsigned long)cur->addr; regs->msr &= ~MSR_SE; regs->msr |= kcb->kprobe_saved_msr; - - reset_current_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accouting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if ((entry = search_exception_tables(regs->nip)) != NULL) { + regs->nip = entry->fixup; + return 1; + } + + /* + * fixup_exception() could not handle it, + * Let do_page_fault() fix it. + */ + break; + default: + break; } return 0; } @@ -397,6 +443,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; + if (args->regs && user_mode(args->regs)) + return ret; + switch (val) { case DIE_BPT: if (kprobe_handler(args->regs)) diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S new file mode 100644 index 00000000000..d7f4e982b53 --- /dev/null +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -0,0 +1,471 @@ +/* + L2CR functions + Copyright © 1997-1998 by PowerLogix R & D, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ +/* + Thur, Dec. 12, 1998. + - First public release, contributed by PowerLogix. + *********** + Sat, Aug. 7, 1999. + - Terry: Made sure code disabled interrupts before running. (Previously + it was assumed interrupts were already disabled). + - Terry: Updated for tentative G4 support. 4MB of memory is now flushed + instead of 2MB. (Prob. only 3 is necessary). + - Terry: Updated for workaround to HID0[DPM] processor bug + during global invalidates. + *********** + Thu, July 13, 2000. + - Terry: Added isync to correct for an errata. + + 22 August 2001. + - DanM: Finally added the 7450 patch I've had for the past + several months. The L2CR is similar, but I'm going + to assume the user of this functions knows what they + are doing. + + Author: Terry Greeniaus (tgree@phys.ualberta.ca) + Please e-mail updates to this file to me, thanks! +*/ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/cache.h> +#include <asm/page.h> + +/* Usage: + + When setting the L2CR register, you must do a few special + things. If you are enabling the cache, you must perform a + global invalidate. If you are disabling the cache, you must + flush the cache contents first. This routine takes care of + doing these things. When first enabling the cache, make sure + you pass in the L2CR you want, as well as passing in the + global invalidate bit set. A global invalidate will only be + performed if the L2I bit is set in applyThis. When enabling + the cache, you should also set the L2E bit in applyThis. If + you want to modify the L2CR contents after the cache has been + enabled, the recommended procedure is to first call + __setL2CR(0) to disable the cache and then call it again with + the new values for L2CR. Examples: + + _setL2CR(0) - disables the cache + _setL2CR(0xB3A04000) - enables my G3 upgrade card: + - L2E set to turn on the cache + - L2SIZ set to 1MB + - L2CLK set to 1:1 + - L2RAM set to pipelined synchronous late-write + - L2I set to perform a global invalidation + - L2OH set to 0.5 nS + - L2DF set because this upgrade card + requires it + + A similar call should work for your card. You need to know + the correct setting for your card and then place them in the + fields I have outlined above. Other fields support optional + features, such as L2DO which caches only data, or L2TS which + causes cache pushes from the L1 cache to go to the L2 cache + instead of to main memory. + +IMPORTANT: + Starting with the 7450, the bits in this register have moved + or behave differently. The Enable, Parity Enable, Size, + and L2 Invalidate are the only bits that have not moved. + The size is read-only for these processors with internal L2 + cache, and the invalidate is a control as well as status. + -- Dan + +*/ +/* + * Summary: this procedure ignores the L2I bit in the value passed in, + * flushes the cache if it was already enabled, always invalidates the + * cache, then enables the cache if the L2E bit is set in the value + * passed in. + * -- paulus. + */ +_GLOBAL(_set_L2CR) + /* Make sure this is a 750 or 7400 chip */ +BEGIN_FTR_SECTION + li r3,-1 + blr +END_FTR_SECTION_IFCLR(CPU_FTR_L2CR) + + mflr r9 + + /* Stop DST streams */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + + /* Turn off interrupts and data relocation. */ + mfmsr r7 /* Save MSR in r7 */ + rlwinm r4,r7,0,17,15 + rlwinm r4,r4,0,28,26 /* Turn off DR bit */ + sync + mtmsr r4 + isync + + /* Before we perform the global invalidation, we must disable dynamic + * power management via HID0[DPM] to work around a processor bug where + * DPM can possibly interfere with the state machine in the processor + * that invalidates the L2 cache tags. + */ + mfspr r8,SPRN_HID0 /* Save HID0 in r8 */ + rlwinm r4,r8,0,12,10 /* Turn off HID0[DPM] */ + sync + mtspr SPRN_HID0,r4 /* Disable DPM */ + sync + + /* Get the current enable bit of the L2CR into r4 */ + mfspr r4,SPRN_L2CR + + /* Tweak some bits */ + rlwinm r5,r3,0,0,0 /* r5 contains the new enable bit */ + rlwinm r3,r3,0,11,9 /* Turn off the invalidate bit */ + rlwinm r3,r3,0,1,31 /* Turn off the enable bit */ + + /* Check to see if we need to flush */ + rlwinm. r4,r4,0,0,0 + beq 2f + + /* Flush the cache. First, read the first 4MB of memory (physical) to + * put new data in the cache. (Actually we only need + * the size of the L2 cache plus the size of the L1 cache, but 4MB will + * cover everything just to be safe). + */ + + /**** Might be a good idea to set L2DO here - to prevent instructions + from getting into the cache. But since we invalidate + the next time we enable the cache it doesn't really matter. + Don't do this unless you accomodate all processor variations. + The bit moved on the 7450..... + ****/ + +BEGIN_FTR_SECTION + /* Disable L2 prefetch on some 745x and try to ensure + * L2 prefetch engines are idle. As explained by errata + * text, we can't be sure they are, we just hope very hard + * that well be enough (sic !). At least I noticed Apple + * doesn't even bother doing the dcbf's here... + */ + mfspr r4,SPRN_MSSCR0 + rlwinm r4,r4,0,0,29 + sync + mtspr SPRN_MSSCR0,r4 + sync + isync + lis r4,KERNELBASE@h + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) + + /* TODO: use HW flush assist when available */ + + lis r4,0x0002 + mtctr r4 + li r4,0 +1: + lwzx r0,r0,r4 + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + isync + + /* Now, flush the first 4MB of memory */ + lis r4,0x0002 + mtctr r4 + li r4,0 + sync +1: + dcbf 0,r4 + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + +2: + /* Set up the L2CR configuration bits (and switch L2 off) */ + /* CPU errata: Make sure the mtspr below is already in the + * L1 icache + */ + b 20f + .balign L1_CACHE_BYTES +22: + sync + mtspr SPRN_L2CR,r3 + sync + b 23f +20: + b 21f +21: sync + isync + b 22b + +23: + /* Perform a global invalidation */ + oris r3,r3,0x0020 + sync + mtspr SPRN_L2CR,r3 + sync + isync /* For errata */ + +BEGIN_FTR_SECTION + /* On the 7450, we wait for the L2I bit to clear...... + */ +10: mfspr r3,SPRN_L2CR + andis. r4,r3,0x0020 + bne 10b + b 11f +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) + + /* Wait for the invalidation to complete */ +3: mfspr r3,SPRN_L2CR + rlwinm. r4,r3,0,31,31 + bne 3b + +11: rlwinm r3,r3,0,11,9 /* Turn off the L2I bit */ + sync + mtspr SPRN_L2CR,r3 + sync + + /* See if we need to enable the cache */ + cmplwi r5,0 + beq 4f + + /* Enable the cache */ + oris r3,r3,0x8000 + mtspr SPRN_L2CR,r3 + sync + + /* Enable L2 HW prefetch on 744x/745x */ +BEGIN_FTR_SECTION + mfspr r3,SPRN_MSSCR0 + ori r3,r3,3 + sync + mtspr SPRN_MSSCR0,r3 + sync + isync +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) +4: + + /* Restore HID0[DPM] to whatever it was before */ + sync + mtspr 1008,r8 + sync + + /* Restore MSR (restores EE and DR bits to original state) */ + SYNC + mtmsr r7 + isync + + mtlr r9 + blr + +_GLOBAL(_get_L2CR) + /* Return the L2CR contents */ + li r3,0 +BEGIN_FTR_SECTION + mfspr r3,SPRN_L2CR +END_FTR_SECTION_IFSET(CPU_FTR_L2CR) + blr + + +/* + * Here is a similar routine for dealing with the L3 cache + * on the 745x family of chips + */ + +_GLOBAL(_set_L3CR) + /* Make sure this is a 745x chip */ +BEGIN_FTR_SECTION + li r3,-1 + blr +END_FTR_SECTION_IFCLR(CPU_FTR_L3CR) + + /* Turn off interrupts and data relocation. */ + mfmsr r7 /* Save MSR in r7 */ + rlwinm r4,r7,0,17,15 + rlwinm r4,r4,0,28,26 /* Turn off DR bit */ + sync + mtmsr r4 + isync + + /* Stop DST streams */ + DSSALL + sync + + /* Get the current enable bit of the L3CR into r4 */ + mfspr r4,SPRN_L3CR + + /* Tweak some bits */ + rlwinm r5,r3,0,0,0 /* r5 contains the new enable bit */ + rlwinm r3,r3,0,22,20 /* Turn off the invalidate bit */ + rlwinm r3,r3,0,2,31 /* Turn off the enable & PE bits */ + rlwinm r3,r3,0,5,3 /* Turn off the clken bit */ + /* Check to see if we need to flush */ + rlwinm. r4,r4,0,0,0 + beq 2f + + /* Flush the cache. + */ + + /* TODO: use HW flush assist */ + + lis r4,0x0008 + mtctr r4 + li r4,0 +1: + lwzx r0,r0,r4 + dcbf 0,r4 + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + +2: + /* Set up the L3CR configuration bits (and switch L3 off) */ + sync + mtspr SPRN_L3CR,r3 + sync + + oris r3,r3,L3CR_L3RES@h /* Set reserved bit 5 */ + mtspr SPRN_L3CR,r3 + sync + oris r3,r3,L3CR_L3CLKEN@h /* Set clken */ + mtspr SPRN_L3CR,r3 + sync + + /* Wait for stabilize */ + li r0,256 + mtctr r0 +1: bdnz 1b + + /* Perform a global invalidation */ + ori r3,r3,0x0400 + sync + mtspr SPRN_L3CR,r3 + sync + isync + + /* We wait for the L3I bit to clear...... */ +10: mfspr r3,SPRN_L3CR + andi. r4,r3,0x0400 + bne 10b + + /* Clear CLKEN */ + rlwinm r3,r3,0,5,3 /* Turn off the clken bit */ + mtspr SPRN_L3CR,r3 + sync + + /* Wait for stabilize */ + li r0,256 + mtctr r0 +1: bdnz 1b + + /* See if we need to enable the cache */ + cmplwi r5,0 + beq 4f + + /* Enable the cache */ + oris r3,r3,(L3CR_L3E | L3CR_L3CLKEN)@h + mtspr SPRN_L3CR,r3 + sync + + /* Wait for stabilize */ + li r0,256 + mtctr r0 +1: bdnz 1b + + /* Restore MSR (restores EE and DR bits to original state) */ +4: SYNC + mtmsr r7 + isync + blr + +_GLOBAL(_get_L3CR) + /* Return the L3CR contents */ + li r3,0 +BEGIN_FTR_SECTION + mfspr r3,SPRN_L3CR +END_FTR_SECTION_IFSET(CPU_FTR_L3CR) + blr + +/* --- End of PowerLogix code --- + */ + + +/* flush_disable_L1() - Flush and disable L1 cache + * + * clobbers r0, r3, ctr, cr0 + * Must be called with interrupts disabled and MMU enabled. + */ +_GLOBAL(__flush_disable_L1) + /* Stop pending alitvec streams and memory accesses */ +BEGIN_FTR_SECTION + DSSALL +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + sync + + /* Load counter to 0x4000 cache lines (512k) and + * load cache with datas + */ + li r3,0x4000 /* 512kB / 32B */ + mtctr r3 + lis r3,KERNELBASE@h +1: + lwz r0,0(r3) + addi r3,r3,0x0020 /* Go to start of next cache line */ + bdnz 1b + isync + sync + + /* Now flush those cache lines */ + li r3,0x4000 /* 512kB / 32B */ + mtctr r3 + lis r3,KERNELBASE@h +1: + dcbf 0,r3 + addi r3,r3,0x0020 /* Go to start of next cache line */ + bdnz 1b + sync + + /* We can now disable the L1 cache (HID0:DCE, HID0:ICE) */ + mfspr r3,SPRN_HID0 + rlwinm r3,r3,0,18,15 + mtspr SPRN_HID0,r3 + sync + isync + blr + +/* inval_enable_L1 - Invalidate and enable L1 cache + * + * Assumes L1 is already disabled and MSR:EE is off + * + * clobbers r3 + */ +_GLOBAL(__inval_enable_L1) + /* Enable and then Flash inval the instruction & data cache */ + mfspr r3,SPRN_HID0 + ori r3,r3, HID0_ICE|HID0_ICFI|HID0_DCE|HID0_DCI + sync + isync + mtspr SPRN_HID0,r3 + xori r3,r3, HID0_ICFI|HID0_DCI + mtspr SPRN_HID0,r3 + sync + + blr + + diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index c7a799a0951..6e67b5b49ba 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -37,7 +37,7 @@ static int legacy_serial_console = -1; static int __init add_legacy_port(struct device_node *np, int want_index, int iotype, phys_addr_t base, phys_addr_t taddr, unsigned long irq, - unsigned int flags) + upf_t flags) { u32 *clk, *spd, clock = BASE_BAUD * 16; int index; @@ -113,7 +113,7 @@ static int __init add_legacy_soc_port(struct device_node *np, { phys_addr_t addr; u32 *addrp; - unsigned int flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ; + upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ; /* We only support ports that have a clock frequency properly * encoded in the device-tree. @@ -236,6 +236,23 @@ static int __init add_legacy_pci_port(struct device_node *np, } #endif +static void __init setup_legacy_serial_console(int console) +{ + struct legacy_serial_info *info = + &legacy_serial_infos[console]; + void __iomem *addr; + + if (info->taddr == 0) + return; + addr = ioremap(info->taddr, 0x1000); + if (addr == NULL) + return; + if (info->speed == 0) + info->speed = udbg_probe_uart_speed(addr, info->clock); + DBG("default console speed = %d\n", info->speed); + udbg_init_uart(addr, info->speed, info->clock); +} + /* * This is called very early, as part of setup_system() or eventually * setup_arch(), basically before anything else in this file. This function @@ -318,25 +335,8 @@ void __init find_legacy_serial_ports(void) #endif DBG("legacy_serial_console = %d\n", legacy_serial_console); - - /* udbg is 64 bits only for now, that will change soon though ... */ - while (legacy_serial_console >= 0) { - struct legacy_serial_info *info = - &legacy_serial_infos[legacy_serial_console]; - void __iomem *addr; - - if (info->taddr == 0) - break; - addr = ioremap(info->taddr, 0x1000); - if (addr == NULL) - break; - if (info->speed == 0) - info->speed = udbg_probe_uart_speed(addr, info->clock); - DBG("default console speed = %d\n", info->speed); - udbg_init_uart(addr, info->speed, info->clock); - break; - } - + if (legacy_serial_console >= 0) + setup_legacy_serial_console(legacy_serial_console); DBG(" <- find_legacy_serial_port()\n"); } diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index e789fef4eb8..1b73508ecb2 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -56,7 +56,7 @@ static unsigned long get_purr(void) unsigned long sum_purr = 0; int cpu; - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { sum_purr += lppaca[cpu].emulated_time_base; #ifdef PURR_DEBUG @@ -222,7 +222,7 @@ static unsigned long get_purr(void) int cpu; struct cpu_usage *cu; - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { cu = &per_cpu(cpu_usage_array, cpu); sum_purr += cu->current_tb; } diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c new file mode 100644 index 00000000000..92f4e5f64f0 --- /dev/null +++ b/arch/powerpc/kernel/module_32.c @@ -0,0 +1,320 @@ +/* Kernel module help for PPC. + Copyright (C) 2001 Rusty Russell. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include <linux/module.h> +#include <linux/moduleloader.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/cache.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt , ...) +#endif + +LIST_HEAD(module_bug_list); + +void *module_alloc(unsigned long size) +{ + if (size == 0) + return NULL; + return vmalloc(size); +} + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); + /* FIXME: If module_region == mod->init_region, trim exception + table entries. */ +} + +/* Count how many different relocations (different symbol, different + addend) */ +static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num) +{ + unsigned int i, j, ret = 0; + + /* Sure, this is order(n^2), but it's usually short, and not + time critical */ + for (i = 0; i < num; i++) { + for (j = 0; j < i; j++) { + /* If this addend appeared before, it's + already been counted */ + if (ELF32_R_SYM(rela[i].r_info) + == ELF32_R_SYM(rela[j].r_info) + && rela[i].r_addend == rela[j].r_addend) + break; + } + if (j == i) ret++; + } + return ret; +} + +/* Get the potential trampolines size required of the init and + non-init sections */ +static unsigned long get_plt_size(const Elf32_Ehdr *hdr, + const Elf32_Shdr *sechdrs, + const char *secstrings, + int is_init) +{ + unsigned long ret = 0; + unsigned i; + + /* Everything marked ALLOC (this includes the exported + symbols) */ + for (i = 1; i < hdr->e_shnum; i++) { + /* If it's called *.init*, and we're not init, we're + not interested */ + if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0) + != is_init) + continue; + + /* We don't want to look at debug sections. */ + if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != 0) + continue; + + if (sechdrs[i].sh_type == SHT_RELA) { + DEBUGP("Found relocations in section %u\n", i); + DEBUGP("Ptr: %p. Number: %u\n", + (void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size / sizeof(Elf32_Rela)); + ret += count_relocs((void *)hdr + + sechdrs[i].sh_offset, + sechdrs[i].sh_size + / sizeof(Elf32_Rela)) + * sizeof(struct ppc_plt_entry); + } + } + + return ret; +} + +int module_frob_arch_sections(Elf32_Ehdr *hdr, + Elf32_Shdr *sechdrs, + char *secstrings, + struct module *me) +{ + unsigned int i; + + /* Find .plt and .init.plt sections */ + for (i = 0; i < hdr->e_shnum; i++) { + if (strcmp(secstrings + sechdrs[i].sh_name, ".init.plt") == 0) + me->arch.init_plt_section = i; + else if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0) + me->arch.core_plt_section = i; + } + if (!me->arch.core_plt_section || !me->arch.init_plt_section) { + printk("Module doesn't contain .plt or .init.plt sections.\n"); + return -ENOEXEC; + } + + /* Override their sizes */ + sechdrs[me->arch.core_plt_section].sh_size + = get_plt_size(hdr, sechdrs, secstrings, 0); + sechdrs[me->arch.init_plt_section].sh_size + = get_plt_size(hdr, sechdrs, secstrings, 1); + return 0; +} + +int apply_relocate(Elf32_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *module) +{ + printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", + module->name); + return -ENOEXEC; +} + +static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) +{ + if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16) + && entry->jump[1] == 0x396b0000 + (val & 0xffff)) + return 1; + return 0; +} + +/* Set up a trampoline in the PLT to bounce us to the distant function */ +static uint32_t do_plt_call(void *location, + Elf32_Addr val, + Elf32_Shdr *sechdrs, + struct module *mod) +{ + struct ppc_plt_entry *entry; + + DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); + /* Init, or core PLT? */ + if (location >= mod->module_core + && location < mod->module_core + mod->core_size) + entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; + else + entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; + + /* Find this entry, or if that fails, the next avail. entry */ + while (entry->jump[0]) { + if (entry_matches(entry, val)) return (uint32_t)entry; + entry++; + } + + /* Stolen from Paul Mackerras as well... */ + entry->jump[0] = 0x3d600000+((val+0x8000)>>16); /* lis r11,sym@ha */ + entry->jump[1] = 0x396b0000 + (val&0xffff); /* addi r11,r11,sym@l*/ + entry->jump[2] = 0x7d6903a6; /* mtctr r11 */ + entry->jump[3] = 0x4e800420; /* bctr */ + + DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); + return (uint32_t)entry; +} + +int apply_relocate_add(Elf32_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *module) +{ + unsigned int i; + Elf32_Rela *rela = (void *)sechdrs[relsec].sh_addr; + Elf32_Sym *sym; + uint32_t *location; + uint32_t value; + + DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rela[i].r_offset; + /* This is the symbol it is referring to. Note that all + undefined symbols have been resolved. */ + sym = (Elf32_Sym *)sechdrs[symindex].sh_addr + + ELF32_R_SYM(rela[i].r_info); + /* `Everything is relative'. */ + value = sym->st_value + rela[i].r_addend; + + switch (ELF32_R_TYPE(rela[i].r_info)) { + case R_PPC_ADDR32: + /* Simply set it */ + *(uint32_t *)location = value; + break; + + case R_PPC_ADDR16_LO: + /* Low half of the symbol */ + *(uint16_t *)location = value; + break; + + case R_PPC_ADDR16_HA: + /* Sign-adjusted lower 16 bits: PPC ELF ABI says: + (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF. + This is the same, only sane. + */ + *(uint16_t *)location = (value + 0x8000) >> 16; + break; + + case R_PPC_REL24: + if ((int)(value - (uint32_t)location) < -0x02000000 + || (int)(value - (uint32_t)location) >= 0x02000000) + value = do_plt_call(location, value, + sechdrs, module); + + /* Only replace bits 2 through 26 */ + DEBUGP("REL24 value = %08X. location = %08X\n", + value, (uint32_t)location); + DEBUGP("Location before: %08X.\n", + *(uint32_t *)location); + *(uint32_t *)location + = (*(uint32_t *)location & ~0x03fffffc) + | ((value - (uint32_t)location) + & 0x03fffffc); + DEBUGP("Location after: %08X.\n", + *(uint32_t *)location); + DEBUGP("ie. jump to %08X+%08X = %08X\n", + *(uint32_t *)location & 0x03fffffc, + (uint32_t)location, + (*(uint32_t *)location & 0x03fffffc) + + (uint32_t)location); + break; + + case R_PPC_REL32: + /* 32-bit relative jump. */ + *(uint32_t *)location = value - (uint32_t)location; + break; + + default: + printk("%s: unknown ADD relocation: %u\n", + module->name, + ELF32_R_TYPE(rela[i].r_info)); + return -ENOEXEC; + } + } + return 0; +} + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + char *secstrings; + unsigned int i; + + me->arch.bug_table = NULL; + me->arch.num_bugs = 0; + + /* Find the __bug_table section, if present */ + secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (i = 1; i < hdr->e_shnum; i++) { + if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table")) + continue; + me->arch.bug_table = (void *) sechdrs[i].sh_addr; + me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry); + break; + } + + /* + * Strictly speaking this should have a spinlock to protect against + * traversals, but since we only traverse on BUG()s, a spinlock + * could potentially lead to deadlock and thus be counter-productive. + */ + list_add(&me->arch.bug_list, &module_bug_list); + + return 0; +} + +void module_arch_cleanup(struct module *mod) +{ + list_del(&mod->arch.bug_list); +} + +struct bug_entry *module_find_bug(unsigned long bugaddr) +{ + struct mod_arch_specific *mod; + unsigned int i; + struct bug_entry *bug; + + list_for_each_entry(mod, &module_bug_list, bug_list) { + bug = mod->bug_table; + for (i = 0; i < mod->num_bugs; ++i, ++bug) + if (bugaddr == bug->bug_addr) + return bug; + } + return NULL; +} diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index fd7db8d542d..ada50aa5b60 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -160,7 +160,7 @@ static int dev_nvram_ioctl(struct inode *inode, struct file *file, case IOC_NVRAM_GET_OFFSET: { int part, offset; - if (_machine != PLATFORM_POWERMAC) + if (!machine_is(powermac)) return -EINVAL; if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0) return -EFAULT; @@ -174,8 +174,9 @@ static int dev_nvram_ioctl(struct inode *inode, struct file *file, return 0; } #endif /* CONFIG_PPC_PMAC */ + default: + return -EINVAL; } - return -EINVAL; } struct file_operations nvram_fops = { @@ -443,7 +444,7 @@ static int nvram_setup_partition(void) * in our nvram, as Apple defined partitions use pretty much * all of the space */ - if (_machine == PLATFORM_POWERMAC) + if (machine_is(powermac)) return -ENOSPC; /* see if we have an OS partition that meets our needs. diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c index 22d83d4d1af..9feeeef5a87 100644 --- a/arch/powerpc/kernel/of_device.c +++ b/arch/powerpc/kernel/of_device.c @@ -147,15 +147,12 @@ postcore_initcall(of_bus_driver_init); int of_register_driver(struct of_platform_driver *drv) { - int count = 0; - /* initialize common driver fields */ drv->driver.name = drv->name; drv->driver.bus = &of_platform_bus_type; /* register with core */ - count = driver_register(&drv->driver); - return count ? count : 1; + return driver_register(&drv->driver); } void of_unregister_driver(struct of_platform_driver *drv) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 5d1b708086b..f505a8827e3 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -56,14 +56,11 @@ struct lppaca lppaca[] = { * processors. The processor VPD array needs one entry per physical * processor (not thread). */ -#define PACA_INIT_COMMON(number, start, asrr, asrv) \ +#define PACA_INIT_COMMON(number) \ .lppaca_ptr = &lppaca[number], \ .lock_token = 0x8000, \ .paca_index = (number), /* Paca Index */ \ .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \ - .stab_real = (asrr), /* Real pointer to segment table */ \ - .stab_addr = (asrv), /* Virt pointer to segment table */ \ - .cpu_start = (start), /* Processor start */ \ .hw_cpu_id = 0xffff, #ifdef CONFIG_PPC_ISERIES @@ -72,30 +69,20 @@ struct lppaca lppaca[] = { #define PACA_INIT(number) \ { \ - PACA_INIT_COMMON(number, 0, 0, 0) \ - PACA_INIT_ISERIES(number) \ -} - -#define BOOTCPU_PACA_INIT(number) \ -{ \ - PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \ + PACA_INIT_COMMON(number) \ PACA_INIT_ISERIES(number) \ } #else #define PACA_INIT(number) \ { \ - PACA_INIT_COMMON(number, 0, 0, 0) \ + PACA_INIT_COMMON(number) \ } -#define BOOTCPU_PACA_INIT(number) \ -{ \ - PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \ -} #endif struct paca_struct paca[] = { - BOOTCPU_PACA_INIT(0), + PACA_INIT(0), #if NR_CPUS > 1 PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3), #if NR_CPUS > 4 diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 704c846b2b0..b129d2e4b75 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -787,7 +787,7 @@ pci_busdev_to_OF_node(struct pci_bus *bus, int devfn) * fix has to be done by making the remapping per-host and always * filling the pci_to_OF map. --BenH */ - if (_machine == _MACH_Pmac && busnr >= 0xf0) + if (machine_is(powermac) && busnr >= 0xf0) busnr -= 0xf0; else #endif @@ -1728,7 +1728,7 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) * (bus 0 is HT root), we return the AGP one instead. */ #ifdef CONFIG_PPC_PMAC - if (_machine == _MACH_Pmac && machine_is_compatible("MacRISC4")) + if (machine_is(powermac) && machine_is_compatible("MacRISC4")) if (bus == 0) bus = 0xf0; #endif /* CONFIG_PPC_PMAC */ diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index ba92bab7cc2..4c4449be81c 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -78,6 +78,7 @@ int global_phb_number; /* Global phb counter */ /* Cached ISA bridge dev. */ struct pci_dev *ppc64_isabridge_dev = NULL; +EXPORT_SYMBOL_GPL(ppc64_isabridge_dev); static void fixup_broken_pcnet32(struct pci_dev* dev) { diff --git a/arch/powerpc/kernel/pci_iommu.c b/arch/powerpc/kernel/pci_iommu.c index bdf15dbbf4f..c336f3e31cf 100644 --- a/arch/powerpc/kernel/pci_iommu.c +++ b/arch/powerpc/kernel/pci_iommu.c @@ -1,5 +1,4 @@ /* - * arch/ppc64/kernel/pci_iommu.c * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation * * Rewrite, cleanup, new allocation schemes: diff --git a/arch/powerpc/kernel/perfmon_fsl_booke.c b/arch/powerpc/kernel/perfmon_fsl_booke.c new file mode 100644 index 00000000000..32455dfcc36 --- /dev/null +++ b/arch/powerpc/kernel/perfmon_fsl_booke.c @@ -0,0 +1,222 @@ +/* kernel/perfmon_fsl_booke.c + * Freescale Book-E Performance Monitor code + * + * Author: Andy Fleming + * Copyright (c) 2004 Freescale Semiconductor, Inc + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/interrupt.h> +#include <linux/config.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/prctl.h> + +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/reg.h> +#include <asm/xmon.h> +#include <asm/pmc.h> + +static inline u32 get_pmlca(int ctr); +static inline void set_pmlca(int ctr, u32 pmlca); + +static inline u32 get_pmlca(int ctr) +{ + u32 pmlca; + + switch (ctr) { + case 0: + pmlca = mfpmr(PMRN_PMLCA0); + break; + case 1: + pmlca = mfpmr(PMRN_PMLCA1); + break; + case 2: + pmlca = mfpmr(PMRN_PMLCA2); + break; + case 3: + pmlca = mfpmr(PMRN_PMLCA3); + break; + default: + panic("Bad ctr number\n"); + } + + return pmlca; +} + +static inline void set_pmlca(int ctr, u32 pmlca) +{ + switch (ctr) { + case 0: + mtpmr(PMRN_PMLCA0, pmlca); + break; + case 1: + mtpmr(PMRN_PMLCA1, pmlca); + break; + case 2: + mtpmr(PMRN_PMLCA2, pmlca); + break; + case 3: + mtpmr(PMRN_PMLCA3, pmlca); + break; + default: + panic("Bad ctr number\n"); + } +} + +void init_pmc_stop(int ctr) +{ + u32 pmlca = (PMLCA_FC | PMLCA_FCS | PMLCA_FCU | + PMLCA_FCM1 | PMLCA_FCM0); + u32 pmlcb = 0; + + switch (ctr) { + case 0: + mtpmr(PMRN_PMLCA0, pmlca); + mtpmr(PMRN_PMLCB0, pmlcb); + break; + case 1: + mtpmr(PMRN_PMLCA1, pmlca); + mtpmr(PMRN_PMLCB1, pmlcb); + break; + case 2: + mtpmr(PMRN_PMLCA2, pmlca); + mtpmr(PMRN_PMLCB2, pmlcb); + break; + case 3: + mtpmr(PMRN_PMLCA3, pmlca); + mtpmr(PMRN_PMLCB3, pmlcb); + break; + default: + panic("Bad ctr number!\n"); + } +} + +void set_pmc_event(int ctr, int event) +{ + u32 pmlca; + + pmlca = get_pmlca(ctr); + + pmlca = (pmlca & ~PMLCA_EVENT_MASK) | + ((event << PMLCA_EVENT_SHIFT) & + PMLCA_EVENT_MASK); + + set_pmlca(ctr, pmlca); +} + +void set_pmc_user_kernel(int ctr, int user, int kernel) +{ + u32 pmlca; + + pmlca = get_pmlca(ctr); + + if(user) + pmlca &= ~PMLCA_FCU; + else + pmlca |= PMLCA_FCU; + + if(kernel) + pmlca &= ~PMLCA_FCS; + else + pmlca |= PMLCA_FCS; + + set_pmlca(ctr, pmlca); +} + +void set_pmc_marked(int ctr, int mark0, int mark1) +{ + u32 pmlca = get_pmlca(ctr); + + if(mark0) + pmlca &= ~PMLCA_FCM0; + else + pmlca |= PMLCA_FCM0; + + if(mark1) + pmlca &= ~PMLCA_FCM1; + else + pmlca |= PMLCA_FCM1; + + set_pmlca(ctr, pmlca); +} + +void pmc_start_ctr(int ctr, int enable) +{ + u32 pmlca = get_pmlca(ctr); + + pmlca &= ~PMLCA_FC; + + if (enable) + pmlca |= PMLCA_CE; + else + pmlca &= ~PMLCA_CE; + + set_pmlca(ctr, pmlca); +} + +void pmc_start_ctrs(int enable) +{ + u32 pmgc0 = mfpmr(PMRN_PMGC0); + + pmgc0 &= ~PMGC0_FAC; + pmgc0 |= PMGC0_FCECE; + + if (enable) + pmgc0 |= PMGC0_PMIE; + else + pmgc0 &= ~PMGC0_PMIE; + + mtpmr(PMRN_PMGC0, pmgc0); +} + +void pmc_stop_ctrs(void) +{ + u32 pmgc0 = mfpmr(PMRN_PMGC0); + + pmgc0 |= PMGC0_FAC; + + pmgc0 &= ~(PMGC0_PMIE | PMGC0_FCECE); + + mtpmr(PMRN_PMGC0, pmgc0); +} + +void dump_pmcs(void) +{ + printk("pmgc0: %x\n", mfpmr(PMRN_PMGC0)); + printk("pmc\t\tpmlca\t\tpmlcb\n"); + printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC0), + mfpmr(PMRN_PMLCA0), mfpmr(PMRN_PMLCB0)); + printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC1), + mfpmr(PMRN_PMLCA1), mfpmr(PMRN_PMLCB1)); + printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC2), + mfpmr(PMRN_PMLCA2), mfpmr(PMRN_PMLCB2)); + printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC3), + mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3)); +} + +EXPORT_SYMBOL(init_pmc_stop); +EXPORT_SYMBOL(set_pmc_event); +EXPORT_SYMBOL(set_pmc_user_kernel); +EXPORT_SYMBOL(set_pmc_marked); +EXPORT_SYMBOL(pmc_start_ctr); +EXPORT_SYMBOL(pmc_start_ctrs); +EXPORT_SYMBOL(pmc_stop_ctrs); +EXPORT_SYMBOL(dump_pmcs); diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 63ecbec0520..dfa5398ab3c 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -57,7 +57,6 @@ extern void machine_check_exception(struct pt_regs *regs); extern void alignment_exception(struct pt_regs *regs); extern void program_check_exception(struct pt_regs *regs); extern void single_step_exception(struct pt_regs *regs); -extern int pmac_newworld; extern int sys_sigreturn(struct pt_regs *regs); EXPORT_SYMBOL(clear_pages); diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c index 7ba42a405f4..3c2cf661f6d 100644 --- a/arch/powerpc/kernel/proc_ppc64.c +++ b/arch/powerpc/kernel/proc_ppc64.c @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/kernel.h> +#include <asm/machdep.h> #include <asm/vdso_datapage.h> #include <asm/rtas.h> #include <asm/uaccess.h> @@ -51,7 +52,7 @@ static int __init proc_ppc64_create(void) if (!root) return 1; - if (!(platform_is_pseries() || _machine == PLATFORM_CELL)) + if (!machine_is(pseries) && !machine_is(cell)) return 0; if (!proc_mkdir("rtas", root)) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index c225cf154bf..706090c99f4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1,6 +1,4 @@ /* - * arch/ppc/kernel/process.c - * * Derived from "arch/i386/kernel/process.c" * Copyright (C) 1995 Linus Torvalds * @@ -37,7 +35,6 @@ #include <linux/mqueue.h> #include <linux/hardirq.h> #include <linux/utsname.h> -#include <linux/kprobes.h> #include <asm/pgtable.h> #include <asm/uaccess.h> @@ -47,9 +44,10 @@ #include <asm/mmu.h> #include <asm/prom.h> #include <asm/machdep.h> +#include <asm/time.h> +#include <asm/syscalls.h> #ifdef CONFIG_PPC64 #include <asm/firmware.h> -#include <asm/time.h> #endif extern unsigned long _get_SP(void); @@ -330,6 +328,11 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif local_irq_save(flags); + + account_system_vtime(current); + account_process_vtime(current); + calculate_steal_time(); + last = _switch(old_thread, new_thread); local_irq_restore(flags); @@ -360,7 +363,11 @@ static void show_instructions(struct pt_regs *regs) if (!(i % 8)) printk("\n"); - if (BAD_PC(pc) || __get_user(instr, (unsigned int *)pc)) { + /* We use __get_user here *only* to avoid an OOPS on a + * bad address because the pc *should* only be a + * kernel address. + */ + if (BAD_PC(pc) || __get_user(instr, (unsigned int __user *)pc)) { printk("XXXXXXXX "); } else { if (regs->nip == pc) @@ -457,7 +464,6 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { - kprobe_flush_task(current); discard_lazy_cpu_state(); } @@ -764,7 +770,7 @@ out: return error; } -static int validate_sp(unsigned long sp, struct task_struct *p, +int validate_sp(unsigned long sp, struct task_struct *p, unsigned long nbytes) { unsigned long stack_page = (unsigned long)task_stack_page(p); @@ -802,6 +808,8 @@ static int validate_sp(unsigned long sp, struct task_struct *p, #define FRAME_MARKER 2 #endif +EXPORT_SYMBOL(validate_sp); + unsigned long get_wchan(struct task_struct *p) { unsigned long ip, sp; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 6dbd2172677..4336390bcf3 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -383,14 +383,14 @@ static int __devinit finish_node_interrupts(struct device_node *np, /* Apple uses bits in there in a different way, let's * only keep the real sense bit on macs */ - if (_machine == PLATFORM_POWERMAC) + if (machine_is(powermac)) sense &= 0x1; np->intrs[intrcount].sense = map_mpic_senses[sense]; } #ifdef CONFIG_PPC64 /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */ - if (_machine == PLATFORM_POWERMAC && ic && ic->parent) { + if (machine_is(powermac) && ic && ic->parent) { char *name = get_property(ic->parent, "name", NULL); if (name && !strcmp(name, "u3")) np->intrs[intrcount].line += 128; @@ -570,6 +570,18 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node, return rc; } +unsigned long __init of_get_flat_dt_root(void) +{ + unsigned long p = ((unsigned long)initial_boot_params) + + initial_boot_params->off_dt_struct; + + while(*((u32 *)p) == OF_DT_NOP) + p += 4; + BUG_ON (*((u32 *)p) != OF_DT_BEGIN_NODE); + p += 4; + return _ALIGN(p + strlen((char *)p) + 1, 4); +} + /** * This function can be used within scan_flattened_dt callback to get * access to properties @@ -612,6 +624,25 @@ void* __init of_get_flat_dt_prop(unsigned long node, const char *name, } while(1); } +int __init of_flat_dt_is_compatible(unsigned long node, const char *compat) +{ + const char* cp; + unsigned long cplen, l; + + cp = of_get_flat_dt_prop(node, "compatible", &cplen); + if (cp == NULL) + return 0; + while (cplen > 0) { + if (strncasecmp(cp, compat, strlen(compat)) == 0) + return 1; + l = strlen(cp) + 1; + cp += l; + cplen -= l; + } + + return 0; +} + static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, unsigned long align) { @@ -686,7 +717,7 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, #ifdef DEBUG if ((strlen(p) + l + 1) != allocl) { DBG("%s: p: %d, l: %d, a: %d\n", - pathp, strlen(p), l, allocl); + pathp, (int)strlen(p), l, allocl); } #endif p += strlen(p); @@ -829,10 +860,6 @@ void __init unflatten_device_tree(void) /* Allocate memory for the expanded device tree */ mem = lmb_alloc(size + 4, __alignof__(struct device_node)); - if (!mem) { - DBG("Couldn't allocate memory with lmb_alloc()!\n"); - panic("Couldn't allocate memory with lmb_alloc()!\n"); - } mem = (unsigned long) __va(mem); ((u32 *)mem)[size / 4] = 0xdeadbeef; @@ -858,35 +885,73 @@ void __init unflatten_device_tree(void) DBG(" <- unflatten_device_tree()\n"); } - static int __init early_init_dt_scan_cpus(unsigned long node, - const char *uname, int depth, void *data) + const char *uname, int depth, + void *data) { + static int logical_cpuid = 0; + char *type = of_get_flat_dt_prop(node, "device_type", NULL); +#ifdef CONFIG_ALTIVEC u32 *prop; - unsigned long size; - char *type = of_get_flat_dt_prop(node, "device_type", &size); +#endif + u32 *intserv; + int i, nthreads; + unsigned long len; + int found = 0; /* We are scanning "cpu" nodes only */ if (type == NULL || strcmp(type, "cpu") != 0) return 0; - boot_cpuid = 0; - boot_cpuid_phys = 0; - if (initial_boot_params && initial_boot_params->version >= 2) { - /* version 2 of the kexec param format adds the phys cpuid - * of booted proc. - */ - boot_cpuid_phys = initial_boot_params->boot_cpuid_phys; + /* Get physical cpuid */ + intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len); + if (intserv) { + nthreads = len / sizeof(int); } else { - /* Check if it's the boot-cpu, set it's hw index now */ - if (of_get_flat_dt_prop(node, + intserv = of_get_flat_dt_prop(node, "reg", NULL); + nthreads = 1; + } + + /* + * Now see if any of these threads match our boot cpu. + * NOTE: This must match the parsing done in smp_setup_cpu_maps. + */ + for (i = 0; i < nthreads; i++) { + /* + * version 2 of the kexec param format adds the phys cpuid of + * booted proc. + */ + if (initial_boot_params && initial_boot_params->version >= 2) { + if (intserv[i] == + initial_boot_params->boot_cpuid_phys) { + found = 1; + break; + } + } else { + /* + * Check if it's the boot-cpu, set it's hw index now, + * unfortunately this format did not support booting + * off secondary threads. + */ + if (of_get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) { - prop = of_get_flat_dt_prop(node, "reg", NULL); - if (prop != NULL) - boot_cpuid_phys = *prop; + found = 1; + break; + } } + +#ifdef CONFIG_SMP + /* logical cpu id is always 0 on UP kernels */ + logical_cpuid++; +#endif + } + + if (found) { + DBG("boot cpu: logical %d physical %d\n", logical_cpuid, + intserv[i]); + boot_cpuid = logical_cpuid; + set_hard_smp_processor_id(boot_cpuid, intserv[i]); } - set_hard_smp_processor_id(0, boot_cpuid_phys); #ifdef CONFIG_ALTIVEC /* Check if we have a VMX and eventually update CPU features */ @@ -905,16 +970,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node, #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_PPC_PSERIES - /* - * Check for an SMT capable CPU and set the CPU feature. We do - * this by looking at the size of the ibm,ppc-interrupt-server#s - * property - */ - prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", - &size); - cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; - if (prop && ((size / sizeof(u32)) > 1)) + if (nthreads > 1) cur_cpu_spec->cpu_features |= CPU_FTR_SMT; + else + cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; #endif return 0; @@ -923,7 +982,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node, static int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data) { - u32 *prop; unsigned long *lprop; unsigned long l; char *p; @@ -934,14 +992,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node, (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) return 0; - /* get platform type */ - prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL); - if (prop == NULL) - return 0; -#ifdef CONFIG_PPC_MULTIPLATFORM - _machine = *prop; -#endif - #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) @@ -968,15 +1018,15 @@ static int __init early_init_dt_scan_chosen(unsigned long node, * set of RTAS infos now if available */ { - u64 *basep, *entryp; + u64 *basep, *entryp, *sizep; basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL); entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); - prop = of_get_flat_dt_prop(node, "linux,rtas-size", NULL); - if (basep && entryp && prop) { + sizep = of_get_flat_dt_prop(node, "linux,rtas-size", NULL); + if (basep && entryp && sizep) { rtas.base = *basep; rtas.entry = *entryp; - rtas.size = *prop; + rtas.size = *sizep; } } #endif /* CONFIG_PPC_RTAS */ @@ -1005,25 +1055,13 @@ static int __init early_init_dt_scan_chosen(unsigned long node, if (strstr(cmd_line, "mem=")) { char *p, *q; - unsigned long maxmem = 0; for (q = cmd_line; (p = strstr(q, "mem=")) != 0; ) { q = p + 4; if (p > cmd_line && p[-1] != ' ') continue; - maxmem = simple_strtoul(q, &q, 0); - if (*q == 'k' || *q == 'K') { - maxmem <<= 10; - ++q; - } else if (*q == 'm' || *q == 'M') { - maxmem <<= 20; - ++q; - } else if (*q == 'g' || *q == 'G') { - maxmem <<= 30; - ++q; - } + memory_limit = memparse(q, &q); } - memory_limit = maxmem; } /* break now */ @@ -1759,7 +1797,7 @@ static int of_finish_dynamic_node(struct device_node *node) /* We don't support that function on PowerMac, at least * not yet */ - if (_machine == PLATFORM_POWERMAC) + if (machine_is(powermac)) return -ENODEV; /* fix up new node's linux_phandle field */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 813c2cd194c..d66c5e77fcf 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -180,6 +180,16 @@ static unsigned long __initdata prom_tce_alloc_start; static unsigned long __initdata prom_tce_alloc_end; #endif +/* Platforms codes are now obsolete in the kernel. Now only used within this + * file and ultimately gone too. Feel free to change them if you need, they + * are not shared with anything outside of this file anymore + */ +#define PLATFORM_PSERIES 0x0100 +#define PLATFORM_PSERIES_LPAR 0x0101 +#define PLATFORM_LPAR 0x0001 +#define PLATFORM_POWERMAC 0x0400 +#define PLATFORM_GENERIC 0x0500 + static int __initdata of_platform; static char __initdata prom_cmd_line[COMMAND_LINE_SIZE]; @@ -397,6 +407,11 @@ static void __init __attribute__((noreturn)) prom_panic(const char *reason) reason = PTRRELOC(reason); #endif prom_print(reason); + /* Do not call exit because it clears the screen on pmac + * it also causes some sort of double-fault on early pmacs */ + if (RELOC(of_platform) == PLATFORM_POWERMAC) + asm("trap\n"); + /* ToDo: should put up an SRC here on p/iSeries */ call_prom("exit", 0, 0); @@ -1487,7 +1502,10 @@ static int __init prom_find_machine_type(void) int len, i = 0; #ifdef CONFIG_PPC64 phandle rtas; + int x; #endif + + /* Look for a PowerMac */ len = prom_getprop(_prom->root, "compatible", compat, sizeof(compat)-1); if (len > 0) { @@ -1500,28 +1518,36 @@ static int __init prom_find_machine_type(void) if (strstr(p, RELOC("Power Macintosh")) || strstr(p, RELOC("MacRISC"))) return PLATFORM_POWERMAC; -#ifdef CONFIG_PPC64 - if (strstr(p, RELOC("Momentum,Maple"))) - return PLATFORM_MAPLE; - if (strstr(p, RELOC("IBM,CPB"))) - return PLATFORM_CELL; -#endif i += sl + 1; } } #ifdef CONFIG_PPC64 + /* If not a mac, try to figure out if it's an IBM pSeries or any other + * PAPR compliant platform. We assume it is if : + * - /device_type is "chrp" (please, do NOT use that for future + * non-IBM designs ! + * - it has /rtas + */ + len = prom_getprop(_prom->root, "model", + compat, sizeof(compat)-1); + if (len <= 0) + return PLATFORM_GENERIC; + compat[len] = 0; + if (strcmp(compat, "chrp")) + return PLATFORM_GENERIC; + /* Default to pSeries. We need to know if we are running LPAR */ rtas = call_prom("finddevice", 1, 1, ADDR("/rtas")); - if (PHANDLE_VALID(rtas)) { - int x = prom_getproplen(rtas, "ibm,hypertas-functions"); - if (x != PROM_ERROR) { - prom_printf("Hypertas detected, assuming LPAR !\n"); - return PLATFORM_PSERIES_LPAR; - } + if (!PHANDLE_VALID(rtas)) + return PLATFORM_GENERIC; + x = prom_getproplen(rtas, "ibm,hypertas-functions"); + if (x != PROM_ERROR) { + prom_printf("Hypertas detected, assuming LPAR !\n"); + return PLATFORM_PSERIES_LPAR; } return PLATFORM_PSERIES; #else - return PLATFORM_CHRP; + return PLATFORM_GENERIC; #endif } @@ -2029,7 +2055,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, { struct prom_t *_prom; unsigned long hdr; - u32 getprop_rval; unsigned long offset = reloc_offset(); #ifdef CONFIG_PPC32 @@ -2060,6 +2085,12 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_init_stdout(); + /* + * Get default machine type. At this point, we do not differentiate + * between pSeries SMP and pSeries LPAR + */ + RELOC(of_platform) = prom_find_machine_type(); + /* Bail if this is a kdump kernel. */ if (PHYSICAL_START > 0) prom_panic("Error: You can't boot a kdump kernel from OF!\n"); @@ -2069,15 +2100,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_check_initrd(r3, r4); - /* - * Get default machine type. At this point, we do not differentiate - * between pSeries SMP and pSeries LPAR - */ - RELOC(of_platform) = prom_find_machine_type(); - getprop_rval = RELOC(of_platform); - prom_setprop(_prom->chosen, "/chosen", "linux,platform", - &getprop_rval, sizeof(getprop_rval)); - #ifdef CONFIG_PPC_PSERIES /* * On pSeries, inform the firmware about our capabilities diff --git a/arch/powerpc/kernel/ptrace-common.h b/arch/powerpc/kernel/ptrace-common.h index 5ccbdbe0d5c..c42a860c8d2 100644 --- a/arch/powerpc/kernel/ptrace-common.h +++ b/arch/powerpc/kernel/ptrace-common.h @@ -1,6 +1,4 @@ /* - * linux/arch/ppc64/kernel/ptrace-common.h - * * Copyright (c) 2002 Stephen Rothwell, IBM Coproration * Extracted from ptrace.c and ptrace32.c * diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 7a95b8a2835..456286cf1d1 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -1,5 +1,4 @@ /* - * arch/ppc64/kernel/rtas-proc.c * Copyright (C) 2000 Tilmann Bitterberg * (tilmann@bitterberg.de) * @@ -258,7 +257,7 @@ static int __init proc_rtas_init(void) { struct proc_dir_entry *entry; - if (_machine != PLATFORM_PSERIES && _machine != PLATFORM_PSERIES_LPAR) + if (!machine_is(pseries)) return 1; rtas_node = of_find_node_by_name(NULL, "rtas"); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index b5b2add7ad1..06636c927a7 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -25,6 +25,7 @@ #include <asm/hvcall.h> #include <asm/semaphore.h> #include <asm/machdep.h> +#include <asm/firmware.h> #include <asm/page.h> #include <asm/param.h> #include <asm/system.h> @@ -32,6 +33,7 @@ #include <asm/uaccess.h> #include <asm/lmb.h> #include <asm/udbg.h> +#include <asm/syscalls.h> struct rtas_t rtas = { .lock = SPIN_LOCK_UNLOCKED @@ -591,7 +593,7 @@ static void rtas_percpu_suspend_me(void *info) data->waiting = 0; data->args->args[data->args->nargs] = rtas_call(ibm_suspend_me_token, 0, 1, NULL); - for_each_cpu(i) + for_each_possible_cpu(i) plpar_hcall_norets(H_PROD,i); } else { data->waiting = -EBUSY; @@ -624,7 +626,7 @@ static int rtas_ibm_suspend_me(struct rtas_args *args) /* Prod each CPU. This won't hurt, and will wake * anyone we successfully put to sleep with H_Join */ - for_each_cpu(i) + for_each_possible_cpu(i) plpar_hcall_norets(H_PROD, i); return data.waiting; @@ -767,7 +769,7 @@ void __init rtas_initialize(void) * the stop-self token if any */ #ifdef CONFIG_PPC64 - if (_machine == PLATFORM_PSERIES_LPAR) { + if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 7442775ef2a..57b539a03fa 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -1,6 +1,4 @@ /* - * arch/ppc64/kernel/rtas_pci.c - * * Copyright (C) 2001 Dave Engebretsen, IBM Corporation * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM * diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index be12041c0fc..c607f3b9ca1 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -9,6 +9,9 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + +#undef DEBUG + #include <linux/config.h> #include <linux/module.h> #include <linux/string.h> @@ -41,6 +44,7 @@ #include <asm/time.h> #include <asm/cputable.h> #include <asm/sections.h> +#include <asm/firmware.h> #include <asm/btext.h> #include <asm/nvram.h> #include <asm/setup.h> @@ -56,8 +60,6 @@ #include "setup.h" -#undef DEBUG - #ifdef DEBUG #include <asm/udbg.h> #define DBG(fmt...) udbg_printf(fmt) @@ -65,10 +67,12 @@ #define DBG(fmt...) #endif -#ifdef CONFIG_PPC_MULTIPLATFORM -int _machine = 0; -EXPORT_SYMBOL(_machine); -#endif +/* The main machine-dep calls structure + */ +struct machdep_calls ppc_md; +EXPORT_SYMBOL(ppc_md); +struct machdep_calls *machine_id; +EXPORT_SYMBOL(machine_id); unsigned long klimit = (unsigned long) _end; @@ -162,14 +166,14 @@ static int show_cpuinfo(struct seq_file *m, void *v) #if defined(CONFIG_SMP) && defined(CONFIG_PPC32) unsigned long bogosum = 0; int i; - for (i = 0; i < NR_CPUS; ++i) - if (cpu_online(i)) - bogosum += loops_per_jiffy; + for_each_online_cpu(i) + bogosum += loops_per_jiffy; seq_printf(m, "total bogomips\t: %lu.%02lu\n", bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); #endif /* CONFIG_SMP && CONFIG_PPC32 */ seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); - + if (ppc_md.name) + seq_printf(m, "platform\t: %s\n", ppc_md.name); if (ppc_md.show_cpuinfo != NULL) ppc_md.show_cpuinfo(m); @@ -353,12 +357,13 @@ void __init check_for_initrd(void) * must be called before using this. * * While we're here, we may as well set the "physical" cpu ids in the paca. + * + * NOTE: This must match the parsing done in early_init_dt_scan_cpus. */ void __init smp_setup_cpu_maps(void) { struct device_node *dn = NULL; int cpu = 0; - int swap_cpuid = 0; while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { int *intserv; @@ -377,30 +382,17 @@ void __init smp_setup_cpu_maps(void) for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { cpu_set(cpu, cpu_present_map); set_hard_smp_processor_id(cpu, intserv[j]); - - if (intserv[j] == boot_cpuid_phys) - swap_cpuid = cpu; cpu_set(cpu, cpu_possible_map); cpu++; } } - /* Swap CPU id 0 with boot_cpuid_phys, so we can always assume that - * boot cpu is logical 0. - */ - if (boot_cpuid_phys != get_hard_smp_processor_id(0)) { - u32 tmp; - tmp = get_hard_smp_processor_id(0); - set_hard_smp_processor_id(0, boot_cpuid_phys); - set_hard_smp_processor_id(swap_cpuid, tmp); - } - #ifdef CONFIG_PPC64 /* * On pSeries LPAR, we need to know how many cpus * could possibly be added to this partition. */ - if (_machine == PLATFORM_PSERIES_LPAR && + if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) && (dn = of_find_node_by_path("/rtas"))) { int num_addr_cell, num_size_cell, maxcpus; unsigned int *ireg; @@ -439,7 +431,7 @@ void __init smp_setup_cpu_maps(void) /* * Do the sibling map; assume only two threads per processor. */ - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { cpu_set(cpu, cpu_sibling_map[cpu]); if (cpu_has_feature(CPU_FTR_SMT)) cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); @@ -469,3 +461,34 @@ static int __init early_xmon(char *p) } early_param("xmon", early_xmon); #endif + +void probe_machine(void) +{ + extern struct machdep_calls __machine_desc_start; + extern struct machdep_calls __machine_desc_end; + + /* + * Iterate all ppc_md structures until we find the proper + * one for the current machine type + */ + DBG("Probing machine type ...\n"); + + for (machine_id = &__machine_desc_start; + machine_id < &__machine_desc_end; + machine_id++) { + DBG(" %s ...", machine_id->name); + memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls)); + if (ppc_md.probe()) { + DBG(" match !\n"); + break; + } + DBG("\n"); + } + /* What can we do if we didn't find ? */ + if (machine_id >= &__machine_desc_end) { + DBG("No suitable machine found !\n"); + for (;;); + } + + printk(KERN_INFO "Using %s machine description\n", ppc_md.name); +} diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index db72a92943b..a72bf5dceee 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -53,9 +53,6 @@ extern void platform_init(void); extern void bootx_init(unsigned long r4, unsigned long phys); -extern void ppc6xx_idle(void); -extern void power4_idle(void); - boot_infos_t *boot_infos; struct ide_machdep_calls ppc_ide_md; @@ -70,10 +67,6 @@ unsigned int DMA_MODE_WRITE; int have_of = 1; #ifdef CONFIG_PPC_MULTIPLATFORM -extern void prep_init(void); -extern void pmac_init(void); -extern void chrp_init(void); - dev_t boot_dev; #endif /* CONFIG_PPC_MULTIPLATFORM */ @@ -85,9 +78,6 @@ unsigned long SYSRQ_KEY = 0x54; unsigned long vgacon_remap_base; #endif -struct machdep_calls ppc_md; -EXPORT_SYMBOL(ppc_md); - /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. @@ -111,7 +101,7 @@ unsigned long __init early_init(unsigned long dt_ptr) /* First zero the BSS -- use memset_io, some platforms don't have * caches on yet */ - memset_io(PTRRELOC(&__bss_start), 0, _end - __bss_start); + memset_io((void __iomem *)PTRRELOC(&__bss_start), 0, _end - __bss_start); /* * Identify the CPU type and fix up code sections @@ -123,48 +113,6 @@ unsigned long __init early_init(unsigned long dt_ptr) return KERNELBASE + offset; } -#ifdef CONFIG_PPC_MULTIPLATFORM -/* - * The PPC_MULTIPLATFORM version of platform_init... - */ -void __init platform_init(void) -{ - /* if we didn't get any bootinfo telling us what we are... */ - if (_machine == 0) { - /* prep boot loader tells us if we're prep or not */ - if ( *(unsigned long *)(KERNELBASE) == (0xdeadc0de) ) - _machine = _MACH_prep; - } - -#ifdef CONFIG_PPC_PREP - /* not much more to do here, if prep */ - if (_machine == _MACH_prep) { - prep_init(); - return; - } -#endif - -#ifdef CONFIG_ADB - if (strstr(cmd_line, "adb_sync")) { - extern int __adb_probe_sync; - __adb_probe_sync = 1; - } -#endif /* CONFIG_ADB */ - - switch (_machine) { -#ifdef CONFIG_PPC_PMAC - case _MACH_Pmac: - pmac_init(); - break; -#endif -#ifdef CONFIG_PPC_CHRP - case _MACH_chrp: - chrp_init(); - break; -#endif - } -} -#endif /* * Find out what kind of machine we're on and save any data we need @@ -190,11 +138,17 @@ void __init machine_init(unsigned long dt_ptr, unsigned long phys) strlcpy(cmd_line, CONFIG_CMDLINE, sizeof(cmd_line)); #endif /* CONFIG_CMDLINE */ - /* Base init based on machine type */ +#ifdef CONFIG_PPC_MULTIPLATFORM + probe_machine(); +#else + /* Base init based on machine type. Obsoloete, please kill ! */ platform_init(); +#endif #ifdef CONFIG_6xx - ppc_md.power_save = ppc6xx_idle; + if (cpu_has_feature(CPU_FTR_CAN_DOZE) || + cpu_has_feature(CPU_FTR_CAN_NAP)) + ppc_md.power_save = ppc6xx_idle; #endif if (ppc_md.progress) @@ -272,9 +226,8 @@ int __init ppc_init(void) if ( ppc_md.progress ) ppc_md.progress(" ", 0xffff); /* register CPU devices */ - for (i = 0; i < NR_CPUS; i++) - if (cpu_possible(i)) - register_cpu(&cpu_devices[i], i, NULL); + for_each_possible_cpu(i) + register_cpu(&cpu_devices[i], i, NULL); /* call platform init */ if (ppc_md.init != NULL) { @@ -353,12 +306,6 @@ void __init setup_arch(char **cmdline_p) do_init_bootmem(); if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); -#ifdef CONFIG_PPC_OCP - /* Initialize OCP device list */ - ocp_early_init(); - if ( ppc_md.progress ) ppc_md.progress("ocp: exit", 0x3eab); -#endif - #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif @@ -367,7 +314,4 @@ void __init setup_arch(char **cmdline_p) if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); paging_init(); - - /* this is for modules since _machine can be a define -- Cort */ - ppc_md.ppc_machine = _machine; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index f96c49b03ba..59aa92cd6fa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -73,7 +73,6 @@ int have_of = 1; int boot_cpuid = 0; -int boot_cpuid_phys = 0; dev_t boot_dev; u64 ppc64_pft_size; @@ -96,11 +95,6 @@ int dcache_bsize; int icache_bsize; int ucache_bsize; -/* The main machine-dep calls structure - */ -struct machdep_calls ppc_md; -EXPORT_SYMBOL(ppc_md); - #ifdef CONFIG_MAGIC_SYSRQ unsigned long SYSRQ_KEY; #endif /* CONFIG_MAGIC_SYSRQ */ @@ -161,32 +155,6 @@ early_param("smt-enabled", early_smt_enabled); #define check_smt_enabled() #endif /* CONFIG_SMP */ -extern struct machdep_calls pSeries_md; -extern struct machdep_calls pmac_md; -extern struct machdep_calls maple_md; -extern struct machdep_calls cell_md; -extern struct machdep_calls iseries_md; - -/* Ultimately, stuff them in an elf section like initcalls... */ -static struct machdep_calls __initdata *machines[] = { -#ifdef CONFIG_PPC_PSERIES - &pSeries_md, -#endif /* CONFIG_PPC_PSERIES */ -#ifdef CONFIG_PPC_PMAC - &pmac_md, -#endif /* CONFIG_PPC_PMAC */ -#ifdef CONFIG_PPC_MAPLE - &maple_md, -#endif /* CONFIG_PPC_MAPLE */ -#ifdef CONFIG_PPC_CELL - &cell_md, -#endif -#ifdef CONFIG_PPC_ISERIES - &iseries_md, -#endif - NULL -}; - /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of @@ -208,13 +176,10 @@ static struct machdep_calls __initdata *machines[] = { void __init early_setup(unsigned long dt_ptr) { - struct paca_struct *lpaca = get_paca(); - static struct machdep_calls **mach; - /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - DBG(" -> early_setup()\n"); + DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr); /* * Do early initializations using the flattened device @@ -223,22 +188,16 @@ void __init early_setup(unsigned long dt_ptr) */ early_init_devtree(__va(dt_ptr)); - /* - * Iterate all ppc_md structures until we find the proper - * one for the current machine type - */ - DBG("Probing machine type for platform %x...\n", _machine); + /* Now we know the logical id of our boot cpu, setup the paca. */ + setup_boot_paca(); - for (mach = machines; *mach; mach++) { - if ((*mach)->probe(_machine)) - break; - } - /* What can we do if we didn't find ? */ - if (*mach == NULL) { - DBG("No suitable machine found !\n"); - for (;;); - } - ppc_md = **mach; + /* Fix up paca fields required for the boot cpu */ + get_paca()->cpu_start = 1; + get_paca()->stab_real = __pa((u64)&initial_stab); + get_paca()->stab_addr = (u64)&initial_stab; + + /* Probe the machine type */ + probe_machine(); #ifdef CONFIG_CRASH_DUMP kdump_setup(); @@ -260,7 +219,7 @@ void __init early_setup(unsigned long dt_ptr) if (cpu_has_feature(CPU_FTR_SLB)) slb_initialize(); else - stab_initialize(lpaca->stab_real); + stab_initialize(get_paca()->stab_real); } DBG(" <- early_setup()\n"); @@ -340,7 +299,7 @@ static void __init initialize_cache_info(void) const char *dc, *ic; /* Then read cache informations */ - if (_machine == PLATFORM_POWERMAC) { + if (machine_is(powermac)) { dc = "d-cache-block-size"; ic = "i-cache-block-size"; } else { @@ -484,7 +443,6 @@ void __init setup_system(void) printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size); printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller); - printk("platform = 0x%x\n", _machine); printk("physicalMemorySize = 0x%lx\n", lmb_phys_mem_size()); printk("ppc64_caches.dcache_line_size = 0x%x\n", ppc64_caches.dline_size); @@ -497,8 +455,6 @@ void __init setup_system(void) #endif printk("-----------------------------------------------------\n"); - mm_init_ppc64(); - DBG(" <- setup_system()\n"); } @@ -518,7 +474,7 @@ static void __init irqstack_early_init(void) * interrupt stacks must be under 256MB, we cannot afford to take * SLB misses on them. */ - for_each_cpu(i) { + for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) __va(lmb_alloc_base(THREAD_SIZE, THREAD_SIZE, 0x10000000)); @@ -551,7 +507,7 @@ static void __init emergency_stack_init(void) */ limit = min(0x10000000UL, lmb.rmo_size); - for_each_cpu(i) + for_each_possible_cpu(i) paca[i].emergency_sp = __va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE; } @@ -581,7 +537,8 @@ void __init setup_arch(char **cmdline_p) panic_timeout = 180; if (ppc_md.panic) - notifier_chain_register(&panic_notifier_list, &ppc64_panic_block); + atomic_notifier_chain_register(&panic_notifier_list, + &ppc64_panic_block); init_mm.start_code = PAGE_OFFSET; init_mm.end_code = (unsigned long) _etext; @@ -603,12 +560,6 @@ void __init setup_arch(char **cmdline_p) ppc_md.setup_arch(); - /* Use the default idle loop if the platform hasn't provided one. */ - if (NULL == ppc_md.idle_loop) { - ppc_md.idle_loop = default_idle; - printk(KERN_INFO "Using default idle loop\n"); - } - paging_init(); ppc64_boot_msg(0x15, "Setup Done"); } @@ -673,7 +624,7 @@ void __init setup_per_cpu_areas(void) size = PERCPU_ENOUGH_ROOM; #endif - for_each_cpu(i) { + for_each_possible_cpu(i) { ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); if (!ptr) panic("Cannot allocate cpu data for CPU %d\n", i); diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d7a4e814974..01e3c08cb55 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -42,6 +42,7 @@ #include <asm/uaccess.h> #include <asm/cacheflush.h> +#include <asm/syscalls.h> #include <asm/sigcontext.h> #include <asm/vdso.h> #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 4324f8a8ba2..27f65b95184 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -1,6 +1,4 @@ /* - * linux/arch/ppc64/kernel/signal.c - * * PowerPC version * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) * @@ -35,6 +33,7 @@ #include <asm/pgtable.h> #include <asm/unistd.h> #include <asm/cacheflush.h> +#include <asm/syscalls.h> #include <asm/vdso.h> #define DEBUG_SIG 0 @@ -213,7 +212,7 @@ static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs /* Default to using normal stack */ newsp = regs->gpr[1]; - if (ka->sa.sa_flags & SA_ONSTACK) { + if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size) { if (! on_sig_stack(regs->gpr[1])) newsp = (current->sas_ss_sp + current->sas_ss_size); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 13595a64f01..530f7dba0bd 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -362,7 +362,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) smp_space_timers(max_cpus); - for_each_cpu(cpu) + for_each_possible_cpu(cpu) if (cpu != boot_cpuid) smp_create_idle(cpu); } @@ -541,7 +541,7 @@ int __devinit start_secondary(void *unused) smp_ops->take_timebase(); if (system_state > SYSTEM_BOOTING) - per_cpu(last_jiffy, cpu) = get_tb(); + snapshot_timebase(); spin_lock(&call_lock); cpu_set(cpu, cpu_online_map); @@ -573,6 +573,8 @@ void __init smp_cpus_done(unsigned int max_cpus) set_cpus_allowed(current, old_mask); + snapshot_timebases(); + dump_numa_cpu_topology(); } diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S new file mode 100644 index 00000000000..69773cc1a85 --- /dev/null +++ b/arch/powerpc/kernel/swsusp_32.S @@ -0,0 +1,349 @@ +#include <linux/config.h> +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + + +/* + * Structure for storing CPU registers on the save area. + */ +#define SL_SP 0 +#define SL_PC 4 +#define SL_MSR 8 +#define SL_SDR1 0xc +#define SL_SPRG0 0x10 /* 4 sprg's */ +#define SL_DBAT0 0x20 +#define SL_IBAT0 0x28 +#define SL_DBAT1 0x30 +#define SL_IBAT1 0x38 +#define SL_DBAT2 0x40 +#define SL_IBAT2 0x48 +#define SL_DBAT3 0x50 +#define SL_IBAT3 0x58 +#define SL_TB 0x60 +#define SL_R2 0x68 +#define SL_CR 0x6c +#define SL_LR 0x70 +#define SL_R12 0x74 /* r12 to r31 */ +#define SL_SIZE (SL_R12 + 80) + + .section .data + .align 5 + +_GLOBAL(swsusp_save_area) + .space SL_SIZE + + + .section .text + .align 5 + +_GLOBAL(swsusp_arch_suspend) + + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + + mflr r0 + stw r0,SL_LR(r11) + mfcr r0 + stw r0,SL_CR(r11) + stw r1,SL_SP(r11) + stw r2,SL_R2(r11) + stmw r12,SL_R12(r11) + + /* Save MSR & SDR1 */ + mfmsr r4 + stw r4,SL_MSR(r11) + mfsdr1 r4 + stw r4,SL_SDR1(r11) + + /* Get a stable timebase and save it */ +1: mftbu r4 + stw r4,SL_TB(r11) + mftb r5 + stw r5,SL_TB+4(r11) + mftbu r3 + cmpw r3,r4 + bne 1b + + /* Save SPRGs */ + mfsprg r4,0 + stw r4,SL_SPRG0(r11) + mfsprg r4,1 + stw r4,SL_SPRG0+4(r11) + mfsprg r4,2 + stw r4,SL_SPRG0+8(r11) + mfsprg r4,3 + stw r4,SL_SPRG0+12(r11) + + /* Save BATs */ + mfdbatu r4,0 + stw r4,SL_DBAT0(r11) + mfdbatl r4,0 + stw r4,SL_DBAT0+4(r11) + mfdbatu r4,1 + stw r4,SL_DBAT1(r11) + mfdbatl r4,1 + stw r4,SL_DBAT1+4(r11) + mfdbatu r4,2 + stw r4,SL_DBAT2(r11) + mfdbatl r4,2 + stw r4,SL_DBAT2+4(r11) + mfdbatu r4,3 + stw r4,SL_DBAT3(r11) + mfdbatl r4,3 + stw r4,SL_DBAT3+4(r11) + mfibatu r4,0 + stw r4,SL_IBAT0(r11) + mfibatl r4,0 + stw r4,SL_IBAT0+4(r11) + mfibatu r4,1 + stw r4,SL_IBAT1(r11) + mfibatl r4,1 + stw r4,SL_IBAT1+4(r11) + mfibatu r4,2 + stw r4,SL_IBAT2(r11) + mfibatl r4,2 + stw r4,SL_IBAT2+4(r11) + mfibatu r4,3 + stw r4,SL_IBAT3(r11) + mfibatl r4,3 + stw r4,SL_IBAT3+4(r11) + +#if 0 + /* Backup various CPU config stuffs */ + bl __save_cpu_setup +#endif + /* Call the low level suspend stuff (we should probably have made + * a stackframe... + */ + bl swsusp_save + + /* Restore LR from the save area */ + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + lwz r0,SL_LR(r11) + mtlr r0 + + blr + + +/* Resume code */ +_GLOBAL(swsusp_arch_resume) + + /* Stop pending alitvec streams and memory accesses */ +BEGIN_FTR_SECTION + DSSALL +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + sync + + /* Disable MSR:DR to make sure we don't take a TLB or + * hash miss during the copy, as our hash table will + * for a while be unuseable. For .text, we assume we are + * covered by a BAT. This works only for non-G5 at this + * point. G5 will need a better approach, possibly using + * a small temporary hash table filled with large mappings, + * disabling the MMU completely isn't a good option for + * performance reasons. + * (Note that 750's may have the same performance issue as + * the G5 in this case, we should investigate using moving + * BATs for these CPUs) + */ + mfmsr r0 + sync + rlwinm r0,r0,0,28,26 /* clear MSR_DR */ + mtmsr r0 + sync + isync + + /* Load ptr the list of pages to copy in r3 */ + lis r11,(pagedir_nosave - KERNELBASE)@h + ori r11,r11,pagedir_nosave@l + lwz r10,0(r11) + + /* Copy the pages. This is a very basic implementation, to + * be replaced by something more cache efficient */ +1: + tophys(r3,r10) + li r0,256 + mtctr r0 + lwz r11,pbe_address(r3) /* source */ + tophys(r5,r11) + lwz r10,pbe_orig_address(r3) /* destination */ + tophys(r6,r10) +2: + lwz r8,0(r5) + lwz r9,4(r5) + lwz r10,8(r5) + lwz r11,12(r5) + addi r5,r5,16 + stw r8,0(r6) + stw r9,4(r6) + stw r10,8(r6) + stw r11,12(r6) + addi r6,r6,16 + bdnz 2b + lwz r10,pbe_next(r3) + cmpwi 0,r10,0 + bne 1b + + /* Do a very simple cache flush/inval of the L1 to ensure + * coherency of the icache + */ + lis r3,0x0002 + mtctr r3 + li r3, 0 +1: + lwz r0,0(r3) + addi r3,r3,0x0020 + bdnz 1b + isync + sync + + /* Now flush those cache lines */ + lis r3,0x0002 + mtctr r3 + li r3, 0 +1: + dcbf 0,r3 + addi r3,r3,0x0020 + bdnz 1b + sync + + /* Ok, we are now running with the kernel data of the old + * kernel fully restored. We can get to the save area + * easily now. As for the rest of the code, it assumes the + * loader kernel and the booted one are exactly identical + */ + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + tophys(r11,r11) + +#if 0 + /* Restore various CPU config stuffs */ + bl __restore_cpu_setup +#endif + /* Restore the BATs, and SDR1. Then we can turn on the MMU. + * This is a bit hairy as we are running out of those BATs, + * but first, our code is probably in the icache, and we are + * writing the same value to the BAT, so that should be fine, + * though a better solution will have to be found long-term + */ + lwz r4,SL_SDR1(r11) + mtsdr1 r4 + lwz r4,SL_SPRG0(r11) + mtsprg 0,r4 + lwz r4,SL_SPRG0+4(r11) + mtsprg 1,r4 + lwz r4,SL_SPRG0+8(r11) + mtsprg 2,r4 + lwz r4,SL_SPRG0+12(r11) + mtsprg 3,r4 + +#if 0 + lwz r4,SL_DBAT0(r11) + mtdbatu 0,r4 + lwz r4,SL_DBAT0+4(r11) + mtdbatl 0,r4 + lwz r4,SL_DBAT1(r11) + mtdbatu 1,r4 + lwz r4,SL_DBAT1+4(r11) + mtdbatl 1,r4 + lwz r4,SL_DBAT2(r11) + mtdbatu 2,r4 + lwz r4,SL_DBAT2+4(r11) + mtdbatl 2,r4 + lwz r4,SL_DBAT3(r11) + mtdbatu 3,r4 + lwz r4,SL_DBAT3+4(r11) + mtdbatl 3,r4 + lwz r4,SL_IBAT0(r11) + mtibatu 0,r4 + lwz r4,SL_IBAT0+4(r11) + mtibatl 0,r4 + lwz r4,SL_IBAT1(r11) + mtibatu 1,r4 + lwz r4,SL_IBAT1+4(r11) + mtibatl 1,r4 + lwz r4,SL_IBAT2(r11) + mtibatu 2,r4 + lwz r4,SL_IBAT2+4(r11) + mtibatl 2,r4 + lwz r4,SL_IBAT3(r11) + mtibatu 3,r4 + lwz r4,SL_IBAT3+4(r11) + mtibatl 3,r4 +#endif + +BEGIN_FTR_SECTION + li r4,0 + mtspr SPRN_DBAT4U,r4 + mtspr SPRN_DBAT4L,r4 + mtspr SPRN_DBAT5U,r4 + mtspr SPRN_DBAT5L,r4 + mtspr SPRN_DBAT6U,r4 + mtspr SPRN_DBAT6L,r4 + mtspr SPRN_DBAT7U,r4 + mtspr SPRN_DBAT7L,r4 + mtspr SPRN_IBAT4U,r4 + mtspr SPRN_IBAT4L,r4 + mtspr SPRN_IBAT5U,r4 + mtspr SPRN_IBAT5L,r4 + mtspr SPRN_IBAT6U,r4 + mtspr SPRN_IBAT6L,r4 + mtspr SPRN_IBAT7U,r4 + mtspr SPRN_IBAT7L,r4 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS) + + /* Flush all TLBs */ + lis r4,0x1000 +1: addic. r4,r4,-0x1000 + tlbie r4 + blt 1b + sync + + /* restore the MSR and turn on the MMU */ + lwz r3,SL_MSR(r11) + bl turn_on_mmu + tovirt(r11,r11) + + /* Restore TB */ + li r3,0 + mttbl r3 + lwz r3,SL_TB(r11) + lwz r4,SL_TB+4(r11) + mttbu r3 + mttbl r4 + + /* Kick decrementer */ + li r0,1 + mtdec r0 + + /* Restore the callee-saved registers and return */ + lwz r0,SL_CR(r11) + mtcr r0 + lwz r2,SL_R2(r11) + lmw r12,SL_R12(r11) + lwz r1,SL_SP(r11) + lwz r0,SL_LR(r11) + mtlr r0 + + // XXX Note: we don't really need to call swsusp_resume + + li r3,0 + blr + +/* FIXME:This construct is actually not useful since we don't shut + * down the instruction MMU, we could just flip back MSR-DR on. + */ +turn_on_mmu: + mflr r4 + mtsrr0 r4 + mtsrr1 r3 + sync + isync + rfi + diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index cd75ab2908f..ec274e68881 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -24,7 +24,6 @@ #include <linux/resource.h> #include <linux/times.h> #include <linux/utsname.h> -#include <linux/timex.h> #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/sem.h> @@ -161,78 +160,6 @@ asmlinkage long compat_sys_sysfs(u32 option, u32 arg1, u32 arg2) return sys_sysfs((int)option, arg1, arg2); } -/* Handle adjtimex compatibility. */ -struct timex32 { - u32 modes; - s32 offset, freq, maxerror, esterror; - s32 status, constant, precision, tolerance; - struct compat_timeval time; - s32 tick; - s32 ppsfreq, jitter, shift, stabil; - s32 jitcnt, calcnt, errcnt, stbcnt; - s32 :32; s32 :32; s32 :32; s32 :32; - s32 :32; s32 :32; s32 :32; s32 :32; - s32 :32; s32 :32; s32 :32; s32 :32; -}; - -extern int do_adjtimex(struct timex *); - -asmlinkage long compat_sys_adjtimex(struct timex32 __user *utp) -{ - struct timex txc; - int ret; - - memset(&txc, 0, sizeof(struct timex)); - - if(get_user(txc.modes, &utp->modes) || - __get_user(txc.offset, &utp->offset) || - __get_user(txc.freq, &utp->freq) || - __get_user(txc.maxerror, &utp->maxerror) || - __get_user(txc.esterror, &utp->esterror) || - __get_user(txc.status, &utp->status) || - __get_user(txc.constant, &utp->constant) || - __get_user(txc.precision, &utp->precision) || - __get_user(txc.tolerance, &utp->tolerance) || - __get_user(txc.time.tv_sec, &utp->time.tv_sec) || - __get_user(txc.time.tv_usec, &utp->time.tv_usec) || - __get_user(txc.tick, &utp->tick) || - __get_user(txc.ppsfreq, &utp->ppsfreq) || - __get_user(txc.jitter, &utp->jitter) || - __get_user(txc.shift, &utp->shift) || - __get_user(txc.stabil, &utp->stabil) || - __get_user(txc.jitcnt, &utp->jitcnt) || - __get_user(txc.calcnt, &utp->calcnt) || - __get_user(txc.errcnt, &utp->errcnt) || - __get_user(txc.stbcnt, &utp->stbcnt)) - return -EFAULT; - - ret = do_adjtimex(&txc); - - if(put_user(txc.modes, &utp->modes) || - __put_user(txc.offset, &utp->offset) || - __put_user(txc.freq, &utp->freq) || - __put_user(txc.maxerror, &utp->maxerror) || - __put_user(txc.esterror, &utp->esterror) || - __put_user(txc.status, &utp->status) || - __put_user(txc.constant, &utp->constant) || - __put_user(txc.precision, &utp->precision) || - __put_user(txc.tolerance, &utp->tolerance) || - __put_user(txc.time.tv_sec, &utp->time.tv_sec) || - __put_user(txc.time.tv_usec, &utp->time.tv_usec) || - __put_user(txc.tick, &utp->tick) || - __put_user(txc.ppsfreq, &utp->ppsfreq) || - __put_user(txc.jitter, &utp->jitter) || - __put_user(txc.shift, &utp->shift) || - __put_user(txc.stabil, &utp->stabil) || - __put_user(txc.jitcnt, &utp->jitcnt) || - __put_user(txc.calcnt, &utp->calcnt) || - __put_user(txc.errcnt, &utp->errcnt) || - __put_user(txc.stbcnt, &utp->stbcnt)) - ret = -EFAULT; - - return ret; -} - asmlinkage long compat_sys_pause(void) { current->state = TASK_INTERRUPTIBLE; diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index ad895c99813..9b69d99a910 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -40,6 +40,7 @@ #include <asm/uaccess.h> #include <asm/ipc.h> #include <asm/semaphore.h> +#include <asm/syscalls.h> #include <asm/time.h> #include <asm/unistd.h> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 0f0c3a9ae2e..73560ef6f80 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -65,20 +65,20 @@ static int __init smt_setup(void) unsigned int cpu; if (!cpu_has_feature(CPU_FTR_SMT)) - return 1; + return -ENODEV; options = find_path_device("/options"); if (!options) - return 1; + return -ENODEV; val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay", NULL); if (!smt_snooze_cmdline && val) { - for_each_cpu(cpu) + for_each_possible_cpu(cpu) per_cpu(smt_snooze_delay, cpu) = *val; } - return 1; + return 0; } __initcall(smt_setup); @@ -93,7 +93,7 @@ static int __init setup_smt_snooze_delay(char *str) smt_snooze_cmdline = 1; if (get_option(&str, &snooze)) { - for_each_cpu(cpu) + for_each_possible_cpu(cpu) per_cpu(smt_snooze_delay, cpu) = snooze; } @@ -347,7 +347,7 @@ static int __init topology_init(void) register_cpu_notifier(&sysfs_cpu_nb); - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); #ifdef CONFIG_NUMA diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c new file mode 100644 index 00000000000..26bd8ea35a4 --- /dev/null +++ b/arch/powerpc/kernel/tau_6xx.c @@ -0,0 +1,271 @@ +/* + * temp.c Thermal management for cpu's with Thermal Assist Units + * + * Written by Troy Benjegerdes <hozer@drgw.net> + * + * TODO: + * dynamic power management to limit peak CPU temp (using ICTC) + * calibration??? + * + * Silly, crazy ideas: use cpu load (from scheduler) and ICTC to extend battery + * life in portables, and add a 'performance/watt' metric somewhere in /proc + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/init.h> + +#include <asm/io.h> +#include <asm/reg.h> +#include <asm/nvram.h> +#include <asm/cache.h> +#include <asm/8xx_immap.h> +#include <asm/machdep.h> + +static struct tau_temp +{ + int interrupts; + unsigned char low; + unsigned char high; + unsigned char grew; +} tau[NR_CPUS]; + +struct timer_list tau_timer; + +#undef DEBUG + +/* TODO: put these in a /proc interface, with some sanity checks, and maybe + * dynamic adjustment to minimize # of interrupts */ +/* configurable values for step size and how much to expand the window when + * we get an interrupt. These are based on the limit that was out of range */ +#define step_size 2 /* step size when temp goes out of range */ +#define window_expand 1 /* expand the window by this much */ +/* configurable values for shrinking the window */ +#define shrink_timer 2*HZ /* period between shrinking the window */ +#define min_window 2 /* minimum window size, degrees C */ + +void set_thresholds(unsigned long cpu) +{ +#ifdef CONFIG_TAU_INT + /* + * setup THRM1, + * threshold, valid bit, enable interrupts, interrupt when below threshold + */ + mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID); + + /* setup THRM2, + * threshold, valid bit, enable interrupts, interrupt when above threshhold + */ + mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE); +#else + /* same thing but don't enable interrupts */ + mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID); + mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V); +#endif +} + +void TAUupdate(int cpu) +{ + unsigned thrm; + +#ifdef DEBUG + printk("TAUupdate "); +#endif + + /* if both thresholds are crossed, the step_sizes cancel out + * and the window winds up getting expanded twice. */ + if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */ + if(thrm & THRM1_TIN){ /* crossed low threshold */ + if (tau[cpu].low >= step_size){ + tau[cpu].low -= step_size; + tau[cpu].high -= (step_size - window_expand); + } + tau[cpu].grew = 1; +#ifdef DEBUG + printk("low threshold crossed "); +#endif + } + } + if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */ + if(thrm & THRM1_TIN){ /* crossed high threshold */ + if (tau[cpu].high <= 127-step_size){ + tau[cpu].low += (step_size - window_expand); + tau[cpu].high += step_size; + } + tau[cpu].grew = 1; +#ifdef DEBUG + printk("high threshold crossed "); +#endif + } + } + +#ifdef DEBUG + printk("grew = %d\n", tau[cpu].grew); +#endif + +#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */ + set_thresholds(cpu); +#endif + +} + +#ifdef CONFIG_TAU_INT +/* + * TAU interrupts - called when we have a thermal assist unit interrupt + * with interrupts disabled + */ + +void TAUException(struct pt_regs * regs) +{ + int cpu = smp_processor_id(); + + irq_enter(); + tau[cpu].interrupts++; + + TAUupdate(cpu); + + irq_exit(); +} +#endif /* CONFIG_TAU_INT */ + +static void tau_timeout(void * info) +{ + int cpu; + unsigned long flags; + int size; + int shrink; + + /* disabling interrupts *should* be okay */ + local_irq_save(flags); + cpu = smp_processor_id(); + +#ifndef CONFIG_TAU_INT + TAUupdate(cpu); +#endif + + size = tau[cpu].high - tau[cpu].low; + if (size > min_window && ! tau[cpu].grew) { + /* do an exponential shrink of half the amount currently over size */ + shrink = (2 + size - min_window) / 4; + if (shrink) { + tau[cpu].low += shrink; + tau[cpu].high -= shrink; + } else { /* size must have been min_window + 1 */ + tau[cpu].low += 1; +#if 1 /* debug */ + if ((tau[cpu].high - tau[cpu].low) != min_window){ + printk(KERN_ERR "temp.c: line %d, logic error\n", __LINE__); + } +#endif + } + } + + tau[cpu].grew = 0; + + set_thresholds(cpu); + + /* + * Do the enable every time, since otherwise a bunch of (relatively) + * complex sleep code needs to be added. One mtspr every time + * tau_timeout is called is probably not a big deal. + * + * Enable thermal sensor and set up sample interval timer + * need 20 us to do the compare.. until a nice 'cpu_speed' function + * call is implemented, just assume a 500 mhz clock. It doesn't really + * matter if we take too long for a compare since it's all interrupt + * driven anyway. + * + * use a extra long time.. (60 us @ 500 mhz) + */ + mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E); + + local_irq_restore(flags); +} + +static void tau_timeout_smp(unsigned long unused) +{ + + /* schedule ourselves to be run again */ + mod_timer(&tau_timer, jiffies + shrink_timer) ; + on_each_cpu(tau_timeout, NULL, 1, 0); +} + +/* + * setup the TAU + * + * Set things up to use THRM1 as a temperature lower bound, and THRM2 as an upper bound. + * Start off at zero + */ + +int tau_initialized = 0; + +void __init TAU_init_smp(void * info) +{ + unsigned long cpu = smp_processor_id(); + + /* set these to a reasonable value and let the timer shrink the + * window */ + tau[cpu].low = 5; + tau[cpu].high = 120; + + set_thresholds(cpu); +} + +int __init TAU_init(void) +{ + /* We assume in SMP that if one CPU has TAU support, they + * all have it --BenH + */ + if (!cpu_has_feature(CPU_FTR_TAU)) { + printk("Thermal assist unit not available\n"); + tau_initialized = 0; + return 1; + } + + + /* first, set up the window shrinking timer */ + init_timer(&tau_timer); + tau_timer.function = tau_timeout_smp; + tau_timer.expires = jiffies + shrink_timer; + add_timer(&tau_timer); + + on_each_cpu(TAU_init_smp, NULL, 1, 0); + + printk("Thermal assist unit "); +#ifdef CONFIG_TAU_INT + printk("using interrupts, "); +#else + printk("using timers, "); +#endif + printk("shrink_timer: %d jiffies\n", shrink_timer); + tau_initialized = 1; + + return 0; +} + +__initcall(TAU_init); + +/* + * return current temp + */ + +u32 cpu_temp_both(unsigned long cpu) +{ + return ((tau[cpu].high << 16) | tau[cpu].low); +} + +int cpu_temp(unsigned long cpu) +{ + return ((tau[cpu].high + tau[cpu].low) / 2); +} + +int tau_interrupts(unsigned long cpu) +{ + return (tau[cpu].interrupts); +} diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 86f7e3d154d..24e3ad756de 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -51,6 +51,7 @@ #include <linux/percpu.h> #include <linux/rtc.h> #include <linux/jiffies.h> +#include <linux/posix-timers.h> #include <asm/io.h> #include <asm/processor.h> @@ -98,6 +99,7 @@ unsigned long tb_ticks_per_jiffy; unsigned long tb_ticks_per_usec = 100; /* sane default */ EXPORT_SYMBOL(tb_ticks_per_usec); unsigned long tb_ticks_per_sec; +EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */ u64 tb_to_xs; unsigned tb_to_us; @@ -135,6 +137,224 @@ unsigned long tb_last_stamp; */ DEFINE_PER_CPU(unsigned long, last_jiffy); +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +/* + * Factors for converting from cputime_t (timebase ticks) to + * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds). + * These are all stored as 0.64 fixed-point binary fractions. + */ +u64 __cputime_jiffies_factor; +EXPORT_SYMBOL(__cputime_jiffies_factor); +u64 __cputime_msec_factor; +EXPORT_SYMBOL(__cputime_msec_factor); +u64 __cputime_sec_factor; +EXPORT_SYMBOL(__cputime_sec_factor); +u64 __cputime_clockt_factor; +EXPORT_SYMBOL(__cputime_clockt_factor); + +static void calc_cputime_factors(void) +{ + struct div_result res; + + div128_by_32(HZ, 0, tb_ticks_per_sec, &res); + __cputime_jiffies_factor = res.result_low; + div128_by_32(1000, 0, tb_ticks_per_sec, &res); + __cputime_msec_factor = res.result_low; + div128_by_32(1, 0, tb_ticks_per_sec, &res); + __cputime_sec_factor = res.result_low; + div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res); + __cputime_clockt_factor = res.result_low; +} + +/* + * Read the PURR on systems that have it, otherwise the timebase. + */ +static u64 read_purr(void) +{ + if (cpu_has_feature(CPU_FTR_PURR)) + return mfspr(SPRN_PURR); + return mftb(); +} + +/* + * Account time for a transition between system, hard irq + * or soft irq state. + */ +void account_system_vtime(struct task_struct *tsk) +{ + u64 now, delta; + unsigned long flags; + + local_irq_save(flags); + now = read_purr(); + delta = now - get_paca()->startpurr; + get_paca()->startpurr = now; + if (!in_interrupt()) { + delta += get_paca()->system_time; + get_paca()->system_time = 0; + } + account_system_time(tsk, 0, delta); + local_irq_restore(flags); +} + +/* + * Transfer the user and system times accumulated in the paca + * by the exception entry and exit code to the generic process + * user and system time records. + * Must be called with interrupts disabled. + */ +void account_process_vtime(struct task_struct *tsk) +{ + cputime_t utime; + + utime = get_paca()->user_time; + get_paca()->user_time = 0; + account_user_time(tsk, utime); +} + +static void account_process_time(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + account_process_vtime(current); + run_local_timers(); + if (rcu_pending(cpu)) + rcu_check_callbacks(cpu, user_mode(regs)); + scheduler_tick(); + run_posix_cpu_timers(current); +} + +#ifdef CONFIG_PPC_SPLPAR +/* + * Stuff for accounting stolen time. + */ +struct cpu_purr_data { + int initialized; /* thread is running */ + u64 tb0; /* timebase at origin time */ + u64 purr0; /* PURR at origin time */ + u64 tb; /* last TB value read */ + u64 purr; /* last PURR value read */ + u64 stolen; /* stolen time so far */ + spinlock_t lock; +}; + +static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data); + +static void snapshot_tb_and_purr(void *data) +{ + struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data); + + p->tb0 = mftb(); + p->purr0 = mfspr(SPRN_PURR); + p->tb = p->tb0; + p->purr = 0; + wmb(); + p->initialized = 1; +} + +/* + * Called during boot when all cpus have come up. + */ +void snapshot_timebases(void) +{ + int cpu; + + if (!cpu_has_feature(CPU_FTR_PURR)) + return; + for_each_possible_cpu(cpu) + spin_lock_init(&per_cpu(cpu_purr_data, cpu).lock); + on_each_cpu(snapshot_tb_and_purr, NULL, 0, 1); +} + +void calculate_steal_time(void) +{ + u64 tb, purr, t0; + s64 stolen; + struct cpu_purr_data *p0, *pme, *phim; + int cpu; + + if (!cpu_has_feature(CPU_FTR_PURR)) + return; + cpu = smp_processor_id(); + pme = &per_cpu(cpu_purr_data, cpu); + if (!pme->initialized) + return; /* this can happen in early boot */ + p0 = &per_cpu(cpu_purr_data, cpu & ~1); + phim = &per_cpu(cpu_purr_data, cpu ^ 1); + spin_lock(&p0->lock); + tb = mftb(); + purr = mfspr(SPRN_PURR) - pme->purr0; + if (!phim->initialized || !cpu_online(cpu ^ 1)) { + stolen = (tb - pme->tb) - (purr - pme->purr); + } else { + t0 = pme->tb0; + if (phim->tb0 < t0) + t0 = phim->tb0; + stolen = phim->tb - t0 - phim->purr - purr - p0->stolen; + } + if (stolen > 0) { + account_steal_time(current, stolen); + p0->stolen += stolen; + } + pme->tb = tb; + pme->purr = purr; + spin_unlock(&p0->lock); +} + +/* + * Must be called before the cpu is added to the online map when + * a cpu is being brought up at runtime. + */ +static void snapshot_purr(void) +{ + int cpu; + u64 purr; + struct cpu_purr_data *p0, *pme, *phim; + unsigned long flags; + + if (!cpu_has_feature(CPU_FTR_PURR)) + return; + cpu = smp_processor_id(); + pme = &per_cpu(cpu_purr_data, cpu); + p0 = &per_cpu(cpu_purr_data, cpu & ~1); + phim = &per_cpu(cpu_purr_data, cpu ^ 1); + spin_lock_irqsave(&p0->lock, flags); + pme->tb = pme->tb0 = mftb(); + purr = mfspr(SPRN_PURR); + if (!phim->initialized) { + pme->purr = 0; + pme->purr0 = purr; + } else { + /* set p->purr and p->purr0 for no change in p0->stolen */ + pme->purr = phim->tb - phim->tb0 - phim->purr - p0->stolen; + pme->purr0 = purr - pme->purr; + } + pme->initialized = 1; + spin_unlock_irqrestore(&p0->lock, flags); +} + +#endif /* CONFIG_PPC_SPLPAR */ + +#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ +#define calc_cputime_factors() +#define account_process_time(regs) update_process_times(user_mode(regs)) +#define calculate_steal_time() do { } while (0) +#endif + +#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)) +#define snapshot_purr() do { } while (0) +#endif + +/* + * Called when a cpu comes up after the system has finished booting, + * i.e. as a result of a hotplug cpu action. + */ +void snapshot_timebase(void) +{ + __get_cpu_var(last_jiffy) = get_tb(); + snapshot_purr(); +} + void __delay(unsigned long loops) { unsigned long start; @@ -392,6 +612,7 @@ static void iSeries_tb_recal(void) new_tb_ticks_per_jiffy, sign, tick_diff ); tb_ticks_per_jiffy = new_tb_ticks_per_jiffy; tb_ticks_per_sec = new_tb_ticks_per_sec; + calc_cputime_factors(); div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres ); do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; tb_to_xs = divres.result_low; @@ -440,6 +661,7 @@ void timer_interrupt(struct pt_regs * regs) irq_enter(); profile_tick(CPU_PROFILING, regs); + calculate_steal_time(); #ifdef CONFIG_PPC_ISERIES get_lppaca()->int_dword.fields.decr_int = 0; @@ -461,7 +683,7 @@ void timer_interrupt(struct pt_regs * regs) * is the case. */ if (!cpu_is_offline(cpu)) - update_process_times(user_mode(regs)); + account_process_time(regs); /* * No need to check whether cpu is offline here; boot_cpuid @@ -518,13 +740,27 @@ void wakeup_decrementer(void) void __init smp_space_timers(unsigned int max_cpus) { int i; + unsigned long half = tb_ticks_per_jiffy / 2; unsigned long offset = tb_ticks_per_jiffy / max_cpus; unsigned long previous_tb = per_cpu(last_jiffy, boot_cpuid); /* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */ previous_tb -= tb_ticks_per_jiffy; - for_each_cpu(i) { - if (i != boot_cpuid) { + /* + * The stolen time calculation for POWER5 shared-processor LPAR + * systems works better if the two threads' timebase interrupts + * are staggered by half a jiffy with respect to each other. + */ + for_each_possible_cpu(i) { + if (i == boot_cpuid) + continue; + if (i == (boot_cpuid ^ 1)) + per_cpu(last_jiffy, i) = + per_cpu(last_jiffy, boot_cpuid) - half; + else if (i & 1) + per_cpu(last_jiffy, i) = + per_cpu(last_jiffy, i ^ 1) + half; + else { previous_tb += offset; per_cpu(last_jiffy, i) = previous_tb; } @@ -720,6 +956,7 @@ void __init time_init(void) tb_ticks_per_sec = ppc_tb_freq; tb_ticks_per_usec = ppc_tb_freq / 1000000; tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); + calc_cputime_factors(); /* * Calculate the length of each tick in ns. It will not be diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 98660aedeeb..4cbde211eb6 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -74,19 +74,19 @@ EXPORT_SYMBOL(__debugger_dabr_match); EXPORT_SYMBOL(__debugger_fault_handler); #endif -struct notifier_block *powerpc_die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +ATOMIC_NOTIFIER_HEAD(powerpc_die_chain); int register_die_notifier(struct notifier_block *nb) { - int err = 0; - unsigned long flags; + return atomic_notifier_chain_register(&powerpc_die_chain, nb); +} +EXPORT_SYMBOL(register_die_notifier); - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&powerpc_die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; +int unregister_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&powerpc_die_chain, nb); } +EXPORT_SYMBOL(unregister_die_notifier); /* * Trap & Exception support @@ -97,7 +97,6 @@ static DEFINE_SPINLOCK(die_lock); int die(const char *str, struct pt_regs *regs, long err) { static int die_counter, crash_dump_start = 0; - int nl = 0; if (debugger(regs)) return 1; @@ -106,7 +105,7 @@ int die(const char *str, struct pt_regs *regs, long err) spin_lock_irq(&die_lock); bust_spinlocks(1); #ifdef CONFIG_PMAC_BACKLIGHT - if (_machine == _MACH_Pmac) { + if (machine_is(powermac)) { set_backlight_enable(1); set_backlight_level(BACKLIGHT_MAX); } @@ -114,46 +113,18 @@ int die(const char *str, struct pt_regs *regs, long err) printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); #ifdef CONFIG_PREEMPT printk("PREEMPT "); - nl = 1; #endif #ifdef CONFIG_SMP printk("SMP NR_CPUS=%d ", NR_CPUS); - nl = 1; #endif #ifdef CONFIG_DEBUG_PAGEALLOC printk("DEBUG_PAGEALLOC "); - nl = 1; #endif #ifdef CONFIG_NUMA printk("NUMA "); - nl = 1; #endif -#ifdef CONFIG_PPC64 - switch (_machine) { - case PLATFORM_PSERIES: - printk("PSERIES "); - nl = 1; - break; - case PLATFORM_PSERIES_LPAR: - printk("PSERIES LPAR "); - nl = 1; - break; - case PLATFORM_ISERIES_LPAR: - printk("ISERIES LPAR "); - nl = 1; - break; - case PLATFORM_POWERMAC: - printk("POWERMAC "); - nl = 1; - break; - case PLATFORM_CELL: - printk("CELL "); - nl = 1; - break; - } -#endif - if (nl) - printk("\n"); + printk("%s\n", ppc_md.name ? "" : ppc_md.name); + print_modules(); show_regs(regs); bust_spinlocks(0); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 04f7df39ffb..573afb68d69 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -1,6 +1,4 @@ /* - * linux/arch/ppc64/kernel/vdso.c - * * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. * <benh@kernel.crashing.org> * @@ -35,6 +33,7 @@ #include <asm/machdep.h> #include <asm/cputable.h> #include <asm/sections.h> +#include <asm/firmware.h> #include <asm/vdso.h> #include <asm/vdso_datapage.h> @@ -669,7 +668,13 @@ void __init vdso_init(void) vdso_data->version.major = SYSTEMCFG_MAJOR; vdso_data->version.minor = SYSTEMCFG_MINOR; vdso_data->processor = mfspr(SPRN_PVR); - vdso_data->platform = _machine; + /* + * Fake the old platform number for pSeries and iSeries and add + * in LPAR bit if necessary + */ + vdso_data->platform = machine_is(iseries) ? 0x200 : 0x100; + if (firmware_has_feature(FW_FEATURE_LPAR)) + vdso_data->platform |= 1; vdso_data->physicalMemorySize = lmb_phys_mem_size(); vdso_data->dcache_size = ppc64_caches.dsize; vdso_data->dcache_line_size = ppc64_caches.dline_size; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7fa7b15fd8e..fe79c2584cb 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -1,9 +1,11 @@ #include <linux/config.h> #ifdef CONFIG_PPC64 #include <asm/page.h> +#define PROVIDE32(x) PROVIDE(__unused__##x) #else #define PAGE_SIZE 4096 #define KERNELBASE CONFIG_KERNEL_START +#define PROVIDE32(x) PROVIDE(x) #endif #include <asm-generic/vmlinux.lds.h> @@ -18,43 +20,42 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - /* Sections to be discarded. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.exit.data) - } - - . = KERNELBASE; - - /* Read-only sections, merged into text segment: */ - .text : { - *(.text .text.*) - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.fixup) -#ifdef CONFIG_PPC32 - *(.got1) - __got2_start = .; - *(.got2) - __got2_end = .; -#else - . = ALIGN(PAGE_SIZE); - _etext = .; -#endif - } -#ifdef CONFIG_PPC32 - _etext = .; - PROVIDE (etext = .); + /* Sections to be discarded. */ + /DISCARD/ : { + *(.exitcall.exit) + *(.exit.data) + } - RODATA - .fini : { *(.fini) } =0 - .ctors : { *(.ctors) } - .dtors : { *(.dtors) } + . = KERNELBASE; - .fixup : { *(.fixup) } -#endif +/* + * Text, read only data and other permanent read-only sections + */ + + /* Text and gots */ + .text : { + *(.text .text.*) + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + *(.fixup) +#ifdef CONFIG_PPC32 + *(.got1) + __got2_start = .; + *(.got2) + __got2_end = .; +#endif /* CONFIG_PPC32 */ + + . = ALIGN(PAGE_SIZE); + _etext = .; + PROVIDE32 (etext = .); + } + + /* Read-only data */ + RODATA + + /* Exception & bug tables */ __ex_table : { __start___ex_table = .; *(__ex_table) @@ -67,192 +68,172 @@ SECTIONS __stop___bug_table = .; } -#ifdef CONFIG_PPC64 +/* + * Init sections discarded at runtime + */ + . = ALIGN(PAGE_SIZE); + __init_begin = .; + + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } + + /* .exit.text is discarded at runtime, not link time, + * to deal with references from __bug_table + */ + .exit.text : { *(.exit.text) } + + .init.data : { + *(.init.data); + __vtop_table_begin = .; + *(.vtop_fixup); + __vtop_table_end = .; + __ptov_table_begin = .; + *(.ptov_fixup); + __ptov_table_end = .; + } + + . = ALIGN(16); + .init.setup : { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + + .initcall.init : { + __initcall_start = .; + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + __initcall_end = .; + } + + .con_initcall.init : { + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + } + + SECURITY_INIT + + . = ALIGN(8); __ftr_fixup : { __start___ftr_fixup = .; *(__ftr_fixup) __stop___ftr_fixup = .; } - RODATA -#endif + . = ALIGN(PAGE_SIZE); + .init.ramfs : { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } #ifdef CONFIG_PPC32 - /* Read-write section, merged into data segment: */ - . = ALIGN(PAGE_SIZE); - _sdata = .; - .data : - { - *(.data) - *(.data1) - *(.sdata) - *(.sdata2) - *(.got.plt) *(.got) - *(.dynamic) - CONSTRUCTORS - } - - . = ALIGN(PAGE_SIZE); - __nosave_begin = .; - .data_nosave : { *(.data.nosave) } - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - _edata = .; - PROVIDE (edata = .); - - . = ALIGN(8192); - .data.init_task : { *(.data.init_task) } + . = ALIGN(32); +#else + . = ALIGN(128); #endif + .data.percpu : { + __per_cpu_start = .; + *(.data.percpu) + __per_cpu_end = .; + } - /* will be freed after init */ - . = ALIGN(PAGE_SIZE); - __init_begin = .; - .init.text : { - _sinittext = .; - *(.init.text) - _einittext = .; - } -#ifdef CONFIG_PPC32 - /* .exit.text is discarded at runtime, not link time, - to deal with references from __bug_table */ - .exit.text : { *(.exit.text) } -#endif - .init.data : { - *(.init.data); - __vtop_table_begin = .; - *(.vtop_fixup); - __vtop_table_end = .; - __ptov_table_begin = .; - *(.ptov_fixup); - __ptov_table_end = .; - } - - . = ALIGN(16); - .init.setup : { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - - .initcall.init : { - __initcall_start = .; - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) - __initcall_end = .; - } - - .con_initcall.init : { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - - SECURITY_INIT + . = ALIGN(8); + .machine.desc : { + __machine_desc_start = . ; + *(.machine.desc) + __machine_desc_end = . ; + } + + /* freed after init ends here */ + . = ALIGN(PAGE_SIZE); + __init_end = .; + +/* + * And now the various read/write data + */ + + . = ALIGN(PAGE_SIZE); + _sdata = .; #ifdef CONFIG_PPC32 - __start___ftr_fixup = .; - __ftr_fixup : { *(__ftr_fixup) } - __stop___ftr_fixup = .; + .data : + { + *(.data) + *(.sdata) + *(.got.plt) *(.got) + } #else - . = ALIGN(PAGE_SIZE); - .init.ramfs : { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } -#endif + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) + } -#ifdef CONFIG_PPC32 - . = ALIGN(32); + .opd : { + *(.opd) + } + + .got : { + __toc_start = .; + *(.got) + *(.toc) + } #endif - .data.percpu : { - __per_cpu_start = .; - *(.data.percpu) - __per_cpu_end = .; - } - . = ALIGN(PAGE_SIZE); -#ifdef CONFIG_PPC64 - . = ALIGN(16384); - __init_end = .; - /* freed after init ends here */ - - /* Read/write sections */ - . = ALIGN(PAGE_SIZE); - . = ALIGN(16384); - _sdata = .; - /* The initial task and kernel stack */ - .data.init_task : { - *(.data.init_task) - } - - . = ALIGN(PAGE_SIZE); - .data.page_aligned : { - *(.data.page_aligned) - } - - .data.cacheline_aligned : { - *(.data.cacheline_aligned) - } - - .data : { - *(.data .data.rel* .toc1) - *(.branch_lt) - } - - .opd : { - *(.opd) - } - - .got : { - __toc_start = .; - *(.got) - *(.toc) - . = ALIGN(PAGE_SIZE); - _edata = .; - } - - . = ALIGN(PAGE_SIZE); + . = ALIGN(PAGE_SIZE); + _edata = .; + PROVIDE32 (edata = .); + + /* The initial task and kernel stack */ +#ifdef CONFIG_PPC32 + . = ALIGN(8192); #else - __initramfs_start = .; - .init.ramfs : { - *(.init.ramfs) - } - __initramfs_end = .; + . = ALIGN(16384); +#endif + .data.init_task : { + *(.data.init_task) + } - . = ALIGN(4096); - __init_end = .; + . = ALIGN(PAGE_SIZE); + .data.page_aligned : { + *(.data.page_aligned) + } - . = ALIGN(4096); - _sextratext = .; - _eextratext = .; + .data.cacheline_aligned : { + *(.data.cacheline_aligned) + } - __bss_start = .; -#endif + . = ALIGN(PAGE_SIZE); + __data_nosave : { + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(PAGE_SIZE); + __nosave_end = .; + } - .bss : { - __bss_start = .; - *(.sbss) *(.scommon) - *(.dynbss) - *(.bss) - *(COMMON) - __bss_stop = .; - } +/* + * And finally the bss + */ + + .bss : { + __bss_start = .; + *(.sbss) *(.scommon) + *(.dynbss) + *(.bss) + *(COMMON) + __bss_stop = .; + } -#ifdef CONFIG_PPC64 - . = ALIGN(PAGE_SIZE); -#endif - _end = . ; -#ifdef CONFIG_PPC32 - PROVIDE (end = .); -#endif + . = ALIGN(PAGE_SIZE); + _end = . ; + PROVIDE32 (end = .); } |