diff options
Diffstat (limited to 'arch/powerpc/kernel')
33 files changed, 1138 insertions, 1790 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 58d0572de6f..1dda7012914 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -34,13 +34,14 @@ obj-y += vdso32/ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o nvram_64.o firmware.o +obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o -obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o +obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o -obj-$(CONFIG_PPC_OF) += of_device.o of_platform.o prom_parse.o +obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o obj-$(CONFIG_PPC_CLOCK) += clock.o procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) @@ -67,6 +68,7 @@ obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_44x) += cpu_setup_44x.o obj-$(CONFIG_FSL_BOOKE) += cpu_setup_fsl_booke.o dbell.o +obj-$(CONFIG_PPC_BOOK3E_64) += dbell.o extra-y := head_$(CONFIG_WORD_SIZE).o extra-$(CONFIG_PPC_BOOK3E_32) := head_new_booke.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 496cc5b3984..1c0607ddccc 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -194,7 +194,6 @@ int main(void) DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); - DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); #ifdef CONFIG_KVM_BOOK3S_64_HANDLER DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); @@ -342,6 +341,7 @@ int main(void) DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec)); DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime)); + DEFINE(STAMP_SEC_FRAC, offsetof(struct vdso_data, stamp_sec_fraction)); DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size)); DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size)); DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size)); diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 87aa0f3c604..65e2b4e10f9 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1364,10 +1364,10 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_4xx, .platform = "ppc405", }, - { /* 405EX */ - .pvr_mask = 0xffff0004, - .pvr_value = 0x12910004, - .cpu_name = "405EX", + { /* 405EX Rev. A/B with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910007, + .cpu_name = "405EX Rev. A/B", .cpu_features = CPU_FTRS_40X, .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, @@ -1377,10 +1377,114 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_4xx, .platform = "ppc405", }, - { /* 405EXr */ - .pvr_mask = 0xffff0004, + { /* 405EX Rev. C without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x1291000d, + .cpu_name = "405EX Rev. C", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EX Rev. C with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x1291000f, + .cpu_name = "405EX Rev. C", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EX Rev. D without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910003, + .cpu_name = "405EX Rev. D", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EX Rev. D with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910005, + .cpu_name = "405EX Rev. D", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. A/B without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910001, + .cpu_name = "405EXr Rev. A/B", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. C without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910009, + .cpu_name = "405EXr Rev. C", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. C with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x1291000b, + .cpu_name = "405EXr Rev. C", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. D without Security */ + .pvr_mask = 0xffff000f, .pvr_value = 0x12910000, - .cpu_name = "405EXr", + .cpu_name = "405EXr Rev. D", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. D with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910002, + .cpu_name = "405EXr Rev. D", .cpu_features = CPU_FTRS_40X, .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 40f524643ba..8e05c16344e 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -128,9 +128,9 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!csize) return 0; - csize = min(csize, PAGE_SIZE); + csize = min_t(size_t, csize, PAGE_SIZE); - if (pfn < max_pfn) { + if ((min_low_pfn < pfn) && (pfn < max_pfn)) { vaddr = __va(pfn << PAGE_SHIFT); csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); } else { diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 1493734cd87..3307a52d797 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -13,32 +13,88 @@ #include <linux/kernel.h> #include <linux/smp.h> #include <linux/threads.h> +#include <linux/percpu.h> #include <asm/dbell.h> +#include <asm/irq_regs.h> #ifdef CONFIG_SMP -unsigned long dbell_smp_message[NR_CPUS]; +struct doorbell_cpu_info { + unsigned long messages; /* current messages bits */ + unsigned int tag; /* tag value */ +}; -void smp_dbell_message_pass(int target, int msg) +static DEFINE_PER_CPU(struct doorbell_cpu_info, doorbell_cpu_info); + +void doorbell_setup_this_cpu(void) +{ + struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); + + info->messages = 0; + info->tag = mfspr(SPRN_PIR) & 0x3fff; +} + +void doorbell_message_pass(int target, int msg) { + struct doorbell_cpu_info *info; int i; - if(target < NR_CPUS) { - set_bit(msg, &dbell_smp_message[target]); - ppc_msgsnd(PPC_DBELL, 0, target); + if (target < NR_CPUS) { + info = &per_cpu(doorbell_cpu_info, target); + set_bit(msg, &info->messages); + ppc_msgsnd(PPC_DBELL, 0, info->tag); } - else if(target == MSG_ALL_BUT_SELF) { + else if (target == MSG_ALL_BUT_SELF) { for_each_online_cpu(i) { if (i == smp_processor_id()) continue; - set_bit(msg, &dbell_smp_message[i]); - ppc_msgsnd(PPC_DBELL, 0, i); + info = &per_cpu(doorbell_cpu_info, i); + set_bit(msg, &info->messages); + ppc_msgsnd(PPC_DBELL, 0, info->tag); } } else { /* target == MSG_ALL */ - for_each_online_cpu(i) - set_bit(msg, &dbell_smp_message[i]); + for_each_online_cpu(i) { + info = &per_cpu(doorbell_cpu_info, i); + set_bit(msg, &info->messages); + } ppc_msgsnd(PPC_DBELL, PPC_DBELL_MSG_BRDCAST, 0); } } -#endif + +void doorbell_exception(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); + int msg; + + /* Warning: regs can be NULL when called from irq enable */ + + if (!info->messages || (num_online_cpus() < 2)) + goto out; + + for (msg = 0; msg < 4; msg++) + if (test_and_clear_bit(msg, &info->messages)) + smp_message_recv(msg); + +out: + set_irq_regs(old_regs); +} + +void doorbell_check_self(void) +{ + struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); + + if (!info->messages) + return; + + ppc_msgsnd(PPC_DBELL, 0, info->tag); +} + +#else /* CONFIG_SMP */ +void doorbell_exception(struct pt_regs *regs) +{ + printk(KERN_WARNING "Received doorbell on non-smp system\n"); +} +#endif /* CONFIG_SMP */ + diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 02f724f3675..4295e0b94b2 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -82,17 +82,9 @@ static struct notifier_block ppc_swiotlb_plat_bus_notifier = { .priority = 0, }; -static struct notifier_block ppc_swiotlb_of_bus_notifier = { - .notifier_call = ppc_swiotlb_bus_notify, - .priority = 0, -}; - int __init swiotlb_setup_bus_notifier(void) { bus_register_notifier(&platform_bus_type, &ppc_swiotlb_plat_bus_notifier); - bus_register_notifier(&of_platform_bus_type, - &ppc_swiotlb_of_bus_notifier); - return 0; } diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 24dcc0ecf24..5c43063d250 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -191,6 +191,12 @@ exc_##n##_bad_stack: \ sth r1,PACA_TRAP_SAVE(r13); /* store trap */ \ b bad_stack_book3e; /* bad stack error */ +/* WARNING: If you change the layout of this stub, make sure you chcek + * the debug exception handler which handles single stepping + * into exceptions from userspace, and the MM code in + * arch/powerpc/mm/tlb_nohash.c which patches the branch here + * and would need to be updated if that branch is moved + */ #define EXCEPTION_STUB(loc, label) \ . = interrupt_base_book3e + loc; \ nop; /* To make debug interrupts happy */ \ @@ -204,11 +210,30 @@ exc_##n##_bad_stack: \ lis r,TSR_FIS@h; \ mtspr SPRN_TSR,r +/* Used by asynchronous interrupt that may happen in the idle loop. + * + * This check if the thread was in the idle loop, and if yes, returns + * to the caller rather than the PC. This is to avoid a race if + * interrupts happen before the wait instruction. + */ +#define CHECK_NAPPING() \ + clrrdi r11,r1,THREAD_SHIFT; \ + ld r10,TI_LOCAL_FLAGS(r11); \ + andi. r9,r10,_TLF_NAPPING; \ + beq+ 1f; \ + ld r8,_LINK(r1); \ + rlwinm r7,r10,0,~_TLF_NAPPING; \ + std r8,_NIP(r1); \ + std r7,TI_LOCAL_FLAGS(r11); \ +1: + + #define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE) \ EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE_ALL) \ ack(r8); \ + CHECK_NAPPING(); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ b .ret_from_except_lite; @@ -246,11 +271,9 @@ interrupt_base_book3e: /* fake trap */ EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */ EXCEPTION_STUB(0x1c0, data_tlb_miss) EXCEPTION_STUB(0x1e0, instruction_tlb_miss) + EXCEPTION_STUB(0x280, doorbell) + EXCEPTION_STUB(0x2a0, doorbell_crit) -#if 0 - EXCEPTION_STUB(0x280, processor_doorbell) - EXCEPTION_STUB(0x220, processor_doorbell_crit) -#endif .globl interrupt_end_book3e interrupt_end_book3e: @@ -259,6 +282,7 @@ interrupt_end_book3e: CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE) // EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE_ALL) // bl special_reg_save_crit +// CHECK_NAPPING(); // addi r3,r1,STACK_FRAME_OVERHEAD // bl .critical_exception // b ret_from_crit_except @@ -270,6 +294,7 @@ interrupt_end_book3e: // EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE_ALL) // bl special_reg_save_mc // addi r3,r1,STACK_FRAME_OVERHEAD +// CHECK_NAPPING(); // bl .machine_check_exception // b ret_from_mc_except b . @@ -340,6 +365,7 @@ interrupt_end_book3e: CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE) // EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE_ALL) // bl special_reg_save_crit +// CHECK_NAPPING(); // addi r3,r1,STACK_FRAME_OVERHEAD // bl .unknown_exception // b ret_from_crit_except @@ -428,6 +454,20 @@ interrupt_end_book3e: kernel_dbg_exc: b . /* NYI */ +/* Doorbell interrupt */ + MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE) + +/* Doorbell critical Interrupt */ + START_EXCEPTION(doorbell_crit); + CRIT_EXCEPTION_PROLOG(0x2080, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x2080, PACA_EXCRIT, INTS_DISABLE_ALL) +// bl special_reg_save_crit +// CHECK_NAPPING(); +// addi r3,r1,STACK_FRAME_OVERHEAD +// bl .doorbell_critical_exception +// b ret_from_crit_except + b . + /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -563,6 +603,8 @@ BAD_STACK_TRAMPOLINE(0xd00) BAD_STACK_TRAMPOLINE(0xe00) BAD_STACK_TRAMPOLINE(0xf00) BAD_STACK_TRAMPOLINE(0xf20) +BAD_STACK_TRAMPOLINE(0x2070) +BAD_STACK_TRAMPOLINE(0x2080) .globl bad_stack_book3e bad_stack_book3e: diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3e423fbad6b..f53029a0155 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -828,6 +828,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) /* We have a data breakpoint exception - handle it */ handle_dabr_fault: + bl .save_nvgprs ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c new file mode 100644 index 00000000000..5ecd0401cdb --- /dev/null +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -0,0 +1,364 @@ +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. Derived from + * "arch/x86/kernel/hw_breakpoint.c" + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2010 IBM Corporation + * Author: K.Prasad <prasad@linux.vnet.ibm.com> + * + */ + +#include <linux/hw_breakpoint.h> +#include <linux/notifier.h> +#include <linux/kprobes.h> +#include <linux/percpu.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/smp.h> + +#include <asm/hw_breakpoint.h> +#include <asm/processor.h> +#include <asm/sstep.h> +#include <asm/uaccess.h> + +/* + * Stores the breakpoints currently in use on each breakpoint address + * register for every cpu + */ +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg); + +/* + * Returns total number of data or instruction breakpoints available. + */ +int hw_breakpoint_slots(int type) +{ + if (type == TYPE_DATA) + return HBP_NUM; + return 0; /* no instruction breakpoints available */ +} + +/* + * Install a perf counter breakpoint. + * + * We seek a free debug address register and use it for this + * breakpoint. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. + */ +int arch_install_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + struct perf_event **slot = &__get_cpu_var(bp_per_reg); + + *slot = bp; + + /* + * Do not install DABR values if the instruction must be single-stepped. + * If so, DABR will be populated in single_step_dabr_instruction(). + */ + if (current->thread.last_hit_ubp != bp) + set_dabr(info->address | info->type | DABR_TRANSLATION); + + return 0; +} + +/* + * Uninstall the breakpoint contained in the given counter. + * + * First we search the debug address register it uses and then we disable + * it. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. + */ +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ + struct perf_event **slot = &__get_cpu_var(bp_per_reg); + + if (*slot != bp) { + WARN_ONCE(1, "Can't find the breakpoint"); + return; + } + + *slot = NULL; + set_dabr(0); +} + +/* + * Perform cleanup of arch-specific counters during unregistration + * of the perf-event + */ +void arch_unregister_hw_breakpoint(struct perf_event *bp) +{ + /* + * If the breakpoint is unregistered between a hw_breakpoint_handler() + * and the single_step_dabr_instruction(), then cleanup the breakpoint + * restoration variables to prevent dangling pointers. + */ + if (bp->ctx->task) + bp->ctx->task->thread.last_hit_ubp = NULL; +} + +/* + * Check for virtual address in kernel space. + */ +int arch_check_bp_in_kernelspace(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + return is_kernel_addr(info->address); +} + +int arch_bp_generic_fields(int type, int *gen_bp_type) +{ + switch (type) { + case DABR_DATA_READ: + *gen_bp_type = HW_BREAKPOINT_R; + break; + case DABR_DATA_WRITE: + *gen_bp_type = HW_BREAKPOINT_W; + break; + case (DABR_DATA_WRITE | DABR_DATA_READ): + *gen_bp_type = (HW_BREAKPOINT_W | HW_BREAKPOINT_R); + break; + default: + return -EINVAL; + } + return 0; +} + +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp) +{ + int ret = -EINVAL; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + if (!bp) + return ret; + + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_R: + info->type = DABR_DATA_READ; + break; + case HW_BREAKPOINT_W: + info->type = DABR_DATA_WRITE; + break; + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: + info->type = (DABR_DATA_READ | DABR_DATA_WRITE); + break; + default: + return ret; + } + + info->address = bp->attr.bp_addr; + info->len = bp->attr.bp_len; + + /* + * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8) + * and breakpoint addresses are aligned to nearest double-word + * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the + * 'symbolsize' should satisfy the check below. + */ + if (info->len > + (HW_BREAKPOINT_LEN - (info->address & HW_BREAKPOINT_ALIGN))) + return -EINVAL; + return 0; +} + +/* + * Restores the breakpoint on the debug registers. + * Invoke this function if it is known that the execution context is + * about to change to cause loss of MSR_SE settings. + */ +void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) +{ + struct arch_hw_breakpoint *info; + + if (likely(!tsk->thread.last_hit_ubp)) + return; + + info = counter_arch_bp(tsk->thread.last_hit_ubp); + regs->msr &= ~MSR_SE; + set_dabr(info->address | info->type | DABR_TRANSLATION); + tsk->thread.last_hit_ubp = NULL; +} + +/* + * Handle debug exception notifications. + */ +int __kprobes hw_breakpoint_handler(struct die_args *args) +{ + int rc = NOTIFY_STOP; + struct perf_event *bp; + struct pt_regs *regs = args->regs; + int stepped = 1; + struct arch_hw_breakpoint *info; + unsigned int instr; + unsigned long dar = regs->dar; + + /* Disable breakpoints during exception handling */ + set_dabr(0); + + /* + * The counter may be concurrently released but that can only + * occur from a call_rcu() path. We can then safely fetch + * the breakpoint, use its callback, touch its counter + * while we are in an rcu_read_lock() path. + */ + rcu_read_lock(); + + bp = __get_cpu_var(bp_per_reg); + if (!bp) + goto out; + info = counter_arch_bp(bp); + + /* + * Return early after invoking user-callback function without restoring + * DABR if the breakpoint is from ptrace which always operates in + * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal + * generated in do_dabr(). + */ + if (bp->overflow_handler == ptrace_triggered) { + perf_bp_event(bp, regs); + rc = NOTIFY_DONE; + goto out; + } + + /* + * Verify if dar lies within the address range occupied by the symbol + * being watched to filter extraneous exceptions. If it doesn't, + * we still need to single-step the instruction, but we don't + * generate an event. + */ + info->extraneous_interrupt = !((bp->attr.bp_addr <= dar) && + (dar - bp->attr.bp_addr < bp->attr.bp_len)); + + /* Do not emulate user-space instructions, instead single-step them */ + if (user_mode(regs)) { + bp->ctx->task->thread.last_hit_ubp = bp; + regs->msr |= MSR_SE; + goto out; + } + + stepped = 0; + instr = 0; + if (!__get_user_inatomic(instr, (unsigned int *) regs->nip)) + stepped = emulate_step(regs, instr); + + /* + * emulate_step() could not execute it. We've failed in reliably + * handling the hw-breakpoint. Unregister it and throw a warning + * message to let the user know about it. + */ + if (!stepped) { + WARN(1, "Unable to handle hardware breakpoint. Breakpoint at " + "0x%lx will be disabled.", info->address); + perf_event_disable(bp); + goto out; + } + /* + * As a policy, the callback is invoked in a 'trigger-after-execute' + * fashion + */ + if (!info->extraneous_interrupt) + perf_bp_event(bp, regs); + + set_dabr(info->address | info->type | DABR_TRANSLATION); +out: + rcu_read_unlock(); + return rc; +} + +/* + * Handle single-step exceptions following a DABR hit. + */ +int __kprobes single_step_dabr_instruction(struct die_args *args) +{ + struct pt_regs *regs = args->regs; + struct perf_event *bp = NULL; + struct arch_hw_breakpoint *bp_info; + + bp = current->thread.last_hit_ubp; + /* + * Check if we are single-stepping as a result of a + * previous HW Breakpoint exception + */ + if (!bp) + return NOTIFY_DONE; + + bp_info = counter_arch_bp(bp); + + /* + * We shall invoke the user-defined callback function in the single + * stepping handler to confirm to 'trigger-after-execute' semantics + */ + if (!bp_info->extraneous_interrupt) + perf_bp_event(bp, regs); + + set_dabr(bp_info->address | bp_info->type | DABR_TRANSLATION); + current->thread.last_hit_ubp = NULL; + + /* + * If the process was being single-stepped by ptrace, let the + * other single-step actions occur (e.g. generate SIGTRAP). + */ + if (test_thread_flag(TIF_SINGLESTEP)) + return NOTIFY_DONE; + + return NOTIFY_STOP; +} + +/* + * Handle debug exception notifications. + */ +int __kprobes hw_breakpoint_exceptions_notify( + struct notifier_block *unused, unsigned long val, void *data) +{ + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_DABR_MATCH: + ret = hw_breakpoint_handler(data); + break; + case DIE_SSTEP: + ret = single_step_dabr_instruction(data); + break; + } + + return ret; +} + +/* + * Release the user breakpoints used by ptrace + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) +{ + struct thread_struct *t = &tsk->thread; + + unregister_hw_breakpoint(t->ptrace_bps[0]); + t->ptrace_bps[0] = NULL; +} + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ + /* TODO */ +} diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 21266abfbda..9b626cfffce 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -140,19 +140,19 @@ static struct dma_map_ops ibmebus_dma_ops = { static int ibmebus_match_path(struct device *dev, void *data) { - struct device_node *dn = to_of_device(dev)->dev.of_node; + struct device_node *dn = to_platform_device(dev)->dev.of_node; return (dn->full_name && (strcasecmp((char *)data, dn->full_name) == 0)); } static int ibmebus_match_node(struct device *dev, void *data) { - return to_of_device(dev)->dev.of_node == data; + return to_platform_device(dev)->dev.of_node == data; } static int ibmebus_create_device(struct device_node *dn) { - struct of_device *dev; + struct platform_device *dev; int ret; dev = of_device_alloc(dn, NULL, &ibmebus_bus_device); @@ -298,7 +298,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus, if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path, ibmebus_match_path))) { - of_device_unregister(to_of_device(dev)); + of_device_unregister(to_platform_device(dev)); kfree(path); return count; diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S new file mode 100644 index 00000000000..16c002d6bdf --- /dev/null +++ b/arch/powerpc/kernel/idle_book3e.S @@ -0,0 +1,86 @@ +/* + * Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org> + * + * Generic idle routine for Book3E processors + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ppc-opcode.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +/* 64-bit version only for now */ +#ifdef CONFIG_PPC64 + +_GLOBAL(book3e_idle) + /* Save LR for later */ + mflr r0 + std r0,16(r1) + + /* Hard disable interrupts */ + wrteei 0 + + /* Now check if an interrupt came in while we were soft disabled + * since we may otherwise lose it (doorbells etc...). We know + * that since PACAHARDIRQEN will have been cleared in that case. + */ + lbz r3,PACAHARDIRQEN(r13) + cmpwi cr0,r3,0 + beqlr + + /* Now we are going to mark ourselves as soft and hard enables in + * order to be able to take interrupts while asleep. We inform lockdep + * of that. We don't actually turn interrupts on just yet tho. + */ +#ifdef CONFIG_TRACE_IRQFLAGS + stdu r1,-128(r1) + bl .trace_hardirqs_on +#endif + li r0,1 + stb r0,PACASOFTIRQEN(r13) + stb r0,PACAHARDIRQEN(r13) + + /* Interrupts will make use return to LR, so get something we want + * in there + */ + bl 1f + + /* Hard disable interrupts again */ + wrteei 0 + + /* Mark them off again in the PACA as well */ + li r0,0 + stb r0,PACASOFTIRQEN(r13) + stb r0,PACAHARDIRQEN(r13) + + /* Tell lockdep about it */ +#ifdef CONFIG_TRACE_IRQFLAGS + bl .trace_hardirqs_off + addi r1,r1,128 +#endif + ld r0,16(r1) + mtlr r0 + blr + +1: /* Let's set the _TLF_NAPPING flag so interrupts make us return + * to the right spot + */ + clrrdi r11,r1,THREAD_SHIFT + ld r10,TI_LOCAL_FLAGS(r11) + ori r10,r10,_TLF_NAPPING + std r10,TI_LOCAL_FLAGS(r11) + + /* We can now re-enable hard interrupts and go to sleep */ + wrteei 1 +1: PPC_WAIT(0) + b 1b + +#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 77be3d058a6..d3ce67cf03b 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -53,6 +53,8 @@ #include <linux/bootmem.h> #include <linux/pci.h> #include <linux/debugfs.h> +#include <linux/of.h> +#include <linux/of_irq.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -64,6 +66,8 @@ #include <asm/ptrace.h> #include <asm/machdep.h> #include <asm/udbg.h> +#include <asm/dbell.h> + #ifdef CONFIG_PPC64 #include <asm/paca.h> #include <asm/firmware.h> @@ -153,14 +157,28 @@ notrace void raw_local_irq_restore(unsigned long en) if (get_hard_enabled()) return; +#if defined(CONFIG_BOOKE) && defined(CONFIG_SMP) + /* Check for pending doorbell interrupts and resend to ourself */ + doorbell_check_self(); +#endif + /* * Need to hard-enable interrupts here. Since currently disabled, * no need to take further asm precautions against preemption; but * use local_paca instead of get_paca() to avoid preemption checking. */ local_paca->hard_enabled = en; + +#ifndef CONFIG_BOOKE + /* On server, re-trigger the decrementer if it went negative since + * some processors only trigger on edge transitions of the sign bit. + * + * BookE has a level sensitive decrementer (latches in TSR) so we + * don't need that + */ if ((int)mfspr(SPRN_DEC) < 0) mtspr(SPRN_DEC, 1); +#endif /* CONFIG_BOOKE */ /* * Force the delivery of pending soft-disabled interrupts on PS3. @@ -804,18 +822,6 @@ unsigned int irq_create_of_mapping(struct device_node *controller, } EXPORT_SYMBOL_GPL(irq_create_of_mapping); -unsigned int irq_of_parse_and_map(struct device_node *dev, int index) -{ - struct of_irq oirq; - - if (of_irq_map_one(dev, index, &oirq)) - return NO_IRQ; - - return irq_create_of_mapping(oirq.controller, oirq.specifier, - oirq.size); -} -EXPORT_SYMBOL_GPL(irq_of_parse_and_map); - void irq_dispose_mapping(unsigned int virq) { struct irq_host *host; diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 82a7b228c81..7f61a3ac787 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -129,7 +129,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) return 0; if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) - regs->nip += 4; + regs->nip += BREAK_INSTR_SIZE; return 1; } diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 035ada5443e..c1fd0f9658f 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -4,6 +4,7 @@ #include <linux/serial_core.h> #include <linux/console.h> #include <linux/pci.h> +#include <linux/of_address.h> #include <linux/of_device.h> #include <asm/io.h> #include <asm/mmu.h> diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 89f005116aa..dd6c141f166 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -45,6 +45,18 @@ void machine_kexec_cleanup(struct kimage *image) ppc_md.machine_kexec_cleanup(image); } +void arch_crash_save_vmcoreinfo(void) +{ + +#ifdef CONFIG_NEED_MULTIPLE_NODES + VMCOREINFO_SYMBOL(node_data); + VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif +#ifndef CONFIG_NEED_MULTIPLE_NODES + VMCOREINFO_SYMBOL(contig_page_data); +#endif +} + /* * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. @@ -144,24 +156,24 @@ int overlaps_crashkernel(unsigned long start, unsigned long size) } /* Values we need to export to the second kernel via the device tree. */ -static unsigned long kernel_end; -static unsigned long crashk_size; +static phys_addr_t kernel_end; +static phys_addr_t crashk_size; static struct property kernel_end_prop = { .name = "linux,kernel-end", - .length = sizeof(unsigned long), + .length = sizeof(phys_addr_t), .value = &kernel_end, }; static struct property crashk_base_prop = { .name = "linux,crashkernel-base", - .length = sizeof(unsigned long), + .length = sizeof(phys_addr_t), .value = &crashk_res.start, }; static struct property crashk_size_prop = { .name = "linux,crashkernel-size", - .length = sizeof(unsigned long), + .length = sizeof(phys_addr_t), .value = &crashk_size, }; diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index ed31a29c4ff..583af70c4b1 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -15,6 +15,8 @@ #include <linux/thread_info.h> #include <linux/init_task.h> #include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/cpu.h> #include <asm/page.h> #include <asm/current.h> @@ -25,6 +27,7 @@ #include <asm/sections.h> /* _end */ #include <asm/prom.h> #include <asm/smp.h> +#include <asm/hw_breakpoint.h> int default_machine_kexec_prepare(struct kimage *image) { @@ -165,6 +168,7 @@ static void kexec_smp_down(void *arg) while(kexec_all_irq_disabled == 0) cpu_relax(); mb(); /* make sure all irqs are disabled before this */ + hw_breakpoint_disable(); /* * Now every CPU has IRQs off, we can clear out any pending * IPIs and be sure that no more will come in after this. @@ -180,8 +184,22 @@ static void kexec_prepare_cpus_wait(int wait_state) { int my_cpu, i, notified=-1; + hw_breakpoint_disable(); my_cpu = get_cpu(); - /* Make sure each CPU has atleast made it to the state we need */ + /* Make sure each CPU has at least made it to the state we need. + * + * FIXME: There is a (slim) chance of a problem if not all of the CPUs + * are correctly onlined. If somehow we start a CPU on boot with RTAS + * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in + * time, the boot CPU will timeout. If it does eventually execute + * stuff, the secondary will start up (paca[].cpu_start was written) and + * get into a peculiar state. If the platform supports + * smp_ops->take_timebase(), the secondary CPU will probably be spinning + * in there. If not (i.e. pseries), the secondary will continue on and + * try to online itself/idle/etc. If it survives that, we need to find + * these possible-but-not-online-but-should-be CPUs and chaperone them + * into kexec_smp_wait(). + */ for_each_online_cpu(i) { if (i == my_cpu) continue; @@ -189,9 +207,9 @@ static void kexec_prepare_cpus_wait(int wait_state) while (paca[i].kexec_state < wait_state) { barrier(); if (i != notified) { - printk( "kexec: waiting for cpu %d (physical" - " %d) to enter %i state\n", - i, paca[i].hw_cpu_id, wait_state); + printk(KERN_INFO "kexec: waiting for cpu %d " + "(physical %d) to enter %i state\n", + i, paca[i].hw_cpu_id, wait_state); notified = i; } } @@ -199,9 +217,32 @@ static void kexec_prepare_cpus_wait(int wait_state) mb(); } -static void kexec_prepare_cpus(void) +/* + * We need to make sure each present CPU is online. The next kernel will scan + * the device tree and assume primary threads are online and query secondary + * threads via RTAS to online them if required. If we don't online primary + * threads, they will be stuck. However, we also online secondary threads as we + * may be using 'cede offline'. In this case RTAS doesn't see the secondary + * threads as offline -- and again, these CPUs will be stuck. + * + * So, we online all CPUs that should be running, including secondary threads. + */ +static void wake_offline_cpus(void) { + int cpu = 0; + for_each_present_cpu(cpu) { + if (!cpu_online(cpu)) { + printk(KERN_INFO "kexec: Waking offline cpu %d.\n", + cpu); + cpu_up(cpu); + } + } +} + +static void kexec_prepare_cpus(void) +{ + wake_offline_cpus(); smp_call_function(kexec_smp_down, NULL, /* wait */0); local_irq_disable(); mb(); /* make sure IRQs are disabled before we say they are */ @@ -215,7 +256,10 @@ static void kexec_prepare_cpus(void) if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 0); - /* Before removing MMU mapings make sure all CPUs have entered real mode */ + /* + * Before removing MMU mappings make sure all CPUs have entered real + * mode: + */ kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE); put_cpu(); @@ -257,6 +301,12 @@ static void kexec_prepare_cpus(void) static union thread_union kexec_stack __init_task_data = { }; +/* + * For similar reasons to the stack above, the kexecing CPU needs to be on a + * static PACA; we switch to kexec_paca. + */ +struct paca_struct kexec_paca; + /* Our assembly helper, in kexec_stub.S */ extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, void *image, void *control, @@ -278,12 +328,28 @@ void default_machine_kexec(struct kimage *image) if (crashing_cpu == -1) kexec_prepare_cpus(); + pr_debug("kexec: Starting switchover sequence.\n"); + /* switch to a staticly allocated stack. Based on irq stack code. * XXX: the task struct will likely be invalid once we do the copy! */ kexec_stack.thread_info.task = current_thread_info()->task; kexec_stack.thread_info.flags = 0; + /* We need a static PACA, too; copy this CPU's PACA over and switch to + * it. Also poison per_cpu_offset to catch anyone using non-static + * data. + */ + memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct)); + kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL; + paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) - + kexec_paca.paca_index; + setup_paca(&kexec_paca); + + /* XXX: If anyone does 'dynamic lppacas' this will also need to be + * switched to a static version! + */ + /* Some things are best done in assembly. Finding globals with * a toc is easier in C, so pass in what we can. */ diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c deleted file mode 100644 index df78e0236a0..00000000000 --- a/arch/powerpc/kernel/of_device.c +++ /dev/null @@ -1,133 +0,0 @@ -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/mod_devicetable.h> -#include <linux/slab.h> -#include <linux/of_device.h> - -#include <asm/errno.h> -#include <asm/dcr.h> - -static void of_device_make_bus_id(struct of_device *dev) -{ - static atomic_t bus_no_reg_magic; - struct device_node *node = dev->dev.of_node; - const u32 *reg; - u64 addr; - int magic; - - /* - * If it's a DCR based device, use 'd' for native DCRs - * and 'D' for MMIO DCRs. - */ -#ifdef CONFIG_PPC_DCR - reg = of_get_property(node, "dcr-reg", NULL); - if (reg) { -#ifdef CONFIG_PPC_DCR_NATIVE - dev_set_name(&dev->dev, "d%x.%s", *reg, node->name); -#else /* CONFIG_PPC_DCR_NATIVE */ - addr = of_translate_dcr_address(node, *reg, NULL); - if (addr != OF_BAD_ADDR) { - dev_set_name(&dev->dev, "D%llx.%s", - (unsigned long long)addr, node->name); - return; - } -#endif /* !CONFIG_PPC_DCR_NATIVE */ - } -#endif /* CONFIG_PPC_DCR */ - - /* - * For MMIO, get the physical address - */ - reg = of_get_property(node, "reg", NULL); - if (reg) { - addr = of_translate_address(node, reg); - if (addr != OF_BAD_ADDR) { - dev_set_name(&dev->dev, "%llx.%s", - (unsigned long long)addr, node->name); - return; - } - } - - /* - * No BusID, use the node name and add a globally incremented - * counter (and pray...) - */ - magic = atomic_add_return(1, &bus_no_reg_magic); - dev_set_name(&dev->dev, "%s.%d", node->name, magic - 1); -} - -struct of_device *of_device_alloc(struct device_node *np, - const char *bus_id, - struct device *parent) -{ - struct of_device *dev; - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) - return NULL; - - dev->dev.of_node = of_node_get(np); - dev->dev.dma_mask = &dev->archdata.dma_mask; - dev->dev.parent = parent; - dev->dev.release = of_release_dev; - - if (bus_id) - dev_set_name(&dev->dev, "%s", bus_id); - else - of_device_make_bus_id(dev); - - return dev; -} -EXPORT_SYMBOL(of_device_alloc); - -int of_device_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - struct of_device *ofdev; - const char *compat; - int seen = 0, cplen, sl; - - if (!dev) - return -ENODEV; - - ofdev = to_of_device(dev); - - if (add_uevent_var(env, "OF_NAME=%s", ofdev->dev.of_node->name)) - return -ENOMEM; - - if (add_uevent_var(env, "OF_TYPE=%s", ofdev->dev.of_node->type)) - return -ENOMEM; - - /* Since the compatible field can contain pretty much anything - * it's not really legal to split it out with commas. We split it - * up using a number of environment variables instead. */ - - compat = of_get_property(ofdev->dev.of_node, "compatible", &cplen); - while (compat && *compat && cplen > 0) { - if (add_uevent_var(env, "OF_COMPATIBLE_%d=%s", seen, compat)) - return -ENOMEM; - - sl = strlen (compat) + 1; - compat += sl; - cplen -= sl; - seen++; - } - - if (add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen)) - return -ENOMEM; - - /* modalias is trickier, we add it in 2 steps */ - if (add_uevent_var(env, "MODALIAS=")) - return -ENOMEM; - sl = of_device_get_modalias(ofdev, &env->buf[env->buflen-1], - sizeof(env->buf) - env->buflen); - if (sl >= (sizeof(env->buf) - env->buflen)) - return -ENOMEM; - env->buflen += sl; - - return 0; -} -EXPORT_SYMBOL(of_device_uevent); -EXPORT_SYMBOL(of_device_get_modalias); diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 487a98851ba..b2c363ef38a 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -28,207 +28,6 @@ #include <asm/ppc-pci.h> #include <asm/atomic.h> -/* - * The list of OF IDs below is used for matching bus types in the - * system whose devices are to be exposed as of_platform_devices. - * - * This is the default list valid for most platforms. This file provides - * functions who can take an explicit list if necessary though - * - * The search is always performed recursively looking for children of - * the provided device_node and recursively if such a children matches - * a bus type in the list - */ - -static const struct of_device_id of_default_bus_ids[] = { - { .type = "soc", }, - { .compatible = "soc", }, - { .type = "spider", }, - { .type = "axon", }, - { .type = "plb5", }, - { .type = "plb4", }, - { .type = "opb", }, - { .type = "ebc", }, - {}, -}; - -struct bus_type of_platform_bus_type = { - .uevent = of_device_uevent, -}; -EXPORT_SYMBOL(of_platform_bus_type); - -static int __init of_bus_driver_init(void) -{ - return of_bus_type_init(&of_platform_bus_type, "of_platform"); -} - -postcore_initcall(of_bus_driver_init); - -struct of_device* of_platform_device_create(struct device_node *np, - const char *bus_id, - struct device *parent) -{ - struct of_device *dev; - - dev = of_device_alloc(np, bus_id, parent); - if (!dev) - return NULL; - - dev->archdata.dma_mask = 0xffffffffUL; - dev->dev.coherent_dma_mask = DMA_BIT_MASK(32); - - dev->dev.bus = &of_platform_bus_type; - - /* We do not fill the DMA ops for platform devices by default. - * This is currently the responsibility of the platform code - * to do such, possibly using a device notifier - */ - - if (of_device_register(dev) != 0) { - of_device_free(dev); - return NULL; - } - - return dev; -} -EXPORT_SYMBOL(of_platform_device_create); - - - -/** - * of_platform_bus_create - Create an OF device for a bus node and all its - * children. Optionally recursively instanciate matching busses. - * @bus: device node of the bus to instanciate - * @matches: match table, NULL to use the default, OF_NO_DEEP_PROBE to - * disallow recursive creation of child busses - */ -static int of_platform_bus_create(const struct device_node *bus, - const struct of_device_id *matches, - struct device *parent) -{ - struct device_node *child; - struct of_device *dev; - int rc = 0; - - for_each_child_of_node(bus, child) { - pr_debug(" create child: %s\n", child->full_name); - dev = of_platform_device_create(child, NULL, parent); - if (dev == NULL) - rc = -ENOMEM; - else if (!of_match_node(matches, child)) - continue; - if (rc == 0) { - pr_debug(" and sub busses\n"); - rc = of_platform_bus_create(child, matches, &dev->dev); - } if (rc) { - of_node_put(child); - break; - } - } - return rc; -} - -/** - * of_platform_bus_probe - Probe the device-tree for platform busses - * @root: parent of the first level to probe or NULL for the root of the tree - * @matches: match table, NULL to use the default - * @parent: parent to hook devices from, NULL for toplevel - * - * Note that children of the provided root are not instanciated as devices - * unless the specified root itself matches the bus list and is not NULL. - */ - -int of_platform_bus_probe(struct device_node *root, - const struct of_device_id *matches, - struct device *parent) -{ - struct device_node *child; - struct of_device *dev; - int rc = 0; - - if (matches == NULL) - matches = of_default_bus_ids; - if (matches == OF_NO_DEEP_PROBE) - return -EINVAL; - if (root == NULL) - root = of_find_node_by_path("/"); - else - of_node_get(root); - - pr_debug("of_platform_bus_probe()\n"); - pr_debug(" starting at: %s\n", root->full_name); - - /* Do a self check of bus type, if there's a match, create - * children - */ - if (of_match_node(matches, root)) { - pr_debug(" root match, create all sub devices\n"); - dev = of_platform_device_create(root, NULL, parent); - if (dev == NULL) { - rc = -ENOMEM; - goto bail; - } - pr_debug(" create all sub busses\n"); - rc = of_platform_bus_create(root, matches, &dev->dev); - goto bail; - } - for_each_child_of_node(root, child) { - if (!of_match_node(matches, child)) - continue; - - pr_debug(" match: %s\n", child->full_name); - dev = of_platform_device_create(child, NULL, parent); - if (dev == NULL) - rc = -ENOMEM; - else - rc = of_platform_bus_create(child, matches, &dev->dev); - if (rc) { - of_node_put(child); - break; - } - } - bail: - of_node_put(root); - return rc; -} -EXPORT_SYMBOL(of_platform_bus_probe); - -static int of_dev_node_match(struct device *dev, void *data) -{ - return to_of_device(dev)->dev.of_node == data; -} - -struct of_device *of_find_device_by_node(struct device_node *np) -{ - struct device *dev; - - dev = bus_find_device(&of_platform_bus_type, - NULL, np, of_dev_node_match); - if (dev) - return to_of_device(dev); - return NULL; -} -EXPORT_SYMBOL(of_find_device_by_node); - -static int of_dev_phandle_match(struct device *dev, void *data) -{ - phandle *ph = data; - return to_of_device(dev)->dev.of_node->phandle == *ph; -} - -struct of_device *of_find_device_by_phandle(phandle ph) -{ - struct device *dev; - - dev = bus_find_device(&of_platform_bus_type, - NULL, &ph, of_dev_phandle_match); - if (dev) - return to_of_device(dev); - return NULL; -} -EXPORT_SYMBOL(of_find_device_by_phandle); - - #ifdef CONFIG_PPC_OF_PLATFORM_PCI /* The probing of PCI controllers from of_platform is currently @@ -237,7 +36,7 @@ EXPORT_SYMBOL(of_find_device_by_phandle); * lacking some bits needed here. */ -static int __devinit of_pci_phb_probe(struct of_device *dev, +static int __devinit of_pci_phb_probe(struct platform_device *dev, const struct of_device_id *match) { struct pci_controller *phb; diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 139a773853f..d0a26f1770f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -105,6 +105,16 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) #endif /* CONFIG_PPC_STD_MMU_64 */ } +/* Put the paca pointer into r13 and SPRG_PACA */ +void setup_paca(struct paca_struct *new_paca) +{ + local_paca = new_paca; + mtspr(SPRN_SPRG_PACA, local_paca); +#ifdef CONFIG_PPC_BOOK3E + mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); +#endif +} + static int __initdata paca_size; void __init allocate_pacas(void) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 5b38f6ae2b2..9021c4ad4bb 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -21,6 +21,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/bootmem.h> +#include <linux/of_address.h> #include <linux/mm.h> #include <linux/list.h> #include <linux/syscalls.h> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 773424df828..551f6713ff4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -37,6 +37,7 @@ #include <linux/kernel_stat.h> #include <linux/personality.h> #include <linux/random.h> +#include <linux/hw_breakpoint.h> #include <asm/pgtable.h> #include <asm/uaccess.h> @@ -462,14 +463,42 @@ struct task_struct *__switch_to(struct task_struct *prev, #ifdef CONFIG_PPC_ADV_DEBUG_REGS switch_booke_debug_regs(&new->thread); #else +/* + * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would + * schedule DABR + */ +#ifndef CONFIG_HAVE_HW_BREAKPOINT if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) set_dabr(new->thread.dabr); +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif new_thread = &new->thread; old_thread = ¤t->thread; +#if defined(CONFIG_PPC_BOOK3E_64) + /* XXX Current Book3E code doesn't deal with kernel side DBCR0, + * we always hold the user values, so we set it now. + * + * However, we ensure the kernel MSR:DE is appropriately cleared too + * to avoid spurrious single step exceptions in the kernel. + * + * This will have to change to merge with the ppc32 code at some point, + * but I don't like much what ppc32 is doing today so there's some + * thinking needed there + */ + if ((new_thread->dbcr0 | old_thread->dbcr0) & DBCR0_IDM) { + u32 dbcr0; + + mtmsr(mfmsr() & ~MSR_DE); + isync(); + dbcr0 = mfspr(SPRN_DBCR0); + dbcr0 = (dbcr0 & DBCR0_EDM) | new_thread->dbcr0; + mtspr(SPRN_DBCR0, dbcr0); + } +#endif /* CONFIG_PPC64_BOOK3E */ + #ifdef CONFIG_PPC64 /* * Collect processor utilization data per process @@ -642,7 +671,11 @@ void flush_thread(void) { discard_lazy_cpu_state(); +#ifdef CONFIG_HAVE_HW_BREAKPOINTS + flush_ptrace_hw_breakpoint(current); +#else /* CONFIG_HAVE_HW_BREAKPOINTS */ set_debug_reg_defaults(¤t->thread); +#endif /* CONFIG_HAVE_HW_BREAKPOINTS */ } void @@ -660,6 +693,9 @@ void prepare_to_copy(struct task_struct *tsk) flush_altivec_to_thread(current); flush_vsx_to_thread(current); flush_spe_to_thread(current); +#ifdef CONFIG_HAVE_HW_BREAKPOINT + flush_ptrace_hw_breakpoint(tsk); +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ } /* diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 3b6f8ae9b8c..941ff4dbc56 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -311,6 +311,24 @@ static void __init prom_print_hex(unsigned long val) call_prom("write", 3, 1, _prom->stdout, buf, nibbles); } +/* max number of decimal digits in an unsigned long */ +#define UL_DIGITS 21 +static void __init prom_print_dec(unsigned long val) +{ + int i, size; + char buf[UL_DIGITS+1]; + struct prom_t *_prom = &RELOC(prom); + + for (i = UL_DIGITS-1; i >= 0; i--) { + buf[i] = (val % 10) + '0'; + val = val/10; + if (val == 0) + break; + } + /* shift stuff down */ + size = UL_DIGITS - i; + call_prom("write", 3, 1, _prom->stdout, buf+i, size); +} static void __init prom_printf(const char *format, ...) { @@ -350,6 +368,14 @@ static void __init prom_printf(const char *format, ...) v = va_arg(args, unsigned long); prom_print_hex(v); break; + case 'l': + ++q; + if (*q == 'u') { /* '%lu' */ + ++q; + v = va_arg(args, unsigned long); + prom_print_dec(v); + } + break; } } } @@ -835,11 +861,11 @@ static int __init prom_count_smt_threads(void) if (plen == PROM_ERROR) break; plen >>= 2; - prom_debug("Found 0x%x smt threads per core\n", (unsigned long)plen); + prom_debug("Found %lu smt threads per core\n", (unsigned long)plen); /* Sanity check */ if (plen < 1 || plen > 64) { - prom_printf("Threads per core 0x%x out of bounds, assuming 1\n", + prom_printf("Threads per core %lu out of bounds, assuming 1\n", (unsigned long)plen); return 1; } @@ -869,12 +895,12 @@ static void __init prom_send_capabilities(void) cores = (u32 *)PTRRELOC(&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]); if (*cores != NR_CPUS) { prom_printf("WARNING ! " - "ibm_architecture_vec structure inconsistent: 0x%x !\n", + "ibm_architecture_vec structure inconsistent: %lu!\n", *cores); } else { *cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); - prom_printf("Max number of cores passed to firmware: 0x%x\n", - (unsigned long)*cores); + prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", + *cores, NR_CPUS); } /* try calling the ibm,client-architecture-support method */ @@ -1482,7 +1508,7 @@ static void __init prom_hold_cpus(void) reg = -1; prom_getprop(node, "reg", ®, sizeof(reg)); - prom_debug("cpu hw idx = 0x%x\n", reg); + prom_debug("cpu hw idx = %lu\n", reg); /* Init the acknowledge var which will be reset by * the secondary cpu when it awakens from its OF @@ -1492,7 +1518,7 @@ static void __init prom_hold_cpus(void) if (reg != _prom->cpu) { /* Primary Thread of non-boot cpu */ - prom_printf("starting cpu hw idx %x... ", reg); + prom_printf("starting cpu hw idx %lu... ", reg); call_prom("start-cpu", 3, 0, node, secondary_hold, reg); @@ -1507,7 +1533,7 @@ static void __init prom_hold_cpus(void) } #ifdef CONFIG_SMP else - prom_printf("boot cpu hw idx %x\n", reg); + prom_printf("boot cpu hw idx %lu\n", reg); #endif /* CONFIG_SMP */ } @@ -2420,7 +2446,7 @@ static void __init prom_find_boot_cpu(void) prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval)); _prom->cpu = getprop_rval; - prom_debug("Booting CPU hw index = 0x%x\n", _prom->cpu); + prom_debug("Booting CPU hw index = %lu\n", _prom->cpu); } static void __init prom_check_initrd(unsigned long r3, unsigned long r4) diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c index 8362620c9e6..88334af038e 100644 --- a/arch/powerpc/kernel/prom_parse.c +++ b/arch/powerpc/kernel/prom_parse.c @@ -6,232 +6,11 @@ #include <linux/module.h> #include <linux/ioport.h> #include <linux/etherdevice.h> +#include <linux/of_address.h> #include <asm/prom.h> #include <asm/pci-bridge.h> -#ifdef DEBUG -#define DBG(fmt...) do { printk(fmt); } while(0) -#else -#define DBG(fmt...) do { } while(0) -#endif - -#ifdef CONFIG_PPC64 -#define PRu64 "%lx" -#else -#define PRu64 "%llx" -#endif - -/* Max address size we deal with */ -#define OF_MAX_ADDR_CELLS 4 -#define OF_CHECK_COUNTS(na, ns) ((na) > 0 && (na) <= OF_MAX_ADDR_CELLS && \ - (ns) > 0) - -static struct of_bus *of_match_bus(struct device_node *np); -static int __of_address_to_resource(struct device_node *dev, - const u32 *addrp, u64 size, unsigned int flags, - struct resource *r); - - -/* Debug utility */ -#ifdef DEBUG -static void of_dump_addr(const char *s, const u32 *addr, int na) -{ - printk("%s", s); - while(na--) - printk(" %08x", *(addr++)); - printk("\n"); -} -#else -static void of_dump_addr(const char *s, const u32 *addr, int na) { } -#endif - - -/* Callbacks for bus specific translators */ -struct of_bus { - const char *name; - const char *addresses; - int (*match)(struct device_node *parent); - void (*count_cells)(struct device_node *child, - int *addrc, int *sizec); - u64 (*map)(u32 *addr, const u32 *range, - int na, int ns, int pna); - int (*translate)(u32 *addr, u64 offset, int na); - unsigned int (*get_flags)(const u32 *addr); -}; - - -/* - * Default translator (generic bus) - */ - -static void of_bus_default_count_cells(struct device_node *dev, - int *addrc, int *sizec) -{ - if (addrc) - *addrc = of_n_addr_cells(dev); - if (sizec) - *sizec = of_n_size_cells(dev); -} - -static u64 of_bus_default_map(u32 *addr, const u32 *range, - int na, int ns, int pna) -{ - u64 cp, s, da; - - cp = of_read_number(range, na); - s = of_read_number(range + na + pna, ns); - da = of_read_number(addr, na); - - DBG("OF: default map, cp="PRu64", s="PRu64", da="PRu64"\n", - cp, s, da); - - if (da < cp || da >= (cp + s)) - return OF_BAD_ADDR; - return da - cp; -} - -static int of_bus_default_translate(u32 *addr, u64 offset, int na) -{ - u64 a = of_read_number(addr, na); - memset(addr, 0, na * 4); - a += offset; - if (na > 1) - addr[na - 2] = a >> 32; - addr[na - 1] = a & 0xffffffffu; - - return 0; -} - -static unsigned int of_bus_default_get_flags(const u32 *addr) -{ - return IORESOURCE_MEM; -} - - #ifdef CONFIG_PCI -/* - * PCI bus specific translator - */ - -static int of_bus_pci_match(struct device_node *np) -{ - /* "vci" is for the /chaos bridge on 1st-gen PCI powermacs */ - return !strcmp(np->type, "pci") || !strcmp(np->type, "vci"); -} - -static void of_bus_pci_count_cells(struct device_node *np, - int *addrc, int *sizec) -{ - if (addrc) - *addrc = 3; - if (sizec) - *sizec = 2; -} - -static unsigned int of_bus_pci_get_flags(const u32 *addr) -{ - unsigned int flags = 0; - u32 w = addr[0]; - - switch((w >> 24) & 0x03) { - case 0x01: - flags |= IORESOURCE_IO; - break; - case 0x02: /* 32 bits */ - case 0x03: /* 64 bits */ - flags |= IORESOURCE_MEM; - break; - } - if (w & 0x40000000) - flags |= IORESOURCE_PREFETCH; - return flags; -} - -static u64 of_bus_pci_map(u32 *addr, const u32 *range, int na, int ns, int pna) -{ - u64 cp, s, da; - unsigned int af, rf; - - af = of_bus_pci_get_flags(addr); - rf = of_bus_pci_get_flags(range); - - /* Check address type match */ - if ((af ^ rf) & (IORESOURCE_MEM | IORESOURCE_IO)) - return OF_BAD_ADDR; - - /* Read address values, skipping high cell */ - cp = of_read_number(range + 1, na - 1); - s = of_read_number(range + na + pna, ns); - da = of_read_number(addr + 1, na - 1); - - DBG("OF: PCI map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da); - - if (da < cp || da >= (cp + s)) - return OF_BAD_ADDR; - return da - cp; -} - -static int of_bus_pci_translate(u32 *addr, u64 offset, int na) -{ - return of_bus_default_translate(addr + 1, offset, na - 1); -} - -const u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size, - unsigned int *flags) -{ - const u32 *prop; - unsigned int psize; - struct device_node *parent; - struct of_bus *bus; - int onesize, i, na, ns; - - /* Get parent & match bus type */ - parent = of_get_parent(dev); - if (parent == NULL) - return NULL; - bus = of_match_bus(parent); - if (strcmp(bus->name, "pci")) { - of_node_put(parent); - return NULL; - } - bus->count_cells(dev, &na, &ns); - of_node_put(parent); - if (!OF_CHECK_COUNTS(na, ns)) - return NULL; - - /* Get "reg" or "assigned-addresses" property */ - prop = of_get_property(dev, bus->addresses, &psize); - if (prop == NULL) - return NULL; - psize /= 4; - - onesize = na + ns; - for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++) - if ((prop[0] & 0xff) == ((bar_no * 4) + PCI_BASE_ADDRESS_0)) { - if (size) - *size = of_read_number(prop + na, ns); - if (flags) - *flags = bus->get_flags(prop); - return prop; - } - return NULL; -} -EXPORT_SYMBOL(of_get_pci_address); - -int of_pci_address_to_resource(struct device_node *dev, int bar, - struct resource *r) -{ - const u32 *addrp; - u64 size; - unsigned int flags; - - addrp = of_get_pci_address(dev, bar, &size, &flags); - if (addrp == NULL) - return -EINVAL; - return __of_address_to_resource(dev, addrp, size, flags, r); -} -EXPORT_SYMBOL_GPL(of_pci_address_to_resource); - int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq) { struct device_node *dn, *ppnode; @@ -313,345 +92,6 @@ int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq) EXPORT_SYMBOL_GPL(of_irq_map_pci); #endif /* CONFIG_PCI */ -/* - * ISA bus specific translator - */ - -static int of_bus_isa_match(struct device_node *np) -{ - return !strcmp(np->name, "isa"); -} - -static void of_bus_isa_count_cells(struct device_node *child, - int *addrc, int *sizec) -{ - if (addrc) - *addrc = 2; - if (sizec) - *sizec = 1; -} - -static u64 of_bus_isa_map(u32 *addr, const u32 *range, int na, int ns, int pna) -{ - u64 cp, s, da; - - /* Check address type match */ - if ((addr[0] ^ range[0]) & 0x00000001) - return OF_BAD_ADDR; - - /* Read address values, skipping high cell */ - cp = of_read_number(range + 1, na - 1); - s = of_read_number(range + na + pna, ns); - da = of_read_number(addr + 1, na - 1); - - DBG("OF: ISA map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da); - - if (da < cp || da >= (cp + s)) - return OF_BAD_ADDR; - return da - cp; -} - -static int of_bus_isa_translate(u32 *addr, u64 offset, int na) -{ - return of_bus_default_translate(addr + 1, offset, na - 1); -} - -static unsigned int of_bus_isa_get_flags(const u32 *addr) -{ - unsigned int flags = 0; - u32 w = addr[0]; - - if (w & 1) - flags |= IORESOURCE_IO; - else - flags |= IORESOURCE_MEM; - return flags; -} - - -/* - * Array of bus specific translators - */ - -static struct of_bus of_busses[] = { -#ifdef CONFIG_PCI - /* PCI */ - { - .name = "pci", - .addresses = "assigned-addresses", - .match = of_bus_pci_match, - .count_cells = of_bus_pci_count_cells, - .map = of_bus_pci_map, - .translate = of_bus_pci_translate, - .get_flags = of_bus_pci_get_flags, - }, -#endif /* CONFIG_PCI */ - /* ISA */ - { - .name = "isa", - .addresses = "reg", - .match = of_bus_isa_match, - .count_cells = of_bus_isa_count_cells, - .map = of_bus_isa_map, - .translate = of_bus_isa_translate, - .get_flags = of_bus_isa_get_flags, - }, - /* Default */ - { - .name = "default", - .addresses = "reg", - .match = NULL, - .count_cells = of_bus_default_count_cells, - .map = of_bus_default_map, - .translate = of_bus_default_translate, - .get_flags = of_bus_default_get_flags, - }, -}; - -static struct of_bus *of_match_bus(struct device_node *np) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(of_busses); i ++) - if (!of_busses[i].match || of_busses[i].match(np)) - return &of_busses[i]; - BUG(); - return NULL; -} - -static int of_translate_one(struct device_node *parent, struct of_bus *bus, - struct of_bus *pbus, u32 *addr, - int na, int ns, int pna, const char *rprop) -{ - const u32 *ranges; - unsigned int rlen; - int rone; - u64 offset = OF_BAD_ADDR; - - /* Normally, an absence of a "ranges" property means we are - * crossing a non-translatable boundary, and thus the addresses - * below the current not cannot be converted to CPU physical ones. - * Unfortunately, while this is very clear in the spec, it's not - * what Apple understood, and they do have things like /uni-n or - * /ht nodes with no "ranges" property and a lot of perfectly - * useable mapped devices below them. Thus we treat the absence of - * "ranges" as equivalent to an empty "ranges" property which means - * a 1:1 translation at that level. It's up to the caller not to try - * to translate addresses that aren't supposed to be translated in - * the first place. --BenH. - */ - ranges = of_get_property(parent, rprop, &rlen); - if (ranges == NULL || rlen == 0) { - offset = of_read_number(addr, na); - memset(addr, 0, pna * 4); - DBG("OF: no ranges, 1:1 translation\n"); - goto finish; - } - - DBG("OF: walking ranges...\n"); - - /* Now walk through the ranges */ - rlen /= 4; - rone = na + pna + ns; - for (; rlen >= rone; rlen -= rone, ranges += rone) { - offset = bus->map(addr, ranges, na, ns, pna); - if (offset != OF_BAD_ADDR) - break; - } - if (offset == OF_BAD_ADDR) { - DBG("OF: not found !\n"); - return 1; - } - memcpy(addr, ranges + na, 4 * pna); - - finish: - of_dump_addr("OF: parent translation for:", addr, pna); - DBG("OF: with offset: "PRu64"\n", offset); - - /* Translate it into parent bus space */ - return pbus->translate(addr, offset, pna); -} - - -/* - * Translate an address from the device-tree into a CPU physical address, - * this walks up the tree and applies the various bus mappings on the - * way. - * - * Note: We consider that crossing any level with #size-cells == 0 to mean - * that translation is impossible (that is we are not dealing with a value - * that can be mapped to a cpu physical address). This is not really specified - * that way, but this is traditionally the way IBM at least do things - */ -u64 __of_translate_address(struct device_node *dev, const u32 *in_addr, - const char *rprop) -{ - struct device_node *parent = NULL; - struct of_bus *bus, *pbus; - u32 addr[OF_MAX_ADDR_CELLS]; - int na, ns, pna, pns; - u64 result = OF_BAD_ADDR; - - DBG("OF: ** translation for device %s **\n", dev->full_name); - - /* Increase refcount at current level */ - of_node_get(dev); - - /* Get parent & match bus type */ - parent = of_get_parent(dev); - if (parent == NULL) - goto bail; - bus = of_match_bus(parent); - - /* Cound address cells & copy address locally */ - bus->count_cells(dev, &na, &ns); - if (!OF_CHECK_COUNTS(na, ns)) { - printk(KERN_ERR "prom_parse: Bad cell count for %s\n", - dev->full_name); - goto bail; - } - memcpy(addr, in_addr, na * 4); - - DBG("OF: bus is %s (na=%d, ns=%d) on %s\n", - bus->name, na, ns, parent->full_name); - of_dump_addr("OF: translating address:", addr, na); - - /* Translate */ - for (;;) { - /* Switch to parent bus */ - of_node_put(dev); - dev = parent; - parent = of_get_parent(dev); - - /* If root, we have finished */ - if (parent == NULL) { - DBG("OF: reached root node\n"); - result = of_read_number(addr, na); - break; - } - - /* Get new parent bus and counts */ - pbus = of_match_bus(parent); - pbus->count_cells(dev, &pna, &pns); - if (!OF_CHECK_COUNTS(pna, pns)) { - printk(KERN_ERR "prom_parse: Bad cell count for %s\n", - dev->full_name); - break; - } - - DBG("OF: parent bus is %s (na=%d, ns=%d) on %s\n", - pbus->name, pna, pns, parent->full_name); - - /* Apply bus translation */ - if (of_translate_one(dev, bus, pbus, addr, na, ns, pna, rprop)) - break; - - /* Complete the move up one level */ - na = pna; - ns = pns; - bus = pbus; - - of_dump_addr("OF: one level translation:", addr, na); - } - bail: - of_node_put(parent); - of_node_put(dev); - - return result; -} - -u64 of_translate_address(struct device_node *dev, const u32 *in_addr) -{ - return __of_translate_address(dev, in_addr, "ranges"); -} -EXPORT_SYMBOL(of_translate_address); - -u64 of_translate_dma_address(struct device_node *dev, const u32 *in_addr) -{ - return __of_translate_address(dev, in_addr, "dma-ranges"); -} -EXPORT_SYMBOL(of_translate_dma_address); - -const u32 *of_get_address(struct device_node *dev, int index, u64 *size, - unsigned int *flags) -{ - const u32 *prop; - unsigned int psize; - struct device_node *parent; - struct of_bus *bus; - int onesize, i, na, ns; - - /* Get parent & match bus type */ - parent = of_get_parent(dev); - if (parent == NULL) - return NULL; - bus = of_match_bus(parent); - bus->count_cells(dev, &na, &ns); - of_node_put(parent); - if (!OF_CHECK_COUNTS(na, ns)) - return NULL; - - /* Get "reg" or "assigned-addresses" property */ - prop = of_get_property(dev, bus->addresses, &psize); - if (prop == NULL) - return NULL; - psize /= 4; - - onesize = na + ns; - for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++) - if (i == index) { - if (size) - *size = of_read_number(prop + na, ns); - if (flags) - *flags = bus->get_flags(prop); - return prop; - } - return NULL; -} -EXPORT_SYMBOL(of_get_address); - -static int __of_address_to_resource(struct device_node *dev, const u32 *addrp, - u64 size, unsigned int flags, - struct resource *r) -{ - u64 taddr; - - if ((flags & (IORESOURCE_IO | IORESOURCE_MEM)) == 0) - return -EINVAL; - taddr = of_translate_address(dev, addrp); - if (taddr == OF_BAD_ADDR) - return -EINVAL; - memset(r, 0, sizeof(struct resource)); - if (flags & IORESOURCE_IO) { - unsigned long port; - port = pci_address_to_pio(taddr); - if (port == (unsigned long)-1) - return -EINVAL; - r->start = port; - r->end = port + size - 1; - } else { - r->start = taddr; - r->end = taddr + size - 1; - } - r->flags = flags; - r->name = dev->name; - return 0; -} - -int of_address_to_resource(struct device_node *dev, int index, - struct resource *r) -{ - const u32 *addrp; - u64 size; - unsigned int flags; - - addrp = of_get_address(dev, index, &size, &flags); - if (addrp == NULL) - return -EINVAL; - return __of_address_to_resource(dev, addrp, size, flags, r); -} -EXPORT_SYMBOL_GPL(of_address_to_resource); - void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, unsigned long *busno, unsigned long *phys, unsigned long *size) { @@ -678,342 +118,6 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, *size = of_read_number(dma_window, cells); } -/* - * Interrupt remapper - */ - -static unsigned int of_irq_workarounds; -static struct device_node *of_irq_dflt_pic; - -static struct device_node *of_irq_find_parent(struct device_node *child) -{ - struct device_node *p; - const phandle *parp; - - if (!of_node_get(child)) - return NULL; - - do { - parp = of_get_property(child, "interrupt-parent", NULL); - if (parp == NULL) - p = of_get_parent(child); - else { - if (of_irq_workarounds & OF_IMAP_NO_PHANDLE) - p = of_node_get(of_irq_dflt_pic); - else - p = of_find_node_by_phandle(*parp); - } - of_node_put(child); - child = p; - } while (p && of_get_property(p, "#interrupt-cells", NULL) == NULL); - - return p; -} - -/* This doesn't need to be called if you don't have any special workaround - * flags to pass - */ -void of_irq_map_init(unsigned int flags) -{ - of_irq_workarounds = flags; - - /* OldWorld, don't bother looking at other things */ - if (flags & OF_IMAP_OLDWORLD_MAC) - return; - - /* If we don't have phandles, let's try to locate a default interrupt - * controller (happens when booting with BootX). We do a first match - * here, hopefully, that only ever happens on machines with one - * controller. - */ - if (flags & OF_IMAP_NO_PHANDLE) { - struct device_node *np; - - for_each_node_with_property(np, "interrupt-controller") { - /* Skip /chosen/interrupt-controller */ - if (strcmp(np->name, "chosen") == 0) - continue; - /* It seems like at least one person on this planet wants - * to use BootX on a machine with an AppleKiwi controller - * which happens to pretend to be an interrupt - * controller too. - */ - if (strcmp(np->name, "AppleKiwi") == 0) - continue; - /* I think we found one ! */ - of_irq_dflt_pic = np; - break; - } - } - -} - -int of_irq_map_raw(struct device_node *parent, const u32 *intspec, u32 ointsize, - const u32 *addr, struct of_irq *out_irq) -{ - struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL; - const u32 *tmp, *imap, *imask; - u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0; - int imaplen, match, i; - - DBG("of_irq_map_raw: par=%s,intspec=[0x%08x 0x%08x...],ointsize=%d\n", - parent->full_name, intspec[0], intspec[1], ointsize); - - ipar = of_node_get(parent); - - /* First get the #interrupt-cells property of the current cursor - * that tells us how to interpret the passed-in intspec. If there - * is none, we are nice and just walk up the tree - */ - do { - tmp = of_get_property(ipar, "#interrupt-cells", NULL); - if (tmp != NULL) { - intsize = *tmp; - break; - } - tnode = ipar; - ipar = of_irq_find_parent(ipar); - of_node_put(tnode); - } while (ipar); - if (ipar == NULL) { - DBG(" -> no parent found !\n"); - goto fail; - } - - DBG("of_irq_map_raw: ipar=%s, size=%d\n", ipar->full_name, intsize); - - if (ointsize != intsize) - return -EINVAL; - - /* Look for this #address-cells. We have to implement the old linux - * trick of looking for the parent here as some device-trees rely on it - */ - old = of_node_get(ipar); - do { - tmp = of_get_property(old, "#address-cells", NULL); - tnode = of_get_parent(old); - of_node_put(old); - old = tnode; - } while(old && tmp == NULL); - of_node_put(old); - old = NULL; - addrsize = (tmp == NULL) ? 2 : *tmp; - - DBG(" -> addrsize=%d\n", addrsize); - - /* Now start the actual "proper" walk of the interrupt tree */ - while (ipar != NULL) { - /* Now check if cursor is an interrupt-controller and if it is - * then we are done - */ - if (of_get_property(ipar, "interrupt-controller", NULL) != - NULL) { - DBG(" -> got it !\n"); - memcpy(out_irq->specifier, intspec, - intsize * sizeof(u32)); - out_irq->size = intsize; - out_irq->controller = ipar; - of_node_put(old); - return 0; - } - - /* Now look for an interrupt-map */ - imap = of_get_property(ipar, "interrupt-map", &imaplen); - /* No interrupt map, check for an interrupt parent */ - if (imap == NULL) { - DBG(" -> no map, getting parent\n"); - newpar = of_irq_find_parent(ipar); - goto skiplevel; - } - imaplen /= sizeof(u32); - - /* Look for a mask */ - imask = of_get_property(ipar, "interrupt-map-mask", NULL); - - /* If we were passed no "reg" property and we attempt to parse - * an interrupt-map, then #address-cells must be 0. - * Fail if it's not. - */ - if (addr == NULL && addrsize != 0) { - DBG(" -> no reg passed in when needed !\n"); - goto fail; - } - - /* Parse interrupt-map */ - match = 0; - while (imaplen > (addrsize + intsize + 1) && !match) { - /* Compare specifiers */ - match = 1; - for (i = 0; i < addrsize && match; ++i) { - u32 mask = imask ? imask[i] : 0xffffffffu; - match = ((addr[i] ^ imap[i]) & mask) == 0; - } - for (; i < (addrsize + intsize) && match; ++i) { - u32 mask = imask ? imask[i] : 0xffffffffu; - match = - ((intspec[i-addrsize] ^ imap[i]) & mask) == 0; - } - imap += addrsize + intsize; - imaplen -= addrsize + intsize; - - DBG(" -> match=%d (imaplen=%d)\n", match, imaplen); - - /* Get the interrupt parent */ - if (of_irq_workarounds & OF_IMAP_NO_PHANDLE) - newpar = of_node_get(of_irq_dflt_pic); - else - newpar = of_find_node_by_phandle((phandle)*imap); - imap++; - --imaplen; - - /* Check if not found */ - if (newpar == NULL) { - DBG(" -> imap parent not found !\n"); - goto fail; - } - - /* Get #interrupt-cells and #address-cells of new - * parent - */ - tmp = of_get_property(newpar, "#interrupt-cells", NULL); - if (tmp == NULL) { - DBG(" -> parent lacks #interrupt-cells !\n"); - goto fail; - } - newintsize = *tmp; - tmp = of_get_property(newpar, "#address-cells", NULL); - newaddrsize = (tmp == NULL) ? 0 : *tmp; - - DBG(" -> newintsize=%d, newaddrsize=%d\n", - newintsize, newaddrsize); - - /* Check for malformed properties */ - if (imaplen < (newaddrsize + newintsize)) - goto fail; - - imap += newaddrsize + newintsize; - imaplen -= newaddrsize + newintsize; - - DBG(" -> imaplen=%d\n", imaplen); - } - if (!match) - goto fail; - - of_node_put(old); - old = of_node_get(newpar); - addrsize = newaddrsize; - intsize = newintsize; - intspec = imap - intsize; - addr = intspec - addrsize; - - skiplevel: - /* Iterate again with new parent */ - DBG(" -> new parent: %s\n", newpar ? newpar->full_name : "<>"); - of_node_put(ipar); - ipar = newpar; - newpar = NULL; - } - fail: - of_node_put(ipar); - of_node_put(old); - of_node_put(newpar); - - return -EINVAL; -} -EXPORT_SYMBOL_GPL(of_irq_map_raw); - -#if defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32) -static int of_irq_map_oldworld(struct device_node *device, int index, - struct of_irq *out_irq) -{ - const u32 *ints = NULL; - int intlen; - - /* - * Old machines just have a list of interrupt numbers - * and no interrupt-controller nodes. We also have dodgy - * cases where the APPL,interrupts property is completely - * missing behind pci-pci bridges and we have to get it - * from the parent (the bridge itself, as apple just wired - * everything together on these) - */ - while (device) { - ints = of_get_property(device, "AAPL,interrupts", &intlen); - if (ints != NULL) - break; - device = device->parent; - if (device && strcmp(device->type, "pci") != 0) - break; - } - if (ints == NULL) - return -EINVAL; - intlen /= sizeof(u32); - - if (index >= intlen) - return -EINVAL; - - out_irq->controller = NULL; - out_irq->specifier[0] = ints[index]; - out_irq->size = 1; - - return 0; -} -#else /* defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32) */ -static int of_irq_map_oldworld(struct device_node *device, int index, - struct of_irq *out_irq) -{ - return -EINVAL; -} -#endif /* !(defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32)) */ - -int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq) -{ - struct device_node *p; - const u32 *intspec, *tmp, *addr; - u32 intsize, intlen; - int res = -EINVAL; - - DBG("of_irq_map_one: dev=%s, index=%d\n", device->full_name, index); - - /* OldWorld mac stuff is "special", handle out of line */ - if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC) - return of_irq_map_oldworld(device, index, out_irq); - - /* Get the interrupts property */ - intspec = of_get_property(device, "interrupts", &intlen); - if (intspec == NULL) - return -EINVAL; - intlen /= sizeof(u32); - - /* Get the reg property (if any) */ - addr = of_get_property(device, "reg", NULL); - - /* Look for the interrupt parent. */ - p = of_irq_find_parent(device); - if (p == NULL) - return -EINVAL; - - /* Get size of interrupt specifier */ - tmp = of_get_property(p, "#interrupt-cells", NULL); - if (tmp == NULL) - goto out; - intsize = *tmp; - - DBG(" intsize=%d intlen=%d\n", intsize, intlen); - - /* Check index */ - if ((index + 1) * intsize > intlen) - goto out; - - /* Get new specifier and map it */ - res = of_irq_map_raw(p, intspec + index * intsize, intsize, - addr, out_irq); -out: - of_node_put(p); - return res; -} -EXPORT_SYMBOL_GPL(of_irq_map_one); - /** * Search the device tree for the best MAC address to use. 'mac-address' is * checked first, because that is supposed to contain to "most recent" MAC @@ -1051,29 +155,3 @@ const void *of_get_mac_address(struct device_node *np) return NULL; } EXPORT_SYMBOL(of_get_mac_address); - -int of_irq_to_resource(struct device_node *dev, int index, struct resource *r) -{ - int irq = irq_of_parse_and_map(dev, index); - - /* Only dereference the resource if both the - * resource and the irq are valid. */ - if (r && irq != NO_IRQ) { - r->start = r->end = irq; - r->flags = IORESOURCE_IRQ; - } - - return irq; -} -EXPORT_SYMBOL_GPL(of_irq_to_resource); - -void __iomem *of_iomap(struct device_node *np, int index) -{ - struct resource res; - - if (of_address_to_resource(np, index, &res)) - return NULL; - - return ioremap(res.start, 1 + res.end - res.start); -} -EXPORT_SYMBOL(of_iomap); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 7a0c0199ea2..11f3cd9c832 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -32,6 +32,8 @@ #ifdef CONFIG_PPC32 #include <linux/module.h> #endif +#include <linux/hw_breakpoint.h> +#include <linux/perf_event.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -866,9 +868,34 @@ void user_disable_single_step(struct task_struct *task) clear_tsk_thread_flag(task, TIF_SINGLESTEP); } +#ifdef CONFIG_HAVE_HW_BREAKPOINT +void ptrace_triggered(struct perf_event *bp, int nmi, + struct perf_sample_data *data, struct pt_regs *regs) +{ + struct perf_event_attr attr; + + /* + * Disable the breakpoint request here since ptrace has defined a + * one-shot behaviour for breakpoint exceptions in PPC64. + * The SIGTRAP signal is generated automatically for us in do_dabr(). + * We don't have to do anything about that here + */ + attr = bp->attr; + attr.disabled = true; + modify_user_hw_breakpoint(bp, &attr); +} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int ret; + struct thread_struct *thread = &(task->thread); + struct perf_event *bp; + struct perf_event_attr attr; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). * For embedded processors we support one DAC and no IAC's at the * moment. @@ -896,6 +923,43 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Ensure breakpoint translation bit is set */ if (data && !(data & DABR_TRANSLATION)) return -EIO; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + bp = thread->ptrace_bps[0]; + if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) { + if (bp) { + unregister_hw_breakpoint(bp); + thread->ptrace_bps[0] = NULL; + } + return 0; + } + if (bp) { + attr = bp->attr; + attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN; + arch_bp_generic_fields(data & + (DABR_DATA_WRITE | DABR_DATA_READ), + &attr.bp_type); + ret = modify_user_hw_breakpoint(bp, &attr); + if (ret) + return ret; + thread->ptrace_bps[0] = bp; + thread->dabr = data; + return 0; + } + + /* Create a new breakpoint request if one doesn't exist already */ + hw_breakpoint_init(&attr); + attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN; + arch_bp_generic_fields(data & (DABR_DATA_WRITE | DABR_DATA_READ), + &attr.bp_type); + + thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, + ptrace_triggered, task); + if (IS_ERR(bp)) { + thread->ptrace_bps[0] = NULL; + return PTR_ERR(bp); + } + +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ /* Move contents to the DABR register */ task->thread.dabr = data; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d0516dbee76..41048de3c6c 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -47,14 +47,6 @@ struct rtas_t rtas = { }; EXPORT_SYMBOL(rtas); -struct rtas_suspend_me_data { - atomic_t working; /* number of cpus accessing this struct */ - atomic_t done; - int token; /* ibm,suspend-me */ - int error; - struct completion *complete; /* wait on this until working == 0 */ -}; - DEFINE_SPINLOCK(rtas_data_buf_lock); EXPORT_SYMBOL(rtas_data_buf_lock); @@ -714,14 +706,53 @@ void rtas_os_term(char *str) static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; #ifdef CONFIG_PPC_PSERIES -static void rtas_percpu_suspend_me(void *info) +static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done) +{ + u16 slb_size = mmu_slb_size; + int rc = H_MULTI_THREADS_ACTIVE; + int cpu; + + slb_set_size(SLB_MIN_SIZE); + printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id()); + + while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) && + !atomic_read(&data->error)) + rc = rtas_call(data->token, 0, 1, NULL); + + if (rc || atomic_read(&data->error)) { + printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc); + slb_set_size(slb_size); + } + + if (atomic_read(&data->error)) + rc = atomic_read(&data->error); + + atomic_set(&data->error, rc); + + if (wake_when_done) { + atomic_set(&data->done, 1); + + for_each_online_cpu(cpu) + plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); + } + + if (atomic_dec_return(&data->working) == 0) + complete(data->complete); + + return rc; +} + +int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data) +{ + atomic_inc(&data->working); + return __rtas_suspend_last_cpu(data, 0); +} + +static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int wake_when_done) { long rc = H_SUCCESS; unsigned long msr_save; - u16 slb_size = mmu_slb_size; int cpu; - struct rtas_suspend_me_data *data = - (struct rtas_suspend_me_data *)info; atomic_inc(&data->working); @@ -729,7 +760,7 @@ static void rtas_percpu_suspend_me(void *info) msr_save = mfmsr(); mtmsr(msr_save & ~(MSR_EE)); - while (rc == H_SUCCESS && !atomic_read(&data->done)) + while (rc == H_SUCCESS && !atomic_read(&data->done) && !atomic_read(&data->error)) rc = plpar_hcall_norets(H_JOIN); mtmsr(msr_save); @@ -741,33 +772,37 @@ static void rtas_percpu_suspend_me(void *info) /* All other cpus are in H_JOIN, this cpu does * the suspend. */ - slb_set_size(SLB_MIN_SIZE); - printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", - smp_processor_id()); - data->error = rtas_call(data->token, 0, 1, NULL); - - if (data->error) { - printk(KERN_DEBUG "ibm,suspend-me returned %d\n", - data->error); - slb_set_size(slb_size); - } + return __rtas_suspend_last_cpu(data, wake_when_done); } else { printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n", smp_processor_id(), rc); - data->error = rc; + atomic_set(&data->error, rc); } - atomic_set(&data->done, 1); + if (wake_when_done) { + atomic_set(&data->done, 1); - /* This cpu did the suspend or got an error; in either case, - * we need to prod all other other cpus out of join state. - * Extra prods are harmless. - */ - for_each_online_cpu(cpu) - plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); + /* This cpu did the suspend or got an error; in either case, + * we need to prod all other other cpus out of join state. + * Extra prods are harmless. + */ + for_each_online_cpu(cpu) + plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); + } out: if (atomic_dec_return(&data->working) == 0) complete(data->complete); + return rc; +} + +int rtas_suspend_cpu(struct rtas_suspend_me_data *data) +{ + return __rtas_suspend_cpu(data, 0); +} + +static void rtas_percpu_suspend_me(void *info) +{ + __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } static int rtas_ibm_suspend_me(struct rtas_args *args) @@ -802,22 +837,22 @@ static int rtas_ibm_suspend_me(struct rtas_args *args) atomic_set(&data.working, 0); atomic_set(&data.done, 0); + atomic_set(&data.error, 0); data.token = rtas_token("ibm,suspend-me"); - data.error = 0; data.complete = &done; /* Call function on all CPUs. One of us will make the * rtas call */ if (on_each_cpu(rtas_percpu_suspend_me, &data, 0)) - data.error = -EINVAL; + atomic_set(&data.error, -EINVAL); wait_for_completion(&done); - if (data.error != 0) + if (atomic_read(&data.error) != 0) printk(KERN_ERR "Error doing global join\n"); - return data.error; + return atomic_read(&data.error); } #else /* CONFIG_PPC_PSERIES */ static int rtas_ibm_suspend_me(struct rtas_args *args) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index b7e6c7e193a..15ade0d7bbb 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -94,6 +94,10 @@ struct screen_info screen_info = { .orig_video_points = 16 }; +/* Variables required to store legacy IO irq routing */ +int of_i8042_kbd_irq; +int of_i8042_aux_irq; + #ifdef __DO_IRQ_CANON /* XXX should go elsewhere eventually */ int ppc_do_canonicalize_irqs; @@ -575,6 +579,15 @@ int check_legacy_ioport(unsigned long base_port) np = of_find_compatible_node(NULL, NULL, "pnpPNP,f03"); if (np) { parent = of_get_parent(np); + + of_i8042_kbd_irq = irq_of_parse_and_map(parent, 0); + if (!of_i8042_kbd_irq) + of_i8042_kbd_irq = 1; + + of_i8042_aux_irq = irq_of_parse_and_map(parent, 1); + if (!of_i8042_aux_irq) + of_i8042_aux_irq = 12; + of_node_put(np); np = parent; break; @@ -701,16 +714,9 @@ static struct notifier_block ppc_dflt_plat_bus_notifier = { .priority = INT_MAX, }; -static struct notifier_block ppc_dflt_of_bus_notifier = { - .notifier_call = ppc_dflt_bus_notify, - .priority = INT_MAX, -}; - static int __init setup_bus_notifier(void) { bus_register_notifier(&platform_bus_type, &ppc_dflt_plat_bus_notifier); - bus_register_notifier(&of_platform_bus_type, &ppc_dflt_of_bus_notifier); - return 0; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d135f93cb0f..1bee4b68fa4 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -142,16 +142,6 @@ early_param("smt-enabled", early_smt_enabled); #define check_smt_enabled() #endif /* CONFIG_SMP */ -/* Put the paca pointer into r13 and SPRG_PACA */ -static void __init setup_paca(struct paca_struct *new_paca) -{ - local_paca = new_paca; - mtspr(SPRN_SPRG_PACA, local_paca); -#ifdef CONFIG_PPC_BOOK3E - mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); -#endif -} - /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of @@ -600,6 +590,9 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + void __init setup_per_cpu_areas(void) { const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -624,8 +617,10 @@ void __init setup_per_cpu_areas(void) panic("cannot initialize percpu area (err=%d)", rc); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; - for_each_possible_cpu(cpu) - paca[cpu].data_offset = delta + pcpu_unit_offsets[cpu]; + for_each_possible_cpu(cpu) { + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; + paca[cpu].data_offset = __per_cpu_offset[cpu]; + } } #endif diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index a0afb555a7c..7109f5b1baa 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -11,6 +11,7 @@ #include <linux/tracehook.h> #include <linux/signal.h> +#include <asm/hw_breakpoint.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -149,6 +150,8 @@ static int do_signal_pending(sigset_t *oldset, struct pt_regs *regs) if (current->thread.dabr) set_dabr(current->thread.dabr); #endif + /* Re-enable the breakpoints for the signal stack */ + thread_change_pc(current, regs); if (is32) { if (ka.sa.sa_flags & SA_SIGINFO) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 5c196d1086d..a61b3ddd7bb 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -288,8 +288,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) max_cpus = NR_CPUS; else max_cpus = 1; - - smp_space_timers(max_cpus); for_each_possible_cpu(cpu) if (cpu != boot_cpuid) @@ -501,14 +499,6 @@ int __devinit start_secondary(void *unused) current->active_mm = &init_mm; smp_store_cpu_info(cpu); - -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) - /* Clear any pending timer interrupts */ - mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); - - /* Enable decrementer interrupt */ - mtspr(SPRN_TCR, TCR_DIE); -#endif set_dec(tb_ticks_per_jiffy); preempt_disable(); cpu_callin_map[cpu] = 1; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 0441bbdadbd..ccb8759c853 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -149,16 +149,6 @@ unsigned long tb_ticks_per_usec = 100; /* sane default */ EXPORT_SYMBOL(tb_ticks_per_usec); unsigned long tb_ticks_per_sec; EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */ -u64 tb_to_xs; -unsigned tb_to_us; - -#define TICKLEN_SCALE NTP_SCALE_SHIFT -static u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */ -static u64 ticklen_to_xs; /* 0.64 fraction */ - -/* If last_tick_len corresponds to about 1/HZ seconds, then - last_tick_len << TICKLEN_SHIFT will be about 2^63. */ -#define TICKLEN_SHIFT (63 - 30 - TICKLEN_SCALE + SHIFT_HZ) DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL_GPL(rtc_lock); @@ -174,7 +164,6 @@ unsigned long ppc_proc_freq; EXPORT_SYMBOL(ppc_proc_freq); unsigned long ppc_tb_freq; -static u64 tb_last_jiffy __cacheline_aligned_in_smp; static DEFINE_PER_CPU(u64, last_jiffy); #ifdef CONFIG_VIRT_CPU_ACCOUNTING @@ -423,30 +412,6 @@ void udelay(unsigned long usecs) } EXPORT_SYMBOL(udelay); -static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec, - u64 new_tb_to_xs) -{ - /* - * tb_update_count is used to allow the userspace gettimeofday code - * to assure itself that it sees a consistent view of the tb_to_xs and - * stamp_xsec variables. It reads the tb_update_count, then reads - * tb_to_xs and stamp_xsec and then reads tb_update_count again. If - * the two values of tb_update_count match and are even then the - * tb_to_xs and stamp_xsec values are consistent. If not, then it - * loops back and reads them again until this criteria is met. - * We expect the caller to have done the first increment of - * vdso_data->tb_update_count already. - */ - vdso_data->tb_orig_stamp = new_tb_stamp; - vdso_data->stamp_xsec = new_stamp_xsec; - vdso_data->tb_to_xs = new_tb_to_xs; - vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec; - vdso_data->stamp_xtime = xtime; - smp_wmb(); - ++(vdso_data->tb_update_count); -} - #ifdef CONFIG_SMP unsigned long profile_pc(struct pt_regs *regs) { @@ -470,7 +435,6 @@ EXPORT_SYMBOL(profile_pc); static int __init iSeries_tb_recal(void) { - struct div_result divres; unsigned long titan, tb; /* Make sure we only run on iSeries */ @@ -501,10 +465,7 @@ static int __init iSeries_tb_recal(void) tb_ticks_per_jiffy = new_tb_ticks_per_jiffy; tb_ticks_per_sec = new_tb_ticks_per_sec; calc_cputime_factors(); - div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres ); - tb_to_xs = divres.result_low; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; - vdso_data->tb_to_xs = tb_to_xs; setup_cputime_one_jiffy(); } else { @@ -667,27 +628,9 @@ void timer_interrupt(struct pt_regs * regs) trace_timer_interrupt_exit(regs); } -void wakeup_decrementer(void) -{ - unsigned long ticks; - - /* - * The timebase gets saved on sleep and restored on wakeup, - * so all we need to do is to reset the decrementer. - */ - ticks = tb_ticks_since(__get_cpu_var(last_jiffy)); - if (ticks < tb_ticks_per_jiffy) - ticks = tb_ticks_per_jiffy - ticks; - else - ticks = 1; - set_dec(ticks); -} - #ifdef CONFIG_SUSPEND -void generic_suspend_disable_irqs(void) +static void generic_suspend_disable_irqs(void) { - preempt_disable(); - /* Disable the decrementer, so that it doesn't interfere * with suspending. */ @@ -697,12 +640,9 @@ void generic_suspend_disable_irqs(void) set_dec(0x7fffffff); } -void generic_suspend_enable_irqs(void) +static void generic_suspend_enable_irqs(void) { - wakeup_decrementer(); - local_irq_enable(); - preempt_enable(); } /* Overrides the weak version in kernel/power/main.c */ @@ -722,23 +662,6 @@ void arch_suspend_enable_irqs(void) } #endif -#ifdef CONFIG_SMP -void __init smp_space_timers(unsigned int max_cpus) -{ - int i; - u64 previous_tb = per_cpu(last_jiffy, boot_cpuid); - - /* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */ - previous_tb -= tb_ticks_per_jiffy; - - for_each_possible_cpu(i) { - if (i == boot_cpuid) - continue; - per_cpu(last_jiffy, i) = previous_tb; - } -} -#endif - /* * Scheduler clock - returns current time in nanosec units. * @@ -873,10 +796,37 @@ static cycle_t timebase_read(struct clocksource *cs) return (cycle_t)get_tb(); } +static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec, + u64 new_tb_to_xs, struct timespec *now, + u32 frac_sec) +{ + /* + * tb_update_count is used to allow the userspace gettimeofday code + * to assure itself that it sees a consistent view of the tb_to_xs and + * stamp_xsec variables. It reads the tb_update_count, then reads + * tb_to_xs and stamp_xsec and then reads tb_update_count again. If + * the two values of tb_update_count match and are even then the + * tb_to_xs and stamp_xsec values are consistent. If not, then it + * loops back and reads them again until this criteria is met. + * We expect the caller to have done the first increment of + * vdso_data->tb_update_count already. + */ + vdso_data->tb_orig_stamp = new_tb_stamp; + vdso_data->stamp_xsec = new_stamp_xsec; + vdso_data->tb_to_xs = new_tb_to_xs; + vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec; + vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec; + vdso_data->stamp_xtime = *now; + vdso_data->stamp_sec_fraction = frac_sec; + smp_wmb(); + ++(vdso_data->tb_update_count); +} + void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, u32 mult) { u64 t2x, stamp_xsec; + u32 frac_sec; if (clock != &clocksource_timebase) return; @@ -888,10 +838,14 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, /* XXX this assumes clock->shift == 22 */ /* 4611686018 ~= 2^(20+64-22) / 1e9 */ t2x = (u64) mult * 4611686018ULL; - stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC; + stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC; do_div(stamp_xsec, 1000000000); - stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC; - update_gtod(clock->cycle_last, stamp_xsec, t2x); + stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC; + + BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC); + /* this is tv_nsec / 1e9 as a 0.32 fraction */ + frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32; + update_gtod(clock->cycle_last, stamp_xsec, t2x, wall_time, frac_sec); } void update_vsyscall_tz(void) @@ -1007,15 +961,13 @@ void secondary_cpu_time_init(void) /* This function is only called on the boot processor */ void __init time_init(void) { - unsigned long flags; struct div_result res; - u64 scale, x; + u64 scale; unsigned shift; if (__USE_RTC()) { /* 601 processor: dec counts down by 128 every 128ns */ ppc_tb_freq = 1000000000; - tb_last_jiffy = get_rtcl(); } else { /* Normal PowerPC with timebase register */ ppc_md.calibrate_decr(); @@ -1023,50 +975,15 @@ void __init time_init(void) ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); - tb_last_jiffy = get_tb(); } tb_ticks_per_jiffy = ppc_tb_freq / HZ; tb_ticks_per_sec = ppc_tb_freq; tb_ticks_per_usec = ppc_tb_freq / 1000000; - tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); calc_cputime_factors(); setup_cputime_one_jiffy(); /* - * Calculate the length of each tick in ns. It will not be - * exactly 1e9/HZ unless ppc_tb_freq is divisible by HZ. - * We compute 1e9 * tb_ticks_per_jiffy / ppc_tb_freq, - * rounded up. - */ - x = (u64) NSEC_PER_SEC * tb_ticks_per_jiffy + ppc_tb_freq - 1; - do_div(x, ppc_tb_freq); - tick_nsec = x; - last_tick_len = x << TICKLEN_SCALE; - - /* - * Compute ticklen_to_xs, which is a factor which gets multiplied - * by (last_tick_len << TICKLEN_SHIFT) to get a tb_to_xs value. - * It is computed as: - * ticklen_to_xs = 2^N / (tb_ticks_per_jiffy * 1e9) - * where N = 64 + 20 - TICKLEN_SCALE - TICKLEN_SHIFT - * which turns out to be N = 51 - SHIFT_HZ. - * This gives the result as a 0.64 fixed-point fraction. - * That value is reduced by an offset amounting to 1 xsec per - * 2^31 timebase ticks to avoid problems with time going backwards - * by 1 xsec when we do timer_recalc_offset due to losing the - * fractional xsec. That offset is equal to ppc_tb_freq/2^51 - * since there are 2^20 xsec in a second. - */ - div128_by_32((1ULL << 51) - ppc_tb_freq, 0, - tb_ticks_per_jiffy << SHIFT_HZ, &res); - div128_by_32(res.result_high, res.result_low, NSEC_PER_SEC, &res); - ticklen_to_xs = res.result_low; - - /* Compute tb_to_xs from tick_nsec */ - tb_to_xs = mulhdu(last_tick_len << TICKLEN_SHIFT, ticklen_to_xs); - - /* * Compute scale factor for sched_clock. * The calibrate_decr() function has set tb_ticks_per_sec, * which is the timebase frequency. @@ -1087,21 +1004,14 @@ void __init time_init(void) /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ boot_tb = get_tb_or_rtc(); - write_seqlock_irqsave(&xtime_lock, flags); - /* If platform provided a timezone (pmac), we correct the time */ if (timezone_offset) { sys_tz.tz_minuteswest = -timezone_offset / 60; sys_tz.tz_dsttime = 0; } - vdso_data->tb_orig_stamp = tb_last_jiffy; vdso_data->tb_update_count = 0; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; - vdso_data->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC; - vdso_data->tb_to_xs = tb_to_xs; - - write_sequnlock_irqrestore(&xtime_lock, flags); /* Start the decrementer on CPUs that have manual control * such as BookE @@ -1195,39 +1105,6 @@ void to_tm(int tim, struct rtc_time * tm) GregorianDay(tm); } -/* Auxiliary function to compute scaling factors */ -/* Actually the choice of a timebase running at 1/4 the of the bus - * frequency giving resolution of a few tens of nanoseconds is quite nice. - * It makes this computation very precise (27-28 bits typically) which - * is optimistic considering the stability of most processor clock - * oscillators and the precision with which the timebase frequency - * is measured but does not harm. - */ -unsigned mulhwu_scale_factor(unsigned inscale, unsigned outscale) -{ - unsigned mlt=0, tmp, err; - /* No concern for performance, it's done once: use a stupid - * but safe and compact method to find the multiplier. - */ - - for (tmp = 1U<<31; tmp != 0; tmp >>= 1) { - if (mulhwu(inscale, mlt|tmp) < outscale) - mlt |= tmp; - } - - /* We might still be off by 1 for the best approximation. - * A side effect of this is that if outscale is too large - * the returned value will be zero. - * Many corner cases have been checked and seem to work, - * some might have been forgotten in the test however. - */ - - err = inscale * (mlt+1); - if (err <= inscale/2) - mlt++; - return mlt; -} - /* * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit * result. diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 25fc33984c2..a45a63c3a0c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -55,9 +55,6 @@ #endif #include <asm/kexec.h> #include <asm/ppc-opcode.h> -#ifdef CONFIG_FSL_BOOKE -#include <asm/dbell.h> -#endif #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -688,7 +685,7 @@ void RunModeException(struct pt_regs *regs) void __kprobes single_step_exception(struct pt_regs *regs) { - regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */ + clear_single_step(regs); if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) @@ -707,10 +704,8 @@ void __kprobes single_step_exception(struct pt_regs *regs) */ static void emulate_single_step(struct pt_regs *regs) { - if (single_stepping(regs)) { - clear_single_step(regs); - _exception(SIGTRAP, regs, TRAP_TRACE, 0); - } + if (single_stepping(regs)) + single_step_exception(regs); } static inline int __parse_fpscr(unsigned long fpscr) @@ -1344,24 +1339,6 @@ void vsx_assist_exception(struct pt_regs *regs) #endif /* CONFIG_VSX */ #ifdef CONFIG_FSL_BOOKE - -void doorbell_exception(struct pt_regs *regs) -{ -#ifdef CONFIG_SMP - int cpu = smp_processor_id(); - int msg; - - if (num_online_cpus() < 2) - return; - - for (msg = 0; msg < 4; msg++) - if (test_and_clear_bit(msg, &dbell_smp_message[cpu])) - smp_message_recv(msg); -#else - printk(KERN_WARNING "Received doorbell on non-smp system\n"); -#endif -} - void CacheLockingException(struct pt_regs *regs, unsigned long address, unsigned long error_code) { diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index ee038d4bf25..4ee09ee2e83 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -19,8 +19,10 @@ /* Offset for the low 32-bit part of a field of long type */ #ifdef CONFIG_PPC64 #define LOPART 4 +#define TSPEC_TV_SEC TSPC64_TV_SEC+LOPART #else #define LOPART 0 +#define TSPEC_TV_SEC TSPC32_TV_SEC #endif .text @@ -41,23 +43,11 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) mr r9, r3 /* datapage ptr in r9 */ cmplwi r10,0 /* check if tv is NULL */ beq 3f - bl __do_get_xsec@local /* get xsec from tb & kernel */ - bne- 2f /* out of line -> do syscall */ - - /* seconds are xsec >> 20 */ - rlwinm r5,r4,12,20,31 - rlwimi r5,r3,12,0,19 - stw r5,TVAL32_TV_SEC(r10) - - /* get remaining xsec and convert to usec. we scale - * up remaining xsec by 12 bits and get the top 32 bits - * of the multiplication - */ - rlwinm r5,r4,12,0,19 - lis r6,1000000@h - ori r6,r6,1000000@l - mulhwu r5,r5,r6 - stw r5,TVAL32_TV_USEC(r10) + lis r7,1000000@ha /* load up USEC_PER_SEC */ + addi r7,r7,1000000@l /* so we get microseconds in r4 */ + bl __do_get_tspec@local /* get sec/usec from tb & kernel */ + stw r3,TVAL32_TV_SEC(r10) + stw r4,TVAL32_TV_USEC(r10) 3: cmplwi r11,0 /* check if tz is NULL */ beq 1f @@ -70,14 +60,6 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) crclr cr0*4+so li r3,0 blr - -2: - mtlr r12 - mr r3,r10 - mr r4,r11 - li r0,__NR_gettimeofday - sc - blr .cfi_endproc V_FUNCTION_END(__kernel_gettimeofday) @@ -100,7 +82,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) mr r11,r4 /* r11 saves tp */ bl __get_datapage@local /* get data page */ mr r9,r3 /* datapage ptr in r9 */ - + lis r7,NSEC_PER_SEC@h /* want nanoseconds */ + ori r7,r7,NSEC_PER_SEC@l 50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */ bne cr1,80f /* not monotonic -> all done */ @@ -198,83 +181,12 @@ V_FUNCTION_END(__kernel_clock_getres) /* - * This is the core of gettimeofday() & friends, it returns the xsec - * value in r3 & r4 and expects the datapage ptr (non clobbered) - * in r9. clobbers r0,r4,r5,r6,r7,r8. - * When returning, r8 contains the counter value that can be reused - * by the monotonic clock implementation - */ -__do_get_xsec: - .cfi_startproc - /* Check for update count & load values. We use the low - * order 32 bits of the update count - */ -1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r9,r9,r0 - - /* Load orig stamp (offset to TB) */ - lwz r5,CFG_TB_ORIG_STAMP(r9) - lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) - - /* Get a stable TB value */ -2: mftbu r3 - mftbl r4 - mftbu r0 - cmpl cr0,r3,r0 - bne- 2b - - /* Substract tb orig stamp. If the high part is non-zero, we jump to - * the slow path which call the syscall. - * If it's ok, then we have our 32 bits tb_ticks value in r7 - */ - subfc r7,r6,r4 - subfe. r0,r5,r3 - bne- 3f - - /* Load scale factor & do multiplication */ - lwz r5,CFG_TB_TO_XS(r9) /* load values */ - lwz r6,(CFG_TB_TO_XS+4)(r9) - mulhwu r4,r7,r5 - mulhwu r6,r7,r6 - mullw r0,r7,r5 - addc r6,r6,r0 - - /* At this point, we have the scaled xsec value in r4 + XER:CA - * we load & add the stamp since epoch - */ - lwz r5,CFG_STAMP_XSEC(r9) - lwz r6,(CFG_STAMP_XSEC+4)(r9) - adde r4,r4,r6 - addze r3,r5 - - /* We now have our result in r3,r4. We create a fake dependency - * on that result and re-check the counter - */ - or r6,r4,r3 - xor r0,r6,r6 - add r9,r9,r0 - lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - cmpl cr0,r8,r0 /* check if updated */ - bne- 1b - - /* Warning ! The caller expects CR:EQ to be set to indicate a - * successful calculation (so it won't fallback to the syscall - * method). We have overriden that CR bit in the counter check, - * but fortunately, the loop exit condition _is_ CR:EQ set, so - * we can exit safely here. If you change this code, be careful - * of that side effect. - */ -3: blr - .cfi_endproc - -/* - * This is the core of clock_gettime(), it returns the current - * time in seconds and nanoseconds in r3 and r4. + * This is the core of clock_gettime() and gettimeofday(), + * it returns the current time in r3 (seconds) and r4. + * On entry, r7 gives the resolution of r4, either USEC_PER_SEC + * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds. * It expects the datapage ptr in r9 and doesn't clobber it. - * It clobbers r0, r5, r6, r10 and returns NSEC_PER_SEC in r7. + * It clobbers r0, r5 and r6. * On return, r8 contains the counter value that can be reused. * This clobbers cr0 but not any other cr field. */ @@ -297,70 +209,58 @@ __do_get_tspec: 2: mftbu r3 mftbl r4 mftbu r0 - cmpl cr0,r3,r0 + cmplw cr0,r3,r0 bne- 2b /* Subtract tb orig stamp and shift left 12 bits. */ - subfc r7,r6,r4 + subfc r4,r6,r4 subfe r0,r5,r3 slwi r0,r0,12 - rlwimi. r0,r7,12,20,31 - slwi r7,r7,12 + rlwimi. r0,r4,12,20,31 + slwi r4,r4,12 - /* Load scale factor & do multiplication */ + /* + * Load scale factor & do multiplication. + * We only use the high 32 bits of the tb_to_xs value. + * Even with a 1GHz timebase clock, the high 32 bits of + * tb_to_xs will be at least 4 million, so the error from + * ignoring the low 32 bits will be no more than 0.25ppm. + * The error will just make the clock run very very slightly + * slow until the next time the kernel updates the VDSO data, + * at which point the clock will catch up to the kernel's value, + * so there is no long-term error accumulation. + */ lwz r5,CFG_TB_TO_XS(r9) /* load values */ - lwz r6,(CFG_TB_TO_XS+4)(r9) - mulhwu r3,r7,r6 - mullw r10,r7,r5 - mulhwu r4,r7,r5 - addc r10,r3,r10 + mulhwu r4,r4,r5 li r3,0 beq+ 4f /* skip high part computation if 0 */ mulhwu r3,r0,r5 - mullw r7,r0,r5 - mulhwu r5,r0,r6 - mullw r6,r0,r6 - adde r4,r4,r7 - addze r3,r3 + mullw r5,r0,r5 addc r4,r4,r5 addze r3,r3 - addc r10,r10,r6 - -4: addze r4,r4 /* add in carry */ - lis r7,NSEC_PER_SEC@h - ori r7,r7,NSEC_PER_SEC@l - mulhwu r4,r4,r7 /* convert to nanoseconds */ - - /* At this point, we have seconds & nanoseconds since the xtime - * stamp in r3+CA and r4. Load & add the xtime stamp. +4: + /* At this point, we have seconds since the xtime stamp + * as a 32.32 fixed-point number in r3 and r4. + * Load & add the xtime stamp. */ -#ifdef CONFIG_PPC64 - lwz r5,STAMP_XTIME+TSPC64_TV_SEC+LOPART(r9) - lwz r6,STAMP_XTIME+TSPC64_TV_NSEC+LOPART(r9) -#else - lwz r5,STAMP_XTIME+TSPC32_TV_SEC(r9) - lwz r6,STAMP_XTIME+TSPC32_TV_NSEC(r9) -#endif - add r4,r4,r6 + lwz r5,STAMP_XTIME+TSPEC_TV_SEC(r9) + lwz r6,STAMP_SEC_FRAC(r9) + addc r4,r4,r6 adde r3,r3,r5 - /* We now have our result in r3,r4. We create a fake dependency - * on that result and re-check the counter + /* We create a fake dependency on the result in r3/r4 + * and re-check the counter */ or r6,r4,r3 xor r0,r6,r6 add r9,r9,r0 lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - cmpl cr0,r8,r0 /* check if updated */ + cmplw cr0,r8,r0 /* check if updated */ bne- 1b - /* check for nanosecond overflow and adjust if necessary */ - cmpw r4,r7 - bltlr /* all done if no overflow */ - subf r4,r7,r4 /* adjust if overflow */ - addi r3,r3,1 + mulhwu r4,r4,r7 /* convert to micro or nanoseconds */ blr .cfi_endproc diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 262cd5857a5..e97a9a0dc4a 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -33,18 +33,11 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) bl V_LOCAL_FUNC(__get_datapage) /* get data page */ cmpldi r11,0 /* check if tv is NULL */ beq 2f - bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ - lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */ - ori r7,r7,16960 - rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ - rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ - std r5,TVAL64_TV_SEC(r11) /* store sec in tv */ - subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ - mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / - * XSEC_PER_SEC - */ - rldicl r0,r0,44,20 - std r0,TVAL64_TV_USEC(r11) /* store usec in tv */ + lis r7,1000000@ha /* load up USEC_PER_SEC */ + addi r7,r7,1000000@l + bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */ + std r4,TVAL64_TV_SEC(r11) /* store sec in tv */ + std r5,TVAL64_TV_USEC(r11) /* store usec in tv */ 2: cmpldi r10,0 /* check if tz is NULL */ beq 1f lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ @@ -77,6 +70,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) .cfi_register lr,r12 mr r11,r4 /* r11 saves tp */ bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + lis r7,NSEC_PER_SEC@h /* want nanoseconds */ + ori r7,r7,NSEC_PER_SEC@l 50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */ bne cr1,80f /* if not monotonic, all done */ @@ -171,49 +166,12 @@ V_FUNCTION_END(__kernel_clock_getres) /* - * This is the core of gettimeofday(), it returns the xsec - * value in r4 and expects the datapage ptr (non clobbered) - * in r3. clobbers r0,r4,r5,r6,r7,r8 - * When returning, r8 contains the counter value that can be reused - */ -V_FUNCTION_BEGIN(__do_get_xsec) - .cfi_startproc - /* check for update count & load values */ -1: ld r8,CFG_TB_UPDATE_COUNT(r3) - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r3,r3,r0 - - /* Get TB & offset it. We use the MFTB macro which will generate - * workaround code for Cell. - */ - MFTB(r7) - ld r9,CFG_TB_ORIG_STAMP(r3) - subf r7,r9,r7 - - /* Scale result */ - ld r5,CFG_TB_TO_XS(r3) - mulhdu r7,r7,r5 - - /* Add stamp since epoch */ - ld r6,CFG_STAMP_XSEC(r3) - add r4,r6,r7 - - xor r0,r4,r4 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r8 /* check if updated */ - bne- 1b - blr - .cfi_endproc -V_FUNCTION_END(__do_get_xsec) - -/* - * This is the core of clock_gettime(), it returns the current - * time in seconds and nanoseconds in r4 and r5. + * This is the core of clock_gettime() and gettimeofday(), + * it returns the current time in r4 (seconds) and r5. + * On entry, r7 gives the resolution of r5, either USEC_PER_SEC + * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds. * It expects the datapage ptr in r3 and doesn't clobber it. - * It clobbers r0 and r6 and returns NSEC_PER_SEC in r7. + * It clobbers r0, r6 and r9. * On return, r8 contains the counter value that can be reused. * This clobbers cr0 but not any other cr field. */ @@ -229,18 +187,18 @@ V_FUNCTION_BEGIN(__do_get_tspec) /* Get TB & offset it. We use the MFTB macro which will generate * workaround code for Cell. */ - MFTB(r7) + MFTB(r6) ld r9,CFG_TB_ORIG_STAMP(r3) - subf r7,r9,r7 + subf r6,r9,r6 /* Scale result */ ld r5,CFG_TB_TO_XS(r3) - sldi r7,r7,12 /* compute time since stamp_xtime */ - mulhdu r6,r7,r5 /* in units of 2^-32 seconds */ + sldi r6,r6,12 /* compute time since stamp_xtime */ + mulhdu r6,r6,r5 /* in units of 2^-32 seconds */ /* Add stamp since epoch */ ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3) - ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3) + lwz r5,STAMP_SEC_FRAC(r3) or r0,r4,r5 or r0,r0,r6 xor r0,r0,r0 @@ -250,17 +208,11 @@ V_FUNCTION_BEGIN(__do_get_tspec) bne- 1b /* reload if so */ /* convert to seconds & nanoseconds and add to stamp */ - lis r7,NSEC_PER_SEC@h - ori r7,r7,NSEC_PER_SEC@l - mulhwu r0,r6,r7 /* compute nanoseconds and */ + add r6,r6,r5 /* add on fractional seconds of xtime */ + mulhwu r5,r6,r7 /* compute micro or nanoseconds and */ srdi r6,r6,32 /* seconds since stamp_xtime */ - clrldi r0,r0,32 - add r5,r5,r0 /* add nanoseconds together */ - cmpd r5,r7 /* overflow? */ + clrldi r5,r5,32 add r4,r4,r6 - bltlr /* all done if no overflow */ - subf r5,r7,r5 /* if overflow, adjust */ - addi r4,r4,1 blr .cfi_endproc V_FUNCTION_END(__do_get_tspec) |