diff options
Diffstat (limited to 'arch/powerpc/kernel')
55 files changed, 2004 insertions, 1302 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c002b041021..877326320e7 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -98,11 +98,16 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o perf_callchain.o +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o + +obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o +obj-$(CONFIG_FSL_EMB_PERF_EVENT) += perf_event_fsl_emb.o +obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o + obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o ifneq ($(CONFIG_PPC_INDIRECT_IO),y) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a6c2b63227b..957ceb7059c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -194,6 +194,30 @@ int main(void) DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest)); DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb)); DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max)); + DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr)); + DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer)); + DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0])); + DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1])); + DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2])); + DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3])); + DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4])); + DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5])); + DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6])); + DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7])); + DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8])); + DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9])); + DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10])); + DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11])); + DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12])); + DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13])); + DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1)); + DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2)); + DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct, + shadow_vcpu.vmhandler)); + DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct, + shadow_vcpu.scratch0)); + DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct, + shadow_vcpu.scratch1)); #endif #endif /* CONFIG_PPC64 */ @@ -389,8 +413,6 @@ int main(void) DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); - DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); - DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); @@ -411,11 +433,16 @@ int main(void) DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2)); DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); + DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); + DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); -#endif +#else + DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); + DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); +#endif /* CONFIG_PPC64 */ #endif #ifdef CONFIG_44x DEFINE(PGD_T_LOG2, PGD_T_LOG2); diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index bb37b1d19a5..a3c684b4c86 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -19,6 +19,7 @@ #include <linux/notifier.h> #include <linux/of.h> #include <linux/percpu.h> +#include <linux/slab.h> #include <asm/prom.h> #include "cacheinfo.h" @@ -642,7 +643,7 @@ static struct kobj_attribute *cache_index_opt_attrs[] = { &cache_assoc_attr, }; -static struct sysfs_ops cache_index_ops = { +static const struct sysfs_ops cache_index_ops = { .show = cache_index_show, }; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 2fc82bac3bb..8af4949434b 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1808,7 +1808,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500", /* xxx - galak, e500mc? */ + .oprofile_cpu_type = "ppc/e500mc", .oprofile_type = PPC_OPROFILE_FSL_EMB, .cpu_setup = __setup_cpu_e500mc, .machine_check = machine_check_e500, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 6215062caf8..6c1df5757cd 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -8,6 +8,7 @@ #include <linux/device.h> #include <linux/dma-mapping.h> #include <linux/dma-debug.h> +#include <linux/gfp.h> #include <linux/lmb.h> #include <asm/bug.h> #include <asm/abs_addr.h> diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c new file mode 100644 index 00000000000..7c07de0d894 --- /dev/null +++ b/arch/powerpc/kernel/e500-pmu.c @@ -0,0 +1,129 @@ +/* + * Performance counter support for e500 family processors. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * Copyright 2010 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/string.h> +#include <linux/perf_event.h> +#include <asm/reg.h> +#include <asm/cputable.h> + +/* + * Map of generic hardware event types to hardware events + * Zero if unsupported + */ +static int e500_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 1, + [PERF_COUNT_HW_INSTRUCTIONS] = 2, + [PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12, + [PERF_COUNT_HW_BRANCH_MISSES] = 15, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + /* + * D-cache misses are not split into read/write/prefetch; + * use raw event 41. + */ + [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 27, 0 }, + [C(OP_WRITE)] = { 28, 0 }, + [C(OP_PREFETCH)] = { 29, 0 }, + }, + [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 2, 60 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + /* + * Assuming LL means L2, it's not a good match for this model. + * It allocates only on L1 castout or explicit prefetch, and + * does not have separate read/write events (but it does have + * separate instruction/data events). + */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { 0, 0 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + /* + * There are data/instruction MMU misses, but that's a miss on + * the chip's internal level-one TLB which is probably not + * what the user wants. Instead, unified level-two TLB misses + * are reported here. + */ + [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 26, 66 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 12, 15 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, +}; + +static int num_events = 128; + +/* Upper half of event id is PMLCb, for threshold events */ +static u64 e500_xlate_event(u64 event_id) +{ + u32 event_low = (u32)event_id; + u64 ret; + + if (event_low >= num_events) + return 0; + + ret = FSL_EMB_EVENT_VALID; + + if (event_low >= 76 && event_low <= 81) { + ret |= FSL_EMB_EVENT_RESTRICTED; + ret |= event_id & + (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH); + } else if (event_id & + (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH)) { + /* Threshold requested on non-threshold event */ + return 0; + } + + return ret; +} + +static struct fsl_emb_pmu e500_pmu = { + .name = "e500 family", + .n_counter = 4, + .n_restricted = 2, + .xlate_event = e500_xlate_event, + .n_generic = ARRAY_SIZE(e500_generic_events), + .generic_events = e500_generic_events, + .cache_events = &e500_cache_events, +}; + +static int init_e500_pmu(void) +{ + if (!cur_cpu_spec->oprofile_cpu_type) + return -ENODEV; + + if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc")) + num_events = 256; + else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500")) + return -ENODEV; + + return register_fsl_emb_pmu(&e500_pmu); +} + +arch_initcall(init_e500_pmu); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index bdcb557d470..07109d84378 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -791,9 +791,8 @@ _GLOBAL(enter_rtas) li r9,1 rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG) - ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP + ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI andc r6,r0,r9 - ori r6,r6,MSR_RI sync /* disable interrupts so SRR0/1 */ mtmsrd r0 /* don't get trashed */ diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index 1679a70bbca..6b1f4271eb5 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -17,5 +17,5 @@ #include <asm/firmware.h> -unsigned long powerpc_firmware_features; +unsigned long powerpc_firmware_features __read_mostly; EXPORT_SYMBOL_GPL(powerpc_firmware_features); diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 92580748802..bed9a29ee38 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -219,7 +219,8 @@ generic_secondary_common_init: * physical cpu id in r24, we need to search the pacas to find * which logical id maps to our physical one. */ - LOAD_REG_ADDR(r13, paca) /* Get base vaddr of paca array */ + LOAD_REG_ADDR(r13, paca) /* Load paca pointer */ + ld r13,0(r13) /* Get base vaddr of paca array */ li r5,0 /* logical cpu id */ 1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ cmpw r6,r24 /* Compare to our id */ @@ -536,7 +537,8 @@ _GLOBAL(pmac_secondary_start) mtmsrd r3 /* RI on */ /* Set up a paca value for this processor. */ - LOAD_REG_ADDR(r4,paca) /* Get base vaddr of paca array */ + LOAD_REG_ADDR(r4,paca) /* Load paca pointer */ + ld r4,0(r4) /* Get base vaddr of paca array */ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ add r13,r13,r4 /* for this processor. */ mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG*/ @@ -615,6 +617,17 @@ _GLOBAL(start_secondary_prolog) std r3,0(r1) /* Zero the stack frame pointer */ bl .start_secondary b . +/* + * Reset stack pointer and call start_secondary + * to continue with online operation when woken up + * from cede in cpu offline. + */ +_GLOBAL(start_secondary_resume) + ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */ + li r3,0 + std r3,0(r1) /* Zero the stack frame pointer */ + bl .start_secondary + b . #endif /* diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 678f98cd5e6..3ef743fa5d7 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -542,11 +542,11 @@ DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR FixupDAR:/* Entry point for dcbx workaround. */ /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 + andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ DO_8xx_CPU6(0x3780, r3) mtspr SPRN_MD_EPN, r10 mfspr r11, SPRN_M_TWB /* Get level 1 table entry address */ - cmplwi cr0, r11, 0x0800 - blt- 3f /* Branch if user space */ + beq- 3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l rlwimi r11, r10, 32-20, 0xffc /* r11 = r11&~0xffc|(r10>>20)&0xffc */ @@ -768,12 +768,12 @@ start_here: */ initial_mmu: tlbia /* Invalidate all TLB entries */ -#ifdef CONFIG_PIN_TLB +/* Always pin the first 8 MB ITLB to prevent ITLB + misses while mucking around with SRR0/SRR1 in asm +*/ lis r8, MI_RSV4I@h ori r8, r8, 0x1c00 -#else - li r8, 0 -#endif + mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ #ifdef CONFIG_PIN_TLB diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 7f4bd7f3b6a..72552654799 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -214,11 +214,11 @@ skpinv: addi r6,r6,1 /* Increment */ bl 1f /* Find our address */ 1: mflr r9 rlwimi r7,r9,0,20,31 - addi r7,r7,24 + addi r7,r7,(2f - 1b) mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r6 rfi - +2: /* 4. Clear out PIDs & Search info */ li r6,0 mtspr SPRN_MAS6,r6 @@ -747,9 +747,6 @@ finish_tlb_load: #else rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ #endif -#ifdef CONFIG_SMP - ori r12, r12, MAS2_M -#endif mtspr SPRN_MAS2, r12 #ifdef CONFIG_PTE_64BIT @@ -887,13 +884,17 @@ KernelSPE: lwz r3,_MSR(r1) oris r3,r3,MSR_SPE@h stw r3,_MSR(r1) /* enable use of SPE after return */ +#ifdef CONFIG_PRINTK lis r3,87f@h ori r3,r3,87f@l mr r4,r2 /* current */ lwz r5,_NIP(r1) bl printk +#endif b ret_from_except +#ifdef CONFIG_PRINTK 87: .string "SPE used in kernel (task=%p, pc=%x) \n" +#endif .align 4,0 #endif /* CONFIG_SPE */ diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index a4c8b38b0ba..71cf280da18 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -42,6 +42,7 @@ #include <linux/dma-mapping.h> #include <linux/interrupt.h> #include <linux/of.h> +#include <linux/slab.h> #include <linux/of_platform.h> #include <asm/ibmebus.h> #include <asm/abs_addr.h> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 5547ae6e6b0..ec94f906ea4 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -42,12 +42,7 @@ #define DBG(...) -#ifdef CONFIG_IOMMU_VMERGE -static int novmerge = 0; -#else -static int novmerge = 1; -#endif - +static int novmerge; static int protect4gb = 1; static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 9040330b053..64f6f2031c2 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -73,8 +73,10 @@ #define CREATE_TRACE_POINTS #include <asm/trace.h> +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + int __irq_offset_value; -static int ppc_spurious_interrupts; #ifdef CONFIG_PPC32 EXPORT_SYMBOL(__irq_offset_value); @@ -180,30 +182,64 @@ notrace void raw_local_irq_restore(unsigned long en) EXPORT_SYMBOL(raw_local_irq_restore); #endif /* CONFIG_PPC64 */ +static int show_other_interrupts(struct seq_file *p, int prec) +{ + int j; + +#if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) + if (tau_initialized) { + seq_printf(p, "%*s: ", prec, "TAU"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", tau_interrupts(j)); + seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); + } +#endif /* CONFIG_PPC32 && CONFIG_TAU_INT */ + + seq_printf(p, "%*s: ", prec, "LOC"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs); + seq_printf(p, " Local timer interrupts\n"); + + seq_printf(p, "%*s: ", prec, "SPU"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); + seq_printf(p, " Spurious interrupts\n"); + + seq_printf(p, "%*s: ", prec, "CNT"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); + seq_printf(p, " Performance monitoring interrupts\n"); + + seq_printf(p, "%*s: ", prec, "MCE"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); + seq_printf(p, " Machine check exceptions\n"); + + return 0; +} + int show_interrupts(struct seq_file *p, void *v) { - int i = *(loff_t *)v, j; + unsigned long flags, any_count = 0; + int i = *(loff_t *) v, j, prec; struct irqaction *action; struct irq_desc *desc; - unsigned long flags; + if (i > nr_irqs) + return 0; + + for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec) + j *= 10; + + if (i == nr_irqs) + return show_other_interrupts(p, prec); + + /* print header */ if (i == 0) { - seq_puts(p, " "); + seq_printf(p, "%*s", prec + 8, ""); for_each_online_cpu(j) - seq_printf(p, "CPU%d ", j); + seq_printf(p, "CPU%-8d", j); seq_putc(p, '\n'); - } else if (i == nr_irqs) { -#if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) - if (tau_initialized){ - seq_puts(p, "TAU: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", tau_interrupts(j)); - seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); - } -#endif /* CONFIG_PPC32 && CONFIG_TAU_INT*/ - seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts); - - return 0; } desc = irq_to_desc(i); @@ -211,37 +247,48 @@ int show_interrupts(struct seq_file *p, void *v) return 0; raw_spin_lock_irqsave(&desc->lock, flags); - + for_each_online_cpu(j) + any_count |= kstat_irqs_cpu(i, j); action = desc->action; - if (!action || !action->handler) - goto skip; + if (!action && !any_count) + goto out; - seq_printf(p, "%3d: ", i); -#ifdef CONFIG_SMP + seq_printf(p, "%*d: ", prec, i); for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); -#else - seq_printf(p, "%10u ", kstat_irqs(i)); -#endif /* CONFIG_SMP */ if (desc->chip) - seq_printf(p, " %s ", desc->chip->name); + seq_printf(p, " %-16s", desc->chip->name); else - seq_puts(p, " None "); + seq_printf(p, " %-16s", "None"); + seq_printf(p, " %-8s", (desc->status & IRQ_LEVEL) ? "Level" : "Edge"); - seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge "); - seq_printf(p, " %s", action->name); + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); + } - for (action = action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); seq_putc(p, '\n'); - -skip: +out: raw_spin_unlock_irqrestore(&desc->lock, flags); - return 0; } +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = per_cpu(irq_stat, cpu).timer_irqs; + + sum += per_cpu(irq_stat, cpu).pmu_irqs; + sum += per_cpu(irq_stat, cpu).mce_exceptions; + sum += per_cpu(irq_stat, cpu).spurious_irqs; + + return sum; +} + #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(cpumask_t map) { @@ -353,8 +400,7 @@ void do_IRQ(struct pt_regs *regs) if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) handle_one_irq(irq); else if (irq != NO_IRQ_IGNORE) - /* That's not SMP safe ... but who cares ? */ - ppc_spurious_interrupts++; + __get_cpu_var(irq_stat).spurious_irqs++; irq_exit(); set_irq_regs(old_regs); @@ -474,7 +520,7 @@ void do_softirq(void) */ static LIST_HEAD(irq_hosts); -static DEFINE_SPINLOCK(irq_big_lock); +static DEFINE_RAW_SPINLOCK(irq_big_lock); static unsigned int revmap_trees_allocated; static DEFINE_MUTEX(revmap_trees_mutex); struct irq_map_entry irq_map[NR_IRQS]; @@ -520,14 +566,14 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, if (host->ops->match == NULL) host->ops->match = default_irq_host_match; - spin_lock_irqsave(&irq_big_lock, flags); + raw_spin_lock_irqsave(&irq_big_lock, flags); /* If it's a legacy controller, check for duplicates and * mark it as allocated (we use irq 0 host pointer for that */ if (revmap_type == IRQ_HOST_MAP_LEGACY) { if (irq_map[0].host != NULL) { - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); /* If we are early boot, we can't free the structure, * too bad... * this will be fixed once slab is made available early @@ -541,7 +587,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, } list_add(&host->link, &irq_hosts); - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); /* Additional setups per revmap type */ switch(revmap_type) { @@ -592,13 +638,13 @@ struct irq_host *irq_find_host(struct device_node *node) * the absence of a device node. This isn't a problem so far * yet though... */ - spin_lock_irqsave(&irq_big_lock, flags); + raw_spin_lock_irqsave(&irq_big_lock, flags); list_for_each_entry(h, &irq_hosts, link) if (h->ops->match(h, node)) { found = h; break; } - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); return found; } EXPORT_SYMBOL_GPL(irq_find_host); @@ -967,7 +1013,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS)) return NO_IRQ; - spin_lock_irqsave(&irq_big_lock, flags); + raw_spin_lock_irqsave(&irq_big_lock, flags); /* Use hint for 1 interrupt if any */ if (count == 1 && hint >= NUM_ISA_INTERRUPTS && @@ -991,7 +1037,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, } } if (found == NO_IRQ) { - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); return NO_IRQ; } hint_found: @@ -1000,7 +1046,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, smp_wmb(); irq_map[i].host = host; } - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); return found; } @@ -1012,7 +1058,7 @@ void irq_free_virt(unsigned int virq, unsigned int count) WARN_ON (virq < NUM_ISA_INTERRUPTS); WARN_ON (count == 0 || (virq + count) > irq_virq_count); - spin_lock_irqsave(&irq_big_lock, flags); + raw_spin_lock_irqsave(&irq_big_lock, flags); for (i = virq; i < (virq + count); i++) { struct irq_host *host; @@ -1025,7 +1071,7 @@ void irq_free_virt(unsigned int virq, unsigned int count) smp_wmb(); irq_map[i].host = NULL; } - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); } int arch_early_irq_init(void) diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index b6bd1eaa1c2..41bada0298c 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -333,7 +333,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, atomic_set(&kgdb_cpu_doing_single_step, -1); /* set the trace bit if we're stepping */ if (remcom_in_buffer[0] == 's') { -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); linux_regs->msr |= MSR_DE; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index c9329786073..b36f074524a 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -31,12 +31,13 @@ #include <linux/preempt.h> #include <linux/module.h> #include <linux/kdebug.h> +#include <linux/slab.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/uaccess.h> #include <asm/system.h> -#ifdef CONFIG_BOOKE +#ifdef CONFIG_PPC_ADV_DEBUG_REGS #define MSR_SINGLESTEP (MSR_DE) #else #define MSR_SINGLESTEP (MSR_SE) @@ -110,7 +111,7 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) * like Decrementer or External Interrupt */ regs->msr &= ~MSR_EE; regs->msr |= MSR_SINGLESTEP; -#ifdef CONFIG_BOOKE +#ifdef CONFIG_PPC_ADV_DEBUG_REGS regs->msr &= ~MSR_CE; mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); #endif diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 9ddfaef1a18..035ada5443e 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -469,7 +469,7 @@ static int __init serial_dev_init(void) return -ENODEV; /* - * Before we register the platfrom serial devices, we need + * Before we register the platform serial devices, we need * to fixup their interrupts and their IO ports. */ DBG("Fixing serial ports interrupts and IO ports ...\n"); diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 79a00bb9c64..c2c70e1b32c 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -24,6 +24,7 @@ #include <linux/proc_fs.h> #include <linux/init.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/iseries/hv_lp_config.h> #include <asm/lppaca.h> @@ -359,7 +360,7 @@ static void parse_system_parameter_string(struct seq_file *m) unsigned char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); if (!local_buffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d \n", + printk(KERN_ERR "%s %s kmalloc failure at line %d\n", __FILE__, __func__, __LINE__); return; } @@ -383,13 +384,13 @@ static void parse_system_parameter_string(struct seq_file *m) int idx, w_idx; char *workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); if (!workbuffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d \n", + printk(KERN_ERR "%s %s kmalloc failure at line %d\n", __FILE__, __func__, __LINE__); kfree(local_buffer); return; } #ifdef LPARCFG_DEBUG - printk(KERN_INFO "success calling get-system-parameter \n"); + printk(KERN_INFO "success calling get-system-parameter\n"); #endif splpar_strlen = local_buffer[0] * 256 + local_buffer[1]; local_buffer += 2; /* step over strlen value */ @@ -440,7 +441,7 @@ static int lparcfg_count_active_processors(void) while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) { #ifdef LPARCFG_DEBUG - printk(KERN_ERR "cpus_dn %p \n", cpus_dn); + printk(KERN_ERR "cpus_dn %p\n", cpus_dn); #endif count++; } @@ -725,7 +726,7 @@ static int lparcfg_data(struct seq_file *m, void *v) const unsigned int *lp_index_ptr; unsigned int lp_index = 0; - seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS); + seq_printf(m, "%s %s\n", MODULE_NAME, MODULE_VERS); rootdn = of_find_node_by_path("/"); if (rootdn) { diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 2d29752cbe1..22e507c8a55 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -127,3 +127,29 @@ _GLOBAL(__setup_cpu_power7) _GLOBAL(__restore_cpu_power7) /* place holder */ blr + +/* + * Get a minimal set of registers for our caller's nth caller. + * r3 = regs pointer, r5 = n. + * + * We only get R1 (stack pointer), NIP (next instruction pointer) + * and LR (link register). These are all we can get in the + * general case without doing complicated stack unwinding, but + * fortunately they are enough to do a stack backtrace, which + * is all we need them for. + */ +_GLOBAL(perf_arch_fetch_caller_regs) + mr r6,r1 + cmpwi r5,0 + mflr r4 + ble 2f + mtctr r5 +1: PPC_LL r6,0(r6) + bdnz 1b + PPC_LL r4,PPC_LR_STKOFF(r6) +2: PPC_LL r7,0(r6) + PPC_LL r7,PPC_LR_STKOFF(r7) + PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3) + PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3) + PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3) + blr diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index ad461e735ae..9cf197f01e9 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -338,8 +338,8 @@ static int __init nvram_create_os_partition(void) rc = nvram_write_header(new_part); if (rc <= 0) { - printk(KERN_ERR "nvram_create_os_partition: nvram_write_header \ - failed (%d)\n", rc); + printk(KERN_ERR "nvram_create_os_partition: nvram_write_header " + "failed (%d)\n", rc); return rc; } @@ -349,7 +349,7 @@ static int __init nvram_create_os_partition(void) rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index); if (rc <= 0) { printk(KERN_ERR "nvram_create_os_partition: nvram_write " - "failed (%d)\n", rc); + "failed (%d)\n", rc); return rc; } diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 1a4fc0d11a0..6c1dfc3ff8b 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -17,7 +17,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mod_devicetable.h> -#include <linux/slab.h> #include <linux/pci.h> #include <linux/of.h> #include <linux/of_device.h> @@ -214,7 +213,7 @@ EXPORT_SYMBOL(of_find_device_by_node); static int of_dev_phandle_match(struct device *dev, void *data) { phandle *ph = data; - return to_of_device(dev)->node->linux_phandle == *ph; + return to_of_device(dev)->node->phandle == *ph; } struct of_device *of_find_device_by_phandle(phandle ph) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index d16b1ea55d4..0c40c6f476f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -9,11 +9,15 @@ #include <linux/threads.h> #include <linux/module.h> +#include <linux/lmb.h> +#include <asm/firmware.h> #include <asm/lppaca.h> #include <asm/paca.h> #include <asm/sections.h> #include <asm/pgtable.h> +#include <asm/iseries/lpar_map.h> +#include <asm/iseries/hv_types.h> /* This symbol is provided by the linker - let it fill in the paca * field correctly */ @@ -70,37 +74,82 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = { * processors. The processor VPD array needs one entry per physical * processor (not thread). */ -struct paca_struct paca[NR_CPUS]; +struct paca_struct *paca; EXPORT_SYMBOL(paca); -void __init initialise_pacas(void) -{ - int cpu; +struct paca_struct boot_paca; - /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB - * of the TOC can be addressed using a single machine instruction. - */ +void __init initialise_paca(struct paca_struct *new_paca, int cpu) +{ + /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB + * of the TOC can be addressed using a single machine instruction. + */ unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; - /* Can't use for_each_*_cpu, as they aren't functional yet */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - struct paca_struct *new_paca = &paca[cpu]; - #ifdef CONFIG_PPC_BOOK3S - new_paca->lppaca_ptr = &lppaca[cpu]; + new_paca->lppaca_ptr = &lppaca[cpu]; #else - new_paca->kernel_pgd = swapper_pg_dir; + new_paca->kernel_pgd = swapper_pg_dir; #endif - new_paca->lock_token = 0x8000; - new_paca->paca_index = cpu; - new_paca->kernel_toc = kernel_toc; - new_paca->kernelbase = (unsigned long) _stext; - new_paca->kernel_msr = MSR_KERNEL; - new_paca->hw_cpu_id = 0xffff; - new_paca->__current = &init_task; + new_paca->lock_token = 0x8000; + new_paca->paca_index = cpu; + new_paca->kernel_toc = kernel_toc; + new_paca->kernelbase = (unsigned long) _stext; + new_paca->kernel_msr = MSR_KERNEL; + new_paca->hw_cpu_id = 0xffff; + new_paca->__current = &init_task; #ifdef CONFIG_PPC_STD_MMU_64 - new_paca->slb_shadow_ptr = &slb_shadow[cpu]; + new_paca->slb_shadow_ptr = &slb_shadow[cpu]; #endif /* CONFIG_PPC_STD_MMU_64 */ +} + +static int __initdata paca_size; + +void __init allocate_pacas(void) +{ + int nr_cpus, cpu, limit; + + /* + * We can't take SLB misses on the paca, and we want to access them + * in real mode, so allocate them within the RMA and also within + * the first segment. On iSeries they must be within the area mapped + * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. + */ + limit = min(0x10000000ULL, lmb.rmo_size); + if (firmware_has_feature(FW_FEATURE_ISERIES)) + limit = min(limit, HvPagesToMap * HVPAGESIZE); + + nr_cpus = NR_CPUS; + /* On iSeries we know we can never have more than 64 cpus */ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + nr_cpus = min(64, nr_cpus); + + paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus); + + paca = __va(lmb_alloc_base(paca_size, PAGE_SIZE, limit)); + memset(paca, 0, paca_size); + + printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", + paca_size, nr_cpus, paca); + + /* Can't use for_each_*_cpu, as they aren't functional yet */ + for (cpu = 0; cpu < nr_cpus; cpu++) + initialise_paca(&paca[cpu], cpu); +} + +void __init free_unused_pacas(void) +{ + int new_size; + + new_size = PAGE_ALIGN(sizeof(struct paca_struct) * num_possible_cpus()); + + if (new_size >= paca_size) + return; + + lmb_free(__pa(paca) + new_size, paca_size - new_size); + + printk(KERN_DEBUG "Freed %u bytes for unused pacas\n", + paca_size - new_size); - } + paca_size = new_size; } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e8dfdbd9327..0c0567e5840 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -26,6 +26,7 @@ #include <linux/syscalls.h> #include <linux/irq.h> #include <linux/vmalloc.h> +#include <linux/slab.h> #include <asm/processor.h> #include <asm/io.h> @@ -63,21 +64,6 @@ struct dma_map_ops *get_pci_dma_ops(void) } EXPORT_SYMBOL(get_pci_dma_ops); -int pci_set_dma_mask(struct pci_dev *dev, u64 mask) -{ - return dma_set_mask(&dev->dev, mask); -} - -int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) -{ - int rc; - - rc = dma_set_mask(&dev->dev, mask); - dev->dev.coherent_dma_mask = dev->dma_mask; - - return rc; -} - struct pci_controller *pcibios_alloc_controller(struct device_node *dev) { struct pci_controller *phb; @@ -1047,10 +1033,8 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus) struct pci_dev *dev = bus->self; - for (i = 0; i < PCI_BUS_NUM_RESOURCES; ++i) { - if ((res = bus->resource[i]) == NULL) - continue; - if (!res->flags) + pci_bus_for_each_resource(bus, res, i) { + if (!res || !res->flags) continue; if (i >= 3 && bus->self->transparent) continue; @@ -1107,6 +1091,12 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { struct dev_archdata *sd = &dev->dev.archdata; + /* Cardbus can call us to add new devices to a bus, so ignore + * those who are already fully discovered + */ + if (dev->is_added) + continue; + /* Setup OF node pointer in archdata */ sd->of_node = pci_device_to_OF_node(dev); @@ -1147,6 +1137,13 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) } EXPORT_SYMBOL(pcibios_fixup_bus); +void __devinit pci_fixup_cardbus(struct pci_bus *bus) +{ + /* Now fixup devices on that bus */ + pcibios_setup_bus_devices(bus); +} + + static int skip_isa_ioresource_align(struct pci_dev *dev) { if ((ppc_pci_flags & PPC_PCI_CAN_SKIP_ISA_ALIGN) && @@ -1168,21 +1165,20 @@ static int skip_isa_ioresource_align(struct pci_dev *dev) * but we want to try to avoid allocating at 0x2900-0x2bff * which might have be mirrored at 0x0100-0x03ff.. */ -void pcibios_align_resource(void *data, struct resource *res, +resource_size_t pcibios_align_resource(void *data, const struct resource *res, resource_size_t size, resource_size_t align) { struct pci_dev *dev = data; + resource_size_t start = res->start; if (res->flags & IORESOURCE_IO) { - resource_size_t start = res->start; - if (skip_isa_ioresource_align(dev)) - return; - if (start & 0x300) { + return start; + if (start & 0x300) start = (start + 0x3ff) & ~0x3ff; - res->start = start; - } } + + return start; } EXPORT_SYMBOL(pcibios_align_resource); @@ -1265,9 +1261,8 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) pr_debug("PCI: Allocating bus resources for %04x:%02x...\n", pci_domain_nr(bus), bus->number); - for (i = 0; i < PCI_BUS_NUM_RESOURCES; ++i) { - if ((res = bus->resource[i]) == NULL || !res->flags - || res->start > res->end || res->parent) + pci_bus_for_each_resource(bus, res, i) { + if (!res || !res->flags || res->start > res->end || res->parent) continue; if (bus->parent == NULL) pr = (res->flags & IORESOURCE_IO) ? diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index c13668cf36d..e7db5b48004 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -14,6 +14,7 @@ #include <linux/irq.h> #include <linux/list.h> #include <linux/of.h> +#include <linux/slab.h> #include <asm/processor.h> #include <asm/io.h> diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index ccf56ac92de..d43fc65749c 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -224,7 +224,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus, * G5 machines... So when something asks for bus 0 io base * (bus 0 is HT root), we return the AGP one instead. */ - if (in_bus == 0 && machine_is_compatible("MacRISC4")) { + if (in_bus == 0 && of_machine_is_compatible("MacRISC4")) { struct device_node *agp; agp = of_find_compatible_node(NULL, NULL, "u3-agp"); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index d5e36e5dc7c..d56b35ee7f7 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -23,6 +23,7 @@ #include <linux/pci.h> #include <linux/string.h> #include <linux/init.h> +#include <linux/gfp.h> #include <asm/io.h> #include <asm/prom.h> diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 7311fdfb9bf..cd11d5ca80d 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -123,6 +123,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, { struct pci_dev *dev; const char *type; + struct pci_slot *slot; dev = alloc_pci_dev(); if (!dev) @@ -140,6 +141,11 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->devfn = devfn; dev->multifunction = 0; /* maybe a lie? */ dev->needs_freset = 0; /* pcie fundamental reset required */ + set_pcie_port_type(dev); + + list_for_each_entry(slot, &dev->bus->slots, list) + if (PCI_SLOT(dev->devfn) == slot->number) + dev->slot = slot; dev->vendor = get_int_prop(node, "vendor-id", 0xffff); dev->device = get_int_prop(node, "device-id", 0xffff); @@ -160,10 +166,14 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->error_state = pci_channel_io_normal; dev->dma_mask = 0xffffffff; + /* Early fixups, before probing the BARs */ + pci_fixup_device(pci_fixup_early, dev); + if (!strcmp(type, "pci") || !strcmp(type, "pciex")) { /* a PCI-PCI bridge */ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE; dev->rom_base_reg = PCI_ROM_ADDRESS1; + set_pcie_hotplug_bridge(dev); } else if (!strcmp(type, "cardbus")) { dev->hdr_type = PCI_HEADER_TYPE_CARDBUS; } else { @@ -294,7 +304,7 @@ static void __devinit __of_scan_bus(struct device_node *node, int reglen, devfn; struct pci_dev *dev; - pr_debug("of_scan_bus(%s) bus no %d... \n", + pr_debug("of_scan_bus(%s) bus no %d...\n", node->full_name, bus->number); /* Scan direct children */ diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index a3c11cac3d7..95ad9dad298 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -495,9 +495,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) entry->nr = 0; - if (current->pid == 0) /* idle task? */ - return entry; - if (!user_mode(regs)) { perf_callchain_kernel(regs, entry); if (current->mm) diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 1eb85fbf53a..08460a2e9f4 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -718,10 +718,10 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -static void event_sched_in(struct perf_event *event, int cpu) +static void event_sched_in(struct perf_event *event) { event->state = PERF_EVENT_STATE_ACTIVE; - event->oncpu = cpu; + event->oncpu = smp_processor_id(); event->tstamp_running += event->ctx->time - event->tstamp_stopped; if (is_software_event(event)) event->pmu->enable(event); @@ -735,7 +735,7 @@ static void event_sched_in(struct perf_event *event, int cpu) */ int hw_perf_group_sched_in(struct perf_event *group_leader, struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx, int cpu) + struct perf_event_context *ctx) { struct cpu_hw_events *cpuhw; long i, n, n0; @@ -766,10 +766,10 @@ int hw_perf_group_sched_in(struct perf_event *group_leader, cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuctx->active_oncpu += n; n = 1; - event_sched_in(group_leader, cpu); + event_sched_in(group_leader); list_for_each_entry(sub, &group_leader->sibling_list, group_entry) { if (sub->state != PERF_EVENT_STATE_OFF) { - event_sched_in(sub, cpu); + event_sched_in(sub); ++n; } } @@ -1164,10 +1164,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * Finally record data if requested. */ if (record) { - struct perf_sample_data data = { - .addr = ~0ULL, - .period = event->hw.last_period, - }; + struct perf_sample_data data; + + perf_sample_data_init(&data, ~0ULL); + data.period = event->hw.last_period; if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); @@ -1287,7 +1287,7 @@ static void perf_event_interrupt(struct pt_regs *regs) irq_exit(); } -void hw_perf_event_setup(int cpu) +static void power_pmu_setup(int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); @@ -1297,6 +1297,23 @@ void hw_perf_event_setup(int cpu) cpuhw->mmcr[0] = MMCR0_FC; } +static int __cpuinit +power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + power_pmu_setup(cpu); + break; + + default: + break; + } + + return NOTIFY_OK; +} + int register_power_pmu(struct power_pmu *pmu) { if (ppmu) @@ -1314,5 +1331,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_cpu_notifier(power_pmu_notifier); + return 0; } diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c new file mode 100644 index 00000000000..369872f6cf7 --- /dev/null +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -0,0 +1,654 @@ +/* + * Performance event support - Freescale Embedded Performance Monitor + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * Copyright 2010 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/hardirq.h> +#include <asm/reg_fsl_emb.h> +#include <asm/pmc.h> +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/ptrace.h> + +struct cpu_hw_events { + int n_events; + int disabled; + u8 pmcs_enabled; + struct perf_event *event[MAX_HWEVENTS]; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +static struct fsl_emb_pmu *ppmu; + +/* Number of perf_events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_pmc_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * If interrupts were soft-disabled when a PMU interrupt occurs, treat + * it as an NMI. + */ +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ +#ifdef __powerpc64__ + return !regs->softe; +#else + return 0; +#endif +} + +static void perf_event_interrupt(struct pt_regs *regs); + +/* + * Read one performance monitor counter (PMC). + */ +static unsigned long read_pmc(int idx) +{ + unsigned long val; + + switch (idx) { + case 0: + val = mfpmr(PMRN_PMC0); + break; + case 1: + val = mfpmr(PMRN_PMC1); + break; + case 2: + val = mfpmr(PMRN_PMC2); + break; + case 3: + val = mfpmr(PMRN_PMC3); + break; + default: + printk(KERN_ERR "oops trying to read PMC%d\n", idx); + val = 0; + } + return val; +} + +/* + * Write one PMC. + */ +static void write_pmc(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMC0, val); + break; + case 1: + mtpmr(PMRN_PMC1, val); + break; + case 2: + mtpmr(PMRN_PMC2, val); + break; + case 3: + mtpmr(PMRN_PMC3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMC%d\n", idx); + } + + isync(); +} + +/* + * Write one local control A register + */ +static void write_pmlca(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMLCA0, val); + break; + case 1: + mtpmr(PMRN_PMLCA1, val); + break; + case 2: + mtpmr(PMRN_PMLCA2, val); + break; + case 3: + mtpmr(PMRN_PMLCA3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMLCA%d\n", idx); + } + + isync(); +} + +/* + * Write one local control B register + */ +static void write_pmlcb(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMLCB0, val); + break; + case 1: + mtpmr(PMRN_PMLCB1, val); + break; + case 2: + mtpmr(PMRN_PMLCB2, val); + break; + case 3: + mtpmr(PMRN_PMLCB3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMLCB%d\n", idx); + } + + isync(); +} + +static void fsl_emb_pmu_read(struct perf_event *event) +{ + s64 val, delta, prev; + + /* + * Performance monitor interrupts come even when interrupts + * are soft-disabled, as long as interrupts are hard-enabled. + * Therefore we treat them like NMIs. + */ + do { + prev = atomic64_read(&event->hw.prev_count); + barrier(); + val = read_pmc(event->hw.idx); + } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + + /* The counters are only 32 bits wide */ + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + atomic64_sub(delta, &event->hw.period_left); +} + +/* + * Disable all events to prevent PMU interrupts and to allow + * events to be added or removed. + */ +void hw_perf_disable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!cpuhw->disabled) { + cpuhw->disabled = 1; + + /* + * Check if we ever enabled the PMU on this cpu. + */ + if (!cpuhw->pmcs_enabled) { + ppc_enable_pmcs(); + cpuhw->pmcs_enabled = 1; + } + + if (atomic_read(&num_events)) { + /* + * Set the 'freeze all counters' bit, and disable + * interrupts. The barrier is to make sure the + * mtpmr has been executed and the PMU has frozen + * the events before we return. + */ + + mtpmr(PMRN_PMGC0, PMGC0_FAC); + isync(); + } + } + local_irq_restore(flags); +} + +/* + * Re-enable all events if disable == 0. + * If we were previously disabled and events were added, then + * put the new config on the PMU. + */ +void hw_perf_enable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + if (!cpuhw->disabled) + goto out; + + cpuhw->disabled = 0; + ppc_set_pmu_inuse(cpuhw->n_events != 0); + + if (cpuhw->n_events > 0) { + mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); + isync(); + } + + out: + local_irq_restore(flags); +} + +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *ctrs[]) +{ + int n = 0; + struct perf_event *event; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + ctrs[n] = group; + n++; + } + list_for_each_entry(event, &group->sibling_list, group_entry) { + if (!is_software_event(event) && + event->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + ctrs[n] = event; + n++; + } + } + return n; +} + +/* perf must be disabled, context locked on entry */ +static int fsl_emb_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + int ret = -EAGAIN; + int num_counters = ppmu->n_counter; + u64 val; + int i; + + cpuhw = &get_cpu_var(cpu_hw_events); + + if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) + num_counters = ppmu->n_restricted; + + /* + * Allocate counters from top-down, so that restricted-capable + * counters are kept free as long as possible. + */ + for (i = num_counters - 1; i >= 0; i--) { + if (cpuhw->event[i]) + continue; + + break; + } + + if (i < 0) + goto out; + + event->hw.idx = i; + cpuhw->event[i] = event; + ++cpuhw->n_events; + + val = 0; + if (event->hw.sample_period) { + s64 left = atomic64_read(&event->hw.period_left); + if (left < 0x80000000L) + val = 0x80000000L - left; + } + atomic64_set(&event->hw.prev_count, val); + write_pmc(i, val); + perf_event_update_userpage(event); + + write_pmlcb(i, event->hw.config >> 32); + write_pmlca(i, event->hw.config_base); + + ret = 0; + out: + put_cpu_var(cpu_hw_events); + return ret; +} + +/* perf must be disabled, context locked on entry */ +static void fsl_emb_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + int i = event->hw.idx; + + if (i < 0) + goto out; + + fsl_emb_pmu_read(event); + + cpuhw = &get_cpu_var(cpu_hw_events); + + WARN_ON(event != cpuhw->event[event->hw.idx]); + + write_pmlca(i, 0); + write_pmlcb(i, 0); + write_pmc(i, 0); + + cpuhw->event[i] = NULL; + event->hw.idx = -1; + + /* + * TODO: if at least one restricted event exists, and we + * just freed up a non-restricted-capable counter, and + * there is a restricted-capable counter occupied by + * a non-restricted event, migrate that event to the + * vacated counter. + */ + + cpuhw->n_events--; + + out: + put_cpu_var(cpu_hw_events); +} + +/* + * Re-enable interrupts on a event after they were throttled + * because they were coming too fast. + * + * Context is locked on entry, but perf is not disabled. + */ +static void fsl_emb_pmu_unthrottle(struct perf_event *event) +{ + s64 val, left; + unsigned long flags; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + local_irq_save(flags); + perf_disable(); + fsl_emb_pmu_read(event); + left = event->hw.sample_period; + event->hw.last_period = left; + val = 0; + if (left < 0x80000000L) + val = 0x80000000L - left; + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + perf_enable(); + local_irq_restore(flags); +} + +static struct pmu fsl_emb_pmu = { + .enable = fsl_emb_pmu_enable, + .disable = fsl_emb_pmu_disable, + .read = fsl_emb_pmu_read, + .unthrottle = fsl_emb_pmu_unthrottle, +}; + +/* + * Release the PMU if this is the last perf_event. + */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* + * Translate a generic cache event_id config to a raw event_id code. + */ +static int hw_perf_cache_event(u64 config, u64 *eventp) +{ + unsigned long type, op, result; + int ev; + + if (!ppmu->cache_events) + return -EINVAL; + + /* unpack config */ + type = config & 0xff; + op = (config >> 8) & 0xff; + result = (config >> 16) & 0xff; + + if (type >= PERF_COUNT_HW_CACHE_MAX || + op >= PERF_COUNT_HW_CACHE_OP_MAX || + result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ev = (*ppmu->cache_events)[type][op][result]; + if (ev == 0) + return -EOPNOTSUPP; + if (ev == -1) + return -EINVAL; + *eventp = ev; + return 0; +} + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + u64 ev; + struct perf_event *events[MAX_HWEVENTS]; + int n; + int err; + int num_restricted; + int i; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + ev = event->attr.config; + if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) + return ERR_PTR(-EOPNOTSUPP); + ev = ppmu->generic_events[ev]; + break; + + case PERF_TYPE_HW_CACHE: + err = hw_perf_cache_event(event->attr.config, &ev); + if (err) + return ERR_PTR(err); + break; + + case PERF_TYPE_RAW: + ev = event->attr.config; + break; + + default: + return ERR_PTR(-EINVAL); + } + + event->hw.config = ppmu->xlate_event(ev); + if (!(event->hw.config & FSL_EMB_EVENT_VALID)) + return ERR_PTR(-EINVAL); + + /* + * If this is in a group, check if it can go on with all the + * other hardware events in the group. We assume the event + * hasn't been linked into its leader's sibling list at this point. + */ + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, + ppmu->n_counter - 1, events); + if (n < 0) + return ERR_PTR(-EINVAL); + } + + if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { + num_restricted = 0; + for (i = 0; i < n; i++) { + if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED) + num_restricted++; + } + + if (num_restricted >= ppmu->n_restricted) + return ERR_PTR(-EINVAL); + } + + event->hw.idx = -1; + + event->hw.config_base = PMLCA_CE | PMLCA_FCM1 | + (u32)((ev << 16) & PMLCA_EVENT_MASK); + + if (event->attr.exclude_user) + event->hw.config_base |= PMLCA_FCU; + if (event->attr.exclude_kernel) + event->hw.config_base |= PMLCA_FCS; + if (event->attr.exclude_idle) + return ERR_PTR(-ENOTSUPP); + + event->hw.last_period = event->hw.sample_period; + atomic64_set(&event->hw.period_left, event->hw.last_period); + + /* + * See if we need to reserve the PMU. + * If no events are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * reserve_pmc_hardware or release_pmc_hardware. + */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && + reserve_pmc_hardware(perf_event_interrupt)) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + + mtpmr(PMRN_PMGC0, PMGC0_FAC); + isync(); + } + event->destroy = hw_perf_event_destroy; + + if (err) + return ERR_PTR(err); + return &fsl_emb_pmu; +} + +/* + * A counter has overflowed; update its count and record + * things if requested. Note that interrupts are hard-disabled + * here so there is no possibility of being interrupted. + */ +static void record_and_restart(struct perf_event *event, unsigned long val, + struct pt_regs *regs, int nmi) +{ + u64 period = event->hw.sample_period; + s64 prev, delta, left; + int record = 0; + + /* we don't have to worry about interrupts here */ + prev = atomic64_read(&event->hw.prev_count); + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + + /* + * See if the total period for this event has expired, + * and update for the next period. + */ + val = 0; + left = atomic64_read(&event->hw.period_left) - delta; + if (period) { + if (left <= 0) { + left += period; + if (left <= 0) + left = period; + record = 1; + } + if (left < 0x80000000LL) + val = 0x80000000LL - left; + } + + /* + * Finally record data if requested. + */ + if (record) { + struct perf_sample_data data = { + .period = event->hw.last_period, + }; + + if (perf_event_overflow(event, nmi, &data, regs)) { + /* + * Interrupts are coming too fast - throttle them + * by setting the event to 0, so it will be + * at least 2^30 cycles until the next interrupt + * (assuming each event counts at most 2 counts + * per cycle). + */ + val = 0; + left = ~0ULL >> 1; + } + } + + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); +} + +static void perf_event_interrupt(struct pt_regs *regs) +{ + int i; + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct perf_event *event; + unsigned long val; + int found = 0; + int nmi; + + nmi = perf_intr_is_nmi(regs); + if (nmi) + nmi_enter(); + else + irq_enter(); + + for (i = 0; i < ppmu->n_counter; ++i) { + event = cpuhw->event[i]; + + val = read_pmc(i); + if ((int)val < 0) { + if (event) { + /* event has overflowed */ + found = 1; + record_and_restart(event, val, regs, nmi); + } else { + /* + * Disabled counter is negative, + * reset it just in case. + */ + write_pmc(i, 0); + } + } + } + + /* PMM will keep counters frozen until we return from the interrupt. */ + mtmsr(mfmsr() | MSR_PMM); + mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); + isync(); + + if (nmi) + nmi_exit(); + else + irq_exit(); +} + +void hw_perf_event_setup(int cpu) +{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + + memset(cpuhw, 0, sizeof(*cpuhw)); +} + +int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) +{ + if (ppmu) + return -EBUSY; /* something's already registered */ + + ppmu = pmu; + pr_info("%s performance monitor hardware support registered\n", + pmu->name); + + return 0; +} diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c index 0516e2d3e02..461499b43cf 100644 --- a/arch/powerpc/kernel/pmc.c +++ b/arch/powerpc/kernel/pmc.c @@ -37,7 +37,7 @@ static void dummy_perf(struct pt_regs *regs) } -static DEFINE_SPINLOCK(pmc_owner_lock); +static DEFINE_RAW_SPINLOCK(pmc_owner_lock); static void *pmc_owner_caller; /* mostly for debugging */ perf_irq_t perf_irq = dummy_perf; @@ -45,7 +45,7 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq) { int err = 0; - spin_lock(&pmc_owner_lock); + raw_spin_lock(&pmc_owner_lock); if (pmc_owner_caller) { printk(KERN_WARNING "reserve_pmc_hardware: " @@ -59,21 +59,21 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq) perf_irq = new_perf_irq ? new_perf_irq : dummy_perf; out: - spin_unlock(&pmc_owner_lock); + raw_spin_unlock(&pmc_owner_lock); return err; } EXPORT_SYMBOL_GPL(reserve_pmc_hardware); void release_pmc_hardware(void) { - spin_lock(&pmc_owner_lock); + raw_spin_lock(&pmc_owner_lock); WARN_ON(! pmc_owner_caller); pmc_owner_caller = NULL; perf_irq = dummy_perf; - spin_unlock(&pmc_owner_lock); + raw_spin_unlock(&pmc_owner_lock); } EXPORT_SYMBOL_GPL(release_pmc_hardware); diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 425451453e9..ab3e392ac63 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -107,6 +107,7 @@ EXPORT_SYMBOL(giveup_altivec); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX EXPORT_SYMBOL(giveup_vsx); +EXPORT_SYMBOL_GPL(__giveup_vsx); #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE EXPORT_SYMBOL(giveup_spe); diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index 1ed3b8d7981..c8ae3714e79 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -19,7 +19,6 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/proc_fs.h> -#include <linux/slab.h> #include <linux/kernel.h> #include <asm/machdep.h> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index c930ac38e59..e4d71ced97e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -245,6 +245,24 @@ void discard_lazy_cpu_state(void) } #endif /* CONFIG_SMP */ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +void do_send_trap(struct pt_regs *regs, unsigned long address, + unsigned long error_code, int signal_code, int breakpt) +{ + siginfo_t info; + + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return; + + /* Deliver the signal to userspace */ + info.si_signo = SIGTRAP; + info.si_errno = breakpt; /* breakpoint or watchpoint id */ + info.si_code = signal_code; + info.si_addr = (void __user *)address; + force_sig_info(SIGTRAP, &info, current); +} +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ void do_dabr(struct pt_regs *regs, unsigned long address, unsigned long error_code) { @@ -257,12 +275,6 @@ void do_dabr(struct pt_regs *regs, unsigned long address, if (debugger_dabr_match(regs)) return; - /* Clear the DAC and struct entries. One shot trigger */ -#if defined(CONFIG_BOOKE) - mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W - | DBCR0_IDM)); -#endif - /* Clear the DABR */ set_dabr(0); @@ -273,9 +285,82 @@ void do_dabr(struct pt_regs *regs, unsigned long address, info.si_addr = (void __user *)address; force_sig_info(SIGTRAP, &info, current); } +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ static DEFINE_PER_CPU(unsigned long, current_dabr); +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +/* + * Set the debug registers back to their default "safe" values. + */ +static void set_debug_reg_defaults(struct thread_struct *thread) +{ + thread->iac1 = thread->iac2 = 0; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + thread->iac3 = thread->iac4 = 0; +#endif + thread->dac1 = thread->dac2 = 0; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + thread->dvc1 = thread->dvc2 = 0; +#endif + thread->dbcr0 = 0; +#ifdef CONFIG_BOOKE + /* + * Force User/Supervisor bits to b11 (user-only MSR[PR]=1) + */ + thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | \ + DBCR1_IAC3US | DBCR1_IAC4US; + /* + * Force Data Address Compare User/Supervisor bits to be User-only + * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0. + */ + thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; +#else + thread->dbcr1 = 0; +#endif +} + +static void prime_debug_regs(struct thread_struct *thread) +{ + mtspr(SPRN_IAC1, thread->iac1); + mtspr(SPRN_IAC2, thread->iac2); +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + mtspr(SPRN_IAC3, thread->iac3); + mtspr(SPRN_IAC4, thread->iac4); +#endif + mtspr(SPRN_DAC1, thread->dac1); + mtspr(SPRN_DAC2, thread->dac2); +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + mtspr(SPRN_DVC1, thread->dvc1); + mtspr(SPRN_DVC2, thread->dvc2); +#endif + mtspr(SPRN_DBCR0, thread->dbcr0); + mtspr(SPRN_DBCR1, thread->dbcr1); +#ifdef CONFIG_BOOKE + mtspr(SPRN_DBCR2, thread->dbcr2); +#endif +} +/* + * Unless neither the old or new thread are making use of the + * debug registers, set the debug registers from the values + * stored in the new thread. + */ +static void switch_booke_debug_regs(struct thread_struct *new_thread) +{ + if ((current->thread.dbcr0 & DBCR0_IDM) + || (new_thread->dbcr0 & DBCR0_IDM)) + prime_debug_regs(new_thread); +} +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ +static void set_debug_reg_defaults(struct thread_struct *thread) +{ + if (thread->dabr) { + thread->dabr = 0; + set_dabr(0); + } +} +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + int set_dabr(unsigned long dabr) { __get_cpu_var(current_dabr) = dabr; @@ -284,7 +369,7 @@ int set_dabr(unsigned long dabr) return ppc_md.set_dabr(dabr); /* XXX should we have a CPU_FTR_HAS_DABR ? */ -#if defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS mtspr(SPRN_DAC1, dabr); #elif defined(CONFIG_PPC_BOOK3S) mtspr(SPRN_DABR, dabr); @@ -371,10 +456,8 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_SMP */ -#if defined(CONFIG_BOOKE) - /* If new thread DAC (HW breakpoint) is the same then leave it */ - if (new->thread.dabr) - set_dabr(new->thread.dabr); +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + switch_booke_debug_regs(&new->thread); #else if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) set_dabr(new->thread.dabr); @@ -514,7 +597,7 @@ void show_regs(struct pt_regs * regs) printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); if (trap == 0x300 || trap == 0x600) -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr); #else printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr); @@ -554,28 +637,9 @@ void exit_thread(void) void flush_thread(void) { -#ifdef CONFIG_PPC64 - struct thread_info *t = current_thread_info(); - - if (test_ti_thread_flag(t, TIF_ABI_PENDING)) { - clear_ti_thread_flag(t, TIF_ABI_PENDING); - if (test_ti_thread_flag(t, TIF_32BIT)) - clear_ti_thread_flag(t, TIF_32BIT); - else - set_ti_thread_flag(t, TIF_32BIT); - } -#endif - discard_lazy_cpu_state(); - if (current->thread.dabr) { - current->thread.dabr = 0; - set_dabr(0); - -#if defined(CONFIG_BOOKE) - current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W); -#endif - } + set_debug_reg_defaults(¤t->thread); } void diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 4ec30086246..05131d634e7 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -43,6 +43,7 @@ #include <asm/smp.h> #include <asm/system.h> #include <asm/mmu.h> +#include <asm/paca.h> #include <asm/pgtable.h> #include <asm/pci.h> #include <asm/iommu.h> @@ -61,365 +62,12 @@ #define DBG(fmt...) #endif - -static int __initdata dt_root_addr_cells; -static int __initdata dt_root_size_cells; - #ifdef CONFIG_PPC64 int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; #endif -typedef u32 cell_t; - -#if 0 -static struct boot_param_header *initial_boot_params __initdata; -#else -struct boot_param_header *initial_boot_params; -#endif - -extern struct device_node *allnodes; /* temporary while merging */ - -extern rwlock_t devtree_lock; /* temporary while merging */ - -/* export that to outside world */ -struct device_node *of_chosen; - -static inline char *find_flat_dt_string(u32 offset) -{ - return ((char *)initial_boot_params) + - initial_boot_params->off_dt_strings + offset; -} - -/** - * This function is used to scan the flattened device-tree, it is - * used to extract the memory informations at boot before we can - * unflatten the tree - */ -int __init of_scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data) -{ - unsigned long p = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - int rc = 0; - int depth = -1; - - do { - u32 tag = *((u32 *)p); - char *pathp; - - p += 4; - if (tag == OF_DT_END_NODE) { - depth --; - continue; - } - if (tag == OF_DT_NOP) - continue; - if (tag == OF_DT_END) - break; - if (tag == OF_DT_PROP) { - u32 sz = *((u32 *)p); - p += 8; - if (initial_boot_params->version < 0x10) - p = _ALIGN(p, sz >= 8 ? 8 : 4); - p += sz; - p = _ALIGN(p, 4); - continue; - } - if (tag != OF_DT_BEGIN_NODE) { - printk(KERN_WARNING "Invalid tag %x scanning flattened" - " device tree !\n", tag); - return -EINVAL; - } - depth++; - pathp = (char *)p; - p = _ALIGN(p + strlen(pathp) + 1, 4); - if ((*pathp) == '/') { - char *lp, *np; - for (lp = NULL, np = pathp; *np; np++) - if ((*np) == '/') - lp = np+1; - if (lp != NULL) - pathp = lp; - } - rc = it(p, pathp, depth, data); - if (rc != 0) - break; - } while(1); - - return rc; -} - -unsigned long __init of_get_flat_dt_root(void) -{ - unsigned long p = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - - while(*((u32 *)p) == OF_DT_NOP) - p += 4; - BUG_ON (*((u32 *)p) != OF_DT_BEGIN_NODE); - p += 4; - return _ALIGN(p + strlen((char *)p) + 1, 4); -} - -/** - * This function can be used within scan_flattened_dt callback to get - * access to properties - */ -void* __init of_get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size) -{ - unsigned long p = node; - - do { - u32 tag = *((u32 *)p); - u32 sz, noff; - const char *nstr; - - p += 4; - if (tag == OF_DT_NOP) - continue; - if (tag != OF_DT_PROP) - return NULL; - - sz = *((u32 *)p); - noff = *((u32 *)(p + 4)); - p += 8; - if (initial_boot_params->version < 0x10) - p = _ALIGN(p, sz >= 8 ? 8 : 4); - - nstr = find_flat_dt_string(noff); - if (nstr == NULL) { - printk(KERN_WARNING "Can't find property index" - " name !\n"); - return NULL; - } - if (strcmp(name, nstr) == 0) { - if (size) - *size = sz; - return (void *)p; - } - p += sz; - p = _ALIGN(p, 4); - } while(1); -} - -int __init of_flat_dt_is_compatible(unsigned long node, const char *compat) -{ - const char* cp; - unsigned long cplen, l; - - cp = of_get_flat_dt_prop(node, "compatible", &cplen); - if (cp == NULL) - return 0; - while (cplen > 0) { - if (strncasecmp(cp, compat, strlen(compat)) == 0) - return 1; - l = strlen(cp) + 1; - cp += l; - cplen -= l; - } - - return 0; -} - -static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, - unsigned long align) -{ - void *res; - - *mem = _ALIGN(*mem, align); - res = (void *)*mem; - *mem += size; - - return res; -} - -static unsigned long __init unflatten_dt_node(unsigned long mem, - unsigned long *p, - struct device_node *dad, - struct device_node ***allnextpp, - unsigned long fpsize) -{ - struct device_node *np; - struct property *pp, **prev_pp = NULL; - char *pathp; - u32 tag; - unsigned int l, allocl; - int has_name = 0; - int new_format = 0; - - tag = *((u32 *)(*p)); - if (tag != OF_DT_BEGIN_NODE) { - printk("Weird tag at start of node: %x\n", tag); - return mem; - } - *p += 4; - pathp = (char *)*p; - l = allocl = strlen(pathp) + 1; - *p = _ALIGN(*p + l, 4); - - /* version 0x10 has a more compact unit name here instead of the full - * path. we accumulate the full path size using "fpsize", we'll rebuild - * it later. We detect this because the first character of the name is - * not '/'. - */ - if ((*pathp) != '/') { - new_format = 1; - if (fpsize == 0) { - /* root node: special case. fpsize accounts for path - * plus terminating zero. root node only has '/', so - * fpsize should be 2, but we want to avoid the first - * level nodes to have two '/' so we use fpsize 1 here - */ - fpsize = 1; - allocl = 2; - } else { - /* account for '/' and path size minus terminal 0 - * already in 'l' - */ - fpsize += l; - allocl = fpsize; - } - } - - - np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, - __alignof__(struct device_node)); - if (allnextpp) { - memset(np, 0, sizeof(*np)); - np->full_name = ((char*)np) + sizeof(struct device_node); - if (new_format) { - char *p = np->full_name; - /* rebuild full path for new format */ - if (dad && dad->parent) { - strcpy(p, dad->full_name); -#ifdef DEBUG - if ((strlen(p) + l + 1) != allocl) { - DBG("%s: p: %d, l: %d, a: %d\n", - pathp, (int)strlen(p), l, allocl); - } -#endif - p += strlen(p); - } - *(p++) = '/'; - memcpy(p, pathp, l); - } else - memcpy(np->full_name, pathp, l); - prev_pp = &np->properties; - **allnextpp = np; - *allnextpp = &np->allnext; - if (dad != NULL) { - np->parent = dad; - /* we temporarily use the next field as `last_child'*/ - if (dad->next == 0) - dad->child = np; - else - dad->next->sibling = np; - dad->next = np; - } - kref_init(&np->kref); - } - while(1) { - u32 sz, noff; - char *pname; - - tag = *((u32 *)(*p)); - if (tag == OF_DT_NOP) { - *p += 4; - continue; - } - if (tag != OF_DT_PROP) - break; - *p += 4; - sz = *((u32 *)(*p)); - noff = *((u32 *)((*p) + 4)); - *p += 8; - if (initial_boot_params->version < 0x10) - *p = _ALIGN(*p, sz >= 8 ? 8 : 4); - - pname = find_flat_dt_string(noff); - if (pname == NULL) { - printk("Can't find property name in list !\n"); - break; - } - if (strcmp(pname, "name") == 0) - has_name = 1; - l = strlen(pname) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property), - __alignof__(struct property)); - if (allnextpp) { - if (strcmp(pname, "linux,phandle") == 0) { - np->node = *((u32 *)*p); - if (np->linux_phandle == 0) - np->linux_phandle = np->node; - } - if (strcmp(pname, "ibm,phandle") == 0) - np->linux_phandle = *((u32 *)*p); - pp->name = pname; - pp->length = sz; - pp->value = (void *)*p; - *prev_pp = pp; - prev_pp = &pp->next; - } - *p = _ALIGN((*p) + sz, 4); - } - /* with version 0x10 we may not have the name property, recreate - * it here from the unit name if absent - */ - if (!has_name) { - char *p = pathp, *ps = pathp, *pa = NULL; - int sz; - - while (*p) { - if ((*p) == '@') - pa = p; - if ((*p) == '/') - ps = p + 1; - p++; - } - if (pa < ps) - pa = p; - sz = (pa - ps) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, - __alignof__(struct property)); - if (allnextpp) { - pp->name = "name"; - pp->length = sz; - pp->value = pp + 1; - *prev_pp = pp; - prev_pp = &pp->next; - memcpy(pp->value, ps, sz - 1); - ((char *)pp->value)[sz - 1] = 0; - DBG("fixed up name for %s -> %s\n", pathp, - (char *)pp->value); - } - } - if (allnextpp) { - *prev_pp = NULL; - np->name = of_get_property(np, "name", NULL); - np->type = of_get_property(np, "device_type", NULL); - - if (!np->name) - np->name = "<NULL>"; - if (!np->type) - np->type = "<NULL>"; - } - while (tag == OF_DT_BEGIN_NODE) { - mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize); - tag = *((u32 *)(*p)); - } - if (tag != OF_DT_END_NODE) { - printk("Weird tag at end of node: %x\n", tag); - return mem; - } - *p += 4; - return mem; -} - static int __init early_parse_mem(char *p) { if (!p) @@ -446,7 +94,7 @@ static void __init move_device_tree(void) DBG("-> move_device_tree\n"); start = __pa(initial_boot_params); - size = initial_boot_params->totalsize; + size = be32_to_cpu(initial_boot_params->totalsize); if ((memory_limit && (start + size) > memory_limit) || overlaps_crashkernel(start, size)) { @@ -459,54 +107,6 @@ static void __init move_device_tree(void) DBG("<- move_device_tree\n"); } -/** - * unflattens the device-tree passed by the firmware, creating the - * tree of struct device_node. It also fills the "name" and "type" - * pointers of the nodes so the normal device-tree walking functions - * can be used (this used to be done by finish_device_tree) - */ -void __init unflatten_device_tree(void) -{ - unsigned long start, mem, size; - struct device_node **allnextp = &allnodes; - - DBG(" -> unflatten_device_tree()\n"); - - /* First pass, scan for size */ - start = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - size = unflatten_dt_node(0, &start, NULL, NULL, 0); - size = (size | 3) + 1; - - DBG(" size is %lx, allocating...\n", size); - - /* Allocate memory for the expanded device tree */ - mem = lmb_alloc(size + 4, __alignof__(struct device_node)); - mem = (unsigned long) __va(mem); - - ((u32 *)mem)[size / 4] = 0xdeadbeef; - - DBG(" unflattening %lx...\n", mem); - - /* Second pass, do actual unflattening */ - start = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - unflatten_dt_node(mem, &start, NULL, &allnextp, 0); - if (*((u32 *)start) != OF_DT_END) - printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start)); - if (((u32 *)mem)[size / 4] != 0xdeadbeef) - printk(KERN_WARNING "End of tree marker overwritten: %08x\n", - ((u32 *)mem)[size / 4] ); - *allnextp = NULL; - - /* Get pointer to OF "/chosen" node for use everywhere */ - of_chosen = of_find_node_by_path("/chosen"); - if (of_chosen == NULL) - of_chosen = of_find_node_by_path("/chosen@0"); - - DBG(" <- unflatten_device_tree()\n"); -} - /* * ibm,pa-features is a per-cpu property that contains a string of * attribute descriptors, each of which has a 2 byte header plus up @@ -763,48 +363,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -#ifdef CONFIG_BLK_DEV_INITRD -static void __init early_init_dt_check_for_initrd(unsigned long node) -{ - unsigned long l; - u32 *prop; - - DBG("Looking for initrd properties... "); - - prop = of_get_flat_dt_prop(node, "linux,initrd-start", &l); - if (prop) { - initrd_start = (unsigned long)__va(of_read_ulong(prop, l/4)); - - prop = of_get_flat_dt_prop(node, "linux,initrd-end", &l); - if (prop) { - initrd_end = (unsigned long) - __va(of_read_ulong(prop, l/4)); - initrd_below_start_ok = 1; - } else { - initrd_start = 0; - } - } - - DBG("initrd_start=0x%lx initrd_end=0x%lx\n", initrd_start, initrd_end); -} -#else -static inline void early_init_dt_check_for_initrd(unsigned long node) -{ -} -#endif /* CONFIG_BLK_DEV_INITRD */ - -static int __init early_init_dt_scan_chosen(unsigned long node, - const char *uname, int depth, void *data) +void __init early_init_dt_scan_chosen_arch(unsigned long node) { unsigned long *lprop; - unsigned long l; - char *p; - - DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname); - - if (depth != 1 || - (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) - return 0; #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ @@ -815,17 +376,17 @@ static int __init early_init_dt_scan_chosen(unsigned long node, #endif /* mem=x on the command line is the preferred mechanism */ - lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL); - if (lprop) - memory_limit = *lprop; + lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL); + if (lprop) + memory_limit = *lprop; #ifdef CONFIG_PPC64 - lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); - if (lprop) - tce_alloc_start = *lprop; - lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); - if (lprop) - tce_alloc_end = *lprop; + lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); + if (lprop) + tce_alloc_start = *lprop; + lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); + if (lprop) + tce_alloc_end = *lprop; #endif #ifdef CONFIG_KEXEC @@ -837,51 +398,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node, if (lprop) crashk_res.end = crashk_res.start + *lprop - 1; #endif - - early_init_dt_check_for_initrd(node); - - /* Retreive command line */ - p = of_get_flat_dt_prop(node, "bootargs", &l); - if (p != NULL && l > 0) - strlcpy(cmd_line, p, min((int)l, COMMAND_LINE_SIZE)); - -#ifdef CONFIG_CMDLINE - if (p == NULL || l == 0 || (l == 1 && (*p) == 0)) - strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#endif /* CONFIG_CMDLINE */ - - DBG("Command line is: %s\n", cmd_line); - - /* break now */ - return 1; -} - -static int __init early_init_dt_scan_root(unsigned long node, - const char *uname, int depth, void *data) -{ - u32 *prop; - - if (depth != 0) - return 0; - - prop = of_get_flat_dt_prop(node, "#size-cells", NULL); - dt_root_size_cells = (prop == NULL) ? 1 : *prop; - DBG("dt_root_size_cells = %x\n", dt_root_size_cells); - - prop = of_get_flat_dt_prop(node, "#address-cells", NULL); - dt_root_addr_cells = (prop == NULL) ? 2 : *prop; - DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); - - /* break now */ - return 1; -} - -static u64 __init dt_mem_next_cell(int s, cell_t **cellp) -{ - cell_t *p = *cellp; - - *cellp = p + s; - return of_read_number(p, s); } #ifdef CONFIG_PPC_PSERIES @@ -893,22 +409,22 @@ static u64 __init dt_mem_next_cell(int s, cell_t **cellp) */ static int __init early_init_dt_scan_drconf_memory(unsigned long node) { - cell_t *dm, *ls, *usm; + __be32 *dm, *ls, *usm; unsigned long l, n, flags; u64 base, size, lmb_size; unsigned int is_kexec_kdump = 0, rngs; ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l); - if (ls == NULL || l < dt_root_size_cells * sizeof(cell_t)) + if (ls == NULL || l < dt_root_size_cells * sizeof(__be32)) return 0; lmb_size = dt_mem_next_cell(dt_root_size_cells, &ls); dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l); - if (dm == NULL || l < sizeof(cell_t)) + if (dm == NULL || l < sizeof(__be32)) return 0; n = *dm++; /* number of entries */ - if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(cell_t)) + if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32)) return 0; /* check if this is a kexec/kdump kernel. */ @@ -963,65 +479,47 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) #define early_init_dt_scan_drconf_memory(node) 0 #endif /* CONFIG_PPC_PSERIES */ -static int __init early_init_dt_scan_memory(unsigned long node, - const char *uname, int depth, void *data) +static int __init early_init_dt_scan_memory_ppc(unsigned long node, + const char *uname, + int depth, void *data) { - char *type = of_get_flat_dt_prop(node, "device_type", NULL); - cell_t *reg, *endp; - unsigned long l; - - /* Look for the ibm,dynamic-reconfiguration-memory node */ if (depth == 1 && strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) return early_init_dt_scan_drconf_memory(node); + + return early_init_dt_scan_memory(node, uname, depth, data); +} - /* We are scanning "memory" nodes only */ - if (type == NULL) { - /* - * The longtrail doesn't have a device_type on the - * /memory node, so look for the node called /memory@0. - */ - if (depth != 1 || strcmp(uname, "memory@0") != 0) - return 0; - } else if (strcmp(type, "memory") != 0) - return 0; - - reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l); - if (reg == NULL) - reg = of_get_flat_dt_prop(node, "reg", &l); - if (reg == NULL) - return 0; - - endp = reg + (l / sizeof(cell_t)); - - DBG("memory scan node %s, reg size %ld, data: %x %x %x %x,\n", - uname, l, reg[0], reg[1], reg[2], reg[3]); - - while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { - u64 base, size; +void __init early_init_dt_add_memory_arch(u64 base, u64 size) +{ +#if defined(CONFIG_PPC64) + if (iommu_is_off) { + if (base >= 0x80000000ul) + return; + if ((base + size) > 0x80000000ul) + size = 0x80000000ul - base; + } +#endif - base = dt_mem_next_cell(dt_root_addr_cells, ®); - size = dt_mem_next_cell(dt_root_size_cells, ®); + lmb_add(base, size); - if (size == 0) - continue; - DBG(" - %llx , %llx\n", (unsigned long long)base, - (unsigned long long)size); -#ifdef CONFIG_PPC64 - if (iommu_is_off) { - if (base >= 0x80000000ul) - continue; - if ((base + size) > 0x80000000ul) - size = 0x80000000ul - base; - } -#endif - lmb_add(base, size); + memstart_addr = min((u64)memstart_addr, base); +} - memstart_addr = min((u64)memstart_addr, base); - } +u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) +{ + return lmb_alloc(size, align); +} - return 0; +#ifdef CONFIG_BLK_DEV_INITRD +void __init early_init_dt_setup_initrd_arch(unsigned long start, + unsigned long end) +{ + initrd_start = (unsigned long)__va(start); + initrd_end = (unsigned long)__va(end); + initrd_below_start_ok = 1; } +#endif static void __init early_reserve_mem(void) { @@ -1186,7 +684,7 @@ void __init early_init_devtree(void *params) /* Scan memory nodes and rebuild LMBs */ lmb_init(); of_scan_flat_dt(early_init_dt_scan_root, NULL); - of_scan_flat_dt(early_init_dt_scan_memory, NULL); + of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); /* Save command line for /proc/cmdline and then parse parameters */ strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); @@ -1224,6 +722,8 @@ void __init early_init_devtree(void *params) * FIXME .. and the initrd too? */ move_device_tree(); + allocate_pacas(); + DBG("Scanning CPUs ...\n"); /* Retreive CPU related informations from the flat tree @@ -1234,25 +734,6 @@ void __init early_init_devtree(void *params) DBG(" <- early_init_devtree()\n"); } - -/** - * Indicates whether the root node has a given value in its - * compatible property. - */ -int machine_is_compatible(const char *compat) -{ - struct device_node *root; - int rc = 0; - - root = of_find_node_by_path("/"); - if (root) { - rc = of_device_is_compatible(root, compat); - of_node_put(root); - } - return rc; -} -EXPORT_SYMBOL(machine_is_compatible); - /******* * * New implementation of the OF "find" APIs, return a refcounted @@ -1265,27 +746,6 @@ EXPORT_SYMBOL(machine_is_compatible); *******/ /** - * of_find_node_by_phandle - Find a node given a phandle - * @handle: phandle of the node to find - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_find_node_by_phandle(phandle handle) -{ - struct device_node *np; - - read_lock(&devtree_lock); - for (np = allnodes; np != 0; np = np->allnext) - if (np->linux_phandle == handle) - break; - of_node_get(np); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_node_by_phandle); - -/** * of_find_next_cache_node - Find a node's subsidiary cache * @np: node of type "cpu" or "cache" * @@ -1316,138 +776,6 @@ struct device_node *of_find_next_cache_node(struct device_node *np) return NULL; } -/** - * of_node_get - Increment refcount of a node - * @node: Node to inc refcount, NULL is supported to - * simplify writing of callers - * - * Returns node. - */ -struct device_node *of_node_get(struct device_node *node) -{ - if (node) - kref_get(&node->kref); - return node; -} -EXPORT_SYMBOL(of_node_get); - -static inline struct device_node * kref_to_device_node(struct kref *kref) -{ - return container_of(kref, struct device_node, kref); -} - -/** - * of_node_release - release a dynamically allocated node - * @kref: kref element of the node to be released - * - * In of_node_put() this function is passed to kref_put() - * as the destructor. - */ -static void of_node_release(struct kref *kref) -{ - struct device_node *node = kref_to_device_node(kref); - struct property *prop = node->properties; - - /* We should never be releasing nodes that haven't been detached. */ - if (!of_node_check_flag(node, OF_DETACHED)) { - printk("WARNING: Bad of_node_put() on %s\n", node->full_name); - dump_stack(); - kref_init(&node->kref); - return; - } - - if (!of_node_check_flag(node, OF_DYNAMIC)) - return; - - while (prop) { - struct property *next = prop->next; - kfree(prop->name); - kfree(prop->value); - kfree(prop); - prop = next; - - if (!prop) { - prop = node->deadprops; - node->deadprops = NULL; - } - } - kfree(node->full_name); - kfree(node->data); - kfree(node); -} - -/** - * of_node_put - Decrement refcount of a node - * @node: Node to dec refcount, NULL is supported to - * simplify writing of callers - * - */ -void of_node_put(struct device_node *node) -{ - if (node) - kref_put(&node->kref, of_node_release); -} -EXPORT_SYMBOL(of_node_put); - -/* - * Plug a device node into the tree and global list. - */ -void of_attach_node(struct device_node *np) -{ - unsigned long flags; - - write_lock_irqsave(&devtree_lock, flags); - np->sibling = np->parent->child; - np->allnext = allnodes; - np->parent->child = np; - allnodes = np; - write_unlock_irqrestore(&devtree_lock, flags); -} - -/* - * "Unplug" a node from the device tree. The caller must hold - * a reference to the node. The memory associated with the node - * is not freed until its refcount goes to zero. - */ -void of_detach_node(struct device_node *np) -{ - struct device_node *parent; - unsigned long flags; - - write_lock_irqsave(&devtree_lock, flags); - - parent = np->parent; - if (!parent) - goto out_unlock; - - if (allnodes == np) - allnodes = np->allnext; - else { - struct device_node *prev; - for (prev = allnodes; - prev->allnext != np; - prev = prev->allnext) - ; - prev->allnext = np->allnext; - } - - if (parent->child == np) - parent->child = np->sibling; - else { - struct device_node *prevsib; - for (prevsib = np->parent->child; - prevsib->sibling != np; - prevsib = prevsib->sibling) - ; - prevsib->sibling = np->sibling; - } - - of_node_set_flag(np, OF_DETACHED); - -out_unlock: - write_unlock_irqrestore(&devtree_lock, flags); -} - #ifdef CONFIG_PPC_PSERIES /* * Fix up the uninitialized fields in a new device node: @@ -1479,9 +807,9 @@ static int of_finish_dynamic_node(struct device_node *node) if (machine_is(powermac)) return -ENODEV; - /* fix up new node's linux_phandle field */ + /* fix up new node's phandle field */ if ((ibm_phandle = of_get_property(node, "ibm,phandle", NULL))) - node->linux_phandle = *ibm_phandle; + node->phandle = *ibm_phandle; out: of_node_put(parent); @@ -1520,120 +848,6 @@ static int __init prom_reconfig_setup(void) __initcall(prom_reconfig_setup); #endif -/* - * Add a property to a node - */ -int prom_add_property(struct device_node* np, struct property* prop) -{ - struct property **next; - unsigned long flags; - - prop->next = NULL; - write_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - while (*next) { - if (strcmp(prop->name, (*next)->name) == 0) { - /* duplicate ! don't insert it */ - write_unlock_irqrestore(&devtree_lock, flags); - return -1; - } - next = &(*next)->next; - } - *next = prop; - write_unlock_irqrestore(&devtree_lock, flags); - -#ifdef CONFIG_PROC_DEVICETREE - /* try to add to proc as well if it was initialized */ - if (np->pde) - proc_device_tree_add_prop(np->pde, prop); -#endif /* CONFIG_PROC_DEVICETREE */ - - return 0; -} - -/* - * Remove a property from a node. Note that we don't actually - * remove it, since we have given out who-knows-how-many pointers - * to the data using get-property. Instead we just move the property - * to the "dead properties" list, so it won't be found any more. - */ -int prom_remove_property(struct device_node *np, struct property *prop) -{ - struct property **next; - unsigned long flags; - int found = 0; - - write_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - while (*next) { - if (*next == prop) { - /* found the node */ - *next = prop->next; - prop->next = np->deadprops; - np->deadprops = prop; - found = 1; - break; - } - next = &(*next)->next; - } - write_unlock_irqrestore(&devtree_lock, flags); - - if (!found) - return -ENODEV; - -#ifdef CONFIG_PROC_DEVICETREE - /* try to remove the proc node as well */ - if (np->pde) - proc_device_tree_remove_prop(np->pde, prop); -#endif /* CONFIG_PROC_DEVICETREE */ - - return 0; -} - -/* - * Update a property in a node. Note that we don't actually - * remove it, since we have given out who-knows-how-many pointers - * to the data using get-property. Instead we just move the property - * to the "dead properties" list, and add the new property to the - * property list - */ -int prom_update_property(struct device_node *np, - struct property *newprop, - struct property *oldprop) -{ - struct property **next; - unsigned long flags; - int found = 0; - - write_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - while (*next) { - if (*next == oldprop) { - /* found the node */ - newprop->next = oldprop->next; - *next = newprop; - oldprop->next = np->deadprops; - np->deadprops = oldprop; - found = 1; - break; - } - next = &(*next)->next; - } - write_unlock_irqrestore(&devtree_lock, flags); - - if (!found) - return -ENODEV; - -#ifdef CONFIG_PROC_DEVICETREE - /* try to add to proc as well if it was initialized */ - if (np->pde) - proc_device_tree_update_prop(np->pde, newprop, oldprop); -#endif /* CONFIG_PROC_DEVICETREE */ - - return 0; -} - - /* Find the device node for a given logical cpu number, also returns the cpu * local thread number (index in ibm,interrupt-server#s) if relevant and * asked for (non NULL) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index bafac2e41ae..5f306c4946e 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -654,6 +654,9 @@ static void __init early_cmdline_parse(void) #define OV5_CMO 0x00 #endif +/* Option Vector 6: IBM PAPR hints */ +#define OV6_LINUX 0x02 /* Linux is our OS */ + /* * The architecture vector has an array of PVR mask/value pairs, * followed by # option vectors - 1, followed by the option vectors. @@ -665,7 +668,7 @@ static unsigned char ibm_architecture_vec[] = { W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */ W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */ - 5 - 1, /* 5 option vectors */ + 6 - 1, /* 6 option vectors */ /* option vector 1: processor architectures supported */ 3 - 2, /* length */ @@ -697,12 +700,29 @@ static unsigned char ibm_architecture_vec[] = { 0, /* don't halt */ /* option vector 5: PAPR/OF options */ - 5 - 2, /* length */ + 13 - 2, /* length */ 0, /* don't ignore, don't halt */ OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_DONATE_DEDICATE_CPU | OV5_MSI, 0, OV5_CMO, + 0, + 0, + 0, + 0, + /* WARNING: The offset of the "number of cores" field below + * must match by the macro below. Update the definition if + * the structure layout changes. + */ +#define IBM_ARCH_VEC_NRCORES_OFFSET 100 + W(NR_CPUS), /* number of cores supported */ + + /* option vector 6: IBM PAPR hints */ + 4 - 2, /* length */ + 0, + 0, + OV6_LINUX, + }; /* Old method - ELF header with PT_NOTE sections */ @@ -792,13 +812,70 @@ static struct fake_elf { } }; +static int __init prom_count_smt_threads(void) +{ + phandle node; + char type[64]; + unsigned int plen; + + /* Pick up th first CPU node we can find */ + for (node = 0; prom_next_node(&node); ) { + type[0] = 0; + prom_getprop(node, "device_type", type, sizeof(type)); + + if (strcmp(type, RELOC("cpu"))) + continue; + /* + * There is an entry for each smt thread, each entry being + * 4 bytes long. All cpus should have the same number of + * smt threads, so return after finding the first. + */ + plen = prom_getproplen(node, "ibm,ppc-interrupt-server#s"); + if (plen == PROM_ERROR) + break; + plen >>= 2; + prom_debug("Found 0x%x smt threads per core\n", (unsigned long)plen); + + /* Sanity check */ + if (plen < 1 || plen > 64) { + prom_printf("Threads per core 0x%x out of bounds, assuming 1\n", + (unsigned long)plen); + return 1; + } + return plen; + } + prom_debug("No threads found, assuming 1 per core\n"); + + return 1; + +} + + static void __init prom_send_capabilities(void) { ihandle elfloader, root; prom_arg_t ret; + u32 *cores; root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { + /* We need to tell the FW about the number of cores we support. + * + * To do that, we count the number of threads on the first core + * (we assume this is the same for all cores) and use it to + * divide NR_CPUS. + */ + cores = (u32 *)PTRRELOC(&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]); + if (*cores != NR_CPUS) { + prom_printf("WARNING ! " + "ibm_architecture_vec structure inconsistent: 0x%x !\n", + *cores); + } else { + *cores = NR_CPUS / prom_count_smt_threads(); + prom_printf("Max number of cores passed to firmware: 0x%x\n", + (unsigned long)*cores); + } + /* try calling the ibm,client-architecture-support method */ prom_printf("Calling ibm,client-architecture-support..."); if (call_prom_ret("call-method", 3, 2, &ret, diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index ef149880c14..ed2cfe17d25 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -46,7 +46,7 @@ /* * Set of msr bits that gdb can change on behalf of a process. */ -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS #define MSR_DEBUGCHANGE 0 #else #define MSR_DEBUGCHANGE (MSR_SE | MSR_BE) @@ -703,7 +703,7 @@ void user_enable_single_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) { -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS task->thread.dbcr0 &= ~DBCR0_BT; task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; regs->msr |= MSR_DE; @@ -720,7 +720,7 @@ void user_enable_block_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) { -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS task->thread.dbcr0 &= ~DBCR0_IC; task->thread.dbcr0 = DBCR0_IDM | DBCR0_BT; regs->msr |= MSR_DE; @@ -737,17 +737,25 @@ void user_disable_single_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) { -#if defined(CONFIG_BOOKE) - /* If DAC don't clear DBCRO_IDM or MSR_DE */ - if (task->thread.dabr) - task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_BT); - else { - task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_BT | DBCR0_IDM); +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + /* + * The logic to disable single stepping should be as + * simple as turning off the Instruction Complete flag. + * And, after doing so, if all debug flags are off, turn + * off DBCR0(IDM) and MSR(DE) .... Torez + */ + task->thread.dbcr0 &= ~DBCR0_IC; + /* + * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set. + */ + if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0, + task->thread.dbcr1)) { + /* + * All debug events were off..... + */ + task->thread.dbcr0 &= ~DBCR0_IDM; regs->msr &= ~MSR_DE; } -#elif defined(CONFIG_40x) - task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_BT | DBCR0_IDM); - regs->msr &= ~MSR_DE; #else regs->msr &= ~(MSR_SE | MSR_BE); #endif @@ -769,8 +777,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, if ((data & ~0x7UL) >= TASK_SIZE) return -EIO; -#ifndef CONFIG_BOOKE - +#ifndef CONFIG_PPC_ADV_DEBUG_REGS /* For processors using DABR (i.e. 970), the bottom 3 bits are flags. * It was assumed, on previous implementations, that 3 bits were * passed together with the data address, fitting the design of the @@ -789,21 +796,22 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Move contents to the DABR register */ task->thread.dabr = data; - -#endif -#if defined(CONFIG_BOOKE) - +#else /* CONFIG_PPC_ADV_DEBUG_REGS */ /* As described above, it was assumed 3 bits were passed with the data * address, but we will assume only the mode bits will be passed * as to not cause alignment restrictions for DAC-based processors. */ /* DAC's hold the whole address without any mode flags */ - task->thread.dabr = data & ~0x3UL; - - if (task->thread.dabr == 0) { - task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM); - task->thread.regs->msr &= ~MSR_DE; + task->thread.dac1 = data & ~0x3UL; + + if (task->thread.dac1 == 0) { + dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); + if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0, + task->thread.dbcr1)) { + task->thread.regs->msr &= ~MSR_DE; + task->thread.dbcr0 &= ~DBCR0_IDM; + } return 0; } @@ -814,17 +822,17 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 register */ - task->thread.dbcr0 = DBCR0_IDM; + task->thread.dbcr0 |= DBCR0_IDM; /* Check for write and read flags and set DBCR0 accordingly */ + dbcr_dac(task) &= ~(DBCR_DAC1R|DBCR_DAC1W); if (data & 0x1UL) - task->thread.dbcr0 |= DBSR_DAC1R; + dbcr_dac(task) |= DBCR_DAC1R; if (data & 0x2UL) - task->thread.dbcr0 |= DBSR_DAC1W; - + dbcr_dac(task) |= DBCR_DAC1W; task->thread.regs->msr |= MSR_DE; -#endif +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ return 0; } @@ -839,6 +847,394 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +static long set_intruction_bp(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + int slot; + int slot1_in_use = ((child->thread.dbcr0 & DBCR0_IAC1) != 0); + int slot2_in_use = ((child->thread.dbcr0 & DBCR0_IAC2) != 0); + int slot3_in_use = ((child->thread.dbcr0 & DBCR0_IAC3) != 0); + int slot4_in_use = ((child->thread.dbcr0 & DBCR0_IAC4) != 0); + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) + slot2_in_use = 1; + if (dbcr_iac_range(child) & DBCR_IAC34MODE) + slot4_in_use = 1; + + if (bp_info->addr >= TASK_SIZE) + return -EIO; + + if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { + + /* Make sure range is valid. */ + if (bp_info->addr2 >= TASK_SIZE) + return -EIO; + + /* We need a pair of IAC regsisters */ + if ((!slot1_in_use) && (!slot2_in_use)) { + slot = 1; + child->thread.iac1 = bp_info->addr; + child->thread.iac2 = bp_info->addr2; + child->thread.dbcr0 |= DBCR0_IAC1; + if (bp_info->addr_mode == + PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + dbcr_iac_range(child) |= DBCR_IAC12X; + else + dbcr_iac_range(child) |= DBCR_IAC12I; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + } else if ((!slot3_in_use) && (!slot4_in_use)) { + slot = 3; + child->thread.iac3 = bp_info->addr; + child->thread.iac4 = bp_info->addr2; + child->thread.dbcr0 |= DBCR0_IAC3; + if (bp_info->addr_mode == + PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + dbcr_iac_range(child) |= DBCR_IAC34X; + else + dbcr_iac_range(child) |= DBCR_IAC34I; +#endif + } else + return -ENOSPC; + } else { + /* We only need one. If possible leave a pair free in + * case a range is needed later + */ + if (!slot1_in_use) { + /* + * Don't use iac1 if iac1-iac2 are free and either + * iac3 or iac4 (but not both) are free + */ + if (slot2_in_use || (slot3_in_use == slot4_in_use)) { + slot = 1; + child->thread.iac1 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC1; + goto out; + } + } + if (!slot2_in_use) { + slot = 2; + child->thread.iac2 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC2; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + } else if (!slot3_in_use) { + slot = 3; + child->thread.iac3 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC3; + } else if (!slot4_in_use) { + slot = 4; + child->thread.iac4 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC4; +#endif + } else + return -ENOSPC; + } +out: + child->thread.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot; +} + +static int del_instruction_bp(struct task_struct *child, int slot) +{ + switch (slot) { + case 1: + if ((child->thread.dbcr0 & DBCR0_IAC1) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) { + /* address range - clear slots 1 & 2 */ + child->thread.iac2 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC12MODE; + } + child->thread.iac1 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC1; + break; + case 2: + if ((child->thread.dbcr0 & DBCR0_IAC2) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) + /* used in a range */ + return -EINVAL; + child->thread.iac2 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC2; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case 3: + if ((child->thread.dbcr0 & DBCR0_IAC3) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) { + /* address range - clear slots 3 & 4 */ + child->thread.iac4 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC34MODE; + } + child->thread.iac3 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC3; + break; + case 4: + if ((child->thread.dbcr0 & DBCR0_IAC4) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) + /* Used in a range */ + return -EINVAL; + child->thread.iac4 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC4; + break; +#endif + default: + return -EINVAL; + } + return 0; +} + +static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) +{ + int byte_enable = + (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT) + & 0xf; + int condition_mode = + bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE; + int slot; + + if (byte_enable && (condition_mode == 0)) + return -EINVAL; + + if (bp_info->addr >= TASK_SIZE) + return -EIO; + + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) { + slot = 1; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC1R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC1W; + child->thread.dac1 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.dvc1 = + (unsigned long)bp_info->condition_value; + child->thread.dbcr2 |= + ((byte_enable << DBCR2_DVC1BE_SHIFT) | + (condition_mode << DBCR2_DVC1M_SHIFT)); + } +#endif +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + } else if (child->thread.dbcr2 & DBCR2_DAC12MODE) { + /* Both dac1 and dac2 are part of a range */ + return -ENOSPC; +#endif + } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) { + slot = 2; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC2R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC2W; + child->thread.dac2 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.dvc2 = + (unsigned long)bp_info->condition_value; + child->thread.dbcr2 |= + ((byte_enable << DBCR2_DVC2BE_SHIFT) | + (condition_mode << DBCR2_DVC2M_SHIFT)); + } +#endif + } else + return -ENOSPC; + child->thread.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot + 4; +} + +static int del_dac(struct task_struct *child, int slot) +{ + if (slot == 1) { + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) + return -ENOENT; + + child->thread.dac1 = 0; + dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W); +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.dbcr2 & DBCR2_DAC12MODE) { + child->thread.dac2 = 0; + child->thread.dbcr2 &= ~DBCR2_DAC12MODE; + } + child->thread.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.dvc1 = 0; +#endif + } else if (slot == 2) { + if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) + return -ENOENT; + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.dbcr2 & DBCR2_DAC12MODE) + /* Part of a range */ + return -EINVAL; + child->thread.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.dvc2 = 0; +#endif + child->thread.dac2 = 0; + dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W); + } else + return -EINVAL; + + return 0; +} +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE +static int set_dac_range(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK; + + /* We don't allow range watchpoints to be used with DVC */ + if (bp_info->condition_mode) + return -EINVAL; + + /* + * Best effort to verify the address range. The user/supervisor bits + * prevent trapping in kernel space, but let's fail on an obvious bad + * range. The simple test on the mask is not fool-proof, and any + * exclusive range will spill over into kernel space. + */ + if (bp_info->addr >= TASK_SIZE) + return -EIO; + if (mode == PPC_BREAKPOINT_MODE_MASK) { + /* + * dac2 is a bitmask. Don't allow a mask that makes a + * kernel space address from a valid dac1 value + */ + if (~((unsigned long)bp_info->addr2) >= TASK_SIZE) + return -EIO; + } else { + /* + * For range breakpoints, addr2 must also be a valid address + */ + if (bp_info->addr2 >= TASK_SIZE) + return -EIO; + } + + if (child->thread.dbcr0 & + (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)) + return -ENOSPC; + + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + child->thread.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + child->thread.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); + child->thread.dac1 = bp_info->addr; + child->thread.dac2 = bp_info->addr2; + if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) + child->thread.dbcr2 |= DBCR2_DAC12M; + else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + child->thread.dbcr2 |= DBCR2_DAC12MX; + else /* PPC_BREAKPOINT_MODE_MASK */ + child->thread.dbcr2 |= DBCR2_DAC12MM; + child->thread.regs->msr |= MSR_DE; + + return 5; +} +#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */ + +static long ppc_set_hwdebug(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + if (bp_info->version != 1) + return -ENOTSUPP; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + /* + * Check for invalid flags and combinations + */ + if ((bp_info->trigger_type == 0) || + (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE | + PPC_BREAKPOINT_TRIGGER_RW)) || + (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) || + (bp_info->condition_mode & + ~(PPC_BREAKPOINT_CONDITION_MODE | + PPC_BREAKPOINT_CONDITION_BE_ALL))) + return -EINVAL; +#if CONFIG_PPC_ADV_DEBUG_DVCS == 0 + if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) + return -EINVAL; +#endif + + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) { + if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) || + (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)) + return -EINVAL; + return set_intruction_bp(child, bp_info); + } + if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + return set_dac(child, bp_info); + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + return set_dac_range(child, bp_info); +#else + return -EINVAL; +#endif +#else /* !CONFIG_PPC_ADV_DEBUG_DVCS */ + /* + * We only support one data breakpoint + */ + if (((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0) || + ((bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0) || + (bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_WRITE) || + (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) || + (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)) + return -EINVAL; + + if (child->thread.dabr) + return -ENOSPC; + + if ((unsigned long)bp_info->addr >= TASK_SIZE) + return -EIO; + + child->thread.dabr = (unsigned long)bp_info->addr; + + return 1; +#endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */ +} + +static long ppc_del_hwdebug(struct task_struct *child, long addr, long data) +{ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + int rc; + + if (data <= 4) + rc = del_instruction_bp(child, (int)data); + else + rc = del_dac(child, (int)data - 4); + + if (!rc) { + if (!DBCR_ACTIVE_EVENTS(child->thread.dbcr0, + child->thread.dbcr1)) { + child->thread.dbcr0 &= ~DBCR0_IDM; + child->thread.regs->msr &= ~MSR_DE; + } + } + return rc; +#else + if (data != 1) + return -EINVAL; + if (child->thread.dabr == 0) + return -ENOENT; + + child->thread.dabr = 0; + + return 0; +#endif +} + /* * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, * we mark them as obsolete now, they will be removed in a future version @@ -932,13 +1328,77 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } + case PPC_PTRACE_GETHWDBGINFO: { + struct ppc_debug_info dbginfo; + + dbginfo.version = 1; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + dbginfo.num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS; + dbginfo.num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS; + dbginfo.num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS; + dbginfo.data_bp_alignment = 4; + dbginfo.sizeof_condition = 4; + dbginfo.features = PPC_DEBUG_FEATURE_INSN_BP_RANGE | + PPC_DEBUG_FEATURE_INSN_BP_MASK; +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + dbginfo.features |= + PPC_DEBUG_FEATURE_DATA_BP_RANGE | + PPC_DEBUG_FEATURE_DATA_BP_MASK; +#endif +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ + dbginfo.num_instruction_bps = 0; + dbginfo.num_data_bps = 1; + dbginfo.num_condition_regs = 0; +#ifdef CONFIG_PPC64 + dbginfo.data_bp_alignment = 8; +#else + dbginfo.data_bp_alignment = 4; +#endif + dbginfo.sizeof_condition = 0; + dbginfo.features = 0; +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + + if (!access_ok(VERIFY_WRITE, data, + sizeof(struct ppc_debug_info))) + return -EFAULT; + ret = __copy_to_user((struct ppc_debug_info __user *)data, + &dbginfo, sizeof(struct ppc_debug_info)) ? + -EFAULT : 0; + break; + } + + case PPC_PTRACE_SETHWDEBUG: { + struct ppc_hw_breakpoint bp_info; + + if (!access_ok(VERIFY_READ, data, + sizeof(struct ppc_hw_breakpoint))) + return -EFAULT; + ret = __copy_from_user(&bp_info, + (struct ppc_hw_breakpoint __user *)data, + sizeof(struct ppc_hw_breakpoint)) ? + -EFAULT : 0; + if (!ret) + ret = ppc_set_hwdebug(child, &bp_info); + break; + } + + case PPC_PTRACE_DELHWDEBUG: { + ret = ppc_del_hwdebug(child, addr, data); + break; + } + case PTRACE_GET_DEBUGREG: { ret = -EINVAL; /* We only support one DABR and no IABRS at the moment */ if (addr > 0) break; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + ret = put_user(child->thread.dac1, + (unsigned long __user *)data); +#else ret = put_user(child->thread.dabr, (unsigned long __user *)data); +#endif break; } diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 1be9fe38bcb..8777fb02349 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -262,19 +262,19 @@ static int __init proc_rtas_init(void) if (rtas_node == NULL) return -ENODEV; - proc_create("ppc64/rtas/progress", S_IRUGO|S_IWUSR, NULL, + proc_create("powerpc/rtas/progress", S_IRUGO|S_IWUSR, NULL, &ppc_rtas_progress_operations); - proc_create("ppc64/rtas/clock", S_IRUGO|S_IWUSR, NULL, + proc_create("powerpc/rtas/clock", S_IRUGO|S_IWUSR, NULL, &ppc_rtas_clock_operations); - proc_create("ppc64/rtas/poweron", S_IWUSR|S_IRUGO, NULL, + proc_create("powerpc/rtas/poweron", S_IWUSR|S_IRUGO, NULL, &ppc_rtas_poweron_operations); - proc_create("ppc64/rtas/sensors", S_IRUGO, NULL, + proc_create("powerpc/rtas/sensors", S_IRUGO, NULL, &ppc_rtas_sensors_operations); - proc_create("ppc64/rtas/frequency", S_IWUSR|S_IRUGO, NULL, + proc_create("powerpc/rtas/frequency", S_IWUSR|S_IRUGO, NULL, &ppc_rtas_tone_freq_operations); - proc_create("ppc64/rtas/volume", S_IWUSR|S_IRUGO, NULL, + proc_create("powerpc/rtas/volume", S_IWUSR|S_IRUGO, NULL, &ppc_rtas_tone_volume_operations); - proc_create("ppc64/rtas/rmo_buffer", S_IRUSR, NULL, + proc_create("powerpc/rtas/rmo_buffer", S_IRUSR, NULL, &ppc_rtas_rmo_buf_ops); return 0; } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index fd0d29493fd..74367841615 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -23,6 +23,7 @@ #include <linux/completion.h> #include <linux/cpumask.h> #include <linux/lmb.h> +#include <linux/slab.h> #include <asm/prom.h> #include <asm/rtas.h> diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index a85117d5c9a..bfc2abafac4 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/proc_fs.h> #include <asm/delay.h> #include <asm/uaccess.h> diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 2e4832ab210..4190eae7850 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -20,6 +20,7 @@ #include <linux/spinlock.h> #include <linux/cpu.h> #include <linux/workqueue.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/io.h> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 03dd6a24819..48f0a008b20 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -36,6 +36,7 @@ #include <linux/lmb.h> #include <linux/of_platform.h> #include <asm/io.h> +#include <asm/paca.h> #include <asm/prom.h> #include <asm/processor.h> #include <asm/vdso_datapage.h> @@ -493,6 +494,8 @@ void __init smp_setup_cpu_maps(void) * here will have to be reworked */ cpu_init_thread_core_maps(nthreads); + + free_unused_pacas(); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index b152de3e64d..8f58986c2ad 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -39,7 +39,6 @@ #include <asm/serial.h> #include <asm/udbg.h> #include <asm/mmu_context.h> -#include <asm/swiotlb.h> #include "setup.h" @@ -343,11 +342,6 @@ void __init setup_arch(char **cmdline_p) ppc_md.setup_arch(); if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); -#ifdef CONFIG_SWIOTLB - if (ppc_swiotlb_enable) - swiotlb_init(1); -#endif - paging_init(); /* Initialize the MMU context management stuff */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6568406b2a3..914389158a9 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -61,7 +61,6 @@ #include <asm/xmon.h> #include <asm/udbg.h> #include <asm/kexec.h> -#include <asm/swiotlb.h> #include <asm/mmu_context.h> #include "setup.h" @@ -144,9 +143,9 @@ early_param("smt-enabled", early_smt_enabled); #endif /* CONFIG_SMP */ /* Put the paca pointer into r13 and SPRG_PACA */ -void __init setup_paca(int cpu) +static void __init setup_paca(struct paca_struct *new_paca) { - local_paca = &paca[cpu]; + local_paca = new_paca; mtspr(SPRN_SPRG_PACA, local_paca); #ifdef CONFIG_PPC_BOOK3E mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); @@ -176,14 +175,12 @@ void __init early_setup(unsigned long dt_ptr) { /* -------- printk is _NOT_ safe to use here ! ------- */ - /* Fill in any unititialised pacas */ - initialise_pacas(); - /* Identify CPU type */ identify_cpu(0, mfspr(SPRN_PVR)); /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ - setup_paca(0); + initialise_paca(&boot_paca, 0); + setup_paca(&boot_paca); /* Initialize lockdep early or else spinlocks will blow */ lockdep_init(); @@ -203,7 +200,7 @@ void __init early_setup(unsigned long dt_ptr) early_init_devtree(__va(dt_ptr)); /* Now we know the logical id of our boot cpu, setup the paca. */ - setup_paca(boot_cpuid); + setup_paca(&paca[boot_cpuid]); /* Fix up paca fields required for the boot cpu */ get_paca()->cpu_start = 1; @@ -543,11 +540,6 @@ void __init setup_arch(char **cmdline_p) if (ppc_md.setup_arch) ppc_md.setup_arch(); -#ifdef CONFIG_SWIOTLB - if (ppc_swiotlb_enable) - swiotlb_init(1); -#endif - paging_init(); /* Initialize the MMU context management stuff */ diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 00b5078da9a..a0afb555a7c 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -140,17 +140,15 @@ static int do_signal_pending(sigset_t *oldset, struct pt_regs *regs) return 0; /* no signals delivered */ } +#ifndef CONFIG_PPC_ADV_DEBUG_REGS /* * Reenable the DABR before delivering the signal to * user space. The DABR will have been cleared if it * triggered inside the kernel. */ - if (current->thread.dabr) { + if (current->thread.dabr) set_dabr(current->thread.dabr); -#if defined(CONFIG_BOOKE) - mtspr(SPRN_DBCR0, current->thread.dbcr0); #endif - } if (is32) { if (ka.sa.sa_flags & SA_SIGINFO) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d670429a160..266610119f6 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1078,7 +1078,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, int i; unsigned char tmp; unsigned long new_msr = regs->msr; -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS unsigned long new_dbcr0 = current->thread.dbcr0; #endif @@ -1087,13 +1087,17 @@ int sys_debug_setcontext(struct ucontext __user *ctx, return -EFAULT; switch (op.dbg_type) { case SIG_DBG_SINGLE_STEPPING: -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS if (op.dbg_value) { new_msr |= MSR_DE; new_dbcr0 |= (DBCR0_IDM | DBCR0_IC); } else { - new_msr &= ~MSR_DE; - new_dbcr0 &= ~(DBCR0_IDM | DBCR0_IC); + new_dbcr0 &= ~DBCR0_IC; + if (!DBCR_ACTIVE_EVENTS(new_dbcr0, + current->thread.dbcr1)) { + new_msr &= ~MSR_DE; + new_dbcr0 &= ~DBCR0_IDM; + } } #else if (op.dbg_value) @@ -1103,7 +1107,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, #endif break; case SIG_DBG_BRANCH_TRACING: -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS return -EINVAL; #else if (op.dbg_value) @@ -1124,7 +1128,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, failure is a problem, anyway, and it's very unlikely unless the user is really doing something wrong. */ regs->msr = new_msr; -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS current->thread.dbcr0 = new_dbcr0; #endif diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c index a5e54526403..03e45c4a9ef 100644 --- a/arch/powerpc/kernel/smp-tbsync.c +++ b/arch/powerpc/kernel/smp-tbsync.c @@ -10,6 +10,7 @@ #include <linux/smp.h> #include <linux/unistd.h> #include <linux/init.h> +#include <linux/slab.h> #include <asm/atomic.h> #include <asm/smp.h> #include <asm/time.h> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a521fb8a40e..c2ee1449807 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -619,4 +619,16 @@ void __cpu_die(unsigned int cpu) if (smp_ops->cpu_die) smp_ops->cpu_die(cpu); } + +static DEFINE_MUTEX(powerpc_cpu_hotplug_driver_mutex); + +void cpu_hotplug_driver_lock() +{ + mutex_lock(&powerpc_cpu_hotplug_driver_mutex); +} + +void cpu_hotplug_driver_unlock() +{ + mutex_unlock(&powerpc_cpu_hotplug_driver_mutex); +} #endif diff --git a/arch/powerpc/kernel/softemu8xx.c b/arch/powerpc/kernel/softemu8xx.c index 23c8c5e7dc4..af0e8290b4f 100644 --- a/arch/powerpc/kernel/softemu8xx.c +++ b/arch/powerpc/kernel/softemu8xx.c @@ -21,7 +21,6 @@ #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/ptrace.h> -#include <linux/slab.h> #include <linux/user.h> #include <linux/interrupt.h> diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index b47d8ceffb5..b0754e23743 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -303,7 +303,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) lis r4,0x1000 1: addic. r4,r4,-0x1000 tlbie r4 - blt 1b + bgt 1b sync /* restore the MSR and turn on the MMU */ diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index c5a4732bcc4..19471a1cef1 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -41,6 +41,7 @@ #include <linux/ptrace.h> #include <linux/elf.h> #include <linux/ipc.h> +#include <linux/slab.h> #include <asm/ptrace.h> #include <asm/types.h> diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 3370e62e43d..f2496f2faec 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -42,100 +42,6 @@ #include <asm/time.h> #include <asm/unistd.h> -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. - */ -int sys_ipc(uint call, int first, unsigned long second, long third, - void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - ret = -ENOSYS; - switch (call) { - case SEMOP: - ret = sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, NULL); - break; - case SEMTIMEDOP: - ret = sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, - (const struct timespec __user *) fifth); - break; - case SEMGET: - ret = sys_semget (first, (int)second, third); - break; - case SEMCTL: { - union semun fourth; - - ret = -EINVAL; - if (!ptr) - break; - if ((ret = get_user(fourth.__pad, (void __user * __user *)ptr))) - break; - ret = sys_semctl(first, (int)second, third, fourth); - break; - } - case MSGSND: - ret = sys_msgsnd(first, (struct msgbuf __user *)ptr, - (size_t)second, third); - break; - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - - ret = -EINVAL; - if (!ptr) - break; - if ((ret = copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof (tmp)) ? -EFAULT : 0)) - break; - ret = sys_msgrcv(first, tmp.msgp, (size_t) second, - tmp.msgtyp, third); - break; - } - default: - ret = sys_msgrcv (first, (struct msgbuf __user *) ptr, - (size_t)second, fifth, third); - break; - } - break; - case MSGGET: - ret = sys_msgget((key_t)first, (int)second); - break; - case MSGCTL: - ret = sys_msgctl(first, (int)second, - (struct msqid_ds __user *)ptr); - break; - case SHMAT: { - ulong raddr; - ret = do_shmat(first, (char __user *)ptr, (int)second, &raddr); - if (ret) - break; - ret = put_user(raddr, (ulong __user *) third); - break; - } - case SHMDT: - ret = sys_shmdt((char __user *)ptr); - break; - case SHMGET: - ret = sys_shmget(first, (size_t)second, third); - break; - case SHMCTL: - ret = sys_shmctl(first, (int)second, - (struct shmid_ds __user *)ptr); - break; - } - - return ret; -} - static inline unsigned long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off, int shift) @@ -210,76 +116,6 @@ long ppc64_personality(unsigned long personality) } #endif -#ifdef CONFIG_PPC64 -#define OVERRIDE_MACHINE (personality(current->personality) == PER_LINUX32) -#else -#define OVERRIDE_MACHINE 0 -#endif - -static inline int override_machine(char __user *mach) -{ - if (OVERRIDE_MACHINE) { - /* change ppc64 to ppc */ - if (__put_user(0, mach+3) || __put_user(0, mach+4)) - return -EFAULT; - } - return 0; -} - -long ppc_newuname(struct new_utsname __user * name) -{ - int err = 0; - - down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - err = -EFAULT; - up_read(&uts_sem); - if (!err) - err = override_machine(name->machine); - return err; -} - -int sys_uname(struct old_utsname __user *name) -{ - int err = 0; - - down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - err = -EFAULT; - up_read(&uts_sem); - if (!err) - err = override_machine(name->machine); - return err; -} - -int sys_olduname(struct oldold_utsname __user *name) -{ - int error; - - if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) - return -EFAULT; - - down_read(&uts_sem); - error = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - error |= __put_user(0, name->sysname + __OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - error |= __put_user(0, name->nodename + __OLD_UTS_LEN); - error |= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - error |= __put_user(0, name->release + __OLD_UTS_LEN); - error |= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - error |= __put_user(0, name->version + __OLD_UTS_LEN); - error |= __copy_to_user(&name->machine, &utsname()->machine, - __OLD_UTS_LEN); - error |= override_machine(name->machine); - up_read(&uts_sem); - - return error? -EFAULT: 0; -} - long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 9ba2cc88591..1b16b9a3e49 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -265,8 +265,8 @@ void account_system_vtime(struct task_struct *tsk) account_system_time(tsk, 0, delta, deltascaled); else account_idle_time(delta); - per_cpu(cputime_last_delta, smp_processor_id()) = delta; - per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; + __get_cpu_var(cputime_last_delta) = delta; + __get_cpu_var(cputime_scaled_last_delta) = deltascaled; local_irq_restore(flags); } EXPORT_SYMBOL_GPL(account_system_vtime); @@ -575,6 +575,8 @@ void timer_interrupt(struct pt_regs * regs) trace_timer_interrupt_entry(regs); + __get_cpu_var(irq_stat).timer_irqs++; + /* Ensure a positive value is written to the decrementer, or else * some CPUs will continuue to take decrementer exceptions */ set_dec(DECREMENTER_MAX); @@ -903,12 +905,21 @@ static void decrementer_set_mode(enum clock_event_mode mode, decrementer_set_next_event(DECREMENTER_MAX, dev); } +static inline uint64_t div_sc64(unsigned long ticks, unsigned long nsec, + int shift) +{ + uint64_t tmp = ((uint64_t)ticks) << shift; + + do_div(tmp, nsec); + return tmp; +} + static void __init setup_clockevent_multiplier(unsigned long hz) { u64 mult, shift = 32; while (1) { - mult = div_sc(hz, NSEC_PER_SEC, shift); + mult = div_sc64(hz, NSEC_PER_SEC, shift); if (mult && (mult >> 32UL) == 0UL) break; @@ -926,8 +937,8 @@ static void register_decrementer_clockevent(int cpu) *dec = decrementer_clockevent; dec->cpumask = cpumask_of(cpu); - printk(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n", - dec->name, dec->mult, dec->shift, cpu); + printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n", + dec->name, dec->mult, dec->shift, cpu); clockevents_register_device(dec); } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d069ff8a7e0..29d128eb6c4 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -21,7 +21,6 @@ #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/ptrace.h> -#include <linux/slab.h> #include <linux/user.h> #include <linux/interrupt.h> #include <linux/init.h> @@ -60,13 +59,13 @@ #endif #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) -int (*__debugger)(struct pt_regs *regs); -int (*__debugger_ipi)(struct pt_regs *regs); -int (*__debugger_bpt)(struct pt_regs *regs); -int (*__debugger_sstep)(struct pt_regs *regs); -int (*__debugger_iabr_match)(struct pt_regs *regs); -int (*__debugger_dabr_match)(struct pt_regs *regs); -int (*__debugger_fault_handler)(struct pt_regs *regs); +int (*__debugger)(struct pt_regs *regs) __read_mostly; +int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly; +int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly; +int (*__debugger_sstep)(struct pt_regs *regs) __read_mostly; +int (*__debugger_iabr_match)(struct pt_regs *regs) __read_mostly; +int (*__debugger_dabr_match)(struct pt_regs *regs) __read_mostly; +int (*__debugger_fault_handler)(struct pt_regs *regs) __read_mostly; EXPORT_SYMBOL(__debugger); EXPORT_SYMBOL(__debugger_ipi); @@ -102,11 +101,11 @@ static inline void pmac_backlight_unblank(void) { } int die(const char *str, struct pt_regs *regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock = __RAW_SPIN_LOCK_UNLOCKED(die.lock), .lock_owner = -1, .lock_owner_depth = 0 }; @@ -120,7 +119,7 @@ int die(const char *str, struct pt_regs *regs, long err) if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irqsave(&die.lock, flags); + raw_spin_lock_irqsave(&die.lock, flags); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); @@ -146,6 +145,11 @@ int die(const char *str, struct pt_regs *regs, long err) #endif printk("%s\n", ppc_md.name ? ppc_md.name : ""); + sysfs_printk_last_file(); + if (notify_die(DIE_OOPS, str, regs, err, 255, + SIGSEGV) == NOTIFY_STOP) + return 1; + print_modules(); show_regs(regs); } else { @@ -155,7 +159,7 @@ int die(const char *str, struct pt_regs *regs, long err) bust_spinlocks(0); die.lock_owner = -1; add_taint(TAINT_DIE); - spin_unlock_irqrestore(&die.lock, flags); + raw_spin_unlock_irqrestore(&die.lock, flags); if (kexec_should_crash(current) || kexec_sr_activated(smp_processor_id())) @@ -294,7 +298,7 @@ static inline int check_io_access(struct pt_regs *regs) return 0; } -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS /* On 4xx, the reason for the machine check or program exception is in the ESR. */ #define get_reason(regs) ((regs)->dsisr) @@ -478,6 +482,8 @@ void machine_check_exception(struct pt_regs *regs) { int recover = 0; + __get_cpu_var(irq_stat).mce_exceptions++; + /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. one if the CPU * one returns a positive number. However there is existing code @@ -960,6 +966,8 @@ void vsx_unavailable_exception(struct pt_regs *regs) void performance_monitor_exception(struct pt_regs *regs) { + __get_cpu_var(irq_stat).pmu_irqs++; + perf_irq(regs); } @@ -1024,10 +1032,69 @@ void SoftwareEmulation(struct pt_regs *regs) } #endif /* CONFIG_8xx */ -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +static void handle_debug(struct pt_regs *regs, unsigned long debug_status) +{ + int changed = 0; + /* + * Determine the cause of the debug event, clear the + * event flags and send a trap to the handler. Torez + */ + if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) { + dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W); +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + current->thread.dbcr2 &= ~DBCR2_DAC12MODE; +#endif + do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT, + 5); + changed |= 0x01; + } else if (debug_status & (DBSR_DAC2R | DBSR_DAC2W)) { + dbcr_dac(current) &= ~(DBCR_DAC2R | DBCR_DAC2W); + do_send_trap(regs, mfspr(SPRN_DAC2), debug_status, TRAP_HWBKPT, + 6); + changed |= 0x01; + } else if (debug_status & DBSR_IAC1) { + current->thread.dbcr0 &= ~DBCR0_IAC1; + dbcr_iac_range(current) &= ~DBCR_IAC12MODE; + do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT, + 1); + changed |= 0x01; + } else if (debug_status & DBSR_IAC2) { + current->thread.dbcr0 &= ~DBCR0_IAC2; + do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT, + 2); + changed |= 0x01; + } else if (debug_status & DBSR_IAC3) { + current->thread.dbcr0 &= ~DBCR0_IAC3; + dbcr_iac_range(current) &= ~DBCR_IAC34MODE; + do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT, + 3); + changed |= 0x01; + } else if (debug_status & DBSR_IAC4) { + current->thread.dbcr0 &= ~DBCR0_IAC4; + do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT, + 4); + changed |= 0x01; + } + /* + * At the point this routine was called, the MSR(DE) was turned off. + * Check all other debug flags and see if that bit needs to be turned + * back on or not. + */ + if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1)) + regs->msr |= MSR_DE; + else + /* Make sure the IDM flag is off */ + current->thread.dbcr0 &= ~DBCR0_IDM; + + if (changed & 0x01) + mtspr(SPRN_DBCR0, current->thread.dbcr0); +} void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) { + current->thread.dbsr = debug_status; + /* Hack alert: On BookE, Branch Taken stops on the branch itself, while * on server, it stops on the target of the branch. In order to simulate * the server behaviour, we thus restart right away with a single step @@ -1071,29 +1138,23 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) if (debugger_sstep(regs)) return; - if (user_mode(regs)) - current->thread.dbcr0 &= ~(DBCR0_IC); - - _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); - } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) { - regs->msr &= ~MSR_DE; - if (user_mode(regs)) { - current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | - DBCR0_IDM); - } else { - /* Disable DAC interupts */ - mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | - DBSR_DAC1W | DBCR0_IDM)); - - /* Clear the DAC event */ - mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W)); + current->thread.dbcr0 &= ~DBCR0_IC; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, + current->thread.dbcr1)) + regs->msr |= MSR_DE; + else + /* Make sure the IDM bit is off */ + current->thread.dbcr0 &= ~DBCR0_IDM; +#endif } - /* Setup and send the trap to the handler */ - do_dabr(regs, mfspr(SPRN_DAC1), debug_status); - } + + _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); + } else + handle_debug(regs, debug_status); } -#endif /* CONFIG_4xx || CONFIG_BOOKE */ +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ #if !defined(CONFIG_TAU_INT) void TAUException(struct pt_regs *regs) diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 77f64218abf..82237176a2a 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -17,6 +17,7 @@ #include <linux/types.h> #include <linux/device.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/console.h> #include <linux/module.h> #include <linux/mm.h> |