summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile6
-rw-r--r--arch/powerpc/kernel/asm-offsets.c2
-rw-r--r--arch/powerpc/kernel/cputable.c118
-rw-r--r--arch/powerpc/kernel/crash_dump.c4
-rw-r--r--arch/powerpc/kernel/dbell.c78
-rw-r--r--arch/powerpc/kernel/dma-swiotlb.c8
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S50
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S1
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c364
-rw-r--r--arch/powerpc/kernel/ibmebus.c8
-rw-r--r--arch/powerpc/kernel/idle_book3e.S86
-rw-r--r--arch/powerpc/kernel/irq.c30
-rw-r--r--arch/powerpc/kernel/kgdb.c2
-rw-r--r--arch/powerpc/kernel/legacy_serial.c1
-rw-r--r--arch/powerpc/kernel/machine_kexec.c22
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c78
-rw-r--r--arch/powerpc/kernel/misc.S26
-rw-r--r--arch/powerpc/kernel/of_device.c133
-rw-r--r--arch/powerpc/kernel/of_platform.c203
-rw-r--r--arch/powerpc/kernel/paca.c10
-rw-r--r--arch/powerpc/kernel/pci-common.c1
-rw-r--r--arch/powerpc/kernel/perf_event.c41
-rw-r--r--arch/powerpc/kernel/perf_event_fsl_emb.c35
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c4
-rw-r--r--arch/powerpc/kernel/process.c47
-rw-r--r--arch/powerpc/kernel/prom.c2
-rw-r--r--arch/powerpc/kernel/prom_init.c44
-rw-r--r--arch/powerpc/kernel/prom_parse.c924
-rw-r--r--arch/powerpc/kernel/ptrace.c64
-rw-r--r--arch/powerpc/kernel/rtas.c105
-rw-r--r--arch/powerpc/kernel/setup-common.c20
-rw-r--r--arch/powerpc/kernel/setup_64.c19
-rw-r--r--arch/powerpc/kernel/signal.c3
-rw-r--r--arch/powerpc/kernel/smp.c10
-rw-r--r--arch/powerpc/kernel/suspend.c2
-rw-r--r--arch/powerpc/kernel/time.c201
-rw-r--r--arch/powerpc/kernel/traps.c29
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S184
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S88
39 files changed, 1189 insertions, 1864 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 58d0572de6f..1dda7012914 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -34,13 +34,14 @@ obj-y += vdso32/
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
paca.o nvram_64.o firmware.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
-obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o
+obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o
obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
-obj-$(CONFIG_PPC_OF) += of_device.o of_platform.o prom_parse.o
+obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o
obj-$(CONFIG_PPC_CLOCK) += clock.o
procfs-y := proc_powerpc.o
obj-$(CONFIG_PROC_FS) += $(procfs-y)
@@ -67,6 +68,7 @@ obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o
obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_44x) += cpu_setup_44x.o
obj-$(CONFIG_FSL_BOOKE) += cpu_setup_fsl_booke.o dbell.o
+obj-$(CONFIG_PPC_BOOK3E_64) += dbell.o
extra-y := head_$(CONFIG_WORD_SIZE).o
extra-$(CONFIG_PPC_BOOK3E_32) := head_new_booke.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 496cc5b3984..1c0607ddccc 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -194,7 +194,6 @@ int main(void)
DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr));
DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
- DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu));
@@ -342,6 +341,7 @@ int main(void)
DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime));
+ DEFINE(STAMP_SEC_FRAC, offsetof(struct vdso_data, stamp_sec_fraction));
DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size));
DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size));
DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size));
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 87aa0f3c604..65e2b4e10f9 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1364,10 +1364,10 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_4xx,
.platform = "ppc405",
},
- { /* 405EX */
- .pvr_mask = 0xffff0004,
- .pvr_value = 0x12910004,
- .cpu_name = "405EX",
+ { /* 405EX Rev. A/B with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910007,
+ .cpu_name = "405EX Rev. A/B",
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
@@ -1377,10 +1377,114 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_4xx,
.platform = "ppc405",
},
- { /* 405EXr */
- .pvr_mask = 0xffff0004,
+ { /* 405EX Rev. C without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x1291000d,
+ .cpu_name = "405EX Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. C with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x1291000f,
+ .cpu_name = "405EX Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. D without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910003,
+ .cpu_name = "405EX Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. D with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910005,
+ .cpu_name = "405EX Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. A/B without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910001,
+ .cpu_name = "405EXr Rev. A/B",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. C without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910009,
+ .cpu_name = "405EXr Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. C with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x1291000b,
+ .cpu_name = "405EXr Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. D without Security */
+ .pvr_mask = 0xffff000f,
.pvr_value = 0x12910000,
- .cpu_name = "405EXr",
+ .cpu_name = "405EXr Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. D with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910002,
+ .cpu_name = "405EXr Rev. D",
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 40f524643ba..8e05c16344e 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -128,9 +128,9 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
if (!csize)
return 0;
- csize = min(csize, PAGE_SIZE);
+ csize = min_t(size_t, csize, PAGE_SIZE);
- if (pfn < max_pfn) {
+ if ((min_low_pfn < pfn) && (pfn < max_pfn)) {
vaddr = __va(pfn << PAGE_SHIFT);
csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
} else {
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 1493734cd87..3307a52d797 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -13,32 +13,88 @@
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/threads.h>
+#include <linux/percpu.h>
#include <asm/dbell.h>
+#include <asm/irq_regs.h>
#ifdef CONFIG_SMP
-unsigned long dbell_smp_message[NR_CPUS];
+struct doorbell_cpu_info {
+ unsigned long messages; /* current messages bits */
+ unsigned int tag; /* tag value */
+};
-void smp_dbell_message_pass(int target, int msg)
+static DEFINE_PER_CPU(struct doorbell_cpu_info, doorbell_cpu_info);
+
+void doorbell_setup_this_cpu(void)
+{
+ struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
+
+ info->messages = 0;
+ info->tag = mfspr(SPRN_PIR) & 0x3fff;
+}
+
+void doorbell_message_pass(int target, int msg)
{
+ struct doorbell_cpu_info *info;
int i;
- if(target < NR_CPUS) {
- set_bit(msg, &dbell_smp_message[target]);
- ppc_msgsnd(PPC_DBELL, 0, target);
+ if (target < NR_CPUS) {
+ info = &per_cpu(doorbell_cpu_info, target);
+ set_bit(msg, &info->messages);
+ ppc_msgsnd(PPC_DBELL, 0, info->tag);
}
- else if(target == MSG_ALL_BUT_SELF) {
+ else if (target == MSG_ALL_BUT_SELF) {
for_each_online_cpu(i) {
if (i == smp_processor_id())
continue;
- set_bit(msg, &dbell_smp_message[i]);
- ppc_msgsnd(PPC_DBELL, 0, i);
+ info = &per_cpu(doorbell_cpu_info, i);
+ set_bit(msg, &info->messages);
+ ppc_msgsnd(PPC_DBELL, 0, info->tag);
}
}
else { /* target == MSG_ALL */
- for_each_online_cpu(i)
- set_bit(msg, &dbell_smp_message[i]);
+ for_each_online_cpu(i) {
+ info = &per_cpu(doorbell_cpu_info, i);
+ set_bit(msg, &info->messages);
+ }
ppc_msgsnd(PPC_DBELL, PPC_DBELL_MSG_BRDCAST, 0);
}
}
-#endif
+
+void doorbell_exception(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
+ int msg;
+
+ /* Warning: regs can be NULL when called from irq enable */
+
+ if (!info->messages || (num_online_cpus() < 2))
+ goto out;
+
+ for (msg = 0; msg < 4; msg++)
+ if (test_and_clear_bit(msg, &info->messages))
+ smp_message_recv(msg);
+
+out:
+ set_irq_regs(old_regs);
+}
+
+void doorbell_check_self(void)
+{
+ struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
+
+ if (!info->messages)
+ return;
+
+ ppc_msgsnd(PPC_DBELL, 0, info->tag);
+}
+
+#else /* CONFIG_SMP */
+void doorbell_exception(struct pt_regs *regs)
+{
+ printk(KERN_WARNING "Received doorbell on non-smp system\n");
+}
+#endif /* CONFIG_SMP */
+
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 02f724f3675..4295e0b94b2 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -82,17 +82,9 @@ static struct notifier_block ppc_swiotlb_plat_bus_notifier = {
.priority = 0,
};
-static struct notifier_block ppc_swiotlb_of_bus_notifier = {
- .notifier_call = ppc_swiotlb_bus_notify,
- .priority = 0,
-};
-
int __init swiotlb_setup_bus_notifier(void)
{
bus_register_notifier(&platform_bus_type,
&ppc_swiotlb_plat_bus_notifier);
- bus_register_notifier(&of_platform_bus_type,
- &ppc_swiotlb_of_bus_notifier);
-
return 0;
}
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 24dcc0ecf24..5c43063d250 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -191,6 +191,12 @@ exc_##n##_bad_stack: \
sth r1,PACA_TRAP_SAVE(r13); /* store trap */ \
b bad_stack_book3e; /* bad stack error */
+/* WARNING: If you change the layout of this stub, make sure you chcek
+ * the debug exception handler which handles single stepping
+ * into exceptions from userspace, and the MM code in
+ * arch/powerpc/mm/tlb_nohash.c which patches the branch here
+ * and would need to be updated if that branch is moved
+ */
#define EXCEPTION_STUB(loc, label) \
. = interrupt_base_book3e + loc; \
nop; /* To make debug interrupts happy */ \
@@ -204,11 +210,30 @@ exc_##n##_bad_stack: \
lis r,TSR_FIS@h; \
mtspr SPRN_TSR,r
+/* Used by asynchronous interrupt that may happen in the idle loop.
+ *
+ * This check if the thread was in the idle loop, and if yes, returns
+ * to the caller rather than the PC. This is to avoid a race if
+ * interrupts happen before the wait instruction.
+ */
+#define CHECK_NAPPING() \
+ clrrdi r11,r1,THREAD_SHIFT; \
+ ld r10,TI_LOCAL_FLAGS(r11); \
+ andi. r9,r10,_TLF_NAPPING; \
+ beq+ 1f; \
+ ld r8,_LINK(r1); \
+ rlwinm r7,r10,0,~_TLF_NAPPING; \
+ std r8,_NIP(r1); \
+ std r7,TI_LOCAL_FLAGS(r11); \
+1:
+
+
#define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack) \
START_EXCEPTION(label); \
NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE) \
EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE_ALL) \
ack(r8); \
+ CHECK_NAPPING(); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
bl hdlr; \
b .ret_from_except_lite;
@@ -246,11 +271,9 @@ interrupt_base_book3e: /* fake trap */
EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */
EXCEPTION_STUB(0x1c0, data_tlb_miss)
EXCEPTION_STUB(0x1e0, instruction_tlb_miss)
+ EXCEPTION_STUB(0x280, doorbell)
+ EXCEPTION_STUB(0x2a0, doorbell_crit)
-#if 0
- EXCEPTION_STUB(0x280, processor_doorbell)
- EXCEPTION_STUB(0x220, processor_doorbell_crit)
-#endif
.globl interrupt_end_book3e
interrupt_end_book3e:
@@ -259,6 +282,7 @@ interrupt_end_book3e:
CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE)
// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE_ALL)
// bl special_reg_save_crit
+// CHECK_NAPPING();
// addi r3,r1,STACK_FRAME_OVERHEAD
// bl .critical_exception
// b ret_from_crit_except
@@ -270,6 +294,7 @@ interrupt_end_book3e:
// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE_ALL)
// bl special_reg_save_mc
// addi r3,r1,STACK_FRAME_OVERHEAD
+// CHECK_NAPPING();
// bl .machine_check_exception
// b ret_from_mc_except
b .
@@ -340,6 +365,7 @@ interrupt_end_book3e:
CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE)
// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE_ALL)
// bl special_reg_save_crit
+// CHECK_NAPPING();
// addi r3,r1,STACK_FRAME_OVERHEAD
// bl .unknown_exception
// b ret_from_crit_except
@@ -428,6 +454,20 @@ interrupt_end_book3e:
kernel_dbg_exc:
b . /* NYI */
+/* Doorbell interrupt */
+ MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE)
+
+/* Doorbell critical Interrupt */
+ START_EXCEPTION(doorbell_crit);
+ CRIT_EXCEPTION_PROLOG(0x2080, PROLOG_ADDITION_NONE)
+// EXCEPTION_COMMON(0x2080, PACA_EXCRIT, INTS_DISABLE_ALL)
+// bl special_reg_save_crit
+// CHECK_NAPPING();
+// addi r3,r1,STACK_FRAME_OVERHEAD
+// bl .doorbell_critical_exception
+// b ret_from_crit_except
+ b .
+
/*
* An interrupt came in while soft-disabled; clear EE in SRR1,
@@ -563,6 +603,8 @@ BAD_STACK_TRAMPOLINE(0xd00)
BAD_STACK_TRAMPOLINE(0xe00)
BAD_STACK_TRAMPOLINE(0xf00)
BAD_STACK_TRAMPOLINE(0xf20)
+BAD_STACK_TRAMPOLINE(0x2070)
+BAD_STACK_TRAMPOLINE(0x2080)
.globl bad_stack_book3e
bad_stack_book3e:
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 3e423fbad6b..f53029a0155 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -828,6 +828,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
/* We have a data breakpoint exception - handle it */
handle_dabr_fault:
+ bl .save_nvgprs
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
new file mode 100644
index 00000000000..5ecd0401cdb
--- /dev/null
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -0,0 +1,364 @@
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers. Derived from
+ * "arch/x86/kernel/hw_breakpoint.c"
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2010 IBM Corporation
+ * Author: K.Prasad <prasad@linux.vnet.ibm.com>
+ *
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/sstep.h>
+#include <asm/uaccess.h>
+
+/*
+ * Stores the breakpoints currently in use on each breakpoint address
+ * register for every cpu
+ */
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
+
+/*
+ * Returns total number of data or instruction breakpoints available.
+ */
+int hw_breakpoint_slots(int type)
+{
+ if (type == TYPE_DATA)
+ return HBP_NUM;
+ return 0; /* no instruction breakpoints available */
+}
+
+/*
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ struct perf_event **slot = &__get_cpu_var(bp_per_reg);
+
+ *slot = bp;
+
+ /*
+ * Do not install DABR values if the instruction must be single-stepped.
+ * If so, DABR will be populated in single_step_dabr_instruction().
+ */
+ if (current->thread.last_hit_ubp != bp)
+ set_dabr(info->address | info->type | DABR_TRANSLATION);
+
+ return 0;
+}
+
+/*
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+ struct perf_event **slot = &__get_cpu_var(bp_per_reg);
+
+ if (*slot != bp) {
+ WARN_ONCE(1, "Can't find the breakpoint");
+ return;
+ }
+
+ *slot = NULL;
+ set_dabr(0);
+}
+
+/*
+ * Perform cleanup of arch-specific counters during unregistration
+ * of the perf-event
+ */
+void arch_unregister_hw_breakpoint(struct perf_event *bp)
+{
+ /*
+ * If the breakpoint is unregistered between a hw_breakpoint_handler()
+ * and the single_step_dabr_instruction(), then cleanup the breakpoint
+ * restoration variables to prevent dangling pointers.
+ */
+ if (bp->ctx->task)
+ bp->ctx->task->thread.last_hit_ubp = NULL;
+}
+
+/*
+ * Check for virtual address in kernel space.
+ */
+int arch_check_bp_in_kernelspace(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+ return is_kernel_addr(info->address);
+}
+
+int arch_bp_generic_fields(int type, int *gen_bp_type)
+{
+ switch (type) {
+ case DABR_DATA_READ:
+ *gen_bp_type = HW_BREAKPOINT_R;
+ break;
+ case DABR_DATA_WRITE:
+ *gen_bp_type = HW_BREAKPOINT_W;
+ break;
+ case (DABR_DATA_WRITE | DABR_DATA_READ):
+ *gen_bp_type = (HW_BREAKPOINT_W | HW_BREAKPOINT_R);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
+{
+ int ret = -EINVAL;
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+ if (!bp)
+ return ret;
+
+ switch (bp->attr.bp_type) {
+ case HW_BREAKPOINT_R:
+ info->type = DABR_DATA_READ;
+ break;
+ case HW_BREAKPOINT_W:
+ info->type = DABR_DATA_WRITE;
+ break;
+ case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+ info->type = (DABR_DATA_READ | DABR_DATA_WRITE);
+ break;
+ default:
+ return ret;
+ }
+
+ info->address = bp->attr.bp_addr;
+ info->len = bp->attr.bp_len;
+
+ /*
+ * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8)
+ * and breakpoint addresses are aligned to nearest double-word
+ * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
+ * 'symbolsize' should satisfy the check below.
+ */
+ if (info->len >
+ (HW_BREAKPOINT_LEN - (info->address & HW_BREAKPOINT_ALIGN)))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Restores the breakpoint on the debug registers.
+ * Invoke this function if it is known that the execution context is
+ * about to change to cause loss of MSR_SE settings.
+ */
+void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
+{
+ struct arch_hw_breakpoint *info;
+
+ if (likely(!tsk->thread.last_hit_ubp))
+ return;
+
+ info = counter_arch_bp(tsk->thread.last_hit_ubp);
+ regs->msr &= ~MSR_SE;
+ set_dabr(info->address | info->type | DABR_TRANSLATION);
+ tsk->thread.last_hit_ubp = NULL;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+ int rc = NOTIFY_STOP;
+ struct perf_event *bp;
+ struct pt_regs *regs = args->regs;
+ int stepped = 1;
+ struct arch_hw_breakpoint *info;
+ unsigned int instr;
+ unsigned long dar = regs->dar;
+
+ /* Disable breakpoints during exception handling */
+ set_dabr(0);
+
+ /*
+ * The counter may be concurrently released but that can only
+ * occur from a call_rcu() path. We can then safely fetch
+ * the breakpoint, use its callback, touch its counter
+ * while we are in an rcu_read_lock() path.
+ */
+ rcu_read_lock();
+
+ bp = __get_cpu_var(bp_per_reg);
+ if (!bp)
+ goto out;
+ info = counter_arch_bp(bp);
+
+ /*
+ * Return early after invoking user-callback function without restoring
+ * DABR if the breakpoint is from ptrace which always operates in
+ * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal
+ * generated in do_dabr().
+ */
+ if (bp->overflow_handler == ptrace_triggered) {
+ perf_bp_event(bp, regs);
+ rc = NOTIFY_DONE;
+ goto out;
+ }
+
+ /*
+ * Verify if dar lies within the address range occupied by the symbol
+ * being watched to filter extraneous exceptions. If it doesn't,
+ * we still need to single-step the instruction, but we don't
+ * generate an event.
+ */
+ info->extraneous_interrupt = !((bp->attr.bp_addr <= dar) &&
+ (dar - bp->attr.bp_addr < bp->attr.bp_len));
+
+ /* Do not emulate user-space instructions, instead single-step them */
+ if (user_mode(regs)) {
+ bp->ctx->task->thread.last_hit_ubp = bp;
+ regs->msr |= MSR_SE;
+ goto out;
+ }
+
+ stepped = 0;
+ instr = 0;
+ if (!__get_user_inatomic(instr, (unsigned int *) regs->nip))
+ stepped = emulate_step(regs, instr);
+
+ /*
+ * emulate_step() could not execute it. We've failed in reliably
+ * handling the hw-breakpoint. Unregister it and throw a warning
+ * message to let the user know about it.
+ */
+ if (!stepped) {
+ WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
+ "0x%lx will be disabled.", info->address);
+ perf_event_disable(bp);
+ goto out;
+ }
+ /*
+ * As a policy, the callback is invoked in a 'trigger-after-execute'
+ * fashion
+ */
+ if (!info->extraneous_interrupt)
+ perf_bp_event(bp, regs);
+
+ set_dabr(info->address | info->type | DABR_TRANSLATION);
+out:
+ rcu_read_unlock();
+ return rc;
+}
+
+/*
+ * Handle single-step exceptions following a DABR hit.
+ */
+int __kprobes single_step_dabr_instruction(struct die_args *args)
+{
+ struct pt_regs *regs = args->regs;
+ struct perf_event *bp = NULL;
+ struct arch_hw_breakpoint *bp_info;
+
+ bp = current->thread.last_hit_ubp;
+ /*
+ * Check if we are single-stepping as a result of a
+ * previous HW Breakpoint exception
+ */
+ if (!bp)
+ return NOTIFY_DONE;
+
+ bp_info = counter_arch_bp(bp);
+
+ /*
+ * We shall invoke the user-defined callback function in the single
+ * stepping handler to confirm to 'trigger-after-execute' semantics
+ */
+ if (!bp_info->extraneous_interrupt)
+ perf_bp_event(bp, regs);
+
+ set_dabr(bp_info->address | bp_info->type | DABR_TRANSLATION);
+ current->thread.last_hit_ubp = NULL;
+
+ /*
+ * If the process was being single-stepped by ptrace, let the
+ * other single-step actions occur (e.g. generate SIGTRAP).
+ */
+ if (test_thread_flag(TIF_SINGLESTEP))
+ return NOTIFY_DONE;
+
+ return NOTIFY_STOP;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_exceptions_notify(
+ struct notifier_block *unused, unsigned long val, void *data)
+{
+ int ret = NOTIFY_DONE;
+
+ switch (val) {
+ case DIE_DABR_MATCH:
+ ret = hw_breakpoint_handler(data);
+ break;
+ case DIE_SSTEP:
+ ret = single_step_dabr_instruction(data);
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+ struct thread_struct *t = &tsk->thread;
+
+ unregister_hw_breakpoint(t->ptrace_bps[0]);
+ t->ptrace_bps[0] = NULL;
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+ /* TODO */
+}
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index 21266abfbda..9b626cfffce 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -140,19 +140,19 @@ static struct dma_map_ops ibmebus_dma_ops = {
static int ibmebus_match_path(struct device *dev, void *data)
{
- struct device_node *dn = to_of_device(dev)->dev.of_node;
+ struct device_node *dn = to_platform_device(dev)->dev.of_node;
return (dn->full_name &&
(strcasecmp((char *)data, dn->full_name) == 0));
}
static int ibmebus_match_node(struct device *dev, void *data)
{
- return to_of_device(dev)->dev.of_node == data;
+ return to_platform_device(dev)->dev.of_node == data;
}
static int ibmebus_create_device(struct device_node *dn)
{
- struct of_device *dev;
+ struct platform_device *dev;
int ret;
dev = of_device_alloc(dn, NULL, &ibmebus_bus_device);
@@ -298,7 +298,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus,
if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path,
ibmebus_match_path))) {
- of_device_unregister(to_of_device(dev));
+ of_device_unregister(to_platform_device(dev));
kfree(path);
return count;
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
new file mode 100644
index 00000000000..16c002d6bdf
--- /dev/null
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *
+ * Generic idle routine for Book3E processors
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ppc-opcode.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+/* 64-bit version only for now */
+#ifdef CONFIG_PPC64
+
+_GLOBAL(book3e_idle)
+ /* Save LR for later */
+ mflr r0
+ std r0,16(r1)
+
+ /* Hard disable interrupts */
+ wrteei 0
+
+ /* Now check if an interrupt came in while we were soft disabled
+ * since we may otherwise lose it (doorbells etc...). We know
+ * that since PACAHARDIRQEN will have been cleared in that case.
+ */
+ lbz r3,PACAHARDIRQEN(r13)
+ cmpwi cr0,r3,0
+ beqlr
+
+ /* Now we are going to mark ourselves as soft and hard enables in
+ * order to be able to take interrupts while asleep. We inform lockdep
+ * of that. We don't actually turn interrupts on just yet tho.
+ */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ stdu r1,-128(r1)
+ bl .trace_hardirqs_on
+#endif
+ li r0,1
+ stb r0,PACASOFTIRQEN(r13)
+ stb r0,PACAHARDIRQEN(r13)
+
+ /* Interrupts will make use return to LR, so get something we want
+ * in there
+ */
+ bl 1f
+
+ /* Hard disable interrupts again */
+ wrteei 0
+
+ /* Mark them off again in the PACA as well */
+ li r0,0
+ stb r0,PACASOFTIRQEN(r13)
+ stb r0,PACAHARDIRQEN(r13)
+
+ /* Tell lockdep about it */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bl .trace_hardirqs_off
+ addi r1,r1,128
+#endif
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+1: /* Let's set the _TLF_NAPPING flag so interrupts make us return
+ * to the right spot
+ */
+ clrrdi r11,r1,THREAD_SHIFT
+ ld r10,TI_LOCAL_FLAGS(r11)
+ ori r10,r10,_TLF_NAPPING
+ std r10,TI_LOCAL_FLAGS(r11)
+
+ /* We can now re-enable hard interrupts and go to sleep */
+ wrteei 1
+1: PPC_WAIT(0)
+ b 1b
+
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 77be3d058a6..d3ce67cf03b 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -53,6 +53,8 @@
#include <linux/bootmem.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -64,6 +66,8 @@
#include <asm/ptrace.h>
#include <asm/machdep.h>
#include <asm/udbg.h>
+#include <asm/dbell.h>
+
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#include <asm/firmware.h>
@@ -153,14 +157,28 @@ notrace void raw_local_irq_restore(unsigned long en)
if (get_hard_enabled())
return;
+#if defined(CONFIG_BOOKE) && defined(CONFIG_SMP)
+ /* Check for pending doorbell interrupts and resend to ourself */
+ doorbell_check_self();
+#endif
+
/*
* Need to hard-enable interrupts here. Since currently disabled,
* no need to take further asm precautions against preemption; but
* use local_paca instead of get_paca() to avoid preemption checking.
*/
local_paca->hard_enabled = en;
+
+#ifndef CONFIG_BOOKE
+ /* On server, re-trigger the decrementer if it went negative since
+ * some processors only trigger on edge transitions of the sign bit.
+ *
+ * BookE has a level sensitive decrementer (latches in TSR) so we
+ * don't need that
+ */
if ((int)mfspr(SPRN_DEC) < 0)
mtspr(SPRN_DEC, 1);
+#endif /* CONFIG_BOOKE */
/*
* Force the delivery of pending soft-disabled interrupts on PS3.
@@ -804,18 +822,6 @@ unsigned int irq_create_of_mapping(struct device_node *controller,
}
EXPORT_SYMBOL_GPL(irq_create_of_mapping);
-unsigned int irq_of_parse_and_map(struct device_node *dev, int index)
-{
- struct of_irq oirq;
-
- if (of_irq_map_one(dev, index, &oirq))
- return NO_IRQ;
-
- return irq_create_of_mapping(oirq.controller, oirq.specifier,
- oirq.size);
-}
-EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
-
void irq_dispose_mapping(unsigned int virq)
{
struct irq_host *host;
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 82a7b228c81..7f61a3ac787 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -129,7 +129,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
return 0;
if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
- regs->nip += 4;
+ regs->nip += BREAK_INSTR_SIZE;
return 1;
}
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 035ada5443e..c1fd0f9658f 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -4,6 +4,7 @@
#include <linux/serial_core.h>
#include <linux/console.h>
#include <linux/pci.h>
+#include <linux/of_address.h>
#include <linux/of_device.h>
#include <asm/io.h>
#include <asm/mmu.h>
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 89f005116aa..dd6c141f166 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -45,6 +45,18 @@ void machine_kexec_cleanup(struct kimage *image)
ppc_md.machine_kexec_cleanup(image);
}
+void arch_crash_save_vmcoreinfo(void)
+{
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ VMCOREINFO_SYMBOL(node_data);
+ VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+ VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+}
+
/*
* Do not allocate memory (or fail in any way) in machine_kexec().
* We are past the point of no return, committed to rebooting now.
@@ -144,24 +156,24 @@ int overlaps_crashkernel(unsigned long start, unsigned long size)
}
/* Values we need to export to the second kernel via the device tree. */
-static unsigned long kernel_end;
-static unsigned long crashk_size;
+static phys_addr_t kernel_end;
+static phys_addr_t crashk_size;
static struct property kernel_end_prop = {
.name = "linux,kernel-end",
- .length = sizeof(unsigned long),
+ .length = sizeof(phys_addr_t),
.value = &kernel_end,
};
static struct property crashk_base_prop = {
.name = "linux,crashkernel-base",
- .length = sizeof(unsigned long),
+ .length = sizeof(phys_addr_t),
.value = &crashk_res.start,
};
static struct property crashk_size_prop = {
.name = "linux,crashkernel-size",
- .length = sizeof(unsigned long),
+ .length = sizeof(phys_addr_t),
.value = &crashk_size,
};
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index ed31a29c4ff..583af70c4b1 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -15,6 +15,8 @@
#include <linux/thread_info.h>
#include <linux/init_task.h>
#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
#include <asm/page.h>
#include <asm/current.h>
@@ -25,6 +27,7 @@
#include <asm/sections.h> /* _end */
#include <asm/prom.h>
#include <asm/smp.h>
+#include <asm/hw_breakpoint.h>
int default_machine_kexec_prepare(struct kimage *image)
{
@@ -165,6 +168,7 @@ static void kexec_smp_down(void *arg)
while(kexec_all_irq_disabled == 0)
cpu_relax();
mb(); /* make sure all irqs are disabled before this */
+ hw_breakpoint_disable();
/*
* Now every CPU has IRQs off, we can clear out any pending
* IPIs and be sure that no more will come in after this.
@@ -180,8 +184,22 @@ static void kexec_prepare_cpus_wait(int wait_state)
{
int my_cpu, i, notified=-1;
+ hw_breakpoint_disable();
my_cpu = get_cpu();
- /* Make sure each CPU has atleast made it to the state we need */
+ /* Make sure each CPU has at least made it to the state we need.
+ *
+ * FIXME: There is a (slim) chance of a problem if not all of the CPUs
+ * are correctly onlined. If somehow we start a CPU on boot with RTAS
+ * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
+ * time, the boot CPU will timeout. If it does eventually execute
+ * stuff, the secondary will start up (paca[].cpu_start was written) and
+ * get into a peculiar state. If the platform supports
+ * smp_ops->take_timebase(), the secondary CPU will probably be spinning
+ * in there. If not (i.e. pseries), the secondary will continue on and
+ * try to online itself/idle/etc. If it survives that, we need to find
+ * these possible-but-not-online-but-should-be CPUs and chaperone them
+ * into kexec_smp_wait().
+ */
for_each_online_cpu(i) {
if (i == my_cpu)
continue;
@@ -189,9 +207,9 @@ static void kexec_prepare_cpus_wait(int wait_state)
while (paca[i].kexec_state < wait_state) {
barrier();
if (i != notified) {
- printk( "kexec: waiting for cpu %d (physical"
- " %d) to enter %i state\n",
- i, paca[i].hw_cpu_id, wait_state);
+ printk(KERN_INFO "kexec: waiting for cpu %d "
+ "(physical %d) to enter %i state\n",
+ i, paca[i].hw_cpu_id, wait_state);
notified = i;
}
}
@@ -199,9 +217,32 @@ static void kexec_prepare_cpus_wait(int wait_state)
mb();
}
-static void kexec_prepare_cpus(void)
+/*
+ * We need to make sure each present CPU is online. The next kernel will scan
+ * the device tree and assume primary threads are online and query secondary
+ * threads via RTAS to online them if required. If we don't online primary
+ * threads, they will be stuck. However, we also online secondary threads as we
+ * may be using 'cede offline'. In this case RTAS doesn't see the secondary
+ * threads as offline -- and again, these CPUs will be stuck.
+ *
+ * So, we online all CPUs that should be running, including secondary threads.
+ */
+static void wake_offline_cpus(void)
{
+ int cpu = 0;
+ for_each_present_cpu(cpu) {
+ if (!cpu_online(cpu)) {
+ printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
+ cpu);
+ cpu_up(cpu);
+ }
+ }
+}
+
+static void kexec_prepare_cpus(void)
+{
+ wake_offline_cpus();
smp_call_function(kexec_smp_down, NULL, /* wait */0);
local_irq_disable();
mb(); /* make sure IRQs are disabled before we say they are */
@@ -215,7 +256,10 @@ static void kexec_prepare_cpus(void)
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(0, 0);
- /* Before removing MMU mapings make sure all CPUs have entered real mode */
+ /*
+ * Before removing MMU mappings make sure all CPUs have entered real
+ * mode:
+ */
kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
put_cpu();
@@ -257,6 +301,12 @@ static void kexec_prepare_cpus(void)
static union thread_union kexec_stack __init_task_data =
{ };
+/*
+ * For similar reasons to the stack above, the kexecing CPU needs to be on a
+ * static PACA; we switch to kexec_paca.
+ */
+struct paca_struct kexec_paca;
+
/* Our assembly helper, in kexec_stub.S */
extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
void *image, void *control,
@@ -278,12 +328,28 @@ void default_machine_kexec(struct kimage *image)
if (crashing_cpu == -1)
kexec_prepare_cpus();
+ pr_debug("kexec: Starting switchover sequence.\n");
+
/* switch to a staticly allocated stack. Based on irq stack code.
* XXX: the task struct will likely be invalid once we do the copy!
*/
kexec_stack.thread_info.task = current_thread_info()->task;
kexec_stack.thread_info.flags = 0;
+ /* We need a static PACA, too; copy this CPU's PACA over and switch to
+ * it. Also poison per_cpu_offset to catch anyone using non-static
+ * data.
+ */
+ memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
+ kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
+ paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) -
+ kexec_paca.paca_index;
+ setup_paca(&kexec_paca);
+
+ /* XXX: If anyone does 'dynamic lppacas' this will also need to be
+ * switched to a static version!
+ */
+
/* Some things are best done in assembly. Finding globals with
* a toc is easier in C, so pass in what we can.
*/
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 22e507c8a55..2d29752cbe1 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7)
_GLOBAL(__restore_cpu_power7)
/* place holder */
blr
-
-/*
- * Get a minimal set of registers for our caller's nth caller.
- * r3 = regs pointer, r5 = n.
- *
- * We only get R1 (stack pointer), NIP (next instruction pointer)
- * and LR (link register). These are all we can get in the
- * general case without doing complicated stack unwinding, but
- * fortunately they are enough to do a stack backtrace, which
- * is all we need them for.
- */
-_GLOBAL(perf_arch_fetch_caller_regs)
- mr r6,r1
- cmpwi r5,0
- mflr r4
- ble 2f
- mtctr r5
-1: PPC_LL r6,0(r6)
- bdnz 1b
- PPC_LL r4,PPC_LR_STKOFF(r6)
-2: PPC_LL r7,0(r6)
- PPC_LL r7,PPC_LR_STKOFF(r7)
- PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3)
- PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3)
- PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3)
- blr
diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c
deleted file mode 100644
index df78e0236a0..00000000000
--- a/arch/powerpc/kernel/of_device.c
+++ /dev/null
@@ -1,133 +0,0 @@
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mod_devicetable.h>
-#include <linux/slab.h>
-#include <linux/of_device.h>
-
-#include <asm/errno.h>
-#include <asm/dcr.h>
-
-static void of_device_make_bus_id(struct of_device *dev)
-{
- static atomic_t bus_no_reg_magic;
- struct device_node *node = dev->dev.of_node;
- const u32 *reg;
- u64 addr;
- int magic;
-
- /*
- * If it's a DCR based device, use 'd' for native DCRs
- * and 'D' for MMIO DCRs.
- */
-#ifdef CONFIG_PPC_DCR
- reg = of_get_property(node, "dcr-reg", NULL);
- if (reg) {
-#ifdef CONFIG_PPC_DCR_NATIVE
- dev_set_name(&dev->dev, "d%x.%s", *reg, node->name);
-#else /* CONFIG_PPC_DCR_NATIVE */
- addr = of_translate_dcr_address(node, *reg, NULL);
- if (addr != OF_BAD_ADDR) {
- dev_set_name(&dev->dev, "D%llx.%s",
- (unsigned long long)addr, node->name);
- return;
- }
-#endif /* !CONFIG_PPC_DCR_NATIVE */
- }
-#endif /* CONFIG_PPC_DCR */
-
- /*
- * For MMIO, get the physical address
- */
- reg = of_get_property(node, "reg", NULL);
- if (reg) {
- addr = of_translate_address(node, reg);
- if (addr != OF_BAD_ADDR) {
- dev_set_name(&dev->dev, "%llx.%s",
- (unsigned long long)addr, node->name);
- return;
- }
- }
-
- /*
- * No BusID, use the node name and add a globally incremented
- * counter (and pray...)
- */
- magic = atomic_add_return(1, &bus_no_reg_magic);
- dev_set_name(&dev->dev, "%s.%d", node->name, magic - 1);
-}
-
-struct of_device *of_device_alloc(struct device_node *np,
- const char *bus_id,
- struct device *parent)
-{
- struct of_device *dev;
-
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (!dev)
- return NULL;
-
- dev->dev.of_node = of_node_get(np);
- dev->dev.dma_mask = &dev->archdata.dma_mask;
- dev->dev.parent = parent;
- dev->dev.release = of_release_dev;
-
- if (bus_id)
- dev_set_name(&dev->dev, "%s", bus_id);
- else
- of_device_make_bus_id(dev);
-
- return dev;
-}
-EXPORT_SYMBOL(of_device_alloc);
-
-int of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
- struct of_device *ofdev;
- const char *compat;
- int seen = 0, cplen, sl;
-
- if (!dev)
- return -ENODEV;
-
- ofdev = to_of_device(dev);
-
- if (add_uevent_var(env, "OF_NAME=%s", ofdev->dev.of_node->name))
- return -ENOMEM;
-
- if (add_uevent_var(env, "OF_TYPE=%s", ofdev->dev.of_node->type))
- return -ENOMEM;
-
- /* Since the compatible field can contain pretty much anything
- * it's not really legal to split it out with commas. We split it
- * up using a number of environment variables instead. */
-
- compat = of_get_property(ofdev->dev.of_node, "compatible", &cplen);
- while (compat && *compat && cplen > 0) {
- if (add_uevent_var(env, "OF_COMPATIBLE_%d=%s", seen, compat))
- return -ENOMEM;
-
- sl = strlen (compat) + 1;
- compat += sl;
- cplen -= sl;
- seen++;
- }
-
- if (add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen))
- return -ENOMEM;
-
- /* modalias is trickier, we add it in 2 steps */
- if (add_uevent_var(env, "MODALIAS="))
- return -ENOMEM;
- sl = of_device_get_modalias(ofdev, &env->buf[env->buflen-1],
- sizeof(env->buf) - env->buflen);
- if (sl >= (sizeof(env->buf) - env->buflen))
- return -ENOMEM;
- env->buflen += sl;
-
- return 0;
-}
-EXPORT_SYMBOL(of_device_uevent);
-EXPORT_SYMBOL(of_device_get_modalias);
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 487a98851ba..b2c363ef38a 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -28,207 +28,6 @@
#include <asm/ppc-pci.h>
#include <asm/atomic.h>
-/*
- * The list of OF IDs below is used for matching bus types in the
- * system whose devices are to be exposed as of_platform_devices.
- *
- * This is the default list valid for most platforms. This file provides
- * functions who can take an explicit list if necessary though
- *
- * The search is always performed recursively looking for children of
- * the provided device_node and recursively if such a children matches
- * a bus type in the list
- */
-
-static const struct of_device_id of_default_bus_ids[] = {
- { .type = "soc", },
- { .compatible = "soc", },
- { .type = "spider", },
- { .type = "axon", },
- { .type = "plb5", },
- { .type = "plb4", },
- { .type = "opb", },
- { .type = "ebc", },
- {},
-};
-
-struct bus_type of_platform_bus_type = {
- .uevent = of_device_uevent,
-};
-EXPORT_SYMBOL(of_platform_bus_type);
-
-static int __init of_bus_driver_init(void)
-{
- return of_bus_type_init(&of_platform_bus_type, "of_platform");
-}
-
-postcore_initcall(of_bus_driver_init);
-
-struct of_device* of_platform_device_create(struct device_node *np,
- const char *bus_id,
- struct device *parent)
-{
- struct of_device *dev;
-
- dev = of_device_alloc(np, bus_id, parent);
- if (!dev)
- return NULL;
-
- dev->archdata.dma_mask = 0xffffffffUL;
- dev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
-
- dev->dev.bus = &of_platform_bus_type;
-
- /* We do not fill the DMA ops for platform devices by default.
- * This is currently the responsibility of the platform code
- * to do such, possibly using a device notifier
- */
-
- if (of_device_register(dev) != 0) {
- of_device_free(dev);
- return NULL;
- }
-
- return dev;
-}
-EXPORT_SYMBOL(of_platform_device_create);
-
-
-
-/**
- * of_platform_bus_create - Create an OF device for a bus node and all its
- * children. Optionally recursively instanciate matching busses.
- * @bus: device node of the bus to instanciate
- * @matches: match table, NULL to use the default, OF_NO_DEEP_PROBE to
- * disallow recursive creation of child busses
- */
-static int of_platform_bus_create(const struct device_node *bus,
- const struct of_device_id *matches,
- struct device *parent)
-{
- struct device_node *child;
- struct of_device *dev;
- int rc = 0;
-
- for_each_child_of_node(bus, child) {
- pr_debug(" create child: %s\n", child->full_name);
- dev = of_platform_device_create(child, NULL, parent);
- if (dev == NULL)
- rc = -ENOMEM;
- else if (!of_match_node(matches, child))
- continue;
- if (rc == 0) {
- pr_debug(" and sub busses\n");
- rc = of_platform_bus_create(child, matches, &dev->dev);
- } if (rc) {
- of_node_put(child);
- break;
- }
- }
- return rc;
-}
-
-/**
- * of_platform_bus_probe - Probe the device-tree for platform busses
- * @root: parent of the first level to probe or NULL for the root of the tree
- * @matches: match table, NULL to use the default
- * @parent: parent to hook devices from, NULL for toplevel
- *
- * Note that children of the provided root are not instanciated as devices
- * unless the specified root itself matches the bus list and is not NULL.
- */
-
-int of_platform_bus_probe(struct device_node *root,
- const struct of_device_id *matches,
- struct device *parent)
-{
- struct device_node *child;
- struct of_device *dev;
- int rc = 0;
-
- if (matches == NULL)
- matches = of_default_bus_ids;
- if (matches == OF_NO_DEEP_PROBE)
- return -EINVAL;
- if (root == NULL)
- root = of_find_node_by_path("/");
- else
- of_node_get(root);
-
- pr_debug("of_platform_bus_probe()\n");
- pr_debug(" starting at: %s\n", root->full_name);
-
- /* Do a self check of bus type, if there's a match, create
- * children
- */
- if (of_match_node(matches, root)) {
- pr_debug(" root match, create all sub devices\n");
- dev = of_platform_device_create(root, NULL, parent);
- if (dev == NULL) {
- rc = -ENOMEM;
- goto bail;
- }
- pr_debug(" create all sub busses\n");
- rc = of_platform_bus_create(root, matches, &dev->dev);
- goto bail;
- }
- for_each_child_of_node(root, child) {
- if (!of_match_node(matches, child))
- continue;
-
- pr_debug(" match: %s\n", child->full_name);
- dev = of_platform_device_create(child, NULL, parent);
- if (dev == NULL)
- rc = -ENOMEM;
- else
- rc = of_platform_bus_create(child, matches, &dev->dev);
- if (rc) {
- of_node_put(child);
- break;
- }
- }
- bail:
- of_node_put(root);
- return rc;
-}
-EXPORT_SYMBOL(of_platform_bus_probe);
-
-static int of_dev_node_match(struct device *dev, void *data)
-{
- return to_of_device(dev)->dev.of_node == data;
-}
-
-struct of_device *of_find_device_by_node(struct device_node *np)
-{
- struct device *dev;
-
- dev = bus_find_device(&of_platform_bus_type,
- NULL, np, of_dev_node_match);
- if (dev)
- return to_of_device(dev);
- return NULL;
-}
-EXPORT_SYMBOL(of_find_device_by_node);
-
-static int of_dev_phandle_match(struct device *dev, void *data)
-{
- phandle *ph = data;
- return to_of_device(dev)->dev.of_node->phandle == *ph;
-}
-
-struct of_device *of_find_device_by_phandle(phandle ph)
-{
- struct device *dev;
-
- dev = bus_find_device(&of_platform_bus_type,
- NULL, &ph, of_dev_phandle_match);
- if (dev)
- return to_of_device(dev);
- return NULL;
-}
-EXPORT_SYMBOL(of_find_device_by_phandle);
-
-
#ifdef CONFIG_PPC_OF_PLATFORM_PCI
/* The probing of PCI controllers from of_platform is currently
@@ -237,7 +36,7 @@ EXPORT_SYMBOL(of_find_device_by_phandle);
* lacking some bits needed here.
*/
-static int __devinit of_pci_phb_probe(struct of_device *dev,
+static int __devinit of_pci_phb_probe(struct platform_device *dev,
const struct of_device_id *match)
{
struct pci_controller *phb;
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 139a773853f..d0a26f1770f 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -105,6 +105,16 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
#endif /* CONFIG_PPC_STD_MMU_64 */
}
+/* Put the paca pointer into r13 and SPRG_PACA */
+void setup_paca(struct paca_struct *new_paca)
+{
+ local_paca = new_paca;
+ mtspr(SPRN_SPRG_PACA, local_paca);
+#ifdef CONFIG_PPC_BOOK3E
+ mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
+#endif
+}
+
static int __initdata paca_size;
void __init allocate_pacas(void)
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 5b38f6ae2b2..9021c4ad4bb 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -21,6 +21,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
+#include <linux/of_address.h>
#include <linux/mm.h>
#include <linux/list.h>
#include <linux/syscalls.h>
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 5c14ffe5125..d301a30445e 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event)
* Therefore we treat them like NMIs.
*/
do {
- prev = atomic64_read(&event->hw.prev_count);
+ prev = local64_read(&event->hw.prev_count);
barrier();
val = read_pmc(event->hw.idx);
- } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
/* The counters are only 32 bits wide */
delta = (val - prev) & 0xfffffffful;
- atomic64_add(delta, &event->count);
- atomic64_sub(delta, &event->hw.period_left);
+ local64_add(delta, &event->count);
+ local64_sub(delta, &event->hw.period_left);
}
/*
@@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
if (!event->hw.idx)
continue;
val = (event->hw.idx == 5) ? pmc5 : pmc6;
- prev = atomic64_read(&event->hw.prev_count);
+ prev = local64_read(&event->hw.prev_count);
event->hw.idx = 0;
delta = (val - prev) & 0xfffffffful;
- atomic64_add(delta, &event->count);
+ local64_add(delta, &event->count);
}
}
@@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
event = cpuhw->limited_counter[i];
event->hw.idx = cpuhw->limited_hwidx[i];
val = (event->hw.idx == 5) ? pmc5 : pmc6;
- atomic64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.prev_count, val);
perf_event_update_userpage(event);
}
}
@@ -666,11 +666,11 @@ void hw_perf_enable(void)
}
val = 0;
if (event->hw.sample_period) {
- left = atomic64_read(&event->hw.period_left);
+ left = local64_read(&event->hw.period_left);
if (left < 0x80000000L)
val = 0x80000000L - left;
}
- atomic64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.prev_count, val);
event->hw.idx = idx;
write_pmc(idx, val);
perf_event_update_userpage(event);
@@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event)
* skip the schedulability test here, it will be peformed
* at commit time(->commit_txn) as a whole
*/
- if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED)
+ if (cpuhw->group_flag & PERF_EVENT_TXN)
goto nocheck;
if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -845,8 +845,8 @@ static void power_pmu_unthrottle(struct perf_event *event)
if (left < 0x80000000L)
val = 0x80000000L - left;
write_pmc(event->hw.idx, val);
- atomic64_set(&event->hw.prev_count, val);
- atomic64_set(&event->hw.period_left, left);
+ local64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
perf_enable();
local_irq_restore(flags);
@@ -861,7 +861,7 @@ void power_pmu_start_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
- cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+ cpuhw->group_flag |= PERF_EVENT_TXN;
cpuhw->n_txn_start = cpuhw->n_events;
}
@@ -874,7 +874,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
- cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+ cpuhw->group_flag &= ~PERF_EVENT_TXN;
}
/*
@@ -900,6 +900,7 @@ int power_pmu_commit_txn(const struct pmu *pmu)
for (i = cpuhw->n_txn_start; i < n; ++i)
cpuhw->event[i]->hw.config = cpuhw->events[i];
+ cpuhw->group_flag &= ~PERF_EVENT_TXN;
return 0;
}
@@ -1111,7 +1112,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
event->hw.config = events[n];
event->hw.event_base = cflags[n];
event->hw.last_period = event->hw.sample_period;
- atomic64_set(&event->hw.period_left, event->hw.last_period);
+ local64_set(&event->hw.period_left, event->hw.last_period);
/*
* See if we need to reserve the PMU.
@@ -1149,16 +1150,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
int record = 0;
/* we don't have to worry about interrupts here */
- prev = atomic64_read(&event->hw.prev_count);
+ prev = local64_read(&event->hw.prev_count);
delta = (val - prev) & 0xfffffffful;
- atomic64_add(delta, &event->count);
+ local64_add(delta, &event->count);
/*
* See if the total period for this event has expired,
* and update for the next period.
*/
val = 0;
- left = atomic64_read(&event->hw.period_left) - delta;
+ left = local64_read(&event->hw.period_left) - delta;
if (period) {
if (left <= 0) {
left += period;
@@ -1196,8 +1197,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
}
write_pmc(event->hw.idx, val);
- atomic64_set(&event->hw.prev_count, val);
- atomic64_set(&event->hw.period_left, left);
+ local64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
}
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index 369872f6cf7..1ba45471ae4 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -162,15 +162,15 @@ static void fsl_emb_pmu_read(struct perf_event *event)
* Therefore we treat them like NMIs.
*/
do {
- prev = atomic64_read(&event->hw.prev_count);
+ prev = local64_read(&event->hw.prev_count);
barrier();
val = read_pmc(event->hw.idx);
- } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
/* The counters are only 32 bits wide */
delta = (val - prev) & 0xfffffffful;
- atomic64_add(delta, &event->count);
- atomic64_sub(delta, &event->hw.period_left);
+ local64_add(delta, &event->count);
+ local64_sub(delta, &event->hw.period_left);
}
/*
@@ -296,11 +296,11 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
val = 0;
if (event->hw.sample_period) {
- s64 left = atomic64_read(&event->hw.period_left);
+ s64 left = local64_read(&event->hw.period_left);
if (left < 0x80000000L)
val = 0x80000000L - left;
}
- atomic64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.prev_count, val);
write_pmc(i, val);
perf_event_update_userpage(event);
@@ -371,8 +371,8 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event)
if (left < 0x80000000L)
val = 0x80000000L - left;
write_pmc(event->hw.idx, val);
- atomic64_set(&event->hw.prev_count, val);
- atomic64_set(&event->hw.period_left, left);
+ local64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
perf_enable();
local_irq_restore(flags);
@@ -500,7 +500,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
return ERR_PTR(-ENOTSUPP);
event->hw.last_period = event->hw.sample_period;
- atomic64_set(&event->hw.period_left, event->hw.last_period);
+ local64_set(&event->hw.period_left, event->hw.last_period);
/*
* See if we need to reserve the PMU.
@@ -541,16 +541,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
int record = 0;
/* we don't have to worry about interrupts here */
- prev = atomic64_read(&event->hw.prev_count);
+ prev = local64_read(&event->hw.prev_count);
delta = (val - prev) & 0xfffffffful;
- atomic64_add(delta, &event->count);
+ local64_add(delta, &event->count);
/*
* See if the total period for this event has expired,
* and update for the next period.
*/
val = 0;
- left = atomic64_read(&event->hw.period_left) - delta;
+ left = local64_read(&event->hw.period_left) - delta;
if (period) {
if (left <= 0) {
left += period;
@@ -566,9 +566,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
* Finally record data if requested.
*/
if (record) {
- struct perf_sample_data data = {
- .period = event->hw.last_period,
- };
+ struct perf_sample_data data;
+
+ perf_sample_data_init(&data, 0);
+ data.period = event->hw.last_period;
if (perf_event_overflow(event, nmi, &data, regs)) {
/*
@@ -584,8 +585,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
}
write_pmc(event->hw.idx, val);
- atomic64_set(&event->hw.prev_count, val);
- atomic64_set(&event->hw.period_left, left);
+ local64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
}
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 3b4dcc82a4c..ab3e392ac63 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -101,10 +101,6 @@ EXPORT_SYMBOL(pci_dram_offset);
EXPORT_SYMBOL(start_thread);
EXPORT_SYMBOL(kernel_thread);
-#ifdef CONFIG_PPC_FPU
-EXPORT_SYMBOL_GPL(cvt_df);
-EXPORT_SYMBOL_GPL(cvt_fd);
-#endif
EXPORT_SYMBOL(giveup_fpu);
#ifdef CONFIG_ALTIVEC
EXPORT_SYMBOL(giveup_altivec);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 773424df828..e78a5add7f1 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -37,6 +37,7 @@
#include <linux/kernel_stat.h>
#include <linux/personality.h>
#include <linux/random.h>
+#include <linux/hw_breakpoint.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
@@ -462,14 +463,42 @@ struct task_struct *__switch_to(struct task_struct *prev,
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
switch_booke_debug_regs(&new->thread);
#else
+/*
+ * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would
+ * schedule DABR
+ */
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
set_dabr(new->thread.dabr);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif
new_thread = &new->thread;
old_thread = &current->thread;
+#if defined(CONFIG_PPC_BOOK3E_64)
+ /* XXX Current Book3E code doesn't deal with kernel side DBCR0,
+ * we always hold the user values, so we set it now.
+ *
+ * However, we ensure the kernel MSR:DE is appropriately cleared too
+ * to avoid spurrious single step exceptions in the kernel.
+ *
+ * This will have to change to merge with the ppc32 code at some point,
+ * but I don't like much what ppc32 is doing today so there's some
+ * thinking needed there
+ */
+ if ((new_thread->dbcr0 | old_thread->dbcr0) & DBCR0_IDM) {
+ u32 dbcr0;
+
+ mtmsr(mfmsr() & ~MSR_DE);
+ isync();
+ dbcr0 = mfspr(SPRN_DBCR0);
+ dbcr0 = (dbcr0 & DBCR0_EDM) | new_thread->dbcr0;
+ mtspr(SPRN_DBCR0, dbcr0);
+ }
+#endif /* CONFIG_PPC64_BOOK3E */
+
#ifdef CONFIG_PPC64
/*
* Collect processor utilization data per process
@@ -642,7 +671,11 @@ void flush_thread(void)
{
discard_lazy_cpu_state();
+#ifdef CONFIG_HAVE_HW_BREAKPOINTS
+ flush_ptrace_hw_breakpoint(current);
+#else /* CONFIG_HAVE_HW_BREAKPOINTS */
set_debug_reg_defaults(&current->thread);
+#endif /* CONFIG_HAVE_HW_BREAKPOINTS */
}
void
@@ -660,6 +693,9 @@ void prepare_to_copy(struct task_struct *tsk)
flush_altivec_to_thread(current);
flush_vsx_to_thread(current);
flush_spe_to_thread(current);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ flush_ptrace_hw_breakpoint(tsk);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
}
/*
@@ -1263,3 +1299,14 @@ unsigned long randomize_et_dyn(unsigned long base)
return ret;
}
+
+#ifdef CONFIG_SMP
+int arch_sd_sibling_asym_packing(void)
+{
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+ printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+ return SD_ASYM_PACKING;
+ }
+ return 0;
+}
+#endif
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 9d3953983fb..fed9bf6187d 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -414,7 +414,7 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
u64 base, size, memblock_size;
unsigned int is_kexec_kdump = 0, rngs;
- ls = of_get_flat_dt_prop(node, "ibm,memblock-size", &l);
+ ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
if (ls == NULL || l < dt_root_size_cells * sizeof(__be32))
return 0;
memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls);
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 3b6f8ae9b8c..941ff4dbc56 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -311,6 +311,24 @@ static void __init prom_print_hex(unsigned long val)
call_prom("write", 3, 1, _prom->stdout, buf, nibbles);
}
+/* max number of decimal digits in an unsigned long */
+#define UL_DIGITS 21
+static void __init prom_print_dec(unsigned long val)
+{
+ int i, size;
+ char buf[UL_DIGITS+1];
+ struct prom_t *_prom = &RELOC(prom);
+
+ for (i = UL_DIGITS-1; i >= 0; i--) {
+ buf[i] = (val % 10) + '0';
+ val = val/10;
+ if (val == 0)
+ break;
+ }
+ /* shift stuff down */
+ size = UL_DIGITS - i;
+ call_prom("write", 3, 1, _prom->stdout, buf+i, size);
+}
static void __init prom_printf(const char *format, ...)
{
@@ -350,6 +368,14 @@ static void __init prom_printf(const char *format, ...)
v = va_arg(args, unsigned long);
prom_print_hex(v);
break;
+ case 'l':
+ ++q;
+ if (*q == 'u') { /* '%lu' */
+ ++q;
+ v = va_arg(args, unsigned long);
+ prom_print_dec(v);
+ }
+ break;
}
}
}
@@ -835,11 +861,11 @@ static int __init prom_count_smt_threads(void)
if (plen == PROM_ERROR)
break;
plen >>= 2;
- prom_debug("Found 0x%x smt threads per core\n", (unsigned long)plen);
+ prom_debug("Found %lu smt threads per core\n", (unsigned long)plen);
/* Sanity check */
if (plen < 1 || plen > 64) {
- prom_printf("Threads per core 0x%x out of bounds, assuming 1\n",
+ prom_printf("Threads per core %lu out of bounds, assuming 1\n",
(unsigned long)plen);
return 1;
}
@@ -869,12 +895,12 @@ static void __init prom_send_capabilities(void)
cores = (u32 *)PTRRELOC(&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]);
if (*cores != NR_CPUS) {
prom_printf("WARNING ! "
- "ibm_architecture_vec structure inconsistent: 0x%x !\n",
+ "ibm_architecture_vec structure inconsistent: %lu!\n",
*cores);
} else {
*cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
- prom_printf("Max number of cores passed to firmware: 0x%x\n",
- (unsigned long)*cores);
+ prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n",
+ *cores, NR_CPUS);
}
/* try calling the ibm,client-architecture-support method */
@@ -1482,7 +1508,7 @@ static void __init prom_hold_cpus(void)
reg = -1;
prom_getprop(node, "reg", &reg, sizeof(reg));
- prom_debug("cpu hw idx = 0x%x\n", reg);
+ prom_debug("cpu hw idx = %lu\n", reg);
/* Init the acknowledge var which will be reset by
* the secondary cpu when it awakens from its OF
@@ -1492,7 +1518,7 @@ static void __init prom_hold_cpus(void)
if (reg != _prom->cpu) {
/* Primary Thread of non-boot cpu */
- prom_printf("starting cpu hw idx %x... ", reg);
+ prom_printf("starting cpu hw idx %lu... ", reg);
call_prom("start-cpu", 3, 0, node,
secondary_hold, reg);
@@ -1507,7 +1533,7 @@ static void __init prom_hold_cpus(void)
}
#ifdef CONFIG_SMP
else
- prom_printf("boot cpu hw idx %x\n", reg);
+ prom_printf("boot cpu hw idx %lu\n", reg);
#endif /* CONFIG_SMP */
}
@@ -2420,7 +2446,7 @@ static void __init prom_find_boot_cpu(void)
prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval));
_prom->cpu = getprop_rval;
- prom_debug("Booting CPU hw index = 0x%x\n", _prom->cpu);
+ prom_debug("Booting CPU hw index = %lu\n", _prom->cpu);
}
static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 8362620c9e6..88334af038e 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -6,232 +6,11 @@
#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/etherdevice.h>
+#include <linux/of_address.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
-#ifdef DEBUG
-#define DBG(fmt...) do { printk(fmt); } while(0)
-#else
-#define DBG(fmt...) do { } while(0)
-#endif
-
-#ifdef CONFIG_PPC64
-#define PRu64 "%lx"
-#else
-#define PRu64 "%llx"
-#endif
-
-/* Max address size we deal with */
-#define OF_MAX_ADDR_CELLS 4
-#define OF_CHECK_COUNTS(na, ns) ((na) > 0 && (na) <= OF_MAX_ADDR_CELLS && \
- (ns) > 0)
-
-static struct of_bus *of_match_bus(struct device_node *np);
-static int __of_address_to_resource(struct device_node *dev,
- const u32 *addrp, u64 size, unsigned int flags,
- struct resource *r);
-
-
-/* Debug utility */
-#ifdef DEBUG
-static void of_dump_addr(const char *s, const u32 *addr, int na)
-{
- printk("%s", s);
- while(na--)
- printk(" %08x", *(addr++));
- printk("\n");
-}
-#else
-static void of_dump_addr(const char *s, const u32 *addr, int na) { }
-#endif
-
-
-/* Callbacks for bus specific translators */
-struct of_bus {
- const char *name;
- const char *addresses;
- int (*match)(struct device_node *parent);
- void (*count_cells)(struct device_node *child,
- int *addrc, int *sizec);
- u64 (*map)(u32 *addr, const u32 *range,
- int na, int ns, int pna);
- int (*translate)(u32 *addr, u64 offset, int na);
- unsigned int (*get_flags)(const u32 *addr);
-};
-
-
-/*
- * Default translator (generic bus)
- */
-
-static void of_bus_default_count_cells(struct device_node *dev,
- int *addrc, int *sizec)
-{
- if (addrc)
- *addrc = of_n_addr_cells(dev);
- if (sizec)
- *sizec = of_n_size_cells(dev);
-}
-
-static u64 of_bus_default_map(u32 *addr, const u32 *range,
- int na, int ns, int pna)
-{
- u64 cp, s, da;
-
- cp = of_read_number(range, na);
- s = of_read_number(range + na + pna, ns);
- da = of_read_number(addr, na);
-
- DBG("OF: default map, cp="PRu64", s="PRu64", da="PRu64"\n",
- cp, s, da);
-
- if (da < cp || da >= (cp + s))
- return OF_BAD_ADDR;
- return da - cp;
-}
-
-static int of_bus_default_translate(u32 *addr, u64 offset, int na)
-{
- u64 a = of_read_number(addr, na);
- memset(addr, 0, na * 4);
- a += offset;
- if (na > 1)
- addr[na - 2] = a >> 32;
- addr[na - 1] = a & 0xffffffffu;
-
- return 0;
-}
-
-static unsigned int of_bus_default_get_flags(const u32 *addr)
-{
- return IORESOURCE_MEM;
-}
-
-
#ifdef CONFIG_PCI
-/*
- * PCI bus specific translator
- */
-
-static int of_bus_pci_match(struct device_node *np)
-{
- /* "vci" is for the /chaos bridge on 1st-gen PCI powermacs */
- return !strcmp(np->type, "pci") || !strcmp(np->type, "vci");
-}
-
-static void of_bus_pci_count_cells(struct device_node *np,
- int *addrc, int *sizec)
-{
- if (addrc)
- *addrc = 3;
- if (sizec)
- *sizec = 2;
-}
-
-static unsigned int of_bus_pci_get_flags(const u32 *addr)
-{
- unsigned int flags = 0;
- u32 w = addr[0];
-
- switch((w >> 24) & 0x03) {
- case 0x01:
- flags |= IORESOURCE_IO;
- break;
- case 0x02: /* 32 bits */
- case 0x03: /* 64 bits */
- flags |= IORESOURCE_MEM;
- break;
- }
- if (w & 0x40000000)
- flags |= IORESOURCE_PREFETCH;
- return flags;
-}
-
-static u64 of_bus_pci_map(u32 *addr, const u32 *range, int na, int ns, int pna)
-{
- u64 cp, s, da;
- unsigned int af, rf;
-
- af = of_bus_pci_get_flags(addr);
- rf = of_bus_pci_get_flags(range);
-
- /* Check address type match */
- if ((af ^ rf) & (IORESOURCE_MEM | IORESOURCE_IO))
- return OF_BAD_ADDR;
-
- /* Read address values, skipping high cell */
- cp = of_read_number(range + 1, na - 1);
- s = of_read_number(range + na + pna, ns);
- da = of_read_number(addr + 1, na - 1);
-
- DBG("OF: PCI map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da);
-
- if (da < cp || da >= (cp + s))
- return OF_BAD_ADDR;
- return da - cp;
-}
-
-static int of_bus_pci_translate(u32 *addr, u64 offset, int na)
-{
- return of_bus_default_translate(addr + 1, offset, na - 1);
-}
-
-const u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
- unsigned int *flags)
-{
- const u32 *prop;
- unsigned int psize;
- struct device_node *parent;
- struct of_bus *bus;
- int onesize, i, na, ns;
-
- /* Get parent & match bus type */
- parent = of_get_parent(dev);
- if (parent == NULL)
- return NULL;
- bus = of_match_bus(parent);
- if (strcmp(bus->name, "pci")) {
- of_node_put(parent);
- return NULL;
- }
- bus->count_cells(dev, &na, &ns);
- of_node_put(parent);
- if (!OF_CHECK_COUNTS(na, ns))
- return NULL;
-
- /* Get "reg" or "assigned-addresses" property */
- prop = of_get_property(dev, bus->addresses, &psize);
- if (prop == NULL)
- return NULL;
- psize /= 4;
-
- onesize = na + ns;
- for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++)
- if ((prop[0] & 0xff) == ((bar_no * 4) + PCI_BASE_ADDRESS_0)) {
- if (size)
- *size = of_read_number(prop + na, ns);
- if (flags)
- *flags = bus->get_flags(prop);
- return prop;
- }
- return NULL;
-}
-EXPORT_SYMBOL(of_get_pci_address);
-
-int of_pci_address_to_resource(struct device_node *dev, int bar,
- struct resource *r)
-{
- const u32 *addrp;
- u64 size;
- unsigned int flags;
-
- addrp = of_get_pci_address(dev, bar, &size, &flags);
- if (addrp == NULL)
- return -EINVAL;
- return __of_address_to_resource(dev, addrp, size, flags, r);
-}
-EXPORT_SYMBOL_GPL(of_pci_address_to_resource);
-
int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
{
struct device_node *dn, *ppnode;
@@ -313,345 +92,6 @@ int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
EXPORT_SYMBOL_GPL(of_irq_map_pci);
#endif /* CONFIG_PCI */
-/*
- * ISA bus specific translator
- */
-
-static int of_bus_isa_match(struct device_node *np)
-{
- return !strcmp(np->name, "isa");
-}
-
-static void of_bus_isa_count_cells(struct device_node *child,
- int *addrc, int *sizec)
-{
- if (addrc)
- *addrc = 2;
- if (sizec)
- *sizec = 1;
-}
-
-static u64 of_bus_isa_map(u32 *addr, const u32 *range, int na, int ns, int pna)
-{
- u64 cp, s, da;
-
- /* Check address type match */
- if ((addr[0] ^ range[0]) & 0x00000001)
- return OF_BAD_ADDR;
-
- /* Read address values, skipping high cell */
- cp = of_read_number(range + 1, na - 1);
- s = of_read_number(range + na + pna, ns);
- da = of_read_number(addr + 1, na - 1);
-
- DBG("OF: ISA map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da);
-
- if (da < cp || da >= (cp + s))
- return OF_BAD_ADDR;
- return da - cp;
-}
-
-static int of_bus_isa_translate(u32 *addr, u64 offset, int na)
-{
- return of_bus_default_translate(addr + 1, offset, na - 1);
-}
-
-static unsigned int of_bus_isa_get_flags(const u32 *addr)
-{
- unsigned int flags = 0;
- u32 w = addr[0];
-
- if (w & 1)
- flags |= IORESOURCE_IO;
- else
- flags |= IORESOURCE_MEM;
- return flags;
-}
-
-
-/*
- * Array of bus specific translators
- */
-
-static struct of_bus of_busses[] = {
-#ifdef CONFIG_PCI
- /* PCI */
- {
- .name = "pci",
- .addresses = "assigned-addresses",
- .match = of_bus_pci_match,
- .count_cells = of_bus_pci_count_cells,
- .map = of_bus_pci_map,
- .translate = of_bus_pci_translate,
- .get_flags = of_bus_pci_get_flags,
- },
-#endif /* CONFIG_PCI */
- /* ISA */
- {
- .name = "isa",
- .addresses = "reg",
- .match = of_bus_isa_match,
- .count_cells = of_bus_isa_count_cells,
- .map = of_bus_isa_map,
- .translate = of_bus_isa_translate,
- .get_flags = of_bus_isa_get_flags,
- },
- /* Default */
- {
- .name = "default",
- .addresses = "reg",
- .match = NULL,
- .count_cells = of_bus_default_count_cells,
- .map = of_bus_default_map,
- .translate = of_bus_default_translate,
- .get_flags = of_bus_default_get_flags,
- },
-};
-
-static struct of_bus *of_match_bus(struct device_node *np)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(of_busses); i ++)
- if (!of_busses[i].match || of_busses[i].match(np))
- return &of_busses[i];
- BUG();
- return NULL;
-}
-
-static int of_translate_one(struct device_node *parent, struct of_bus *bus,
- struct of_bus *pbus, u32 *addr,
- int na, int ns, int pna, const char *rprop)
-{
- const u32 *ranges;
- unsigned int rlen;
- int rone;
- u64 offset = OF_BAD_ADDR;
-
- /* Normally, an absence of a "ranges" property means we are
- * crossing a non-translatable boundary, and thus the addresses
- * below the current not cannot be converted to CPU physical ones.
- * Unfortunately, while this is very clear in the spec, it's not
- * what Apple understood, and they do have things like /uni-n or
- * /ht nodes with no "ranges" property and a lot of perfectly
- * useable mapped devices below them. Thus we treat the absence of
- * "ranges" as equivalent to an empty "ranges" property which means
- * a 1:1 translation at that level. It's up to the caller not to try
- * to translate addresses that aren't supposed to be translated in
- * the first place. --BenH.
- */
- ranges = of_get_property(parent, rprop, &rlen);
- if (ranges == NULL || rlen == 0) {
- offset = of_read_number(addr, na);
- memset(addr, 0, pna * 4);
- DBG("OF: no ranges, 1:1 translation\n");
- goto finish;
- }
-
- DBG("OF: walking ranges...\n");
-
- /* Now walk through the ranges */
- rlen /= 4;
- rone = na + pna + ns;
- for (; rlen >= rone; rlen -= rone, ranges += rone) {
- offset = bus->map(addr, ranges, na, ns, pna);
- if (offset != OF_BAD_ADDR)
- break;
- }
- if (offset == OF_BAD_ADDR) {
- DBG("OF: not found !\n");
- return 1;
- }
- memcpy(addr, ranges + na, 4 * pna);
-
- finish:
- of_dump_addr("OF: parent translation for:", addr, pna);
- DBG("OF: with offset: "PRu64"\n", offset);
-
- /* Translate it into parent bus space */
- return pbus->translate(addr, offset, pna);
-}
-
-
-/*
- * Translate an address from the device-tree into a CPU physical address,
- * this walks up the tree and applies the various bus mappings on the
- * way.
- *
- * Note: We consider that crossing any level with #size-cells == 0 to mean
- * that translation is impossible (that is we are not dealing with a value
- * that can be mapped to a cpu physical address). This is not really specified
- * that way, but this is traditionally the way IBM at least do things
- */
-u64 __of_translate_address(struct device_node *dev, const u32 *in_addr,
- const char *rprop)
-{
- struct device_node *parent = NULL;
- struct of_bus *bus, *pbus;
- u32 addr[OF_MAX_ADDR_CELLS];
- int na, ns, pna, pns;
- u64 result = OF_BAD_ADDR;
-
- DBG("OF: ** translation for device %s **\n", dev->full_name);
-
- /* Increase refcount at current level */
- of_node_get(dev);
-
- /* Get parent & match bus type */
- parent = of_get_parent(dev);
- if (parent == NULL)
- goto bail;
- bus = of_match_bus(parent);
-
- /* Cound address cells & copy address locally */
- bus->count_cells(dev, &na, &ns);
- if (!OF_CHECK_COUNTS(na, ns)) {
- printk(KERN_ERR "prom_parse: Bad cell count for %s\n",
- dev->full_name);
- goto bail;
- }
- memcpy(addr, in_addr, na * 4);
-
- DBG("OF: bus is %s (na=%d, ns=%d) on %s\n",
- bus->name, na, ns, parent->full_name);
- of_dump_addr("OF: translating address:", addr, na);
-
- /* Translate */
- for (;;) {
- /* Switch to parent bus */
- of_node_put(dev);
- dev = parent;
- parent = of_get_parent(dev);
-
- /* If root, we have finished */
- if (parent == NULL) {
- DBG("OF: reached root node\n");
- result = of_read_number(addr, na);
- break;
- }
-
- /* Get new parent bus and counts */
- pbus = of_match_bus(parent);
- pbus->count_cells(dev, &pna, &pns);
- if (!OF_CHECK_COUNTS(pna, pns)) {
- printk(KERN_ERR "prom_parse: Bad cell count for %s\n",
- dev->full_name);
- break;
- }
-
- DBG("OF: parent bus is %s (na=%d, ns=%d) on %s\n",
- pbus->name, pna, pns, parent->full_name);
-
- /* Apply bus translation */
- if (of_translate_one(dev, bus, pbus, addr, na, ns, pna, rprop))
- break;
-
- /* Complete the move up one level */
- na = pna;
- ns = pns;
- bus = pbus;
-
- of_dump_addr("OF: one level translation:", addr, na);
- }
- bail:
- of_node_put(parent);
- of_node_put(dev);
-
- return result;
-}
-
-u64 of_translate_address(struct device_node *dev, const u32 *in_addr)
-{
- return __of_translate_address(dev, in_addr, "ranges");
-}
-EXPORT_SYMBOL(of_translate_address);
-
-u64 of_translate_dma_address(struct device_node *dev, const u32 *in_addr)
-{
- return __of_translate_address(dev, in_addr, "dma-ranges");
-}
-EXPORT_SYMBOL(of_translate_dma_address);
-
-const u32 *of_get_address(struct device_node *dev, int index, u64 *size,
- unsigned int *flags)
-{
- const u32 *prop;
- unsigned int psize;
- struct device_node *parent;
- struct of_bus *bus;
- int onesize, i, na, ns;
-
- /* Get parent & match bus type */
- parent = of_get_parent(dev);
- if (parent == NULL)
- return NULL;
- bus = of_match_bus(parent);
- bus->count_cells(dev, &na, &ns);
- of_node_put(parent);
- if (!OF_CHECK_COUNTS(na, ns))
- return NULL;
-
- /* Get "reg" or "assigned-addresses" property */
- prop = of_get_property(dev, bus->addresses, &psize);
- if (prop == NULL)
- return NULL;
- psize /= 4;
-
- onesize = na + ns;
- for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++)
- if (i == index) {
- if (size)
- *size = of_read_number(prop + na, ns);
- if (flags)
- *flags = bus->get_flags(prop);
- return prop;
- }
- return NULL;
-}
-EXPORT_SYMBOL(of_get_address);
-
-static int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
- u64 size, unsigned int flags,
- struct resource *r)
-{
- u64 taddr;
-
- if ((flags & (IORESOURCE_IO | IORESOURCE_MEM)) == 0)
- return -EINVAL;
- taddr = of_translate_address(dev, addrp);
- if (taddr == OF_BAD_ADDR)
- return -EINVAL;
- memset(r, 0, sizeof(struct resource));
- if (flags & IORESOURCE_IO) {
- unsigned long port;
- port = pci_address_to_pio(taddr);
- if (port == (unsigned long)-1)
- return -EINVAL;
- r->start = port;
- r->end = port + size - 1;
- } else {
- r->start = taddr;
- r->end = taddr + size - 1;
- }
- r->flags = flags;
- r->name = dev->name;
- return 0;
-}
-
-int of_address_to_resource(struct device_node *dev, int index,
- struct resource *r)
-{
- const u32 *addrp;
- u64 size;
- unsigned int flags;
-
- addrp = of_get_address(dev, index, &size, &flags);
- if (addrp == NULL)
- return -EINVAL;
- return __of_address_to_resource(dev, addrp, size, flags, r);
-}
-EXPORT_SYMBOL_GPL(of_address_to_resource);
-
void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
unsigned long *busno, unsigned long *phys, unsigned long *size)
{
@@ -678,342 +118,6 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
*size = of_read_number(dma_window, cells);
}
-/*
- * Interrupt remapper
- */
-
-static unsigned int of_irq_workarounds;
-static struct device_node *of_irq_dflt_pic;
-
-static struct device_node *of_irq_find_parent(struct device_node *child)
-{
- struct device_node *p;
- const phandle *parp;
-
- if (!of_node_get(child))
- return NULL;
-
- do {
- parp = of_get_property(child, "interrupt-parent", NULL);
- if (parp == NULL)
- p = of_get_parent(child);
- else {
- if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
- p = of_node_get(of_irq_dflt_pic);
- else
- p = of_find_node_by_phandle(*parp);
- }
- of_node_put(child);
- child = p;
- } while (p && of_get_property(p, "#interrupt-cells", NULL) == NULL);
-
- return p;
-}
-
-/* This doesn't need to be called if you don't have any special workaround
- * flags to pass
- */
-void of_irq_map_init(unsigned int flags)
-{
- of_irq_workarounds = flags;
-
- /* OldWorld, don't bother looking at other things */
- if (flags & OF_IMAP_OLDWORLD_MAC)
- return;
-
- /* If we don't have phandles, let's try to locate a default interrupt
- * controller (happens when booting with BootX). We do a first match
- * here, hopefully, that only ever happens on machines with one
- * controller.
- */
- if (flags & OF_IMAP_NO_PHANDLE) {
- struct device_node *np;
-
- for_each_node_with_property(np, "interrupt-controller") {
- /* Skip /chosen/interrupt-controller */
- if (strcmp(np->name, "chosen") == 0)
- continue;
- /* It seems like at least one person on this planet wants
- * to use BootX on a machine with an AppleKiwi controller
- * which happens to pretend to be an interrupt
- * controller too.
- */
- if (strcmp(np->name, "AppleKiwi") == 0)
- continue;
- /* I think we found one ! */
- of_irq_dflt_pic = np;
- break;
- }
- }
-
-}
-
-int of_irq_map_raw(struct device_node *parent, const u32 *intspec, u32 ointsize,
- const u32 *addr, struct of_irq *out_irq)
-{
- struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL;
- const u32 *tmp, *imap, *imask;
- u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0;
- int imaplen, match, i;
-
- DBG("of_irq_map_raw: par=%s,intspec=[0x%08x 0x%08x...],ointsize=%d\n",
- parent->full_name, intspec[0], intspec[1], ointsize);
-
- ipar = of_node_get(parent);
-
- /* First get the #interrupt-cells property of the current cursor
- * that tells us how to interpret the passed-in intspec. If there
- * is none, we are nice and just walk up the tree
- */
- do {
- tmp = of_get_property(ipar, "#interrupt-cells", NULL);
- if (tmp != NULL) {
- intsize = *tmp;
- break;
- }
- tnode = ipar;
- ipar = of_irq_find_parent(ipar);
- of_node_put(tnode);
- } while (ipar);
- if (ipar == NULL) {
- DBG(" -> no parent found !\n");
- goto fail;
- }
-
- DBG("of_irq_map_raw: ipar=%s, size=%d\n", ipar->full_name, intsize);
-
- if (ointsize != intsize)
- return -EINVAL;
-
- /* Look for this #address-cells. We have to implement the old linux
- * trick of looking for the parent here as some device-trees rely on it
- */
- old = of_node_get(ipar);
- do {
- tmp = of_get_property(old, "#address-cells", NULL);
- tnode = of_get_parent(old);
- of_node_put(old);
- old = tnode;
- } while(old && tmp == NULL);
- of_node_put(old);
- old = NULL;
- addrsize = (tmp == NULL) ? 2 : *tmp;
-
- DBG(" -> addrsize=%d\n", addrsize);
-
- /* Now start the actual "proper" walk of the interrupt tree */
- while (ipar != NULL) {
- /* Now check if cursor is an interrupt-controller and if it is
- * then we are done
- */
- if (of_get_property(ipar, "interrupt-controller", NULL) !=
- NULL) {
- DBG(" -> got it !\n");
- memcpy(out_irq->specifier, intspec,
- intsize * sizeof(u32));
- out_irq->size = intsize;
- out_irq->controller = ipar;
- of_node_put(old);
- return 0;
- }
-
- /* Now look for an interrupt-map */
- imap = of_get_property(ipar, "interrupt-map", &imaplen);
- /* No interrupt map, check for an interrupt parent */
- if (imap == NULL) {
- DBG(" -> no map, getting parent\n");
- newpar = of_irq_find_parent(ipar);
- goto skiplevel;
- }
- imaplen /= sizeof(u32);
-
- /* Look for a mask */
- imask = of_get_property(ipar, "interrupt-map-mask", NULL);
-
- /* If we were passed no "reg" property and we attempt to parse
- * an interrupt-map, then #address-cells must be 0.
- * Fail if it's not.
- */
- if (addr == NULL && addrsize != 0) {
- DBG(" -> no reg passed in when needed !\n");
- goto fail;
- }
-
- /* Parse interrupt-map */
- match = 0;
- while (imaplen > (addrsize + intsize + 1) && !match) {
- /* Compare specifiers */
- match = 1;
- for (i = 0; i < addrsize && match; ++i) {
- u32 mask = imask ? imask[i] : 0xffffffffu;
- match = ((addr[i] ^ imap[i]) & mask) == 0;
- }
- for (; i < (addrsize + intsize) && match; ++i) {
- u32 mask = imask ? imask[i] : 0xffffffffu;
- match =
- ((intspec[i-addrsize] ^ imap[i]) & mask) == 0;
- }
- imap += addrsize + intsize;
- imaplen -= addrsize + intsize;
-
- DBG(" -> match=%d (imaplen=%d)\n", match, imaplen);
-
- /* Get the interrupt parent */
- if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
- newpar = of_node_get(of_irq_dflt_pic);
- else
- newpar = of_find_node_by_phandle((phandle)*imap);
- imap++;
- --imaplen;
-
- /* Check if not found */
- if (newpar == NULL) {
- DBG(" -> imap parent not found !\n");
- goto fail;
- }
-
- /* Get #interrupt-cells and #address-cells of new
- * parent
- */
- tmp = of_get_property(newpar, "#interrupt-cells", NULL);
- if (tmp == NULL) {
- DBG(" -> parent lacks #interrupt-cells !\n");
- goto fail;
- }
- newintsize = *tmp;
- tmp = of_get_property(newpar, "#address-cells", NULL);
- newaddrsize = (tmp == NULL) ? 0 : *tmp;
-
- DBG(" -> newintsize=%d, newaddrsize=%d\n",
- newintsize, newaddrsize);
-
- /* Check for malformed properties */
- if (imaplen < (newaddrsize + newintsize))
- goto fail;
-
- imap += newaddrsize + newintsize;
- imaplen -= newaddrsize + newintsize;
-
- DBG(" -> imaplen=%d\n", imaplen);
- }
- if (!match)
- goto fail;
-
- of_node_put(old);
- old = of_node_get(newpar);
- addrsize = newaddrsize;
- intsize = newintsize;
- intspec = imap - intsize;
- addr = intspec - addrsize;
-
- skiplevel:
- /* Iterate again with new parent */
- DBG(" -> new parent: %s\n", newpar ? newpar->full_name : "<>");
- of_node_put(ipar);
- ipar = newpar;
- newpar = NULL;
- }
- fail:
- of_node_put(ipar);
- of_node_put(old);
- of_node_put(newpar);
-
- return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(of_irq_map_raw);
-
-#if defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32)
-static int of_irq_map_oldworld(struct device_node *device, int index,
- struct of_irq *out_irq)
-{
- const u32 *ints = NULL;
- int intlen;
-
- /*
- * Old machines just have a list of interrupt numbers
- * and no interrupt-controller nodes. We also have dodgy
- * cases where the APPL,interrupts property is completely
- * missing behind pci-pci bridges and we have to get it
- * from the parent (the bridge itself, as apple just wired
- * everything together on these)
- */
- while (device) {
- ints = of_get_property(device, "AAPL,interrupts", &intlen);
- if (ints != NULL)
- break;
- device = device->parent;
- if (device && strcmp(device->type, "pci") != 0)
- break;
- }
- if (ints == NULL)
- return -EINVAL;
- intlen /= sizeof(u32);
-
- if (index >= intlen)
- return -EINVAL;
-
- out_irq->controller = NULL;
- out_irq->specifier[0] = ints[index];
- out_irq->size = 1;
-
- return 0;
-}
-#else /* defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32) */
-static int of_irq_map_oldworld(struct device_node *device, int index,
- struct of_irq *out_irq)
-{
- return -EINVAL;
-}
-#endif /* !(defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32)) */
-
-int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq)
-{
- struct device_node *p;
- const u32 *intspec, *tmp, *addr;
- u32 intsize, intlen;
- int res = -EINVAL;
-
- DBG("of_irq_map_one: dev=%s, index=%d\n", device->full_name, index);
-
- /* OldWorld mac stuff is "special", handle out of line */
- if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC)
- return of_irq_map_oldworld(device, index, out_irq);
-
- /* Get the interrupts property */
- intspec = of_get_property(device, "interrupts", &intlen);
- if (intspec == NULL)
- return -EINVAL;
- intlen /= sizeof(u32);
-
- /* Get the reg property (if any) */
- addr = of_get_property(device, "reg", NULL);
-
- /* Look for the interrupt parent. */
- p = of_irq_find_parent(device);
- if (p == NULL)
- return -EINVAL;
-
- /* Get size of interrupt specifier */
- tmp = of_get_property(p, "#interrupt-cells", NULL);
- if (tmp == NULL)
- goto out;
- intsize = *tmp;
-
- DBG(" intsize=%d intlen=%d\n", intsize, intlen);
-
- /* Check index */
- if ((index + 1) * intsize > intlen)
- goto out;
-
- /* Get new specifier and map it */
- res = of_irq_map_raw(p, intspec + index * intsize, intsize,
- addr, out_irq);
-out:
- of_node_put(p);
- return res;
-}
-EXPORT_SYMBOL_GPL(of_irq_map_one);
-
/**
* Search the device tree for the best MAC address to use. 'mac-address' is
* checked first, because that is supposed to contain to "most recent" MAC
@@ -1051,29 +155,3 @@ const void *of_get_mac_address(struct device_node *np)
return NULL;
}
EXPORT_SYMBOL(of_get_mac_address);
-
-int of_irq_to_resource(struct device_node *dev, int index, struct resource *r)
-{
- int irq = irq_of_parse_and_map(dev, index);
-
- /* Only dereference the resource if both the
- * resource and the irq are valid. */
- if (r && irq != NO_IRQ) {
- r->start = r->end = irq;
- r->flags = IORESOURCE_IRQ;
- }
-
- return irq;
-}
-EXPORT_SYMBOL_GPL(of_irq_to_resource);
-
-void __iomem *of_iomap(struct device_node *np, int index)
-{
- struct resource res;
-
- if (of_address_to_resource(np, index, &res))
- return NULL;
-
- return ioremap(res.start, 1 + res.end - res.start);
-}
-EXPORT_SYMBOL(of_iomap);
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 7a0c0199ea2..11f3cd9c832 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -32,6 +32,8 @@
#ifdef CONFIG_PPC32
#include <linux/module.h>
#endif
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -866,9 +868,34 @@ void user_disable_single_step(struct task_struct *task)
clear_tsk_thread_flag(task, TIF_SINGLESTEP);
}
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+void ptrace_triggered(struct perf_event *bp, int nmi,
+ struct perf_sample_data *data, struct pt_regs *regs)
+{
+ struct perf_event_attr attr;
+
+ /*
+ * Disable the breakpoint request here since ptrace has defined a
+ * one-shot behaviour for breakpoint exceptions in PPC64.
+ * The SIGTRAP signal is generated automatically for us in do_dabr().
+ * We don't have to do anything about that here
+ */
+ attr = bp->attr;
+ attr.disabled = true;
+ modify_user_hw_breakpoint(bp, &attr);
+}
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
unsigned long data)
{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret;
+ struct thread_struct *thread = &(task->thread);
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
/* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
* For embedded processors we support one DAC and no IAC's at the
* moment.
@@ -896,6 +923,43 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
/* Ensure breakpoint translation bit is set */
if (data && !(data & DABR_TRANSLATION))
return -EIO;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ bp = thread->ptrace_bps[0];
+ if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) {
+ if (bp) {
+ unregister_hw_breakpoint(bp);
+ thread->ptrace_bps[0] = NULL;
+ }
+ return 0;
+ }
+ if (bp) {
+ attr = bp->attr;
+ attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN;
+ arch_bp_generic_fields(data &
+ (DABR_DATA_WRITE | DABR_DATA_READ),
+ &attr.bp_type);
+ ret = modify_user_hw_breakpoint(bp, &attr);
+ if (ret)
+ return ret;
+ thread->ptrace_bps[0] = bp;
+ thread->dabr = data;
+ return 0;
+ }
+
+ /* Create a new breakpoint request if one doesn't exist already */
+ hw_breakpoint_init(&attr);
+ attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN;
+ arch_bp_generic_fields(data & (DABR_DATA_WRITE | DABR_DATA_READ),
+ &attr.bp_type);
+
+ thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
+ ptrace_triggered, task);
+ if (IS_ERR(bp)) {
+ thread->ptrace_bps[0] = NULL;
+ return PTR_ERR(bp);
+ }
+
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
/* Move contents to the DABR register */
task->thread.dabr = data;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index d0516dbee76..41048de3c6c 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -47,14 +47,6 @@ struct rtas_t rtas = {
};
EXPORT_SYMBOL(rtas);
-struct rtas_suspend_me_data {
- atomic_t working; /* number of cpus accessing this struct */
- atomic_t done;
- int token; /* ibm,suspend-me */
- int error;
- struct completion *complete; /* wait on this until working == 0 */
-};
-
DEFINE_SPINLOCK(rtas_data_buf_lock);
EXPORT_SYMBOL(rtas_data_buf_lock);
@@ -714,14 +706,53 @@ void rtas_os_term(char *str)
static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
#ifdef CONFIG_PPC_PSERIES
-static void rtas_percpu_suspend_me(void *info)
+static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done)
+{
+ u16 slb_size = mmu_slb_size;
+ int rc = H_MULTI_THREADS_ACTIVE;
+ int cpu;
+
+ slb_set_size(SLB_MIN_SIZE);
+ printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id());
+
+ while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) &&
+ !atomic_read(&data->error))
+ rc = rtas_call(data->token, 0, 1, NULL);
+
+ if (rc || atomic_read(&data->error)) {
+ printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc);
+ slb_set_size(slb_size);
+ }
+
+ if (atomic_read(&data->error))
+ rc = atomic_read(&data->error);
+
+ atomic_set(&data->error, rc);
+
+ if (wake_when_done) {
+ atomic_set(&data->done, 1);
+
+ for_each_online_cpu(cpu)
+ plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
+ }
+
+ if (atomic_dec_return(&data->working) == 0)
+ complete(data->complete);
+
+ return rc;
+}
+
+int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data)
+{
+ atomic_inc(&data->working);
+ return __rtas_suspend_last_cpu(data, 0);
+}
+
+static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int wake_when_done)
{
long rc = H_SUCCESS;
unsigned long msr_save;
- u16 slb_size = mmu_slb_size;
int cpu;
- struct rtas_suspend_me_data *data =
- (struct rtas_suspend_me_data *)info;
atomic_inc(&data->working);
@@ -729,7 +760,7 @@ static void rtas_percpu_suspend_me(void *info)
msr_save = mfmsr();
mtmsr(msr_save & ~(MSR_EE));
- while (rc == H_SUCCESS && !atomic_read(&data->done))
+ while (rc == H_SUCCESS && !atomic_read(&data->done) && !atomic_read(&data->error))
rc = plpar_hcall_norets(H_JOIN);
mtmsr(msr_save);
@@ -741,33 +772,37 @@ static void rtas_percpu_suspend_me(void *info)
/* All other cpus are in H_JOIN, this cpu does
* the suspend.
*/
- slb_set_size(SLB_MIN_SIZE);
- printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n",
- smp_processor_id());
- data->error = rtas_call(data->token, 0, 1, NULL);
-
- if (data->error) {
- printk(KERN_DEBUG "ibm,suspend-me returned %d\n",
- data->error);
- slb_set_size(slb_size);
- }
+ return __rtas_suspend_last_cpu(data, wake_when_done);
} else {
printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n",
smp_processor_id(), rc);
- data->error = rc;
+ atomic_set(&data->error, rc);
}
- atomic_set(&data->done, 1);
+ if (wake_when_done) {
+ atomic_set(&data->done, 1);
- /* This cpu did the suspend or got an error; in either case,
- * we need to prod all other other cpus out of join state.
- * Extra prods are harmless.
- */
- for_each_online_cpu(cpu)
- plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
+ /* This cpu did the suspend or got an error; in either case,
+ * we need to prod all other other cpus out of join state.
+ * Extra prods are harmless.
+ */
+ for_each_online_cpu(cpu)
+ plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
+ }
out:
if (atomic_dec_return(&data->working) == 0)
complete(data->complete);
+ return rc;
+}
+
+int rtas_suspend_cpu(struct rtas_suspend_me_data *data)
+{
+ return __rtas_suspend_cpu(data, 0);
+}
+
+static void rtas_percpu_suspend_me(void *info)
+{
+ __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);
}
static int rtas_ibm_suspend_me(struct rtas_args *args)
@@ -802,22 +837,22 @@ static int rtas_ibm_suspend_me(struct rtas_args *args)
atomic_set(&data.working, 0);
atomic_set(&data.done, 0);
+ atomic_set(&data.error, 0);
data.token = rtas_token("ibm,suspend-me");
- data.error = 0;
data.complete = &done;
/* Call function on all CPUs. One of us will make the
* rtas call
*/
if (on_each_cpu(rtas_percpu_suspend_me, &data, 0))
- data.error = -EINVAL;
+ atomic_set(&data.error, -EINVAL);
wait_for_completion(&done);
- if (data.error != 0)
+ if (atomic_read(&data.error) != 0)
printk(KERN_ERR "Error doing global join\n");
- return data.error;
+ return atomic_read(&data.error);
}
#else /* CONFIG_PPC_PSERIES */
static int rtas_ibm_suspend_me(struct rtas_args *args)
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index b7e6c7e193a..15ade0d7bbb 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -94,6 +94,10 @@ struct screen_info screen_info = {
.orig_video_points = 16
};
+/* Variables required to store legacy IO irq routing */
+int of_i8042_kbd_irq;
+int of_i8042_aux_irq;
+
#ifdef __DO_IRQ_CANON
/* XXX should go elsewhere eventually */
int ppc_do_canonicalize_irqs;
@@ -575,6 +579,15 @@ int check_legacy_ioport(unsigned long base_port)
np = of_find_compatible_node(NULL, NULL, "pnpPNP,f03");
if (np) {
parent = of_get_parent(np);
+
+ of_i8042_kbd_irq = irq_of_parse_and_map(parent, 0);
+ if (!of_i8042_kbd_irq)
+ of_i8042_kbd_irq = 1;
+
+ of_i8042_aux_irq = irq_of_parse_and_map(parent, 1);
+ if (!of_i8042_aux_irq)
+ of_i8042_aux_irq = 12;
+
of_node_put(np);
np = parent;
break;
@@ -701,16 +714,9 @@ static struct notifier_block ppc_dflt_plat_bus_notifier = {
.priority = INT_MAX,
};
-static struct notifier_block ppc_dflt_of_bus_notifier = {
- .notifier_call = ppc_dflt_bus_notify,
- .priority = INT_MAX,
-};
-
static int __init setup_bus_notifier(void)
{
bus_register_notifier(&platform_bus_type, &ppc_dflt_plat_bus_notifier);
- bus_register_notifier(&of_platform_bus_type, &ppc_dflt_of_bus_notifier);
-
return 0;
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index d135f93cb0f..1bee4b68fa4 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -142,16 +142,6 @@ early_param("smt-enabled", early_smt_enabled);
#define check_smt_enabled()
#endif /* CONFIG_SMP */
-/* Put the paca pointer into r13 and SPRG_PACA */
-static void __init setup_paca(struct paca_struct *new_paca)
-{
- local_paca = new_paca;
- mtspr(SPRN_SPRG_PACA, local_paca);
-#ifdef CONFIG_PPC_BOOK3E
- mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
-#endif
-}
-
/*
* Early initialization entry point. This is called by head.S
* with MMU translation disabled. We rely on the "feature" of
@@ -600,6 +590,9 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
return REMOTE_DISTANCE;
}
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
void __init setup_per_cpu_areas(void)
{
const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
@@ -624,8 +617,10 @@ void __init setup_per_cpu_areas(void)
panic("cannot initialize percpu area (err=%d)", rc);
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
- for_each_possible_cpu(cpu)
- paca[cpu].data_offset = delta + pcpu_unit_offsets[cpu];
+ for_each_possible_cpu(cpu) {
+ __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+ paca[cpu].data_offset = __per_cpu_offset[cpu];
+ }
}
#endif
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index a0afb555a7c..7109f5b1baa 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -11,6 +11,7 @@
#include <linux/tracehook.h>
#include <linux/signal.h>
+#include <asm/hw_breakpoint.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -149,6 +150,8 @@ static int do_signal_pending(sigset_t *oldset, struct pt_regs *regs)
if (current->thread.dabr)
set_dabr(current->thread.dabr);
#endif
+ /* Re-enable the breakpoints for the signal stack */
+ thread_change_pc(current, regs);
if (is32) {
if (ka.sa.sa_flags & SA_SIGINFO)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5c196d1086d..a61b3ddd7bb 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -288,8 +288,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
max_cpus = NR_CPUS;
else
max_cpus = 1;
-
- smp_space_timers(max_cpus);
for_each_possible_cpu(cpu)
if (cpu != boot_cpuid)
@@ -501,14 +499,6 @@ int __devinit start_secondary(void *unused)
current->active_mm = &init_mm;
smp_store_cpu_info(cpu);
-
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
- /* Clear any pending timer interrupts */
- mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
-
- /* Enable decrementer interrupt */
- mtspr(SPRN_TCR, TCR_DIE);
-#endif
set_dec(tb_ticks_per_jiffy);
preempt_disable();
cpu_callin_map[cpu] = 1;
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c
index 6fc6328dc62..0167d53da30 100644
--- a/arch/powerpc/kernel/suspend.c
+++ b/arch/powerpc/kernel/suspend.c
@@ -3,7 +3,7 @@
*
* Distribute under GPLv2
*
- * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>
* Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
*/
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 0441bbdadbd..ce53dfa7130 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -149,16 +149,6 @@ unsigned long tb_ticks_per_usec = 100; /* sane default */
EXPORT_SYMBOL(tb_ticks_per_usec);
unsigned long tb_ticks_per_sec;
EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */
-u64 tb_to_xs;
-unsigned tb_to_us;
-
-#define TICKLEN_SCALE NTP_SCALE_SHIFT
-static u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */
-static u64 ticklen_to_xs; /* 0.64 fraction */
-
-/* If last_tick_len corresponds to about 1/HZ seconds, then
- last_tick_len << TICKLEN_SHIFT will be about 2^63. */
-#define TICKLEN_SHIFT (63 - 30 - TICKLEN_SCALE + SHIFT_HZ)
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL_GPL(rtc_lock);
@@ -174,7 +164,6 @@ unsigned long ppc_proc_freq;
EXPORT_SYMBOL(ppc_proc_freq);
unsigned long ppc_tb_freq;
-static u64 tb_last_jiffy __cacheline_aligned_in_smp;
static DEFINE_PER_CPU(u64, last_jiffy);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
@@ -423,30 +412,6 @@ void udelay(unsigned long usecs)
}
EXPORT_SYMBOL(udelay);
-static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
- u64 new_tb_to_xs)
-{
- /*
- * tb_update_count is used to allow the userspace gettimeofday code
- * to assure itself that it sees a consistent view of the tb_to_xs and
- * stamp_xsec variables. It reads the tb_update_count, then reads
- * tb_to_xs and stamp_xsec and then reads tb_update_count again. If
- * the two values of tb_update_count match and are even then the
- * tb_to_xs and stamp_xsec values are consistent. If not, then it
- * loops back and reads them again until this criteria is met.
- * We expect the caller to have done the first increment of
- * vdso_data->tb_update_count already.
- */
- vdso_data->tb_orig_stamp = new_tb_stamp;
- vdso_data->stamp_xsec = new_stamp_xsec;
- vdso_data->tb_to_xs = new_tb_to_xs;
- vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
- vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
- vdso_data->stamp_xtime = xtime;
- smp_wmb();
- ++(vdso_data->tb_update_count);
-}
-
#ifdef CONFIG_SMP
unsigned long profile_pc(struct pt_regs *regs)
{
@@ -470,7 +435,6 @@ EXPORT_SYMBOL(profile_pc);
static int __init iSeries_tb_recal(void)
{
- struct div_result divres;
unsigned long titan, tb;
/* Make sure we only run on iSeries */
@@ -501,10 +465,7 @@ static int __init iSeries_tb_recal(void)
tb_ticks_per_jiffy = new_tb_ticks_per_jiffy;
tb_ticks_per_sec = new_tb_ticks_per_sec;
calc_cputime_factors();
- div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres );
- tb_to_xs = divres.result_low;
vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
- vdso_data->tb_to_xs = tb_to_xs;
setup_cputime_one_jiffy();
}
else {
@@ -667,27 +628,9 @@ void timer_interrupt(struct pt_regs * regs)
trace_timer_interrupt_exit(regs);
}
-void wakeup_decrementer(void)
-{
- unsigned long ticks;
-
- /*
- * The timebase gets saved on sleep and restored on wakeup,
- * so all we need to do is to reset the decrementer.
- */
- ticks = tb_ticks_since(__get_cpu_var(last_jiffy));
- if (ticks < tb_ticks_per_jiffy)
- ticks = tb_ticks_per_jiffy - ticks;
- else
- ticks = 1;
- set_dec(ticks);
-}
-
#ifdef CONFIG_SUSPEND
-void generic_suspend_disable_irqs(void)
+static void generic_suspend_disable_irqs(void)
{
- preempt_disable();
-
/* Disable the decrementer, so that it doesn't interfere
* with suspending.
*/
@@ -697,12 +640,9 @@ void generic_suspend_disable_irqs(void)
set_dec(0x7fffffff);
}
-void generic_suspend_enable_irqs(void)
+static void generic_suspend_enable_irqs(void)
{
- wakeup_decrementer();
-
local_irq_enable();
- preempt_enable();
}
/* Overrides the weak version in kernel/power/main.c */
@@ -722,23 +662,6 @@ void arch_suspend_enable_irqs(void)
}
#endif
-#ifdef CONFIG_SMP
-void __init smp_space_timers(unsigned int max_cpus)
-{
- int i;
- u64 previous_tb = per_cpu(last_jiffy, boot_cpuid);
-
- /* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */
- previous_tb -= tb_ticks_per_jiffy;
-
- for_each_possible_cpu(i) {
- if (i == boot_cpuid)
- continue;
- per_cpu(last_jiffy, i) = previous_tb;
- }
-}
-#endif
-
/*
* Scheduler clock - returns current time in nanosec units.
*
@@ -873,10 +796,11 @@ static cycle_t timebase_read(struct clocksource *cs)
return (cycle_t)get_tb();
}
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
- u32 mult)
+void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+ struct clocksource *clock, u32 mult)
{
- u64 t2x, stamp_xsec;
+ u64 new_tb_to_xs, new_stamp_xsec;
+ u32 frac_sec;
if (clock != &clocksource_timebase)
return;
@@ -887,11 +811,35 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
/* XXX this assumes clock->shift == 22 */
/* 4611686018 ~= 2^(20+64-22) / 1e9 */
- t2x = (u64) mult * 4611686018ULL;
- stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC;
- do_div(stamp_xsec, 1000000000);
- stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC;
- update_gtod(clock->cycle_last, stamp_xsec, t2x);
+ new_tb_to_xs = (u64) mult * 4611686018ULL;
+ new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC;
+ do_div(new_stamp_xsec, 1000000000);
+ new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC;
+
+ BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC);
+ /* this is tv_nsec / 1e9 as a 0.32 fraction */
+ frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32;
+
+ /*
+ * tb_update_count is used to allow the userspace gettimeofday code
+ * to assure itself that it sees a consistent view of the tb_to_xs and
+ * stamp_xsec variables. It reads the tb_update_count, then reads
+ * tb_to_xs and stamp_xsec and then reads tb_update_count again. If
+ * the two values of tb_update_count match and are even then the
+ * tb_to_xs and stamp_xsec values are consistent. If not, then it
+ * loops back and reads them again until this criteria is met.
+ * We expect the caller to have done the first increment of
+ * vdso_data->tb_update_count already.
+ */
+ vdso_data->tb_orig_stamp = clock->cycle_last;
+ vdso_data->stamp_xsec = new_stamp_xsec;
+ vdso_data->tb_to_xs = new_tb_to_xs;
+ vdso_data->wtom_clock_sec = wtm->tv_sec;
+ vdso_data->wtom_clock_nsec = wtm->tv_nsec;
+ vdso_data->stamp_xtime = *wall_time;
+ vdso_data->stamp_sec_fraction = frac_sec;
+ smp_wmb();
+ ++(vdso_data->tb_update_count);
}
void update_vsyscall_tz(void)
@@ -1007,15 +955,13 @@ void secondary_cpu_time_init(void)
/* This function is only called on the boot processor */
void __init time_init(void)
{
- unsigned long flags;
struct div_result res;
- u64 scale, x;
+ u64 scale;
unsigned shift;
if (__USE_RTC()) {
/* 601 processor: dec counts down by 128 every 128ns */
ppc_tb_freq = 1000000000;
- tb_last_jiffy = get_rtcl();
} else {
/* Normal PowerPC with timebase register */
ppc_md.calibrate_decr();
@@ -1023,50 +969,15 @@ void __init time_init(void)
ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
- tb_last_jiffy = get_tb();
}
tb_ticks_per_jiffy = ppc_tb_freq / HZ;
tb_ticks_per_sec = ppc_tb_freq;
tb_ticks_per_usec = ppc_tb_freq / 1000000;
- tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
calc_cputime_factors();
setup_cputime_one_jiffy();
/*
- * Calculate the length of each tick in ns. It will not be
- * exactly 1e9/HZ unless ppc_tb_freq is divisible by HZ.
- * We compute 1e9 * tb_ticks_per_jiffy / ppc_tb_freq,
- * rounded up.
- */
- x = (u64) NSEC_PER_SEC * tb_ticks_per_jiffy + ppc_tb_freq - 1;
- do_div(x, ppc_tb_freq);
- tick_nsec = x;
- last_tick_len = x << TICKLEN_SCALE;
-
- /*
- * Compute ticklen_to_xs, which is a factor which gets multiplied
- * by (last_tick_len << TICKLEN_SHIFT) to get a tb_to_xs value.
- * It is computed as:
- * ticklen_to_xs = 2^N / (tb_ticks_per_jiffy * 1e9)
- * where N = 64 + 20 - TICKLEN_SCALE - TICKLEN_SHIFT
- * which turns out to be N = 51 - SHIFT_HZ.
- * This gives the result as a 0.64 fixed-point fraction.
- * That value is reduced by an offset amounting to 1 xsec per
- * 2^31 timebase ticks to avoid problems with time going backwards
- * by 1 xsec when we do timer_recalc_offset due to losing the
- * fractional xsec. That offset is equal to ppc_tb_freq/2^51
- * since there are 2^20 xsec in a second.
- */
- div128_by_32((1ULL << 51) - ppc_tb_freq, 0,
- tb_ticks_per_jiffy << SHIFT_HZ, &res);
- div128_by_32(res.result_high, res.result_low, NSEC_PER_SEC, &res);
- ticklen_to_xs = res.result_low;
-
- /* Compute tb_to_xs from tick_nsec */
- tb_to_xs = mulhdu(last_tick_len << TICKLEN_SHIFT, ticklen_to_xs);
-
- /*
* Compute scale factor for sched_clock.
* The calibrate_decr() function has set tb_ticks_per_sec,
* which is the timebase frequency.
@@ -1087,21 +998,14 @@ void __init time_init(void)
/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
boot_tb = get_tb_or_rtc();
- write_seqlock_irqsave(&xtime_lock, flags);
-
/* If platform provided a timezone (pmac), we correct the time */
if (timezone_offset) {
sys_tz.tz_minuteswest = -timezone_offset / 60;
sys_tz.tz_dsttime = 0;
}
- vdso_data->tb_orig_stamp = tb_last_jiffy;
vdso_data->tb_update_count = 0;
vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
- vdso_data->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC;
- vdso_data->tb_to_xs = tb_to_xs;
-
- write_sequnlock_irqrestore(&xtime_lock, flags);
/* Start the decrementer on CPUs that have manual control
* such as BookE
@@ -1195,39 +1099,6 @@ void to_tm(int tim, struct rtc_time * tm)
GregorianDay(tm);
}
-/* Auxiliary function to compute scaling factors */
-/* Actually the choice of a timebase running at 1/4 the of the bus
- * frequency giving resolution of a few tens of nanoseconds is quite nice.
- * It makes this computation very precise (27-28 bits typically) which
- * is optimistic considering the stability of most processor clock
- * oscillators and the precision with which the timebase frequency
- * is measured but does not harm.
- */
-unsigned mulhwu_scale_factor(unsigned inscale, unsigned outscale)
-{
- unsigned mlt=0, tmp, err;
- /* No concern for performance, it's done once: use a stupid
- * but safe and compact method to find the multiplier.
- */
-
- for (tmp = 1U<<31; tmp != 0; tmp >>= 1) {
- if (mulhwu(inscale, mlt|tmp) < outscale)
- mlt |= tmp;
- }
-
- /* We might still be off by 1 for the best approximation.
- * A side effect of this is that if outscale is too large
- * the returned value will be zero.
- * Many corner cases have been checked and seem to work,
- * some might have been forgotten in the test however.
- */
-
- err = inscale * (mlt+1);
- if (err <= inscale/2)
- mlt++;
- return mlt;
-}
-
/*
* Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit
* result.
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 25fc33984c2..a45a63c3a0c 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -55,9 +55,6 @@
#endif
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>
-#ifdef CONFIG_FSL_BOOKE
-#include <asm/dbell.h>
-#endif
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
int (*__debugger)(struct pt_regs *regs) __read_mostly;
@@ -688,7 +685,7 @@ void RunModeException(struct pt_regs *regs)
void __kprobes single_step_exception(struct pt_regs *regs)
{
- regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */
+ clear_single_step(regs);
if (notify_die(DIE_SSTEP, "single_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
@@ -707,10 +704,8 @@ void __kprobes single_step_exception(struct pt_regs *regs)
*/
static void emulate_single_step(struct pt_regs *regs)
{
- if (single_stepping(regs)) {
- clear_single_step(regs);
- _exception(SIGTRAP, regs, TRAP_TRACE, 0);
- }
+ if (single_stepping(regs))
+ single_step_exception(regs);
}
static inline int __parse_fpscr(unsigned long fpscr)
@@ -1344,24 +1339,6 @@ void vsx_assist_exception(struct pt_regs *regs)
#endif /* CONFIG_VSX */
#ifdef CONFIG_FSL_BOOKE
-
-void doorbell_exception(struct pt_regs *regs)
-{
-#ifdef CONFIG_SMP
- int cpu = smp_processor_id();
- int msg;
-
- if (num_online_cpus() < 2)
- return;
-
- for (msg = 0; msg < 4; msg++)
- if (test_and_clear_bit(msg, &dbell_smp_message[cpu]))
- smp_message_recv(msg);
-#else
- printk(KERN_WARNING "Received doorbell on non-smp system\n");
-#endif
-}
-
void CacheLockingException(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index ee038d4bf25..4ee09ee2e83 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -19,8 +19,10 @@
/* Offset for the low 32-bit part of a field of long type */
#ifdef CONFIG_PPC64
#define LOPART 4
+#define TSPEC_TV_SEC TSPC64_TV_SEC+LOPART
#else
#define LOPART 0
+#define TSPEC_TV_SEC TSPC32_TV_SEC
#endif
.text
@@ -41,23 +43,11 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
mr r9, r3 /* datapage ptr in r9 */
cmplwi r10,0 /* check if tv is NULL */
beq 3f
- bl __do_get_xsec@local /* get xsec from tb & kernel */
- bne- 2f /* out of line -> do syscall */
-
- /* seconds are xsec >> 20 */
- rlwinm r5,r4,12,20,31
- rlwimi r5,r3,12,0,19
- stw r5,TVAL32_TV_SEC(r10)
-
- /* get remaining xsec and convert to usec. we scale
- * up remaining xsec by 12 bits and get the top 32 bits
- * of the multiplication
- */
- rlwinm r5,r4,12,0,19
- lis r6,1000000@h
- ori r6,r6,1000000@l
- mulhwu r5,r5,r6
- stw r5,TVAL32_TV_USEC(r10)
+ lis r7,1000000@ha /* load up USEC_PER_SEC */
+ addi r7,r7,1000000@l /* so we get microseconds in r4 */
+ bl __do_get_tspec@local /* get sec/usec from tb & kernel */
+ stw r3,TVAL32_TV_SEC(r10)
+ stw r4,TVAL32_TV_USEC(r10)
3: cmplwi r11,0 /* check if tz is NULL */
beq 1f
@@ -70,14 +60,6 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
crclr cr0*4+so
li r3,0
blr
-
-2:
- mtlr r12
- mr r3,r10
- mr r4,r11
- li r0,__NR_gettimeofday
- sc
- blr
.cfi_endproc
V_FUNCTION_END(__kernel_gettimeofday)
@@ -100,7 +82,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
mr r11,r4 /* r11 saves tp */
bl __get_datapage@local /* get data page */
mr r9,r3 /* datapage ptr in r9 */
-
+ lis r7,NSEC_PER_SEC@h /* want nanoseconds */
+ ori r7,r7,NSEC_PER_SEC@l
50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
bne cr1,80f /* not monotonic -> all done */
@@ -198,83 +181,12 @@ V_FUNCTION_END(__kernel_clock_getres)
/*
- * This is the core of gettimeofday() & friends, it returns the xsec
- * value in r3 & r4 and expects the datapage ptr (non clobbered)
- * in r9. clobbers r0,r4,r5,r6,r7,r8.
- * When returning, r8 contains the counter value that can be reused
- * by the monotonic clock implementation
- */
-__do_get_xsec:
- .cfi_startproc
- /* Check for update count & load values. We use the low
- * order 32 bits of the update count
- */
-1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- andi. r0,r8,1 /* pending update ? loop */
- bne- 1b
- xor r0,r8,r8 /* create dependency */
- add r9,r9,r0
-
- /* Load orig stamp (offset to TB) */
- lwz r5,CFG_TB_ORIG_STAMP(r9)
- lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
-
- /* Get a stable TB value */
-2: mftbu r3
- mftbl r4
- mftbu r0
- cmpl cr0,r3,r0
- bne- 2b
-
- /* Substract tb orig stamp. If the high part is non-zero, we jump to
- * the slow path which call the syscall.
- * If it's ok, then we have our 32 bits tb_ticks value in r7
- */
- subfc r7,r6,r4
- subfe. r0,r5,r3
- bne- 3f
-
- /* Load scale factor & do multiplication */
- lwz r5,CFG_TB_TO_XS(r9) /* load values */
- lwz r6,(CFG_TB_TO_XS+4)(r9)
- mulhwu r4,r7,r5
- mulhwu r6,r7,r6
- mullw r0,r7,r5
- addc r6,r6,r0
-
- /* At this point, we have the scaled xsec value in r4 + XER:CA
- * we load & add the stamp since epoch
- */
- lwz r5,CFG_STAMP_XSEC(r9)
- lwz r6,(CFG_STAMP_XSEC+4)(r9)
- adde r4,r4,r6
- addze r3,r5
-
- /* We now have our result in r3,r4. We create a fake dependency
- * on that result and re-check the counter
- */
- or r6,r4,r3
- xor r0,r6,r6
- add r9,r9,r0
- lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- cmpl cr0,r8,r0 /* check if updated */
- bne- 1b
-
- /* Warning ! The caller expects CR:EQ to be set to indicate a
- * successful calculation (so it won't fallback to the syscall
- * method). We have overriden that CR bit in the counter check,
- * but fortunately, the loop exit condition _is_ CR:EQ set, so
- * we can exit safely here. If you change this code, be careful
- * of that side effect.
- */
-3: blr
- .cfi_endproc
-
-/*
- * This is the core of clock_gettime(), it returns the current
- * time in seconds and nanoseconds in r3 and r4.
+ * This is the core of clock_gettime() and gettimeofday(),
+ * it returns the current time in r3 (seconds) and r4.
+ * On entry, r7 gives the resolution of r4, either USEC_PER_SEC
+ * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds.
* It expects the datapage ptr in r9 and doesn't clobber it.
- * It clobbers r0, r5, r6, r10 and returns NSEC_PER_SEC in r7.
+ * It clobbers r0, r5 and r6.
* On return, r8 contains the counter value that can be reused.
* This clobbers cr0 but not any other cr field.
*/
@@ -297,70 +209,58 @@ __do_get_tspec:
2: mftbu r3
mftbl r4
mftbu r0
- cmpl cr0,r3,r0
+ cmplw cr0,r3,r0
bne- 2b
/* Subtract tb orig stamp and shift left 12 bits.
*/
- subfc r7,r6,r4
+ subfc r4,r6,r4
subfe r0,r5,r3
slwi r0,r0,12
- rlwimi. r0,r7,12,20,31
- slwi r7,r7,12
+ rlwimi. r0,r4,12,20,31
+ slwi r4,r4,12
- /* Load scale factor & do multiplication */
+ /*
+ * Load scale factor & do multiplication.
+ * We only use the high 32 bits of the tb_to_xs value.
+ * Even with a 1GHz timebase clock, the high 32 bits of
+ * tb_to_xs will be at least 4 million, so the error from
+ * ignoring the low 32 bits will be no more than 0.25ppm.
+ * The error will just make the clock run very very slightly
+ * slow until the next time the kernel updates the VDSO data,
+ * at which point the clock will catch up to the kernel's value,
+ * so there is no long-term error accumulation.
+ */
lwz r5,CFG_TB_TO_XS(r9) /* load values */
- lwz r6,(CFG_TB_TO_XS+4)(r9)
- mulhwu r3,r7,r6
- mullw r10,r7,r5
- mulhwu r4,r7,r5
- addc r10,r3,r10
+ mulhwu r4,r4,r5
li r3,0
beq+ 4f /* skip high part computation if 0 */
mulhwu r3,r0,r5
- mullw r7,r0,r5
- mulhwu r5,r0,r6
- mullw r6,r0,r6
- adde r4,r4,r7
- addze r3,r3
+ mullw r5,r0,r5
addc r4,r4,r5
addze r3,r3
- addc r10,r10,r6
-
-4: addze r4,r4 /* add in carry */
- lis r7,NSEC_PER_SEC@h
- ori r7,r7,NSEC_PER_SEC@l
- mulhwu r4,r4,r7 /* convert to nanoseconds */
-
- /* At this point, we have seconds & nanoseconds since the xtime
- * stamp in r3+CA and r4. Load & add the xtime stamp.
+4:
+ /* At this point, we have seconds since the xtime stamp
+ * as a 32.32 fixed-point number in r3 and r4.
+ * Load & add the xtime stamp.
*/
-#ifdef CONFIG_PPC64
- lwz r5,STAMP_XTIME+TSPC64_TV_SEC+LOPART(r9)
- lwz r6,STAMP_XTIME+TSPC64_TV_NSEC+LOPART(r9)
-#else
- lwz r5,STAMP_XTIME+TSPC32_TV_SEC(r9)
- lwz r6,STAMP_XTIME+TSPC32_TV_NSEC(r9)
-#endif
- add r4,r4,r6
+ lwz r5,STAMP_XTIME+TSPEC_TV_SEC(r9)
+ lwz r6,STAMP_SEC_FRAC(r9)
+ addc r4,r4,r6
adde r3,r3,r5
- /* We now have our result in r3,r4. We create a fake dependency
- * on that result and re-check the counter
+ /* We create a fake dependency on the result in r3/r4
+ * and re-check the counter
*/
or r6,r4,r3
xor r0,r6,r6
add r9,r9,r0
lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- cmpl cr0,r8,r0 /* check if updated */
+ cmplw cr0,r8,r0 /* check if updated */
bne- 1b
- /* check for nanosecond overflow and adjust if necessary */
- cmpw r4,r7
- bltlr /* all done if no overflow */
- subf r4,r7,r4 /* adjust if overflow */
- addi r3,r3,1
+ mulhwu r4,r4,r7 /* convert to micro or nanoseconds */
blr
.cfi_endproc
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 262cd5857a5..e97a9a0dc4a 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -33,18 +33,11 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
bl V_LOCAL_FUNC(__get_datapage) /* get data page */
cmpldi r11,0 /* check if tv is NULL */
beq 2f
- bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
- lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
- ori r7,r7,16960
- rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
- rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
- std r5,TVAL64_TV_SEC(r11) /* store sec in tv */
- subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
- mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
- * XSEC_PER_SEC
- */
- rldicl r0,r0,44,20
- std r0,TVAL64_TV_USEC(r11) /* store usec in tv */
+ lis r7,1000000@ha /* load up USEC_PER_SEC */
+ addi r7,r7,1000000@l
+ bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */
+ std r4,TVAL64_TV_SEC(r11) /* store sec in tv */
+ std r5,TVAL64_TV_USEC(r11) /* store usec in tv */
2: cmpldi r10,0 /* check if tz is NULL */
beq 1f
lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */
@@ -77,6 +70,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
.cfi_register lr,r12
mr r11,r4 /* r11 saves tp */
bl V_LOCAL_FUNC(__get_datapage) /* get data page */
+ lis r7,NSEC_PER_SEC@h /* want nanoseconds */
+ ori r7,r7,NSEC_PER_SEC@l
50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
bne cr1,80f /* if not monotonic, all done */
@@ -171,49 +166,12 @@ V_FUNCTION_END(__kernel_clock_getres)
/*
- * This is the core of gettimeofday(), it returns the xsec
- * value in r4 and expects the datapage ptr (non clobbered)
- * in r3. clobbers r0,r4,r5,r6,r7,r8
- * When returning, r8 contains the counter value that can be reused
- */
-V_FUNCTION_BEGIN(__do_get_xsec)
- .cfi_startproc
- /* check for update count & load values */
-1: ld r8,CFG_TB_UPDATE_COUNT(r3)
- andi. r0,r8,1 /* pending update ? loop */
- bne- 1b
- xor r0,r8,r8 /* create dependency */
- add r3,r3,r0
-
- /* Get TB & offset it. We use the MFTB macro which will generate
- * workaround code for Cell.
- */
- MFTB(r7)
- ld r9,CFG_TB_ORIG_STAMP(r3)
- subf r7,r9,r7
-
- /* Scale result */
- ld r5,CFG_TB_TO_XS(r3)
- mulhdu r7,r7,r5
-
- /* Add stamp since epoch */
- ld r6,CFG_STAMP_XSEC(r3)
- add r4,r6,r7
-
- xor r0,r4,r4
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld cr0,r0,r8 /* check if updated */
- bne- 1b
- blr
- .cfi_endproc
-V_FUNCTION_END(__do_get_xsec)
-
-/*
- * This is the core of clock_gettime(), it returns the current
- * time in seconds and nanoseconds in r4 and r5.
+ * This is the core of clock_gettime() and gettimeofday(),
+ * it returns the current time in r4 (seconds) and r5.
+ * On entry, r7 gives the resolution of r5, either USEC_PER_SEC
+ * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds.
* It expects the datapage ptr in r3 and doesn't clobber it.
- * It clobbers r0 and r6 and returns NSEC_PER_SEC in r7.
+ * It clobbers r0, r6 and r9.
* On return, r8 contains the counter value that can be reused.
* This clobbers cr0 but not any other cr field.
*/
@@ -229,18 +187,18 @@ V_FUNCTION_BEGIN(__do_get_tspec)
/* Get TB & offset it. We use the MFTB macro which will generate
* workaround code for Cell.
*/
- MFTB(r7)
+ MFTB(r6)
ld r9,CFG_TB_ORIG_STAMP(r3)
- subf r7,r9,r7
+ subf r6,r9,r6
/* Scale result */
ld r5,CFG_TB_TO_XS(r3)
- sldi r7,r7,12 /* compute time since stamp_xtime */
- mulhdu r6,r7,r5 /* in units of 2^-32 seconds */
+ sldi r6,r6,12 /* compute time since stamp_xtime */
+ mulhdu r6,r6,r5 /* in units of 2^-32 seconds */
/* Add stamp since epoch */
ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
- ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3)
+ lwz r5,STAMP_SEC_FRAC(r3)
or r0,r4,r5
or r0,r0,r6
xor r0,r0,r0
@@ -250,17 +208,11 @@ V_FUNCTION_BEGIN(__do_get_tspec)
bne- 1b /* reload if so */
/* convert to seconds & nanoseconds and add to stamp */
- lis r7,NSEC_PER_SEC@h
- ori r7,r7,NSEC_PER_SEC@l
- mulhwu r0,r6,r7 /* compute nanoseconds and */
+ add r6,r6,r5 /* add on fractional seconds of xtime */
+ mulhwu r5,r6,r7 /* compute micro or nanoseconds and */
srdi r6,r6,32 /* seconds since stamp_xtime */
- clrldi r0,r0,32
- add r5,r5,r0 /* add nanoseconds together */
- cmpd r5,r7 /* overflow? */
+ clrldi r5,r5,32
add r4,r4,r6
- bltlr /* all done if no overflow */
- subf r5,r7,r5 /* if overflow, adjust */
- addi r4,r4,1
blr
.cfi_endproc
V_FUNCTION_END(__do_get_tspec)