summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/boot/Makefile56
-rw-r--r--arch/powerpc/boot/flatdevtree_env.h27
-rw-r--r--arch/powerpc/configs/ppc64_defconfig1
-rw-r--r--arch/powerpc/configs/pseries_defconfig1
-rw-r--r--arch/powerpc/include/asm/code-patching.h4
-rw-r--r--arch/powerpc/include/asm/iommu.h18
-rw-r--r--arch/powerpc/include/asm/mmu.h7
-rw-r--r--arch/powerpc/include/asm/processor.h2
-rw-r--r--arch/powerpc/include/asm/trace.h45
-rw-r--r--arch/powerpc/kernel/entry_64.S25
-rw-r--r--arch/powerpc/kernel/ftrace.c69
-rw-r--r--arch/powerpc/kernel/iommu.c197
-rw-r--r--arch/powerpc/kernel/pci-common.c1
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c1
-rw-r--r--arch/powerpc/kernel/smp.c2
-rw-r--r--arch/powerpc/kernel/vio.c6
-rw-r--r--arch/powerpc/lib/Makefile5
-rw-r--r--arch/powerpc/lib/code-patching.c14
-rw-r--r--arch/powerpc/lib/copypage_64.S4
-rw-r--r--arch/powerpc/lib/copypage_power7.S168
-rw-r--r--arch/powerpc/lib/copyuser_power7.S70
-rw-r--r--arch/powerpc/lib/memcpy_64.S4
-rw-r--r--arch/powerpc/lib/memcpy_power7.S650
-rw-r--r--arch/powerpc/lib/string.S2
-rw-r--r--arch/powerpc/lib/string_64.S202
-rw-r--r--arch/powerpc/lib/vmx-helper.c (renamed from arch/powerpc/lib/copyuser_power7_vmx.c)27
-rw-r--r--arch/powerpc/mm/numa.c2
-rw-r--r--arch/powerpc/platforms/44x/currituck.c2
-rw-r--r--arch/powerpc/platforms/85xx/corenet_ds.c2
-rw-r--r--arch/powerpc/platforms/85xx/ge_imp3a.c2
-rw-r--r--arch/powerpc/platforms/85xx/mpc8536_ds.c2
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_ds.c2
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_mds.c2
-rw-r--r--arch/powerpc/platforms/85xx/p1022_ds.c2
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx_hpcn.c2
-rw-r--r--arch/powerpc/platforms/cell/iommu.c1
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c113
-rw-r--r--arch/powerpc/platforms/pseries/processor_idle.c25
-rw-r--r--arch/powerpc/platforms/pseries/smp.c1
39 files changed, 1538 insertions, 228 deletions
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index e8461cb18d0..c802a90ae2d 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -62,26 +62,45 @@ libfdtheader := fdt.h libfdt.h libfdt_internal.h
$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o): \
$(addprefix $(obj)/,$(libfdtheader))
-src-wlib := string.S crt0.S crtsavres.S stdio.c main.c \
+src-wlib-y := string.S crt0.S crtsavres.S stdio.c main.c \
$(libfdt) libfdt-wrapper.c \
ns16550.c serial.c simple_alloc.c div64.S util.S \
- gunzip_util.c elf_util.c $(zlib) devtree.c oflib.c ofconsole.c \
- 4xx.c ebony.c mv64x60.c mpsc.c mv64x60_i2c.c cuboot.c bamboo.c \
- cpm-serial.c stdlib.c mpc52xx-psc.c planetcore.c uartlite.c \
- fsl-soc.c mpc8xx.c pq2.c ugecon.c
-src-plat := of.c cuboot-52xx.c cuboot-824x.c cuboot-83xx.c cuboot-85xx.c holly.c \
- cuboot-ebony.c cuboot-hotfoot.c epapr.c treeboot-ebony.c \
- prpmc2800.c \
- ps3-head.S ps3-hvcall.S ps3.c treeboot-bamboo.c cuboot-8xx.c \
- cuboot-pq2.c cuboot-sequoia.c treeboot-walnut.c \
- cuboot-bamboo.c cuboot-mpc7448hpc2.c cuboot-taishan.c \
- fixed-head.S ep88xc.c ep405.c cuboot-c2k.c \
- cuboot-katmai.c cuboot-rainier.c redboot-8xx.c ep8248e.c \
- cuboot-warp.c cuboot-85xx-cpm2.c cuboot-yosemite.c simpleboot.c \
- virtex405-head.S virtex.c redboot-83xx.c cuboot-sam440ep.c \
- cuboot-acadia.c cuboot-amigaone.c cuboot-kilauea.c \
- gamecube-head.S gamecube.c wii-head.S wii.c treeboot-iss4xx.c \
- treeboot-currituck.c
+ gunzip_util.c elf_util.c $(zlib) devtree.c stdlib.c \
+ oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \
+ uartlite.c mpc52xx-psc.c
+src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c
+src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
+src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c
+src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
+src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c
+
+src-plat-y := of.c
+src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \
+ treeboot-walnut.c cuboot-acadia.c \
+ cuboot-kilauea.c simpleboot.c \
+ virtex405-head.S virtex.c
+src-plat-$(CONFIG_44x) += treeboot-ebony.c cuboot-ebony.c treeboot-bamboo.c \
+ cuboot-bamboo.c cuboot-sam440ep.c \
+ cuboot-sequoia.c cuboot-rainier.c \
+ cuboot-taishan.c cuboot-katmai.c \
+ cuboot-warp.c cuboot-yosemite.c \
+ treeboot-iss4xx.c treeboot-currituck.c \
+ simpleboot.c fixed-head.S virtex.c
+src-plat-$(CONFIG_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c
+src-plat-$(CONFIG_PPC_MPC52xx) += cuboot-52xx.c
+src-plat-$(CONFIG_PPC_82xx) += cuboot-pq2.c fixed-head.S ep8248e.c cuboot-824x.c
+src-plat-$(CONFIG_PPC_83xx) += cuboot-83xx.c fixed-head.S redboot-83xx.c
+src-plat-$(CONFIG_FSL_SOC_BOOKE) += cuboot-85xx.c cuboot-85xx-cpm2.c
+src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \
+ cuboot-c2k.c gamecube-head.S \
+ gamecube.c wii-head.S wii.c holly.c \
+ prpmc2800.c
+src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c
+src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c
+src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c
+
+src-wlib := $(sort $(src-wlib-y))
+src-plat := $(sort $(src-plat-y))
src-boot := $(src-wlib) $(src-plat) empty.c
src-boot := $(addprefix $(obj)/, $(src-boot))
@@ -412,4 +431,3 @@ $(wrapper-installed): $(DESTDIR)$(WRAPPER_BINDIR) $(srctree)/$(obj)/wrapper | $(
$(call cmd,install_wrapper)
$(obj)/bootwrapper_install: $(all-installed)
-
diff --git a/arch/powerpc/boot/flatdevtree_env.h b/arch/powerpc/boot/flatdevtree_env.h
deleted file mode 100644
index 66e0ebb1a36..00000000000
--- a/arch/powerpc/boot/flatdevtree_env.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * This file adds the header file glue so that the shared files
- * flatdevicetree.[ch] can compile and work in the powerpc bootwrapper.
- *
- * strncmp & strchr copied from <file:lib/string.c>
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * Maintained by: Mark A. Greer <mgreer@mvista.com>
- */
-#ifndef _PPC_BOOT_FLATDEVTREE_ENV_H_
-#define _PPC_BOOT_FLATDEVTREE_ENV_H_
-
-#include <stdarg.h>
-#include <stddef.h>
-#include "types.h"
-#include "string.h"
-#include "stdio.h"
-#include "ops.h"
-
-#define be16_to_cpu(x) (x)
-#define cpu_to_be16(x) (x)
-#define be32_to_cpu(x) (x)
-#define cpu_to_be32(x) (x)
-#define be64_to_cpu(x) (x)
-#define cpu_to_be64(x) (x)
-
-#endif /* _PPC_BOOT_FLATDEVTREE_ENV_H_ */
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index c1442a3758a..273e2bd38d0 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -16,6 +16,7 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 6608232663c..187fb8d5360 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -24,6 +24,7 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 37c32aba79b..a6f8c7a5cbb 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -26,8 +26,8 @@ unsigned int create_branch(const unsigned int *addr,
unsigned long target, int flags);
unsigned int create_cond_branch(const unsigned int *addr,
unsigned long target, int flags);
-void patch_branch(unsigned int *addr, unsigned long target, int flags);
-void patch_instruction(unsigned int *addr, unsigned int instr);
+int patch_branch(unsigned int *addr, unsigned long target, int flags);
+int patch_instruction(unsigned int *addr, unsigned int instr);
int instr_is_relative_branch(unsigned int instr);
int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr);
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 957a83f4364..cbfe678e3db 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -53,6 +53,16 @@ static __inline__ __attribute_const__ int get_iommu_order(unsigned long size)
*/
#define IOMAP_MAX_ORDER 13
+#define IOMMU_POOL_HASHBITS 2
+#define IOMMU_NR_POOLS (1 << IOMMU_POOL_HASHBITS)
+
+struct iommu_pool {
+ unsigned long start;
+ unsigned long end;
+ unsigned long hint;
+ spinlock_t lock;
+} ____cacheline_aligned_in_smp;
+
struct iommu_table {
unsigned long it_busno; /* Bus number this table belongs to */
unsigned long it_size; /* Size of iommu table in entries */
@@ -61,10 +71,10 @@ struct iommu_table {
unsigned long it_index; /* which iommu table this is */
unsigned long it_type; /* type: PCI or Virtual Bus */
unsigned long it_blocksize; /* Entries in each block (cacheline) */
- unsigned long it_hint; /* Hint for next alloc */
- unsigned long it_largehint; /* Hint for large allocs */
- unsigned long it_halfpoint; /* Breaking point for small/large allocs */
- spinlock_t it_lock; /* Protects it_map */
+ unsigned long poolsize;
+ unsigned long nr_pools;
+ struct iommu_pool large_pool;
+ struct iommu_pool pools[IOMMU_NR_POOLS];
unsigned long *it_map; /* A simple allocation bitmap for now */
};
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index f0145522cfb..e8a26db2e8f 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -163,12 +163,7 @@ extern u64 ppc64_rma_size;
* to think about, feedback welcome. --BenH.
*/
-/* There are #define as they have to be used in assembly
- *
- * WARNING: If you change this list, make sure to update the array of
- * names currently in arch/powerpc/mm/hugetlbpage.c or bad things will
- * happen
- */
+/* These are #defines as they have to be used in assembly */
#define MMU_PAGE_4K 0
#define MMU_PAGE_16K 1
#define MMU_PAGE_64K 2
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 413a5eaef56..53b6dfa8334 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -389,10 +389,8 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */
#ifdef CONFIG_PSERIES_IDLE
extern void update_smt_snooze_delay(int snooze);
-extern int pseries_notify_cpuidle_add_cpu(int cpu);
#else
static inline void update_smt_snooze_delay(int snooze) {}
-static inline int pseries_notify_cpuidle_add_cpu(int cpu) { return 0; }
#endif
extern void flush_instruction_cache(void);
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index cbe2297d68b..5712f06905a 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -8,7 +8,7 @@
struct pt_regs;
-TRACE_EVENT(irq_entry,
+DECLARE_EVENT_CLASS(ppc64_interrupt_class,
TP_PROTO(struct pt_regs *regs),
@@ -25,55 +25,32 @@ TRACE_EVENT(irq_entry,
TP_printk("pt_regs=%p", __entry->regs)
);
-TRACE_EVENT(irq_exit,
+DEFINE_EVENT(ppc64_interrupt_class, irq_entry,
TP_PROTO(struct pt_regs *regs),
- TP_ARGS(regs),
-
- TP_STRUCT__entry(
- __field(struct pt_regs *, regs)
- ),
-
- TP_fast_assign(
- __entry->regs = regs;
- ),
-
- TP_printk("pt_regs=%p", __entry->regs)
+ TP_ARGS(regs)
);
-TRACE_EVENT(timer_interrupt_entry,
+DEFINE_EVENT(ppc64_interrupt_class, irq_exit,
TP_PROTO(struct pt_regs *regs),
- TP_ARGS(regs),
-
- TP_STRUCT__entry(
- __field(struct pt_regs *, regs)
- ),
-
- TP_fast_assign(
- __entry->regs = regs;
- ),
-
- TP_printk("pt_regs=%p", __entry->regs)
+ TP_ARGS(regs)
);
-TRACE_EVENT(timer_interrupt_exit,
+DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_entry,
TP_PROTO(struct pt_regs *regs),
- TP_ARGS(regs),
+ TP_ARGS(regs)
+);
- TP_STRUCT__entry(
- __field(struct pt_regs *, regs)
- ),
+DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit,
- TP_fast_assign(
- __entry->regs = regs;
- ),
+ TP_PROTO(struct pt_regs *regs),
- TP_printk("pt_regs=%p", __entry->regs)
+ TP_ARGS(regs)
);
#ifdef CONFIG_PPC_PSERIES
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 5971c85df13..cf38a17ab28 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -197,7 +197,16 @@ syscall_exit:
wrteei 0
#else
ld r10,PACAKMSR(r13)
- mtmsrd r10,1
+ /*
+ * For performance reasons we clear RI the same time that we
+ * clear EE. We only need to clear RI just before we restore r13
+ * below, but batching it with EE saves us one expensive mtmsrd call.
+ * We have to be careful to restore RI if we branch anywhere from
+ * here (eg syscall_exit_work).
+ */
+ li r9,MSR_RI
+ andc r11,r10,r9
+ mtmsrd r11,1
#endif /* CONFIG_PPC_BOOK3E */
ld r9,TI_FLAGS(r12)
@@ -214,17 +223,6 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
- /*
- * Clear RI before restoring r13. If we are returning to
- * userspace and we take an exception after restoring r13,
- * we end up corrupting the userspace r13 value.
- */
-#ifdef CONFIG_PPC_BOOK3S
- /* No MSR:RI on BookE */
- li r12,MSR_RI
- andc r11,r10,r12
- mtmsrd r11,1 /* clear MSR.RI */
-#endif /* CONFIG_PPC_BOOK3S */
beq- 1f
ACCOUNT_CPU_USER_EXIT(r11, r12)
@@ -271,6 +269,9 @@ syscall_enosys:
b syscall_exit
syscall_exit_work:
+#ifdef CONFIG_PPC_BOOK3S
+ mtmsrd r10,1 /* Restore RI */
+#endif
/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
If TIF_NOERROR is set, just save r3 as it is. */
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index bf99cfa6bbf..6f33296a057 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -63,11 +63,9 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
return -EINVAL;
/* replace the text with the new text */
- if (probe_kernel_write((void *)ip, &new, MCOUNT_INSN_SIZE))
+ if (patch_instruction((unsigned int *)ip, new))
return -EPERM;
- flush_icache_range(ip, ip + 8);
-
return 0;
}
@@ -212,12 +210,9 @@ __ftrace_make_nop(struct module *mod,
*/
op = 0x48000008; /* b +8 */
- if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+ if (patch_instruction((unsigned int *)ip, op))
return -EPERM;
-
- flush_icache_range(ip, ip + 8);
-
return 0;
}
@@ -286,11 +281,9 @@ __ftrace_make_nop(struct module *mod,
op = PPC_INST_NOP;
- if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+ if (patch_instruction((unsigned int *)ip, op))
return -EPERM;
- flush_icache_range(ip, ip + 8);
-
return 0;
}
#endif /* PPC64 */
@@ -426,11 +419,9 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
pr_devel("write to %lx\n", rec->ip);
- if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+ if (patch_instruction((unsigned int *)ip, op))
return -EPERM;
- flush_icache_range(ip, ip + 8);
-
return 0;
}
#endif /* CONFIG_PPC64 */
@@ -484,6 +475,58 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ret;
}
+static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr = (unsigned long)FTRACE_ADDR;
+ int ret;
+
+ ret = ftrace_update_record(rec, enable);
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+ case FTRACE_UPDATE_MAKE_CALL:
+ return ftrace_make_call(rec, ftrace_addr);
+ case FTRACE_UPDATE_MAKE_NOP:
+ return ftrace_make_nop(NULL, rec, ftrace_addr);
+ }
+
+ return 0;
+}
+
+void ftrace_replace_code(int enable)
+{
+ struct ftrace_rec_iter *iter;
+ struct dyn_ftrace *rec;
+ int ret;
+
+ for (iter = ftrace_rec_iter_start(); iter;
+ iter = ftrace_rec_iter_next(iter)) {
+ rec = ftrace_rec_iter_record(iter);
+ ret = __ftrace_replace_code(rec, enable);
+ if (ret) {
+ ftrace_bug(ret, rec->ip);
+ return;
+ }
+ }
+}
+
+void arch_ftrace_update_code(int command)
+{
+ if (command & FTRACE_UPDATE_CALLS)
+ ftrace_replace_code(1);
+ else if (command & FTRACE_DISABLE_CALLS)
+ ftrace_replace_code(0);
+
+ if (command & FTRACE_UPDATE_TRACE_FUNC)
+ ftrace_update_ftrace_func(ftrace_trace_function);
+
+ if (command & FTRACE_START_FUNC_RET)
+ ftrace_enable_ftrace_graph_caller();
+ else if (command & FTRACE_STOP_FUNC_RET)
+ ftrace_disable_ftrace_graph_caller();
+}
+
int __init ftrace_dyn_arch_init(void *data)
{
/* caller expects data to be zero */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 359f078571c..7bc94da1a83 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -33,6 +33,7 @@
#include <linux/bitmap.h>
#include <linux/iommu-helper.h>
#include <linux/crash_dump.h>
+#include <linux/hash.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
@@ -58,6 +59,26 @@ static int __init setup_iommu(char *str)
__setup("iommu=", setup_iommu);
+static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
+
+/*
+ * We precalculate the hash to avoid doing it on every allocation.
+ *
+ * The hash is important to spread CPUs across all the pools. For example,
+ * on a POWER7 with 4 way SMT we want interrupts on the primary threads and
+ * with 4 pools all primary threads would map to the same pool.
+ */
+static int __init setup_iommu_pool_hash(void)
+{
+ unsigned int i;
+
+ for_each_possible_cpu(i)
+ per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
+
+ return 0;
+}
+subsys_initcall(setup_iommu_pool_hash);
+
static unsigned long iommu_range_alloc(struct device *dev,
struct iommu_table *tbl,
unsigned long npages,
@@ -71,6 +92,9 @@ static unsigned long iommu_range_alloc(struct device *dev,
int pass = 0;
unsigned long align_mask;
unsigned long boundary_size;
+ unsigned long flags;
+ unsigned int pool_nr;
+ struct iommu_pool *pool;
align_mask = 0xffffffffffffffffl >> (64 - align_order);
@@ -83,36 +107,46 @@ static unsigned long iommu_range_alloc(struct device *dev,
return DMA_ERROR_CODE;
}
- if (handle && *handle)
- start = *handle;
+ /*
+ * We don't need to disable preemption here because any CPU can
+ * safely use any IOMMU pool.
+ */
+ pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1);
+
+ if (largealloc)
+ pool = &(tbl->large_pool);
else
- start = largealloc ? tbl->it_largehint : tbl->it_hint;
+ pool = &(tbl->pools[pool_nr]);
+
+ spin_lock_irqsave(&(pool->lock), flags);
- /* Use only half of the table for small allocs (15 pages or less) */
- limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
+again:
+ if ((pass == 0) && handle && *handle)
+ start = *handle;
+ else
+ start = pool->hint;
- if (largealloc && start < tbl->it_halfpoint)
- start = tbl->it_halfpoint;
+ limit = pool->end;
/* The case below can happen if we have a small segment appended
* to a large, or when the previous alloc was at the very end of
* the available space. If so, go back to the initial start.
*/
if (start >= limit)
- start = largealloc ? tbl->it_largehint : tbl->it_hint;
-
- again:
+ start = pool->start;
if (limit + tbl->it_offset > mask) {
limit = mask - tbl->it_offset + 1;
/* If we're constrained on address range, first try
* at the masked hint to avoid O(n) search complexity,
- * but on second pass, start at 0.
+ * but on second pass, start at 0 in pool 0.
*/
- if ((start & mask) >= limit || pass > 0)
- start = 0;
- else
+ if ((start & mask) >= limit || pass > 0) {
+ pool = &(tbl->pools[0]);
+ start = pool->start;
+ } else {
start &= mask;
+ }
}
if (dev)
@@ -126,16 +160,25 @@ static unsigned long iommu_range_alloc(struct device *dev,
tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT,
align_mask);
if (n == -1) {
- if (likely(pass < 2)) {
- /* First failure, just rescan the half of the table.
- * Second failure, rescan the other half of the table.
- */
- start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
- limit = pass ? tbl->it_size : limit;
+ if (likely(pass == 0)) {
+ /* First try the pool from the start */
+ pool->hint = pool->start;
pass++;
goto again;
+
+ } else if (pass <= tbl->nr_pools) {
+ /* Now try scanning all the other pools */
+ spin_unlock(&(pool->lock));
+ pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1);
+ pool = &tbl->pools[pool_nr];
+ spin_lock(&(pool->lock));
+ pool->hint = pool->start;
+ pass++;
+ goto again;
+
} else {
- /* Third failure, give up */
+ /* Give up */
+ spin_unlock_irqrestore(&(pool->lock), flags);
return DMA_ERROR_CODE;
}
}
@@ -145,10 +188,10 @@ static unsigned long iommu_range_alloc(struct device *dev,
/* Bump the hint to a new block for small allocs. */
if (largealloc) {
/* Don't bump to new block to avoid fragmentation */
- tbl->it_largehint = end;
+ pool->hint = end;
} else {
/* Overflow will be taken care of at the next allocation */
- tbl->it_hint = (end + tbl->it_blocksize - 1) &
+ pool->hint = (end + tbl->it_blocksize - 1) &
~(tbl->it_blocksize - 1);
}
@@ -156,6 +199,8 @@ static unsigned long iommu_range_alloc(struct device *dev,
if (handle)
*handle = end;
+ spin_unlock_irqrestore(&(pool->lock), flags);
+
return n;
}
@@ -165,18 +210,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
unsigned long mask, unsigned int align_order,
struct dma_attrs *attrs)
{
- unsigned long entry, flags;
+ unsigned long entry;
dma_addr_t ret = DMA_ERROR_CODE;
int build_fail;
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
- if (unlikely(entry == DMA_ERROR_CODE)) {
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ if (unlikely(entry == DMA_ERROR_CODE))
return DMA_ERROR_CODE;
- }
entry += tbl->it_offset; /* Offset into real TCE table */
ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
@@ -193,8 +234,6 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
*/
if (unlikely(build_fail)) {
__iommu_free(tbl, ret, npages);
-
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
return DMA_ERROR_CODE;
}
@@ -202,16 +241,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
-
/* Make sure updates are seen by hardware */
mb();
return ret;
}
-static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
- unsigned int npages)
+static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
{
unsigned long entry, free_entry;
@@ -231,20 +268,57 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index);
WARN_ON(1);
}
- return;
+
+ return false;
+ }
+
+ return true;
+}
+
+static struct iommu_pool *get_pool(struct iommu_table *tbl,
+ unsigned long entry)
+{
+ struct iommu_pool *p;
+ unsigned long largepool_start = tbl->large_pool.start;
+
+ /* The large pool is the last pool at the top of the table */
+ if (entry >= largepool_start) {
+ p = &tbl->large_pool;
+ } else {
+ unsigned int pool_nr = entry / tbl->poolsize;
+
+ BUG_ON(pool_nr > tbl->nr_pools);
+ p = &tbl->pools[pool_nr];
}
+ return p;
+}
+
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long entry, free_entry;
+ unsigned long flags;
+ struct iommu_pool *pool;
+
+ entry = dma_addr >> IOMMU_PAGE_SHIFT;
+ free_entry = entry - tbl->it_offset;
+
+ pool = get_pool(tbl, free_entry);
+
+ if (!iommu_free_check(tbl, dma_addr, npages))
+ return;
+
ppc_md.tce_free(tbl, entry, npages);
+
+ spin_lock_irqsave(&(pool->lock), flags);
bitmap_clear(tbl->it_map, free_entry, npages);
+ spin_unlock_irqrestore(&(pool->lock), flags);
}
static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned int npages)
{
- unsigned long flags;
-
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
__iommu_free(tbl, dma_addr, npages);
/* Make sure TLB cache is flushed if the HW needs it. We do
@@ -253,8 +327,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
*/
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
-
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
}
int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
@@ -263,7 +335,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
struct dma_attrs *attrs)
{
dma_addr_t dma_next = 0, dma_addr;
- unsigned long flags;
struct scatterlist *s, *outs, *segstart;
int outcount, incount, i, build_fail = 0;
unsigned int align;
@@ -285,8 +356,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
DBG("sg mapping %d elements:\n", nelems);
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
max_seg_size = dma_get_max_seg_size(dev);
for_each_sg(sglist, s, nelems, i) {
unsigned long vaddr, npages, entry, slen;
@@ -369,8 +438,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
-
DBG("mapped %d elements:\n", outcount);
/* For the sake of iommu_unmap_sg, we clear out the length in the
@@ -402,7 +469,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
if (s == outs)
break;
}
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
return 0;
}
@@ -412,15 +478,12 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
struct dma_attrs *attrs)
{
struct scatterlist *sg;
- unsigned long flags;
BUG_ON(direction == DMA_NONE);
if (!tbl)
return;
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
sg = sglist;
while (nelems--) {
unsigned int npages;
@@ -440,8 +503,6 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
*/
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
-
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
}
static void iommu_table_clear(struct iommu_table *tbl)
@@ -494,9 +555,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
unsigned long sz;
static int welcomed = 0;
struct page *page;
-
- /* Set aside 1/4 of the table for large allocations. */
- tbl->it_halfpoint = tbl->it_size * 3 / 4;
+ unsigned int i;
+ struct iommu_pool *p;
/* number of bytes needed for the bitmap */
sz = (tbl->it_size + 7) >> 3;
@@ -515,9 +575,28 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
- tbl->it_hint = 0;
- tbl->it_largehint = tbl->it_halfpoint;
- spin_lock_init(&tbl->it_lock);
+ /* We only split the IOMMU table if we have 1GB or more of space */
+ if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024))
+ tbl->nr_pools = IOMMU_NR_POOLS;
+ else
+ tbl->nr_pools = 1;
+
+ /* We reserve the top 1/4 of the table for large allocations */
+ tbl->poolsize = (tbl->it_size * 3 / 4) / IOMMU_NR_POOLS;
+
+ for (i = 0; i < IOMMU_NR_POOLS; i++) {
+ p = &tbl->pools[i];
+ spin_lock_init(&(p->lock));
+ p->start = tbl->poolsize * i;
+ p->hint = p->start;
+ p->end = p->start + tbl->poolsize;
+ }
+
+ p = &tbl->large_pool;
+ spin_lock_init(&(p->lock));
+ p->start = tbl->poolsize * i;
+ p->hint = p->start;
+ p->end = tbl->it_size;
iommu_table_clear(tbl);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 8e78e93c818..0f75bd50040 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1646,7 +1646,6 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)
pci_free_resource_list(&resources);
return;
}
- bus->secondary = hose->first_busno;
hose->bus = bus;
/* Get probe mode and perform scan */
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 89dde171a6f..d7dd42bd145 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -198,7 +198,6 @@ EXPORT_SYMBOL(of_create_pci_dev);
/**
* of_scan_pci_bridge - Set up a PCI bridge and scan for child nodes
- * @node: device tree node of bridge
* @dev: pci_dev structure for the bridge
*
* of_scan_bus() calls this routine for each PCI bridge that it finds, and
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e4cb34322de..e1417c42155 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -571,7 +571,6 @@ void __devinit start_secondary(void *unused)
if (system_state == SYSTEM_RUNNING)
vdso_data->processorCount++;
#endif
- ipi_call_lock();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
/* Update sibling maps */
@@ -601,7 +600,6 @@ void __devinit start_secondary(void *unused)
of_node_put(np);
}
of_node_put(l2_cache);
- ipi_call_unlock();
local_irq_enable();
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index cb87301ccd5..06cbc309b81 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -625,7 +625,7 @@ struct dma_map_ops vio_dma_mapping_ops = {
* vio_cmo_set_dev_desired - Set desired entitlement for a device
*
* @viodev: struct vio_dev for device to alter
- * @new_desired: new desired entitlement level in bytes
+ * @desired: new desired entitlement level in bytes
*
* For use by devices to request a change to their entitlement at runtime or
* through sysfs. The desired entitlement level is changed and a balancing
@@ -1262,7 +1262,7 @@ static int vio_bus_remove(struct device *dev)
/**
* vio_register_driver: - Register a new vio driver
- * @drv: The vio_driver structure to be registered.
+ * @viodrv: The vio_driver structure to be registered.
*/
int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
const char *mod_name)
@@ -1282,7 +1282,7 @@ EXPORT_SYMBOL(__vio_register_driver);
/**
* vio_unregister_driver - Remove registration of vio driver.
- * @driver: The vio_driver struct to be removed form registration
+ * @viodrv: The vio_driver struct to be removed form registration
*/
void vio_unregister_driver(struct vio_driver *viodrv)
{
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 7735a2c2e6d..746e0c895cd 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -17,14 +17,15 @@ obj-$(CONFIG_HAS_IOMEM) += devres.o
obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \
memcpy_64.o usercopy_64.o mem_64.o string.o \
checksum_wrappers_64.o hweight_64.o \
- copyuser_power7.o
+ copyuser_power7.o string_64.o copypage_power7.o \
+ memcpy_power7.o
obj-$(CONFIG_XMON) += sstep.o ldstfp.o
obj-$(CONFIG_KPROBES) += sstep.o ldstfp.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += sstep.o ldstfp.o
ifeq ($(CONFIG_PPC64),y)
obj-$(CONFIG_SMP) += locks.o
-obj-$(CONFIG_ALTIVEC) += copyuser_power7_vmx.o
+obj-$(CONFIG_ALTIVEC) += vmx-helper.o
endif
obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 7c975d43e3f..dd223b3eb33 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -13,17 +13,23 @@
#include <linux/mm.h>
#include <asm/page.h>
#include <asm/code-patching.h>
+#include <asm/uaccess.h>
-void patch_instruction(unsigned int *addr, unsigned int instr)
+int patch_instruction(unsigned int *addr, unsigned int instr)
{
- *addr = instr;
+ int err;
+
+ err = __put_user(instr, addr);
+ if (err)
+ return err;
asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr));
+ return 0;
}
-void patch_branch(unsigned int *addr, unsigned long target, int flags)
+int patch_branch(unsigned int *addr, unsigned long target, int flags)
{
- patch_instruction(addr, create_branch(addr, target, flags));
+ return patch_instruction(addr, create_branch(addr, target, flags));
}
unsigned int create_branch(const unsigned int *addr,
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 53dcb6b1b70..9f9434a8526 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -17,7 +17,11 @@ PPC64_CACHES:
.section ".text"
_GLOBAL(copy_page)
+BEGIN_FTR_SECTION
lis r5,PAGE_SIZE@h
+FTR_SECTION_ELSE
+ b .copypage_power7
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
ori r5,r5,PAGE_SIZE@l
BEGIN_FTR_SECTION
ld r10,PPC64_CACHES@toc(r2)
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
new file mode 100644
index 00000000000..01e2b5db325
--- /dev/null
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -0,0 +1,168 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+
+#define STACKFRAMESIZE 256
+#define STK_REG(i) (112 + ((i)-14)*8)
+
+_GLOBAL(copypage_power7)
+ /*
+ * We prefetch both the source and destination using enhanced touch
+ * instructions. We use a stream ID of 0 for the load side and
+ * 1 for the store side. Since source and destination are page
+ * aligned we don't need to clear the bottom 7 bits of either
+ * address.
+ */
+ ori r9,r3,1 /* stream=1 */
+
+#ifdef CONFIG_PPC_64K_PAGES
+ lis r7,0x0E01 /* depth=7, units=512 */
+#else
+ lis r7,0x0E00 /* depth=7 */
+ ori r7,r7,0x1000 /* units=32 */
+#endif
+ ori r10,r7,1 /* stream=1 */
+
+ lis r8,0x8000 /* GO=1 */
+ clrldi r8,r8,32
+
+.machine push
+.machine "power4"
+ dcbt r0,r4,0b01000
+ dcbt r0,r7,0b01010
+ dcbtst r0,r9,0b01000
+ dcbtst r0,r10,0b01010
+ eieio
+ dcbt r0,r8,0b01010 /* GO */
+.machine pop
+
+#ifdef CONFIG_ALTIVEC
+ mflr r0
+ std r3,48(r1)
+ std r4,56(r1)
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ bl .enter_vmx_copy
+ cmpwi r3,0
+ ld r0,STACKFRAMESIZE+16(r1)
+ ld r3,STACKFRAMESIZE+48(r1)
+ ld r4,STACKFRAMESIZE+56(r1)
+ mtlr r0
+
+ li r0,(PAGE_SIZE/128)
+ mtctr r0
+
+ beq .Lnonvmx_copy
+
+ addi r1,r1,STACKFRAMESIZE
+
+ li r6,16
+ li r7,32
+ li r8,48
+ li r9,64
+ li r10,80
+ li r11,96
+ li r12,112
+
+ .align 5
+1: lvx vr7,r0,r4
+ lvx vr6,r4,r6
+ lvx vr5,r4,r7
+ lvx vr4,r4,r8
+ lvx vr3,r4,r9
+ lvx vr2,r4,r10
+ lvx vr1,r4,r11
+ lvx vr0,r4,r12
+ addi r4,r4,128
+ stvx vr7,r0,r3
+ stvx vr6,r3,r6
+ stvx vr5,r3,r7
+ stvx vr4,r3,r8
+ stvx vr3,r3,r9
+ stvx vr2,r3,r10
+ stvx vr1,r3,r11
+ stvx vr0,r3,r12
+ addi r3,r3,128
+ bdnz 1b
+
+ b .exit_vmx_copy /* tail call optimise */
+
+#else
+ li r0,(PAGE_SIZE/128)
+ mtctr r0
+
+ stdu r1,-STACKFRAMESIZE(r1)
+#endif
+
+.Lnonvmx_copy:
+ std r14,STK_REG(r14)(r1)
+ std r15,STK_REG(r15)(r1)
+ std r16,STK_REG(r16)(r1)
+ std r17,STK_REG(r17)(r1)
+ std r18,STK_REG(r18)(r1)
+ std r19,STK_REG(r19)(r1)
+ std r20,STK_REG(r20)(r1)
+
+1: ld r0,0(r4)
+ ld r5,8(r4)
+ ld r6,16(r4)
+ ld r7,24(r4)
+ ld r8,32(r4)
+ ld r9,40(r4)
+ ld r10,48(r4)
+ ld r11,56(r4)
+ ld r12,64(r4)
+ ld r14,72(r4)
+ ld r15,80(r4)
+ ld r16,88(r4)
+ ld r17,96(r4)
+ ld r18,104(r4)
+ ld r19,112(r4)
+ ld r20,120(r4)
+ addi r4,r4,128
+ std r0,0(r3)
+ std r5,8(r3)
+ std r6,16(r3)
+ std r7,24(r3)
+ std r8,32(r3)
+ std r9,40(r3)
+ std r10,48(r3)
+ std r11,56(r3)
+ std r12,64(r3)
+ std r14,72(r3)
+ std r15,80(r3)
+ std r16,88(r3)
+ std r17,96(r3)
+ std r18,104(r3)
+ std r19,112(r3)
+ std r20,120(r3)
+ addi r3,r3,128
+ bdnz 1b
+
+ ld r14,STK_REG(r14)(r1)
+ ld r15,STK_REG(r15)(r1)
+ ld r16,STK_REG(r16)(r1)
+ ld r17,STK_REG(r17)(r1)
+ ld r18,STK_REG(r18)(r1)
+ ld r19,STK_REG(r19)(r1)
+ ld r20,STK_REG(r20)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ blr
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index 497db7b23bb..48e3f8c5768 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -61,7 +61,7 @@
ld r15,STK_REG(r15)(r1)
ld r14,STK_REG(r14)(r1)
.Ldo_err3:
- bl .exit_vmx_copy
+ bl .exit_vmx_usercopy
ld r0,STACKFRAMESIZE+16(r1)
mtlr r0
b .Lexit
@@ -290,7 +290,7 @@ err1; stb r0,0(r3)
mflr r0
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
- bl .enter_vmx_copy
+ bl .enter_vmx_usercopy
cmpwi r3,0
ld r0,STACKFRAMESIZE+16(r1)
ld r3,STACKFRAMESIZE+48(r1)
@@ -298,6 +298,68 @@ err1; stb r0,0(r3)
ld r5,STACKFRAMESIZE+64(r1)
mtlr r0
+ /*
+ * We prefetch both the source and destination using enhanced touch
+ * instructions. We use a stream ID of 0 for the load side and
+ * 1 for the store side.
+ */
+ clrrdi r6,r4,7
+ clrrdi r9,r3,7
+ ori r9,r9,1 /* stream=1 */
+
+ srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
+ cmpldi r7,0x3FF
+ ble 1f
+ li r7,0x3FF
+1: lis r0,0x0E00 /* depth=7 */
+ sldi r7,r7,7
+ or r7,r7,r0
+ ori r10,r7,1 /* stream=1 */
+
+ lis r8,0x8000 /* GO=1 */
+ clrldi r8,r8,32
+
+.machine push
+.machine "power4"
+ dcbt r0,r6,0b01000
+ dcbt r0,r7,0b01010
+ dcbtst r0,r9,0b01000
+ dcbtst r0,r10,0b01010
+ eieio
+ dcbt r0,r8,0b01010 /* GO */
+.machine pop
+
+ /*
+ * We prefetch both the source and destination using enhanced touch
+ * instructions. We use a stream ID of 0 for the load side and
+ * 1 for the store side.
+ */
+ clrrdi r6,r4,7
+ clrrdi r9,r3,7
+ ori r9,r9,1 /* stream=1 */
+
+ srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
+ cmpldi cr1,r7,0x3FF
+ ble cr1,1f
+ li r7,0x3FF
+1: lis r0,0x0E00 /* depth=7 */
+ sldi r7,r7,7
+ or r7,r7,r0
+ ori r10,r7,1 /* stream=1 */
+
+ lis r8,0x8000 /* GO=1 */
+ clrldi r8,r8,32
+
+.machine push
+.machine "power4"
+ dcbt r0,r6,0b01000
+ dcbt r0,r7,0b01010
+ dcbtst r0,r9,0b01000
+ dcbtst r0,r10,0b01010
+ eieio
+ dcbt r0,r8,0b01010 /* GO */
+.machine pop
+
beq .Lunwind_stack_nonvmx_copy
/*
@@ -476,7 +538,7 @@ err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
- b .exit_vmx_copy /* tail call optimise */
+ b .exit_vmx_usercopy /* tail call optimise */
.Lvmx_unaligned_copy:
/* Get the destination 16B aligned */
@@ -679,5 +741,5 @@ err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
- b .exit_vmx_copy /* tail call optimise */
+ b .exit_vmx_usercopy /* tail call optimise */
#endif /* CONFiG_ALTIVEC */
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 82fea3963e1..d2bbbc8d7dc 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -11,7 +11,11 @@
.align 7
_GLOBAL(memcpy)
+BEGIN_FTR_SECTION
std r3,48(r1) /* save destination pointer for return value */
+FTR_SECTION_ELSE
+ b memcpy_power7
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
PPC_MTOCRF(0x01,r5)
cmpldi cr1,r5,16
neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
new file mode 100644
index 00000000000..84674d89793
--- /dev/null
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -0,0 +1,650 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+
+#define STACKFRAMESIZE 256
+#define STK_REG(i) (112 + ((i)-14)*8)
+
+_GLOBAL(memcpy_power7)
+#ifdef CONFIG_ALTIVEC
+ cmpldi r5,16
+ cmpldi cr1,r5,4096
+
+ std r3,48(r1)
+
+ blt .Lshort_copy
+ bgt cr1,.Lvmx_copy
+#else
+ cmpldi r5,16
+
+ std r3,48(r1)
+
+ blt .Lshort_copy
+#endif
+
+.Lnonvmx_copy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+ lbz r0,0(r4)
+ addi r4,r4,1
+ stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+ blt 5f
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(r14)(r1)
+ std r15,STK_REG(r15)(r1)
+ std r16,STK_REG(r16)(r1)
+ std r17,STK_REG(r17)(r1)
+ std r18,STK_REG(r18)(r1)
+ std r19,STK_REG(r19)(r1)
+ std r20,STK_REG(r20)(r1)
+ std r21,STK_REG(r21)(r1)
+ std r22,STK_REG(r22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+ ld r0,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ld r8,24(r4)
+ ld r9,32(r4)
+ ld r10,40(r4)
+ ld r11,48(r4)
+ ld r12,56(r4)
+ ld r14,64(r4)
+ ld r15,72(r4)
+ ld r16,80(r4)
+ ld r17,88(r4)
+ ld r18,96(r4)
+ ld r19,104(r4)
+ ld r20,112(r4)
+ ld r21,120(r4)
+ addi r4,r4,128
+ std r0,0(r3)
+ std r6,8(r3)
+ std r7,16(r3)
+ std r8,24(r3)
+ std r9,32(r3)
+ std r10,40(r3)
+ std r11,48(r3)
+ std r12,56(r3)
+ std r14,64(r3)
+ std r15,72(r3)
+ std r16,80(r3)
+ std r17,88(r3)
+ std r18,96(r3)
+ std r19,104(r3)
+ std r20,112(r3)
+ std r21,120(r3)
+ addi r3,r3,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ ld r14,STK_REG(r14)(r1)
+ ld r15,STK_REG(r15)(r1)
+ ld r16,STK_REG(r16)(r1)
+ ld r17,STK_REG(r17)(r1)
+ ld r18,STK_REG(r18)(r1)
+ ld r19,STK_REG(r19)(r1)
+ ld r20,STK_REG(r20)(r1)
+ ld r21,STK_REG(r21)(r1)
+ ld r22,STK_REG(r22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+ ld r0,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ld r8,24(r4)
+ ld r9,32(r4)
+ ld r10,40(r4)
+ ld r11,48(r4)
+ ld r12,56(r4)
+ addi r4,r4,64
+ std r0,0(r3)
+ std r6,8(r3)
+ std r7,16(r3)
+ std r8,24(r3)
+ std r9,32(r3)
+ std r10,40(r3)
+ std r11,48(r3)
+ std r12,56(r3)
+ addi r3,r3,64
+
+ /* Up to 63B to go */
+7: bf cr7*4+2,8f
+ ld r0,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ld r8,24(r4)
+ addi r4,r4,32
+ std r0,0(r3)
+ std r6,8(r3)
+ std r7,16(r3)
+ std r8,24(r3)
+ addi r3,r3,32
+
+ /* Up to 31B to go */
+8: bf cr7*4+3,9f
+ ld r0,0(r4)
+ ld r6,8(r4)
+ addi r4,r4,16
+ std r0,0(r3)
+ std r6,8(r3)
+ addi r3,r3,16
+
+9: clrldi r5,r5,(64-4)
+
+ /* Up to 15B to go */
+.Lshort_copy:
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+ lwz r0,0(r4) /* Less chance of a reject with word ops */
+ lwz r6,4(r4)
+ addi r4,r4,8
+ stw r0,0(r3)
+ stw r6,4(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+ lbz r0,0(r4)
+ stb r0,0(r3)
+
+15: ld r3,48(r1)
+ blr
+
+.Lunwind_stack_nonvmx_copy:
+ addi r1,r1,STACKFRAMESIZE
+ b .Lnonvmx_copy
+
+#ifdef CONFIG_ALTIVEC
+.Lvmx_copy:
+ mflr r0
+ std r4,56(r1)
+ std r5,64(r1)
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ bl .enter_vmx_copy
+ cmpwi r3,0
+ ld r0,STACKFRAMESIZE+16(r1)
+ ld r3,STACKFRAMESIZE+48(r1)
+ ld r4,STACKFRAMESIZE+56(r1)
+ ld r5,STACKFRAMESIZE+64(r1)
+ mtlr r0
+
+ /*
+ * We prefetch both the source and destination using enhanced touch
+ * instructions. We use a stream ID of 0 for the load side and
+ * 1 for the store side.
+ */
+ clrrdi r6,r4,7
+ clrrdi r9,r3,7
+ ori r9,r9,1 /* stream=1 */
+
+ srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
+ cmpldi cr1,r7,0x3FF
+ ble cr1,1f
+ li r7,0x3FF
+1: lis r0,0x0E00 /* depth=7 */
+ sldi r7,r7,7
+ or r7,r7,r0
+ ori r10,r7,1 /* stream=1 */
+
+ lis r8,0x8000 /* GO=1 */
+ clrldi r8,r8,32
+
+.machine push
+.machine "power4"
+ dcbt r0,r6,0b01000
+ dcbt r0,r7,0b01010
+ dcbtst r0,r9,0b01000
+ dcbtst r0,r10,0b01010
+ eieio
+ dcbt r0,r8,0b01010 /* GO */
+.machine pop
+
+ beq .Lunwind_stack_nonvmx_copy
+
+ /*
+ * If source and destination are not relatively aligned we use a
+ * slower permute loop.
+ */
+ xor r6,r4,r3
+ rldicl. r6,r6,0,(64-4)
+ bne .Lvmx_unaligned_copy
+
+ /* Get the destination 16B aligned */
+ neg r6,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-4)
+
+ bf cr7*4+3,1f
+ lbz r0,0(r4)
+ addi r4,r4,1
+ stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+3: bf cr7*4+0,4f
+ ld r0,0(r4)
+ addi r4,r4,8
+ std r0,0(r3)
+ addi r3,r3,8
+
+4: sub r5,r5,r6
+
+ /* Get the desination 128B aligned */
+ neg r6,r3
+ srdi r7,r6,4
+ mtocrf 0x01,r7
+ clrldi r6,r6,(64-7)
+
+ li r9,16
+ li r10,32
+ li r11,48
+
+ bf cr7*4+3,5f
+ lvx vr1,r0,r4
+ addi r4,r4,16
+ stvx vr1,r0,r3
+ addi r3,r3,16
+
+5: bf cr7*4+2,6f
+ lvx vr1,r0,r4
+ lvx vr0,r4,r9
+ addi r4,r4,32
+ stvx vr1,r0,r3
+ stvx vr0,r3,r9
+ addi r3,r3,32
+
+6: bf cr7*4+1,7f
+ lvx vr3,r0,r4
+ lvx vr2,r4,r9
+ lvx vr1,r4,r10
+ lvx vr0,r4,r11
+ addi r4,r4,64
+ stvx vr3,r0,r3
+ stvx vr2,r3,r9
+ stvx vr1,r3,r10
+ stvx vr0,r3,r11
+ addi r3,r3,64
+
+7: sub r5,r5,r6
+ srdi r6,r5,7
+
+ std r14,STK_REG(r14)(r1)
+ std r15,STK_REG(r15)(r1)
+ std r16,STK_REG(r16)(r1)
+
+ li r12,64
+ li r14,80
+ li r15,96
+ li r16,112
+
+ mtctr r6
+
+ /*
+ * Now do cacheline sized loads and stores. By this stage the
+ * cacheline stores are also cacheline aligned.
+ */
+ .align 5
+8:
+ lvx vr7,r0,r4
+ lvx vr6,r4,r9
+ lvx vr5,r4,r10
+ lvx vr4,r4,r11
+ lvx vr3,r4,r12
+ lvx vr2,r4,r14
+ lvx vr1,r4,r15
+ lvx vr0,r4,r16
+ addi r4,r4,128
+ stvx vr7,r0,r3
+ stvx vr6,r3,r9
+ stvx vr5,r3,r10
+ stvx vr4,r3,r11
+ stvx vr3,r3,r12
+ stvx vr2,r3,r14
+ stvx vr1,r3,r15
+ stvx vr0,r3,r16
+ addi r3,r3,128
+ bdnz 8b
+
+ ld r14,STK_REG(r14)(r1)
+ ld r15,STK_REG(r15)(r1)
+ ld r16,STK_REG(r16)(r1)
+
+ /* Up to 127B to go */
+ clrldi r5,r5,(64-7)
+ srdi r6,r5,4
+ mtocrf 0x01,r6
+
+ bf cr7*4+1,9f
+ lvx vr3,r0,r4
+ lvx vr2,r4,r9
+ lvx vr1,r4,r10
+ lvx vr0,r4,r11
+ addi r4,r4,64
+ stvx vr3,r0,r3
+ stvx vr2,r3,r9
+ stvx vr1,r3,r10
+ stvx vr0,r3,r11
+ addi r3,r3,64
+
+9: bf cr7*4+2,10f
+ lvx vr1,r0,r4
+ lvx vr0,r4,r9
+ addi r4,r4,32
+ stvx vr1,r0,r3
+ stvx vr0,r3,r9
+ addi r3,r3,32
+
+10: bf cr7*4+3,11f
+ lvx vr1,r0,r4
+ addi r4,r4,16
+ stvx vr1,r0,r3
+ addi r3,r3,16
+
+ /* Up to 15B to go */
+11: clrldi r5,r5,(64-4)
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+ ld r0,0(r4)
+ addi r4,r4,8
+ std r0,0(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+ lbz r0,0(r4)
+ stb r0,0(r3)
+
+15: addi r1,r1,STACKFRAMESIZE
+ ld r3,48(r1)
+ b .exit_vmx_copy /* tail call optimise */
+
+.Lvmx_unaligned_copy:
+ /* Get the destination 16B aligned */
+ neg r6,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-4)
+
+ bf cr7*4+3,1f
+ lbz r0,0(r4)
+ addi r4,r4,1
+ stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+3: bf cr7*4+0,4f
+ lwz r0,0(r4) /* Less chance of a reject with word ops */
+ lwz r7,4(r4)
+ addi r4,r4,8
+ stw r0,0(r3)
+ stw r7,4(r3)
+ addi r3,r3,8
+
+4: sub r5,r5,r6
+
+ /* Get the desination 128B aligned */
+ neg r6,r3
+ srdi r7,r6,4
+ mtocrf 0x01,r7
+ clrldi r6,r6,(64-7)
+
+ li r9,16
+ li r10,32
+ li r11,48
+
+ lvsl vr16,0,r4 /* Setup permute control vector */
+ lvx vr0,0,r4
+ addi r4,r4,16
+
+ bf cr7*4+3,5f
+ lvx vr1,r0,r4
+ vperm vr8,vr0,vr1,vr16
+ addi r4,r4,16
+ stvx vr8,r0,r3
+ addi r3,r3,16
+ vor vr0,vr1,vr1
+
+5: bf cr7*4+2,6f
+ lvx vr1,r0,r4
+ vperm vr8,vr0,vr1,vr16
+ lvx vr0,r4,r9
+ vperm vr9,vr1,vr0,vr16
+ addi r4,r4,32
+ stvx vr8,r0,r3
+ stvx vr9,r3,r9
+ addi r3,r3,32
+
+6: bf cr7*4+1,7f
+ lvx vr3,r0,r4
+ vperm vr8,vr0,vr3,vr16
+ lvx vr2,r4,r9
+ vperm vr9,vr3,vr2,vr16
+ lvx vr1,r4,r10
+ vperm vr10,vr2,vr1,vr16
+ lvx vr0,r4,r11
+ vperm vr11,vr1,vr0,vr16
+ addi r4,r4,64
+ stvx vr8,r0,r3
+ stvx vr9,r3,r9
+ stvx vr10,r3,r10
+ stvx vr11,r3,r11
+ addi r3,r3,64
+
+7: sub r5,r5,r6
+ srdi r6,r5,7
+
+ std r14,STK_REG(r14)(r1)
+ std r15,STK_REG(r15)(r1)
+ std r16,STK_REG(r16)(r1)
+
+ li r12,64
+ li r14,80
+ li r15,96
+ li r16,112
+
+ mtctr r6
+
+ /*
+ * Now do cacheline sized loads and stores. By this stage the
+ * cacheline stores are also cacheline aligned.
+ */
+ .align 5
+8:
+ lvx vr7,r0,r4
+ vperm vr8,vr0,vr7,vr16
+ lvx vr6,r4,r9
+ vperm vr9,vr7,vr6,vr16
+ lvx vr5,r4,r10
+ vperm vr10,vr6,vr5,vr16
+ lvx vr4,r4,r11
+ vperm vr11,vr5,vr4,vr16
+ lvx vr3,r4,r12
+ vperm vr12,vr4,vr3,vr16
+ lvx vr2,r4,r14
+ vperm vr13,vr3,vr2,vr16
+ lvx vr1,r4,r15
+ vperm vr14,vr2,vr1,vr16
+ lvx vr0,r4,r16
+ vperm vr15,vr1,vr0,vr16
+ addi r4,r4,128
+ stvx vr8,r0,r3
+ stvx vr9,r3,r9
+ stvx vr10,r3,r10
+ stvx vr11,r3,r11
+ stvx vr12,r3,r12
+ stvx vr13,r3,r14
+ stvx vr14,r3,r15
+ stvx vr15,r3,r16
+ addi r3,r3,128
+ bdnz 8b
+
+ ld r14,STK_REG(r14)(r1)
+ ld r15,STK_REG(r15)(r1)
+ ld r16,STK_REG(r16)(r1)
+
+ /* Up to 127B to go */
+ clrldi r5,r5,(64-7)
+ srdi r6,r5,4
+ mtocrf 0x01,r6
+
+ bf cr7*4+1,9f
+ lvx vr3,r0,r4
+ vperm vr8,vr0,vr3,vr16
+ lvx vr2,r4,r9
+ vperm vr9,vr3,vr2,vr16
+ lvx vr1,r4,r10
+ vperm vr10,vr2,vr1,vr16
+ lvx vr0,r4,r11
+ vperm vr11,vr1,vr0,vr16
+ addi r4,r4,64
+ stvx vr8,r0,r3
+ stvx vr9,r3,r9
+ stvx vr10,r3,r10
+ stvx vr11,r3,r11
+ addi r3,r3,64
+
+9: bf cr7*4+2,10f
+ lvx vr1,r0,r4
+ vperm vr8,vr0,vr1,vr16
+ lvx vr0,r4,r9
+ vperm vr9,vr1,vr0,vr16
+ addi r4,r4,32
+ stvx vr8,r0,r3
+ stvx vr9,r3,r9
+ addi r3,r3,32
+
+10: bf cr7*4+3,11f
+ lvx vr1,r0,r4
+ vperm vr8,vr0,vr1,vr16
+ addi r4,r4,16
+ stvx vr8,r0,r3
+ addi r3,r3,16
+
+ /* Up to 15B to go */
+11: clrldi r5,r5,(64-4)
+ addi r4,r4,-16 /* Unwind the +16 load offset */
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+ lwz r0,0(r4) /* Less chance of a reject with word ops */
+ lwz r6,4(r4)
+ addi r4,r4,8
+ stw r0,0(r3)
+ stw r6,4(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+ lbz r0,0(r4)
+ stb r0,0(r3)
+
+15: addi r1,r1,STACKFRAMESIZE
+ ld r3,48(r1)
+ b .exit_vmx_copy /* tail call optimise */
+#endif /* CONFiG_ALTIVEC */
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 093d6316435..1b5a0a09d60 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -119,6 +119,7 @@ _GLOBAL(memchr)
2: li r3,0
blr
+#ifdef CONFIG_PPC32
_GLOBAL(__clear_user)
addi r6,r3,-4
li r3,0
@@ -160,3 +161,4 @@ _GLOBAL(__clear_user)
PPC_LONG 1b,91b
PPC_LONG 8b,92b
.text
+#endif
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
new file mode 100644
index 00000000000..3b1e48049fa
--- /dev/null
+++ b/arch/powerpc/lib/string_64.S
@@ -0,0 +1,202 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+ .section ".toc","aw"
+PPC64_CACHES:
+ .tc ppc64_caches[TC],ppc64_caches
+ .section ".text"
+
+/**
+ * __clear_user: - Zero a block of memory in user space, with less checking.
+ * @to: Destination address, in user space.
+ * @n: Number of bytes to zero.
+ *
+ * Zero a block of memory in user space. Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+
+ .macro err1
+100:
+ .section __ex_table,"a"
+ .align 3
+ .llong 100b,.Ldo_err1
+ .previous
+ .endm
+
+ .macro err2
+200:
+ .section __ex_table,"a"
+ .align 3
+ .llong 200b,.Ldo_err2
+ .previous
+ .endm
+
+ .macro err3
+300:
+ .section __ex_table,"a"
+ .align 3
+ .llong 300b,.Ldo_err3
+ .previous
+ .endm
+
+.Ldo_err1:
+ mr r3,r8
+
+.Ldo_err2:
+ mtctr r4
+1:
+err3; stb r0,0(r3)
+ addi r3,r3,1
+ addi r4,r4,-1
+ bdnz 1b
+
+.Ldo_err3:
+ mr r3,r4
+ blr
+
+_GLOBAL(__clear_user)
+ cmpdi r4,32
+ neg r6,r3
+ li r0,0
+ blt .Lshort_clear
+ mr r8,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ /* Get the destination 8 byte aligned */
+ bf cr7*4+3,1f
+err1; stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+err1; sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+err1; stw r0,0(r3)
+ addi r3,r3,4
+
+3: sub r4,r4,r6
+
+ cmpdi r4,32
+ cmpdi cr1,r4,512
+ blt .Lshort_clear
+ bgt cr1,.Llong_clear
+
+.Lmedium_clear:
+ srdi r6,r4,5
+ mtctr r6
+
+ /* Do 32 byte chunks */
+4:
+err2; std r0,0(r3)
+err2; std r0,8(r3)
+err2; std r0,16(r3)
+err2; std r0,24(r3)
+ addi r3,r3,32
+ addi r4,r4,-32
+ bdnz 4b
+
+.Lshort_clear:
+ /* up to 31 bytes to go */
+ cmpdi r4,16
+ blt 6f
+err2; std r0,0(r3)
+err2; std r0,8(r3)
+ addi r3,r3,16
+ addi r4,r4,-16
+
+ /* Up to 15 bytes to go */
+6: mr r8,r3
+ clrldi r4,r4,(64-4)
+ mtocrf 0x01,r4
+ bf cr7*4+0,7f
+err1; std r0,0(r3)
+ addi r3,r3,8
+
+7: bf cr7*4+1,8f
+err1; stw r0,0(r3)
+ addi r3,r3,4
+
+8: bf cr7*4+2,9f
+err1; sth r0,0(r3)
+ addi r3,r3,2
+
+9: bf cr7*4+3,10f
+err1; stb r0,0(r3)
+
+10: li r3,0
+ blr
+
+.Llong_clear:
+ ld r5,PPC64_CACHES@toc(r2)
+
+ bf cr7*4+0,11f
+err2; std r0,0(r3)
+ addi r3,r3,8
+ addi r4,r4,-8
+
+ /* Destination is 16 byte aligned, need to get it cacheline aligned */
+11: lwz r7,DCACHEL1LOGLINESIZE(r5)
+ lwz r9,DCACHEL1LINESIZE(r5)
+
+ /*
+ * With worst case alignment the long clear loop takes a minimum
+ * of 1 byte less than 2 cachelines.
+ */
+ sldi r10,r9,2
+ cmpd r4,r10
+ blt .Lmedium_clear
+
+ neg r6,r3
+ addi r10,r9,-1
+ and. r5,r6,r10
+ beq 13f
+
+ srdi r6,r5,4
+ mtctr r6
+ mr r8,r3
+12:
+err1; std r0,0(r3)
+err1; std r0,8(r3)
+ addi r3,r3,16
+ bdnz 12b
+
+ sub r4,r4,r5
+
+13: srd r6,r4,r7
+ mtctr r6
+ mr r8,r3
+14:
+err1; dcbz r0,r3
+ add r3,r3,r9
+ bdnz 14b
+
+ and r4,r4,r10
+
+ cmpdi r4,32
+ blt .Lshort_clear
+ b .Lmedium_clear
diff --git a/arch/powerpc/lib/copyuser_power7_vmx.c b/arch/powerpc/lib/vmx-helper.c
index bf2654f2b68..3cf529ceec5 100644
--- a/arch/powerpc/lib/copyuser_power7_vmx.c
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -22,7 +22,7 @@
#include <linux/hardirq.h>
#include <asm/switch_to.h>
-int enter_vmx_copy(void)
+int enter_vmx_usercopy(void)
{
if (in_interrupt())
return 0;
@@ -44,8 +44,31 @@ int enter_vmx_copy(void)
* This function must return 0 because we tail call optimise when calling
* from __copy_tofrom_user_power7 which returns 0 on success.
*/
-int exit_vmx_copy(void)
+int exit_vmx_usercopy(void)
{
pagefault_enable();
return 0;
}
+
+int enter_vmx_copy(void)
+{
+ if (in_interrupt())
+ return 0;
+
+ preempt_disable();
+
+ enable_kernel_altivec();
+
+ return 1;
+}
+
+/*
+ * All calls to this function will be optimised into tail calls. We are
+ * passed a pointer to the destination which we return as required by a
+ * memcpy implementation.
+ */
+void *exit_vmx_copy(void *dest)
+{
+ preempt_enable();
+ return dest;
+}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 1e95556dc69..39b159751c3 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -340,6 +340,8 @@ static int __init find_min_common_depth(void)
dbg("Using form 1 affinity\n");
form1_affinity = 1;
}
+
+ of_node_put(chosen);
}
}
diff --git a/arch/powerpc/platforms/44x/currituck.c b/arch/powerpc/platforms/44x/currituck.c
index 583e67fee37..9f6c33d63a4 100644
--- a/arch/powerpc/platforms/44x/currituck.c
+++ b/arch/powerpc/platforms/44x/currituck.c
@@ -160,7 +160,7 @@ static void __init ppc47x_setup_arch(void)
/* No need to check the DMA config as we /know/ our windows are all of
* RAM. Lets hope that doesn't change */
#ifdef CONFIG_SWIOTLB
- if (memblock_end_of_DRAM() > 0xffffffff) {
+ if ((memblock_end_of_DRAM() - 1) > 0xffffffff) {
ppc_swiotlb_enable = 1;
set_pci_dma_ops(&swiotlb_dma_ops);
ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
diff --git a/arch/powerpc/platforms/85xx/corenet_ds.c b/arch/powerpc/platforms/85xx/corenet_ds.c
index dd3617c531d..925b0287423 100644
--- a/arch/powerpc/platforms/85xx/corenet_ds.c
+++ b/arch/powerpc/platforms/85xx/corenet_ds.c
@@ -77,7 +77,7 @@ void __init corenet_ds_setup_arch(void)
#endif
#ifdef CONFIG_SWIOTLB
- if (memblock_end_of_DRAM() > max) {
+ if ((memblock_end_of_DRAM() - 1) > max) {
ppc_swiotlb_enable = 1;
set_pci_dma_ops(&swiotlb_dma_ops);
ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c
index 18014629416..b6a728b0a8c 100644
--- a/arch/powerpc/platforms/85xx/ge_imp3a.c
+++ b/arch/powerpc/platforms/85xx/ge_imp3a.c
@@ -125,7 +125,7 @@ static void __init ge_imp3a_setup_arch(void)
mpc85xx_smp_init();
#ifdef CONFIG_SWIOTLB
- if (memblock_end_of_DRAM() > max) {
+ if ((memblock_end_of_DRAM() - 1) > max) {
ppc_swiotlb_enable = 1;
set_pci_dma_ops(&swiotlb_dma_ops);
ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
index 585bd22b140..767c7cf18a9 100644
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -75,7 +75,7 @@ static void __init mpc8536_ds_setup_arch(void)
#endif
#ifdef CONFIG_SWIOTLB
- if (memblock_end_of_DRAM() > max) {
+ if ((memblock_end_of_DRAM() - 1) > max) {
ppc_swiotlb_enable = 1;
set_pci_dma_ops(&swiotlb_dma_ops);
ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index 1fd91e9e0ff..d30f6c47917 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -173,7 +173,7 @@ static void __init mpc85xx_ds_setup_arch(void)
mpc85xx_smp_init();
#ifdef CONFIG_SWIOTLB
- if (memblock_end_of_DRAM() > max) {
+ if ((memblock_end_of_DRAM() - 1) > max) {
ppc_swiotlb_enable = 1;
set_pci_dma_ops(&swiotlb_dma_ops);
ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index d208ebccb91..8e4b094c553 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -359,7 +359,7 @@ static void __init mpc85xx_mds_setup_arch(void)
mpc85xx_mds_qe_init();
#ifdef CONFIG_SWIOTLB
- if (memblock_end_of_DRAM() > max) {
+ if ((memblock_end_of_DRAM() - 1) > max) {
ppc_swiotlb_enable = 1;
set_pci_dma_ops(&swiotlb_dma_ops);
ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index f700c81a132..74e310b4b46 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -450,7 +450,7 @@ static void __init p1022_ds_setup_arch(void)
mpc85xx_smp_init();
#ifdef CONFIG_SWIOTLB
- if (memblock_end_of_DRAM() > max) {
+ if ((memblock_end_of_DRAM() - 1) > max) {
ppc_swiotlb_enable = 1;
set_pci_dma_ops(&swiotlb_dma_ops);
ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index 3755e61d7ec..817245bc021 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -102,7 +102,7 @@ mpc86xx_hpcn_setup_arch(void)
#endif
#ifdef CONFIG_SWIOTLB
- if (memblock_end_of_DRAM() > max) {
+ if ((memblock_end_of_DRAM() - 1) > max) {
ppc_swiotlb_enable = 1;
set_pci_dma_ops(&swiotlb_dma_ops);
ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index b9f509a34c0..c264969c931 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -518,7 +518,6 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
__set_bit(0, window->table.it_map);
tce_build_cell(&window->table, window->table.it_offset, 1,
(unsigned long)iommu->pad_page, DMA_TO_DEVICE, NULL);
- window->table.it_hint = window->table.it_blocksize;
return window;
}
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 2d311c0caf8..07c09cbbfb1 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -192,12 +192,15 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long l, limit;
long tcenum_start = tcenum, npages_start = npages;
int ret = 0;
+ unsigned long flags;
if (npages == 1) {
return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
direction, attrs);
}
+ local_irq_save(flags); /* to protect tcep and the page behind it */
+
tcep = __get_cpu_var(tce_page);
/* This is safe to do since interrupts are off when we're called
@@ -207,6 +210,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
tcep = (u64 *)__get_free_page(GFP_ATOMIC);
/* If allocation fails, fall back to the loop implementation */
if (!tcep) {
+ local_irq_restore(flags);
return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
direction, attrs);
}
@@ -240,6 +244,8 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
tcenum += limit;
} while (npages > 0 && !rc);
+ local_irq_restore(flags);
+
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
ret = (int)rc;
tce_freemulti_pSeriesLP(tbl, tcenum_start,
@@ -707,6 +713,21 @@ static int __init disable_ddw_setup(char *str)
early_param("disable_ddw", disable_ddw_setup);
+static inline void __remove_ddw(struct device_node *np, const u32 *ddw_avail, u64 liobn)
+{
+ int ret;
+
+ ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
+ if (ret)
+ pr_warning("%s: failed to remove DMA window: rtas returned "
+ "%d to ibm,remove-pe-dma-window(%x) %llx\n",
+ np->full_name, ret, ddw_avail[2], liobn);
+ else
+ pr_debug("%s: successfully removed DMA window: rtas returned "
+ "%d to ibm,remove-pe-dma-window(%x) %llx\n",
+ np->full_name, ret, ddw_avail[2], liobn);
+}
+
static void remove_ddw(struct device_node *np)
{
struct dynamic_dma_window_prop *dwp;
@@ -736,15 +757,7 @@ static void remove_ddw(struct device_node *np)
pr_debug("%s successfully cleared tces in window.\n",
np->full_name);
- ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
- if (ret)
- pr_warning("%s: failed to remove direct window: rtas returned "
- "%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np->full_name, ret, ddw_avail[2], liobn);
- else
- pr_debug("%s: successfully removed direct window: rtas returned "
- "%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np->full_name, ret, ddw_avail[2], liobn);
+ __remove_ddw(np, ddw_avail, liobn);
delprop:
ret = prom_remove_property(np, win64);
@@ -869,6 +882,35 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
return ret;
}
+static void restore_default_window(struct pci_dev *dev,
+ u32 ddw_restore_token, unsigned long liobn)
+{
+ struct eeh_dev *edev;
+ u32 cfg_addr;
+ u64 buid;
+ int ret;
+
+ /*
+ * Get the config address and phb buid of the PE window.
+ * Rely on eeh to retrieve this for us.
+ * Retrieve them from the pci device, not the node with the
+ * dma-window property
+ */
+ edev = pci_dev_to_eeh_dev(dev);
+ cfg_addr = edev->config_addr;
+ if (edev->pe_config_addr)
+ cfg_addr = edev->pe_config_addr;
+ buid = edev->phb->buid;
+
+ do {
+ ret = rtas_call(ddw_restore_token, 3, 1, NULL, cfg_addr,
+ BUID_HI(buid), BUID_LO(buid));
+ } while (rtas_busy_delay(ret));
+ dev_info(&dev->dev,
+ "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d\n",
+ ddw_restore_token, cfg_addr, BUID_HI(buid), BUID_LO(buid), ret);
+}
+
/*
* If the PE supports dynamic dma windows, and there is space for a table
* that can map all pages in a linear offset, then setup such a table,
@@ -889,9 +931,13 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
u64 dma_addr, max_addr;
struct device_node *dn;
const u32 *uninitialized_var(ddw_avail);
+ const u32 *uninitialized_var(ddw_extensions);
+ u32 ddw_restore_token = 0;
struct direct_window *window;
struct property *win64;
struct dynamic_dma_window_prop *ddwprop;
+ const void *dma_window = NULL;
+ unsigned long liobn, offset, size;
mutex_lock(&direct_window_init_mutex);
@@ -911,7 +957,40 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
if (!ddw_avail || len < 3 * sizeof(u32))
goto out_unlock;
- /*
+ /*
+ * the extensions property is only required to exist in certain
+ * levels of firmware and later
+ * the ibm,ddw-extensions property is a list with the first
+ * element containing the number of extensions and each
+ * subsequent entry is a value corresponding to that extension
+ */
+ ddw_extensions = of_get_property(pdn, "ibm,ddw-extensions", &len);
+ if (ddw_extensions) {
+ /*
+ * each new defined extension length should be added to
+ * the top of the switch so the "earlier" entries also
+ * get picked up
+ */
+ switch (ddw_extensions[0]) {
+ /* ibm,reset-pe-dma-windows */
+ case 1:
+ ddw_restore_token = ddw_extensions[1];
+ break;
+ }
+ }
+
+ /*
+ * Only remove the existing DMA window if we can restore back to
+ * the default state. Removing the existing window maximizes the
+ * resources available to firmware for dynamic window creation.
+ */
+ if (ddw_restore_token) {
+ dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
+ of_parse_dma_window(pdn, dma_window, &liobn, &offset, &size);
+ __remove_ddw(pdn, ddw_avail, liobn);
+ }
+
+ /*
* Query if there is a second window of size to map the
* whole partition. Query returns number of windows, largest
* block assigned to PE (partition endpoint), and two bitmasks
@@ -920,7 +999,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
dn = pci_device_to_OF_node(dev);
ret = query_ddw(dev, ddw_avail, &query);
if (ret != 0)
- goto out_unlock;
+ goto out_restore_window;
if (query.windows_available == 0) {
/*
@@ -929,7 +1008,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* trading in for a larger page size.
*/
dev_dbg(&dev->dev, "no free dynamic windows");
- goto out_unlock;
+ goto out_restore_window;
}
if (query.page_size & 4) {
page_shift = 24; /* 16MB */
@@ -940,7 +1019,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
} else {
dev_dbg(&dev->dev, "no supported direct page size in mask %x",
query.page_size);
- goto out_unlock;
+ goto out_restore_window;
}
/* verify the window * number of ptes will map the partition */
/* check largest block * page size > max memory hotplug addr */
@@ -949,14 +1028,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u "
"%llu-sized pages\n", max_addr, query.largest_available_block,
1ULL << page_shift);
- goto out_unlock;
+ goto out_restore_window;
}
len = order_base_2(max_addr);
win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
if (!win64) {
dev_info(&dev->dev,
"couldn't allocate property for 64bit dma window\n");
- goto out_unlock;
+ goto out_restore_window;
}
win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
@@ -1018,6 +1097,10 @@ out_free_prop:
kfree(win64->value);
kfree(win64);
+out_restore_window:
+ if (ddw_restore_token)
+ restore_default_window(dev, ddw_restore_token, liobn);
+
out_unlock:
mutex_unlock(&direct_window_init_mutex);
return dma_addr;
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
index c71be66bd5d..7f5668b9416 100644
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -11,6 +11,7 @@
#include <linux/moduleparam.h>
#include <linux/cpuidle.h>
#include <linux/cpu.h>
+#include <linux/notifier.h>
#include <asm/paca.h>
#include <asm/reg.h>
@@ -189,17 +190,28 @@ static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
.enter = &shared_cede_loop },
};
-int pseries_notify_cpuidle_add_cpu(int cpu)
+static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n,
+ unsigned long action, void *hcpu)
{
+ int hotcpu = (unsigned long)hcpu;
struct cpuidle_device *dev =
- per_cpu_ptr(pseries_cpuidle_devices, cpu);
- if (dev && cpuidle_get_driver()) {
- cpuidle_disable_device(dev);
- cpuidle_enable_device(dev);
+ per_cpu_ptr(pseries_cpuidle_devices, hotcpu);
+
+ switch (action & 0xf) {
+ case CPU_ONLINE:
+ if (dev && cpuidle_get_driver()) {
+ cpuidle_disable_device(dev);
+ cpuidle_enable_device(dev);
+ }
+ break;
}
- return 0;
+ return NOTIFY_OK;
}
+static struct notifier_block setup_hotplug_notifier = {
+ .notifier_call = pseries_cpuidle_add_cpu_notifier,
+};
+
/*
* pseries_cpuidle_driver_init()
*/
@@ -324,6 +336,7 @@ static int __init pseries_processor_idle_init(void)
return retval;
}
+ register_cpu_notifier(&setup_hotplug_notifier);
printk(KERN_DEBUG "pseries_idle_driver registered\n");
return 0;
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index e16bb8d4855..71706bc34a0 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -147,7 +147,6 @@ static void __devinit smp_xics_setup_cpu(int cpu)
set_cpu_current_state(cpu, CPU_STATE_ONLINE);
set_default_offline_state(cpu);
#endif
- pseries_notify_cpuidle_add_cpu(cpu);
}
static int __devinit smp_pSeries_kick_cpu(int nr)