summaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2012-07-24 22:43:04 -0700
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2012-07-24 22:43:04 -0700
commit314820c9e892d8f41ba4db300ec96770d9c8294b (patch)
tree3d5c59a429239b180c77e57f998a78d3f2b79827 /arch/powerpc
parente76b8ee25e034ab601b525abb95cea14aa167ed3 (diff)
parent07b8481d4aff73d6f451f25e74ea10240ff5131e (diff)
Merge branch 'next' into for-linus
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig26
-rw-r--r--arch/powerpc/Makefile49
-rw-r--r--arch/powerpc/boot/dts/bluestone.dts25
-rw-r--r--arch/powerpc/boot/dts/fsl/pq3-mpic-message-B.dtsi43
-rw-r--r--arch/powerpc/boot/dts/fsl/pq3-mpic.dtsi10
-rw-r--r--arch/powerpc/boot/dts/mpc8569mds.dts1
-rw-r--r--arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig1
-rw-r--r--arch/powerpc/configs/chroma_defconfig2
-rw-r--r--arch/powerpc/configs/g5_defconfig1
-rw-r--r--arch/powerpc/configs/gamecube_defconfig2
-rw-r--r--arch/powerpc/configs/maple_defconfig1
-rw-r--r--arch/powerpc/configs/mpc85xx_defconfig1
-rw-r--r--arch/powerpc/configs/mpc85xx_smp_defconfig1
-rw-r--r--arch/powerpc/configs/pasemi_defconfig1
-rw-r--r--arch/powerpc/configs/ps3_defconfig6
-rw-r--r--arch/powerpc/configs/wii_defconfig2
-rw-r--r--arch/powerpc/include/asm/asm-compat.h11
-rw-r--r--arch/powerpc/include/asm/cputable.h23
-rw-r--r--arch/powerpc/include/asm/cputhreads.h2
-rw-r--r--arch/powerpc/include/asm/dbell.h3
-rw-r--r--arch/powerpc/include/asm/exception-64s.h7
-rw-r--r--arch/powerpc/include/asm/gpio.h57
-rw-r--r--arch/powerpc/include/asm/hvcall.h35
-rw-r--r--arch/powerpc/include/asm/hw_irq.h9
-rw-r--r--arch/powerpc/include/asm/irq.h4
-rw-r--r--arch/powerpc/include/asm/kvm.h1
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h18
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h10
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h8
-rw-r--r--arch/powerpc/include/asm/kvm_booke.h3
-rw-r--r--arch/powerpc/include/asm/kvm_booke_hv_asm.h49
-rw-r--r--arch/powerpc/include/asm/kvm_e500.h96
-rw-r--r--arch/powerpc/include/asm/kvm_host.h60
-rw-r--r--arch/powerpc/include/asm/kvm_para.h5
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h20
-rw-r--r--arch/powerpc/include/asm/lppaca.h196
-rw-r--r--arch/powerpc/include/asm/lv1call.h4
-rw-r--r--arch/powerpc/include/asm/mmu-book3e.h6
-rw-r--r--arch/powerpc/include/asm/mpic.h18
-rw-r--r--arch/powerpc/include/asm/mpic_msgr.h1
-rw-r--r--arch/powerpc/include/asm/pSeries_reconfig.h12
-rw-r--r--arch/powerpc/include/asm/posix_types.h3
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h10
-rw-r--r--arch/powerpc/include/asm/processor.h7
-rw-r--r--arch/powerpc/include/asm/ptrace.h6
-rw-r--r--arch/powerpc/include/asm/reg.h2
-rw-r--r--arch/powerpc/include/asm/reg_booke.h39
-rw-r--r--arch/powerpc/include/asm/stat.h4
-rw-r--r--arch/powerpc/include/asm/switch_to.h7
-rw-r--r--arch/powerpc/include/asm/thread_info.h32
-rw-r--r--arch/powerpc/include/asm/time.h1
-rw-r--r--arch/powerpc/include/asm/topology.h36
-rw-r--r--arch/powerpc/include/asm/uaccess.h41
-rw-r--r--arch/powerpc/include/asm/vio.h46
-rw-r--r--arch/powerpc/include/asm/word-at-a-time.h41
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c23
-rw-r--r--arch/powerpc/kernel/cpu_setup_fsl_booke.S1
-rw-r--r--arch/powerpc/kernel/entry_64.S183
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S18
-rw-r--r--arch/powerpc/kernel/head_44x.S31
-rw-r--r--arch/powerpc/kernel/head_booke.h69
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S106
-rw-r--r--arch/powerpc/kernel/idle.c23
-rw-r--r--arch/powerpc/kernel/idle_power7.S7
-rw-r--r--arch/powerpc/kernel/init_task.c29
-rw-r--r--arch/powerpc/kernel/irq.c31
-rw-r--r--arch/powerpc/kernel/machine_kexec.c7
-rw-r--r--arch/powerpc/kernel/misc_32.S203
-rw-r--r--arch/powerpc/kernel/module_32.c11
-rw-r--r--arch/powerpc/kernel/paca.c3
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c6
-rw-r--r--arch/powerpc/kernel/process.c52
-rw-r--r--arch/powerpc/kernel/prom_init.c21
-rw-r--r--arch/powerpc/kernel/ptrace.c44
-rw-r--r--arch/powerpc/kernel/ptrace32.c32
-rw-r--r--arch/powerpc/kernel/setup_32.c3
-rw-r--r--arch/powerpc/kernel/signal.c38
-rw-r--r--arch/powerpc/kernel/signal.h3
-rw-r--r--arch/powerpc/kernel/signal_32.c21
-rw-r--r--arch/powerpc/kernel/signal_64.c4
-rw-r--r--arch/powerpc/kernel/smp.c76
-rw-r--r--arch/powerpc/kernel/time.c17
-rw-r--r--arch/powerpc/kernel/traps.c10
-rw-r--r--arch/powerpc/kernel/vector.S10
-rw-r--r--arch/powerpc/kernel/vio.c273
-rw-r--r--arch/powerpc/kvm/44x.c12
-rw-r--r--arch/powerpc/kvm/44x_emulate.c51
-rw-r--r--arch/powerpc/kvm/Kconfig28
-rw-r--r--arch/powerpc/kvm/Makefile17
-rw-r--r--arch/powerpc/kvm/book3s.c7
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c13
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c53
-rw-r--r--arch/powerpc/kvm/book3s_64_slb.S2
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c150
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c3
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c106
-rw-r--r--arch/powerpc/kvm/book3s_hv.c519
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S9
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c1
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S185
-rw-r--r--arch/powerpc/kvm/book3s_pr.c59
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c101
-rw-r--r--arch/powerpc/kvm/book3s_segment.S55
-rw-r--r--arch/powerpc/kvm/booke.c471
-rw-r--r--arch/powerpc/kvm/booke.h62
-rw-r--r--arch/powerpc/kvm/booke_emulate.c118
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S8
-rw-r--r--arch/powerpc/kvm/bookehv_interrupts.S597
-rw-r--r--arch/powerpc/kvm/e500.c372
-rw-r--r--arch/powerpc/kvm/e500.h306
-rw-r--r--arch/powerpc/kvm/e500_emulate.c210
-rw-r--r--arch/powerpc/kvm/e500_tlb.c666
-rw-r--r--arch/powerpc/kvm/e500_tlb.h174
-rw-r--r--arch/powerpc/kvm/e500mc.c342
-rw-r--r--arch/powerpc/kvm/emulate.c197
-rw-r--r--arch/powerpc/kvm/powerpc.c94
-rw-r--r--arch/powerpc/kvm/timing.h6
-rw-r--r--arch/powerpc/lib/copyuser_64.S6
-rw-r--r--arch/powerpc/lib/mem_64.S6
-rw-r--r--arch/powerpc/lib/memcpy_64.S6
-rw-r--r--arch/powerpc/lib/string.S45
-rw-r--r--arch/powerpc/mm/hugetlbpage.c3
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c11
-rw-r--r--arch/powerpc/mm/numa.c2
-rw-r--r--arch/powerpc/net/bpf_jit.h8
-rw-r--r--arch/powerpc/net/bpf_jit_64.S110
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c26
-rw-r--r--arch/powerpc/perf/core-book3s.c3
-rw-r--r--arch/powerpc/perf/core-fsl-emb.c3
-rw-r--r--arch/powerpc/platforms/44x/Kconfig2
-rw-r--r--arch/powerpc/platforms/85xx/common.c6
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_mds.c11
-rw-r--r--arch/powerpc/platforms/85xx/p1022_ds.c13
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype43
-rw-r--r--arch/powerpc/platforms/cell/axon_msi.c8
-rw-r--r--arch/powerpc/platforms/cell/beat_interrupt.c2
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c2
-rw-r--r--arch/powerpc/platforms/powermac/low_i2c.c10
-rw-r--r--arch/powerpc/platforms/powermac/pic.c6
-rw-r--r--arch/powerpc/platforms/ps3/Kconfig22
-rw-r--r--arch/powerpc/platforms/ps3/mm.c77
-rw-r--r--arch/powerpc/platforms/ps3/platform.h16
-rw-r--r--arch/powerpc/platforms/ps3/repository.c198
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig6
-rw-r--r--arch/powerpc/platforms/pseries/eeh.c4
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c4
-rw-r--r--arch/powerpc/platforms/pseries/nvram.c61
-rw-r--r--arch/powerpc/platforms/pseries/plpar_wrappers.h4
-rw-r--r--arch/powerpc/platforms/pseries/processor_idle.c2
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c7
-rw-r--r--arch/powerpc/sysdev/cpm2_pic.c3
-rw-r--r--arch/powerpc/sysdev/mpc8xx_pic.c61
-rw-r--r--arch/powerpc/sysdev/mpic.c54
-rw-r--r--arch/powerpc/sysdev/mpic_msgr.c12
-rw-r--r--arch/powerpc/sysdev/ppc4xx_msi.c42
-rw-r--r--arch/powerpc/sysdev/scom.c1
-rw-r--r--arch/powerpc/sysdev/xics/xics-common.c7
158 files changed, 5644 insertions, 2725 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index feab3bad6d0..050cb371a69 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -27,15 +27,6 @@ config MMU
bool
default y
-config GENERIC_CMOS_UPDATE
- def_bool y
-
-config GENERIC_TIME_VSYSCALL
- def_bool y
-
-config GENERIC_CLOCKEVENTS
- def_bool y
-
config HAVE_SETUP_PER_CPU_AREA
def_bool PPC64
@@ -87,10 +78,6 @@ config ARCH_HAS_ILOG2_U64
bool
default y if 64BIT
-config ARCH_HAS_CPU_IDLE_WAIT
- bool
- default y
-
config GENERIC_HWEIGHT
bool
default y
@@ -141,9 +128,15 @@ config PPC
select IRQ_FORCED_THREADING
select HAVE_RCU_TABLE_FREE if SMP
select HAVE_SYSCALL_TRACEPOINTS
- select HAVE_BPF_JIT if (PPC64 && NET)
+ select HAVE_BPF_JIT if PPC64
select HAVE_ARCH_JUMP_LABEL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select GENERIC_SMP_IDLE_THREAD
+ select GENERIC_CMOS_UPDATE
+ select GENERIC_TIME_VSYSCALL
+ select GENERIC_CLOCKEVENTS
+ select GENERIC_STRNCPY_FROM_USER
+ select GENERIC_STRNLEN_USER
config EARLY_PRINTK
bool
@@ -284,7 +277,6 @@ config HIGHMEM
bool "High memory support"
depends on PPC32
-source kernel/time/Kconfig
source kernel/Kconfig.hz
source kernel/Kconfig.preempt
source "fs/Kconfig.binfmt"
@@ -353,7 +345,7 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
- depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP && !PPC_47x)) && EXPERIMENTAL
+ depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP)) && EXPERIMENTAL
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
@@ -370,7 +362,7 @@ config KEXEC
config CRASH_DUMP
bool "Build a kdump crash kernel"
- depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP && !PPC_47x)
+ depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP)
select RELOCATABLE if PPC64 || 44x
select DYNAMIC_MEMSTART if FSL_BOOKE
help
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 6524c6e2189..950d1f7a5a3 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -69,6 +69,16 @@ LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y)
CFLAGS-$(CONFIG_PPC64) := -mminimal-toc -mtraceback=no -mcall-aixdesc
CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 -mmultiple
+
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,-mtune=power4)
+CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell)
+CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4)
+CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5)
+CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6)
+CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7)
+
+CFLAGS-$(CONFIG_TUNE_CELL) += $(call cc-option,-mtune=cell)
+
KBUILD_CPPFLAGS += -Iarch/$(ARCH)
KBUILD_AFLAGS += -Iarch/$(ARCH)
KBUILD_CFLAGS += -msoft-float -pipe -Iarch/$(ARCH) $(CFLAGS-y)
@@ -76,32 +86,11 @@ CPP = $(CC) -E $(KBUILD_CFLAGS)
CHECKFLAGS += -m$(CONFIG_WORD_SIZE) -D__powerpc__ -D__powerpc$(CONFIG_WORD_SIZE)__
-ifeq ($(CONFIG_PPC64),y)
-GCC_BROKEN_VEC := $(call cc-ifversion, -lt, 0400, y)
-
-ifeq ($(CONFIG_POWER4_ONLY),y)
-ifeq ($(CONFIG_ALTIVEC),y)
-ifeq ($(GCC_BROKEN_VEC),y)
- KBUILD_CFLAGS += $(call cc-option,-mcpu=970)
-else
- KBUILD_CFLAGS += $(call cc-option,-mcpu=power4)
-endif
-else
- KBUILD_CFLAGS += $(call cc-option,-mcpu=power4)
-endif
-else
- KBUILD_CFLAGS += $(call cc-option,-mtune=power4)
-endif
-endif
-
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
-ifeq ($(CONFIG_TUNE_CELL),y)
- KBUILD_CFLAGS += $(call cc-option,-mtune=cell)
-endif
-
-# No AltiVec instruction when building kernel
+# No AltiVec or VSX instructions when building kernel
KBUILD_CFLAGS += $(call cc-option,-mno-altivec)
+KBUILD_CFLAGS += $(call cc-option,-mno-vsx)
# No SPE instruction when building kernel
# (We use all available options to help semi-broken compilers)
@@ -160,6 +149,7 @@ core-$(CONFIG_KVM) += arch/powerpc/kvm/
core-$(CONFIG_PERF_EVENTS) += arch/powerpc/perf/
drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
+drivers-$(CONFIG_CRYPTO_DEV_NX) += drivers/crypto/nx/
# Default to zImage, override when needed
all: zImage
@@ -234,10 +224,11 @@ archprepare: checkbin
# Use the file '.tmp_gas_check' for binutils tests, as gas won't output
# to stdout and these checks are run even on install targets.
TOUT := .tmp_gas_check
-# Ensure this is binutils 2.12.1 (or 2.12.90.0.7) or later for altivec
-# instructions.
-# gcc-3.4 and binutils-2.14 are a fatal combination.
+# Check gcc and binutils versions:
+# - gcc-3.4 and binutils-2.14 are a fatal combination
+# - Require gcc 4.0 or above on 64-bit
+# - gcc-4.2.0 has issues compiling modules on 64-bit
checkbin:
@if test "$(call cc-version)" = "0304" ; then \
if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \
@@ -247,6 +238,12 @@ checkbin:
false; \
fi ; \
fi
+ @if test "$(call cc-version)" -lt "0400" \
+ && test "x${CONFIG_PPC64}" = "xy" ; then \
+ echo -n "Sorry, GCC v4.0 or above is required to build " ; \
+ echo "the 64-bit powerpc kernel." ; \
+ false ; \
+ fi
@if test "$(call cc-fullversion)" = "040200" \
&& test "x${CONFIG_MODULES}${CONFIG_PPC64}" = "xyy" ; then \
echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
diff --git a/arch/powerpc/boot/dts/bluestone.dts b/arch/powerpc/boot/dts/bluestone.dts
index 7bda373f10e..9d4917aebe6 100644
--- a/arch/powerpc/boot/dts/bluestone.dts
+++ b/arch/powerpc/boot/dts/bluestone.dts
@@ -373,5 +373,30 @@
0x0 0x0 0x0 0x3 &UIC3 0xe 0x4 /* swizzled int C */
0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>;
};
+
+ MSI: ppc4xx-msi@C10000000 {
+ compatible = "amcc,ppc4xx-msi", "ppc4xx-msi";
+ reg = < 0xC 0x10000000 0x100
+ 0xC 0x10000000 0x100>;
+ sdr-base = <0x36C>;
+ msi-data = <0x00004440>;
+ msi-mask = <0x0000ffe0>;
+ interrupts =<0 1 2 3 4 5 6 7>;
+ interrupt-parent = <&MSI>;
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ msi-available-ranges = <0x0 0x100>;
+ interrupt-map = <
+ 0 &UIC3 0x18 1
+ 1 &UIC3 0x19 1
+ 2 &UIC3 0x1A 1
+ 3 &UIC3 0x1B 1
+ 4 &UIC3 0x1C 1
+ 5 &UIC3 0x1D 1
+ 6 &UIC3 0x1E 1
+ 7 &UIC3 0x1F 1
+ >;
+ };
};
};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-mpic-message-B.dtsi b/arch/powerpc/boot/dts/fsl/pq3-mpic-message-B.dtsi
new file mode 100644
index 00000000000..1cf0b77b1ef
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-mpic-message-B.dtsi
@@ -0,0 +1,43 @@
+/*
+ * PQ3 MPIC Message (Group B) device tree stub [ controller @ offset 0x42400 ]
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+message@42400 {
+ compatible = "fsl,mpic-v3.1-msgr";
+ reg = <0x42400 0x200>;
+ interrupts = <
+ 0xb4 2 0 0
+ 0xb5 2 0 0
+ 0xb6 2 0 0
+ 0xb7 2 0 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-mpic.dtsi b/arch/powerpc/boot/dts/fsl/pq3-mpic.dtsi
index fdedf7b1fe0..71c30eb1005 100644
--- a/arch/powerpc/boot/dts/fsl/pq3-mpic.dtsi
+++ b/arch/powerpc/boot/dts/fsl/pq3-mpic.dtsi
@@ -53,6 +53,16 @@ timer@41100 {
3 0 3 0>;
};
+message@41400 {
+ compatible = "fsl,mpic-v3.1-msgr";
+ reg = <0x41400 0x200>;
+ interrupts = <
+ 0xb0 2 0 0
+ 0xb1 2 0 0
+ 0xb2 2 0 0
+ 0xb3 2 0 0>;
+};
+
msi@41600 {
compatible = "fsl,mpic-msi";
reg = <0x41600 0x80>;
diff --git a/arch/powerpc/boot/dts/mpc8569mds.dts b/arch/powerpc/boot/dts/mpc8569mds.dts
index 7e283c891b7..fe0d60935e9 100644
--- a/arch/powerpc/boot/dts/mpc8569mds.dts
+++ b/arch/powerpc/boot/dts/mpc8569mds.dts
@@ -119,6 +119,7 @@
sdhc@2e000 {
status = "disabled";
sdhci,1-bit-only;
+ bus-width = <1>;
};
par_io@e0100 {
diff --git a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig b/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig
index 0db9ba0423f..c09598b31de 100644
--- a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig
+++ b/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig
@@ -100,6 +100,7 @@ CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
# CONFIG_SND_SUPPORT_OLD_API is not set
CONFIG_SND_SOC=y
+CONFIG_SND_POWERPC_SOC=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_CMOS=y
CONFIG_EXT2_FS=y
diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig
index f104ccde6b5..b1f9597fe31 100644
--- a/arch/powerpc/configs/chroma_defconfig
+++ b/arch/powerpc/configs/chroma_defconfig
@@ -32,7 +32,7 @@ CONFIG_RD_LZMA=y
CONFIG_INITRAMFS_COMPRESSION_GZIP=y
CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
-CONFIG_PERF_COUNTERS=y
+CONFIG_PERF_EVENTS=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
CONFIG_KPROBES=y
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index 1196c34163b..07b7f2af2dc 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -1,5 +1,4 @@
CONFIG_PPC64=y
-CONFIG_POWER4_ONLY=y
CONFIG_ALTIVEC=y
CONFIG_SMP=y
CONFIG_NR_CPUS=4
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index e74d3a48370..9ef2cc13e1b 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -8,7 +8,7 @@ CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_ELF_CORE is not set
-CONFIG_PERF_COUNTERS=y
+CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_SLAB=y
CONFIG_MODULES=y
diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig
index 2244d370f24..02ac96b679b 100644
--- a/arch/powerpc/configs/maple_defconfig
+++ b/arch/powerpc/configs/maple_defconfig
@@ -1,5 +1,4 @@
CONFIG_PPC64=y
-CONFIG_POWER4_ONLY=y
CONFIG_SMP=y
CONFIG_NR_CPUS=4
CONFIG_EXPERIMENTAL=y
diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig
index d6b6df5e874..62bb723c5b5 100644
--- a/arch/powerpc/configs/mpc85xx_defconfig
+++ b/arch/powerpc/configs/mpc85xx_defconfig
@@ -141,6 +141,7 @@ CONFIG_SND_INTEL8X0=y
# CONFIG_SND_PPC is not set
# CONFIG_SND_USB is not set
CONFIG_SND_SOC=y
+CONFIG_SND_POWERPC_SOC=y
CONFIG_HID_A4TECH=y
CONFIG_HID_APPLE=y
CONFIG_HID_BELKIN=y
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
index 5b0e2926bec..d1828427ae5 100644
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig
@@ -143,6 +143,7 @@ CONFIG_SND_INTEL8X0=y
# CONFIG_SND_PPC is not set
# CONFIG_SND_USB is not set
CONFIG_SND_SOC=y
+CONFIG_SND_POWERPC_SOC=y
CONFIG_HID_A4TECH=y
CONFIG_HID_APPLE=y
CONFIG_HID_BELKIN=y
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index f4deb0b78cf..840a2c2d043 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -1,5 +1,4 @@
CONFIG_PPC64=y
-CONFIG_POWER4_ONLY=y
CONFIG_ALTIVEC=y
# CONFIG_VIRT_CPU_ACCOUNTING is not set
CONFIG_SMP=y
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index ded867871e9..c2f4b4a86ec 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -6,7 +6,6 @@ CONFIG_NR_CPUS=2
CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_SPARSE_IRQ=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EMBEDDED=y
@@ -25,7 +24,6 @@ CONFIG_PS3_DISK=y
CONFIG_PS3_ROM=y
CONFIG_PS3_FLASH=y
CONFIG_PS3_VRAM=m
-CONFIG_PS3_LPM=m
# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
CONFIG_HIGH_RES_TIMERS=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
@@ -53,8 +51,6 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_INET_DIAG is not set
CONFIG_IPV6=y
CONFIG_BT=m
-CONFIG_BT_L2CAP=y
-CONFIG_BT_SCO=y
CONFIG_BT_RFCOMM=m
CONFIG_BT_RFCOMM_TTY=y
CONFIG_BT_BNEP=m
@@ -63,7 +59,6 @@ CONFIG_BT_BNEP_PROTO_FILTER=y
CONFIG_BT_HIDP=m
CONFIG_BT_HCIBTUSB=m
CONFIG_CFG80211=m
-# CONFIG_WIRELESS_EXT_SYSFS is not set
CONFIG_MAC80211=m
CONFIG_MAC80211_RC_PID=y
# CONFIG_MAC80211_RC_MINSTREL is not set
@@ -181,7 +176,6 @@ CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_WRITECOUNT=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_LIST=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
# CONFIG_FTRACE is not set
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_CRYPTO_CCM=m
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index 175295fbf4f..1e2b7d062aa 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -9,7 +9,7 @@ CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_ELF_CORE is not set
-CONFIG_PERF_COUNTERS=y
+CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_SLAB=y
CONFIG_MODULES=y
diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h
index decad950f11..5d7fbe1950f 100644
--- a/arch/powerpc/include/asm/asm-compat.h
+++ b/arch/powerpc/include/asm/asm-compat.h
@@ -29,18 +29,9 @@
#define PPC_LLARX(t, a, b, eh) PPC_LDARX(t, a, b, eh)
#define PPC_STLCX stringify_in_c(stdcx.)
#define PPC_CNTLZL stringify_in_c(cntlzd)
+#define PPC_MTOCRF(FXM, RS) MTOCRF((FXM), (RS))
#define PPC_LR_STKOFF 16
#define PPC_MIN_STKFRM 112
-
-/* Move to CR, single-entry optimized version. Only available
- * on POWER4 and later.
- */
-#ifdef CONFIG_POWER4_ONLY
-#define PPC_MTOCRF stringify_in_c(mtocrf)
-#else
-#define PPC_MTOCRF stringify_in_c(mtcrf)
-#endif
-
#else /* 32-bit */
/* operations for longs and pointers */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index b9219e99bd2..50d82c8a037 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -168,6 +168,7 @@ extern const char *powerpc_base_platform;
#define CPU_FTR_LWSYNC ASM_CONST(0x0000000008000000)
#define CPU_FTR_NOEXECUTE ASM_CONST(0x0000000010000000)
#define CPU_FTR_INDEXED_DCR ASM_CONST(0x0000000020000000)
+#define CPU_FTR_EMB_HV ASM_CONST(0x0000000040000000)
/*
* Add the 64-bit processor unique features in the top half of the word;
@@ -376,7 +377,8 @@ extern const char *powerpc_base_platform;
#define CPU_FTRS_47X (CPU_FTRS_440x6)
#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \
CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
- CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE)
+ CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \
+ CPU_FTR_DEBUG_LVL_EXC)
#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
CPU_FTR_NOEXECUTE)
@@ -385,15 +387,15 @@ extern const char *powerpc_base_platform;
CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
#define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
- CPU_FTR_DBELL)
+ CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
- CPU_FTR_DEBUG_LVL_EXC)
+ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
#define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
- CPU_FTR_DEBUG_LVL_EXC)
+ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
/* 64-bit CPUs */
@@ -486,8 +488,10 @@ enum {
CPU_FTRS_E200 |
#endif
#ifdef CONFIG_E500
- CPU_FTRS_E500 | CPU_FTRS_E500_2 | CPU_FTRS_E500MC |
- CPU_FTRS_E5500 | CPU_FTRS_E6500 |
+ CPU_FTRS_E500 | CPU_FTRS_E500_2 |
+#endif
+#ifdef CONFIG_PPC_E500MC
+ CPU_FTRS_E500MC | CPU_FTRS_E5500 | CPU_FTRS_E6500 |
#endif
0,
};
@@ -531,9 +535,12 @@ enum {
CPU_FTRS_E200 &
#endif
#ifdef CONFIG_E500
- CPU_FTRS_E500 & CPU_FTRS_E500_2 & CPU_FTRS_E500MC &
- CPU_FTRS_E5500 & CPU_FTRS_E6500 &
+ CPU_FTRS_E500 & CPU_FTRS_E500_2 &
+#endif
+#ifdef CONFIG_PPC_E500MC
+ CPU_FTRS_E500MC & CPU_FTRS_E5500 & CPU_FTRS_E6500 &
#endif
+ ~CPU_FTR_EMB_HV & /* can be removed at runtime */
CPU_FTRS_POSSIBLE,
};
#endif /* __powerpc64__ */
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index ce516e5eb0d..ac3eedb9b74 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -9,7 +9,7 @@
* Note: This implementation is limited to a power of 2 number of
* threads per core and the same number for each core in the system
* (though it would work if some processors had less threads as long
- * as the CPU numbers are still allocated, just not brought offline).
+ * as the CPU numbers are still allocated, just not brought online).
*
* However, the API allows for a different implementation in the future
* if needed, as long as you only use the functions and not the variables
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index efa74ac44a3..154c067761b 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -19,6 +19,9 @@
#define PPC_DBELL_MSG_BRDCAST (0x04000000)
#define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36))
+#define PPC_DBELL_TYPE_MASK PPC_DBELL_TYPE(0xf)
+#define PPC_DBELL_LPID(x) ((x) << (63 - 49))
+#define PPC_DBELL_PIR_MASK 0x3fff
enum ppc_dbell {
PPC_DBELL = 0, /* doorbell */
PPC_DBELL_CRIT = 1, /* critical doorbell */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 548da3aa0a3..d58fc4e4149 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -288,13 +288,6 @@ label##_hv: \
/* Exception addition: Hard disable interrupts */
#define DISABLE_INTS SOFT_DISABLE_INTS(r10,r11)
-/* Exception addition: Keep interrupt state */
-#define ENABLE_INTS \
- ld r11,PACAKMSR(r13); \
- ld r12,_MSR(r1); \
- rlwimi r11,r12,0,MSR_EE; \
- mtmsrd r11,1
-
#define ADD_NVGPRS \
bl .save_nvgprs
diff --git a/arch/powerpc/include/asm/gpio.h b/arch/powerpc/include/asm/gpio.h
index 38762edb5e5..b3799d88ffc 100644
--- a/arch/powerpc/include/asm/gpio.h
+++ b/arch/powerpc/include/asm/gpio.h
@@ -1,53 +1,4 @@
-/*
- * Generic GPIO API implementation for PowerPC.
- *
- * Copyright (c) 2007-2008 MontaVista Software, Inc.
- *
- * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __ASM_POWERPC_GPIO_H
-#define __ASM_POWERPC_GPIO_H
-
-#include <linux/errno.h>
-#include <asm-generic/gpio.h>
-
-#ifdef CONFIG_GPIOLIB
-
-/*
- * We don't (yet) implement inlined/rapid versions for on-chip gpios.
- * Just call gpiolib.
- */
-static inline int gpio_get_value(unsigned int gpio)
-{
- return __gpio_get_value(gpio);
-}
-
-static inline void gpio_set_value(unsigned int gpio, int value)
-{
- __gpio_set_value(gpio, value);
-}
-
-static inline int gpio_cansleep(unsigned int gpio)
-{
- return __gpio_cansleep(gpio);
-}
-
-static inline int gpio_to_irq(unsigned int gpio)
-{
- return __gpio_to_irq(gpio);
-}
-
-static inline int irq_to_gpio(unsigned int irq)
-{
- return -EINVAL;
-}
-
-#endif /* CONFIG_GPIOLIB */
-
-#endif /* __ASM_POWERPC_GPIO_H */
+#ifndef __LINUX_GPIO_H
+#warning Include linux/gpio.h instead of asm/gpio.h
+#include <linux/gpio.h>
+#endif
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 1c324ff55ea..423cf9eaf4a 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -77,8 +77,27 @@
#define H_MR_CONDITION -43
#define H_NOT_ENOUGH_RESOURCES -44
#define H_R_STATE -45
-#define H_RESCINDEND -46
-#define H_MULTI_THREADS_ACTIVE -9005
+#define H_RESCINDED -46
+#define H_P2 -55
+#define H_P3 -56
+#define H_P4 -57
+#define H_P5 -58
+#define H_P6 -59
+#define H_P7 -60
+#define H_P8 -61
+#define H_P9 -62
+#define H_TOO_BIG -64
+#define H_OVERLAP -68
+#define H_INTERRUPT -69
+#define H_BAD_DATA -70
+#define H_NOT_ACTIVE -71
+#define H_SG_LIST -72
+#define H_OP_MODE -73
+#define H_COP_HW -74
+#define H_UNSUPPORTED_FLAG_START -256
+#define H_UNSUPPORTED_FLAG_END -511
+#define H_MULTI_THREADS_ACTIVE -9005
+#define H_OUTSTANDING_COP_OPS -9006
/* Long Busy is a condition that can be returned by the firmware
@@ -114,6 +133,16 @@
#define H_PP1 (1UL<<(63-62))
#define H_PP2 (1UL<<(63-63))
+/* Flags for H_REGISTER_VPA subfunction field */
+#define H_VPA_FUNC_SHIFT (63-18) /* Bit posn of subfunction code */
+#define H_VPA_FUNC_MASK 7UL
+#define H_VPA_REG_VPA 1UL /* Register Virtual Processor Area */
+#define H_VPA_REG_DTL 2UL /* Register Dispatch Trace Log */
+#define H_VPA_REG_SLB 3UL /* Register SLB shadow buffer */
+#define H_VPA_DEREG_VPA 5UL /* Deregister Virtual Processor Area */
+#define H_VPA_DEREG_DTL 6UL /* Deregister Dispatch Trace Log */
+#define H_VPA_DEREG_SLB 7UL /* Deregister SLB shadow buffer */
+
/* VASI States */
#define H_VASI_INVALID 0
#define H_VASI_ENABLED 1
@@ -240,6 +269,8 @@
#define H_GET_MPP 0x2D4
#define H_HOME_NODE_ASSOCIATIVITY 0x2EC
#define H_BEST_ENERGY 0x2F4
+#define H_RANDOM 0x300
+#define H_COP 0x304
#define H_GET_MPP_X 0x314
#define MAX_HCALL_OPCODE H_GET_MPP_X
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 51010bfc792..6eb75b80488 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -33,6 +33,7 @@
extern void __replay_interrupt(unsigned int vector);
extern void timer_interrupt(struct pt_regs *);
+extern void performance_monitor_exception(struct pt_regs *regs);
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -99,6 +100,14 @@ static inline void hard_irq_disable(void)
get_paca()->irq_happened |= PACA_IRQ_HARD_DIS;
}
+/* include/linux/interrupt.h needs hard_irq_disable to be a macro */
+#define hard_irq_disable hard_irq_disable
+
+static inline bool lazy_irq_pending(void)
+{
+ return !!(get_paca()->irq_happened & ~PACA_IRQ_HARD_DIS);
+}
+
/*
* This is called by asynchronous interrupts to conditionally
* re-enable hard interrupts when soft-disabled after having
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index e648af92ced..0e40843a1c6 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -18,10 +18,6 @@
#include <linux/atomic.h>
-/* Define a way to iterate across irqs. */
-#define for_each_irq(i) \
- for ((i) = 0; (i) < NR_IRQS; ++(i))
-
extern atomic_t ppc_n_lost_interrupts;
/* This number is used when no interrupt has been assigned */
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index b921c3f4892..1bea4d8ea6f 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -277,6 +277,7 @@ struct kvm_sync_regs {
#define KVM_CPU_E500V2 2
#define KVM_CPU_3S_32 3
#define KVM_CPU_3S_64 4
+#define KVM_CPU_E500MC 5
/* for KVM_CAP_SPAPR_TCE */
struct kvm_create_spapr_tce {
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 7b1f0e0fc65..76fdcfef088 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -20,6 +20,16 @@
#ifndef __POWERPC_KVM_ASM_H__
#define __POWERPC_KVM_ASM_H__
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_64BIT
+#define PPC_STD(sreg, offset, areg) std sreg, (offset)(areg)
+#define PPC_LD(treg, offset, areg) ld treg, (offset)(areg)
+#else
+#define PPC_STD(sreg, offset, areg) stw sreg, (offset+4)(areg)
+#define PPC_LD(treg, offset, areg) lwz treg, (offset+4)(areg)
+#endif
+#endif
+
/* IVPR must be 64KiB-aligned. */
#define VCPU_SIZE_ORDER 4
#define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12)
@@ -48,6 +58,14 @@
#define BOOKE_INTERRUPT_SPE_FP_DATA 33
#define BOOKE_INTERRUPT_SPE_FP_ROUND 34
#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
+#define BOOKE_INTERRUPT_DOORBELL 36
+#define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37
+
+/* booke_hv */
+#define BOOKE_INTERRUPT_GUEST_DBELL 38
+#define BOOKE_INTERRUPT_GUEST_DBELL_CRIT 39
+#define BOOKE_INTERRUPT_HV_SYSCALL 40
+#define BOOKE_INTERRUPT_HV_PRIV 41
/* book3s */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index aa795ccef29..f0e0c6a66d9 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -81,12 +81,13 @@ struct kvmppc_vcpu_book3s {
u64 sdr1;
u64 hior;
u64 msr_mask;
- u64 vsid_next;
#ifdef CONFIG_PPC_BOOK3S_32
u32 vsid_pool[VSID_POOL_SIZE];
+ u32 vsid_next;
#else
- u64 vsid_first;
- u64 vsid_max;
+ u64 proto_vsid_first;
+ u64 proto_vsid_max;
+ u64 proto_vsid_next;
#endif
int context_id[SID_CONTEXTS];
@@ -452,4 +453,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
#define INS_DCBZ 0x7c0007ec
+/* LPIDs we support with this build -- runtime limit may be lower */
+#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
+
#endif /* __ASM_KVM_BOOK3S_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 1f2f5b6156b..88609b23b77 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -79,6 +79,9 @@ struct kvmppc_host_state {
u8 napping;
#ifdef CONFIG_KVM_BOOK3S_64_HV
+ u8 hwthread_req;
+ u8 hwthread_state;
+
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
unsigned long xics_phys;
@@ -122,4 +125,9 @@ struct kvmppc_book3s_shadow_vcpu {
#endif /*__ASSEMBLY__ */
+/* Values for kvm_state */
+#define KVM_HWTHREAD_IN_KERNEL 0
+#define KVM_HWTHREAD_IN_NAP 1
+#define KVM_HWTHREAD_IN_KVM 2
+
#endif /* __ASM_KVM_BOOK3S_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index a90e0918877..b7cd3356a53 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -23,6 +23,9 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
+/* LPIDs we support with this build -- runtime limit may be lower */
+#define KVMPPC_NR_LPIDS 64
+
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
vcpu->arch.gpr[num] = val;
diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
new file mode 100644
index 00000000000..30a600fa1b6
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ASM_KVM_BOOKE_HV_ASM_H
+#define ASM_KVM_BOOKE_HV_ASM_H
+
+#ifdef __ASSEMBLY__
+
+/*
+ * All exceptions from guest state must go through KVM
+ * (except for those which are delivered directly to the guest) --
+ * there are no exceptions for which we fall through directly to
+ * the normal host handler.
+ *
+ * Expected inputs (normal exceptions):
+ * SCRATCH0 = saved r10
+ * r10 = thread struct
+ * r11 = appropriate SRR1 variant (currently used as scratch)
+ * r13 = saved CR
+ * *(r10 + THREAD_NORMSAVE(0)) = saved r11
+ * *(r10 + THREAD_NORMSAVE(2)) = saved r13
+ *
+ * Expected inputs (crit/mcheck/debug exceptions):
+ * appropriate SCRATCH = saved r8
+ * r8 = exception level stack frame
+ * r9 = *(r8 + _CCR) = saved CR
+ * r11 = appropriate SRR1 variant (currently used as scratch)
+ * *(r8 + GPR9) = saved r9
+ * *(r8 + GPR10) = saved r10 (r10 not yet clobbered)
+ * *(r8 + GPR11) = saved r11
+ */
+.macro DO_KVM intno srr1
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+ mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */
+ bf 3, kvmppc_resume_\intno\()_\srr1
+ b kvmppc_handler_\intno\()_\srr1
+kvmppc_resume_\intno\()_\srr1:
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
+.endm
+
+#endif /*__ASSEMBLY__ */
+#endif /* ASM_KVM_BOOKE_HV_ASM_H */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
deleted file mode 100644
index 8cd50a51427..00000000000
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Author: Yu Liu, <yu.liu@freescale.com>
- *
- * Description:
- * This file is derived from arch/powerpc/include/asm/kvm_44x.h,
- * by Hollis Blanchard <hollisb@us.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- */
-
-#ifndef __ASM_KVM_E500_H__
-#define __ASM_KVM_E500_H__
-
-#include <linux/kvm_host.h>
-
-#define BOOKE_INTERRUPT_SIZE 36
-
-#define E500_PID_NUM 3
-#define E500_TLB_NUM 2
-
-#define E500_TLB_VALID 1
-#define E500_TLB_DIRTY 2
-
-struct tlbe_ref {
- pfn_t pfn;
- unsigned int flags; /* E500_TLB_* */
-};
-
-struct tlbe_priv {
- struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */
-};
-
-struct vcpu_id_table;
-
-struct kvmppc_e500_tlb_params {
- int entries, ways, sets;
-};
-
-struct kvmppc_vcpu_e500 {
- /* Unmodified copy of the guest's TLB -- shared with host userspace. */
- struct kvm_book3e_206_tlb_entry *gtlb_arch;
-
- /* Starting entry number in gtlb_arch[] */
- int gtlb_offset[E500_TLB_NUM];
-
- /* KVM internal information associated with each guest TLB entry */
- struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
-
- struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM];
-
- unsigned int gtlb_nv[E500_TLB_NUM];
-
- /*
- * information associated with each host TLB entry --
- * TLB1 only for now. If/when guest TLB1 entries can be
- * mapped with host TLB0, this will be used for that too.
- *
- * We don't want to use this for guest TLB0 because then we'd
- * have the overhead of doing the translation again even if
- * the entry is still in the guest TLB (e.g. we swapped out
- * and back, and our host TLB entries got evicted).
- */
- struct tlbe_ref *tlb_refs[E500_TLB_NUM];
- unsigned int host_tlb1_nv;
-
- u32 host_pid[E500_PID_NUM];
- u32 pid[E500_PID_NUM];
- u32 svr;
-
- /* vcpu id table */
- struct vcpu_id_table *idt;
-
- u32 l1csr0;
- u32 l1csr1;
- u32 hid0;
- u32 hid1;
- u32 tlb0cfg;
- u32 tlb1cfg;
- u64 mcar;
-
- struct page **shared_tlb_pages;
- int num_shared_tlb_pages;
-
- struct kvm_vcpu vcpu;
-};
-
-static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
-{
- return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
-}
-
-#endif /* __ASM_KVM_E500_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 52eb9c1f4fe..d848cdc4971 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -82,7 +82,7 @@ struct kvm_vcpu;
struct lppaca;
struct slb_shadow;
-struct dtl;
+struct dtl_entry;
struct kvm_vm_stat {
u32 remote_tlb_flush;
@@ -106,6 +106,8 @@ struct kvm_vcpu_stat {
u32 dec_exits;
u32 ext_intr_exits;
u32 halt_wakeup;
+ u32 dbell_exits;
+ u32 gdbell_exits;
#ifdef CONFIG_PPC_BOOK3S
u32 pf_storage;
u32 pf_instruc;
@@ -140,6 +142,7 @@ enum kvm_exit_types {
EMULATED_TLBSX_EXITS,
EMULATED_TLBWE_EXITS,
EMULATED_RFI_EXITS,
+ EMULATED_RFCI_EXITS,
DEC_EXITS,
EXT_INTR_EXITS,
HALT_WAKEUP,
@@ -147,6 +150,8 @@ enum kvm_exit_types {
FP_UNAVAIL,
DEBUG_EXITS,
TIMEINGUEST,
+ DBELL_EXITS,
+ GDBELL_EXITS,
__NUMBER_OF_KVM_EXIT_TYPES
};
@@ -217,10 +222,10 @@ struct kvm_arch_memory_slot {
};
struct kvm_arch {
+ unsigned int lpid;
#ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long hpt_virt;
struct revmap_entry *revmap;
- unsigned int lpid;
unsigned int host_lpid;
unsigned long host_lpcr;
unsigned long sdr1;
@@ -232,7 +237,6 @@ struct kvm_arch {
unsigned long vrma_slb_v;
int rma_setup_done;
int using_mmu_notifiers;
- struct list_head spapr_tce_tables;
spinlock_t slot_phys_lock;
unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
int slot_npages[KVM_MEM_SLOTS_NUM];
@@ -240,6 +244,9 @@ struct kvm_arch {
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
struct kvmppc_linear_info *hpt_li;
#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct list_head spapr_tce_tables;
+#endif
};
/*
@@ -263,6 +270,9 @@ struct kvmppc_vcore {
struct list_head runnable_threads;
spinlock_t lock;
wait_queue_head_t wq;
+ u64 stolen_tb;
+ u64 preempt_tb;
+ struct kvm_vcpu *runner;
};
#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
@@ -274,6 +284,19 @@ struct kvmppc_vcore {
#define VCORE_EXITING 2
#define VCORE_SLEEPING 3
+/*
+ * Struct used to manage memory for a virtual processor area
+ * registered by a PAPR guest. There are three types of area
+ * that a guest can register.
+ */
+struct kvmppc_vpa {
+ void *pinned_addr; /* Address in kernel linear mapping */
+ void *pinned_end; /* End of region */
+ unsigned long next_gpa; /* Guest phys addr for update */
+ unsigned long len; /* Number of bytes required */
+ u8 update_pending; /* 1 => update pinned_addr from next_gpa */
+};
+
struct kvmppc_pte {
ulong eaddr;
u64 vpage;
@@ -345,6 +368,17 @@ struct kvm_vcpu_arch {
u64 vsr[64];
#endif
+#ifdef CONFIG_KVM_BOOKE_HV
+ u32 host_mas4;
+ u32 host_mas6;
+ u32 shadow_epcr;
+ u32 epcr;
+ u32 shadow_msrp;
+ u32 eplc;
+ u32 epsc;
+ u32 oldpir;
+#endif
+
#ifdef CONFIG_PPC_BOOK3S
/* For Gekko paired singles */
u32 qpr[32];
@@ -370,6 +404,7 @@ struct kvm_vcpu_arch {
#endif
u32 vrsave; /* also USPRG0 */
u32 mmucr;
+ /* shadow_msr is unused for BookE HV */
ulong shadow_msr;
ulong csrr0;
ulong csrr1;
@@ -426,8 +461,12 @@ struct kvm_vcpu_arch {
ulong fault_esr;
ulong queued_dear;
ulong queued_esr;
+ u32 tlbcfg[4];
+ u32 mmucfg;
+ u32 epr;
#endif
gpa_t paddr_accessed;
+ gva_t vaddr_accessed;
u8 io_gpr; /* GPR used as IO source/target */
u8 mmio_is_bigendian;
@@ -453,11 +492,6 @@ struct kvm_vcpu_arch {
u8 prodded;
u32 last_inst;
- struct lppaca *vpa;
- struct slb_shadow *slb_shadow;
- struct dtl *dtl;
- struct dtl *dtl_end;
-
wait_queue_head_t *wqp;
struct kvmppc_vcore *vcore;
int ret;
@@ -482,6 +516,14 @@ struct kvm_vcpu_arch {
struct task_struct *run_task;
struct kvm_run *kvm_run;
pgd_t *pgdir;
+
+ spinlock_t vpa_update_lock;
+ struct kvmppc_vpa vpa;
+ struct kvmppc_vpa dtl;
+ struct dtl_entry *dtl_ptr;
+ unsigned long dtl_index;
+ u64 stolen_logged;
+ struct kvmppc_vpa slb_shadow;
#endif
};
@@ -498,4 +540,6 @@ struct kvm_vcpu_arch {
#define KVM_MMIO_REG_QPR 0x0040
#define KVM_MMIO_REG_FQPR 0x0060
+#define __KVM_HAVE_ARCH_WQP
+
#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 7b754e74300..c18916bff68 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -206,6 +206,11 @@ static inline unsigned int kvm_arch_para_features(void)
return r;
}
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+ return false;
+}
+
#endif /* __KERNEL__ */
#endif /* __POWERPC_KVM_PARA_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 9d6dee0f7d4..f68c22fa2fc 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -95,7 +95,7 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
@@ -107,8 +107,10 @@ extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int op, int *advance);
-extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
-extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
+extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn,
+ ulong val);
+extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn,
+ ulong *val);
extern int kvmppc_booke_init(void);
extern void kvmppc_booke_exit(void);
@@ -126,6 +128,8 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
+extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce);
extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
struct kvm_allocate_rma *rma);
extern struct kvmppc_linear_info *kvm_alloc_rma(void);
@@ -138,6 +142,11 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
+extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
+ struct kvm_ppc_smmu_info *info);
+
+extern int kvmppc_bookehv_init(void);
+extern void kvmppc_bookehv_exit(void);
/*
* Cuts out inst bits with ordering according to spec.
@@ -204,4 +213,9 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
struct kvm_dirty_tlb *cfg);
+long kvmppc_alloc_lpid(void);
+void kvmppc_claim_lpid(long lpid);
+void kvmppc_free_lpid(long lpid);
+void kvmppc_init_lpid(unsigned long nr_lpids);
+
#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index a76254af0aa..531fe0c3108 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -20,18 +20,16 @@
#define _ASM_POWERPC_LPPACA_H
#ifdef __KERNEL__
-/* These definitions relate to hypervisors that only exist when using
+/*
+ * These definitions relate to hypervisors that only exist when using
* a server type processor
*/
#ifdef CONFIG_PPC_BOOK3S
-//=============================================================================
-//
-// This control block contains the data that is shared between the
-// hypervisor (PLIC) and the OS.
-//
-//
-//----------------------------------------------------------------------------
+/*
+ * This control block contains the data that is shared between the
+ * hypervisor and the OS.
+ */
#include <linux/cache.h>
#include <linux/threads.h>
#include <asm/types.h>
@@ -43,123 +41,65 @@
*/
#define NR_LPPACAS 1
-
-/* The Hypervisor barfs if the lppaca crosses a page boundary. A 1k
- * alignment is sufficient to prevent this */
+/*
+ * The Hypervisor barfs if the lppaca crosses a page boundary. A 1k
+ * alignment is sufficient to prevent this
+ */
struct lppaca {
-//=============================================================================
-// CACHE_LINE_1 0x0000 - 0x007F Contains read-only data
-// NOTE: The xDynXyz fields are fields that will be dynamically changed by
-// PLIC when preparing to bring a processor online or when dispatching a
-// virtual processor!
-//=============================================================================
- u32 desc; // Eye catcher 0xD397D781 x00-x03
- u16 size; // Size of this struct x04-x05
- u16 reserved1; // Reserved x06-x07
- u16 reserved2:14; // Reserved x08-x09
- u8 shared_proc:1; // Shared processor indicator ...
- u8 secondary_thread:1; // Secondary thread indicator ...
- volatile u8 dyn_proc_status:8; // Dynamic Status of this proc x0A-x0A
- u8 secondary_thread_count; // Secondary thread count x0B-x0B
- volatile u16 dyn_hv_phys_proc_index;// Dynamic HV Physical Proc Index0C-x0D
- volatile u16 dyn_hv_log_proc_index;// Dynamic HV Logical Proc Indexx0E-x0F
- u32 decr_val; // Value for Decr programming x10-x13
- u32 pmc_val; // Value for PMC regs x14-x17
- volatile u32 dyn_hw_node_id; // Dynamic Hardware Node id x18-x1B
- volatile u32 dyn_hw_proc_id; // Dynamic Hardware Proc Id x1C-x1F
- volatile u32 dyn_pir; // Dynamic ProcIdReg value x20-x23
- u32 dsei_data; // DSEI data x24-x27
- u64 sprg3; // SPRG3 value x28-x2F
- u8 reserved3[40]; // Reserved x30-x57
- volatile u8 vphn_assoc_counts[8]; // Virtual processor home node
- // associativity change counters x58-x5F
- u8 reserved4[32]; // Reserved x60-x7F
-
-//=============================================================================
-// CACHE_LINE_2 0x0080 - 0x00FF Contains local read-write data
-//=============================================================================
- // This Dword contains a byte for each type of interrupt that can occur.
- // The IPI is a count while the others are just a binary 1 or 0.
- union {
- u64 any_int;
- struct {
- u16 reserved; // Reserved - cleared by #mpasmbl
- u8 xirr_int; // Indicates xXirrValue is valid or Immed IO
- u8 ipi_cnt; // IPI Count
- u8 decr_int; // DECR interrupt occurred
- u8 pdc_int; // PDC interrupt occurred
- u8 quantum_int; // Interrupt quantum reached
- u8 old_plic_deferred_ext_int; // Old PLIC has a deferred XIRR pending
- } fields;
- } int_dword;
-
- // Whenever any fields in this Dword are set then PLIC will defer the
- // processing of external interrupts. Note that PLIC will store the
- // XIRR directly into the xXirrValue field so that another XIRR will
- // not be presented until this one clears. The layout of the low
- // 4-bytes of this Dword is up to SLIC - PLIC just checks whether the
- // entire Dword is zero or not. A non-zero value in the low order
- // 2-bytes will result in SLIC being granted the highest thread
- // priority upon return. A 0 will return to SLIC as medium priority.
- u64 plic_defer_ints_area; // Entire Dword
-
- // Used to pass the real SRR0/1 from PLIC to SLIC as well as to
- // pass the target SRR0/1 from SLIC to PLIC on a SetAsrAndRfid.
- u64 saved_srr0; // Saved SRR0 x10-x17
- u64 saved_srr1; // Saved SRR1 x18-x1F
-
- // Used to pass parms from the OS to PLIC for SetAsrAndRfid
- u64 saved_gpr3; // Saved GPR3 x20-x27
- u64 saved_gpr4; // Saved GPR4 x28-x2F
- union {
- u64 saved_gpr5; /* Saved GPR5 x30-x37 */
- struct {
- u8 cede_latency_hint; /* x30 */
- u8 reserved[7]; /* x31-x36 */
- } fields;
- } gpr5_dword;
-
-
- u8 dtl_enable_mask; // Dispatch Trace Log mask x38-x38
- u8 donate_dedicated_cpu; // Donate dedicated CPU cycles x39-x39
- u8 fpregs_in_use; // FP regs in use x3A-x3A
- u8 pmcregs_in_use; // PMC regs in use x3B-x3B
- volatile u32 saved_decr; // Saved Decr Value x3C-x3F
- volatile u64 emulated_time_base;// Emulated TB for this thread x40-x47
- volatile u64 cur_plic_latency; // Unaccounted PLIC latency x48-x4F
- u64 tot_plic_latency; // Accumulated PLIC latency x50-x57
- u64 wait_state_cycles; // Wait cycles for this proc x58-x5F
- u64 end_of_quantum; // TB at end of quantum x60-x67
- u64 pdc_saved_sprg1; // Saved SPRG1 for PMC int x68-x6F
- u64 pdc_saved_srr0; // Saved SRR0 for PMC int x70-x77
- volatile u32 virtual_decr; // Virtual DECR for shared procsx78-x7B
- u16 slb_count; // # of SLBs to maintain x7C-x7D
- u8 idle; // Indicate OS is idle x7E
- u8 vmxregs_in_use; // VMX registers in use x7F
-
-
-//=============================================================================
-// CACHE_LINE_3 0x0100 - 0x017F: This line is shared with other processors
-//=============================================================================
- // This is the yield_count. An "odd" value (low bit on) means that
- // the processor is yielded (either because of an OS yield or a PLIC
- // preempt). An even value implies that the processor is currently
- // executing.
- // NOTE: This value will ALWAYS be zero for dedicated processors and
- // will NEVER be zero for shared processors (ie, initialized to a 1).
- volatile u32 yield_count; // PLIC increments each dispatchx00-x03
- volatile u32 dispersion_count; // dispatch changed phys cpu x04-x07
- volatile u64 cmo_faults; // CMO page fault count x08-x0F
- volatile u64 cmo_fault_time; // CMO page fault time x10-x17
- u8 reserved7[104]; // Reserved x18-x7F
-
-//=============================================================================
-// CACHE_LINE_4-5 0x0180 - 0x027F Contains PMC interrupt data
-//=============================================================================
- u32 page_ins; // CMO Hint - # page ins by OS x00-x03
- u8 reserved8[148]; // Reserved x04-x97
- volatile u64 dtl_idx; // Dispatch Trace Log head idx x98-x9F
- u8 reserved9[96]; // Reserved xA0-xFF
+ /* cacheline 1 contains read-only data */
+
+ u32 desc; /* Eye catcher 0xD397D781 */
+ u16 size; /* Size of this struct */
+ u16 reserved1;
+ u16 reserved2:14;
+ u8 shared_proc:1; /* Shared processor indicator */
+ u8 secondary_thread:1; /* Secondary thread indicator */
+ u8 reserved3[14];
+ volatile u32 dyn_hw_node_id; /* Dynamic hardware node id */
+ volatile u32 dyn_hw_proc_id; /* Dynamic hardware proc id */
+ u8 reserved4[56];
+ volatile u8 vphn_assoc_counts[8]; /* Virtual processor home node */
+ /* associativity change counters */
+ u8 reserved5[32];
+
+ /* cacheline 2 contains local read-write data */
+
+ u8 reserved6[48];
+ u8 cede_latency_hint;
+ u8 reserved7[7];
+ u8 dtl_enable_mask; /* Dispatch Trace Log mask */
+ u8 donate_dedicated_cpu; /* Donate dedicated CPU cycles */
+ u8 fpregs_in_use;
+ u8 pmcregs_in_use;
+ u8 reserved8[28];
+ u64 wait_state_cycles; /* Wait cycles for this proc */
+ u8 reserved9[28];
+ u16 slb_count; /* # of SLBs to maintain */
+ u8 idle; /* Indicate OS is idle */
+ u8 vmxregs_in_use;
+
+ /* cacheline 3 is shared with other processors */
+
+ /*
+ * This is the yield_count. An "odd" value (low bit on) means that
+ * the processor is yielded (either because of an OS yield or a
+ * hypervisor preempt). An even value implies that the processor is
+ * currently executing.
+ * NOTE: This value will ALWAYS be zero for dedicated processors and
+ * will NEVER be zero for shared processors (ie, initialized to a 1).
+ */
+ volatile u32 yield_count;
+ volatile u32 dispersion_count; /* dispatch changed physical cpu */
+ volatile u64 cmo_faults; /* CMO page fault count */
+ volatile u64 cmo_fault_time; /* CMO page fault time */
+ u8 reserved10[104];
+
+ /* cacheline 4-5 */
+
+ u32 page_ins; /* CMO Hint - # page ins by OS */
+ u8 reserved11[148];
+ volatile u64 dtl_idx; /* Dispatch Trace Log head index */
+ u8 reserved12[96];
} __attribute__((__aligned__(0x400)));
extern struct lppaca lppaca[];
@@ -172,13 +112,13 @@ extern struct lppaca lppaca[];
* ESID is stored in the lower 64bits, then the VSID.
*/
struct slb_shadow {
- u32 persistent; // Number of persistent SLBs x00-x03
- u32 buffer_length; // Total shadow buffer length x04-x07
- u64 reserved; // Alignment x08-x0f
+ u32 persistent; /* Number of persistent SLBs */
+ u32 buffer_length; /* Total shadow buffer length */
+ u64 reserved;
struct {
u64 esid;
u64 vsid;
- } save_area[SLB_NUM_BOLTED]; // x10-x40
+ } save_area[SLB_NUM_BOLTED];
} ____cacheline_aligned;
extern struct slb_shadow slb_shadow[];
diff --git a/arch/powerpc/include/asm/lv1call.h b/arch/powerpc/include/asm/lv1call.h
index 233f9ecae76..f5117674bf9 100644
--- a/arch/powerpc/include/asm/lv1call.h
+++ b/arch/powerpc/include/asm/lv1call.h
@@ -265,8 +265,8 @@ LV1_CALL(get_spe_irq_outlet, 2, 1, 78 )
LV1_CALL(set_spe_privilege_state_area_1_register, 3, 0, 79 )
LV1_CALL(create_repository_node, 6, 0, 90 )
LV1_CALL(read_repository_node, 5, 2, 91 )
-LV1_CALL(modify_repository_node_value, 6, 0, 92 )
-LV1_CALL(remove_repository_node, 4, 0, 93 )
+LV1_CALL(write_repository_node, 6, 0, 92 )
+LV1_CALL(delete_repository_node, 4, 0, 93 )
LV1_CALL(read_htab_entries, 2, 5, 95 )
LV1_CALL(set_dabr, 2, 0, 96 )
LV1_CALL(get_total_execution_time, 2, 1, 103 )
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index cdb5421877e..eeabcdbc30f 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -104,6 +104,8 @@
#define MAS4_TSIZED_MASK 0x00000f80 /* Default TSIZE */
#define MAS4_TSIZED_SHIFT 7
+#define MAS5_SGS 0x80000000
+
#define MAS6_SPID0 0x3FFF0000
#define MAS6_SPID1 0x00007FFE
#define MAS6_ISIZE(x) MAS1_TSIZE(x)
@@ -118,6 +120,10 @@
#define MAS7_RPN 0xFFFFFFFF
+#define MAS8_TGS 0x80000000 /* Guest space */
+#define MAS8_VF 0x40000000 /* Virtualization Fault */
+#define MAS8_TLPID 0x000000ff
+
/* Bit definitions for MMUCFG */
#define MMUCFG_MAVN 0x00000003 /* MMU Architecture Version Number */
#define MMUCFG_MAVN_V1 0x00000000 /* v1.0 */
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index c65b9294376..c9f698a994b 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -275,9 +275,6 @@ struct mpic
unsigned int isu_mask;
/* Number of sources */
unsigned int num_sources;
- /* default senses array */
- unsigned char *senses;
- unsigned int senses_count;
/* vector numbers used for internal sources (ipi/timers) */
unsigned int ipi_vecs[4];
@@ -415,21 +412,6 @@ extern struct mpic *mpic_alloc(struct device_node *node,
extern void mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
phys_addr_t phys_addr);
-/* Set default sense codes
- *
- * @mpic: controller
- * @senses: array of sense codes
- * @count: size of above array
- *
- * Optionally provide an array (indexed on hardware interrupt numbers
- * for this MPIC) of default sense codes for the chip. Those are linux
- * sense codes IRQ_TYPE_*
- *
- * The driver gets ownership of the pointer, don't dispose of it or
- * anything like that. __init only.
- */
-extern void mpic_set_default_senses(struct mpic *mpic, u8 *senses, int count);
-
/* Initialize the controller. After this has been called, none of the above
* should be called again for this mpic
diff --git a/arch/powerpc/include/asm/mpic_msgr.h b/arch/powerpc/include/asm/mpic_msgr.h
index 3ec37dc9003..326d33ca55c 100644
--- a/arch/powerpc/include/asm/mpic_msgr.h
+++ b/arch/powerpc/include/asm/mpic_msgr.h
@@ -13,6 +13,7 @@
#include <linux/types.h>
#include <linux/spinlock.h>
+#include <asm/smp.h>
struct mpic_msgr {
u32 __iomem *base;
diff --git a/arch/powerpc/include/asm/pSeries_reconfig.h b/arch/powerpc/include/asm/pSeries_reconfig.h
index 23cd6cc30bc..c07edfe98b9 100644
--- a/arch/powerpc/include/asm/pSeries_reconfig.h
+++ b/arch/powerpc/include/asm/pSeries_reconfig.h
@@ -13,6 +13,18 @@
#define PSERIES_RECONFIG_REMOVE 0x0002
#define PSERIES_DRCONF_MEM_ADD 0x0003
#define PSERIES_DRCONF_MEM_REMOVE 0x0004
+#define PSERIES_UPDATE_PROPERTY 0x0005
+
+/**
+ * pSeries_reconfig_notify - Notifier value structure for OFDT property updates
+ *
+ * @node: Device tree node which owns the property being updated
+ * @property: Updated property
+ */
+struct pSeries_reconfig_prop_update {
+ struct device_node *node;
+ struct property *property;
+};
#ifdef CONFIG_PPC_PSERIES
extern int pSeries_reconfig_notifier_register(struct notifier_block *);
diff --git a/arch/powerpc/include/asm/posix_types.h b/arch/powerpc/include/asm/posix_types.h
index f1393252bbd..2958c5b97b2 100644
--- a/arch/powerpc/include/asm/posix_types.h
+++ b/arch/powerpc/include/asm/posix_types.h
@@ -16,9 +16,6 @@ typedef int __kernel_ssize_t;
typedef long __kernel_ptrdiff_t;
#define __kernel_size_t __kernel_size_t
-typedef unsigned short __kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
typedef short __kernel_ipc_pid_t;
#define __kernel_ipc_pid_t __kernel_ipc_pid_t
#endif
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 50f73aa2ba2..15444204a3a 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -369,7 +369,15 @@ BEGIN_FTR_SECTION \
END_FTR_SECTION_IFCLR(CPU_FTR_601)
#endif
-
+#ifdef CONFIG_PPC64
+#define MTOCRF(FXM, RS) \
+ BEGIN_FTR_SECTION_NESTED(848); \
+ mtcrf (FXM), (RS); \
+ FTR_SECTION_ELSE_NESTED(848); \
+ mtocrf (FXM), (RS); \
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_NOEXECUTE, 848)
+#endif
+
/*
* This instruction is not implemented on the PPC 603 or 601; however, on
* the 403GCX and 405GP tlbia IS defined and tlbie is not.
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 8e2d0371fe1..413a5eaef56 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -74,9 +74,6 @@ struct task_struct;
void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp);
void release_thread(struct task_struct *);
-/* Prepare to copy thread state - unlazy all lazy status */
-extern void prepare_to_copy(struct task_struct *tsk);
-
/* Create a new kernel thread. */
extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
@@ -243,6 +240,9 @@ struct thread_struct {
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
void* kvm_shadow_vcpu; /* KVM internal data */
#endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
+#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+ struct kvm_vcpu *kvm_vcpu;
+#endif
#ifdef CONFIG_PPC64
unsigned long dscr;
int dscr_inherit;
@@ -386,7 +386,6 @@ extern unsigned long cpuidle_disable;
enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
-void cpu_idle_wait(void);
#ifdef CONFIG_PSERIES_IDLE
extern void update_smt_snooze_delay(int snooze);
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 84cc7840cd1..9c21ed42aba 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -354,12 +354,6 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
#define PTRACE_GETREGS64 22
#define PTRACE_SETREGS64 23
-/* (old) PTRACE requests with inverted arguments */
-#define PPC_PTRACE_GETREGS 0x99 /* Get GPRs 0 - 31 */
-#define PPC_PTRACE_SETREGS 0x98 /* Set GPRs 0 - 31 */
-#define PPC_PTRACE_GETFPREGS 0x97 /* Get FPRs 0 - 31 */
-#define PPC_PTRACE_SETFPREGS 0x96 /* Set FPRs 0 - 31 */
-
/* Calls to trace a 64bit program from a 32bit program */
#define PPC_PTRACE_PEEKTEXT_3264 0x95
#define PPC_PTRACE_PEEKDATA_3264 0x94
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 9d7f0fb6902..f0cb7f461b9 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -257,7 +257,9 @@
#define LPCR_LPES_SH 2
#define LPCR_RMI 0x00000002 /* real mode is cache inhibit */
#define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */
+#ifndef SPRN_LPID
#define SPRN_LPID 0x13F /* Logical Partition Identifier */
+#endif
#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
#define SPRN_HMER 0x150 /* Hardware m? error recovery */
#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index b86faa9107d..2d916c4982c 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -15,11 +15,6 @@
#ifndef __ASM_POWERPC_REG_BOOKE_H__
#define __ASM_POWERPC_REG_BOOKE_H__
-#ifdef CONFIG_BOOKE_WDT
-extern u32 booke_wdt_enabled;
-extern u32 booke_wdt_period;
-#endif /* CONFIG_BOOKE_WDT */
-
/* Machine State Register (MSR) Fields */
#define MSR_GS (1<<28) /* Guest state */
#define MSR_UCLE (1<<26) /* User-mode cache lock enable */
@@ -61,18 +56,30 @@ extern u32 booke_wdt_period;
#define SPRN_SPRG7W 0x117 /* Special Purpose Register General 7 Write */
#define SPRN_EPCR 0x133 /* Embedded Processor Control Register */
#define SPRN_DBCR2 0x136 /* Debug Control Register 2 */
+#define SPRN_MSRP 0x137 /* MSR Protect Register */
#define SPRN_IAC3 0x13A /* Instruction Address Compare 3 */
#define SPRN_IAC4 0x13B /* Instruction Address Compare 4 */
#define SPRN_DVC1 0x13E /* Data Value Compare Register 1 */
#define SPRN_DVC2 0x13F /* Data Value Compare Register 2 */
+#define SPRN_LPID 0x152 /* Logical Partition ID */
#define SPRN_MAS8 0x155 /* MMU Assist Register 8 */
#define SPRN_TLB0PS 0x158 /* TLB 0 Page Size Register */
#define SPRN_TLB1PS 0x159 /* TLB 1 Page Size Register */
#define SPRN_MAS5_MAS6 0x15c /* MMU Assist Register 5 || 6 */
#define SPRN_MAS8_MAS1 0x15d /* MMU Assist Register 8 || 1 */
#define SPRN_EPTCFG 0x15e /* Embedded Page Table Config */
+#define SPRN_GSPRG0 0x170 /* Guest SPRG0 */
+#define SPRN_GSPRG1 0x171 /* Guest SPRG1 */
+#define SPRN_GSPRG2 0x172 /* Guest SPRG2 */
+#define SPRN_GSPRG3 0x173 /* Guest SPRG3 */
#define SPRN_MAS7_MAS3 0x174 /* MMU Assist Register 7 || 3 */
#define SPRN_MAS0_MAS1 0x175 /* MMU Assist Register 0 || 1 */
+#define SPRN_GSRR0 0x17A /* Guest SRR0 */
+#define SPRN_GSRR1 0x17B /* Guest SRR1 */
+#define SPRN_GEPR 0x17C /* Guest EPR */
+#define SPRN_GDEAR 0x17D /* Guest DEAR */
+#define SPRN_GPIR 0x17E /* Guest PIR */
+#define SPRN_GESR 0x17F /* Guest Exception Syndrome Register */
#define SPRN_IVOR0 0x190 /* Interrupt Vector Offset Register 0 */
#define SPRN_IVOR1 0x191 /* Interrupt Vector Offset Register 1 */
#define SPRN_IVOR2 0x192 /* Interrupt Vector Offset Register 2 */
@@ -93,6 +100,13 @@ extern u32 booke_wdt_period;
#define SPRN_IVOR39 0x1B1 /* Interrupt Vector Offset Register 39 */
#define SPRN_IVOR40 0x1B2 /* Interrupt Vector Offset Register 40 */
#define SPRN_IVOR41 0x1B3 /* Interrupt Vector Offset Register 41 */
+#define SPRN_GIVOR2 0x1B8 /* Guest IVOR2 */
+#define SPRN_GIVOR3 0x1B9 /* Guest IVOR3 */
+#define SPRN_GIVOR4 0x1BA /* Guest IVOR4 */
+#define SPRN_GIVOR8 0x1BB /* Guest IVOR8 */
+#define SPRN_GIVOR13 0x1BC /* Guest IVOR13 */
+#define SPRN_GIVOR14 0x1BD /* Guest IVOR14 */
+#define SPRN_GIVPR 0x1BF /* Guest IVPR */
#define SPRN_SPEFSCR 0x200 /* SPE & Embedded FP Status & Control */
#define SPRN_BBEAR 0x201 /* Branch Buffer Entry Address Register */
#define SPRN_BBTAR 0x202 /* Branch Buffer Target Address Register */
@@ -245,6 +259,10 @@ extern u32 booke_wdt_period;
#define MCSR_LDG 0x00002000UL /* Guarded Load */
#define MCSR_TLBSYNC 0x00000002UL /* Multiple tlbsyncs detected */
#define MCSR_BSL2_ERR 0x00000001UL /* Backside L2 cache error */
+
+#define MSRP_UCLEP 0x04000000 /* Protect MSR[UCLE] */
+#define MSRP_DEP 0x00000200 /* Protect MSR[DE] */
+#define MSRP_PMMP 0x00000004 /* Protect MSR[PMM] */
#endif
#ifdef CONFIG_E200
@@ -599,6 +617,17 @@ extern u32 booke_wdt_period;
#define SPRN_EPCR_DMIUH 0x00400000 /* Disable MAS Interrupt updates
* for hypervisor */
+/* Bit definitions for EPLC/EPSC */
+#define EPC_EPR 0x80000000 /* 1 = user, 0 = kernel */
+#define EPC_EPR_SHIFT 31
+#define EPC_EAS 0x40000000 /* Address Space */
+#define EPC_EAS_SHIFT 30
+#define EPC_EGS 0x20000000 /* 1 = guest, 0 = hypervisor */
+#define EPC_EGS_SHIFT 29
+#define EPC_ELPID 0x00ff0000
+#define EPC_ELPID_SHIFT 16
+#define EPC_EPID 0x00003fff
+#define EPC_EPID_SHIFT 0
/*
* The IBM-403 is an even more odd special case, as it is much
diff --git a/arch/powerpc/include/asm/stat.h b/arch/powerpc/include/asm/stat.h
index e4edc510b53..84880b80cc1 100644
--- a/arch/powerpc/include/asm/stat.h
+++ b/arch/powerpc/include/asm/stat.h
@@ -30,11 +30,11 @@ struct stat {
unsigned long st_dev;
ino_t st_ino;
#ifdef __powerpc64__
- nlink_t st_nlink;
+ unsigned long st_nlink;
mode_t st_mode;
#else
mode_t st_mode;
- nlink_t st_nlink;
+ unsigned short st_nlink;
#endif
uid_t st_uid;
gid_t st_gid;
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index caf82d0a00d..200d763a0a6 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -17,11 +17,11 @@ extern struct task_struct *_switch(struct thread_struct *prev,
struct thread_struct *next);
extern void giveup_fpu(struct task_struct *);
+extern void load_up_fpu(void);
extern void disable_kernel_fp(void);
extern void enable_kernel_fp(void);
extern void flush_fp_to_thread(struct task_struct *);
extern void enable_kernel_altivec(void);
-extern void giveup_altivec(struct task_struct *);
extern void load_up_altivec(struct task_struct *);
extern int emulate_altivec(struct pt_regs *);
extern void __giveup_vsx(struct task_struct *);
@@ -40,10 +40,15 @@ static inline void discard_lazy_cpu_state(void)
#ifdef CONFIG_ALTIVEC
extern void flush_altivec_to_thread(struct task_struct *);
+extern void giveup_altivec(struct task_struct *);
+extern void giveup_altivec_notask(void);
#else
static inline void flush_altivec_to_thread(struct task_struct *t)
{
}
+static inline void giveup_altivec(struct task_struct *t)
+{
+}
#endif
#ifdef CONFIG_VSX
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 4a741c7efd0..68831e9cf82 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -62,21 +62,8 @@ struct thread_info {
#define init_thread_info (init_thread_union.thread_info)
#define init_stack (init_thread_union.stack)
-/* thread information allocation */
-
-#if THREAD_SHIFT >= PAGE_SHIFT
-
#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT)
-#else /* THREAD_SHIFT < PAGE_SHIFT */
-
-#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-
-extern struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node);
-extern void free_thread_info(struct thread_info *ti);
-
-#endif /* THREAD_SHIFT < PAGE_SHIFT */
-
/* how to get the thread information struct from C */
static inline struct thread_info *current_thread_info(void)
{
@@ -126,7 +113,6 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_NOERROR (1<<TIF_NOERROR)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
-#define _TIF_RUNLATCH (1<<TIF_RUNLATCH)
#define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
@@ -154,7 +140,23 @@ static inline void set_restore_sigmask(void)
{
struct thread_info *ti = current_thread_info();
ti->local_flags |= _TLF_RESTORE_SIGMASK;
- set_bit(TIF_SIGPENDING, &ti->flags);
+ WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags));
+}
+static inline void clear_restore_sigmask(void)
+{
+ current_thread_info()->local_flags &= ~_TLF_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+ return current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+ struct thread_info *ti = current_thread_info();
+ if (!(ti->local_flags & _TLF_RESTORE_SIGMASK))
+ return false;
+ ti->local_flags &= ~_TLF_RESTORE_SIGMASK;
+ return true;
}
static inline bool test_thread_local_flags(unsigned int flags)
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 2136f58a54e..3b4b4a8da92 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -23,6 +23,7 @@
extern unsigned long tb_ticks_per_jiffy;
extern unsigned long tb_ticks_per_usec;
extern unsigned long tb_ticks_per_sec;
+extern struct clock_event_device decrementer_clockevent;
struct rtc_time;
extern void to_tm(int tim, struct rtc_time * tm);
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index c97185885c6..852ed1b384f 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -18,12 +18,6 @@ struct device_node;
*/
#define RECLAIM_DISTANCE 10
-/*
- * Avoid creating an extra level of balancing (SD_ALLNODES) on the largest
- * POWER7 boxes which have a maximum of 32 nodes.
- */
-#define SD_NODES_PER_DOMAIN 32
-
#include <asm/mmzone.h>
static inline int cpu_to_node(int cpu)
@@ -51,36 +45,6 @@ static inline int pcibus_to_node(struct pci_bus *bus)
cpu_all_mask : \
cpumask_of_node(pcibus_to_node(bus)))
-/* sched_domains SD_NODE_INIT for PPC64 machines */
-#define SD_NODE_INIT (struct sched_domain) { \
- .min_interval = 8, \
- .max_interval = 32, \
- .busy_factor = 32, \
- .imbalance_pct = 125, \
- .cache_nice_tries = 1, \
- .busy_idx = 3, \
- .idle_idx = 1, \
- .newidle_idx = 0, \
- .wake_idx = 0, \
- .forkexec_idx = 0, \
- \
- .flags = 1*SD_LOAD_BALANCE \
- | 0*SD_BALANCE_NEWIDLE \
- | 1*SD_BALANCE_EXEC \
- | 1*SD_BALANCE_FORK \
- | 0*SD_BALANCE_WAKE \
- | 1*SD_WAKE_AFFINE \
- | 0*SD_PREFER_LOCAL \
- | 0*SD_SHARE_CPUPOWER \
- | 0*SD_POWERSAVINGS_BALANCE \
- | 0*SD_SHARE_PKG_RESOURCES \
- | 1*SD_SERIALIZE \
- | 0*SD_PREFER_SIBLING \
- , \
- .last_balance = jiffies, \
- .balance_interval = 1, \
-}
-
extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b)
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index bd0fb849515..17bb40cad5b 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -40,6 +40,8 @@
#define segment_eq(a, b) ((a).seg == (b).seg)
+#define user_addr_max() (get_fs().seg)
+
#ifdef __powerpc64__
/*
* This check is sufficient because there is a large enough
@@ -453,42 +455,9 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
return size;
}
-extern int __strncpy_from_user(char *dst, const char __user *src, long count);
-
-static inline long strncpy_from_user(char *dst, const char __user *src,
- long count)
-{
- might_sleep();
- if (likely(access_ok(VERIFY_READ, src, 1)))
- return __strncpy_from_user(dst, src, count);
- return -EFAULT;
-}
-
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 for error
- */
-extern int __strnlen_user(const char __user *str, long len, unsigned long top);
-
-/*
- * Returns the length of the string at str (including the null byte),
- * or 0 if we hit a page we can't access,
- * or something > len if we didn't find a null byte.
- *
- * The `top' parameter to __strnlen_user is to make sure that
- * we can never overflow from the user area into kernel space.
- */
-static inline int strnlen_user(const char __user *str, long len)
-{
- unsigned long top = current->thread.fs.seg;
-
- if ((unsigned long)str > top)
- return 0;
- return __strnlen_user(str, len, top);
-}
-
-#define strlen_user(str) strnlen_user((str), 0x7ffffffe)
+extern long strncpy_from_user(char *dst, const char __user *src, long count);
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h
index 6bfd5ffe1d4..b19adf751dd 100644
--- a/arch/powerpc/include/asm/vio.h
+++ b/arch/powerpc/include/asm/vio.h
@@ -46,6 +46,48 @@
struct iommu_table;
+/*
+ * Platform Facilities Option (PFO)-specific data
+ */
+
+/* Starting unit address for PFO devices on the VIO BUS */
+#define VIO_BASE_PFO_UA 0x50000000
+
+/**
+ * vio_pfo_op - PFO operation parameters
+ *
+ * @flags: h_call subfunctions and modifiers
+ * @in: Input data block logical real address
+ * @inlen: If non-negative, the length of the input data block. If negative,
+ * the length of the input data descriptor list in bytes.
+ * @out: Output data block logical real address
+ * @outlen: If non-negative, the length of the input data block. If negative,
+ * the length of the input data descriptor list in bytes.
+ * @csbcpb: Logical real address of the 4k naturally-aligned storage block
+ * containing the CSB & optional FC field specific CPB
+ * @timeout: # of milliseconds to retry h_call, 0 for no timeout.
+ * @hcall_err: pointer to return the h_call return value, else NULL
+ */
+struct vio_pfo_op {
+ u64 flags;
+ s64 in;
+ s64 inlen;
+ s64 out;
+ s64 outlen;
+ u64 csbcpb;
+ void *done;
+ unsigned long handle;
+ unsigned int timeout;
+ long hcall_err;
+};
+
+/* End PFO specific data */
+
+enum vio_dev_family {
+ VDEVICE, /* The OF node is a child of /vdevice */
+ PFO, /* The OF node is a child of /ibm,platform-facilities */
+};
+
/**
* vio_dev - This structure is used to describe virtual I/O devices.
*
@@ -58,6 +100,7 @@ struct vio_dev {
const char *name;
const char *type;
uint32_t unit_address;
+ uint32_t resource_id;
unsigned int irq;
struct {
size_t desired;
@@ -65,6 +108,7 @@ struct vio_dev {
size_t allocated;
atomic_t allocs_failed;
} cmo;
+ enum vio_dev_family family;
struct device dev;
};
@@ -95,6 +139,8 @@ extern void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired);
extern void __devinit vio_unregister_device(struct vio_dev *dev);
+extern int vio_h_cop_sync(struct vio_dev *vdev, struct vio_pfo_op *op);
+
struct device_node;
extern struct vio_dev *vio_register_device_node(
diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
new file mode 100644
index 00000000000..d0b6d4ac6dd
--- /dev/null
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -0,0 +1,41 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+/*
+ * Word-at-a-time interfaces for PowerPC.
+ */
+
+#include <linux/kernel.h>
+#include <asm/asm-compat.h>
+
+struct word_at_a_time {
+ const unsigned long high_bits, low_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) }
+
+/* Bit set in the bytes that have a zero */
+static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c)
+{
+ unsigned long mask = (val & c->low_bits) + c->low_bits;
+ return ~(mask | rhs);
+}
+
+#define create_zero_mask(mask) (mask)
+
+static inline long find_zero(unsigned long mask)
+{
+ long leading_zero_bits;
+
+ asm (PPC_CNTLZL "%0,%1" : "=r" (leading_zero_bits) : "r" (mask));
+ return leading_zero_bits >> 3;
+}
+
+static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+{
+ unsigned long rhs = val | c->low_bits;
+ *data = rhs;
+ return (val + c->high_bits) & ~rhs;
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index f5808a35688..83afacd3ba7 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -28,7 +28,7 @@ endif
obj-y := cputable.o ptrace.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
- init_task.o process.o systbl.o idle.o \
+ process.o systbl.o idle.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
udbg.o misc.o io.o dma.o \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 34b8afe94a5..52c7ad78242 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -116,6 +116,9 @@ int main(void)
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
#endif
+#ifdef CONFIG_KVM_BOOKE_HV
+ DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu));
+#endif
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
@@ -188,10 +191,6 @@ int main(void)
DEFINE(SLBSHADOW_STACKESID,
offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid));
DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
- DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
- DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
- DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
- DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use));
DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx));
DEFINE(LPPACA_YIELDCOUNT, offsetof(struct lppaca, yield_count));
@@ -387,6 +386,7 @@ int main(void)
#ifdef CONFIG_KVM
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
+ DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr));
@@ -429,9 +429,11 @@ int main(void)
DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4));
DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6));
+ DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
+ DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
+
/* book3s */
#ifdef CONFIG_KVM_BOOK3S_64_HV
- DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
@@ -444,9 +446,9 @@ int main(void)
DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
+ DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
#endif
#ifdef CONFIG_PPC_BOOK3S
- DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
@@ -461,7 +463,6 @@ int main(void)
DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
- DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa));
DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
@@ -537,6 +538,8 @@ int main(void)
HSTATE_FIELD(HSTATE_NAPPING, napping);
#ifdef CONFIG_KVM_BOOK3S_64_HV
+ HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req);
+ HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
@@ -597,6 +600,12 @@ int main(void)
DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr));
#endif
+#ifdef CONFIG_KVM_BOOKE_HV
+ DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4));
+ DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6));
+ DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc));
+#endif
+
#ifdef CONFIG_KVM_EXIT_TIMING
DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
arch.timing_exit.tv32.tbu));
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 8053db02b85..69fdd2322a6 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -73,6 +73,7 @@ _GLOBAL(__setup_cpu_e500v2)
mtlr r4
blr
_GLOBAL(__setup_cpu_e500mc)
+ mr r5, r4
mflr r4
bl __e500_icache_setup
bl __e500_dcache_setup
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index f8a7a1a1a9f..5971c85df13 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -63,15 +63,9 @@ system_call_common:
std r0,GPR0(r1)
std r10,GPR1(r1)
ACCOUNT_CPU_USER_ENTRY(r10, r11)
- /*
- * This "crclr so" clears CR0.SO, which is the error indication on
- * return from this system call. There must be no cmp instruction
- * between it and the "mfcr r9" below, otherwise if XER.SO is set,
- * CR0.SO will get set, causing all system calls to appear to fail.
- */
- crclr so
std r2,GPR2(r1)
std r3,GPR3(r1)
+ mfcr r2
std r4,GPR4(r1)
std r5,GPR5(r1)
std r6,GPR6(r1)
@@ -82,18 +76,20 @@ system_call_common:
std r11,GPR10(r1)
std r11,GPR11(r1)
std r11,GPR12(r1)
+ std r11,_XER(r1)
+ std r11,_CTR(r1)
std r9,GPR13(r1)
- mfcr r9
mflr r10
+ /*
+ * This clears CR0.SO (bit 28), which is the error indication on
+ * return from this system call.
+ */
+ rldimi r2,r11,28,(63-28)
li r11,0xc01
- std r9,_CCR(r1)
std r10,_LINK(r1)
std r11,_TRAP(r1)
- mfxer r9
- mfctr r10
- std r9,_XER(r1)
- std r10,_CTR(r1)
std r3,ORIG_GPR3(r1)
+ std r2,_CCR(r1)
ld r2,PACATOC(r13)
addi r9,r1,STACK_FRAME_OVERHEAD
ld r11,exception_marker@toc(r2)
@@ -154,7 +150,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
ld r10,TI_FLAGS(r11)
andi. r11,r10,_TIF_SYSCALL_T_OR_A
bne- syscall_dotrace
-syscall_dotrace_cont:
+.Lsyscall_dotrace_cont:
cmpldi 0,r0,NR_syscalls
bge- syscall_enosys
@@ -211,7 +207,7 @@ syscall_exit:
cmpld r3,r11
ld r5,_CCR(r1)
bge- syscall_error
-syscall_error_cont:
+.Lsyscall_error_cont:
ld r7,_NIP(r1)
BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
@@ -246,7 +242,7 @@ syscall_error:
oris r5,r5,0x1000 /* Set SO bit in CR */
neg r3,r3
std r5,_CCR(r1)
- b syscall_error_cont
+ b .Lsyscall_error_cont
/* Traced system call support */
syscall_dotrace:
@@ -268,7 +264,7 @@ syscall_dotrace:
addi r9,r1,STACK_FRAME_OVERHEAD
clrrdi r10,r1,THREAD_SHIFT
ld r10,TI_FLAGS(r10)
- b syscall_dotrace_cont
+ b .Lsyscall_dotrace_cont
syscall_enosys:
li r3,-ENOSYS
@@ -562,49 +558,72 @@ _GLOBAL(ret_from_except_lite)
mtmsrd r10,1 /* Update machine state */
#endif /* CONFIG_PPC_BOOK3E */
-#ifdef CONFIG_PREEMPT
clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */
- li r0,_TIF_NEED_RESCHED /* bits to check */
ld r3,_MSR(r1)
ld r4,TI_FLAGS(r9)
- /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */
- rlwimi r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING
- and. r0,r4,r0 /* check NEED_RESCHED and maybe SIGPENDING */
- bne do_work
-
-#else /* !CONFIG_PREEMPT */
- ld r3,_MSR(r1) /* Returning to user mode? */
andi. r3,r3,MSR_PR
- beq restore /* if not, just restore regs and return */
+ beq resume_kernel
/* Check current_thread_info()->flags */
+ andi. r0,r4,_TIF_USER_WORK_MASK
+ beq restore
+
+ andi. r0,r4,_TIF_NEED_RESCHED
+ beq 1f
+ bl .restore_interrupts
+ bl .schedule
+ b .ret_from_except_lite
+
+1: bl .save_nvgprs
+ bl .restore_interrupts
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .do_notify_resume
+ b .ret_from_except
+
+resume_kernel:
+#ifdef CONFIG_PREEMPT
+ /* Check if we need to preempt */
+ andi. r0,r4,_TIF_NEED_RESCHED
+ beq+ restore
+ /* Check that preempt_count() == 0 and interrupts are enabled */
+ lwz r8,TI_PREEMPT(r9)
+ cmpwi cr1,r8,0
+ ld r0,SOFTE(r1)
+ cmpdi r0,0
+ crandc eq,cr1*4+eq,eq
+ bne restore
+
+ /*
+ * Here we are preempting the current task. We want to make
+ * sure we are soft-disabled first
+ */
+ SOFT_DISABLE_INTS(r3,r4)
+1: bl .preempt_schedule_irq
+
+ /* Re-test flags and eventually loop */
clrrdi r9,r1,THREAD_SHIFT
ld r4,TI_FLAGS(r9)
- andi. r0,r4,_TIF_USER_WORK_MASK
- bne do_work
-#endif /* !CONFIG_PREEMPT */
+ andi. r0,r4,_TIF_NEED_RESCHED
+ bne 1b
+#endif /* CONFIG_PREEMPT */
.globl fast_exc_return_irq
fast_exc_return_irq:
restore:
/*
- * This is the main kernel exit path, we first check if we
- * have to change our interrupt state.
+ * This is the main kernel exit path. First we check if we
+ * are about to re-enable interrupts
*/
ld r5,SOFTE(r1)
lbz r6,PACASOFTIRQEN(r13)
- cmpwi cr1,r5,0
- cmpw cr0,r5,r6
- beq cr0,4f
+ cmpwi cr0,r5,0
+ beq restore_irq_off
- /* We do, handle disable first, which is easy */
- bne cr1,3f;
- li r0,0
- stb r0,PACASOFTIRQEN(r13);
- TRACE_DISABLE_INTS
- b 4f
+ /* We are enabling, were we already enabled ? Yes, just return */
+ cmpwi cr0,r6,1
+ beq cr0,do_restore
-3: /*
+ /*
* We are about to soft-enable interrupts (we are hard disabled
* at this point). We check if there's anything that needs to
* be replayed first.
@@ -626,7 +645,7 @@ restore_no_replay:
/*
* Final return path. BookE is handled in a different file
*/
-4:
+do_restore:
#ifdef CONFIG_PPC_BOOK3E
b .exception_return_book3e
#else
@@ -700,6 +719,25 @@ fast_exception_return:
#endif /* CONFIG_PPC_BOOK3E */
/*
+ * We are returning to a context with interrupts soft disabled.
+ *
+ * However, we may also about to hard enable, so we need to
+ * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
+ * or that bit can get out of sync and bad things will happen
+ */
+restore_irq_off:
+ ld r3,_MSR(r1)
+ lbz r7,PACAIRQHAPPENED(r13)
+ andi. r0,r3,MSR_EE
+ beq 1f
+ rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
+ stb r7,PACAIRQHAPPENED(r13)
+1: li r0,0
+ stb r0,PACASOFTIRQEN(r13);
+ TRACE_DISABLE_INTS
+ b do_restore
+
+ /*
* Something did happen, check if a re-emit is needed
* (this also clears paca->irq_happened)
*/
@@ -748,65 +786,6 @@ restore_check_irq_replay:
#endif /* CONFIG_PPC_BOOK3E */
1: b .ret_from_except /* What else to do here ? */
-do_work:
-#ifdef CONFIG_PREEMPT
- andi. r0,r3,MSR_PR /* Returning to user mode? */
- bne user_work
- /* Check that preempt_count() == 0 and interrupts are enabled */
- lwz r8,TI_PREEMPT(r9)
- cmpwi cr1,r8,0
- ld r0,SOFTE(r1)
- cmpdi r0,0
- crandc eq,cr1*4+eq,eq
- bne restore
-
- /*
- * Here we are preempting the current task. We want to make
- * sure we are soft-disabled first
- */
- SOFT_DISABLE_INTS(r3,r4)
-1: bl .preempt_schedule_irq
-
- /* Hard-disable interrupts again (and update PACA) */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */
- mtmsrd r10,1
-#endif /* CONFIG_PPC_BOOK3E */
- li r0,PACA_IRQ_HARD_DIS
- stb r0,PACAIRQHAPPENED(r13)
-
- /* Re-test flags and eventually loop */
- clrrdi r9,r1,THREAD_SHIFT
- ld r4,TI_FLAGS(r9)
- andi. r0,r4,_TIF_NEED_RESCHED
- bne 1b
- b restore
-
-user_work:
-#endif /* CONFIG_PREEMPT */
-
- /* Enable interrupts */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 1
-#else
- ori r10,r10,MSR_EE
- mtmsrd r10,1
-#endif /* CONFIG_PPC_BOOK3E */
-
- andi. r0,r4,_TIF_NEED_RESCHED
- beq 1f
- bl .restore_interrupts
- bl .schedule
- b .ret_from_except_lite
-
-1: bl .save_nvgprs
- bl .restore_interrupts
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_notify_resume
- b .ret_from_except
-
unrecov_restore:
addi r3,r1,STACK_FRAME_OVERHEAD
bl .unrecoverable_exception
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index cb705fdbb45..1c06d297154 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -63,11 +63,13 @@ BEGIN_FTR_SECTION
GET_PACA(r13)
#ifdef CONFIG_KVM_BOOK3S_64_HV
- lbz r0,PACAPROCSTART(r13)
- cmpwi r0,0x80
- bne 1f
- li r0,1
- stb r0,PACAPROCSTART(r13)
+ li r0,KVM_HWTHREAD_IN_KERNEL
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ sync
+ lbz r0,HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r0,0
+ beq 1f
b kvm_start_guest
1:
#endif
@@ -94,12 +96,10 @@ machine_check_pSeries_1:
data_access_pSeries:
HMT_MEDIUM
SET_SCRATCH0(r13)
-#ifndef CONFIG_POWER4_ONLY
BEGIN_FTR_SECTION
b data_access_check_stab
data_access_not_stab:
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
-#endif
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
KVMTEST, 0x300)
@@ -301,7 +301,6 @@ machine_check_fwnmi:
EXC_STD, KVMTEST, 0x200)
KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
-#ifndef CONFIG_POWER4_ONLY
/* moved from 0x300 */
data_access_check_stab:
GET_PACA(r13)
@@ -328,7 +327,6 @@ do_stab_bolted_pSeries:
GET_SCRATCH0(r10)
std r10,PACA_EXSLB+EX_R13(r13)
EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
-#endif /* CONFIG_POWER4_ONLY */
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
@@ -768,8 +766,8 @@ alignment_common:
std r3,_DAR(r1)
std r4,_DSISR(r1)
bl .save_nvgprs
+ DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- ENABLE_INTS
bl .alignment_exception
b .ret_from_except
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 7dd2981bcc5..7a2e5e421ab 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -248,10 +248,11 @@ _ENTRY(_start);
interrupt_base:
/* Critical Input Interrupt */
- CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception)
+ CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
/* Machine Check Interrupt */
- CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+ CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
+ machine_check_exception)
MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception)
/* Data Storage Interrupt */
@@ -261,7 +262,8 @@ interrupt_base:
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \
+ do_IRQ, EXC_XFER_LITE)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
@@ -273,29 +275,32 @@ interrupt_base:
#ifdef CONFIG_PPC_FPU
FP_UNAVAILABLE_EXCEPTION
#else
- EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
+ FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL)
EXC_XFER_EE_LITE(0x0c00, DoSyscall)
/* Auxiliary Processor Unavailable Interrupt */
- EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
+ AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
+ unknown_exception, EXC_XFER_EE)
/* Watchdog Timer Interrupt */
/* TODO: Add watchdog support */
#ifdef CONFIG_BOOKE_WDT
- CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException)
+ CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, WatchdogException)
#else
- CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception)
+ CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, unknown_exception)
#endif
/* Data TLB Error Interrupt */
@@ -778,14 +783,6 @@ _GLOBAL(__fixup_440A_mcheck)
blr
/*
- * extern void giveup_altivec(struct task_struct *prev)
- *
- * The 44x core does not have an AltiVec unit.
- */
-_GLOBAL(giveup_altivec)
- blr
-
-/*
* extern void giveup_fpu(struct task_struct *prev)
*
* The 44x core does not have an FPU.
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 0e4175388f4..5f051eeb93a 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -2,6 +2,9 @@
#define __HEAD_BOOKE_H__
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
+#include <asm/kvm_asm.h>
+#include <asm/kvm_booke_hv_asm.h>
+
/*
* Macros used for common Book-e exception handling
*/
@@ -28,14 +31,15 @@
*/
#define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4))
-#define NORMAL_EXCEPTION_PROLOG \
+#define NORMAL_EXCEPTION_PROLOG(intno) \
mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
mfspr r10, SPRN_SPRG_THREAD; \
stw r11, THREAD_NORMSAVE(0)(r10); \
stw r13, THREAD_NORMSAVE(2)(r10); \
mfcr r13; /* save CR in r13 for now */\
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */\
- andi. r11,r11,MSR_PR; \
+ mfspr r11, SPRN_SRR1; \
+ DO_KVM BOOKE_INTERRUPT_##intno SPRN_SRR1; \
+ andi. r11, r11, MSR_PR; /* check whether user or kernel */\
mr r11, r1; \
beq 1f; \
/* if from user, start at top of this thread's kernel stack */ \
@@ -113,7 +117,7 @@
* registers as the normal prolog above. Instead we use a portion of the
* critical/machine check exception stack at low physical addresses.
*/
-#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \
+#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \
mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \
BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
stw r9,GPR9(r8); /* save various registers */\
@@ -121,8 +125,9 @@
stw r10,GPR10(r8); \
stw r11,GPR11(r8); \
stw r9,_CCR(r8); /* save CR on stack */\
- mfspr r10,exc_level_srr1; /* check whether user or kernel */\
- andi. r10,r10,MSR_PR; \
+ mfspr r11,exc_level_srr1; /* check whether user or kernel */\
+ DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \
+ andi. r11,r11,MSR_PR; \
mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\
@@ -162,12 +167,30 @@
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
-#define CRITICAL_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1)
+#define CRITICAL_EXCEPTION_PROLOG(intno) \
+ EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1)
#define DEBUG_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(DBG, SPRN_DSRR0, SPRN_DSRR1)
+ EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
#define MCHECK_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(MC, SPRN_MCSRR0, SPRN_MCSRR1)
+ EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \
+ SPRN_MCSRR0, SPRN_MCSRR1)
+
+/*
+ * Guest Doorbell -- this is a bit odd in that uses GSRR0/1 despite
+ * being delivered to the host. This exception can only happen
+ * inside a KVM guest -- so we just handle up to the DO_KVM rather
+ * than try to fit this into one of the existing prolog macros.
+ */
+#define GUEST_DOORBELL_EXCEPTION \
+ START_EXCEPTION(GuestDoorbell); \
+ mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
+ mfspr r10, SPRN_SPRG_THREAD; \
+ stw r11, THREAD_NORMSAVE(0)(r10); \
+ mfspr r11, SPRN_SRR1; \
+ stw r13, THREAD_NORMSAVE(2)(r10); \
+ mfcr r13; /* save CR in r13 for now */\
+ DO_KVM BOOKE_INTERRUPT_GUEST_DBELL SPRN_GSRR1; \
+ trap
/*
* Exception vectors.
@@ -181,16 +204,16 @@ label:
.long func; \
.long ret_from_except_full
-#define EXCEPTION(n, label, hdlr, xfer) \
+#define EXCEPTION(n, intno, label, hdlr, xfer) \
START_EXCEPTION(label); \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(intno); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
xfer(n, hdlr)
-#define CRITICAL_EXCEPTION(n, label, hdlr) \
- START_EXCEPTION(label); \
- CRITICAL_EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
+#define CRITICAL_EXCEPTION(n, intno, label, hdlr) \
+ START_EXCEPTION(label); \
+ CRITICAL_EXCEPTION_PROLOG(intno); \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
NOCOPY, crit_transfer_to_handler, \
ret_from_crit_exc)
@@ -302,7 +325,7 @@ label:
#define DEBUG_CRIT_EXCEPTION \
START_EXCEPTION(DebugCrit); \
- CRITICAL_EXCEPTION_PROLOG; \
+ CRITICAL_EXCEPTION_PROLOG(DEBUG); \
\
/* \
* If there is a single step or branch-taken exception in an \
@@ -355,7 +378,7 @@ label:
#define DATA_STORAGE_EXCEPTION \
START_EXCEPTION(DataStorage) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(DATA_STORAGE); \
mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
stw r5,_ESR(r11); \
mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \
@@ -363,7 +386,7 @@ label:
#define INSTRUCTION_STORAGE_EXCEPTION \
START_EXCEPTION(InstructionStorage) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \
mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
stw r5,_ESR(r11); \
mr r4,r12; /* Pass SRR0 as arg2 */ \
@@ -372,7 +395,7 @@ label:
#define ALIGNMENT_EXCEPTION \
START_EXCEPTION(Alignment) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \
mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \
stw r4,_DEAR(r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -380,7 +403,7 @@ label:
#define PROGRAM_EXCEPTION \
START_EXCEPTION(Program) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(PROGRAM); \
mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \
stw r4,_ESR(r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -388,7 +411,7 @@ label:
#define DECREMENTER_EXCEPTION \
START_EXCEPTION(Decrementer) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(DECREMENTER); \
lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \
mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \
addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -396,7 +419,7 @@ label:
#define FP_UNAVAILABLE_EXCEPTION \
START_EXCEPTION(FloatingPointUnavailable) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL); \
beq 1f; \
bl load_up_fpu; /* if from user, just load it up */ \
b fast_exception_return; \
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 28e62598d0e..1f4434a3860 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -301,19 +301,20 @@ _ENTRY(__early_start)
interrupt_base:
/* Critical Input Interrupt */
- CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception)
+ CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
/* Machine Check Interrupt */
#ifdef CONFIG_E200
/* no RFMCI, MCSRRs on E200 */
- CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+ CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
+ machine_check_exception)
#else
MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
#endif
/* Data Storage Interrupt */
START_EXCEPTION(DataStorage)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(DATA_STORAGE)
mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
stw r5,_ESR(r11)
mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
@@ -328,7 +329,7 @@ interrupt_base:
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
@@ -342,32 +343,36 @@ interrupt_base:
#else
#ifdef CONFIG_E200
/* E200 treats 'normal' floating point instructions as FP Unavail exception */
- EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE)
+ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
+ program_check_exception, EXC_XFER_EE)
#else
- EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
+ unknown_exception, EXC_XFER_EE)
#endif
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(SYSCALL)
EXC_XFER_EE_LITE(0x0c00, DoSyscall)
/* Auxiliary Processor Unavailable Interrupt */
- EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
+ unknown_exception, EXC_XFER_EE)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
+ unknown_exception, EXC_XFER_EE)
/* Watchdog Timer Interrupt */
#ifdef CONFIG_BOOKE_WDT
- CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException)
+ CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, WatchdogException)
#else
- CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception)
+ CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, unknown_exception)
#endif
/* Data TLB Error Interrupt */
@@ -375,10 +380,16 @@ interrupt_base:
mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
mfspr r10, SPRN_SPRG_THREAD
stw r11, THREAD_NORMSAVE(0)(r10)
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+ mfspr r11, SPRN_SRR1
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
stw r12, THREAD_NORMSAVE(1)(r10)
stw r13, THREAD_NORMSAVE(2)(r10)
mfcr r13
stw r13, THREAD_NORMSAVE(3)(r10)
+ DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1
mfspr r10, SPRN_DEAR /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
@@ -463,10 +474,16 @@ interrupt_base:
mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
mfspr r10, SPRN_SPRG_THREAD
stw r11, THREAD_NORMSAVE(0)(r10)
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+ mfspr r11, SPRN_SRR1
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
stw r12, THREAD_NORMSAVE(1)(r10)
stw r13, THREAD_NORMSAVE(2)(r10)
mfcr r13
stw r13, THREAD_NORMSAVE(3)(r10)
+ DO_KVM BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1
mfspr r10, SPRN_SRR0 /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
@@ -538,36 +555,54 @@ interrupt_base:
#ifdef CONFIG_SPE
/* SPE Unavailable */
START_EXCEPTION(SPEUnavailable)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
bne load_up_spe
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_EE_LITE(0x2010, KernelSPE)
#else
- EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
+ unknown_exception, EXC_XFER_EE)
#endif /* CONFIG_SPE */
/* SPE Floating Point Data */
#ifdef CONFIG_SPE
- EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE);
+ EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, \
+ SPEFloatingPointException, EXC_XFER_EE);
/* SPE Floating Point Round */
- EXCEPTION(0x2050, SPEFloatingPointRound, SPEFloatingPointRoundException, EXC_XFER_EE)
+ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
+ SPEFloatingPointRoundException, EXC_XFER_EE)
#else
- EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, \
+ unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
+ unknown_exception, EXC_XFER_EE)
#endif /* CONFIG_SPE */
/* Performance Monitor */
- EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD)
+ EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
+ performance_monitor_exception, EXC_XFER_STD)
- EXCEPTION(0x2070, Doorbell, doorbell_exception, EXC_XFER_STD)
+ EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD)
- CRITICAL_EXCEPTION(0x2080, CriticalDoorbell, unknown_exception)
+ CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \
+ CriticalDoorbell, unknown_exception)
/* Debug Interrupt */
DEBUG_DEBUG_EXCEPTION
DEBUG_CRIT_EXCEPTION
+ GUEST_DOORBELL_EXCEPTION
+
+ CRITICAL_EXCEPTION(0, GUEST_DBELL_CRIT, CriticalGuestDoorbell, \
+ unknown_exception)
+
+ /* Hypercall */
+ EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE)
+
+ /* Embedded Hypervisor Privilege */
+ EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE)
+
/*
* Local functions
*/
@@ -871,16 +906,31 @@ _GLOBAL(__setup_e500mc_ivors)
mtspr SPRN_IVOR36,r3
li r3,CriticalDoorbell@l
mtspr SPRN_IVOR37,r3
- sync
- blr
-/*
- * extern void giveup_altivec(struct task_struct *prev)
- *
- * The e500 core does not have an AltiVec unit.
- */
-_GLOBAL(giveup_altivec)
+ /*
+ * We only want to touch IVOR38-41 if we're running on hardware
+ * that supports category E.HV. The architectural way to determine
+ * this is MMUCFG[LPIDSIZE].
+ */
+ mfspr r3, SPRN_MMUCFG
+ andis. r3, r3, MMUCFG_LPIDSIZE@h
+ beq no_hv
+ li r3,GuestDoorbell@l
+ mtspr SPRN_IVOR38,r3
+ li r3,CriticalGuestDoorbell@l
+ mtspr SPRN_IVOR39,r3
+ li r3,Hypercall@l
+ mtspr SPRN_IVOR40,r3
+ li r3,Ehvpriv@l
+ mtspr SPRN_IVOR41,r3
+skip_hv_ivors:
+ sync
blr
+no_hv:
+ lwz r3, CPU_SPEC_FEATURES(r5)
+ rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV
+ stw r3, CPU_SPEC_FEATURES(r5)
+ b skip_hv_ivors
#ifdef CONFIG_SPE
/*
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 6d2209ac0c4..2099d9a879e 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -113,29 +113,6 @@ void cpu_idle(void)
}
}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs come out of the old
- * idle loop and start using the new idle loop.
- * Required while changing idle handler on SMP systems.
- * Caller must have changed idle handler to the new value before the call.
- * This window may be larger on shared systems.
- */
-void cpu_idle_wait(void)
-{
- int cpu;
- smp_mb();
-
- /* kick all the CPUs so that they exit out of old idle routine */
- get_online_cpus();
- for_each_online_cpu(cpu) {
- if (cpu != smp_processor_id())
- smp_send_reschedule(cpu);
- }
- put_online_cpus();
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
int powersave_nap;
#ifdef CONFIG_SYSCTL
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 0cdc9a39283..7140d838339 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -16,6 +16,7 @@
#include <asm/asm-offsets.h>
#include <asm/ppc-opcode.h>
#include <asm/hw_irq.h>
+#include <asm/kvm_book3s_asm.h>
#undef DEBUG
@@ -81,6 +82,12 @@ _GLOBAL(power7_idle)
std r9,_MSR(r1)
std r1,PACAR1(r13)
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ /* Tell KVM we're napping */
+ li r4,KVM_HWTHREAD_IN_NAP
+ stb r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
+
/* Magic NAP mode enter sequence */
std r0,0(r1)
ptesync
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c
deleted file mode 100644
index d076d465dbd..00000000000
--- a/arch/powerpc/kernel/init_task.c
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <linux/mm.h>
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-#include <asm/uaccess.h>
-
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-/*
- * Initial thread structure.
- *
- * We need to make sure that this is 16384-byte aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union __init_task_data =
- { INIT_THREAD_INFO(init_task) };
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_task);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5ec1b2354ca..1b415027ec0 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -229,6 +229,19 @@ notrace void arch_local_irq_restore(unsigned long en)
*/
if (unlikely(irq_happened != PACA_IRQ_HARD_DIS))
__hard_irq_disable();
+#ifdef CONFIG_TRACE_IRQFLAG
+ else {
+ /*
+ * We should already be hard disabled here. We had bugs
+ * where that wasn't the case so let's dbl check it and
+ * warn if we are wrong. Only do that when IRQ tracing
+ * is enabled as mfmsr() can be costly.
+ */
+ if (WARN_ON(mfmsr() & MSR_EE))
+ __hard_irq_disable();
+ }
+#endif /* CONFIG_TRACE_IRQFLAG */
+
set_soft_enabled(0);
/*
@@ -260,11 +273,17 @@ EXPORT_SYMBOL(arch_local_irq_restore);
* if they are currently disabled. This is typically called before
* schedule() or do_signal() when returning to userspace. We do it
* in C to avoid the burden of dealing with lockdep etc...
+ *
+ * NOTE: This is called with interrupts hard disabled but not marked
+ * as such in paca->irq_happened, so we need to resync this.
*/
-void restore_interrupts(void)
+void notrace restore_interrupts(void)
{
- if (irqs_disabled())
+ if (irqs_disabled()) {
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
local_irq_enable();
+ } else
+ __hard_irq_enable();
}
#endif /* CONFIG_PPC64 */
@@ -330,14 +349,10 @@ void migrate_irqs(void)
alloc_cpumask_var(&mask, GFP_KERNEL);
- for_each_irq(irq) {
+ for_each_irq_desc(irq, desc) {
struct irq_data *data;
struct irq_chip *chip;
- desc = irq_to_desc(irq);
- if (!desc)
- continue;
-
data = irq_desc_get_irq_data(desc);
if (irqd_is_per_cpu(data))
continue;
@@ -572,7 +587,7 @@ int irq_choose_cpu(const struct cpumask *mask)
{
int cpuid;
- if (cpumask_equal(mask, cpu_all_mask)) {
+ if (cpumask_equal(mask, cpu_online_mask)) {
static int irq_rover;
static DEFINE_RAW_SPINLOCK(irq_rover_lock);
unsigned long flags;
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index c957b1202bd..5df77779440 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -23,14 +23,11 @@
void machine_kexec_mask_interrupts(void) {
unsigned int i;
+ struct irq_desc *desc;
- for_each_irq(i) {
- struct irq_desc *desc = irq_to_desc(i);
+ for_each_irq_desc(i, desc) {
struct irq_chip *chip;
- if (!desc)
- continue;
-
chip = irq_desc_get_chip(desc);
if (!chip)
continue;
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 7cd07b42ca1..386d57f66f2 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -738,8 +738,23 @@ relocate_new_kernel:
mr r5, r31
li r0, 0
-#elif defined(CONFIG_44x) && !defined(CONFIG_PPC_47x)
+#elif defined(CONFIG_44x)
+ /* Save our parameters */
+ mr r29, r3
+ mr r30, r4
+ mr r31, r5
+
+#ifdef CONFIG_PPC_47x
+ /* Check for 47x cores */
+ mfspr r3,SPRN_PVR
+ srwi r3,r3,16
+ cmplwi cr0,r3,PVR_476@h
+ beq setup_map_47x
+ cmplwi cr0,r3,PVR_476_ISS@h
+ beq setup_map_47x
+#endif /* CONFIG_PPC_47x */
+
/*
* Code for setting up 1:1 mapping for PPC440x for KEXEC
*
@@ -753,16 +768,15 @@ relocate_new_kernel:
* 5) Invalidate the tmp mapping.
*
* - Based on the kexec support code for FSL BookE
- * - Doesn't support 47x yet.
*
*/
- /* Save our parameters */
- mr r29, r3
- mr r30, r4
- mr r31, r5
- /* Load our MSR_IS and TID to MMUCR for TLB search */
- mfspr r3,SPRN_PID
+ /*
+ * Load the PID with kernel PID (0).
+ * Also load our MSR_IS and TID to MMUCR for TLB search.
+ */
+ li r3, 0
+ mtspr SPRN_PID, r3
mfmsr r4
andi. r4,r4,MSR_IS@l
beq wmmucr
@@ -900,6 +914,179 @@ next_tlb:
li r3, 0
tlbwe r3, r24, PPC44x_TLB_PAGEID
sync
+ b ppc44x_map_done
+
+#ifdef CONFIG_PPC_47x
+
+ /* 1:1 mapping for 47x */
+
+setup_map_47x:
+
+ /*
+ * Load the kernel pid (0) to PID and also to MMUCR[TID].
+ * Also set the MSR IS->MMUCR STS
+ */
+ li r3, 0
+ mtspr SPRN_PID, r3 /* Set PID */
+ mfmsr r4 /* Get MSR */
+ andi. r4, r4, MSR_IS@l /* TS=1? */
+ beq 1f /* If not, leave STS=0 */
+ oris r3, r3, PPC47x_MMUCR_STS@h /* Set STS=1 */
+1: mtspr SPRN_MMUCR, r3 /* Put MMUCR */
+ sync
+
+ /* Find the entry we are running from */
+ bl 2f
+2: mflr r23
+ tlbsx r23, 0, r23
+ tlbre r24, r23, 0 /* TLB Word 0 */
+ tlbre r25, r23, 1 /* TLB Word 1 */
+ tlbre r26, r23, 2 /* TLB Word 2 */
+
+
+ /*
+ * Invalidates all the tlb entries by writing to 256 RPNs(r4)
+ * of 4k page size in all 4 ways (0-3 in r3).
+ * This would invalidate the entire UTLB including the one we are
+ * running from. However the shadow TLB entries would help us
+ * to continue the execution, until we flush them (rfi/isync).
+ */
+ addis r3, 0, 0x8000 /* specify the way */
+ addi r4, 0, 0 /* TLB Word0 = (EPN=0, VALID = 0) */
+ addi r5, 0, 0
+ b clear_utlb_entry
+
+ /* Align the loop to speed things up. from head_44x.S */
+ .align 6
+
+clear_utlb_entry:
+
+ tlbwe r4, r3, 0
+ tlbwe r5, r3, 1
+ tlbwe r5, r3, 2
+ addis r3, r3, 0x2000 /* Increment the way */
+ cmpwi r3, 0
+ bne clear_utlb_entry
+ addis r3, 0, 0x8000
+ addis r4, r4, 0x100 /* Increment the EPN */
+ cmpwi r4, 0
+ bne clear_utlb_entry
+
+ /* Create the entries in the other address space */
+ mfmsr r5
+ rlwinm r7, r5, 27, 31, 31 /* Get the TS (Bit 26) from MSR */
+ xori r7, r7, 1 /* r7 = !TS */
+
+ insrwi r24, r7, 1, 21 /* Change the TS in the saved TLB word 0 */
+
+ /*
+ * write out the TLB entries for the tmp mapping
+ * Use way '0' so that we could easily invalidate it later.
+ */
+ lis r3, 0x8000 /* Way '0' */
+
+ tlbwe r24, r3, 0
+ tlbwe r25, r3, 1
+ tlbwe r26, r3, 2
+
+ /* Update the msr to the new TS */
+ insrwi r5, r7, 1, 26
+
+ bl 1f
+1: mflr r6
+ addi r6, r6, (2f-1b)
+
+ mtspr SPRN_SRR0, r6
+ mtspr SPRN_SRR1, r5
+ rfi
+
+ /*
+ * Now we are in the tmp address space.
+ * Create a 1:1 mapping for 0-2GiB in the original TS.
+ */
+2:
+ li r3, 0
+ li r4, 0 /* TLB Word 0 */
+ li r5, 0 /* TLB Word 1 */
+ li r6, 0
+ ori r6, r6, PPC47x_TLB2_S_RWX /* TLB word 2 */
+
+ li r8, 0 /* PageIndex */
+
+ xori r7, r7, 1 /* revert back to original TS */
+
+write_utlb:
+ rotlwi r5, r8, 28 /* RPN = PageIndex * 256M */
+ /* ERPN = 0 as we don't use memory above 2G */
+
+ mr r4, r5 /* EPN = RPN */
+ ori r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
+ insrwi r4, r7, 1, 21 /* Insert the TS to Word 0 */
+
+ tlbwe r4, r3, 0 /* Write out the entries */
+ tlbwe r5, r3, 1
+ tlbwe r6, r3, 2
+ addi r8, r8, 1
+ cmpwi r8, 8 /* Have we completed ? */
+ bne write_utlb
+
+ /* make sure we complete the TLB write up */
+ isync
+
+ /*
+ * Prepare to jump to the 1:1 mapping.
+ * 1) Extract page size of the tmp mapping
+ * DSIZ = TLB_Word0[22:27]
+ * 2) Calculate the physical address of the address
+ * to jump to.
+ */
+ rlwinm r10, r24, 0, 22, 27
+
+ cmpwi r10, PPC47x_TLB0_4K
+ bne 0f
+ li r10, 0x1000 /* r10 = 4k */
+ bl 1f
+
+0:
+ /* Defaults to 256M */
+ lis r10, 0x1000
+
+ bl 1f
+1: mflr r4
+ addi r4, r4, (2f-1b) /* virtual address of 2f */
+
+ subi r11, r10, 1 /* offsetmask = Pagesize - 1 */
+ not r10, r11 /* Pagemask = ~(offsetmask) */
+
+ and r5, r25, r10 /* Physical page */
+ and r6, r4, r11 /* offset within the current page */
+
+ or r5, r5, r6 /* Physical address for 2f */
+
+ /* Switch the TS in MSR to the original one */
+ mfmsr r8
+ insrwi r8, r7, 1, 26
+
+ mtspr SPRN_SRR1, r8
+ mtspr SPRN_SRR0, r5
+ rfi
+
+2:
+ /* Invalidate the tmp mapping */
+ lis r3, 0x8000 /* Way '0' */
+
+ clrrwi r24, r24, 12 /* Clear the valid bit */
+ tlbwe r24, r3, 0
+ tlbwe r25, r3, 1
+ tlbwe r26, r3, 2
+
+ /* Make sure we complete the TLB write and flush the shadow TLB */
+ isync
+
+#endif
+
+ppc44x_map_done:
+
/* Restore the parameters */
mr r3, r29
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 0b6d79617d7..2e3200ca485 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -176,8 +176,8 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
{
- if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16)
- && entry->jump[1] == 0x396b0000 + (val & 0xffff))
+ if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16)
+ && entry->jump[1] == 0x398c0000 + (val & 0xffff))
return 1;
return 0;
}
@@ -204,10 +204,9 @@ static uint32_t do_plt_call(void *location,
entry++;
}
- /* Stolen from Paul Mackerras as well... */
- entry->jump[0] = 0x3d600000+((val+0x8000)>>16); /* lis r11,sym@ha */
- entry->jump[1] = 0x396b0000 + (val&0xffff); /* addi r11,r11,sym@l*/
- entry->jump[2] = 0x7d6903a6; /* mtctr r11 */
+ entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */
+ entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/
+ entry->jump[2] = 0x7d8903a6; /* mtctr r12 */
entry->jump[3] = 0x4e800420; /* bctr */
DEBUGP("Initialized plt for 0x%x at %p\n", val, entry);
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 0bb1f98613b..fbe1a12dc7f 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -36,10 +36,7 @@ struct lppaca lppaca[] = {
[0 ... (NR_LPPACAS-1)] = {
.desc = 0xd397d781, /* "LpPa" */
.size = sizeof(struct lppaca),
- .dyn_proc_status = 2,
- .decr_val = 0x00ff0000,
.fpregs_in_use = 1,
- .end_of_quantum = 0xfffffffffffffffful,
.slb_count = 64,
.vmxregs_in_use = 0,
.page_ins = 0,
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 786a2700ec2..3e4031581c6 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -85,8 +85,6 @@ EXPORT_SYMBOL(csum_tcpudp_magic);
EXPORT_SYMBOL(__copy_tofrom_user);
EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(__strncpy_from_user);
-EXPORT_SYMBOL(__strnlen_user);
EXPORT_SYMBOL(copy_page);
#if defined(CONFIG_PCI) && defined(CONFIG_PPC32)
@@ -190,3 +188,7 @@ EXPORT_SYMBOL(__arch_hweight16);
EXPORT_SYMBOL(__arch_hweight32);
EXPORT_SYMBOL(__arch_hweight64);
#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+EXPORT_SYMBOL_GPL(mmu_psize_defs);
+#endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4937c969009..710f400476d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -124,7 +124,7 @@ void enable_kernel_altivec(void)
if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
giveup_altivec(current);
else
- giveup_altivec(NULL); /* just enable AltiVec for kernel - force */
+ giveup_altivec_notask();
#else
giveup_altivec(last_task_used_altivec);
#endif /* CONFIG_SMP */
@@ -711,18 +711,21 @@ release_thread(struct task_struct *t)
}
/*
- * This gets called before we allocate a new thread and copy
- * the current task into it.
+ * this gets called so that we can store coprocessor state into memory and
+ * copy the current task into the new thread.
*/
-void prepare_to_copy(struct task_struct *tsk)
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- flush_fp_to_thread(current);
- flush_altivec_to_thread(current);
- flush_vsx_to_thread(current);
- flush_spe_to_thread(current);
+ flush_fp_to_thread(src);
+ flush_altivec_to_thread(src);
+ flush_vsx_to_thread(src);
+ flush_spe_to_thread(src);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- flush_ptrace_hw_breakpoint(tsk);
+ flush_ptrace_hw_breakpoint(src);
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ *dst = *src;
+ return 0;
}
/*
@@ -1252,37 +1255,6 @@ void __ppc64_runlatch_off(void)
}
#endif /* CONFIG_PPC64 */
-#if THREAD_SHIFT < PAGE_SHIFT
-
-static struct kmem_cache *thread_info_cache;
-
-struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node)
-{
- struct thread_info *ti;
-
- ti = kmem_cache_alloc_node(thread_info_cache, GFP_KERNEL, node);
- if (unlikely(ti == NULL))
- return NULL;
-#ifdef CONFIG_DEBUG_STACK_USAGE
- memset(ti, 0, THREAD_SIZE);
-#endif
- return ti;
-}
-
-void free_thread_info(struct thread_info *ti)
-{
- kmem_cache_free(thread_info_cache, ti);
-}
-
-void thread_info_cache_init(void)
-{
- thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
- THREAD_SIZE, 0, NULL);
- BUG_ON(thread_info_cache == NULL);
-}
-
-#endif /* THREAD_SHIFT < PAGE_SHIFT */
-
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 99860273211..0794a3017b1 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -680,6 +680,9 @@ static void __init early_cmdline_parse(void)
#define OV3_VMX 0x40 /* VMX/Altivec */
#define OV3_DFP 0x20 /* decimal FP */
+/* Option vector 4: IBM PAPR implementation */
+#define OV4_MIN_ENT_CAP 0x01 /* minimum VP entitled capacity */
+
/* Option vector 5: PAPR/OF options supported */
#define OV5_LPAR 0x80 /* logical partitioning supported */
#define OV5_SPLPAR 0x40 /* shared-processor LPAR supported */
@@ -701,6 +704,8 @@ static void __init early_cmdline_parse(void)
#define OV5_XCMO 0x00
#endif
#define OV5_TYPE1_AFFINITY 0x80 /* Type 1 NUMA affinity */
+#define OV5_PFO_HW_RNG 0x80 /* PFO Random Number Generator */
+#define OV5_PFO_HW_ENCR 0x20 /* PFO Encryption Accelerator */
/* Option Vector 6: IBM PAPR hints */
#define OV6_LINUX 0x02 /* Linux is our OS */
@@ -744,11 +749,12 @@ static unsigned char ibm_architecture_vec[] = {
OV3_FP | OV3_VMX | OV3_DFP,
/* option vector 4: IBM PAPR implementation */
- 2 - 2, /* length */
+ 3 - 2, /* length */
0, /* don't halt */
+ OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */
/* option vector 5: PAPR/OF options */
- 13 - 2, /* length */
+ 18 - 2, /* length */
0, /* don't ignore, don't halt */
OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
OV5_DONATE_DEDICATE_CPU | OV5_MSI,
@@ -762,8 +768,13 @@ static unsigned char ibm_architecture_vec[] = {
* must match by the macro below. Update the definition if
* the structure layout changes.
*/
-#define IBM_ARCH_VEC_NRCORES_OFFSET 100
+#define IBM_ARCH_VEC_NRCORES_OFFSET 101
W(NR_CPUS), /* number of cores supported */
+ 0,
+ 0,
+ 0,
+ 0,
+ OV5_PFO_HW_RNG | OV5_PFO_HW_ENCR,
/* option vector 6: IBM PAPR hints */
4 - 2, /* length */
@@ -1301,7 +1312,7 @@ static struct opal_secondary_data {
extern char opal_secondary_entry;
-static void prom_query_opal(void)
+static void __init prom_query_opal(void)
{
long rc;
@@ -1425,7 +1436,7 @@ static void __init prom_opal_hold_cpus(void)
prom_debug("prom_opal_hold_cpus: end...\n");
}
-static void prom_opal_takeover(void)
+static void __init prom_opal_takeover(void)
{
struct opal_secondary_data *data = &RELOC(opal_secondary_data);
struct opal_takeover_args *args = &data->args;
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 8d8e028893b..c10fc28b909 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1432,40 +1432,6 @@ static long ppc_del_hwdebug(struct task_struct *child, long addr, long data)
#endif
}
-/*
- * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls,
- * we mark them as obsolete now, they will be removed in a future version
- */
-static long arch_ptrace_old(struct task_struct *child, long request,
- unsigned long addr, unsigned long data)
-{
- void __user *datavp = (void __user *) data;
-
- switch (request) {
- case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_GPR, 0, 32 * sizeof(long),
- datavp);
-
- case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_GPR, 0, 32 * sizeof(long),
- datavp);
-
- case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_FPR, 0, 32 * sizeof(double),
- datavp);
-
- case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_FPR, 0, 32 * sizeof(double),
- datavp);
- }
-
- return -EPERM;
-}
-
long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
{
@@ -1687,14 +1653,6 @@ long arch_ptrace(struct task_struct *child, long request,
datavp);
#endif
- /* Old reverse args ptrace callss */
- case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
- case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
- case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */
- case PPC_PTRACE_SETFPREGS: /* Get FPRs 0 - 31. */
- ret = arch_ptrace_old(child, request, addr, data);
- break;
-
default:
ret = ptrace_request(child, request, addr, data);
break;
@@ -1710,7 +1668,7 @@ long do_syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;
- secure_computing(regs->gpr[0]);
+ secure_computing_strict(regs->gpr[0]);
if (test_thread_flag(TIF_SYSCALL_TRACE) &&
tracehook_report_syscall_entry(regs))
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index 469349d14a9..8c21658719d 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -39,30 +39,6 @@
* in exit.c or in signal.c.
*/
-/*
- * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls,
- * we mark them as obsolete now, they will be removed in a future version
- */
-static long compat_ptrace_old(struct task_struct *child, long request,
- long addr, long data)
-{
- switch (request) {
- case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
- return copy_regset_to_user(child,
- task_user_regset_view(current), 0,
- 0, 32 * sizeof(compat_long_t),
- compat_ptr(data));
-
- case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
- return copy_regset_from_user(child,
- task_user_regset_view(current), 0,
- 0, 32 * sizeof(compat_long_t),
- compat_ptr(data));
- }
-
- return -EPERM;
-}
-
/* Macros to workout the correct index for the FPR in the thread struct */
#define FPRNUMBER(i) (((i) - PT_FPR0) >> 1)
#define FPRHALF(i) (((i) - PT_FPR0) & 1)
@@ -308,8 +284,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
case PTRACE_SETVSRREGS:
case PTRACE_GETREGS64:
case PTRACE_SETREGS64:
- case PPC_PTRACE_GETFPREGS:
- case PPC_PTRACE_SETFPREGS:
case PTRACE_KILL:
case PTRACE_SINGLESTEP:
case PTRACE_DETACH:
@@ -322,12 +296,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
ret = arch_ptrace(child, request, addr, data);
break;
- /* Old reverse args ptrace callss */
- case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
- case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
- ret = compat_ptrace_old(child, request, addr, data);
- break;
-
default:
ret = compat_ptrace_request(child, request, addr, data);
break;
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 9825f29d1fa..ec8a53fa9e8 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -150,6 +150,9 @@ notrace void __init machine_init(u64 dt_ptr)
}
#ifdef CONFIG_BOOKE_WDT
+extern u32 booke_wdt_enabled;
+extern u32 booke_wdt_period;
+
/* Checks wdt=x and wdt_period=xx command-line option */
notrace int __init early_parse_wdt(char *p)
{
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 651c5963662..5c023c9cf16 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -51,16 +51,6 @@ void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
return (void __user *)newsp;
}
-
-/*
- * Restore the user process's signal mask
- */
-void restore_sigmask(sigset_t *set)
-{
- sigdelsetmask(set, ~_BLOCKABLE);
- set_current_blocked(set);
-}
-
static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
int has_handler)
{
@@ -114,30 +104,21 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
static int do_signal(struct pt_regs *regs)
{
- sigset_t *oldset;
+ sigset_t *oldset = sigmask_to_save();
siginfo_t info;
int signr;
struct k_sigaction ka;
int ret;
int is32 = is_32bit_task();
- if (current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK)
- oldset = &current->saved_sigmask;
- else
- oldset = &current->blocked;
-
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
/* Is there any syscall restart business here ? */
check_syscall_restart(regs, &ka, signr > 0);
if (signr <= 0) {
- struct thread_info *ti = current_thread_info();
/* No signal to deliver -- put the saved sigmask back */
- if (ti->local_flags & _TLF_RESTORE_SIGMASK) {
- ti->local_flags &= ~_TLF_RESTORE_SIGMASK;
- sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
- }
+ restore_saved_sigmask();
regs->trap = 0;
return 0; /* no signals delivered */
}
@@ -167,18 +148,7 @@ static int do_signal(struct pt_regs *regs)
regs->trap = 0;
if (ret) {
- block_sigmask(&ka, signr);
-
- /*
- * A signal was successfully delivered; the saved sigmask is in
- * its frame, and we can clear the TLF_RESTORE_SIGMASK flag.
- */
- current_thread_info()->local_flags &= ~_TLF_RESTORE_SIGMASK;
-
- /*
- * Let tracing know that we've done the handler setup.
- */
- tracehook_signal_handler(signr, &info, &ka, regs,
+ signal_delivered(signr, &info, &ka, regs,
test_thread_flag(TIF_SINGLESTEP));
}
@@ -193,8 +163,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
- if (current->replacement_session_keyring)
- key_replace_session_keyring();
}
}
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 8dde973aaaf..e00acb41393 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -10,13 +10,10 @@
#ifndef _POWERPC_ARCH_SIGNAL_H
#define _POWERPC_ARCH_SIGNAL_H
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
size_t frame_size, int is_32);
-extern void restore_sigmask(sigset_t *set);
extern int handle_signal32(unsigned long sig, struct k_sigaction *ka,
siginfo_t *info, sigset_t *oldset,
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 45eb998557f..8b4c049aee2 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -204,10 +204,10 @@ static inline int get_old_sigaction(struct k_sigaction *new_ka,
if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
__get_user(new_ka->sa.sa_handler, &act->sa_handler) ||
- __get_user(new_ka->sa.sa_restorer, &act->sa_restorer))
+ __get_user(new_ka->sa.sa_restorer, &act->sa_restorer) ||
+ __get_user(new_ka->sa.sa_flags, &act->sa_flags) ||
+ __get_user(mask, &act->sa_mask))
return -EFAULT;
- __get_user(new_ka->sa.sa_flags, &act->sa_flags);
- __get_user(mask, &act->sa_mask);
siginitset(&new_ka->sa.sa_mask, mask);
return 0;
}
@@ -244,17 +244,8 @@ static inline int restore_general_regs(struct pt_regs *regs,
long sys_sigsuspend(old_sigset_t mask)
{
sigset_t blocked;
-
- current->saved_sigmask = current->blocked;
-
- mask &= _BLOCKABLE;
siginitset(&blocked, mask);
- set_current_blocked(&blocked);
-
- current->state = TASK_INTERRUPTIBLE;
- schedule();
- set_restore_sigmask();
- return -ERESTARTNOHAND;
+ return sigsuspend(&blocked);
}
long sys_sigaction(int sig, struct old_sigaction __user *act,
@@ -928,7 +919,7 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int
if (!access_ok(VERIFY_READ, mcp, sizeof(*mcp)))
return -EFAULT;
#endif
- restore_sigmask(&set);
+ set_current_blocked(&set);
if (restore_user_regs(regs, mcp, sig))
return -EFAULT;
@@ -1282,7 +1273,7 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
set.sig[0] = sigctx.oldmask;
set.sig[1] = sigctx._unused[3];
#endif
- restore_sigmask(&set);
+ set_current_blocked(&set);
sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
addr = sr;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 2692efdb154..d183f8719a5 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -335,7 +335,7 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
do_exit(SIGSEGV);
- restore_sigmask(&set);
+ set_current_blocked(&set);
if (restore_sigcontext(regs, NULL, 0, &new_ctx->uc_mcontext))
do_exit(SIGSEGV);
@@ -364,7 +364,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
goto badframe;
- restore_sigmask(&set);
+ set_current_blocked(&set);
if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext))
goto badframe;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index d9f94410fd7..e4cb34322de 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -57,27 +57,9 @@
#define DBG(fmt...)
#endif
-
-/* Store all idle threads, this can be reused instead of creating
-* a new thread. Also avoids complicated thread destroy functionality
-* for idle threads.
-*/
#ifdef CONFIG_HOTPLUG_CPU
-/*
- * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
- * removed after init for !CONFIG_HOTPLUG_CPU.
- */
-static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
-#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
-#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
-
/* State of each CPU during hotplug phases */
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
-
-#else
-static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
-#define get_idle_for_cpu(x) (idle_thread_array[(x)])
-#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
#endif
struct thread_info *secondary_ti;
@@ -429,60 +411,19 @@ int generic_check_cpu_restart(unsigned int cpu)
}
#endif
-struct create_idle {
- struct work_struct work;
- struct task_struct *idle;
- struct completion done;
- int cpu;
-};
-
-static void __cpuinit do_fork_idle(struct work_struct *work)
+static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
{
- struct create_idle *c_idle =
- container_of(work, struct create_idle, work);
-
- c_idle->idle = fork_idle(c_idle->cpu);
- complete(&c_idle->done);
-}
-
-static int __cpuinit create_idle(unsigned int cpu)
-{
- struct thread_info *ti;
- struct create_idle c_idle = {
- .cpu = cpu,
- .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
- };
- INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
-
- c_idle.idle = get_idle_for_cpu(cpu);
-
- /* We can't use kernel_thread since we must avoid to
- * reschedule the child. We use a workqueue because
- * we want to fork from a kernel thread, not whatever
- * userspace process happens to be trying to online us.
- */
- if (!c_idle.idle) {
- schedule_work(&c_idle.work);
- wait_for_completion(&c_idle.done);
- } else
- init_idle(c_idle.idle, cpu);
- if (IS_ERR(c_idle.idle)) {
- pr_err("Failed fork for CPU %u: %li", cpu, PTR_ERR(c_idle.idle));
- return PTR_ERR(c_idle.idle);
- }
- ti = task_thread_info(c_idle.idle);
+ struct thread_info *ti = task_thread_info(idle);
#ifdef CONFIG_PPC64
- paca[cpu].__current = c_idle.idle;
+ paca[cpu].__current = idle;
paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
ti->cpu = cpu;
- current_set[cpu] = ti;
-
- return 0;
+ secondary_ti = current_set[cpu] = ti;
}
-int __cpuinit __cpu_up(unsigned int cpu)
+int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int rc, c;
@@ -490,12 +431,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
(smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
return -EINVAL;
- /* Make sure we have an idle thread */
- rc = create_idle(cpu);
- if (rc)
- return rc;
-
- secondary_ti = current_set[cpu];
+ cpu_idle_thread_init(cpu, tidle);
/* Make sure callin-map entry is 0 (can be leftover a CPU
* hotplug
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 2c42cd72d0f..be171ee73bf 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -100,7 +100,7 @@ static int decrementer_set_next_event(unsigned long evt,
static void decrementer_set_mode(enum clock_event_mode mode,
struct clock_event_device *dev);
-static struct clock_event_device decrementer_clockevent = {
+struct clock_event_device decrementer_clockevent = {
.name = "decrementer",
.rating = 200,
.irq = 0,
@@ -108,6 +108,7 @@ static struct clock_event_device decrementer_clockevent = {
.set_mode = decrementer_set_mode,
.features = CLOCK_EVT_FEAT_ONESHOT,
};
+EXPORT_SYMBOL(decrementer_clockevent);
DEFINE_PER_CPU(u64, decrementers_next_tb);
static DEFINE_PER_CPU(struct clock_event_device, decrementers);
@@ -474,6 +475,7 @@ void timer_interrupt(struct pt_regs * regs)
struct pt_regs *old_regs;
u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
struct clock_event_device *evt = &__get_cpu_var(decrementers);
+ u64 now;
/* Ensure a positive value is written to the decrementer, or else
* some CPUs will continue to take decrementer exceptions.
@@ -508,9 +510,16 @@ void timer_interrupt(struct pt_regs * regs)
irq_work_run();
}
- *next_tb = ~(u64)0;
- if (evt->event_handler)
- evt->event_handler(evt);
+ now = get_tb_or_rtc();
+ if (now >= *next_tb) {
+ *next_tb = ~(u64)0;
+ if (evt->event_handler)
+ evt->event_handler(evt);
+ } else {
+ now = *next_tb - now;
+ if (now <= DECREMENTER_MAX)
+ set_dec((int)now);
+ }
#ifdef CONFIG_PPC64
/* collect purr register values often, for accurate calculations */
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 6aa0c663e24..158972341a2 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -248,7 +248,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
addr, regs->nip, regs->link, code);
}
- if (!arch_irq_disabled_regs(regs))
+ if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs))
local_irq_enable();
memset(&info, 0, sizeof(info));
@@ -1019,7 +1019,9 @@ void __kprobes program_check_exception(struct pt_regs *regs)
return;
}
- local_irq_enable();
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
#ifdef CONFIG_MATH_EMULATION
/* (reason & REASON_ILLEGAL) would be the obvious thing here,
@@ -1069,6 +1071,10 @@ void alignment_exception(struct pt_regs *regs)
{
int sig, code, fixed = 0;
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
/* we don't implement logging of alignment exceptions */
if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
fixed = fix_alignment(regs);
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 4d5a3edff49..e830289d2e4 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -89,6 +89,16 @@ _GLOBAL(load_up_altivec)
/* restore registers and return */
blr
+_GLOBAL(giveup_altivec_notask)
+ mfmsr r3
+ andis. r4,r3,MSR_VEC@h
+ bnelr /* Already enabled? */
+ oris r3,r3,MSR_VEC@h
+ SYNC
+ MTMSRD(r3) /* enable use of VMX now */
+ isync
+ blr
+
/*
* giveup_altivec(tsk)
* Disable VMX for the task given as the argument,
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index a3a99901c8e..cb87301ccd5 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -14,7 +14,9 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/cpu.h>
#include <linux/types.h>
+#include <linux/delay.h>
#include <linux/stat.h>
#include <linux/device.h>
#include <linux/init.h>
@@ -709,13 +711,26 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev)
struct vio_driver *viodrv = to_vio_driver(dev->driver);
unsigned long flags;
size_t size;
+ bool dma_capable = false;
+
+ /* A device requires entitlement if it has a DMA window property */
+ switch (viodev->family) {
+ case VDEVICE:
+ if (of_get_property(viodev->dev.of_node,
+ "ibm,my-dma-window", NULL))
+ dma_capable = true;
+ break;
+ case PFO:
+ dma_capable = false;
+ break;
+ default:
+ dev_warn(dev, "unknown device family: %d\n", viodev->family);
+ BUG();
+ break;
+ }
- /*
- * Check to see that device has a DMA window and configure
- * entitlement for the device.
- */
- if (of_get_property(viodev->dev.of_node,
- "ibm,my-dma-window", NULL)) {
+ /* Configure entitlement for the device. */
+ if (dma_capable) {
/* Check that the driver is CMO enabled and get desired DMA */
if (!viodrv->get_desired_dma) {
dev_err(dev, "%s: device driver does not support CMO\n",
@@ -1050,6 +1065,94 @@ static void vio_cmo_sysfs_init(void) { }
EXPORT_SYMBOL(vio_cmo_entitlement_update);
EXPORT_SYMBOL(vio_cmo_set_dev_desired);
+
+/*
+ * Platform Facilities Option (PFO) support
+ */
+
+/**
+ * vio_h_cop_sync - Perform a synchronous PFO co-processor operation
+ *
+ * @vdev - Pointer to a struct vio_dev for device
+ * @op - Pointer to a struct vio_pfo_op for the operation parameters
+ *
+ * Calls the hypervisor to synchronously perform the PFO operation
+ * described in @op. In the case of a busy response from the hypervisor,
+ * the operation will be re-submitted indefinitely unless a non-zero timeout
+ * is specified or an error occurs. The timeout places a limit on when to
+ * stop re-submitting a operation, the total time can be exceeded if an
+ * operation is in progress.
+ *
+ * If op->hcall_ret is not NULL, this will be set to the return from the
+ * last h_cop_op call or it will be 0 if an error not involving the h_call
+ * was encountered.
+ *
+ * Returns:
+ * 0 on success,
+ * -EINVAL if the h_call fails due to an invalid parameter,
+ * -E2BIG if the h_call can not be performed synchronously,
+ * -EBUSY if a timeout is specified and has elapsed,
+ * -EACCES if the memory area for data/status has been rescinded, or
+ * -EPERM if a hardware fault has been indicated
+ */
+int vio_h_cop_sync(struct vio_dev *vdev, struct vio_pfo_op *op)
+{
+ struct device *dev = &vdev->dev;
+ unsigned long deadline = 0;
+ long hret = 0;
+ int ret = 0;
+
+ if (op->timeout)
+ deadline = jiffies + msecs_to_jiffies(op->timeout);
+
+ while (true) {
+ hret = plpar_hcall_norets(H_COP, op->flags,
+ vdev->resource_id,
+ op->in, op->inlen, op->out,
+ op->outlen, op->csbcpb);
+
+ if (hret == H_SUCCESS ||
+ (hret != H_NOT_ENOUGH_RESOURCES &&
+ hret != H_BUSY && hret != H_RESOURCE) ||
+ (op->timeout && time_after(deadline, jiffies)))
+ break;
+
+ dev_dbg(dev, "%s: hcall ret(%ld), retrying.\n", __func__, hret);
+ }
+
+ switch (hret) {
+ case H_SUCCESS:
+ ret = 0;
+ break;
+ case H_OP_MODE:
+ case H_TOO_BIG:
+ ret = -E2BIG;
+ break;
+ case H_RESCINDED:
+ ret = -EACCES;
+ break;
+ case H_HARDWARE:
+ ret = -EPERM;
+ break;
+ case H_NOT_ENOUGH_RESOURCES:
+ case H_RESOURCE:
+ case H_BUSY:
+ ret = -EBUSY;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret)
+ dev_dbg(dev, "%s: Sync h_cop_op failure (ret:%d) (hret:%ld)\n",
+ __func__, ret, hret);
+
+ op->hcall_err = hret;
+ return ret;
+}
+EXPORT_SYMBOL(vio_h_cop_sync);
+
static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
{
const unsigned char *dma_window;
@@ -1211,35 +1314,87 @@ static void __devinit vio_dev_release(struct device *dev)
struct vio_dev *vio_register_device_node(struct device_node *of_node)
{
struct vio_dev *viodev;
+ struct device_node *parent_node;
const unsigned int *unit_address;
+ const unsigned int *pfo_resid = NULL;
+ enum vio_dev_family family;
+ const char *of_node_name = of_node->name ? of_node->name : "<unknown>";
- /* we need the 'device_type' property, in order to match with drivers */
- if (of_node->type == NULL) {
- printk(KERN_WARNING "%s: node %s missing 'device_type'\n",
- __func__,
- of_node->name ? of_node->name : "<unknown>");
+ /*
+ * Determine if this node is a under the /vdevice node or under the
+ * /ibm,platform-facilities node. This decides the device's family.
+ */
+ parent_node = of_get_parent(of_node);
+ if (parent_node) {
+ if (!strcmp(parent_node->full_name, "/ibm,platform-facilities"))
+ family = PFO;
+ else if (!strcmp(parent_node->full_name, "/vdevice"))
+ family = VDEVICE;
+ else {
+ pr_warn("%s: parent(%s) of %s not recognized.\n",
+ __func__,
+ parent_node->full_name,
+ of_node_name);
+ of_node_put(parent_node);
+ return NULL;
+ }
+ of_node_put(parent_node);
+ } else {
+ pr_warn("%s: could not determine the parent of node %s.\n",
+ __func__, of_node_name);
return NULL;
}
- unit_address = of_get_property(of_node, "reg", NULL);
- if (unit_address == NULL) {
- printk(KERN_WARNING "%s: node %s missing 'reg'\n",
- __func__,
- of_node->name ? of_node->name : "<unknown>");
- return NULL;
+ if (family == PFO) {
+ if (of_get_property(of_node, "interrupt-controller", NULL)) {
+ pr_debug("%s: Skipping the interrupt controller %s.\n",
+ __func__, of_node_name);
+ return NULL;
+ }
}
/* allocate a vio_dev for this node */
viodev = kzalloc(sizeof(struct vio_dev), GFP_KERNEL);
- if (viodev == NULL)
+ if (viodev == NULL) {
+ pr_warn("%s: allocation failure for VIO device.\n", __func__);
return NULL;
+ }
- viodev->irq = irq_of_parse_and_map(of_node, 0);
+ /* we need the 'device_type' property, in order to match with drivers */
+ viodev->family = family;
+ if (viodev->family == VDEVICE) {
+ if (of_node->type != NULL)
+ viodev->type = of_node->type;
+ else {
+ pr_warn("%s: node %s is missing the 'device_type' "
+ "property.\n", __func__, of_node_name);
+ goto out;
+ }
+
+ unit_address = of_get_property(of_node, "reg", NULL);
+ if (unit_address == NULL) {
+ pr_warn("%s: node %s missing 'reg'\n",
+ __func__, of_node_name);
+ goto out;
+ }
+ dev_set_name(&viodev->dev, "%x", *unit_address);
+ viodev->irq = irq_of_parse_and_map(of_node, 0);
+ viodev->unit_address = *unit_address;
+ } else {
+ /* PFO devices need their resource_id for submitting COP_OPs
+ * This is an optional field for devices, but is required when
+ * performing synchronous ops */
+ pfo_resid = of_get_property(of_node, "ibm,resource-id", NULL);
+ if (pfo_resid != NULL)
+ viodev->resource_id = *pfo_resid;
+
+ unit_address = NULL;
+ dev_set_name(&viodev->dev, "%s", of_node_name);
+ viodev->type = of_node_name;
+ viodev->irq = 0;
+ }
- dev_set_name(&viodev->dev, "%x", *unit_address);
viodev->name = of_node->name;
- viodev->type = of_node->type;
- viodev->unit_address = *unit_address;
viodev->dev.of_node = of_node_get(of_node);
if (firmware_has_feature(FW_FEATURE_CMO))
@@ -1267,16 +1422,51 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
}
return viodev;
+
+out: /* Use this exit point for any return prior to device_register */
+ kfree(viodev);
+
+ return NULL;
}
EXPORT_SYMBOL(vio_register_device_node);
+/*
+ * vio_bus_scan_for_devices - Scan OF and register each child device
+ * @root_name - OF node name for the root of the subtree to search.
+ * This must be non-NULL
+ *
+ * Starting from the root node provide, register the device node for
+ * each child beneath the root.
+ */
+static void vio_bus_scan_register_devices(char *root_name)
+{
+ struct device_node *node_root, *node_child;
+
+ if (!root_name)
+ return;
+
+ node_root = of_find_node_by_name(NULL, root_name);
+ if (node_root) {
+
+ /*
+ * Create struct vio_devices for each virtual device in
+ * the device tree. Drivers will associate with them later.
+ */
+ node_child = of_get_next_child(node_root, NULL);
+ while (node_child) {
+ vio_register_device_node(node_child);
+ node_child = of_get_next_child(node_root, node_child);
+ }
+ of_node_put(node_root);
+ }
+}
+
/**
* vio_bus_init: - Initialize the virtual IO bus
*/
static int __init vio_bus_init(void)
{
int err;
- struct device_node *node_vroot;
if (firmware_has_feature(FW_FEATURE_CMO))
vio_cmo_sysfs_init();
@@ -1301,19 +1491,8 @@ static int __init vio_bus_init(void)
if (firmware_has_feature(FW_FEATURE_CMO))
vio_cmo_bus_init();
- node_vroot = of_find_node_by_name(NULL, "vdevice");
- if (node_vroot) {
- struct device_node *of_node;
-
- /*
- * Create struct vio_devices for each virtual device in
- * the device tree. Drivers will associate with them later.
- */
- for (of_node = node_vroot->child; of_node != NULL;
- of_node = of_node->sibling)
- vio_register_device_node(of_node);
- of_node_put(node_vroot);
- }
+ vio_bus_scan_register_devices("vdevice");
+ vio_bus_scan_register_devices("ibm,platform-facilities");
return 0;
}
@@ -1436,12 +1615,28 @@ struct vio_dev *vio_find_node(struct device_node *vnode)
{
const uint32_t *unit_address;
char kobj_name[20];
+ struct device_node *vnode_parent;
+ const char *dev_type;
+
+ vnode_parent = of_get_parent(vnode);
+ if (!vnode_parent)
+ return NULL;
+
+ dev_type = of_get_property(vnode_parent, "device_type", NULL);
+ of_node_put(vnode_parent);
+ if (!dev_type)
+ return NULL;
/* construct the kobject name from the device node */
- unit_address = of_get_property(vnode, "reg", NULL);
- if (!unit_address)
+ if (!strcmp(dev_type, "vdevice")) {
+ unit_address = of_get_property(vnode, "reg", NULL);
+ if (!unit_address)
+ return NULL;
+ snprintf(kobj_name, sizeof(kobj_name), "%x", *unit_address);
+ } else if (!strcmp(dev_type, "ibm,platform-facilities"))
+ snprintf(kobj_name, sizeof(kobj_name), "%s", vnode->name);
+ else
return NULL;
- snprintf(kobj_name, sizeof(kobj_name), "%x", *unit_address);
return vio_find_name(kobj_name);
}
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 7b612a76c70..50e7dbc7356 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -29,15 +29,18 @@
#include <asm/kvm_ppc.h>
#include "44x_tlb.h"
+#include "booke.h"
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ kvmppc_booke_vcpu_load(vcpu, cpu);
kvmppc_44x_tlb_load(vcpu);
}
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
kvmppc_44x_tlb_put(vcpu);
+ kvmppc_booke_vcpu_put(vcpu);
}
int kvmppc_core_check_processor_compat(void)
@@ -160,6 +163,15 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
}
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+}
+
static int __init kvmppc_44x_init(void)
{
int r;
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 549bb2c9a47..c8c61578fdf 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -37,22 +37,19 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
- int dcrn;
- int ra;
- int rb;
- int rc;
- int rs;
- int rt;
- int ws;
+ int dcrn = get_dcrn(inst);
+ int ra = get_ra(inst);
+ int rb = get_rb(inst);
+ int rc = get_rc(inst);
+ int rs = get_rs(inst);
+ int rt = get_rt(inst);
+ int ws = get_ws(inst);
switch (get_op(inst)) {
case 31:
switch (get_xop(inst)) {
case XOP_MFDCR:
- dcrn = get_dcrn(inst);
- rt = get_rt(inst);
-
/* The guest may access CPR0 registers to determine the timebase
* frequency, and it must know the real host frequency because it
* can directly access the timebase registers.
@@ -88,9 +85,6 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case XOP_MTDCR:
- dcrn = get_dcrn(inst);
- rs = get_rs(inst);
-
/* emulate some access in kernel */
switch (dcrn) {
case DCRN_CPR0_CONFIG_ADDR:
@@ -108,17 +102,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case XOP_TLBWE:
- ra = get_ra(inst);
- rs = get_rs(inst);
- ws = get_ws(inst);
emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
break;
case XOP_TLBSX:
- rt = get_rt(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
- rc = get_rc(inst);
emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
break;
@@ -141,41 +128,41 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return emulated;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
switch (sprn) {
case SPRN_PID:
- kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break;
+ kvmppc_set_pid(vcpu, spr_val); break;
case SPRN_MMUCR:
- vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.mmucr = spr_val; break;
case SPRN_CCR0:
- vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.ccr0 = spr_val; break;
case SPRN_CCR1:
- vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.ccr1 = spr_val; break;
default:
- emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
+ emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val);
}
return emulated;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
int emulated = EMULATE_DONE;
switch (sprn) {
case SPRN_PID:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break;
+ *spr_val = vcpu->arch.pid; break;
case SPRN_MMUCR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break;
+ *spr_val = vcpu->arch.mmucr; break;
case SPRN_CCR0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break;
+ *spr_val = vcpu->arch.ccr0; break;
case SPRN_CCR1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break;
+ *spr_val = vcpu->arch.ccr1; break;
default:
- emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
+ emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val);
}
return emulated;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 8f64709ae33..f4dacb9c57f 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -90,6 +90,9 @@ config KVM_BOOK3S_64_PR
depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV
select KVM_BOOK3S_PR
+config KVM_BOOKE_HV
+ bool
+
config KVM_440
bool "KVM support for PowerPC 440 processors"
depends on EXPERIMENTAL && 44x
@@ -106,7 +109,7 @@ config KVM_440
config KVM_EXIT_TIMING
bool "Detailed exit timing"
- depends on KVM_440 || KVM_E500
+ depends on KVM_440 || KVM_E500V2 || KVM_E500MC
---help---
Calculate elapsed time for every exit/enter cycle. A per-vcpu
report is available in debugfs kvm/vm#_vcpu#_timing.
@@ -115,14 +118,29 @@ config KVM_EXIT_TIMING
If unsure, say N.
-config KVM_E500
- bool "KVM support for PowerPC E500 processors"
- depends on EXPERIMENTAL && E500
+config KVM_E500V2
+ bool "KVM support for PowerPC E500v2 processors"
+ depends on EXPERIMENTAL && E500 && !PPC_E500MC
select KVM
select KVM_MMIO
---help---
Support running unmodified E500 guest kernels in virtual machines on
- E500 host processors.
+ E500v2 host processors.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
+config KVM_E500MC
+ bool "KVM support for PowerPC E500MC/E5500 processors"
+ depends on EXPERIMENTAL && PPC_E500MC
+ select KVM
+ select KVM_MMIO
+ select KVM_BOOKE_HV
+ ---help---
+ Support running unmodified E500MC/E5500 (32-bit) guest kernels in
+ virtual machines on E500MC/E5500 host processors.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 3688aeecc4b..c2a08636e6d 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -36,7 +36,17 @@ kvm-e500-objs := \
e500.o \
e500_tlb.o \
e500_emulate.o
-kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs)
+kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs)
+
+kvm-e500mc-objs := \
+ $(common-objs-y) \
+ booke.o \
+ booke_emulate.o \
+ bookehv_interrupts.o \
+ e500mc.o \
+ e500_tlb.o \
+ e500_emulate.o
+kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
../../../virt/kvm/coalesced_mmio.o \
@@ -44,6 +54,7 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
book3s_paired_singles.o \
book3s_pr.o \
book3s_pr_papr.o \
+ book3s_64_vio_hv.o \
book3s_emulate.o \
book3s_interrupts.o \
book3s_mmu_hpte.o \
@@ -68,6 +79,7 @@ kvm-book3s_64-module-objs := \
powerpc.o \
emulate.o \
book3s.o \
+ book3s_64_vio.o \
$(kvm-book3s_64-objs-y)
kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
@@ -88,7 +100,8 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
obj-$(CONFIG_KVM_440) += kvm.o
-obj-$(CONFIG_KVM_E500) += kvm.o
+obj-$(CONFIG_KVM_E500V2) += kvm.o
+obj-$(CONFIG_KVM_E500MC) += kvm.o
obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 7d54f4ed6d9..3f2a8360c85 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -258,7 +258,7 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
return true;
}
-void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
+int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
{
unsigned long *pending = &vcpu->arch.pending_exceptions;
unsigned long old_pending = vcpu->arch.pending_exceptions;
@@ -283,12 +283,17 @@ void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
/* Tell the guest about our interrupt status */
kvmppc_update_int_pending(vcpu, *pending, old_pending);
+
+ return 0;
}
pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
ulong mp_pa = vcpu->arch.magic_page_pa;
+ if (!(vcpu->arch.shared->msr & MSR_SF))
+ mp_pa = (uint32_t)mp_pa;
+
/* Magic page override */
if (unlikely(mp_pa) &&
unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) ==
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 6f87f39a1ac..10fc8ec9d2a 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -194,14 +194,14 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
backwards_map = !backwards_map;
/* Uh-oh ... out of mappings. Let's flush! */
- if (vcpu_book3s->vsid_next == vcpu_book3s->vsid_max) {
- vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
+ if (vcpu_book3s->proto_vsid_next == vcpu_book3s->proto_vsid_max) {
+ vcpu_book3s->proto_vsid_next = vcpu_book3s->proto_vsid_first;
memset(vcpu_book3s->sid_map, 0,
sizeof(struct kvmppc_sid_map) * SID_MAP_NUM);
kvmppc_mmu_pte_flush(vcpu, 0, 0);
kvmppc_mmu_flush_segments(vcpu);
}
- map->host_vsid = vcpu_book3s->vsid_next++;
+ map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++, 256M);
map->guest_vsid = gvsid;
map->valid = true;
@@ -319,9 +319,10 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
return -1;
vcpu3s->context_id[0] = err;
- vcpu3s->vsid_max = ((vcpu3s->context_id[0] + 1) << USER_ESID_BITS) - 1;
- vcpu3s->vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS;
- vcpu3s->vsid_next = vcpu3s->vsid_first;
+ vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1)
+ << USER_ESID_BITS) - 1;
+ vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS;
+ vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first;
kvmppc_mmu_hpte_init(vcpu);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index ddc485a529f..80a57751758 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -36,13 +36,11 @@
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970 63
-#define NR_LPIDS (LPID_RSVD + 1)
-unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)];
long kvmppc_alloc_hpt(struct kvm *kvm)
{
unsigned long hpt;
- unsigned long lpid;
+ long lpid;
struct revmap_entry *rev;
struct kvmppc_linear_info *li;
@@ -72,14 +70,9 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
}
kvm->arch.revmap = rev;
- /* Allocate the guest's logical partition ID */
- do {
- lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
- if (lpid >= NR_LPIDS) {
- pr_err("kvm_alloc_hpt: No LPIDs free\n");
- goto out_freeboth;
- }
- } while (test_and_set_bit(lpid, lpid_inuse));
+ lpid = kvmppc_alloc_lpid();
+ if (lpid < 0)
+ goto out_freeboth;
kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
kvm->arch.lpid = lpid;
@@ -96,7 +89,7 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
void kvmppc_free_hpt(struct kvm *kvm)
{
- clear_bit(kvm->arch.lpid, lpid_inuse);
+ kvmppc_free_lpid(kvm->arch.lpid);
vfree(kvm->arch.revmap);
if (kvm->arch.hpt_li)
kvm_release_hpt(kvm->arch.hpt_li);
@@ -171,8 +164,7 @@ int kvmppc_mmu_hv_init(void)
if (!cpu_has_feature(CPU_FTR_HVMODE))
return -EINVAL;
- memset(lpid_inuse, 0, sizeof(lpid_inuse));
-
+ /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
if (cpu_has_feature(CPU_FTR_ARCH_206)) {
host_lpid = mfspr(SPRN_LPID); /* POWER7 */
rsvd_lpid = LPID_RSVD;
@@ -181,9 +173,11 @@ int kvmppc_mmu_hv_init(void)
rsvd_lpid = MAX_LPID_970;
}
- set_bit(host_lpid, lpid_inuse);
+ kvmppc_init_lpid(rsvd_lpid + 1);
+
+ kvmppc_claim_lpid(host_lpid);
/* rsvd_lpid is reserved for use in partition switching */
- set_bit(rsvd_lpid, lpid_inuse);
+ kvmppc_claim_lpid(rsvd_lpid);
return 0;
}
@@ -258,6 +252,8 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
!(memslot->userspace_addr & (s - 1))) {
start &= ~(s - 1);
pgsize = s;
+ get_page(hpage);
+ put_page(page);
page = hpage;
}
}
@@ -281,11 +277,8 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
err = 0;
out:
- if (got) {
- if (PageHuge(page))
- page = compound_head(page);
+ if (got)
put_page(page);
- }
return err;
up_err:
@@ -453,7 +446,7 @@ static int instruction_is_store(unsigned int instr)
}
static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned long gpa, int is_store)
+ unsigned long gpa, gva_t ea, int is_store)
{
int ret;
u32 last_inst;
@@ -500,6 +493,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
vcpu->arch.paddr_accessed = gpa;
+ vcpu->arch.vaddr_accessed = ea;
return kvmppc_emulate_mmio(run, vcpu);
}
@@ -553,7 +547,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* No memslot means it's an emulated MMIO region */
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
- return kvmppc_hv_emulate_mmio(run, vcpu, gpa,
+ return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
dsisr & DSISR_ISSTORE);
}
@@ -678,8 +672,15 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
SetPageDirty(page);
out_put:
- if (page)
- put_page(page);
+ if (page) {
+ /*
+ * We drop pages[0] here, not page because page might
+ * have been set to the head page of a compound, but
+ * we have to drop the reference on the correct tail
+ * page to match the get inside gup()
+ */
+ put_page(pages[0]);
+ }
return ret;
out_unlock:
@@ -979,6 +980,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
pa = *physp;
}
page = pfn_to_page(pa >> PAGE_SHIFT);
+ get_page(page);
} else {
hva = gfn_to_hva_memslot(memslot, gfn);
npages = get_user_pages_fast(hva, 1, 1, pages);
@@ -991,8 +993,6 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
page = compound_head(page);
psize <<= compound_order(page);
}
- if (!kvm->arch.using_mmu_notifiers)
- get_page(page);
offset = gpa & (psize - 1);
if (nb_ret)
*nb_ret = psize - offset;
@@ -1003,7 +1003,6 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
{
struct page *page = virt_to_page(va);
- page = compound_head(page);
put_page(page);
}
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index f2e6e48ea46..56b983e7b73 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -90,8 +90,6 @@ slb_exit_skip_ ## num:
or r10, r10, r12
slbie r10
- isync
-
/* Fill SLB with our shadow */
lbz r12, SVCPU_SLB_MAX(r3)
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
new file mode 100644
index 00000000000..72ffc899c08
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -0,0 +1,150 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/list.h>
+#include <linux/anon_inodes.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+#include <asm/kvm_host.h>
+#include <asm/udbg.h>
+
+#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+
+static long kvmppc_stt_npages(unsigned long window_size)
+{
+ return ALIGN((window_size >> SPAPR_TCE_SHIFT)
+ * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
+}
+
+static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
+{
+ struct kvm *kvm = stt->kvm;
+ int i;
+
+ mutex_lock(&kvm->lock);
+ list_del(&stt->list);
+ for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
+ __free_page(stt->pages[i]);
+ kfree(stt);
+ mutex_unlock(&kvm->lock);
+
+ kvm_put_kvm(kvm);
+}
+
+static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
+ struct page *page;
+
+ if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
+ return VM_FAULT_SIGBUS;
+
+ page = stt->pages[vmf->pgoff];
+ get_page(page);
+ vmf->page = page;
+ return 0;
+}
+
+static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
+ .fault = kvm_spapr_tce_fault,
+};
+
+static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ vma->vm_ops = &kvm_spapr_tce_vm_ops;
+ return 0;
+}
+
+static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
+{
+ struct kvmppc_spapr_tce_table *stt = filp->private_data;
+
+ release_spapr_tce_table(stt);
+ return 0;
+}
+
+static struct file_operations kvm_spapr_tce_fops = {
+ .mmap = kvm_spapr_tce_mmap,
+ .release = kvm_spapr_tce_release,
+};
+
+long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+ struct kvm_create_spapr_tce *args)
+{
+ struct kvmppc_spapr_tce_table *stt = NULL;
+ long npages;
+ int ret = -ENOMEM;
+ int i;
+
+ /* Check this LIOBN hasn't been previously allocated */
+ list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
+ if (stt->liobn == args->liobn)
+ return -EBUSY;
+ }
+
+ npages = kvmppc_stt_npages(args->window_size);
+
+ stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
+ GFP_KERNEL);
+ if (!stt)
+ goto fail;
+
+ stt->liobn = args->liobn;
+ stt->window_size = args->window_size;
+ stt->kvm = kvm;
+
+ for (i = 0; i < npages; i++) {
+ stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!stt->pages[i])
+ goto fail;
+ }
+
+ kvm_get_kvm(kvm);
+
+ mutex_lock(&kvm->lock);
+ list_add(&stt->list, &kvm->arch.spapr_tce_tables);
+
+ mutex_unlock(&kvm->lock);
+
+ return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+ stt, O_RDWR);
+
+fail:
+ if (stt) {
+ for (i = 0; i < npages; i++)
+ if (stt->pages[i])
+ __free_page(stt->pages[i]);
+
+ kfree(stt);
+ }
+ return ret;
+}
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index ea0f8c537c2..30c2f3b134c 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -38,6 +38,9 @@
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+/* WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce)
{
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 135663a3e4f..b9a989dc76c 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -87,6 +87,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
+ int rt = get_rt(inst);
+ int rs = get_rs(inst);
+ int ra = get_ra(inst);
+ int rb = get_rb(inst);
switch (get_op(inst)) {
case 19:
@@ -106,21 +110,22 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
case 31:
switch (get_xop(inst)) {
case OP_31_XOP_MFMSR:
- kvmppc_set_gpr(vcpu, get_rt(inst),
- vcpu->arch.shared->msr);
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr);
break;
case OP_31_XOP_MTMSRD:
{
- ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst));
+ ulong rs_val = kvmppc_get_gpr(vcpu, rs);
if (inst & 0x10000) {
- vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE);
- vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE);
+ ulong new_msr = vcpu->arch.shared->msr;
+ new_msr &= ~(MSR_RI | MSR_EE);
+ new_msr |= rs_val & (MSR_RI | MSR_EE);
+ vcpu->arch.shared->msr = new_msr;
} else
- kvmppc_set_msr(vcpu, rs);
+ kvmppc_set_msr(vcpu, rs_val);
break;
}
case OP_31_XOP_MTMSR:
- kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst)));
+ kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_MFSR:
{
@@ -130,7 +135,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (vcpu->arch.mmu.mfsrin) {
u32 sr;
sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
- kvmppc_set_gpr(vcpu, get_rt(inst), sr);
+ kvmppc_set_gpr(vcpu, rt, sr);
}
break;
}
@@ -138,29 +143,29 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
{
int srnum;
- srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf;
+ srnum = (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf;
if (vcpu->arch.mmu.mfsrin) {
u32 sr;
sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
- kvmppc_set_gpr(vcpu, get_rt(inst), sr);
+ kvmppc_set_gpr(vcpu, rt, sr);
}
break;
}
case OP_31_XOP_MTSR:
vcpu->arch.mmu.mtsrin(vcpu,
(inst >> 16) & 0xf,
- kvmppc_get_gpr(vcpu, get_rs(inst)));
+ kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_MTSRIN:
vcpu->arch.mmu.mtsrin(vcpu,
- (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf,
- kvmppc_get_gpr(vcpu, get_rs(inst)));
+ (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf,
+ kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_TLBIE:
case OP_31_XOP_TLBIEL:
{
bool large = (inst & 0x00200000) ? true : false;
- ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst));
+ ulong addr = kvmppc_get_gpr(vcpu, rb);
vcpu->arch.mmu.tlbie(vcpu, addr, large);
break;
}
@@ -171,15 +176,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return EMULATE_FAIL;
vcpu->arch.mmu.slbmte(vcpu,
- kvmppc_get_gpr(vcpu, get_rs(inst)),
- kvmppc_get_gpr(vcpu, get_rb(inst)));
+ kvmppc_get_gpr(vcpu, rs),
+ kvmppc_get_gpr(vcpu, rb));
break;
case OP_31_XOP_SLBIE:
if (!vcpu->arch.mmu.slbie)
return EMULATE_FAIL;
vcpu->arch.mmu.slbie(vcpu,
- kvmppc_get_gpr(vcpu, get_rb(inst)));
+ kvmppc_get_gpr(vcpu, rb));
break;
case OP_31_XOP_SLBIA:
if (!vcpu->arch.mmu.slbia)
@@ -191,22 +196,22 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (!vcpu->arch.mmu.slbmfee) {
emulated = EMULATE_FAIL;
} else {
- ulong t, rb;
+ ulong t, rb_val;
- rb = kvmppc_get_gpr(vcpu, get_rb(inst));
- t = vcpu->arch.mmu.slbmfee(vcpu, rb);
- kvmppc_set_gpr(vcpu, get_rt(inst), t);
+ rb_val = kvmppc_get_gpr(vcpu, rb);
+ t = vcpu->arch.mmu.slbmfee(vcpu, rb_val);
+ kvmppc_set_gpr(vcpu, rt, t);
}
break;
case OP_31_XOP_SLBMFEV:
if (!vcpu->arch.mmu.slbmfev) {
emulated = EMULATE_FAIL;
} else {
- ulong t, rb;
+ ulong t, rb_val;
- rb = kvmppc_get_gpr(vcpu, get_rb(inst));
- t = vcpu->arch.mmu.slbmfev(vcpu, rb);
- kvmppc_set_gpr(vcpu, get_rt(inst), t);
+ rb_val = kvmppc_get_gpr(vcpu, rb);
+ t = vcpu->arch.mmu.slbmfev(vcpu, rb_val);
+ kvmppc_set_gpr(vcpu, rt, t);
}
break;
case OP_31_XOP_DCBA:
@@ -214,17 +219,17 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case OP_31_XOP_DCBZ:
{
- ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst));
- ulong ra = 0;
+ ulong rb_val = kvmppc_get_gpr(vcpu, rb);
+ ulong ra_val = 0;
ulong addr, vaddr;
u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
u32 dsisr;
int r;
- if (get_ra(inst))
- ra = kvmppc_get_gpr(vcpu, get_ra(inst));
+ if (ra)
+ ra_val = kvmppc_get_gpr(vcpu, ra);
- addr = (ra + rb) & ~31ULL;
+ addr = (ra_val + rb_val) & ~31ULL;
if (!(vcpu->arch.shared->msr & MSR_SF))
addr &= 0xffffffff;
vaddr = addr;
@@ -313,10 +318,9 @@ static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)
return bat;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
- ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_SDR1:
@@ -428,7 +432,7 @@ unprivileged:
return emulated;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
int emulated = EMULATE_DONE;
@@ -441,46 +445,46 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn);
if (sprn % 2)
- kvmppc_set_gpr(vcpu, rt, bat->raw >> 32);
+ *spr_val = bat->raw >> 32;
else
- kvmppc_set_gpr(vcpu, rt, bat->raw);
+ *spr_val = bat->raw;
break;
}
case SPRN_SDR1:
if (!spr_allowed(vcpu, PRIV_HYPER))
goto unprivileged;
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
+ *spr_val = to_book3s(vcpu)->sdr1;
break;
case SPRN_DSISR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr);
+ *spr_val = vcpu->arch.shared->dsisr;
break;
case SPRN_DAR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar);
+ *spr_val = vcpu->arch.shared->dar;
break;
case SPRN_HIOR:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior);
+ *spr_val = to_book3s(vcpu)->hior;
break;
case SPRN_HID0:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]);
+ *spr_val = to_book3s(vcpu)->hid[0];
break;
case SPRN_HID1:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]);
+ *spr_val = to_book3s(vcpu)->hid[1];
break;
case SPRN_HID2:
case SPRN_HID2_GEKKO:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]);
+ *spr_val = to_book3s(vcpu)->hid[2];
break;
case SPRN_HID4:
case SPRN_HID4_GEKKO:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]);
+ *spr_val = to_book3s(vcpu)->hid[4];
break;
case SPRN_HID5:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
+ *spr_val = to_book3s(vcpu)->hid[5];
break;
case SPRN_CFAR:
case SPRN_PURR:
- kvmppc_set_gpr(vcpu, rt, 0);
+ *spr_val = 0;
break;
case SPRN_GQR0:
case SPRN_GQR1:
@@ -490,8 +494,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
case SPRN_GQR5:
case SPRN_GQR6:
case SPRN_GQR7:
- kvmppc_set_gpr(vcpu, rt,
- to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]);
+ *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0];
break;
case SPRN_THRM1:
case SPRN_THRM2:
@@ -506,7 +509,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
case SPRN_PMC3_GEKKO:
case SPRN_PMC4_GEKKO:
case SPRN_WPAR_GEKKO:
- kvmppc_set_gpr(vcpu, rt, 0);
+ *spr_val = 0;
break;
default:
unprivileged:
@@ -565,23 +568,22 @@ u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)
ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
{
ulong dar = 0;
- ulong ra;
+ ulong ra = get_ra(inst);
+ ulong rb = get_rb(inst);
switch (get_op(inst)) {
case OP_LFS:
case OP_LFD:
case OP_STFD:
case OP_STFS:
- ra = get_ra(inst);
if (ra)
dar = kvmppc_get_gpr(vcpu, ra);
dar += (s32)((s16)inst);
break;
case 31:
- ra = get_ra(inst);
if (ra)
dar = kvmppc_get_gpr(vcpu, ra);
- dar += kvmppc_get_gpr(vcpu, get_rb(inst));
+ dar += kvmppc_get_gpr(vcpu, rb);
break;
default:
printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 01294a5099d..3abe1b86e58 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -60,12 +60,20 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
local_paca->kvm_hstate.kvm_vcpu = vcpu;
- local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
+ local_paca->kvm_hstate.kvm_vcore = vc;
+ if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+ vc->stolen_tb += mftb() - vc->preempt_tb;
}
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+ vc->preempt_tb = mftb();
}
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
@@ -134,6 +142,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
vpa->yield_count = 1;
}
+/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
+struct reg_vpa {
+ u32 dummy;
+ union {
+ u16 hword;
+ u32 word;
+ } length;
+};
+
+static int vpa_is_registered(struct kvmppc_vpa *vpap)
+{
+ if (vpap->update_pending)
+ return vpap->next_gpa != 0;
+ return vpap->pinned_addr != NULL;
+}
+
static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
unsigned long flags,
unsigned long vcpuid, unsigned long vpa)
@@ -142,88 +166,201 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
unsigned long len, nb;
void *va;
struct kvm_vcpu *tvcpu;
- int err = H_PARAMETER;
+ int err;
+ int subfunc;
+ struct kvmppc_vpa *vpap;
tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
if (!tvcpu)
return H_PARAMETER;
- flags >>= 63 - 18;
- flags &= 7;
- if (flags == 0 || flags == 4)
- return H_PARAMETER;
- if (flags < 4) {
- if (vpa & 0x7f)
+ subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
+ if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
+ subfunc == H_VPA_REG_SLB) {
+ /* Registering new area - address must be cache-line aligned */
+ if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
return H_PARAMETER;
- if (flags >= 2 && !tvcpu->arch.vpa)
- return H_RESOURCE;
- /* registering new area; convert logical addr to real */
+
+ /* convert logical addr to kernel addr and read length */
va = kvmppc_pin_guest_page(kvm, vpa, &nb);
if (va == NULL)
return H_PARAMETER;
- if (flags <= 1)
- len = *(unsigned short *)(va + 4);
+ if (subfunc == H_VPA_REG_VPA)
+ len = ((struct reg_vpa *)va)->length.hword;
else
- len = *(unsigned int *)(va + 4);
- if (len > nb)
- goto out_unpin;
- switch (flags) {
- case 1: /* register VPA */
- if (len < 640)
- goto out_unpin;
- if (tvcpu->arch.vpa)
- kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa);
- tvcpu->arch.vpa = va;
- init_vpa(vcpu, va);
- break;
- case 2: /* register DTL */
- if (len < 48)
- goto out_unpin;
- len -= len % 48;
- if (tvcpu->arch.dtl)
- kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl);
- tvcpu->arch.dtl = va;
- tvcpu->arch.dtl_end = va + len;
+ len = ((struct reg_vpa *)va)->length.word;
+ kvmppc_unpin_guest_page(kvm, va);
+
+ /* Check length */
+ if (len > nb || len < sizeof(struct reg_vpa))
+ return H_PARAMETER;
+ } else {
+ vpa = 0;
+ len = 0;
+ }
+
+ err = H_PARAMETER;
+ vpap = NULL;
+ spin_lock(&tvcpu->arch.vpa_update_lock);
+
+ switch (subfunc) {
+ case H_VPA_REG_VPA: /* register VPA */
+ if (len < sizeof(struct lppaca))
break;
- case 3: /* register SLB shadow buffer */
- if (len < 16)
- goto out_unpin;
- if (tvcpu->arch.slb_shadow)
- kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow);
- tvcpu->arch.slb_shadow = va;
+ vpap = &tvcpu->arch.vpa;
+ err = 0;
+ break;
+
+ case H_VPA_REG_DTL: /* register DTL */
+ if (len < sizeof(struct dtl_entry))
break;
- }
- } else {
- switch (flags) {
- case 5: /* unregister VPA */
- if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
- return H_RESOURCE;
- if (!tvcpu->arch.vpa)
- break;
- kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
- tvcpu->arch.vpa = NULL;
+ len -= len % sizeof(struct dtl_entry);
+
+ /* Check that they have previously registered a VPA */
+ err = H_RESOURCE;
+ if (!vpa_is_registered(&tvcpu->arch.vpa))
break;
- case 6: /* unregister DTL */
- if (!tvcpu->arch.dtl)
- break;
- kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl);
- tvcpu->arch.dtl = NULL;
+
+ vpap = &tvcpu->arch.dtl;
+ err = 0;
+ break;
+
+ case H_VPA_REG_SLB: /* register SLB shadow buffer */
+ /* Check that they have previously registered a VPA */
+ err = H_RESOURCE;
+ if (!vpa_is_registered(&tvcpu->arch.vpa))
break;
- case 7: /* unregister SLB shadow buffer */
- if (!tvcpu->arch.slb_shadow)
- break;
- kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow);
- tvcpu->arch.slb_shadow = NULL;
+
+ vpap = &tvcpu->arch.slb_shadow;
+ err = 0;
+ break;
+
+ case H_VPA_DEREG_VPA: /* deregister VPA */
+ /* Check they don't still have a DTL or SLB buf registered */
+ err = H_RESOURCE;
+ if (vpa_is_registered(&tvcpu->arch.dtl) ||
+ vpa_is_registered(&tvcpu->arch.slb_shadow))
break;
- }
+
+ vpap = &tvcpu->arch.vpa;
+ err = 0;
+ break;
+
+ case H_VPA_DEREG_DTL: /* deregister DTL */
+ vpap = &tvcpu->arch.dtl;
+ err = 0;
+ break;
+
+ case H_VPA_DEREG_SLB: /* deregister SLB shadow buffer */
+ vpap = &tvcpu->arch.slb_shadow;
+ err = 0;
+ break;
+ }
+
+ if (vpap) {
+ vpap->next_gpa = vpa;
+ vpap->len = len;
+ vpap->update_pending = 1;
}
- return H_SUCCESS;
- out_unpin:
- kvmppc_unpin_guest_page(kvm, va);
+ spin_unlock(&tvcpu->arch.vpa_update_lock);
+
return err;
}
+static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
+{
+ struct kvm *kvm = vcpu->kvm;
+ void *va;
+ unsigned long nb;
+ unsigned long gpa;
+
+ /*
+ * We need to pin the page pointed to by vpap->next_gpa,
+ * but we can't call kvmppc_pin_guest_page under the lock
+ * as it does get_user_pages() and down_read(). So we
+ * have to drop the lock, pin the page, then get the lock
+ * again and check that a new area didn't get registered
+ * in the meantime.
+ */
+ for (;;) {
+ gpa = vpap->next_gpa;
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ va = NULL;
+ nb = 0;
+ if (gpa)
+ va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ if (gpa == vpap->next_gpa)
+ break;
+ /* sigh... unpin that one and try again */
+ if (va)
+ kvmppc_unpin_guest_page(kvm, va);
+ }
+
+ vpap->update_pending = 0;
+ if (va && nb < vpap->len) {
+ /*
+ * If it's now too short, it must be that userspace
+ * has changed the mappings underlying guest memory,
+ * so unregister the region.
+ */
+ kvmppc_unpin_guest_page(kvm, va);
+ va = NULL;
+ }
+ if (vpap->pinned_addr)
+ kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
+ vpap->pinned_addr = va;
+ if (va)
+ vpap->pinned_end = va + vpap->len;
+}
+
+static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
+{
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ if (vcpu->arch.vpa.update_pending) {
+ kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
+ init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
+ }
+ if (vcpu->arch.dtl.update_pending) {
+ kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
+ vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
+ vcpu->arch.dtl_index = 0;
+ }
+ if (vcpu->arch.slb_shadow.update_pending)
+ kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+}
+
+static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+ struct kvmppc_vcore *vc)
+{
+ struct dtl_entry *dt;
+ struct lppaca *vpa;
+ unsigned long old_stolen;
+
+ dt = vcpu->arch.dtl_ptr;
+ vpa = vcpu->arch.vpa.pinned_addr;
+ old_stolen = vcpu->arch.stolen_logged;
+ vcpu->arch.stolen_logged = vc->stolen_tb;
+ if (!dt || !vpa)
+ return;
+ memset(dt, 0, sizeof(struct dtl_entry));
+ dt->dispatch_reason = 7;
+ dt->processor_id = vc->pcpu + vcpu->arch.ptid;
+ dt->timebase = mftb();
+ dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen;
+ dt->srr0 = kvmppc_get_pc(vcpu);
+ dt->srr1 = vcpu->arch.shregs.msr;
+ ++dt;
+ if (dt == vcpu->arch.dtl.pinned_end)
+ dt = vcpu->arch.dtl.pinned_addr;
+ vcpu->arch.dtl_ptr = dt;
+ /* order writing *dt vs. writing vpa->dtl_idx */
+ smp_wmb();
+ vpa->dtl_idx = ++vcpu->arch.dtl_index;
+}
+
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
unsigned long req = kvmppc_get_gpr(vcpu, 3);
@@ -468,6 +605,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
/* default to host PVR, since we can't spoof it */
vcpu->arch.pvr = mfspr(SPRN_PVR);
kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+ spin_lock_init(&vcpu->arch.vpa_update_lock);
kvmppc_mmu_book3s_hv_init(vcpu);
@@ -486,6 +624,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
init_waitqueue_head(&vcore->wq);
+ vcore->preempt_tb = mftb();
}
kvm->arch.vcores[core] = vcore;
}
@@ -498,6 +637,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
++vcore->num_threads;
spin_unlock(&vcore->lock);
vcpu->arch.vcore = vcore;
+ vcpu->arch.stolen_logged = vcore->stolen_tb;
vcpu->arch.cpu_type = KVM_CPU_3S_64;
kvmppc_sanity_check(vcpu);
@@ -512,12 +652,14 @@ out:
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.dtl)
- kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl);
- if (vcpu->arch.slb_shadow)
- kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow);
- if (vcpu->arch.vpa)
- kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa);
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ if (vcpu->arch.dtl.pinned_addr)
+ kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
+ if (vcpu->arch.slb_shadow.pinned_addr)
+ kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
+ if (vcpu->arch.vpa.pinned_addr)
+ kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
@@ -569,6 +711,45 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
list_del(&vcpu->arch.run_list);
}
+static int kvmppc_grab_hwthread(int cpu)
+{
+ struct paca_struct *tpaca;
+ long timeout = 1000;
+
+ tpaca = &paca[cpu];
+
+ /* Ensure the thread won't go into the kernel if it wakes */
+ tpaca->kvm_hstate.hwthread_req = 1;
+
+ /*
+ * If the thread is already executing in the kernel (e.g. handling
+ * a stray interrupt), wait for it to get back to nap mode.
+ * The smp_mb() is to ensure that our setting of hwthread_req
+ * is visible before we look at hwthread_state, so if this
+ * races with the code at system_reset_pSeries and the thread
+ * misses our setting of hwthread_req, we are sure to see its
+ * setting of hwthread_state, and vice versa.
+ */
+ smp_mb();
+ while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
+ if (--timeout <= 0) {
+ pr_err("KVM: couldn't grab cpu %d\n", cpu);
+ return -EBUSY;
+ }
+ udelay(1);
+ }
+ return 0;
+}
+
+static void kvmppc_release_hwthread(int cpu)
+{
+ struct paca_struct *tpaca;
+
+ tpaca = &paca[cpu];
+ tpaca->kvm_hstate.hwthread_req = 0;
+ tpaca->kvm_hstate.kvm_vcpu = NULL;
+}
+
static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
{
int cpu;
@@ -588,8 +769,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
smp_wmb();
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
if (vcpu->arch.ptid) {
- tpaca->cpu_start = 0x80;
- wmb();
+ kvmppc_grab_hwthread(cpu);
xics_wake_cpu(cpu);
++vc->n_woken;
}
@@ -639,12 +819,39 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
struct kvm_vcpu *vcpu, *vcpu0, *vnext;
long ret;
u64 now;
- int ptid;
+ int ptid, i, need_vpa_update;
/* don't start if any threads have a signal pending */
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ need_vpa_update = 0;
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
if (signal_pending(vcpu->arch.run_task))
return 0;
+ need_vpa_update |= vcpu->arch.vpa.update_pending |
+ vcpu->arch.slb_shadow.update_pending |
+ vcpu->arch.dtl.update_pending;
+ }
+
+ /*
+ * Initialize *vc, in particular vc->vcore_state, so we can
+ * drop the vcore lock if necessary.
+ */
+ vc->n_woken = 0;
+ vc->nap_count = 0;
+ vc->entry_exit_count = 0;
+ vc->vcore_state = VCORE_RUNNING;
+ vc->in_guest = 0;
+ vc->napping_threads = 0;
+
+ /*
+ * Updating any of the vpas requires calling kvmppc_pin_guest_page,
+ * which can't be called with any spinlocks held.
+ */
+ if (need_vpa_update) {
+ spin_unlock(&vc->lock);
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ kvmppc_update_vpas(vcpu);
+ spin_lock(&vc->lock);
+ }
/*
* Make sure we are running on thread 0, and that
@@ -677,21 +884,23 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
if (vcpu->arch.ceded)
vcpu->arch.ptid = ptid++;
- vc->n_woken = 0;
- vc->nap_count = 0;
- vc->entry_exit_count = 0;
- vc->vcore_state = VCORE_RUNNING;
- vc->in_guest = 0;
+ vc->stolen_tb += mftb() - vc->preempt_tb;
vc->pcpu = smp_processor_id();
- vc->napping_threads = 0;
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
kvmppc_start_thread(vcpu);
+ kvmppc_create_dtl_entry(vcpu, vc);
+ }
+ /* Grab any remaining hw threads so they can't go into the kernel */
+ for (i = ptid; i < threads_per_core; ++i)
+ kvmppc_grab_hwthread(vc->pcpu + i);
preempt_disable();
spin_unlock(&vc->lock);
kvm_guest_enter();
__kvmppc_vcore_entry(NULL, vcpu0);
+ for (i = 0; i < threads_per_core; ++i)
+ kvmppc_release_hwthread(vc->pcpu + i);
spin_lock(&vc->lock);
/* disable sending of IPIs on virtual external irqs */
@@ -737,6 +946,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
spin_lock(&vc->lock);
out:
vc->vcore_state = VCORE_INACTIVE;
+ vc->preempt_tb = mftb();
list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
arch.run_list) {
if (vcpu->arch.ret != RESUME_GUEST) {
@@ -835,6 +1045,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
spin_lock(&vc->lock);
continue;
}
+ vc->runner = vcpu;
n_ceded = 0;
list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
n_ceded += v->arch.ceded;
@@ -854,6 +1065,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
wake_up(&v->arch.cpu_run);
}
}
+ vc->runner = NULL;
}
if (signal_pending(current)) {
@@ -917,115 +1129,6 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
return r;
}
-static long kvmppc_stt_npages(unsigned long window_size)
-{
- return ALIGN((window_size >> SPAPR_TCE_SHIFT)
- * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
-}
-
-static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
-{
- struct kvm *kvm = stt->kvm;
- int i;
-
- mutex_lock(&kvm->lock);
- list_del(&stt->list);
- for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
- __free_page(stt->pages[i]);
- kfree(stt);
- mutex_unlock(&kvm->lock);
-
- kvm_put_kvm(kvm);
-}
-
-static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
- struct page *page;
-
- if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
- return VM_FAULT_SIGBUS;
-
- page = stt->pages[vmf->pgoff];
- get_page(page);
- vmf->page = page;
- return 0;
-}
-
-static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
- .fault = kvm_spapr_tce_fault,
-};
-
-static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
-{
- vma->vm_ops = &kvm_spapr_tce_vm_ops;
- return 0;
-}
-
-static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
-{
- struct kvmppc_spapr_tce_table *stt = filp->private_data;
-
- release_spapr_tce_table(stt);
- return 0;
-}
-
-static struct file_operations kvm_spapr_tce_fops = {
- .mmap = kvm_spapr_tce_mmap,
- .release = kvm_spapr_tce_release,
-};
-
-long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
- struct kvm_create_spapr_tce *args)
-{
- struct kvmppc_spapr_tce_table *stt = NULL;
- long npages;
- int ret = -ENOMEM;
- int i;
-
- /* Check this LIOBN hasn't been previously allocated */
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == args->liobn)
- return -EBUSY;
- }
-
- npages = kvmppc_stt_npages(args->window_size);
-
- stt = kzalloc(sizeof(*stt) + npages* sizeof(struct page *),
- GFP_KERNEL);
- if (!stt)
- goto fail;
-
- stt->liobn = args->liobn;
- stt->window_size = args->window_size;
- stt->kvm = kvm;
-
- for (i = 0; i < npages; i++) {
- stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!stt->pages[i])
- goto fail;
- }
-
- kvm_get_kvm(kvm);
-
- mutex_lock(&kvm->lock);
- list_add(&stt->list, &kvm->arch.spapr_tce_tables);
-
- mutex_unlock(&kvm->lock);
-
- return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
- stt, O_RDWR);
-
-fail:
- if (stt) {
- for (i = 0; i < npages; i++)
- if (stt->pages[i])
- __free_page(stt->pages[i]);
-
- kfree(stt);
- }
- return ret;
-}
/* Work out RMLS (real mode limit selector) field value for a given RMA size.
Assumes POWER7 or PPC970. */
@@ -1108,6 +1211,38 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
return fd;
}
+static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
+ int linux_psize)
+{
+ struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
+
+ if (!def->shift)
+ return;
+ (*sps)->page_shift = def->shift;
+ (*sps)->slb_enc = def->sllp;
+ (*sps)->enc[0].page_shift = def->shift;
+ (*sps)->enc[0].pte_enc = def->penc;
+ (*sps)++;
+}
+
+int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+{
+ struct kvm_ppc_one_seg_page_size *sps;
+
+ info->flags = KVM_PPC_PAGE_SIZES_REAL;
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ info->flags |= KVM_PPC_1T_SEGMENTS;
+ info->slb_size = mmu_slb_size;
+
+ /* We only support these sizes for now, and no muti-size segments */
+ sps = &info->sps[0];
+ kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
+ kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
+ kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);
+
+ return 0;
+}
+
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
@@ -1192,8 +1327,6 @@ static void unpin_slot(struct kvm *kvm, int slot_id)
continue;
pfn = physp[j] >> PAGE_SHIFT;
page = pfn_to_page(pfn);
- if (PageHuge(page))
- page = compound_head(page);
SetPageDirty(page);
put_page(page);
}
@@ -1406,12 +1539,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return EMULATE_FAIL;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
return EMULATE_FAIL;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
return EMULATE_FAIL;
}
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index d3fb4df02c4..84035a528c8 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -68,19 +68,24 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
rotldi r10,r10,16
mtmsrd r10,1
- /* Save host PMU registers and load guest PMU registers */
+ /* Save host PMU registers */
/* R4 is live here (vcpu pointer) but not r3 or r5 */
li r3, 1
sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
mfspr r7, SPRN_MMCR0 /* save MMCR0 */
mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
+ mfspr r6, SPRN_MMCRA
+BEGIN_FTR_SECTION
+ /* On P7, clear MMCRA in order to disable SDAR updates */
+ li r5, 0
+ mtspr SPRN_MMCRA, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
isync
ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
lbz r5, LPPACA_PMCINUSE(r3)
cmpwi r5, 0
beq 31f /* skip if not */
mfspr r5, SPRN_MMCR1
- mfspr r6, SPRN_MMCRA
std r7, HSTATE_MMCR(r13)
std r5, HSTATE_MMCR + 8(r13)
std r6, HSTATE_MMCR + 16(r13)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index def880aea63..cec4daddbf3 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -463,6 +463,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
/* insert R and C bits from PTE */
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
args[j] |= rcbits << (56 - 5);
+ hp[0] = 0;
continue;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b70bf22a3ff..a84aafce2a1 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -26,6 +26,7 @@
#include <asm/hvcall.h>
#include <asm/asm-offsets.h>
#include <asm/exception-64s.h>
+#include <asm/kvm_book3s_asm.h>
/*****************************************************************************
* *
@@ -82,6 +83,7 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
#define XICS_XIRR 4
#define XICS_QIRR 0xc
+#define XICS_IPI 2 /* interrupt source # for IPIs */
/*
* We come in here when wakened from nap mode on a secondary hw thread.
@@ -94,26 +96,54 @@ kvm_start_guest:
subi r1,r1,STACK_FRAME_OVERHEAD
ld r2,PACATOC(r13)
- /* were we napping due to cede? */
- lbz r0,HSTATE_NAPPING(r13)
- cmpwi r0,0
- bne kvm_end_cede
+ li r0,KVM_HWTHREAD_IN_KVM
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* get vcpu pointer */
- ld r4, HSTATE_KVM_VCPU(r13)
+ /* NV GPR values from power7_idle() will no longer be valid */
+ li r0,1
+ stb r0,PACA_NAPSTATELOST(r13)
- /* We got here with an IPI; clear it */
- ld r5, HSTATE_XICS_PHYS(r13)
- li r0, 0xff
- li r6, XICS_QIRR
- li r7, XICS_XIRR
- lwzcix r8, r5, r7 /* ack the interrupt */
+ /* get vcpu pointer, NULL if we have no vcpu to run */
+ ld r4,HSTATE_KVM_VCPU(r13)
+ cmpdi cr1,r4,0
+
+ /* Check the wake reason in SRR1 to see why we got here */
+ mfspr r3,SPRN_SRR1
+ rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
+ cmpwi r3,4 /* was it an external interrupt? */
+ bne 27f
+
+ /*
+ * External interrupt - for now assume it is an IPI, since we
+ * should never get any other interrupts sent to offline threads.
+ * Only do this for secondary threads.
+ */
+ beq cr1,25f
+ lwz r3,VCPU_PTID(r4)
+ cmpwi r3,0
+ beq 27f
+25: ld r5,HSTATE_XICS_PHYS(r13)
+ li r0,0xff
+ li r6,XICS_QIRR
+ li r7,XICS_XIRR
+ lwzcix r8,r5,r7 /* get and ack the interrupt */
sync
- stbcix r0, r5, r6 /* clear it */
- stwcix r8, r5, r7 /* EOI it */
+ clrldi. r9,r8,40 /* get interrupt source ID. */
+ beq 27f /* none there? */
+ cmpwi r9,XICS_IPI
+ bne 26f
+ stbcix r0,r5,r6 /* clear IPI */
+26: stwcix r8,r5,r7 /* EOI the interrupt */
- /* NV GPR values from power7_idle() will no longer be valid */
- stb r0, PACA_NAPSTATELOST(r13)
+27: /* XXX should handle hypervisor maintenance interrupts etc. here */
+
+ /* if we have no vcpu to run, go back to sleep */
+ beq cr1,kvm_no_guest
+
+ /* were we napping due to cede? */
+ lbz r0,HSTATE_NAPPING(r13)
+ cmpwi r0,0
+ bne kvm_end_cede
.global kvmppc_hv_entry
kvmppc_hv_entry:
@@ -129,24 +159,15 @@ kvmppc_hv_entry:
mflr r0
std r0, HSTATE_VMHANDLER(r13)
- ld r14, VCPU_GPR(r14)(r4)
- ld r15, VCPU_GPR(r15)(r4)
- ld r16, VCPU_GPR(r16)(r4)
- ld r17, VCPU_GPR(r17)(r4)
- ld r18, VCPU_GPR(r18)(r4)
- ld r19, VCPU_GPR(r19)(r4)
- ld r20, VCPU_GPR(r20)(r4)
- ld r21, VCPU_GPR(r21)(r4)
- ld r22, VCPU_GPR(r22)(r4)
- ld r23, VCPU_GPR(r23)(r4)
- ld r24, VCPU_GPR(r24)(r4)
- ld r25, VCPU_GPR(r25)(r4)
- ld r26, VCPU_GPR(r26)(r4)
- ld r27, VCPU_GPR(r27)(r4)
- ld r28, VCPU_GPR(r28)(r4)
- ld r29, VCPU_GPR(r29)(r4)
- ld r30, VCPU_GPR(r30)(r4)
- ld r31, VCPU_GPR(r31)(r4)
+ /* Set partition DABR */
+ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
+ li r5,3
+ ld r6,VCPU_DABR(r4)
+ mtspr SPRN_DABRX,r5
+ mtspr SPRN_DABR,r6
+BEGIN_FTR_SECTION
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* Load guest PMU registers */
/* R4 is live here (vcpu pointer) */
@@ -185,6 +206,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
/* Load up FP, VMX and VSX registers */
bl kvmppc_load_fp
+ ld r14, VCPU_GPR(r14)(r4)
+ ld r15, VCPU_GPR(r15)(r4)
+ ld r16, VCPU_GPR(r16)(r4)
+ ld r17, VCPU_GPR(r17)(r4)
+ ld r18, VCPU_GPR(r18)(r4)
+ ld r19, VCPU_GPR(r19)(r4)
+ ld r20, VCPU_GPR(r20)(r4)
+ ld r21, VCPU_GPR(r21)(r4)
+ ld r22, VCPU_GPR(r22)(r4)
+ ld r23, VCPU_GPR(r23)(r4)
+ ld r24, VCPU_GPR(r24)(r4)
+ ld r25, VCPU_GPR(r25)(r4)
+ ld r26, VCPU_GPR(r26)(r4)
+ ld r27, VCPU_GPR(r27)(r4)
+ ld r28, VCPU_GPR(r28)(r4)
+ ld r29, VCPU_GPR(r29)(r4)
+ ld r30, VCPU_GPR(r30)(r4)
+ ld r31, VCPU_GPR(r31)(r4)
+
BEGIN_FTR_SECTION
/* Switch DSCR to guest value */
ld r5, VCPU_DSCR(r4)
@@ -226,12 +266,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
mtspr SPRN_DAR, r5
mtspr SPRN_DSISR, r6
- /* Set partition DABR */
- li r5,3
- ld r6,VCPU_DABR(r4)
- mtspr SPRN_DABRX,r5
- mtspr SPRN_DABR,r6
-
BEGIN_FTR_SECTION
/* Restore AMR and UAMOR, set AMOR to all 1s */
ld r5,VCPU_AMR(r4)
@@ -925,12 +959,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_AMR,r6
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
- /* Restore host DABR and DABRX */
- ld r5,HSTATE_DABR(r13)
- li r6,7
- mtspr SPRN_DABR,r5
- mtspr SPRN_DABRX,r6
-
/* Switch DSCR back to host value */
BEGIN_FTR_SECTION
mfspr r8, SPRN_DSCR
@@ -969,6 +997,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
std r5, VCPU_SPRG2(r9)
std r6, VCPU_SPRG3(r9)
+ /* save FP state */
+ mr r3, r9
+ bl .kvmppc_save_fp
+
/* Increment yield count if they have a VPA */
ld r8, VCPU_VPA(r9) /* do they have a VPA? */
cmpdi r8, 0
@@ -983,6 +1015,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
mfspr r4, SPRN_MMCR0 /* save MMCR0 */
mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ mfspr r6, SPRN_MMCRA
+BEGIN_FTR_SECTION
+ /* On P7, clear MMCRA in order to disable SDAR updates */
+ li r7, 0
+ mtspr SPRN_MMCRA, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
isync
beq 21f /* if no VPA, save PMU stuff anyway */
lbz r7, LPPACA_PMCINUSE(r8)
@@ -991,7 +1029,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
b 22f
21: mfspr r5, SPRN_MMCR1
- mfspr r6, SPRN_MMCRA
std r4, VCPU_MMCR(r9)
std r5, VCPU_MMCR + 8(r9)
std r6, VCPU_MMCR + 16(r9)
@@ -1016,17 +1053,20 @@ BEGIN_FTR_SECTION
stw r11, VCPU_PMC + 28(r9)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
22:
- /* save FP state */
- mr r3, r9
- bl .kvmppc_save_fp
/* Secondary threads go off to take a nap on POWER7 */
BEGIN_FTR_SECTION
- lwz r0,VCPU_PTID(r3)
+ lwz r0,VCPU_PTID(r9)
cmpwi r0,0
bne secondary_nap
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ /* Restore host DABR and DABRX */
+ ld r5,HSTATE_DABR(r13)
+ li r6,7
+ mtspr SPRN_DABR,r5
+ mtspr SPRN_DABRX,r6
+
/*
* Reload DEC. HDEC interrupts were disabled when
* we reloaded the host's LPCR value.
@@ -1363,7 +1403,12 @@ bounce_ext_interrupt:
_GLOBAL(kvmppc_h_set_dabr)
std r4,VCPU_DABR(r3)
- mtspr SPRN_DABR,r4
+ /* Work around P7 bug where DABR can get corrupted on mtspr */
+1: mtspr SPRN_DABR,r4
+ mfspr r5, SPRN_DABR
+ cmpd r4, r5
+ bne 1b
+ isync
li r3,0
blr
@@ -1445,8 +1490,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
* Take a nap until a decrementer or external interrupt occurs,
* with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
*/
- li r0,0x80
- stb r0,PACAPROCSTART(r13)
+ li r0,1
+ stb r0,HSTATE_HWTHREAD_REQ(r13)
mfspr r5,SPRN_LPCR
ori r5,r5,LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR,r5
@@ -1463,26 +1508,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
kvm_end_cede:
/* Woken by external or decrementer interrupt */
ld r1, HSTATE_HOST_R1(r13)
- ld r2, PACATOC(r13)
- /* If we're a secondary thread and we got here by an IPI, ack it */
- ld r4,HSTATE_KVM_VCPU(r13)
- lwz r3,VCPU_PTID(r4)
- cmpwi r3,0
- beq 27f
- mfspr r3,SPRN_SRR1
- rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
- cmpwi r3,4 /* was it an external interrupt? */
- bne 27f
- ld r5, HSTATE_XICS_PHYS(r13)
- li r0,0xff
- li r6,XICS_QIRR
- li r7,XICS_XIRR
- lwzcix r8,r5,r7 /* ack the interrupt */
- sync
- stbcix r0,r5,r6 /* clear it */
- stwcix r8,r5,r7 /* EOI it */
-27:
/* load up FP state */
bl kvmppc_load_fp
@@ -1580,12 +1606,17 @@ secondary_nap:
stwcx. r3, 0, r4
bne 51b
+kvm_no_guest:
+ li r0, KVM_HWTHREAD_IN_NAP
+ stb r0, HSTATE_HWTHREAD_STATE(r13)
+ li r0, 0
+ std r0, HSTATE_KVM_VCPU(r13)
+
li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR, r4
isync
- li r0, 0
std r0, HSTATE_SCRATCH0(r13)
ptesync
ld r0, HSTATE_SCRATCH0(r13)
@@ -1599,8 +1630,8 @@ secondary_nap:
* r3 = vcpu pointer
*/
_GLOBAL(kvmppc_save_fp)
- mfmsr r9
- ori r8,r9,MSR_FP
+ mfmsr r5
+ ori r8,r5,MSR_FP
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
oris r8,r8,MSR_VEC@h
@@ -1649,7 +1680,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
mfspr r6,SPRN_VRSAVE
stw r6,VCPU_VRSAVE(r3)
- mtmsrd r9
+ mtmsrd r5
isync
blr
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 7759053d391..a1baec340f7 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -120,6 +120,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
if (msr & MSR_POW) {
if (!vcpu->arch.pending_exceptions) {
kvm_vcpu_block(vcpu);
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
vcpu->stat.halt_wakeup++;
/* Unset POW bit after we woke up */
@@ -144,6 +145,21 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
}
}
+ /*
+ * When switching from 32 to 64-bit, we may have a stale 32-bit
+ * magic page around, we need to flush it. Typically 32-bit magic
+ * page will be instanciated when calling into RTAS. Note: We
+ * assume that such transition only happens while in kernel mode,
+ * ie, we never transition from user 32-bit to kernel 64-bit with
+ * a 32-bit magic page around.
+ */
+ if (vcpu->arch.magic_page_pa &&
+ !(old_msr & MSR_PR) && !(old_msr & MSR_SF) && (msr & MSR_SF)) {
+ /* going from RTAS to normal kernel code */
+ kvmppc_mmu_pte_flush(vcpu, (uint32_t)vcpu->arch.magic_page_pa,
+ ~0xFFFUL);
+ }
+
/* Preload FPU if it's enabled */
if (vcpu->arch.shared->msr & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
@@ -251,6 +267,9 @@ static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
ulong mp_pa = vcpu->arch.magic_page_pa;
+ if (!(vcpu->arch.shared->msr & MSR_SF))
+ mp_pa = (uint32_t)mp_pa;
+
if (unlikely(mp_pa) &&
unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
return 1;
@@ -351,6 +370,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* MMIO */
vcpu->stat.mmio_exits++;
vcpu->arch.paddr_accessed = pte.raddr;
+ vcpu->arch.vaddr_accessed = pte.eaddr;
r = kvmppc_emulate_mmio(run, vcpu);
if ( r == RESUME_HOST_NV )
r = RESUME_HOST;
@@ -528,6 +548,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
+ /* We get here with MSR.EE=0, so enable it to be a nice citizen */
+ __hard_irq_enable();
+
trace_kvm_book3s_exit(exit_nr, vcpu);
preempt_enable();
kvm_resched(vcpu);
@@ -617,10 +640,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
/* We're good on these - the host merely wanted to get our attention */
case BOOK3S_INTERRUPT_DECREMENTER:
+ case BOOK3S_INTERRUPT_HV_DECREMENTER:
vcpu->stat.dec_exits++;
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_EXTERNAL:
+ case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
+ case BOOK3S_INTERRUPT_EXTERNAL_HV:
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
@@ -628,6 +654,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_PROGRAM:
+ case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
{
enum emulation_result er;
struct kvmppc_book3s_shadow_vcpu *svcpu;
@@ -1131,6 +1158,31 @@ out:
return r;
}
+#ifdef CONFIG_PPC64
+int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+{
+ /* No flags */
+ info->flags = 0;
+
+ /* SLB is always 64 entries */
+ info->slb_size = 64;
+
+ /* Standard 4k base page size segment */
+ info->sps[0].page_shift = 12;
+ info->sps[0].slb_enc = 0;
+ info->sps[0].enc[0].page_shift = 12;
+ info->sps[0].enc[0].pte_enc = 0;
+
+ /* Standard 16M large page size segment */
+ info->sps[1].page_shift = 24;
+ info->sps[1].slb_enc = SLB_VSID_L;
+ info->sps[1].enc[0].page_shift = 24;
+ info->sps[1].enc[0].pte_enc = 0;
+
+ return 0;
+}
+#endif /* CONFIG_PPC64 */
+
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
{
@@ -1144,11 +1196,18 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
int kvmppc_core_init_vm(struct kvm *kvm)
{
+#ifdef CONFIG_PPC64
+ INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+#endif
+
return 0;
}
void kvmppc_core_destroy_vm(struct kvm *kvm)
{
+#ifdef CONFIG_PPC64
+ WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
+#endif
}
static int kvmppc_book3s_init(void)
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index b9589324797..3ff9013d6e7 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -15,6 +15,8 @@
* published by the Free Software Foundation.
*/
+#include <linux/anon_inodes.h>
+
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -98,6 +100,83 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+/* Request defs for kvmppc_h_pr_bulk_remove() */
+#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL
+#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL
+#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL
+#define H_BULK_REMOVE_END 0xc000000000000000ULL
+#define H_BULK_REMOVE_CODE 0x3000000000000000ULL
+#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL
+#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL
+#define H_BULK_REMOVE_PARM 0x2000000000000000ULL
+#define H_BULK_REMOVE_HW 0x3000000000000000ULL
+#define H_BULK_REMOVE_RC 0x0c00000000000000ULL
+#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL
+#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL
+#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL
+#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL
+#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL
+#define H_BULK_REMOVE_MAX_BATCH 4
+
+static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
+{
+ int i;
+ int paramnr = 4;
+ int ret = H_SUCCESS;
+
+ for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
+ unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i));
+ unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1);
+ unsigned long pteg, rb, flags;
+ unsigned long pte[2];
+ unsigned long v = 0;
+
+ if ((tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
+ break; /* Exit success */
+ } else if ((tsh & H_BULK_REMOVE_TYPE) !=
+ H_BULK_REMOVE_REQUEST) {
+ ret = H_PARAMETER;
+ break; /* Exit fail */
+ }
+
+ tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
+ tsh |= H_BULK_REMOVE_RESPONSE;
+
+ if ((tsh & H_BULK_REMOVE_ANDCOND) &&
+ (tsh & H_BULK_REMOVE_AVPN)) {
+ tsh |= H_BULK_REMOVE_PARM;
+ kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
+ ret = H_PARAMETER;
+ break; /* Exit fail */
+ }
+
+ pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
+ copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+
+ /* tsl = AVPN */
+ flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26;
+
+ if ((pte[0] & HPTE_V_VALID) == 0 ||
+ ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != tsl) ||
+ ((flags & H_ANDCOND) && (pte[0] & tsl) != 0)) {
+ tsh |= H_BULK_REMOVE_NOT_FOUND;
+ } else {
+ /* Splat the pteg in (userland) hpt */
+ copy_to_user((void __user *)pteg, &v, sizeof(v));
+
+ rb = compute_tlbie_rb(pte[0], pte[1],
+ tsh & H_BULK_REMOVE_PTEX);
+ vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
+ tsh |= H_BULK_REMOVE_SUCCESS;
+ tsh |= (pte[1] & (HPTE_R_C | HPTE_R_R)) << 43;
+ }
+ kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
+ }
+ kvmppc_set_gpr(vcpu, 3, ret);
+
+ return EMULATE_DONE;
+}
+
static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
{
unsigned long flags = kvmppc_get_gpr(vcpu, 4);
@@ -134,6 +213,20 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
+{
+ unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+ unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+ unsigned long tce = kvmppc_get_gpr(vcpu, 6);
+ long rc;
+
+ rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
{
switch (cmd) {
@@ -144,12 +237,12 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
case H_PROTECT:
return kvmppc_h_pr_protect(vcpu);
case H_BULK_REMOVE:
- /* We just flush all PTEs, so user space can
- handle the HPT modifications */
- kvmppc_mmu_pte_flush(vcpu, 0, 0);
- break;
+ return kvmppc_h_pr_bulk_remove(vcpu);
+ case H_PUT_TCE:
+ return kvmppc_h_pr_put_tce(vcpu);
case H_CEDE:
kvm_vcpu_block(vcpu);
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
vcpu->stat.halt_wakeup++;
return EMULATE_DONE;
}
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 0676ae249b9..798491a268b 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -128,24 +128,25 @@ no_dcbz32_on:
/* First clear RI in our current MSR value */
li r0, MSR_RI
andc r6, r6, r0
- MTMSR_EERI(r6)
- mtsrr0 r9
- mtsrr1 r4
PPC_LL r0, SVCPU_R0(r3)
PPC_LL r1, SVCPU_R1(r3)
PPC_LL r2, SVCPU_R2(r3)
- PPC_LL r4, SVCPU_R4(r3)
PPC_LL r5, SVCPU_R5(r3)
- PPC_LL r6, SVCPU_R6(r3)
PPC_LL r7, SVCPU_R7(r3)
PPC_LL r8, SVCPU_R8(r3)
- PPC_LL r9, SVCPU_R9(r3)
PPC_LL r10, SVCPU_R10(r3)
PPC_LL r11, SVCPU_R11(r3)
PPC_LL r12, SVCPU_R12(r3)
PPC_LL r13, SVCPU_R13(r3)
+ MTMSR_EERI(r6)
+ mtsrr0 r9
+ mtsrr1 r4
+
+ PPC_LL r4, SVCPU_R4(r3)
+ PPC_LL r6, SVCPU_R6(r3)
+ PPC_LL r9, SVCPU_R9(r3)
PPC_LL r3, (SVCPU_R3)(r3)
RFI
@@ -197,7 +198,8 @@ kvmppc_interrupt:
/* Save guest PC and MSR */
#ifdef CONFIG_PPC64
BEGIN_FTR_SECTION
- andi. r0,r12,0x2
+ andi. r0, r12, 0x2
+ cmpwi cr1, r0, 0
beq 1f
mfspr r3,SPRN_HSRR0
mfspr r4,SPRN_HSRR1
@@ -250,6 +252,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
beq ld_last_prev_inst
cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT
beq- ld_last_inst
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ cmpwi r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST
+ beq- ld_last_inst
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
b no_ld_last_inst
@@ -316,23 +324,17 @@ no_dcbz32_off:
* Having set up SRR0/1 with the address where we want
* to continue with relocation on (potentially in module
* space), we either just go straight there with rfi[d],
- * or we jump to an interrupt handler with bctr if there
- * is an interrupt to be handled first. In the latter
- * case, the rfi[d] at the end of the interrupt handler
- * will get us back to where we want to continue.
+ * or we jump to an interrupt handler if there is an
+ * interrupt to be handled first. In the latter case,
+ * the rfi[d] at the end of the interrupt handler will
+ * get us back to where we want to continue.
*/
- cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
- beq 1f
- cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
- beq 1f
- cmpwi r12, BOOK3S_INTERRUPT_PERFMON
-1: mtctr r12
-
/* Register usage at this point:
*
* R1 = host R1
* R2 = host R2
+ * R10 = raw exit handler id
* R12 = exit handler id
* R13 = shadow vcpu (32-bit) or PACA (64-bit)
* SVCPU.* = guest *
@@ -342,12 +344,25 @@ no_dcbz32_off:
PPC_LL r6, HSTATE_HOST_MSR(r13)
PPC_LL r8, HSTATE_VMHANDLER(r13)
- /* Restore host msr -> SRR1 */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ beq cr1, 1f
+ mtspr SPRN_HSRR1, r6
+ mtspr SPRN_HSRR0, r8
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
+1: /* Restore host msr -> SRR1 */
mtsrr1 r6
/* Load highmem handler address */
mtsrr0 r8
/* RFI into the highmem handler, or jump to interrupt handler */
- beqctr
+ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
+ beqa BOOK3S_INTERRUPT_EXTERNAL
+ cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
+ beqa BOOK3S_INTERRUPT_DECREMENTER
+ cmpwi r12, BOOK3S_INTERRUPT_PERFMON
+ beqa BOOK3S_INTERRUPT_PERFMON
+
RFI
kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ee9e1ee9c85..72f13f4a06e 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -17,6 +17,8 @@
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
* Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ * Scott Wood <scottwood@freescale.com>
+ * Varun Sethi <varun.sethi@freescale.com>
*/
#include <linux/errno.h>
@@ -30,9 +32,12 @@
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
-#include "timing.h"
#include <asm/cacheflush.h>
+#include <asm/dbell.h>
+#include <asm/hw_irq.h>
+#include <asm/irq.h>
+#include "timing.h"
#include "booke.h"
unsigned long kvmppc_booke_handlers;
@@ -55,6 +60,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "dec", VCPU_STAT(dec_exits) },
{ "ext_intr", VCPU_STAT(ext_intr_exits) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
+ { "doorbell", VCPU_STAT(dbell_exits) },
+ { "guest doorbell", VCPU_STAT(gdbell_exits) },
{ NULL }
};
@@ -121,6 +128,10 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
{
u32 old_msr = vcpu->arch.shared->msr;
+#ifdef CONFIG_KVM_BOOKE_HV
+ new_msr |= MSR_GS;
+#endif
+
vcpu->arch.shared->msr = new_msr;
kvmppc_mmu_msr_notify(vcpu, old_msr);
@@ -195,17 +206,87 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
}
+static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GSRR0, srr0);
+ mtspr(SPRN_GSRR1, srr1);
+#else
+ vcpu->arch.shared->srr0 = srr0;
+ vcpu->arch.shared->srr1 = srr1;
+#endif
+}
+
+static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+ vcpu->arch.csrr0 = srr0;
+ vcpu->arch.csrr1 = srr1;
+}
+
+static void set_guest_dsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+ if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) {
+ vcpu->arch.dsrr0 = srr0;
+ vcpu->arch.dsrr1 = srr1;
+ } else {
+ set_guest_csrr(vcpu, srr0, srr1);
+ }
+}
+
+static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+ vcpu->arch.mcsrr0 = srr0;
+ vcpu->arch.mcsrr1 = srr1;
+}
+
+static unsigned long get_guest_dear(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ return mfspr(SPRN_GDEAR);
+#else
+ return vcpu->arch.shared->dar;
+#endif
+}
+
+static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GDEAR, dear);
+#else
+ vcpu->arch.shared->dar = dear;
+#endif
+}
+
+static unsigned long get_guest_esr(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ return mfspr(SPRN_GESR);
+#else
+ return vcpu->arch.shared->esr;
+#endif
+}
+
+static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GESR, esr);
+#else
+ vcpu->arch.shared->esr = esr;
+#endif
+}
+
/* Deliver the interrupt of the corresponding priority, if possible. */
static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
unsigned int priority)
{
int allowed = 0;
- ulong uninitialized_var(msr_mask);
+ ulong msr_mask = 0;
bool update_esr = false, update_dear = false;
ulong crit_raw = vcpu->arch.shared->critical;
ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
bool crit;
bool keep_irq = false;
+ enum int_class int_class;
/* Truncate crit indicators in 32 bit mode */
if (!(vcpu->arch.shared->msr & MSR_SF)) {
@@ -241,46 +322,85 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
case BOOKE_IRQPRIO_AP_UNAVAIL:
case BOOKE_IRQPRIO_ALIGNMENT:
allowed = 1;
- msr_mask = MSR_CE|MSR_ME|MSR_DE;
+ msr_mask = MSR_CE | MSR_ME | MSR_DE;
+ int_class = INT_CLASS_NONCRIT;
break;
case BOOKE_IRQPRIO_CRITICAL:
- case BOOKE_IRQPRIO_WATCHDOG:
+ case BOOKE_IRQPRIO_DBELL_CRIT:
allowed = vcpu->arch.shared->msr & MSR_CE;
+ allowed = allowed && !crit;
msr_mask = MSR_ME;
+ int_class = INT_CLASS_CRIT;
break;
case BOOKE_IRQPRIO_MACHINE_CHECK:
allowed = vcpu->arch.shared->msr & MSR_ME;
- msr_mask = 0;
+ allowed = allowed && !crit;
+ int_class = INT_CLASS_MC;
break;
case BOOKE_IRQPRIO_DECREMENTER:
case BOOKE_IRQPRIO_FIT:
keep_irq = true;
/* fall through */
case BOOKE_IRQPRIO_EXTERNAL:
+ case BOOKE_IRQPRIO_DBELL:
allowed = vcpu->arch.shared->msr & MSR_EE;
allowed = allowed && !crit;
- msr_mask = MSR_CE|MSR_ME|MSR_DE;
+ msr_mask = MSR_CE | MSR_ME | MSR_DE;
+ int_class = INT_CLASS_NONCRIT;
break;
case BOOKE_IRQPRIO_DEBUG:
allowed = vcpu->arch.shared->msr & MSR_DE;
+ allowed = allowed && !crit;
msr_mask = MSR_ME;
+ int_class = INT_CLASS_CRIT;
break;
}
if (allowed) {
- vcpu->arch.shared->srr0 = vcpu->arch.pc;
- vcpu->arch.shared->srr1 = vcpu->arch.shared->msr;
+ switch (int_class) {
+ case INT_CLASS_NONCRIT:
+ set_guest_srr(vcpu, vcpu->arch.pc,
+ vcpu->arch.shared->msr);
+ break;
+ case INT_CLASS_CRIT:
+ set_guest_csrr(vcpu, vcpu->arch.pc,
+ vcpu->arch.shared->msr);
+ break;
+ case INT_CLASS_DBG:
+ set_guest_dsrr(vcpu, vcpu->arch.pc,
+ vcpu->arch.shared->msr);
+ break;
+ case INT_CLASS_MC:
+ set_guest_mcsrr(vcpu, vcpu->arch.pc,
+ vcpu->arch.shared->msr);
+ break;
+ }
+
vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
if (update_esr == true)
- vcpu->arch.shared->esr = vcpu->arch.queued_esr;
+ set_guest_esr(vcpu, vcpu->arch.queued_esr);
if (update_dear == true)
- vcpu->arch.shared->dar = vcpu->arch.queued_dear;
+ set_guest_dear(vcpu, vcpu->arch.queued_dear);
kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
if (!keep_irq)
clear_bit(priority, &vcpu->arch.pending_exceptions);
}
+#ifdef CONFIG_KVM_BOOKE_HV
+ /*
+ * If an interrupt is pending but masked, raise a guest doorbell
+ * so that we are notified when the guest enables the relevant
+ * MSR bit.
+ */
+ if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_EE)
+ kvmppc_set_pending_interrupt(vcpu, INT_CLASS_NONCRIT);
+ if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_CE)
+ kvmppc_set_pending_interrupt(vcpu, INT_CLASS_CRIT);
+ if (vcpu->arch.pending_exceptions & BOOKE_IRQPRIO_MACHINE_CHECK)
+ kvmppc_set_pending_interrupt(vcpu, INT_CLASS_MC);
+#endif
+
return allowed;
}
@@ -305,7 +425,7 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
}
priority = __ffs(*pending);
- while (priority <= BOOKE_IRQPRIO_MAX) {
+ while (priority < BOOKE_IRQPRIO_MAX) {
if (kvmppc_booke_irqprio_deliver(vcpu, priority))
break;
@@ -319,8 +439,9 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
}
/* Check pending exceptions and deliver one, if possible. */
-void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
+int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
{
+ int r = 0;
WARN_ON_ONCE(!irqs_disabled());
kvmppc_core_check_exceptions(vcpu);
@@ -328,16 +449,60 @@ void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
if (vcpu->arch.shared->msr & MSR_WE) {
local_irq_enable();
kvm_vcpu_block(vcpu);
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
local_irq_disable();
kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
- kvmppc_core_check_exceptions(vcpu);
+ r = 1;
};
+
+ return r;
+}
+
+/*
+ * Common checks before entering the guest world. Call with interrupts
+ * disabled.
+ *
+ * returns !0 if a signal is pending and check_signal is true
+ */
+static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
+{
+ int r = 0;
+
+ WARN_ON_ONCE(!irqs_disabled());
+ while (true) {
+ if (need_resched()) {
+ local_irq_enable();
+ cond_resched();
+ local_irq_disable();
+ continue;
+ }
+
+ if (signal_pending(current)) {
+ r = 1;
+ break;
+ }
+
+ if (kvmppc_core_prepare_to_enter(vcpu)) {
+ /* interrupts got enabled in between, so we
+ are back at square 1 */
+ continue;
+ }
+
+ break;
+ }
+
+ return r;
}
int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
int ret;
+#ifdef CONFIG_PPC_FPU
+ unsigned int fpscr;
+ int fpexc_mode;
+ u64 fpr[32];
+#endif
if (!vcpu->arch.sane) {
kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -345,17 +510,53 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
}
local_irq_disable();
-
- kvmppc_core_prepare_to_enter(vcpu);
-
- if (signal_pending(current)) {
+ if (kvmppc_prepare_to_enter(vcpu)) {
kvm_run->exit_reason = KVM_EXIT_INTR;
ret = -EINTR;
goto out;
}
kvm_guest_enter();
+
+#ifdef CONFIG_PPC_FPU
+ /* Save userspace FPU state in stack */
+ enable_kernel_fp();
+ memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
+ fpscr = current->thread.fpscr.val;
+ fpexc_mode = current->thread.fpexc_mode;
+
+ /* Restore guest FPU state to thread */
+ memcpy(current->thread.fpr, vcpu->arch.fpr, sizeof(vcpu->arch.fpr));
+ current->thread.fpscr.val = vcpu->arch.fpscr;
+
+ /*
+ * Since we can't trap on MSR_FP in GS-mode, we consider the guest
+ * as always using the FPU. Kernel usage of FP (via
+ * enable_kernel_fp()) in this thread must not occur while
+ * vcpu->fpu_active is set.
+ */
+ vcpu->fpu_active = 1;
+
+ kvmppc_load_guest_fp(vcpu);
+#endif
+
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+
+#ifdef CONFIG_PPC_FPU
+ kvmppc_save_guest_fp(vcpu);
+
+ vcpu->fpu_active = 0;
+
+ /* Save guest FPU state from thread */
+ memcpy(vcpu->arch.fpr, current->thread.fpr, sizeof(vcpu->arch.fpr));
+ vcpu->arch.fpscr = current->thread.fpscr.val;
+
+ /* Restore userspace FPU state from stack */
+ memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
+ current->thread.fpscr.val = fpscr;
+ current->thread.fpexc_mode = fpexc_mode;
+#endif
+
kvm_guest_exit();
out:
@@ -363,6 +564,84 @@ out:
return ret;
}
+static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er;
+
+ er = kvmppc_emulate_instruction(run, vcpu);
+ switch (er) {
+ case EMULATE_DONE:
+ /* don't overwrite subtypes, just account kvm_stats */
+ kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
+ /* Future optimization: only reload non-volatiles if
+ * they were actually modified by emulation. */
+ return RESUME_GUEST_NV;
+
+ case EMULATE_DO_DCR:
+ run->exit_reason = KVM_EXIT_DCR;
+ return RESUME_HOST;
+
+ case EMULATE_FAIL:
+ printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
+ __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+ /* For debugging, encode the failing instruction and
+ * report it to userspace. */
+ run->hw.hardware_exit_reason = ~0ULL << 32;
+ run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
+ kvmppc_core_queue_program(vcpu, ESR_PIL);
+ return RESUME_HOST;
+
+ default:
+ BUG();
+ }
+}
+
+static void kvmppc_fill_pt_regs(struct pt_regs *regs)
+{
+ ulong r1, ip, msr, lr;
+
+ asm("mr %0, 1" : "=r"(r1));
+ asm("mflr %0" : "=r"(lr));
+ asm("mfmsr %0" : "=r"(msr));
+ asm("bl 1f; 1: mflr %0" : "=r"(ip));
+
+ memset(regs, 0, sizeof(*regs));
+ regs->gpr[1] = r1;
+ regs->nip = ip;
+ regs->msr = msr;
+ regs->link = lr;
+}
+
+static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
+ unsigned int exit_nr)
+{
+ struct pt_regs regs;
+
+ switch (exit_nr) {
+ case BOOKE_INTERRUPT_EXTERNAL:
+ kvmppc_fill_pt_regs(&regs);
+ do_IRQ(&regs);
+ break;
+ case BOOKE_INTERRUPT_DECREMENTER:
+ kvmppc_fill_pt_regs(&regs);
+ timer_interrupt(&regs);
+ break;
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64)
+ case BOOKE_INTERRUPT_DOORBELL:
+ kvmppc_fill_pt_regs(&regs);
+ doorbell_exception(&regs);
+ break;
+#endif
+ case BOOKE_INTERRUPT_MACHINE_CHECK:
+ /* FIXME */
+ break;
+ case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
+ kvmppc_fill_pt_regs(&regs);
+ performance_monitor_exception(&regs);
+ break;
+ }
+}
+
/**
* kvmppc_handle_exit
*
@@ -371,12 +650,14 @@ out:
int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
- enum emulation_result er;
int r = RESUME_HOST;
/* update before a new last_exit_type is rewritten */
kvmppc_update_timing_stats(vcpu);
+ /* restart interrupts if they were meant for the host */
+ kvmppc_restart_interrupt(vcpu, exit_nr);
+
local_irq_enable();
run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -386,62 +667,74 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOKE_INTERRUPT_MACHINE_CHECK:
printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
kvmppc_dump_vcpu(vcpu);
+ /* For debugging, send invalid exit reason to user space */
+ run->hw.hardware_exit_reason = ~1ULL << 32;
+ run->hw.hardware_exit_reason |= mfspr(SPRN_MCSR);
r = RESUME_HOST;
break;
case BOOKE_INTERRUPT_EXTERNAL:
kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
- if (need_resched())
- cond_resched();
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_DECREMENTER:
- /* Since we switched IVPR back to the host's value, the host
- * handled this interrupt the moment we enabled interrupts.
- * Now we just offer it a chance to reschedule the guest. */
kvmppc_account_exit(vcpu, DEC_EXITS);
- if (need_resched())
- cond_resched();
r = RESUME_GUEST;
break;
+ case BOOKE_INTERRUPT_DOORBELL:
+ kvmppc_account_exit(vcpu, DBELL_EXITS);
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_GUEST_DBELL_CRIT:
+ kvmppc_account_exit(vcpu, GDBELL_EXITS);
+
+ /*
+ * We are here because there is a pending guest interrupt
+ * which could not be delivered as MSR_CE or MSR_ME was not
+ * set. Once we break from here we will retry delivery.
+ */
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_GUEST_DBELL:
+ kvmppc_account_exit(vcpu, GDBELL_EXITS);
+
+ /*
+ * We are here because there is a pending guest interrupt
+ * which could not be delivered as MSR_EE was not set. Once
+ * we break from here we will retry delivery.
+ */
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_HV_PRIV:
+ r = emulation_exit(run, vcpu);
+ break;
+
case BOOKE_INTERRUPT_PROGRAM:
- if (vcpu->arch.shared->msr & MSR_PR) {
- /* Program traps generated by user-level software must be handled
- * by the guest kernel. */
+ if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) {
+ /*
+ * Program traps generated by user-level software must
+ * be handled by the guest kernel.
+ *
+ * In GS mode, hypervisor privileged instructions trap
+ * on BOOKE_INTERRUPT_HV_PRIV, not here, so these are
+ * actual program interrupts, handled by the guest.
+ */
kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
r = RESUME_GUEST;
kvmppc_account_exit(vcpu, USR_PR_INST);
break;
}
- er = kvmppc_emulate_instruction(run, vcpu);
- switch (er) {
- case EMULATE_DONE:
- /* don't overwrite subtypes, just account kvm_stats */
- kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
- /* Future optimization: only reload non-volatiles if
- * they were actually modified by emulation. */
- r = RESUME_GUEST_NV;
- break;
- case EMULATE_DO_DCR:
- run->exit_reason = KVM_EXIT_DCR;
- r = RESUME_HOST;
- break;
- case EMULATE_FAIL:
- /* XXX Deliver Program interrupt to guest. */
- printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
- __func__, vcpu->arch.pc, vcpu->arch.last_inst);
- /* For debugging, encode the failing instruction and
- * report it to userspace. */
- run->hw.hardware_exit_reason = ~0ULL << 32;
- run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
- r = RESUME_HOST;
- break;
- default:
- BUG();
- }
+ r = emulation_exit(run, vcpu);
break;
case BOOKE_INTERRUPT_FP_UNAVAIL:
@@ -506,6 +799,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
+#ifdef CONFIG_KVM_BOOKE_HV
+ case BOOKE_INTERRUPT_HV_SYSCALL:
+ if (!(vcpu->arch.shared->msr & MSR_PR)) {
+ kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+ } else {
+ /*
+ * hcall from guest userspace -- send privileged
+ * instruction program check.
+ */
+ kvmppc_core_queue_program(vcpu, ESR_PPR);
+ }
+
+ r = RESUME_GUEST;
+ break;
+#else
case BOOKE_INTERRUPT_SYSCALL:
if (!(vcpu->arch.shared->msr & MSR_PR) &&
(((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
@@ -519,6 +827,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_account_exit(vcpu, SYSCALL_EXITS);
r = RESUME_GUEST;
break;
+#endif
case BOOKE_INTERRUPT_DTLB_MISS: {
unsigned long eaddr = vcpu->arch.fault_dear;
@@ -526,7 +835,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
gpa_t gpaddr;
gfn_t gfn;
-#ifdef CONFIG_KVM_E500
+#ifdef CONFIG_KVM_E500V2
if (!(vcpu->arch.shared->msr & MSR_PR) &&
(eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) {
kvmppc_map_magic(vcpu);
@@ -567,6 +876,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Guest has mapped and accessed a page which is not
* actually RAM. */
vcpu->arch.paddr_accessed = gpaddr;
+ vcpu->arch.vaddr_accessed = eaddr;
r = kvmppc_emulate_mmio(run, vcpu);
kvmppc_account_exit(vcpu, MMIO_EXITS);
}
@@ -634,15 +944,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
BUG();
}
- local_irq_disable();
-
- kvmppc_core_prepare_to_enter(vcpu);
-
+ /*
+ * To avoid clobbering exit_reason, only check for signals if we
+ * aren't already exiting to userspace for some other reason.
+ */
if (!(r & RESUME_HOST)) {
- /* To avoid clobbering exit_reason, only check for signals if
- * we aren't already exiting to userspace for some other
- * reason. */
- if (signal_pending(current)) {
+ local_irq_disable();
+ if (kvmppc_prepare_to_enter(vcpu)) {
run->exit_reason = KVM_EXIT_INTR;
r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
kvmppc_account_exit(vcpu, SIGNAL_EXITS);
@@ -659,12 +967,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
int r;
vcpu->arch.pc = 0;
- vcpu->arch.shared->msr = 0;
- vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
vcpu->arch.shared->pir = vcpu->vcpu_id;
kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
+ kvmppc_set_msr(vcpu, 0);
+#ifndef CONFIG_KVM_BOOKE_HV
+ vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
vcpu->arch.shadow_pid = 1;
+ vcpu->arch.shared->msr = 0;
+#endif
/* Eye-catching numbers so we know if the guest takes an interrupt
* before it's programmed its own IVPR/IVORs. */
@@ -745,8 +1056,8 @@ static void get_sregs_base(struct kvm_vcpu *vcpu,
sregs->u.e.csrr0 = vcpu->arch.csrr0;
sregs->u.e.csrr1 = vcpu->arch.csrr1;
sregs->u.e.mcsr = vcpu->arch.mcsr;
- sregs->u.e.esr = vcpu->arch.shared->esr;
- sregs->u.e.dear = vcpu->arch.shared->dar;
+ sregs->u.e.esr = get_guest_esr(vcpu);
+ sregs->u.e.dear = get_guest_dear(vcpu);
sregs->u.e.tsr = vcpu->arch.tsr;
sregs->u.e.tcr = vcpu->arch.tcr;
sregs->u.e.dec = kvmppc_get_dec(vcpu, tb);
@@ -763,8 +1074,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
vcpu->arch.csrr0 = sregs->u.e.csrr0;
vcpu->arch.csrr1 = sregs->u.e.csrr1;
vcpu->arch.mcsr = sregs->u.e.mcsr;
- vcpu->arch.shared->esr = sregs->u.e.esr;
- vcpu->arch.shared->dar = sregs->u.e.dear;
+ set_guest_esr(vcpu, sregs->u.e.esr);
+ set_guest_dear(vcpu, sregs->u.e.dear);
vcpu->arch.vrsave = sregs->u.e.vrsave;
kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
@@ -932,15 +1243,6 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
{
}
-int kvmppc_core_init_vm(struct kvm *kvm)
-{
- return 0;
-}
-
-void kvmppc_core_destroy_vm(struct kvm *kvm)
-{
-}
-
void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
{
vcpu->arch.tcr = new_tcr;
@@ -968,8 +1270,19 @@ void kvmppc_decrementer_func(unsigned long data)
kvmppc_set_tsr_bits(vcpu, TSR_DIS);
}
+void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ current->thread.kvm_vcpu = vcpu;
+}
+
+void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ current->thread.kvm_vcpu = NULL;
+}
+
int __init kvmppc_booke_init(void)
{
+#ifndef CONFIG_KVM_BOOKE_HV
unsigned long ivor[16];
unsigned long max_ivor = 0;
int i;
@@ -1012,7 +1325,7 @@ int __init kvmppc_booke_init(void)
}
flush_icache_range(kvmppc_booke_handlers,
kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
-
+#endif /* !BOOKE_HV */
return 0;
}
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 2fe202705a3..ba61974c1e2 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -23,6 +23,7 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <asm/kvm_ppc.h>
+#include <asm/switch_to.h>
#include "timing.h"
/* interrupt priortity ordering */
@@ -48,7 +49,20 @@
#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
/* Internal pseudo-irqprio for level triggered externals */
#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20
-#define BOOKE_IRQPRIO_MAX 20
+#define BOOKE_IRQPRIO_DBELL 21
+#define BOOKE_IRQPRIO_DBELL_CRIT 22
+#define BOOKE_IRQPRIO_MAX 23
+
+#define BOOKE_IRQMASK_EE ((1 << BOOKE_IRQPRIO_EXTERNAL_LEVEL) | \
+ (1 << BOOKE_IRQPRIO_PERFORMANCE_MONITOR) | \
+ (1 << BOOKE_IRQPRIO_DBELL) | \
+ (1 << BOOKE_IRQPRIO_DECREMENTER) | \
+ (1 << BOOKE_IRQPRIO_FIT) | \
+ (1 << BOOKE_IRQPRIO_EXTERNAL))
+
+#define BOOKE_IRQMASK_CE ((1 << BOOKE_IRQPRIO_DBELL_CRIT) | \
+ (1 << BOOKE_IRQPRIO_WATCHDOG) | \
+ (1 << BOOKE_IRQPRIO_CRITICAL))
extern unsigned long kvmppc_booke_handlers;
@@ -61,8 +75,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
-int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
-int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
/* low-level asm code to transfer guest state */
void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu);
@@ -71,4 +85,46 @@ void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu);
/* high-level function, manages flags, host state */
void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu);
+void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu);
+
+enum int_class {
+ INT_CLASS_NONCRIT,
+ INT_CLASS_CRIT,
+ INT_CLASS_MC,
+ INT_CLASS_DBG,
+};
+
+void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
+
+/*
+ * Load up guest vcpu FP state if it's needed.
+ * It also set the MSR_FP in thread so that host know
+ * we're holding FPU, and then host can help to save
+ * guest vcpu FP state if other threads require to use FPU.
+ * This simulates an FP unavailable fault.
+ *
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+ if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) {
+ load_up_fpu();
+ current->thread.regs->msr |= MSR_FP;
+ }
+#endif
+}
+
+/*
+ * Save guest vcpu FP state into thread.
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+ if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))
+ giveup_fpu(current);
+#endif
+}
#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 3e652da3653..6c76397f2af 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -40,8 +40,8 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
- int rs;
- int rt;
+ int rs = get_rs(inst);
+ int rt = get_rt(inst);
switch (get_op(inst)) {
case 19:
@@ -62,19 +62,16 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
switch (get_xop(inst)) {
case OP_31_XOP_MFMSR:
- rt = get_rt(inst);
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr);
kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
break;
case OP_31_XOP_MTMSR:
- rs = get_rs(inst);
kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_WRTEE:
- rs = get_rs(inst);
vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE)
| (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
@@ -99,22 +96,32 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return emulated;
}
-int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+/*
+ * NOTE: some of these registers are not emulated on BOOKE_HV (GS-mode).
+ * Their backing store is in real registers, and these functions
+ * will return the wrong result if called for them in another context
+ * (such as debugging).
+ */
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
- ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_DEAR:
- vcpu->arch.shared->dar = spr_val; break;
+ vcpu->arch.shared->dar = spr_val;
+ break;
case SPRN_ESR:
- vcpu->arch.shared->esr = spr_val; break;
+ vcpu->arch.shared->esr = spr_val;
+ break;
case SPRN_DBCR0:
- vcpu->arch.dbcr0 = spr_val; break;
+ vcpu->arch.dbcr0 = spr_val;
+ break;
case SPRN_DBCR1:
- vcpu->arch.dbcr1 = spr_val; break;
+ vcpu->arch.dbcr1 = spr_val;
+ break;
case SPRN_DBSR:
- vcpu->arch.dbsr &= ~spr_val; break;
+ vcpu->arch.dbsr &= ~spr_val;
+ break;
case SPRN_TSR:
kvmppc_clr_tsr_bits(vcpu, spr_val);
break;
@@ -122,20 +129,29 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
kvmppc_set_tcr(vcpu, spr_val);
break;
- /* Note: SPRG4-7 are user-readable. These values are
- * loaded into the real SPRGs when resuming the
- * guest. */
+ /*
+ * Note: SPRG4-7 are user-readable.
+ * These values are loaded into the real SPRGs when resuming the
+ * guest (PR-mode only).
+ */
case SPRN_SPRG4:
- vcpu->arch.shared->sprg4 = spr_val; break;
+ vcpu->arch.shared->sprg4 = spr_val;
+ break;
case SPRN_SPRG5:
- vcpu->arch.shared->sprg5 = spr_val; break;
+ vcpu->arch.shared->sprg5 = spr_val;
+ break;
case SPRN_SPRG6:
- vcpu->arch.shared->sprg6 = spr_val; break;
+ vcpu->arch.shared->sprg6 = spr_val;
+ break;
case SPRN_SPRG7:
- vcpu->arch.shared->sprg7 = spr_val; break;
+ vcpu->arch.shared->sprg7 = spr_val;
+ break;
case SPRN_IVPR:
vcpu->arch.ivpr = spr_val;
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GIVPR, spr_val);
+#endif
break;
case SPRN_IVOR0:
vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val;
@@ -145,6 +161,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
break;
case SPRN_IVOR2:
vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val;
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GIVOR2, spr_val);
+#endif
break;
case SPRN_IVOR3:
vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val;
@@ -163,6 +182,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
break;
case SPRN_IVOR8:
vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val;
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GIVOR8, spr_val);
+#endif
break;
case SPRN_IVOR9:
vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val;
@@ -193,75 +215,83 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
return emulated;
}
-int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
int emulated = EMULATE_DONE;
switch (sprn) {
case SPRN_IVPR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break;
+ *spr_val = vcpu->arch.ivpr;
+ break;
case SPRN_DEAR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break;
+ *spr_val = vcpu->arch.shared->dar;
+ break;
case SPRN_ESR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->esr); break;
+ *spr_val = vcpu->arch.shared->esr;
+ break;
case SPRN_DBCR0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break;
+ *spr_val = vcpu->arch.dbcr0;
+ break;
case SPRN_DBCR1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
+ *spr_val = vcpu->arch.dbcr1;
+ break;
case SPRN_DBSR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
+ *spr_val = vcpu->arch.dbsr;
+ break;
case SPRN_TSR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr); break;
+ *spr_val = vcpu->arch.tsr;
+ break;
case SPRN_TCR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr); break;
+ *spr_val = vcpu->arch.tcr;
+ break;
case SPRN_IVOR0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
break;
case SPRN_IVOR1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
break;
case SPRN_IVOR2:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
break;
case SPRN_IVOR3:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
break;
case SPRN_IVOR4:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
break;
case SPRN_IVOR5:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
break;
case SPRN_IVOR6:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
break;
case SPRN_IVOR7:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
break;
case SPRN_IVOR8:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
break;
case SPRN_IVOR9:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
break;
case SPRN_IVOR10:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
break;
case SPRN_IVOR11:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
break;
case SPRN_IVOR12:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
break;
case SPRN_IVOR13:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
break;
case SPRN_IVOR14:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
break;
case SPRN_IVOR15:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
break;
default:
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index c8c4b878795..8feec2ff392 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -419,13 +419,13 @@ lightweight_exit:
* written directly to the shared area, so we
* need to reload them here with the guest's values.
*/
- lwz r3, VCPU_SHARED_SPRG4(r5)
+ PPC_LD(r3, VCPU_SHARED_SPRG4, r5)
mtspr SPRN_SPRG4W, r3
- lwz r3, VCPU_SHARED_SPRG5(r5)
+ PPC_LD(r3, VCPU_SHARED_SPRG5, r5)
mtspr SPRN_SPRG5W, r3
- lwz r3, VCPU_SHARED_SPRG6(r5)
+ PPC_LD(r3, VCPU_SHARED_SPRG6, r5)
mtspr SPRN_SPRG6W, r3
- lwz r3, VCPU_SHARED_SPRG7(r5)
+ PPC_LD(r3, VCPU_SHARED_SPRG7, r5)
mtspr SPRN_SPRG7W, r3
#ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
new file mode 100644
index 00000000000..6048a00515d
--- /dev/null
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -0,0 +1,597 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * Author: Varun Sethi <varun.sethi@freescale.com>
+ * Author: Scott Wood <scotwood@freescale.com>
+ *
+ * This file is derived from arch/powerpc/kvm/booke_interrupts.S
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/mmu-44x.h>
+#include <asm/page.h>
+#include <asm/asm-compat.h>
+#include <asm/asm-offsets.h>
+#include <asm/bitsperlong.h>
+#include <asm/thread_info.h>
+
+#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
+
+#define GET_VCPU(vcpu, thread) \
+ PPC_LL vcpu, THREAD_KVM_VCPU(thread)
+
+#define LONGBYTES (BITS_PER_LONG / 8)
+
+#define VCPU_GPR(n) (VCPU_GPRS + (n * LONGBYTES))
+#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES))
+
+/* The host stack layout: */
+#define HOST_R1 (0 * LONGBYTES) /* Implied by stwu. */
+#define HOST_CALLEE_LR (1 * LONGBYTES)
+#define HOST_RUN (2 * LONGBYTES) /* struct kvm_run */
+/*
+ * r2 is special: it holds 'current', and it made nonvolatile in the
+ * kernel with the -ffixed-r2 gcc option.
+ */
+#define HOST_R2 (3 * LONGBYTES)
+#define HOST_CR (4 * LONGBYTES)
+#define HOST_NV_GPRS (5 * LONGBYTES)
+#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES))
+#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + LONGBYTES)
+#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */
+#define HOST_STACK_LR (HOST_STACK_SIZE + LONGBYTES) /* In caller stack frame. */
+
+#define NEED_EMU 0x00000001 /* emulation -- save nv regs */
+#define NEED_DEAR 0x00000002 /* save faulting DEAR */
+#define NEED_ESR 0x00000004 /* save faulting ESR */
+
+/*
+ * On entry:
+ * r4 = vcpu, r5 = srr0, r6 = srr1
+ * saved in vcpu: cr, ctr, r3-r13
+ */
+.macro kvm_handler_common intno, srr0, flags
+ /* Restore host stack pointer */
+ PPC_STL r1, VCPU_GPR(r1)(r4)
+ PPC_STL r2, VCPU_GPR(r2)(r4)
+ PPC_LL r1, VCPU_HOST_STACK(r4)
+ PPC_LL r2, HOST_R2(r1)
+
+ mfspr r10, SPRN_PID
+ lwz r8, VCPU_HOST_PID(r4)
+ PPC_LL r11, VCPU_SHARED(r4)
+ PPC_STL r14, VCPU_GPR(r14)(r4) /* We need a non-volatile GPR. */
+ li r14, \intno
+
+ stw r10, VCPU_GUEST_PID(r4)
+ mtspr SPRN_PID, r8
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ /* save exit time */
+1: mfspr r7, SPRN_TBRU
+ mfspr r8, SPRN_TBRL
+ mfspr r9, SPRN_TBRU
+ cmpw r9, r7
+ stw r8, VCPU_TIMING_EXIT_TBL(r4)
+ bne- 1b
+ stw r9, VCPU_TIMING_EXIT_TBU(r4)
+#endif
+
+ oris r8, r6, MSR_CE@h
+ PPC_STD(r6, VCPU_SHARED_MSR, r11)
+ ori r8, r8, MSR_ME | MSR_RI
+ PPC_STL r5, VCPU_PC(r4)
+
+ /*
+ * Make sure CE/ME/RI are set (if appropriate for exception type)
+ * whether or not the guest had it set. Since mfmsr/mtmsr are
+ * somewhat expensive, skip in the common case where the guest
+ * had all these bits set (and thus they're still set if
+ * appropriate for the exception type).
+ */
+ cmpw r6, r8
+ beq 1f
+ mfmsr r7
+ .if \srr0 != SPRN_MCSRR0 && \srr0 != SPRN_CSRR0
+ oris r7, r7, MSR_CE@h
+ .endif
+ .if \srr0 != SPRN_MCSRR0
+ ori r7, r7, MSR_ME | MSR_RI
+ .endif
+ mtmsr r7
+1:
+
+ .if \flags & NEED_EMU
+ /*
+ * This assumes you have external PID support.
+ * To support a bookehv CPU without external PID, you'll
+ * need to look up the TLB entry and create a temporary mapping.
+ *
+ * FIXME: we don't currently handle if the lwepx faults. PR-mode
+ * booke doesn't handle it either. Since Linux doesn't use
+ * broadcast tlbivax anymore, the only way this should happen is
+ * if the guest maps its memory execute-but-not-read, or if we
+ * somehow take a TLB miss in the middle of this entry code and
+ * evict the relevant entry. On e500mc, all kernel lowmem is
+ * bolted into TLB1 large page mappings, and we don't use
+ * broadcast invalidates, so we should not take a TLB miss here.
+ *
+ * Later we'll need to deal with faults here. Disallowing guest
+ * mappings that are execute-but-not-read could be an option on
+ * e500mc, but not on chips with an LRAT if it is used.
+ */
+
+ mfspr r3, SPRN_EPLC /* will already have correct ELPID and EGS */
+ PPC_STL r15, VCPU_GPR(r15)(r4)
+ PPC_STL r16, VCPU_GPR(r16)(r4)
+ PPC_STL r17, VCPU_GPR(r17)(r4)
+ PPC_STL r18, VCPU_GPR(r18)(r4)
+ PPC_STL r19, VCPU_GPR(r19)(r4)
+ mr r8, r3
+ PPC_STL r20, VCPU_GPR(r20)(r4)
+ rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS
+ PPC_STL r21, VCPU_GPR(r21)(r4)
+ rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR
+ PPC_STL r22, VCPU_GPR(r22)(r4)
+ rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID
+ PPC_STL r23, VCPU_GPR(r23)(r4)
+ PPC_STL r24, VCPU_GPR(r24)(r4)
+ PPC_STL r25, VCPU_GPR(r25)(r4)
+ PPC_STL r26, VCPU_GPR(r26)(r4)
+ PPC_STL r27, VCPU_GPR(r27)(r4)
+ PPC_STL r28, VCPU_GPR(r28)(r4)
+ PPC_STL r29, VCPU_GPR(r29)(r4)
+ PPC_STL r30, VCPU_GPR(r30)(r4)
+ PPC_STL r31, VCPU_GPR(r31)(r4)
+ mtspr SPRN_EPLC, r8
+
+ /* disable preemption, so we are sure we hit the fixup handler */
+#ifdef CONFIG_PPC64
+ clrrdi r8,r1,THREAD_SHIFT
+#else
+ rlwinm r8,r1,0,0,31-THREAD_SHIFT /* current thread_info */
+#endif
+ li r7, 1
+ stw r7, TI_PREEMPT(r8)
+
+ isync
+
+ /*
+ * In case the read goes wrong, we catch it and write an invalid value
+ * in LAST_INST instead.
+ */
+1: lwepx r9, 0, r5
+2:
+.section .fixup, "ax"
+3: li r9, KVM_INST_FETCH_FAILED
+ b 2b
+.previous
+.section __ex_table,"a"
+ PPC_LONG_ALIGN
+ PPC_LONG 1b,3b
+.previous
+
+ mtspr SPRN_EPLC, r3
+ li r7, 0
+ stw r7, TI_PREEMPT(r8)
+ stw r9, VCPU_LAST_INST(r4)
+ .endif
+
+ .if \flags & NEED_ESR
+ mfspr r8, SPRN_ESR
+ PPC_STL r8, VCPU_FAULT_ESR(r4)
+ .endif
+
+ .if \flags & NEED_DEAR
+ mfspr r9, SPRN_DEAR
+ PPC_STL r9, VCPU_FAULT_DEAR(r4)
+ .endif
+
+ b kvmppc_resume_host
+.endm
+
+/*
+ * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
+ */
+.macro kvm_handler intno srr0, srr1, flags
+_GLOBAL(kvmppc_handler_\intno\()_\srr1)
+ GET_VCPU(r11, r10)
+ PPC_STL r3, VCPU_GPR(r3)(r11)
+ mfspr r3, SPRN_SPRG_RSCRATCH0
+ PPC_STL r4, VCPU_GPR(r4)(r11)
+ PPC_LL r4, THREAD_NORMSAVE(0)(r10)
+ PPC_STL r5, VCPU_GPR(r5)(r11)
+ stw r13, VCPU_CR(r11)
+ mfspr r5, \srr0
+ PPC_STL r3, VCPU_GPR(r10)(r11)
+ PPC_LL r3, THREAD_NORMSAVE(2)(r10)
+ PPC_STL r6, VCPU_GPR(r6)(r11)
+ PPC_STL r4, VCPU_GPR(r11)(r11)
+ mfspr r6, \srr1
+ PPC_STL r7, VCPU_GPR(r7)(r11)
+ PPC_STL r8, VCPU_GPR(r8)(r11)
+ PPC_STL r9, VCPU_GPR(r9)(r11)
+ PPC_STL r3, VCPU_GPR(r13)(r11)
+ mfctr r7
+ PPC_STL r12, VCPU_GPR(r12)(r11)
+ PPC_STL r7, VCPU_CTR(r11)
+ mr r4, r11
+ kvm_handler_common \intno, \srr0, \flags
+.endm
+
+.macro kvm_lvl_handler intno scratch srr0, srr1, flags
+_GLOBAL(kvmppc_handler_\intno\()_\srr1)
+ mfspr r10, SPRN_SPRG_THREAD
+ GET_VCPU(r11, r10)
+ PPC_STL r3, VCPU_GPR(r3)(r11)
+ mfspr r3, \scratch
+ PPC_STL r4, VCPU_GPR(r4)(r11)
+ PPC_LL r4, GPR9(r8)
+ PPC_STL r5, VCPU_GPR(r5)(r11)
+ stw r9, VCPU_CR(r11)
+ mfspr r5, \srr0
+ PPC_STL r3, VCPU_GPR(r8)(r11)
+ PPC_LL r3, GPR10(r8)
+ PPC_STL r6, VCPU_GPR(r6)(r11)
+ PPC_STL r4, VCPU_GPR(r9)(r11)
+ mfspr r6, \srr1
+ PPC_LL r4, GPR11(r8)
+ PPC_STL r7, VCPU_GPR(r7)(r11)
+ PPC_STL r3, VCPU_GPR(r10)(r11)
+ mfctr r7
+ PPC_STL r12, VCPU_GPR(r12)(r11)
+ PPC_STL r13, VCPU_GPR(r13)(r11)
+ PPC_STL r4, VCPU_GPR(r11)(r11)
+ PPC_STL r7, VCPU_CTR(r11)
+ mr r4, r11
+ kvm_handler_common \intno, \srr0, \flags
+.endm
+
+kvm_lvl_handler BOOKE_INTERRUPT_CRITICAL, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_MACHINE_CHECK, \
+ SPRN_SPRG_RSCRATCH_MC, SPRN_MCSRR0, SPRN_MCSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, \
+ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR)
+kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \
+ SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DECREMENTER, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_FIT, SPRN_SRR0, SPRN_SRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \
+ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_HV_PRIV, SPRN_SRR0, SPRN_SRR1, NEED_EMU
+kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, SPRN_GSRR0, SPRN_GSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
+ SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0
+
+
+/* Registers:
+ * SPRG_SCRATCH0: guest r10
+ * r4: vcpu pointer
+ * r11: vcpu->arch.shared
+ * r14: KVM exit number
+ */
+_GLOBAL(kvmppc_resume_host)
+ /* Save remaining volatile guest register state to vcpu. */
+ mfspr r3, SPRN_VRSAVE
+ PPC_STL r0, VCPU_GPR(r0)(r4)
+ mflr r5
+ mfspr r6, SPRN_SPRG4
+ PPC_STL r5, VCPU_LR(r4)
+ mfspr r7, SPRN_SPRG5
+ stw r3, VCPU_VRSAVE(r4)
+ PPC_STD(r6, VCPU_SHARED_SPRG4, r11)
+ mfspr r8, SPRN_SPRG6
+ PPC_STD(r7, VCPU_SHARED_SPRG5, r11)
+ mfspr r9, SPRN_SPRG7
+ PPC_STD(r8, VCPU_SHARED_SPRG6, r11)
+ mfxer r3
+ PPC_STD(r9, VCPU_SHARED_SPRG7, r11)
+
+ /* save guest MAS registers and restore host mas4 & mas6 */
+ mfspr r5, SPRN_MAS0
+ PPC_STL r3, VCPU_XER(r4)
+ mfspr r6, SPRN_MAS1
+ stw r5, VCPU_SHARED_MAS0(r11)
+ mfspr r7, SPRN_MAS2
+ stw r6, VCPU_SHARED_MAS1(r11)
+ PPC_STD(r7, VCPU_SHARED_MAS2, r11)
+ mfspr r5, SPRN_MAS3
+ mfspr r6, SPRN_MAS4
+ stw r5, VCPU_SHARED_MAS7_3+4(r11)
+ mfspr r7, SPRN_MAS6
+ stw r6, VCPU_SHARED_MAS4(r11)
+ mfspr r5, SPRN_MAS7
+ lwz r6, VCPU_HOST_MAS4(r4)
+ stw r7, VCPU_SHARED_MAS6(r11)
+ lwz r8, VCPU_HOST_MAS6(r4)
+ mtspr SPRN_MAS4, r6
+ stw r5, VCPU_SHARED_MAS7_3+0(r11)
+ mtspr SPRN_MAS6, r8
+ /* Enable MAS register updates via exception */
+ mfspr r3, SPRN_EPCR
+ rlwinm r3, r3, 0, ~SPRN_EPCR_DMIUH
+ mtspr SPRN_EPCR, r3
+ isync
+
+ /* Switch to kernel stack and jump to handler. */
+ PPC_LL r3, HOST_RUN(r1)
+ mr r5, r14 /* intno */
+ mr r14, r4 /* Save vcpu pointer. */
+ bl kvmppc_handle_exit
+
+ /* Restore vcpu pointer and the nonvolatiles we used. */
+ mr r4, r14
+ PPC_LL r14, VCPU_GPR(r14)(r4)
+
+ andi. r5, r3, RESUME_FLAG_NV
+ beq skip_nv_load
+ PPC_LL r15, VCPU_GPR(r15)(r4)
+ PPC_LL r16, VCPU_GPR(r16)(r4)
+ PPC_LL r17, VCPU_GPR(r17)(r4)
+ PPC_LL r18, VCPU_GPR(r18)(r4)
+ PPC_LL r19, VCPU_GPR(r19)(r4)
+ PPC_LL r20, VCPU_GPR(r20)(r4)
+ PPC_LL r21, VCPU_GPR(r21)(r4)
+ PPC_LL r22, VCPU_GPR(r22)(r4)
+ PPC_LL r23, VCPU_GPR(r23)(r4)
+ PPC_LL r24, VCPU_GPR(r24)(r4)
+ PPC_LL r25, VCPU_GPR(r25)(r4)
+ PPC_LL r26, VCPU_GPR(r26)(r4)
+ PPC_LL r27, VCPU_GPR(r27)(r4)
+ PPC_LL r28, VCPU_GPR(r28)(r4)
+ PPC_LL r29, VCPU_GPR(r29)(r4)
+ PPC_LL r30, VCPU_GPR(r30)(r4)
+ PPC_LL r31, VCPU_GPR(r31)(r4)
+skip_nv_load:
+ /* Should we return to the guest? */
+ andi. r5, r3, RESUME_FLAG_HOST
+ beq lightweight_exit
+
+ srawi r3, r3, 2 /* Shift -ERR back down. */
+
+heavyweight_exit:
+ /* Not returning to guest. */
+ PPC_LL r5, HOST_STACK_LR(r1)
+ lwz r6, HOST_CR(r1)
+
+ /*
+ * We already saved guest volatile register state; now save the
+ * non-volatiles.
+ */
+
+ PPC_STL r15, VCPU_GPR(r15)(r4)
+ PPC_STL r16, VCPU_GPR(r16)(r4)
+ PPC_STL r17, VCPU_GPR(r17)(r4)
+ PPC_STL r18, VCPU_GPR(r18)(r4)
+ PPC_STL r19, VCPU_GPR(r19)(r4)
+ PPC_STL r20, VCPU_GPR(r20)(r4)
+ PPC_STL r21, VCPU_GPR(r21)(r4)
+ PPC_STL r22, VCPU_GPR(r22)(r4)
+ PPC_STL r23, VCPU_GPR(r23)(r4)
+ PPC_STL r24, VCPU_GPR(r24)(r4)
+ PPC_STL r25, VCPU_GPR(r25)(r4)
+ PPC_STL r26, VCPU_GPR(r26)(r4)
+ PPC_STL r27, VCPU_GPR(r27)(r4)
+ PPC_STL r28, VCPU_GPR(r28)(r4)
+ PPC_STL r29, VCPU_GPR(r29)(r4)
+ PPC_STL r30, VCPU_GPR(r30)(r4)
+ PPC_STL r31, VCPU_GPR(r31)(r4)
+
+ /* Load host non-volatile register state from host stack. */
+ PPC_LL r14, HOST_NV_GPR(r14)(r1)
+ PPC_LL r15, HOST_NV_GPR(r15)(r1)
+ PPC_LL r16, HOST_NV_GPR(r16)(r1)
+ PPC_LL r17, HOST_NV_GPR(r17)(r1)
+ PPC_LL r18, HOST_NV_GPR(r18)(r1)
+ PPC_LL r19, HOST_NV_GPR(r19)(r1)
+ PPC_LL r20, HOST_NV_GPR(r20)(r1)
+ PPC_LL r21, HOST_NV_GPR(r21)(r1)
+ PPC_LL r22, HOST_NV_GPR(r22)(r1)
+ PPC_LL r23, HOST_NV_GPR(r23)(r1)
+ PPC_LL r24, HOST_NV_GPR(r24)(r1)
+ PPC_LL r25, HOST_NV_GPR(r25)(r1)
+ PPC_LL r26, HOST_NV_GPR(r26)(r1)
+ PPC_LL r27, HOST_NV_GPR(r27)(r1)
+ PPC_LL r28, HOST_NV_GPR(r28)(r1)
+ PPC_LL r29, HOST_NV_GPR(r29)(r1)
+ PPC_LL r30, HOST_NV_GPR(r30)(r1)
+ PPC_LL r31, HOST_NV_GPR(r31)(r1)
+
+ /* Return to kvm_vcpu_run(). */
+ mtlr r5
+ mtcr r6
+ addi r1, r1, HOST_STACK_SIZE
+ /* r3 still contains the return code from kvmppc_handle_exit(). */
+ blr
+
+/* Registers:
+ * r3: kvm_run pointer
+ * r4: vcpu pointer
+ */
+_GLOBAL(__kvmppc_vcpu_run)
+ stwu r1, -HOST_STACK_SIZE(r1)
+ PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
+
+ /* Save host state to stack. */
+ PPC_STL r3, HOST_RUN(r1)
+ mflr r3
+ mfcr r5
+ PPC_STL r3, HOST_STACK_LR(r1)
+
+ stw r5, HOST_CR(r1)
+
+ /* Save host non-volatile register state to stack. */
+ PPC_STL r14, HOST_NV_GPR(r14)(r1)
+ PPC_STL r15, HOST_NV_GPR(r15)(r1)
+ PPC_STL r16, HOST_NV_GPR(r16)(r1)
+ PPC_STL r17, HOST_NV_GPR(r17)(r1)
+ PPC_STL r18, HOST_NV_GPR(r18)(r1)
+ PPC_STL r19, HOST_NV_GPR(r19)(r1)
+ PPC_STL r20, HOST_NV_GPR(r20)(r1)
+ PPC_STL r21, HOST_NV_GPR(r21)(r1)
+ PPC_STL r22, HOST_NV_GPR(r22)(r1)
+ PPC_STL r23, HOST_NV_GPR(r23)(r1)
+ PPC_STL r24, HOST_NV_GPR(r24)(r1)
+ PPC_STL r25, HOST_NV_GPR(r25)(r1)
+ PPC_STL r26, HOST_NV_GPR(r26)(r1)
+ PPC_STL r27, HOST_NV_GPR(r27)(r1)
+ PPC_STL r28, HOST_NV_GPR(r28)(r1)
+ PPC_STL r29, HOST_NV_GPR(r29)(r1)
+ PPC_STL r30, HOST_NV_GPR(r30)(r1)
+ PPC_STL r31, HOST_NV_GPR(r31)(r1)
+
+ /* Load guest non-volatiles. */
+ PPC_LL r14, VCPU_GPR(r14)(r4)
+ PPC_LL r15, VCPU_GPR(r15)(r4)
+ PPC_LL r16, VCPU_GPR(r16)(r4)
+ PPC_LL r17, VCPU_GPR(r17)(r4)
+ PPC_LL r18, VCPU_GPR(r18)(r4)
+ PPC_LL r19, VCPU_GPR(r19)(r4)
+ PPC_LL r20, VCPU_GPR(r20)(r4)
+ PPC_LL r21, VCPU_GPR(r21)(r4)
+ PPC_LL r22, VCPU_GPR(r22)(r4)
+ PPC_LL r23, VCPU_GPR(r23)(r4)
+ PPC_LL r24, VCPU_GPR(r24)(r4)
+ PPC_LL r25, VCPU_GPR(r25)(r4)
+ PPC_LL r26, VCPU_GPR(r26)(r4)
+ PPC_LL r27, VCPU_GPR(r27)(r4)
+ PPC_LL r28, VCPU_GPR(r28)(r4)
+ PPC_LL r29, VCPU_GPR(r29)(r4)
+ PPC_LL r30, VCPU_GPR(r30)(r4)
+ PPC_LL r31, VCPU_GPR(r31)(r4)
+
+
+lightweight_exit:
+ PPC_STL r2, HOST_R2(r1)
+
+ mfspr r3, SPRN_PID
+ stw r3, VCPU_HOST_PID(r4)
+ lwz r3, VCPU_GUEST_PID(r4)
+ mtspr SPRN_PID, r3
+
+ PPC_LL r11, VCPU_SHARED(r4)
+ /* Disable MAS register updates via exception */
+ mfspr r3, SPRN_EPCR
+ oris r3, r3, SPRN_EPCR_DMIUH@h
+ mtspr SPRN_EPCR, r3
+ isync
+ /* Save host mas4 and mas6 and load guest MAS registers */
+ mfspr r3, SPRN_MAS4
+ stw r3, VCPU_HOST_MAS4(r4)
+ mfspr r3, SPRN_MAS6
+ stw r3, VCPU_HOST_MAS6(r4)
+ lwz r3, VCPU_SHARED_MAS0(r11)
+ lwz r5, VCPU_SHARED_MAS1(r11)
+ PPC_LD(r6, VCPU_SHARED_MAS2, r11)
+ lwz r7, VCPU_SHARED_MAS7_3+4(r11)
+ lwz r8, VCPU_SHARED_MAS4(r11)
+ mtspr SPRN_MAS0, r3
+ mtspr SPRN_MAS1, r5
+ mtspr SPRN_MAS2, r6
+ mtspr SPRN_MAS3, r7
+ mtspr SPRN_MAS4, r8
+ lwz r3, VCPU_SHARED_MAS6(r11)
+ lwz r5, VCPU_SHARED_MAS7_3+0(r11)
+ mtspr SPRN_MAS6, r3
+ mtspr SPRN_MAS7, r5
+
+ /*
+ * Host interrupt handlers may have clobbered these guest-readable
+ * SPRGs, so we need to reload them here with the guest's values.
+ */
+ lwz r3, VCPU_VRSAVE(r4)
+ PPC_LD(r5, VCPU_SHARED_SPRG4, r11)
+ mtspr SPRN_VRSAVE, r3
+ PPC_LD(r6, VCPU_SHARED_SPRG5, r11)
+ mtspr SPRN_SPRG4W, r5
+ PPC_LD(r7, VCPU_SHARED_SPRG6, r11)
+ mtspr SPRN_SPRG5W, r6
+ PPC_LD(r8, VCPU_SHARED_SPRG7, r11)
+ mtspr SPRN_SPRG6W, r7
+ mtspr SPRN_SPRG7W, r8
+
+ /* Load some guest volatiles. */
+ PPC_LL r3, VCPU_LR(r4)
+ PPC_LL r5, VCPU_XER(r4)
+ PPC_LL r6, VCPU_CTR(r4)
+ lwz r7, VCPU_CR(r4)
+ PPC_LL r8, VCPU_PC(r4)
+ PPC_LD(r9, VCPU_SHARED_MSR, r11)
+ PPC_LL r0, VCPU_GPR(r0)(r4)
+ PPC_LL r1, VCPU_GPR(r1)(r4)
+ PPC_LL r2, VCPU_GPR(r2)(r4)
+ PPC_LL r10, VCPU_GPR(r10)(r4)
+ PPC_LL r11, VCPU_GPR(r11)(r4)
+ PPC_LL r12, VCPU_GPR(r12)(r4)
+ PPC_LL r13, VCPU_GPR(r13)(r4)
+ mtlr r3
+ mtxer r5
+ mtctr r6
+ mtsrr0 r8
+ mtsrr1 r9
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ /* save enter time */
+1:
+ mfspr r6, SPRN_TBRU
+ mfspr r9, SPRN_TBRL
+ mfspr r8, SPRN_TBRU
+ cmpw r8, r6
+ stw r9, VCPU_TIMING_LAST_ENTER_TBL(r4)
+ bne 1b
+ stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
+#endif
+
+ /*
+ * Don't execute any instruction which can change CR after
+ * below instruction.
+ */
+ mtcr r7
+
+ /* Finish loading guest volatiles and jump to guest. */
+ PPC_LL r5, VCPU_GPR(r5)(r4)
+ PPC_LL r6, VCPU_GPR(r6)(r4)
+ PPC_LL r7, VCPU_GPR(r7)(r4)
+ PPC_LL r8, VCPU_GPR(r8)(r4)
+ PPC_LL r9, VCPU_GPR(r9)(r4)
+
+ PPC_LL r3, VCPU_GPR(r3)(r4)
+ PPC_LL r4, VCPU_GPR(r4)(r4)
+ rfi
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index ddcd896fa2f..b479ed77c51 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -20,11 +20,282 @@
#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/tlbflush.h>
-#include <asm/kvm_e500.h>
#include <asm/kvm_ppc.h>
+#include "../mm/mmu_decl.h"
#include "booke.h"
-#include "e500_tlb.h"
+#include "e500.h"
+
+struct id {
+ unsigned long val;
+ struct id **pentry;
+};
+
+#define NUM_TIDS 256
+
+/*
+ * This table provide mappings from:
+ * (guestAS,guestTID,guestPR) --> ID of physical cpu
+ * guestAS [0..1]
+ * guestTID [0..255]
+ * guestPR [0..1]
+ * ID [1..255]
+ * Each vcpu keeps one vcpu_id_table.
+ */
+struct vcpu_id_table {
+ struct id id[2][NUM_TIDS][2];
+};
+
+/*
+ * This table provide reversed mappings of vcpu_id_table:
+ * ID --> address of vcpu_id_table item.
+ * Each physical core has one pcpu_id_table.
+ */
+struct pcpu_id_table {
+ struct id *entry[NUM_TIDS];
+};
+
+static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
+
+/* This variable keeps last used shadow ID on local core.
+ * The valid range of shadow ID is [1..255] */
+static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
+
+/*
+ * Allocate a free shadow id and setup a valid sid mapping in given entry.
+ * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static inline int local_sid_setup_one(struct id *entry)
+{
+ unsigned long sid;
+ int ret = -1;
+
+ sid = ++(__get_cpu_var(pcpu_last_used_sid));
+ if (sid < NUM_TIDS) {
+ __get_cpu_var(pcpu_sids).entry[sid] = entry;
+ entry->val = sid;
+ entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid];
+ ret = sid;
+ }
+
+ /*
+ * If sid == NUM_TIDS, we've run out of sids. We return -1, and
+ * the caller will invalidate everything and start over.
+ *
+ * sid > NUM_TIDS indicates a race, which we disable preemption to
+ * avoid.
+ */
+ WARN_ON(sid > NUM_TIDS);
+
+ return ret;
+}
+
+/*
+ * Check if given entry contain a valid shadow id mapping.
+ * An ID mapping is considered valid only if
+ * both vcpu and pcpu know this mapping.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static inline int local_sid_lookup(struct id *entry)
+{
+ if (entry && entry->val != 0 &&
+ __get_cpu_var(pcpu_sids).entry[entry->val] == entry &&
+ entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val])
+ return entry->val;
+ return -1;
+}
+
+/* Invalidate all id mappings on local core -- call with preempt disabled */
+static inline void local_sid_destroy_all(void)
+{
+ __get_cpu_var(pcpu_last_used_sid) = 0;
+ memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
+}
+
+static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL);
+ return vcpu_e500->idt;
+}
+
+static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ kfree(vcpu_e500->idt);
+ vcpu_e500->idt = NULL;
+}
+
+/* Map guest pid to shadow.
+ * We use PID to keep shadow of current guest non-zero PID,
+ * and use PID1 to keep shadow of guest zero PID.
+ * So that guest tlbe with TID=0 can be accessed at any time */
+static void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ preempt_disable();
+ vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500,
+ get_cur_as(&vcpu_e500->vcpu),
+ get_cur_pid(&vcpu_e500->vcpu),
+ get_cur_pr(&vcpu_e500->vcpu), 1);
+ vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500,
+ get_cur_as(&vcpu_e500->vcpu), 0,
+ get_cur_pr(&vcpu_e500->vcpu), 1);
+ preempt_enable();
+}
+
+/* Invalidate all mappings on vcpu */
+static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table));
+
+ /* Update shadow pid when mappings are changed */
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/* Invalidate one ID mapping on vcpu */
+static inline void kvmppc_e500_id_table_reset_one(
+ struct kvmppc_vcpu_e500 *vcpu_e500,
+ int as, int pid, int pr)
+{
+ struct vcpu_id_table *idt = vcpu_e500->idt;
+
+ BUG_ON(as >= 2);
+ BUG_ON(pid >= NUM_TIDS);
+ BUG_ON(pr >= 2);
+
+ idt->id[as][pid][pr].val = 0;
+ idt->id[as][pid][pr].pentry = NULL;
+
+ /* Update shadow pid when mappings are changed */
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/*
+ * Map guest (vcpu,AS,ID,PR) to physical core shadow id.
+ * This function first lookup if a valid mapping exists,
+ * if not, then creates a new one.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
+ unsigned int as, unsigned int gid,
+ unsigned int pr, int avoid_recursion)
+{
+ struct vcpu_id_table *idt = vcpu_e500->idt;
+ int sid;
+
+ BUG_ON(as >= 2);
+ BUG_ON(gid >= NUM_TIDS);
+ BUG_ON(pr >= 2);
+
+ sid = local_sid_lookup(&idt->id[as][gid][pr]);
+
+ while (sid <= 0) {
+ /* No mapping yet */
+ sid = local_sid_setup_one(&idt->id[as][gid][pr]);
+ if (sid <= 0) {
+ _tlbil_all();
+ local_sid_destroy_all();
+ }
+
+ /* Update shadow pid when mappings are changed */
+ if (!avoid_recursion)
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+ }
+
+ return sid;
+}
+
+unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+ return kvmppc_e500_get_sid(to_e500(vcpu), get_tlb_ts(gtlbe),
+ get_tlb_tid(gtlbe), get_cur_pr(vcpu), 0);
+}
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ if (vcpu->arch.pid != pid) {
+ vcpu_e500->pid[0] = vcpu->arch.pid = pid;
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+ }
+}
+
+/* gtlbe must not be mapped by more than one host tlbe */
+void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+ struct vcpu_id_table *idt = vcpu_e500->idt;
+ unsigned int pr, tid, ts, pid;
+ u32 val, eaddr;
+ unsigned long flags;
+
+ ts = get_tlb_ts(gtlbe);
+ tid = get_tlb_tid(gtlbe);
+
+ preempt_disable();
+
+ /* One guest ID may be mapped to two shadow IDs */
+ for (pr = 0; pr < 2; pr++) {
+ /*
+ * The shadow PID can have a valid mapping on at most one
+ * host CPU. In the common case, it will be valid on this
+ * CPU, in which case we do a local invalidation of the
+ * specific address.
+ *
+ * If the shadow PID is not valid on the current host CPU,
+ * we invalidate the entire shadow PID.
+ */
+ pid = local_sid_lookup(&idt->id[ts][tid][pr]);
+ if (pid <= 0) {
+ kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr);
+ continue;
+ }
+
+ /*
+ * The guest is invalidating a 4K entry which is in a PID
+ * that has a valid shadow mapping on this host CPU. We
+ * search host TLB to invalidate it's shadow TLB entry,
+ * similar to __tlbil_va except that we need to look in AS1.
+ */
+ val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;
+ eaddr = get_tlb_eaddr(gtlbe);
+
+ local_irq_save(flags);
+
+ mtspr(SPRN_MAS6, val);
+ asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr));
+ val = mfspr(SPRN_MAS1);
+ if (val & MAS1_VALID) {
+ mtspr(SPRN_MAS1, val & ~MAS1_VALID);
+ asm volatile("tlbwe");
+ }
+
+ local_irq_restore(flags);
+ }
+
+ preempt_enable();
+}
+
+void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ kvmppc_e500_id_table_reset_all(vcpu_e500);
+}
+
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
+{
+ /* Recalc shadow pid since MSR changes */
+ kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
+}
void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
{
@@ -36,17 +307,20 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- kvmppc_e500_tlb_load(vcpu, cpu);
+ kvmppc_booke_vcpu_load(vcpu, cpu);
+
+ /* Shadow PID may be expired on local core */
+ kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
}
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
- kvmppc_e500_tlb_put(vcpu);
-
#ifdef CONFIG_SPE
if (vcpu->arch.shadow_msr & MSR_SPE)
kvmppc_vcpu_disable_spe(vcpu);
#endif
+
+ kvmppc_booke_vcpu_put(vcpu);
}
int kvmppc_core_check_processor_compat(void)
@@ -61,6 +335,23 @@ int kvmppc_core_check_processor_compat(void)
return r;
}
+static void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ struct kvm_book3e_206_tlb_entry *tlbe;
+
+ /* Insert large initial mapping for guest. */
+ tlbe = get_entry(vcpu_e500, 1, 0);
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
+ tlbe->mas2 = 0;
+ tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
+
+ /* 4K map for serial output. Used by kernel wrapper. */
+ tlbe = get_entry(vcpu_e500, 1, 1);
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
+ tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
+ tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+}
+
int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -76,32 +367,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
return 0;
}
-/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
-int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
- struct kvm_translation *tr)
-{
- int index;
- gva_t eaddr;
- u8 pid;
- u8 as;
-
- eaddr = tr->linear_address;
- pid = (tr->linear_address >> 32) & 0xff;
- as = (tr->linear_address >> 40) & 0x1;
-
- index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
- if (index < 0) {
- tr->valid = 0;
- return 0;
- }
-
- tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
- /* XXX what does "writeable" and "usermode" even mean? */
- tr->valid = 1;
-
- return 0;
-}
-
void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -115,19 +380,6 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
- sregs->u.e.mas0 = vcpu->arch.shared->mas0;
- sregs->u.e.mas1 = vcpu->arch.shared->mas1;
- sregs->u.e.mas2 = vcpu->arch.shared->mas2;
- sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3;
- sregs->u.e.mas4 = vcpu->arch.shared->mas4;
- sregs->u.e.mas6 = vcpu->arch.shared->mas6;
-
- sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG);
- sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg;
- sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg;
- sregs->u.e.tlbcfg[2] = 0;
- sregs->u.e.tlbcfg[3] = 0;
-
sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
@@ -135,11 +387,13 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
kvmppc_get_sregs_ivor(vcpu, sregs);
+ kvmppc_get_sregs_e500_tlb(vcpu, sregs);
}
int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int ret;
if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
@@ -147,14 +401,9 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
}
- if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
- vcpu->arch.shared->mas0 = sregs->u.e.mas0;
- vcpu->arch.shared->mas1 = sregs->u.e.mas1;
- vcpu->arch.shared->mas2 = sregs->u.e.mas2;
- vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3;
- vcpu->arch.shared->mas4 = sregs->u.e.mas4;
- vcpu->arch.shared->mas6 = sregs->u.e.mas6;
- }
+ ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs);
+ if (ret < 0)
+ return ret;
if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
return 0;
@@ -193,9 +442,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
if (err)
goto free_vcpu;
+ if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+ goto uninit_vcpu;
+
err = kvmppc_e500_tlb_init(vcpu_e500);
if (err)
- goto uninit_vcpu;
+ goto uninit_id;
vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
if (!vcpu->arch.shared)
@@ -205,6 +457,8 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
uninit_tlb:
kvmppc_e500_tlb_uninit(vcpu_e500);
+uninit_id:
+ kvmppc_e500_id_table_free(vcpu_e500);
uninit_vcpu:
kvm_vcpu_uninit(vcpu);
free_vcpu:
@@ -218,11 +472,21 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
free_page((unsigned long)vcpu->arch.shared);
- kvm_vcpu_uninit(vcpu);
kvmppc_e500_tlb_uninit(vcpu_e500);
+ kvmppc_e500_id_table_free(vcpu_e500);
+ kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+}
+
static int __init kvmppc_e500_init(void)
{
int r, i;
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
new file mode 100644
index 00000000000..aa8b81428bf
--- /dev/null
+++ b/arch/powerpc/kvm/e500.h
@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu <yu.liu@freescale.com>
+ * Scott Wood <scottwood@freescale.com>
+ * Ashish Kalra <ashish.kalra@freescale.com>
+ * Varun Sethi <varun.sethi@freescale.com>
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.h and
+ * arch/powerpc/include/asm/kvm_44x.h by Hollis Blanchard <hollisb@us.ibm.com>,
+ * Copyright IBM Corp. 2007-2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef KVM_E500_H
+#define KVM_E500_H
+
+#include <linux/kvm_host.h>
+#include <asm/mmu-book3e.h>
+#include <asm/tlb.h>
+
+#define E500_PID_NUM 3
+#define E500_TLB_NUM 2
+
+#define E500_TLB_VALID 1
+#define E500_TLB_DIRTY 2
+#define E500_TLB_BITMAP 4
+
+struct tlbe_ref {
+ pfn_t pfn;
+ unsigned int flags; /* E500_TLB_* */
+};
+
+struct tlbe_priv {
+ struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */
+};
+
+#ifdef CONFIG_KVM_E500V2
+struct vcpu_id_table;
+#endif
+
+struct kvmppc_e500_tlb_params {
+ int entries, ways, sets;
+};
+
+struct kvmppc_vcpu_e500 {
+ struct kvm_vcpu vcpu;
+
+ /* Unmodified copy of the guest's TLB -- shared with host userspace. */
+ struct kvm_book3e_206_tlb_entry *gtlb_arch;
+
+ /* Starting entry number in gtlb_arch[] */
+ int gtlb_offset[E500_TLB_NUM];
+
+ /* KVM internal information associated with each guest TLB entry */
+ struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
+
+ struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM];
+
+ unsigned int gtlb_nv[E500_TLB_NUM];
+
+ /*
+ * information associated with each host TLB entry --
+ * TLB1 only for now. If/when guest TLB1 entries can be
+ * mapped with host TLB0, this will be used for that too.
+ *
+ * We don't want to use this for guest TLB0 because then we'd
+ * have the overhead of doing the translation again even if
+ * the entry is still in the guest TLB (e.g. we swapped out
+ * and back, and our host TLB entries got evicted).
+ */
+ struct tlbe_ref *tlb_refs[E500_TLB_NUM];
+ unsigned int host_tlb1_nv;
+
+ u32 svr;
+ u32 l1csr0;
+ u32 l1csr1;
+ u32 hid0;
+ u32 hid1;
+ u64 mcar;
+
+ struct page **shared_tlb_pages;
+ int num_shared_tlb_pages;
+
+ u64 *g2h_tlb1_map;
+ unsigned int *h2g_tlb1_rmap;
+
+ /* Minimum and maximum address mapped my TLB1 */
+ unsigned long tlb1_min_eaddr;
+ unsigned long tlb1_max_eaddr;
+
+#ifdef CONFIG_KVM_E500V2
+ u32 pid[E500_PID_NUM];
+
+ /* vcpu id table */
+ struct vcpu_id_table *idt;
+#endif
+};
+
+static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
+{
+ return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
+}
+
+
+/* This geometry is the legacy default -- can be overridden by userspace */
+#define KVM_E500_TLB0_WAY_SIZE 128
+#define KVM_E500_TLB0_WAY_NUM 2
+
+#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
+#define KVM_E500_TLB1_SIZE 16
+
+#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
+#define tlbsel_of(index) ((index) >> 16)
+#define esel_of(index) ((index) & 0xFFFF)
+
+#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
+#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
+#define MAS2_ATTRIB_MASK \
+ (MAS2_X0 | MAS2_X1)
+#define MAS3_ATTRIB_MASK \
+ (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
+ | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
+
+int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500,
+ ulong value);
+int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu);
+int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu);
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb);
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb);
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb);
+int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500);
+void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
+
+void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+
+
+#ifdef CONFIG_KVM_E500V2
+unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
+ unsigned int as, unsigned int gid,
+ unsigned int pr, int avoid_recursion);
+#endif
+
+/* TLB helper functions */
+static inline unsigned int
+get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 7) & 0x1f;
+}
+
+static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return tlbe->mas2 & 0xfffff000;
+}
+
+static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ unsigned int pgsize = get_tlb_size(tlbe);
+ return 1ULL << 10 << pgsize;
+}
+
+static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ u64 bytes = get_tlb_bytes(tlbe);
+ return get_tlb_eaddr(tlbe) + bytes - 1;
+}
+
+static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return tlbe->mas7_3 & ~0xfffULL;
+}
+
+static inline unsigned int
+get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 16) & 0xff;
+}
+
+static inline unsigned int
+get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 12) & 0x1;
+}
+
+static inline unsigned int
+get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 31) & 0x1;
+}
+
+static inline unsigned int
+get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 30) & 0x1;
+}
+
+static inline unsigned int
+get_tlb_tsize(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
+}
+
+static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.pid & 0xff;
+}
+
+static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu)
+{
+ return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS));
+}
+
+static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
+{
+ return !!(vcpu->arch.shared->msr & MSR_PR);
+}
+
+static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.shared->mas6 >> 16) & 0xff;
+}
+
+static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.shared->mas6 & 0x1;
+}
+
+static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * Manual says that tlbsel has 2 bits wide.
+ * Since we only have two TLBs, only lower bit is used.
+ */
+ return (vcpu->arch.shared->mas0 >> 28) & 0x1;
+}
+
+static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.shared->mas0 & 0xfff;
+}
+
+static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.shared->mas0 >> 16) & 0xfff;
+}
+
+static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+ const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ gpa_t gpa;
+
+ if (!get_tlb_v(tlbe))
+ return 0;
+
+#ifndef CONFIG_KVM_BOOKE_HV
+ /* Does it match current guest AS? */
+ /* XXX what about IS != DS? */
+ if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
+ return 0;
+#endif
+
+ gpa = get_tlb_raddr(tlbe);
+ if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+ /* Mapping is not for RAM. */
+ return 0;
+
+ return 1;
+}
+
+static inline struct kvm_book3e_206_tlb_entry *get_entry(
+ struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
+{
+ int offset = vcpu_e500->gtlb_offset[tlbsel];
+ return &vcpu_e500->gtlb_arch[offset + entry];
+}
+
+void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe);
+void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500);
+
+#ifdef CONFIG_KVM_BOOKE_HV
+#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe)
+#define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu)
+#define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS)
+#else
+unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
+ struct kvm_book3e_206_tlb_entry *gtlbe);
+
+static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ unsigned int tidseld = (vcpu->arch.shared->mas4 >> 16) & 0xf;
+
+ return vcpu_e500->pid[tidseld];
+}
+
+/* Force TS=1 for all guest mappings. */
+#define get_tlb_sts(gtlbe) (MAS1_TS)
+#endif /* !BOOKE_HV */
+
+#endif /* KVM_E500_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 6d0b2bd54fb..8b99e076dc8 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -14,27 +14,96 @@
#include <asm/kvm_ppc.h>
#include <asm/disassemble.h>
-#include <asm/kvm_e500.h>
+#include <asm/dbell.h>
#include "booke.h"
-#include "e500_tlb.h"
+#include "e500.h"
+#define XOP_MSGSND 206
+#define XOP_MSGCLR 238
#define XOP_TLBIVAX 786
#define XOP_TLBSX 914
#define XOP_TLBRE 946
#define XOP_TLBWE 978
+#define XOP_TLBILX 18
+
+#ifdef CONFIG_KVM_E500MC
+static int dbell2prio(ulong param)
+{
+ int msg = param & PPC_DBELL_TYPE_MASK;
+ int prio = -1;
+
+ switch (msg) {
+ case PPC_DBELL_TYPE(PPC_DBELL):
+ prio = BOOKE_IRQPRIO_DBELL;
+ break;
+ case PPC_DBELL_TYPE(PPC_DBELL_CRIT):
+ prio = BOOKE_IRQPRIO_DBELL_CRIT;
+ break;
+ default:
+ break;
+ }
+
+ return prio;
+}
+
+static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb)
+{
+ ulong param = vcpu->arch.gpr[rb];
+ int prio = dbell2prio(param);
+
+ if (prio < 0)
+ return EMULATE_FAIL;
+
+ clear_bit(prio, &vcpu->arch.pending_exceptions);
+ return EMULATE_DONE;
+}
+
+static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
+{
+ ulong param = vcpu->arch.gpr[rb];
+ int prio = dbell2prio(rb);
+ int pir = param & PPC_DBELL_PIR_MASK;
+ int i;
+ struct kvm_vcpu *cvcpu;
+
+ if (prio < 0)
+ return EMULATE_FAIL;
+
+ kvm_for_each_vcpu(i, cvcpu, vcpu->kvm) {
+ int cpir = cvcpu->arch.shared->pir;
+ if ((param & PPC_DBELL_MSG_BRDCAST) || (cpir == pir)) {
+ set_bit(prio, &cvcpu->arch.pending_exceptions);
+ kvm_vcpu_kick(cvcpu);
+ }
+ }
+
+ return EMULATE_DONE;
+}
+#endif
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
- int ra;
- int rb;
+ int ra = get_ra(inst);
+ int rb = get_rb(inst);
+ int rt = get_rt(inst);
switch (get_op(inst)) {
case 31:
switch (get_xop(inst)) {
+#ifdef CONFIG_KVM_E500MC
+ case XOP_MSGSND:
+ emulated = kvmppc_e500_emul_msgsnd(vcpu, rb);
+ break;
+
+ case XOP_MSGCLR:
+ emulated = kvmppc_e500_emul_msgclr(vcpu, rb);
+ break;
+#endif
+
case XOP_TLBRE:
emulated = kvmppc_e500_emul_tlbre(vcpu);
break;
@@ -44,13 +113,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case XOP_TLBSX:
- rb = get_rb(inst);
emulated = kvmppc_e500_emul_tlbsx(vcpu,rb);
break;
+ case XOP_TLBILX:
+ emulated = kvmppc_e500_emul_tlbilx(vcpu, rt, ra, rb);
+ break;
+
case XOP_TLBIVAX:
- ra = get_ra(inst);
- rb = get_rb(inst);
emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
break;
@@ -70,52 +140,63 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return emulated;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int emulated = EMULATE_DONE;
- ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
+#ifndef CONFIG_KVM_BOOKE_HV
case SPRN_PID:
kvmppc_set_pid(vcpu, spr_val);
break;
case SPRN_PID1:
if (spr_val != 0)
return EMULATE_FAIL;
- vcpu_e500->pid[1] = spr_val; break;
+ vcpu_e500->pid[1] = spr_val;
+ break;
case SPRN_PID2:
if (spr_val != 0)
return EMULATE_FAIL;
- vcpu_e500->pid[2] = spr_val; break;
+ vcpu_e500->pid[2] = spr_val;
+ break;
case SPRN_MAS0:
- vcpu->arch.shared->mas0 = spr_val; break;
+ vcpu->arch.shared->mas0 = spr_val;
+ break;
case SPRN_MAS1:
- vcpu->arch.shared->mas1 = spr_val; break;
+ vcpu->arch.shared->mas1 = spr_val;
+ break;
case SPRN_MAS2:
- vcpu->arch.shared->mas2 = spr_val; break;
+ vcpu->arch.shared->mas2 = spr_val;
+ break;
case SPRN_MAS3:
vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff;
vcpu->arch.shared->mas7_3 |= spr_val;
break;
case SPRN_MAS4:
- vcpu->arch.shared->mas4 = spr_val; break;
+ vcpu->arch.shared->mas4 = spr_val;
+ break;
case SPRN_MAS6:
- vcpu->arch.shared->mas6 = spr_val; break;
+ vcpu->arch.shared->mas6 = spr_val;
+ break;
case SPRN_MAS7:
vcpu->arch.shared->mas7_3 &= (u64)0xffffffff;
vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32;
break;
+#endif
case SPRN_L1CSR0:
vcpu_e500->l1csr0 = spr_val;
vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
break;
case SPRN_L1CSR1:
- vcpu_e500->l1csr1 = spr_val; break;
+ vcpu_e500->l1csr1 = spr_val;
+ break;
case SPRN_HID0:
- vcpu_e500->hid0 = spr_val; break;
+ vcpu_e500->hid0 = spr_val;
+ break;
case SPRN_HID1:
- vcpu_e500->hid1 = spr_val; break;
+ vcpu_e500->hid1 = spr_val;
+ break;
case SPRN_MMUCSR0:
emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
@@ -135,81 +216,112 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_IVOR35:
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
break;
-
+#ifdef CONFIG_KVM_BOOKE_HV
+ case SPRN_IVOR36:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] = spr_val;
+ break;
+ case SPRN_IVOR37:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] = spr_val;
+ break;
+#endif
default:
- emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
+ emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val);
}
return emulated;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int emulated = EMULATE_DONE;
- unsigned long val;
switch (sprn) {
+#ifndef CONFIG_KVM_BOOKE_HV
case SPRN_PID:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break;
+ *spr_val = vcpu_e500->pid[0];
+ break;
case SPRN_PID1:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break;
+ *spr_val = vcpu_e500->pid[1];
+ break;
case SPRN_PID2:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break;
+ *spr_val = vcpu_e500->pid[2];
+ break;
case SPRN_MAS0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas0); break;
+ *spr_val = vcpu->arch.shared->mas0;
+ break;
case SPRN_MAS1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas1); break;
+ *spr_val = vcpu->arch.shared->mas1;
+ break;
case SPRN_MAS2:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas2); break;
+ *spr_val = vcpu->arch.shared->mas2;
+ break;
case SPRN_MAS3:
- val = (u32)vcpu->arch.shared->mas7_3;
- kvmppc_set_gpr(vcpu, rt, val);
+ *spr_val = (u32)vcpu->arch.shared->mas7_3;
break;
case SPRN_MAS4:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas4); break;
+ *spr_val = vcpu->arch.shared->mas4;
+ break;
case SPRN_MAS6:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas6); break;
+ *spr_val = vcpu->arch.shared->mas6;
+ break;
case SPRN_MAS7:
- val = vcpu->arch.shared->mas7_3 >> 32;
- kvmppc_set_gpr(vcpu, rt, val);
+ *spr_val = vcpu->arch.shared->mas7_3 >> 32;
break;
+#endif
case SPRN_TLB0CFG:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
+ *spr_val = vcpu->arch.tlbcfg[0];
+ break;
case SPRN_TLB1CFG:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break;
+ *spr_val = vcpu->arch.tlbcfg[1];
+ break;
case SPRN_L1CSR0:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break;
+ *spr_val = vcpu_e500->l1csr0;
+ break;
case SPRN_L1CSR1:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break;
+ *spr_val = vcpu_e500->l1csr1;
+ break;
case SPRN_HID0:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
+ *spr_val = vcpu_e500->hid0;
+ break;
case SPRN_HID1:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
+ *spr_val = vcpu_e500->hid1;
+ break;
case SPRN_SVR:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break;
+ *spr_val = vcpu_e500->svr;
+ break;
case SPRN_MMUCSR0:
- kvmppc_set_gpr(vcpu, rt, 0); break;
+ *spr_val = 0;
+ break;
case SPRN_MMUCFG:
- kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break;
+ *spr_val = vcpu->arch.mmucfg;
+ break;
/* extra exceptions */
case SPRN_IVOR32:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
break;
case SPRN_IVOR33:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
break;
case SPRN_IVOR34:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
break;
case SPRN_IVOR35:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+ break;
+#ifdef CONFIG_KVM_BOOKE_HV
+ case SPRN_IVOR36:
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
+ break;
+ case SPRN_IVOR37:
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
break;
+#endif
default:
- emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
+ emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val);
}
return emulated;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 6e53e4164de..c510fc96130 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -2,6 +2,9 @@
* Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Yu Liu, yu.liu@freescale.com
+ * Scott Wood, scottwood@freescale.com
+ * Ashish Kalra, ashish.kalra@freescale.com
+ * Varun Sethi, varun.sethi@freescale.com
*
* Description:
* This file is based on arch/powerpc/kvm/44x_tlb.c,
@@ -26,210 +29,15 @@
#include <linux/vmalloc.h>
#include <linux/hugetlb.h>
#include <asm/kvm_ppc.h>
-#include <asm/kvm_e500.h>
-#include "../mm/mmu_decl.h"
-#include "e500_tlb.h"
+#include "e500.h"
#include "trace.h"
#include "timing.h"
#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)
-struct id {
- unsigned long val;
- struct id **pentry;
-};
-
-#define NUM_TIDS 256
-
-/*
- * This table provide mappings from:
- * (guestAS,guestTID,guestPR) --> ID of physical cpu
- * guestAS [0..1]
- * guestTID [0..255]
- * guestPR [0..1]
- * ID [1..255]
- * Each vcpu keeps one vcpu_id_table.
- */
-struct vcpu_id_table {
- struct id id[2][NUM_TIDS][2];
-};
-
-/*
- * This table provide reversed mappings of vcpu_id_table:
- * ID --> address of vcpu_id_table item.
- * Each physical core has one pcpu_id_table.
- */
-struct pcpu_id_table {
- struct id *entry[NUM_TIDS];
-};
-
-static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
-
-/* This variable keeps last used shadow ID on local core.
- * The valid range of shadow ID is [1..255] */
-static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
-
static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
-static struct kvm_book3e_206_tlb_entry *get_entry(
- struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
-{
- int offset = vcpu_e500->gtlb_offset[tlbsel];
- return &vcpu_e500->gtlb_arch[offset + entry];
-}
-
-/*
- * Allocate a free shadow id and setup a valid sid mapping in given entry.
- * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
- *
- * The caller must have preemption disabled, and keep it that way until
- * it has finished with the returned shadow id (either written into the
- * TLB or arch.shadow_pid, or discarded).
- */
-static inline int local_sid_setup_one(struct id *entry)
-{
- unsigned long sid;
- int ret = -1;
-
- sid = ++(__get_cpu_var(pcpu_last_used_sid));
- if (sid < NUM_TIDS) {
- __get_cpu_var(pcpu_sids).entry[sid] = entry;
- entry->val = sid;
- entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid];
- ret = sid;
- }
-
- /*
- * If sid == NUM_TIDS, we've run out of sids. We return -1, and
- * the caller will invalidate everything and start over.
- *
- * sid > NUM_TIDS indicates a race, which we disable preemption to
- * avoid.
- */
- WARN_ON(sid > NUM_TIDS);
-
- return ret;
-}
-
-/*
- * Check if given entry contain a valid shadow id mapping.
- * An ID mapping is considered valid only if
- * both vcpu and pcpu know this mapping.
- *
- * The caller must have preemption disabled, and keep it that way until
- * it has finished with the returned shadow id (either written into the
- * TLB or arch.shadow_pid, or discarded).
- */
-static inline int local_sid_lookup(struct id *entry)
-{
- if (entry && entry->val != 0 &&
- __get_cpu_var(pcpu_sids).entry[entry->val] == entry &&
- entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val])
- return entry->val;
- return -1;
-}
-
-/* Invalidate all id mappings on local core -- call with preempt disabled */
-static inline void local_sid_destroy_all(void)
-{
- __get_cpu_var(pcpu_last_used_sid) = 0;
- memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
-}
-
-static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL);
- return vcpu_e500->idt;
-}
-
-static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- kfree(vcpu_e500->idt);
-}
-
-/* Invalidate all mappings on vcpu */
-static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table));
-
- /* Update shadow pid when mappings are changed */
- kvmppc_e500_recalc_shadow_pid(vcpu_e500);
-}
-
-/* Invalidate one ID mapping on vcpu */
-static inline void kvmppc_e500_id_table_reset_one(
- struct kvmppc_vcpu_e500 *vcpu_e500,
- int as, int pid, int pr)
-{
- struct vcpu_id_table *idt = vcpu_e500->idt;
-
- BUG_ON(as >= 2);
- BUG_ON(pid >= NUM_TIDS);
- BUG_ON(pr >= 2);
-
- idt->id[as][pid][pr].val = 0;
- idt->id[as][pid][pr].pentry = NULL;
-
- /* Update shadow pid when mappings are changed */
- kvmppc_e500_recalc_shadow_pid(vcpu_e500);
-}
-
-/*
- * Map guest (vcpu,AS,ID,PR) to physical core shadow id.
- * This function first lookup if a valid mapping exists,
- * if not, then creates a new one.
- *
- * The caller must have preemption disabled, and keep it that way until
- * it has finished with the returned shadow id (either written into the
- * TLB or arch.shadow_pid, or discarded).
- */
-static unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
- unsigned int as, unsigned int gid,
- unsigned int pr, int avoid_recursion)
-{
- struct vcpu_id_table *idt = vcpu_e500->idt;
- int sid;
-
- BUG_ON(as >= 2);
- BUG_ON(gid >= NUM_TIDS);
- BUG_ON(pr >= 2);
-
- sid = local_sid_lookup(&idt->id[as][gid][pr]);
-
- while (sid <= 0) {
- /* No mapping yet */
- sid = local_sid_setup_one(&idt->id[as][gid][pr]);
- if (sid <= 0) {
- _tlbil_all();
- local_sid_destroy_all();
- }
-
- /* Update shadow pid when mappings are changed */
- if (!avoid_recursion)
- kvmppc_e500_recalc_shadow_pid(vcpu_e500);
- }
-
- return sid;
-}
-
-/* Map guest pid to shadow.
- * We use PID to keep shadow of current guest non-zero PID,
- * and use PID1 to keep shadow of guest zero PID.
- * So that guest tlbe with TID=0 can be accessed at any time */
-void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- preempt_disable();
- vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500,
- get_cur_as(&vcpu_e500->vcpu),
- get_cur_pid(&vcpu_e500->vcpu),
- get_cur_pr(&vcpu_e500->vcpu), 1);
- vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500,
- get_cur_as(&vcpu_e500->vcpu), 0,
- get_cur_pr(&vcpu_e500->vcpu), 1);
- preempt_enable();
-}
-
static inline unsigned int gtlb0_get_next_victim(
struct kvmppc_vcpu_e500 *vcpu_e500)
{
@@ -258,6 +66,7 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
/* Mask off reserved bits. */
mas3 &= MAS3_ATTRIB_MASK;
+#ifndef CONFIG_KVM_BOOKE_HV
if (!usermode) {
/* Guest is in supervisor mode,
* so we need to translate guest
@@ -265,8 +74,9 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
mas3 &= ~E500_TLB_USER_PERM_MASK;
mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1;
}
-
- return mas3 | E500_TLB_SUPER_PERM_MASK;
+ mas3 |= E500_TLB_SUPER_PERM_MASK;
+#endif
+ return mas3;
}
static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
@@ -292,7 +102,16 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_MAS8, stlbe->mas8);
+#endif
asm volatile("isync; tlbwe" : : : "memory");
+
+#ifdef CONFIG_KVM_BOOKE_HV
+ /* Must clear mas8 for other host tlbwe's */
+ mtspr(SPRN_MAS8, 0);
+ isync();
+#endif
local_irq_restore(flags);
trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1,
@@ -337,6 +156,7 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
}
}
+#ifdef CONFIG_KVM_E500V2
void kvmppc_map_magic(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -361,75 +181,41 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
__write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
preempt_enable();
}
-
-void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-
- /* Shadow PID may be expired on local core */
- kvmppc_e500_recalc_shadow_pid(vcpu_e500);
-}
-
-void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
-{
-}
+#endif
static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500,
int tlbsel, int esel)
{
struct kvm_book3e_206_tlb_entry *gtlbe =
get_entry(vcpu_e500, tlbsel, esel);
- struct vcpu_id_table *idt = vcpu_e500->idt;
- unsigned int pr, tid, ts, pid;
- u32 val, eaddr;
- unsigned long flags;
-
- ts = get_tlb_ts(gtlbe);
- tid = get_tlb_tid(gtlbe);
-
- preempt_disable();
-
- /* One guest ID may be mapped to two shadow IDs */
- for (pr = 0; pr < 2; pr++) {
- /*
- * The shadow PID can have a valid mapping on at most one
- * host CPU. In the common case, it will be valid on this
- * CPU, in which case (for TLB0) we do a local invalidation
- * of the specific address.
- *
- * If the shadow PID is not valid on the current host CPU, or
- * if we're invalidating a TLB1 entry, we invalidate the
- * entire shadow PID.
- */
- if (tlbsel == 1 ||
- (pid = local_sid_lookup(&idt->id[ts][tid][pr])) <= 0) {
- kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr);
- continue;
- }
- /*
- * The guest is invalidating a TLB0 entry which is in a PID
- * that has a valid shadow mapping on this host CPU. We
- * search host TLB0 to invalidate it's shadow TLB entry,
- * similar to __tlbil_va except that we need to look in AS1.
- */
- val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;
- eaddr = get_tlb_eaddr(gtlbe);
+ if (tlbsel == 1 &&
+ vcpu_e500->gtlb_priv[1][esel].ref.flags & E500_TLB_BITMAP) {
+ u64 tmp = vcpu_e500->g2h_tlb1_map[esel];
+ int hw_tlb_indx;
+ unsigned long flags;
local_irq_save(flags);
-
- mtspr(SPRN_MAS6, val);
- asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr));
- val = mfspr(SPRN_MAS1);
- if (val & MAS1_VALID) {
- mtspr(SPRN_MAS1, val & ~MAS1_VALID);
+ while (tmp) {
+ hw_tlb_indx = __ilog2_u64(tmp & -tmp);
+ mtspr(SPRN_MAS0,
+ MAS0_TLBSEL(1) |
+ MAS0_ESEL(to_htlb1_esel(hw_tlb_indx)));
+ mtspr(SPRN_MAS1, 0);
asm volatile("tlbwe");
+ vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0;
+ tmp &= tmp - 1;
}
-
+ mb();
+ vcpu_e500->g2h_tlb1_map[esel] = 0;
+ vcpu_e500->gtlb_priv[1][esel].ref.flags &= ~E500_TLB_BITMAP;
local_irq_restore(flags);
+
+ return;
}
- preempt_enable();
+ /* Guest tlbe is backed by at most one host tlbe per shadow pid. */
+ kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);
}
static int tlb0_set_base(gva_t addr, int sets, int ways)
@@ -475,6 +261,9 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
set_base = gtlb0_set_base(vcpu_e500, eaddr);
size = vcpu_e500->gtlb_params[0].ways;
} else {
+ if (eaddr < vcpu_e500->tlb1_min_eaddr ||
+ eaddr > vcpu_e500->tlb1_max_eaddr)
+ return -1;
set_base = 0;
}
@@ -530,6 +319,16 @@ static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
}
}
+static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ if (vcpu_e500->g2h_tlb1_map)
+ memset(vcpu_e500->g2h_tlb1_map,
+ sizeof(u64) * vcpu_e500->gtlb_params[1].entries, 0);
+ if (vcpu_e500->h2g_tlb1_rmap)
+ memset(vcpu_e500->h2g_tlb1_rmap,
+ sizeof(unsigned int) * host_tlb_params[1].entries, 0);
+}
+
static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
{
int tlbsel = 0;
@@ -547,7 +346,7 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
int stlbsel = 1;
int i;
- kvmppc_e500_id_table_reset_all(vcpu_e500);
+ kvmppc_e500_tlbil_all(vcpu_e500);
for (i = 0; i < host_tlb_params[stlbsel].entries; i++) {
struct tlbe_ref *ref =
@@ -562,19 +361,18 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
unsigned int eaddr, int as)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- unsigned int victim, pidsel, tsized;
+ unsigned int victim, tsized;
int tlbsel;
/* since we only have two TLBs, only lower bit is used. */
tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1;
victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0;
- pidsel = (vcpu->arch.shared->mas4 >> 16) & 0xf;
tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f;
vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
- | MAS1_TID(vcpu_e500->pid[pidsel])
+ | MAS1_TID(get_tlbmiss_tid(vcpu))
| MAS1_TSIZE(tsized);
vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN)
| (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK);
@@ -586,23 +384,26 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
/* TID must be supplied by the caller */
static inline void kvmppc_e500_setup_stlbe(
- struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_vcpu *vcpu,
struct kvm_book3e_206_tlb_entry *gtlbe,
int tsize, struct tlbe_ref *ref, u64 gvaddr,
struct kvm_book3e_206_tlb_entry *stlbe)
{
pfn_t pfn = ref->pfn;
+ u32 pr = vcpu->arch.shared->msr & MSR_PR;
BUG_ON(!(ref->flags & E500_TLB_VALID));
- /* Force TS=1 IPROT=0 for all guest mappings. */
- stlbe->mas1 = MAS1_TSIZE(tsize) | MAS1_TS | MAS1_VALID;
- stlbe->mas2 = (gvaddr & MAS2_EPN)
- | e500_shadow_mas2_attrib(gtlbe->mas2,
- vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
- stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT)
- | e500_shadow_mas3_attrib(gtlbe->mas7_3,
- vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
+ /* Force IPROT=0 for all guest mappings. */
+ stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
+ stlbe->mas2 = (gvaddr & MAS2_EPN) |
+ e500_shadow_mas2_attrib(gtlbe->mas2, pr);
+ stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
+ e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
+
+#ifdef CONFIG_KVM_BOOKE_HV
+ stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid;
+#endif
}
static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -736,7 +537,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
kvmppc_e500_ref_release(ref);
kvmppc_e500_ref_setup(ref, gtlbe, pfn);
- kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, ref, gvaddr, stlbe);
+ kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
+ ref, gvaddr, stlbe);
}
/* XXX only map the one-one case, for now use TLB0 */
@@ -760,7 +562,7 @@ static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
/* XXX for both one-one and one-to-many , for now use TLB1 */
static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
- struct kvm_book3e_206_tlb_entry *stlbe)
+ struct kvm_book3e_206_tlb_entry *stlbe, int esel)
{
struct tlbe_ref *ref;
unsigned int victim;
@@ -773,15 +575,74 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
ref = &vcpu_e500->tlb_refs[1][victim];
kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref);
+ vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << victim;
+ vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
+ if (vcpu_e500->h2g_tlb1_rmap[victim]) {
+ unsigned int idx = vcpu_e500->h2g_tlb1_rmap[victim];
+ vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << victim);
+ }
+ vcpu_e500->h2g_tlb1_rmap[victim] = esel;
+
return victim;
}
-void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
+static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ int size = vcpu_e500->gtlb_params[1].entries;
+ unsigned int offset;
+ gva_t eaddr;
+ int i;
+
+ vcpu_e500->tlb1_min_eaddr = ~0UL;
+ vcpu_e500->tlb1_max_eaddr = 0;
+ offset = vcpu_e500->gtlb_offset[1];
+
+ for (i = 0; i < size; i++) {
+ struct kvm_book3e_206_tlb_entry *tlbe =
+ &vcpu_e500->gtlb_arch[offset + i];
+
+ if (!get_tlb_v(tlbe))
+ continue;
+
+ eaddr = get_tlb_eaddr(tlbe);
+ vcpu_e500->tlb1_min_eaddr =
+ min(vcpu_e500->tlb1_min_eaddr, eaddr);
+
+ eaddr = get_tlb_end(tlbe);
+ vcpu_e500->tlb1_max_eaddr =
+ max(vcpu_e500->tlb1_max_eaddr, eaddr);
+ }
+}
+
+static int kvmppc_need_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
{
+ unsigned long start, end, size;
+
+ size = get_tlb_bytes(gtlbe);
+ start = get_tlb_eaddr(gtlbe) & ~(size - 1);
+ end = start + size - 1;
+
+ return vcpu_e500->tlb1_min_eaddr == start ||
+ vcpu_e500->tlb1_max_eaddr == end;
+}
+
+/* This function is supposed to be called for a adding a new valid tlb entry */
+static void kvmppc_set_tlb1map_range(struct kvm_vcpu *vcpu,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+ unsigned long start, end, size;
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- /* Recalc shadow pid since MSR changes */
- kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+ if (!get_tlb_v(gtlbe))
+ return;
+
+ size = get_tlb_bytes(gtlbe);
+ start = get_tlb_eaddr(gtlbe) & ~(size - 1);
+ end = start + size - 1;
+
+ vcpu_e500->tlb1_min_eaddr = min(vcpu_e500->tlb1_min_eaddr, start);
+ vcpu_e500->tlb1_max_eaddr = max(vcpu_e500->tlb1_max_eaddr, end);
}
static inline int kvmppc_e500_gtlbe_invalidate(
@@ -794,6 +655,9 @@ static inline int kvmppc_e500_gtlbe_invalidate(
if (unlikely(get_tlb_iprot(gtlbe)))
return -1;
+ if (tlbsel == 1 && kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe))
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
+
gtlbe->mas1 = 0;
return 0;
@@ -811,7 +675,7 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
/* Invalidate all vcpu id mappings */
- kvmppc_e500_id_table_reset_all(vcpu_e500);
+ kvmppc_e500_tlbil_all(vcpu_e500);
return EMULATE_DONE;
}
@@ -844,7 +708,59 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
}
/* Invalidate all vcpu id mappings */
- kvmppc_e500_id_table_reset_all(vcpu_e500);
+ kvmppc_e500_tlbil_all(vcpu_e500);
+
+ return EMULATE_DONE;
+}
+
+static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
+ int pid, int rt)
+{
+ struct kvm_book3e_206_tlb_entry *tlbe;
+ int tid, esel;
+
+ /* invalidate all entries */
+ for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) {
+ tlbe = get_entry(vcpu_e500, tlbsel, esel);
+ tid = get_tlb_tid(tlbe);
+ if (rt == 0 || tid == pid) {
+ inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+ }
+ }
+}
+
+static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
+ int ra, int rb)
+{
+ int tlbsel, esel;
+ gva_t ea;
+
+ ea = kvmppc_get_gpr(&vcpu_e500->vcpu, rb);
+ if (ra)
+ ea += kvmppc_get_gpr(&vcpu_e500->vcpu, ra);
+
+ for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+ esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1);
+ if (esel >= 0) {
+ inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+ break;
+ }
+ }
+}
+
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int pid = get_cur_spid(vcpu);
+
+ if (rt == 0 || rt == 1) {
+ tlbilx_all(vcpu_e500, 0, pid, rt);
+ tlbilx_all(vcpu_e500, 1, pid, rt);
+ } else if (rt == 3) {
+ tlbilx_one(vcpu_e500, pid, ra, rb);
+ }
return EMULATE_DONE;
}
@@ -929,9 +845,7 @@ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
int stid;
preempt_disable();
- stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe),
- get_tlb_tid(gtlbe),
- get_cur_pr(&vcpu_e500->vcpu), 0);
+ stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe);
stlbe->mas1 |= MAS1_TID(stid);
write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe);
@@ -941,16 +855,21 @@ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- struct kvm_book3e_206_tlb_entry *gtlbe;
- int tlbsel, esel;
+ struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
+ int tlbsel, esel, stlbsel, sesel;
+ int recal = 0;
tlbsel = get_tlb_tlbsel(vcpu);
esel = get_tlb_esel(vcpu, tlbsel);
gtlbe = get_entry(vcpu_e500, tlbsel, esel);
- if (get_tlb_v(gtlbe))
+ if (get_tlb_v(gtlbe)) {
inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
+ if ((tlbsel == 1) &&
+ kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe))
+ recal = 1;
+ }
gtlbe->mas1 = vcpu->arch.shared->mas1;
gtlbe->mas2 = vcpu->arch.shared->mas2;
@@ -959,10 +878,20 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
gtlbe->mas2, gtlbe->mas7_3);
+ if (tlbsel == 1) {
+ /*
+ * If a valid tlb1 entry is overwritten then recalculate the
+ * min/max TLB1 map address range otherwise no need to look
+ * in tlb1 array.
+ */
+ if (recal)
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
+ else
+ kvmppc_set_tlb1map_range(vcpu, gtlbe);
+ }
+
/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
if (tlbe_is_host_safe(vcpu, gtlbe)) {
- struct kvm_book3e_206_tlb_entry stlbe;
- int stlbsel, sesel;
u64 eaddr;
u64 raddr;
@@ -989,7 +918,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
* are mapped on the fly. */
stlbsel = 1;
sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
- raddr >> PAGE_SHIFT, gtlbe, &stlbe);
+ raddr >> PAGE_SHIFT, gtlbe, &stlbe, esel);
break;
default:
@@ -1003,6 +932,48 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+static int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
+ gva_t eaddr, unsigned int pid, int as)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int esel, tlbsel;
+
+ for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+ esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
+ if (esel >= 0)
+ return index_of(tlbsel, esel);
+ }
+
+ return -1;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ int index;
+ gva_t eaddr;
+ u8 pid;
+ u8 as;
+
+ eaddr = tr->linear_address;
+ pid = (tr->linear_address >> 32) & 0xff;
+ as = (tr->linear_address >> 40) & 0x1;
+
+ index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
+ if (index < 0) {
+ tr->valid = 0;
+ return 0;
+ }
+
+ tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
+ /* XXX what does "writeable" and "usermode" even mean? */
+ tr->valid = 1;
+
+ return 0;
+}
+
+
int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
@@ -1066,7 +1037,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
sesel = 0; /* unused */
priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
- kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K,
+ kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
&priv->ref, eaddr, &stlbe);
break;
@@ -1075,7 +1046,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
stlbsel = 1;
sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn,
- gtlbe, &stlbe);
+ gtlbe, &stlbe, esel);
break;
}
@@ -1087,52 +1058,13 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
}
-int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
- gva_t eaddr, unsigned int pid, int as)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- int esel, tlbsel;
-
- for (tlbsel = 0; tlbsel < 2; tlbsel++) {
- esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
- if (esel >= 0)
- return index_of(tlbsel, esel);
- }
-
- return -1;
-}
-
-void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-
- if (vcpu->arch.pid != pid) {
- vcpu_e500->pid[0] = vcpu->arch.pid = pid;
- kvmppc_e500_recalc_shadow_pid(vcpu_e500);
- }
-}
-
-void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- struct kvm_book3e_206_tlb_entry *tlbe;
-
- /* Insert large initial mapping for guest. */
- tlbe = get_entry(vcpu_e500, 1, 0);
- tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
- tlbe->mas2 = 0;
- tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
-
- /* 4K map for serial output. Used by kernel wrapper. */
- tlbe = get_entry(vcpu_e500, 1, 1);
- tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
- tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
- tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
-}
-
static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
{
int i;
+ clear_tlb1_bitmap(vcpu_e500);
+ kfree(vcpu_e500->g2h_tlb1_map);
+
clear_tlb_refs(vcpu_e500);
kfree(vcpu_e500->gtlb_priv[0]);
kfree(vcpu_e500->gtlb_priv[1]);
@@ -1155,6 +1087,36 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
vcpu_e500->gtlb_arch = NULL;
}
+void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ sregs->u.e.mas0 = vcpu->arch.shared->mas0;
+ sregs->u.e.mas1 = vcpu->arch.shared->mas1;
+ sregs->u.e.mas2 = vcpu->arch.shared->mas2;
+ sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3;
+ sregs->u.e.mas4 = vcpu->arch.shared->mas4;
+ sregs->u.e.mas6 = vcpu->arch.shared->mas6;
+
+ sregs->u.e.mmucfg = vcpu->arch.mmucfg;
+ sregs->u.e.tlbcfg[0] = vcpu->arch.tlbcfg[0];
+ sregs->u.e.tlbcfg[1] = vcpu->arch.tlbcfg[1];
+ sregs->u.e.tlbcfg[2] = 0;
+ sregs->u.e.tlbcfg[3] = 0;
+}
+
+int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
+ vcpu->arch.shared->mas0 = sregs->u.e.mas0;
+ vcpu->arch.shared->mas1 = sregs->u.e.mas1;
+ vcpu->arch.shared->mas2 = sregs->u.e.mas2;
+ vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3;
+ vcpu->arch.shared->mas4 = sregs->u.e.mas4;
+ vcpu->arch.shared->mas6 = sregs->u.e.mas6;
+ }
+
+ return 0;
+}
+
int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
struct kvm_config_tlb *cfg)
{
@@ -1163,6 +1125,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
char *virt;
struct page **pages;
struct tlbe_priv *privs[2] = {};
+ u64 *g2h_bitmap = NULL;
size_t array_len;
u32 sets;
int num_pages, ret, i;
@@ -1224,10 +1187,16 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
if (!privs[0] || !privs[1])
goto err_put_page;
+ g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
+ GFP_KERNEL);
+ if (!g2h_bitmap)
+ goto err_put_page;
+
free_gtlb(vcpu_e500);
vcpu_e500->gtlb_priv[0] = privs[0];
vcpu_e500->gtlb_priv[1] = privs[1];
+ vcpu_e500->g2h_tlb1_map = g2h_bitmap;
vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *)
(virt + (cfg->array & (PAGE_SIZE - 1)));
@@ -1238,14 +1207,16 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
vcpu_e500->gtlb_offset[0] = 0;
vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
- vcpu_e500->tlb0cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
+
+ vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
if (params.tlb_sizes[0] <= 2048)
- vcpu_e500->tlb0cfg |= params.tlb_sizes[0];
- vcpu_e500->tlb0cfg |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
+ vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0];
+ vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
- vcpu_e500->tlb1cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
- vcpu_e500->tlb1cfg |= params.tlb_sizes[1];
- vcpu_e500->tlb1cfg |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+ vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1];
+ vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
vcpu_e500->shared_tlb_pages = pages;
vcpu_e500->num_shared_tlb_pages = num_pages;
@@ -1256,6 +1227,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1];
vcpu_e500->gtlb_params[1].sets = 1;
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
err_put_page:
@@ -1274,13 +1246,14 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
struct kvm_dirty_tlb *dirty)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
clear_tlb_refs(vcpu_e500);
return 0;
}
int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
{
+ struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
@@ -1357,22 +1330,32 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
if (!vcpu_e500->gtlb_priv[1])
goto err;
- if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+ vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(unsigned int) *
+ vcpu_e500->gtlb_params[1].entries,
+ GFP_KERNEL);
+ if (!vcpu_e500->g2h_tlb1_map)
+ goto err;
+
+ vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) *
+ host_tlb_params[1].entries,
+ GFP_KERNEL);
+ if (!vcpu_e500->h2g_tlb1_rmap)
goto err;
/* Init TLB configuration register */
- vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) &
+ vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
- vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[0].entries;
- vcpu_e500->tlb0cfg |=
+ vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries;
+ vcpu->arch.tlbcfg[0] |=
vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
- vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) &
+ vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
- vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[1].entries;
- vcpu_e500->tlb0cfg |=
+ vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries;
+ vcpu->arch.tlbcfg[1] |=
vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
err:
@@ -1385,8 +1368,7 @@ err:
void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
{
free_gtlb(vcpu_e500);
- kvmppc_e500_id_table_free(vcpu_e500);
-
+ kfree(vcpu_e500->h2g_tlb1_rmap);
kfree(vcpu_e500->tlb_refs[0]);
kfree(vcpu_e500->tlb_refs[1]);
}
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
deleted file mode 100644
index 5c6d2d7bf05..00000000000
--- a/arch/powerpc/kvm/e500_tlb.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Author: Yu Liu, yu.liu@freescale.com
- *
- * Description:
- * This file is based on arch/powerpc/kvm/44x_tlb.h,
- * by Hollis Blanchard <hollisb@us.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- */
-
-#ifndef __KVM_E500_TLB_H__
-#define __KVM_E500_TLB_H__
-
-#include <linux/kvm_host.h>
-#include <asm/mmu-book3e.h>
-#include <asm/tlb.h>
-#include <asm/kvm_e500.h>
-
-/* This geometry is the legacy default -- can be overridden by userspace */
-#define KVM_E500_TLB0_WAY_SIZE 128
-#define KVM_E500_TLB0_WAY_NUM 2
-
-#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
-#define KVM_E500_TLB1_SIZE 16
-
-#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
-#define tlbsel_of(index) ((index) >> 16)
-#define esel_of(index) ((index) & 0xFFFF)
-
-#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
-#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
-#define MAS2_ATTRIB_MASK \
- (MAS2_X0 | MAS2_X1)
-#define MAS3_ATTRIB_MASK \
- (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
- | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
-
-extern void kvmppc_dump_tlbs(struct kvm_vcpu *);
-extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong);
-extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *);
-extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *);
-extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int);
-extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int);
-extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int);
-extern void kvmppc_e500_tlb_put(struct kvm_vcpu *);
-extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
-extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
-extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
-extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
-extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *);
-
-/* TLB helper functions */
-static inline unsigned int
-get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return (tlbe->mas1 >> 7) & 0x1f;
-}
-
-static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return tlbe->mas2 & 0xfffff000;
-}
-
-static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- unsigned int pgsize = get_tlb_size(tlbe);
- return 1ULL << 10 << pgsize;
-}
-
-static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- u64 bytes = get_tlb_bytes(tlbe);
- return get_tlb_eaddr(tlbe) + bytes - 1;
-}
-
-static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return tlbe->mas7_3 & ~0xfffULL;
-}
-
-static inline unsigned int
-get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return (tlbe->mas1 >> 16) & 0xff;
-}
-
-static inline unsigned int
-get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return (tlbe->mas1 >> 12) & 0x1;
-}
-
-static inline unsigned int
-get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return (tlbe->mas1 >> 31) & 0x1;
-}
-
-static inline unsigned int
-get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return (tlbe->mas1 >> 30) & 0x1;
-}
-
-static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.pid & 0xff;
-}
-
-static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu)
-{
- return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS));
-}
-
-static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
-{
- return !!(vcpu->arch.shared->msr & MSR_PR);
-}
-
-static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu)
-{
- return (vcpu->arch.shared->mas6 >> 16) & 0xff;
-}
-
-static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.shared->mas6 & 0x1;
-}
-
-static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu)
-{
- /*
- * Manual says that tlbsel has 2 bits wide.
- * Since we only have two TLBs, only lower bit is used.
- */
- return (vcpu->arch.shared->mas0 >> 28) & 0x1;
-}
-
-static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.shared->mas0 & 0xfff;
-}
-
-static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu)
-{
- return (vcpu->arch.shared->mas0 >> 16) & 0xfff;
-}
-
-static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
- const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- gpa_t gpa;
-
- if (!get_tlb_v(tlbe))
- return 0;
-
- /* Does it match current guest AS? */
- /* XXX what about IS != DS? */
- if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
- return 0;
-
- gpa = get_tlb_raddr(tlbe);
- if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
- /* Mapping is not for RAM. */
- return 0;
-
- return 1;
-}
-
-#endif /* __KVM_E500_TLB_H__ */
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
new file mode 100644
index 00000000000..fe6c1de6b70
--- /dev/null
+++ b/arch/powerpc/kvm/e500mc.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (C) 2010 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Varun Sethi, <varun.sethi@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/e500.c,
+ * by Yu Liu <yu.liu@freescale.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
+
+#include "booke.h"
+#include "e500.h"
+
+void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type)
+{
+ enum ppc_dbell dbell_type;
+ unsigned long tag;
+
+ switch (type) {
+ case INT_CLASS_NONCRIT:
+ dbell_type = PPC_G_DBELL;
+ break;
+ case INT_CLASS_CRIT:
+ dbell_type = PPC_G_DBELL_CRIT;
+ break;
+ case INT_CLASS_MC:
+ dbell_type = PPC_G_DBELL_MC;
+ break;
+ default:
+ WARN_ONCE(1, "%s: unknown int type %d\n", __func__, type);
+ return;
+ }
+
+
+ tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id;
+ mb();
+ ppc_msgsnd(dbell_type, 0, tag);
+}
+
+/* gtlbe must not be mapped by more than one host tlb entry */
+void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+ unsigned int tid, ts;
+ u32 val, eaddr, lpid;
+ unsigned long flags;
+
+ ts = get_tlb_ts(gtlbe);
+ tid = get_tlb_tid(gtlbe);
+ lpid = vcpu_e500->vcpu.kvm->arch.lpid;
+
+ /* We search the host TLB to invalidate its shadow TLB entry */
+ val = (tid << 16) | ts;
+ eaddr = get_tlb_eaddr(gtlbe);
+
+ local_irq_save(flags);
+
+ mtspr(SPRN_MAS6, val);
+ mtspr(SPRN_MAS5, MAS5_SGS | lpid);
+
+ asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr));
+ val = mfspr(SPRN_MAS1);
+ if (val & MAS1_VALID) {
+ mtspr(SPRN_MAS1, val & ~MAS1_VALID);
+ asm volatile("tlbwe");
+ }
+ mtspr(SPRN_MAS5, 0);
+ /* NOTE: tlbsx also updates mas8, so clear it for host tlbwe */
+ mtspr(SPRN_MAS8, 0);
+ isync();
+
+ local_irq_restore(flags);
+}
+
+void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid);
+ asm volatile("tlbilxlpid");
+ mtspr(SPRN_MAS5, 0);
+ local_irq_restore(flags);
+}
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
+{
+ vcpu->arch.pid = pid;
+}
+
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
+{
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ kvmppc_booke_vcpu_load(vcpu, cpu);
+
+ mtspr(SPRN_LPID, vcpu->kvm->arch.lpid);
+ mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
+ mtspr(SPRN_GPIR, vcpu->vcpu_id);
+ mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp);
+ mtspr(SPRN_EPLC, vcpu->arch.eplc);
+ mtspr(SPRN_EPSC, vcpu->arch.epsc);
+
+ mtspr(SPRN_GIVPR, vcpu->arch.ivpr);
+ mtspr(SPRN_GIVOR2, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
+ mtspr(SPRN_GIVOR8, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
+ mtspr(SPRN_GSPRG0, (unsigned long)vcpu->arch.shared->sprg0);
+ mtspr(SPRN_GSPRG1, (unsigned long)vcpu->arch.shared->sprg1);
+ mtspr(SPRN_GSPRG2, (unsigned long)vcpu->arch.shared->sprg2);
+ mtspr(SPRN_GSPRG3, (unsigned long)vcpu->arch.shared->sprg3);
+
+ mtspr(SPRN_GSRR0, vcpu->arch.shared->srr0);
+ mtspr(SPRN_GSRR1, vcpu->arch.shared->srr1);
+
+ mtspr(SPRN_GEPR, vcpu->arch.epr);
+ mtspr(SPRN_GDEAR, vcpu->arch.shared->dar);
+ mtspr(SPRN_GESR, vcpu->arch.shared->esr);
+
+ if (vcpu->arch.oldpir != mfspr(SPRN_PIR))
+ kvmppc_e500_tlbil_all(vcpu_e500);
+
+ kvmppc_load_guest_fp(vcpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.eplc = mfspr(SPRN_EPLC);
+ vcpu->arch.epsc = mfspr(SPRN_EPSC);
+
+ vcpu->arch.shared->sprg0 = mfspr(SPRN_GSPRG0);
+ vcpu->arch.shared->sprg1 = mfspr(SPRN_GSPRG1);
+ vcpu->arch.shared->sprg2 = mfspr(SPRN_GSPRG2);
+ vcpu->arch.shared->sprg3 = mfspr(SPRN_GSPRG3);
+
+ vcpu->arch.shared->srr0 = mfspr(SPRN_GSRR0);
+ vcpu->arch.shared->srr1 = mfspr(SPRN_GSRR1);
+
+ vcpu->arch.epr = mfspr(SPRN_GEPR);
+ vcpu->arch.shared->dar = mfspr(SPRN_GDEAR);
+ vcpu->arch.shared->esr = mfspr(SPRN_GESR);
+
+ vcpu->arch.oldpir = mfspr(SPRN_PIR);
+
+ kvmppc_booke_vcpu_put(vcpu);
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+ int r;
+
+ if (strcmp(cur_cpu_spec->cpu_name, "e500mc") == 0)
+ r = 0;
+ else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
+ r = 0;
+ else
+ r = -ENOTSUPP;
+
+ return r;
+}
+
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ vcpu->arch.shadow_epcr = SPRN_EPCR_DSIGS | SPRN_EPCR_DGTMI | \
+ SPRN_EPCR_DUVD;
+ vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP;
+ vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT);
+ vcpu->arch.epsc = vcpu->arch.eplc;
+
+ vcpu->arch.pvr = mfspr(SPRN_PVR);
+ vcpu_e500->svr = mfspr(SPRN_SVR);
+
+ vcpu->arch.cpu_type = KVM_CPU_E500MC;
+
+ return 0;
+}
+
+void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_PM |
+ KVM_SREGS_E_PC;
+ sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
+
+ sregs->u.e.impl.fsl.features = 0;
+ sregs->u.e.impl.fsl.svr = vcpu_e500->svr;
+ sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
+ sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
+
+ kvmppc_get_sregs_e500_tlb(vcpu, sregs);
+
+ sregs->u.e.ivor_high[3] =
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+ sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
+ sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
+
+ kvmppc_get_sregs_ivor(vcpu, sregs);
+}
+
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int ret;
+
+ if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
+ vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
+ vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0;
+ vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
+ }
+
+ ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs);
+ if (ret < 0)
+ return ret;
+
+ if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+ return 0;
+
+ if (sregs->u.e.features & KVM_SREGS_E_PM) {
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
+ sregs->u.e.ivor_high[3];
+ }
+
+ if (sregs->u.e.features & KVM_SREGS_E_PC) {
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] =
+ sregs->u.e.ivor_high[4];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] =
+ sregs->u.e.ivor_high[5];
+ }
+
+ return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500;
+ struct kvm_vcpu *vcpu;
+ int err;
+
+ vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu_e500) {
+ err = -ENOMEM;
+ goto out;
+ }
+ vcpu = &vcpu_e500->vcpu;
+
+ /* Invalid PIR value -- this LPID dosn't have valid state on any cpu */
+ vcpu->arch.oldpir = 0xffffffff;
+
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
+ err = kvmppc_e500_tlb_init(vcpu_e500);
+ if (err)
+ goto uninit_vcpu;
+
+ vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (!vcpu->arch.shared)
+ goto uninit_tlb;
+
+ return vcpu;
+
+uninit_tlb:
+ kvmppc_e500_tlb_uninit(vcpu_e500);
+uninit_vcpu:
+ kvm_vcpu_uninit(vcpu);
+
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+out:
+ return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ free_page((unsigned long)vcpu->arch.shared);
+ kvmppc_e500_tlb_uninit(vcpu_e500);
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+}
+
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+ int lpid;
+
+ lpid = kvmppc_alloc_lpid();
+ if (lpid < 0)
+ return lpid;
+
+ kvm->arch.lpid = lpid;
+ return 0;
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+ kvmppc_free_lpid(kvm->arch.lpid);
+}
+
+static int __init kvmppc_e500mc_init(void)
+{
+ int r;
+
+ r = kvmppc_booke_init();
+ if (r)
+ return r;
+
+ kvmppc_init_lpid(64);
+ kvmppc_claim_lpid(0); /* host */
+
+ return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+}
+
+static void __exit kvmppc_e500mc_exit(void)
+{
+ kvmppc_booke_exit();
+}
+
+module_init(kvmppc_e500mc_init);
+module_exit(kvmppc_e500mc_exit);
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 968f4010188..f90e86dea7a 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -23,6 +23,7 @@
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm_host.h>
+#include <linux/clockchips.h>
#include <asm/reg.h>
#include <asm/time.h>
@@ -35,7 +36,9 @@
#define OP_TRAP 3
#define OP_TRAP_64 2
+#define OP_31_XOP_TRAP 4
#define OP_31_XOP_LWZX 23
+#define OP_31_XOP_TRAP_64 68
#define OP_31_XOP_LBZX 87
#define OP_31_XOP_STWX 151
#define OP_31_XOP_STBX 215
@@ -102,8 +105,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
*/
dec_time = vcpu->arch.dec;
- dec_time *= 1000;
- do_div(dec_time, tb_ticks_per_usec);
+ /*
+ * Guest timebase ticks at the same frequency as host decrementer.
+ * So use the host decrementer calculations for decrementer emulation.
+ */
+ dec_time = dec_time << decrementer_clockevent.shift;
+ do_div(dec_time, decrementer_clockevent.mult);
dec_nsec = do_div(dec_time, NSEC_PER_SEC);
hrtimer_start(&vcpu->arch.dec_timer,
ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
@@ -141,14 +148,13 @@ u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
u32 inst = kvmppc_get_last_inst(vcpu);
- u32 ea;
- int ra;
- int rb;
- int rs;
- int rt;
- int sprn;
+ int ra = get_ra(inst);
+ int rs = get_rs(inst);
+ int rt = get_rt(inst);
+ int sprn = get_sprn(inst);
enum emulation_result emulated = EMULATE_DONE;
int advance = 1;
+ ulong spr_val = 0;
/* this default type might be overwritten by subcategories */
kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
@@ -170,173 +176,143 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case 31:
switch (get_xop(inst)) {
+ case OP_31_XOP_TRAP:
+#ifdef CONFIG_64BIT
+ case OP_31_XOP_TRAP_64:
+#endif
+#ifdef CONFIG_PPC_BOOK3S
+ kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
+#else
+ kvmppc_core_queue_program(vcpu,
+ vcpu->arch.shared->esr | ESR_PTR);
+#endif
+ advance = 0;
+ break;
case OP_31_XOP_LWZX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
case OP_31_XOP_LBZX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
case OP_31_XOP_LBZUX:
- rt = get_rt(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
- ea = kvmppc_get_gpr(vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(vcpu, ra);
-
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
- kvmppc_set_gpr(vcpu, ra, ea);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_31_XOP_STWX:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
4, 1);
break;
case OP_31_XOP_STBX:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
break;
case OP_31_XOP_STBUX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
- ea = kvmppc_get_gpr(vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(vcpu, ra);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
- kvmppc_set_gpr(vcpu, rs, ea);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_31_XOP_LHAX:
- rt = get_rt(inst);
emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
break;
case OP_31_XOP_LHZX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
break;
case OP_31_XOP_LHZUX:
- rt = get_rt(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
- ea = kvmppc_get_gpr(vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(vcpu, ra);
-
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
- kvmppc_set_gpr(vcpu, ra, ea);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_31_XOP_MFSPR:
- sprn = get_sprn(inst);
- rt = get_rt(inst);
-
switch (sprn) {
case SPRN_SRR0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0);
+ spr_val = vcpu->arch.shared->srr0;
break;
case SPRN_SRR1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1);
+ spr_val = vcpu->arch.shared->srr1;
break;
case SPRN_PVR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break;
+ spr_val = vcpu->arch.pvr;
+ break;
case SPRN_PIR:
- kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break;
+ spr_val = vcpu->vcpu_id;
+ break;
case SPRN_MSSSR0:
- kvmppc_set_gpr(vcpu, rt, 0); break;
+ spr_val = 0;
+ break;
/* Note: mftb and TBRL/TBWL are user-accessible, so
* the guest can always access the real TB anyways.
* In fact, we probably will never see these traps. */
case SPRN_TBWL:
- kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break;
+ spr_val = get_tb() >> 32;
+ break;
case SPRN_TBWU:
- kvmppc_set_gpr(vcpu, rt, get_tb()); break;
+ spr_val = get_tb();
+ break;
case SPRN_SPRG0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0);
+ spr_val = vcpu->arch.shared->sprg0;
break;
case SPRN_SPRG1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1);
+ spr_val = vcpu->arch.shared->sprg1;
break;
case SPRN_SPRG2:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2);
+ spr_val = vcpu->arch.shared->sprg2;
break;
case SPRN_SPRG3:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3);
+ spr_val = vcpu->arch.shared->sprg3;
break;
/* Note: SPRG4-7 are user-readable, so we don't get
* a trap. */
case SPRN_DEC:
- {
- kvmppc_set_gpr(vcpu, rt,
- kvmppc_get_dec(vcpu, get_tb()));
+ spr_val = kvmppc_get_dec(vcpu, get_tb());
break;
- }
default:
- emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
- if (emulated == EMULATE_FAIL) {
- printk("mfspr: unknown spr %x\n", sprn);
- kvmppc_set_gpr(vcpu, rt, 0);
+ emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
+ &spr_val);
+ if (unlikely(emulated == EMULATE_FAIL)) {
+ printk(KERN_INFO "mfspr: unknown spr "
+ "0x%x\n", sprn);
}
break;
}
+ kvmppc_set_gpr(vcpu, rt, spr_val);
kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
break;
case OP_31_XOP_STHX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
break;
case OP_31_XOP_STHUX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
- ea = kvmppc_get_gpr(vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(vcpu, ra);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
- kvmppc_set_gpr(vcpu, ra, ea);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_31_XOP_MTSPR:
- sprn = get_sprn(inst);
- rs = get_rs(inst);
+ spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_SRR0:
- vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.shared->srr0 = spr_val;
break;
case SPRN_SRR1:
- vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.shared->srr1 = spr_val;
break;
/* XXX We need to context-switch the timebase for
@@ -347,27 +323,29 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case SPRN_MSSSR0: break;
case SPRN_DEC:
- vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.dec = spr_val;
kvmppc_emulate_dec(vcpu);
break;
case SPRN_SPRG0:
- vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.shared->sprg0 = spr_val;
break;
case SPRN_SPRG1:
- vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.shared->sprg1 = spr_val;
break;
case SPRN_SPRG2:
- vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.shared->sprg2 = spr_val;
break;
case SPRN_SPRG3:
- vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.shared->sprg3 = spr_val;
break;
default:
- emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
+ emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
+ spr_val);
if (emulated == EMULATE_FAIL)
- printk("mtspr: unknown spr %x\n", sprn);
+ printk(KERN_INFO "mtspr: unknown spr "
+ "0x%x\n", sprn);
break;
}
kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
@@ -382,7 +360,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_31_XOP_LWBRX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
break;
@@ -390,25 +367,16 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_31_XOP_STWBRX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
4, 0);
break;
case OP_31_XOP_LHBRX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
break;
case OP_31_XOP_STHBRX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 0);
@@ -421,99 +389,78 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_LWZ:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
case OP_LWZU:
- ra = get_ra(inst);
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_LBZ:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
case OP_LBZU:
- ra = get_ra(inst);
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_STW:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
4, 1);
break;
case OP_STWU:
- ra = get_ra(inst);
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
4, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_STB:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
break;
case OP_STBU:
- ra = get_ra(inst);
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_LHZ:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
break;
case OP_LHZU:
- ra = get_ra(inst);
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_LHA:
- rt = get_rt(inst);
emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
break;
case OP_LHAU:
- ra = get_ra(inst);
- rt = get_rt(inst);
emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_STH:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
break;
case OP_STHU:
- ra = get_ra(inst);
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
default:
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 00d7e345b3f..1493c8de947 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -43,6 +43,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
v->requests;
}
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+ return 1;
+}
+
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
{
int nr = kvmppc_get_gpr(vcpu, 11);
@@ -74,7 +79,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
}
case HC_VENDOR_KVM | KVM_HC_FEATURES:
r = HC_EV_SUCCESS;
-#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500)
+#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2)
/* XXX Missing magic page on 44x */
r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
#endif
@@ -109,6 +114,11 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
goto out;
#endif
+#ifdef CONFIG_KVM_BOOKE_HV
+ if (!cpu_has_feature(CPU_FTR_EMB_HV))
+ goto out;
+#endif
+
r = true;
out:
@@ -225,7 +235,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_OSI:
case KVM_CAP_PPC_GET_PVINFO:
-#ifdef CONFIG_KVM_E500
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB:
#endif
r = 1;
@@ -234,10 +244,12 @@ int kvm_dev_ioctl_check_extension(long ext)
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
#endif
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
r = 1;
break;
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_KVM_BOOK3S_64_HV
case KVM_CAP_PPC_SMT:
r = threads_per_core;
break;
@@ -267,6 +279,11 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
+#ifdef CONFIG_PPC_BOOK3S_64
+ case KVM_CAP_PPC_GET_SMMU_INFO:
+ r = 1;
+ break;
+#endif
default:
r = 0;
break;
@@ -588,21 +605,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
return r;
}
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
-{
- int me;
- int cpu = vcpu->cpu;
-
- me = get_cpu();
- if (waitqueue_active(vcpu->arch.wqp)) {
- wake_up_interruptible(vcpu->arch.wqp);
- vcpu->stat.halt_wakeup++;
- } else if (cpu != me && cpu != -1) {
- smp_send_reschedule(vcpu->cpu);
- }
- put_cpu();
-}
-
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
if (irq->irq == KVM_INTERRUPT_UNSET) {
@@ -611,6 +613,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
}
kvmppc_core_queue_external(vcpu, irq);
+
kvm_vcpu_kick(vcpu);
return 0;
@@ -633,7 +636,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
r = 0;
vcpu->arch.papr_enabled = true;
break;
-#ifdef CONFIG_KVM_E500
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB: {
struct kvm_config_tlb cfg;
void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0];
@@ -710,7 +713,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
-#ifdef CONFIG_KVM_E500
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_DIRTY_TLB: {
struct kvm_dirty_tlb dirty;
r = -EFAULT;
@@ -720,7 +723,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
#endif
-
default:
r = -EINVAL;
}
@@ -777,7 +779,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CREATE_SPAPR_TCE: {
struct kvm_create_spapr_tce create_tce;
struct kvm *kvm = filp->private_data;
@@ -788,7 +790,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
goto out;
}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_KVM_BOOK3S_64_HV
case KVM_ALLOCATE_RMA: {
struct kvm *kvm = filp->private_data;
struct kvm_allocate_rma rma;
@@ -800,6 +804,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#ifdef CONFIG_PPC_BOOK3S_64
+ case KVM_PPC_GET_SMMU_INFO: {
+ struct kvm *kvm = filp->private_data;
+ struct kvm_ppc_smmu_info info;
+
+ memset(&info, 0, sizeof(info));
+ r = kvm_vm_ioctl_get_smmu_info(kvm, &info);
+ if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
+ r = -EFAULT;
+ break;
+ }
+#endif /* CONFIG_PPC_BOOK3S_64 */
default:
r = -ENOTTY;
}
@@ -808,6 +824,40 @@ out:
return r;
}
+static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)];
+static unsigned long nr_lpids;
+
+long kvmppc_alloc_lpid(void)
+{
+ long lpid;
+
+ do {
+ lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS);
+ if (lpid >= nr_lpids) {
+ pr_err("%s: No LPIDs free\n", __func__);
+ return -ENOMEM;
+ }
+ } while (test_and_set_bit(lpid, lpid_inuse));
+
+ return lpid;
+}
+
+void kvmppc_claim_lpid(long lpid)
+{
+ set_bit(lpid, lpid_inuse);
+}
+
+void kvmppc_free_lpid(long lpid)
+{
+ clear_bit(lpid, lpid_inuse);
+}
+
+void kvmppc_init_lpid(unsigned long nr_lpids_param)
+{
+ nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);
+ memset(lpid_inuse, 0, sizeof(lpid_inuse));
+}
+
int kvm_arch_init(void *opaque)
{
return 0;
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index 8167d42a776..bf191e72b2d 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -93,6 +93,12 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
case SIGNAL_EXITS:
vcpu->stat.signal_exits++;
break;
+ case DBELL_EXITS:
+ vcpu->stat.dbell_exits++;
+ break;
+ case GDBELL_EXITS:
+ vcpu->stat.gdbell_exits++;
+ break;
}
}
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index 773d38f90aa..d73a5901490 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -30,7 +30,7 @@ _GLOBAL(__copy_tofrom_user_base)
dcbt 0,r4
beq .Lcopy_page_4K
andi. r6,r6,7
- PPC_MTOCRF 0x01,r5
+ PPC_MTOCRF(0x01,r5)
blt cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
* CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
@@ -186,7 +186,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
blr
.Ldst_unaligned:
- PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */
+ PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */
subf r5,r6,r5
li r7,0
cmpldi cr1,r5,16
@@ -201,7 +201,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
2: bf cr7*4+1,3f
37: lwzx r0,r7,r4
83: stwx r0,r7,r3
-3: PPC_MTOCRF 0x01,r5
+3: PPC_MTOCRF(0x01,r5)
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 11ce045e21f..f4fcb0bc656 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -19,7 +19,7 @@ _GLOBAL(memset)
rlwimi r4,r4,16,0,15
cmplw cr1,r5,r0 /* do we get that far? */
rldimi r4,r4,32,0
- PPC_MTOCRF 1,r0
+ PPC_MTOCRF(1,r0)
mr r6,r3
blt cr1,8f
beq+ 3f /* if already 8-byte aligned */
@@ -49,7 +49,7 @@ _GLOBAL(memset)
bdnz 4b
5: srwi. r0,r5,3
clrlwi r5,r5,29
- PPC_MTOCRF 1,r0
+ PPC_MTOCRF(1,r0)
beq 8f
bf 29,6f
std r4,0(r6)
@@ -65,7 +65,7 @@ _GLOBAL(memset)
std r4,0(r6)
addi r6,r6,8
8: cmpwi r5,0
- PPC_MTOCRF 1,r5
+ PPC_MTOCRF(1,r5)
beqlr+
bf 29,9f
stw r4,0(r6)
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index e178922b2c2..82fea3963e1 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -12,7 +12,7 @@
.align 7
_GLOBAL(memcpy)
std r3,48(r1) /* save destination pointer for return value */
- PPC_MTOCRF 0x01,r5
+ PPC_MTOCRF(0x01,r5)
cmpldi cr1,r5,16
neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
andi. r6,r6,7
@@ -154,7 +154,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
blr
.Ldst_unaligned:
- PPC_MTOCRF 0x01,r6 # put #bytes to 8B bdry into cr7
+ PPC_MTOCRF(0x01,r6) # put #bytes to 8B bdry into cr7
subf r5,r6,r5
li r7,0
cmpldi cr1,r5,16
@@ -169,7 +169,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
2: bf cr7*4+1,3f
lwzx r0,r7,r4
stwx r0,r7,r3
-3: PPC_MTOCRF 0x01,r5
+3: PPC_MTOCRF(0x01,r5)
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 455881a5563..093d6316435 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -160,48 +160,3 @@ _GLOBAL(__clear_user)
PPC_LONG 1b,91b
PPC_LONG 8b,92b
.text
-
-_GLOBAL(__strncpy_from_user)
- addi r6,r3,-1
- addi r4,r4,-1
- cmpwi 0,r5,0
- beq 2f
- mtctr r5
-1: lbzu r0,1(r4)
- cmpwi 0,r0,0
- stbu r0,1(r6)
- bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
- beq 3f
-2: addi r6,r6,1
-3: subf r3,r3,r6
- blr
-99: li r3,-EFAULT
- blr
-
- .section __ex_table,"a"
- PPC_LONG 1b,99b
- .text
-
-/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
-_GLOBAL(__strnlen_user)
- addi r7,r3,-1
- subf r6,r7,r5 /* top+1 - str */
- cmplw 0,r4,r6
- bge 0f
- mr r6,r4
-0: mtctr r6 /* ctr = min(len, top - str) */
-1: lbzu r0,1(r7) /* get next byte */
- cmpwi 0,r0,0
- bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */
- addi r7,r7,1
- subf r3,r3,r7 /* number of bytes we have looked at */
- beqlr /* return if we found a 0 byte */
- cmpw 0,r3,r4 /* did we look at all len bytes? */
- blt 99f /* if not, must have hit top */
- addi r3,r4,1 /* return len + 1 to indicate no null found */
- blr
-99: li r3,0 /* bad address, return 0 */
- blr
-
- .section __ex_table,"a"
- PPC_LONG 1b,99b
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index fb05b123218..1a6de0a7d8e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -271,7 +271,8 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
unsigned long gpage_npages[MMU_PAGE_COUNT];
-static int __init do_gpage_early_setup(char *param, char *val)
+static int __init do_gpage_early_setup(char *param, char *val,
+ const char *unused)
{
static phys_addr_t size;
unsigned long npages;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 5b63bd3da4a..e779642c25e 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -333,9 +333,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned int)(long)hcpu;
-#ifdef CONFIG_HOTPLUG_CPU
- struct task_struct *p;
-#endif
+
/* We don't touch CPU 0 map, it's allocated at aboot and kept
* around forever
*/
@@ -358,12 +356,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
stale_map[cpu] = NULL;
/* We also clear the cpu_vm_mask bits of CPUs going away */
- read_lock(&tasklist_lock);
- for_each_process(p) {
- if (p->mm)
- cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
- }
- read_unlock(&tasklist_lock);
+ clear_tasks_mm_cpumask(cpu);
break;
#endif /* CONFIG_HOTPLUG_CPU */
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b6edbb3b4a5..6e8f677f564 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -635,7 +635,7 @@ static inline int __init read_usm_ranges(const u32 **usm)
*/
static void __init parse_drconf_memory(struct device_node *memory)
{
- const u32 *dm, *usm;
+ const u32 *uninitialized_var(dm), *usm;
unsigned int n, rc, ranges, is_kexec_kdump = 0;
unsigned long lmb_size, base, size, sz;
int nid;
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index af1ab5e9a69..5c3cf2d04e4 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -48,7 +48,13 @@
/*
* Assembly helpers from arch/powerpc/net/bpf_jit.S:
*/
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+#define DECLARE_LOAD_FUNC(func) \
+ extern u8 func[], func##_negative_offset[], func##_positive_offset[]
+
+DECLARE_LOAD_FUNC(sk_load_word);
+DECLARE_LOAD_FUNC(sk_load_half);
+DECLARE_LOAD_FUNC(sk_load_byte);
+DECLARE_LOAD_FUNC(sk_load_byte_msh);
#define FUNCTION_DESCR_SIZE 24
diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index ff4506e85cc..7d3a3b5619a 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S
@@ -31,14 +31,13 @@
* then branch directly to slow_path_XXX if required. (In fact, could
* load a spare GPR with the address of slow_path_generic and pass size
* as an argument, making the call site a mtlr, li and bllr.)
- *
- * Technically, the "is addr < 0" check is unnecessary & slowing down
- * the ABS path, as it's statically checked on generation.
*/
.globl sk_load_word
sk_load_word:
cmpdi r_addr, 0
- blt bpf_error
+ blt bpf_slow_path_word_neg
+ .globl sk_load_word_positive_offset
+sk_load_word_positive_offset:
/* Are we accessing past headlen? */
subi r_scratch1, r_HL, 4
cmpd r_scratch1, r_addr
@@ -51,7 +50,9 @@ sk_load_word:
.globl sk_load_half
sk_load_half:
cmpdi r_addr, 0
- blt bpf_error
+ blt bpf_slow_path_half_neg
+ .globl sk_load_half_positive_offset
+sk_load_half_positive_offset:
subi r_scratch1, r_HL, 2
cmpd r_scratch1, r_addr
blt bpf_slow_path_half
@@ -61,7 +62,9 @@ sk_load_half:
.globl sk_load_byte
sk_load_byte:
cmpdi r_addr, 0
- blt bpf_error
+ blt bpf_slow_path_byte_neg
+ .globl sk_load_byte_positive_offset
+sk_load_byte_positive_offset:
cmpd r_HL, r_addr
ble bpf_slow_path_byte
lbzx r_A, r_D, r_addr
@@ -69,22 +72,20 @@ sk_load_byte:
/*
* BPF_S_LDX_B_MSH: ldxb 4*([offset]&0xf)
- * r_addr is the offset value, already known positive
+ * r_addr is the offset value
*/
.globl sk_load_byte_msh
sk_load_byte_msh:
+ cmpdi r_addr, 0
+ blt bpf_slow_path_byte_msh_neg
+ .globl sk_load_byte_msh_positive_offset
+sk_load_byte_msh_positive_offset:
cmpd r_HL, r_addr
ble bpf_slow_path_byte_msh
lbzx r_X, r_D, r_addr
rlwinm r_X, r_X, 2, 32-4-2, 31-2
blr
-bpf_error:
- /* Entered with cr0 = lt */
- li r3, 0
- /* Generated code will 'blt epilogue', returning 0. */
- blr
-
/* Call out to skb_copy_bits:
* We'll need to back up our volatile regs first; we have
* local variable space at r1+(BPF_PPC_STACK_BASIC).
@@ -104,6 +105,7 @@ bpf_error:
mr r4, r_addr; \
li r6, SIZE; \
bl skb_copy_bits; \
+ nop; \
/* R3 = 0 on success */ \
addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
ld r0, 16(r1); \
@@ -136,3 +138,85 @@ bpf_slow_path_byte_msh:
lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
rlwinm r_X, r_X, 2, 32-4-2, 31-2
blr
+
+/* Call out to bpf_internal_load_pointer_neg_helper:
+ * We'll need to back up our volatile regs first; we have
+ * local variable space at r1+(BPF_PPC_STACK_BASIC).
+ * Allocate a new stack frame here to remain ABI-compliant in
+ * stashing LR.
+ */
+#define sk_negative_common(SIZE) \
+ mflr r0; \
+ std r0, 16(r1); \
+ /* R3 goes in parameter space of caller's frame */ \
+ std r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
+ std r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
+ std r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
+ stdu r1, -BPF_PPC_SLOWPATH_FRAME(r1); \
+ /* R3 = r_skb, as passed */ \
+ mr r4, r_addr; \
+ li r5, SIZE; \
+ bl bpf_internal_load_pointer_neg_helper; \
+ nop; \
+ /* R3 != 0 on success */ \
+ addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
+ ld r0, 16(r1); \
+ ld r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
+ ld r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
+ mtlr r0; \
+ cmpldi r3, 0; \
+ beq bpf_error_slow; /* cr0 = EQ */ \
+ mr r_addr, r3; \
+ ld r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
+ /* Great success! */
+
+bpf_slow_path_word_neg:
+ lis r_scratch1,-32 /* SKF_LL_OFF */
+ cmpd r_addr, r_scratch1 /* addr < SKF_* */
+ blt bpf_error /* cr0 = LT */
+ .globl sk_load_word_negative_offset
+sk_load_word_negative_offset:
+ sk_negative_common(4)
+ lwz r_A, 0(r_addr)
+ blr
+
+bpf_slow_path_half_neg:
+ lis r_scratch1,-32 /* SKF_LL_OFF */
+ cmpd r_addr, r_scratch1 /* addr < SKF_* */
+ blt bpf_error /* cr0 = LT */
+ .globl sk_load_half_negative_offset
+sk_load_half_negative_offset:
+ sk_negative_common(2)
+ lhz r_A, 0(r_addr)
+ blr
+
+bpf_slow_path_byte_neg:
+ lis r_scratch1,-32 /* SKF_LL_OFF */
+ cmpd r_addr, r_scratch1 /* addr < SKF_* */
+ blt bpf_error /* cr0 = LT */
+ .globl sk_load_byte_negative_offset
+sk_load_byte_negative_offset:
+ sk_negative_common(1)
+ lbz r_A, 0(r_addr)
+ blr
+
+bpf_slow_path_byte_msh_neg:
+ lis r_scratch1,-32 /* SKF_LL_OFF */
+ cmpd r_addr, r_scratch1 /* addr < SKF_* */
+ blt bpf_error /* cr0 = LT */
+ .globl sk_load_byte_msh_negative_offset
+sk_load_byte_msh_negative_offset:
+ sk_negative_common(1)
+ lbz r_X, 0(r_addr)
+ rlwinm r_X, r_X, 2, 32-4-2, 31-2
+ blr
+
+bpf_error_slow:
+ /* fabricate a cr0 = lt */
+ li r_scratch1, -1
+ cmpdi r_scratch1, 0
+bpf_error:
+ /* Entered with cr0 = lt */
+ li r3, 0
+ /* Generated code will 'blt epilogue', returning 0. */
+ blr
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 73619d3aeb6..2dc8b148484 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -127,6 +127,9 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
PPC_BLR();
}
+#define CHOOSE_LOAD_FUNC(K, func) \
+ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
+
/* Assemble the body code between the prologue & epilogue. */
static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
struct codegen_context *ctx,
@@ -391,21 +394,16 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
/*** Absolute loads from packet header/data ***/
case BPF_S_LD_W_ABS:
- func = sk_load_word;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_word);
goto common_load;
case BPF_S_LD_H_ABS:
- func = sk_load_half;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_half);
goto common_load;
case BPF_S_LD_B_ABS:
- func = sk_load_byte;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
common_load:
- /*
- * Load from [K]. Reference with the (negative)
- * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported.
- */
+ /* Load from [K]. */
ctx->seen |= SEEN_DATAREF;
- if ((int)K < 0)
- return -ENOTSUPP;
PPC_LI64(r_scratch1, func);
PPC_MTLR(r_scratch1);
PPC_LI32(r_addr, K);
@@ -429,7 +427,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
common_load_ind:
/*
* Load from [X + K]. Negative offsets are tested for
- * in the helper functions, and result in a 'ret 0'.
+ * in the helper functions.
*/
ctx->seen |= SEEN_DATAREF | SEEN_XREG;
PPC_LI64(r_scratch1, func);
@@ -443,13 +441,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
break;
case BPF_S_LDX_B_MSH:
- /*
- * x86 version drops packet (RET 0) when K<0, whereas
- * interpreter does allow K<0 (__load_pointer, special
- * ancillary data). common_load returns ENOTSUPP if K<0,
- * so we fall back to interpreter & filter works.
- */
- func = sk_load_byte_msh;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
goto common_load;
break;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 02aee03e713..8f84bcba18d 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1299,8 +1299,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (record) {
struct perf_sample_data data;
- perf_sample_data_init(&data, ~0ULL);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
if (event->attr.sample_type & PERF_SAMPLE_ADDR)
perf_get_data_addr(regs, &data.addr);
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index 0a6d2a9d569..106c5335467 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -613,8 +613,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (record) {
struct perf_sample_data data;
- perf_sample_data_init(&data, 0);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
if (perf_event_overflow(event, &data, regs))
fsl_emb_pmu_stop(event, 0);
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 2e4e64abfab..8abf6fb8f41 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -23,6 +23,8 @@ config BLUESTONE
default n
select PPC44x_SIMPLE
select APM821xx
+ select PCI_MSI
+ select PPC4xx_MSI
select PPC4xx_PCI_EXPRESS
select IBM_EMAC_RGMII
help
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index 9fef5302adc..67dac22b436 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -21,6 +21,12 @@ static struct of_device_id __initdata mpc85xx_common_ids[] = {
{ .compatible = "fsl,qe", },
{ .compatible = "fsl,cpm2", },
{ .compatible = "fsl,srio", },
+ /* So that the DMA channel nodes can be probed individually: */
+ { .compatible = "fsl,eloplus-dma", },
+ /* For the PMC driver */
+ { .compatible = "fsl,mpc8548-guts", },
+ /* Probably unnecessary? */
+ { .compatible = "gpio-leds", },
{},
};
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 9a6f04406e0..d208ebccb91 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -399,12 +399,6 @@ static int __init board_fixups(void)
machine_arch_initcall(mpc8568_mds, board_fixups);
machine_arch_initcall(mpc8569_mds, board_fixups);
-static struct of_device_id mpc85xx_ids[] = {
- { .compatible = "fsl,mpc8548-guts", },
- { .compatible = "gpio-leds", },
- {},
-};
-
static int __init mpc85xx_publish_devices(void)
{
if (machine_is(mpc8568_mds))
@@ -412,10 +406,7 @@ static int __init mpc85xx_publish_devices(void)
if (machine_is(mpc8569_mds))
simple_gpiochip_init("fsl,mpc8569mds-bcsr-gpio");
- mpc85xx_common_publish_devices();
- of_platform_bus_probe(NULL, mpc85xx_ids, NULL);
-
- return 0;
+ return mpc85xx_common_publish_devices();
}
machine_device_initcall(mpc8568_mds, mpc85xx_publish_devices);
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index e74b7cde9ae..f700c81a132 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -460,18 +460,7 @@ static void __init p1022_ds_setup_arch(void)
pr_info("Freescale P1022 DS reference board\n");
}
-static struct of_device_id __initdata p1022_ds_ids[] = {
- /* So that the DMA channel nodes can be probed individually: */
- { .compatible = "fsl,eloplus-dma", },
- {},
-};
-
-static int __init p1022_ds_publish_devices(void)
-{
- mpc85xx_common_publish_devices();
- return of_platform_bus_probe(NULL, p1022_ds_ids, NULL);
-}
-machine_device_initcall(p1022_ds, p1022_ds_publish_devices);
+machine_device_initcall(p1022_ds, mpc85xx_common_publish_devices);
machine_arch_initcall(p1022_ds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 425db18580a..61c9550819a 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -78,6 +78,36 @@ config PPC_BOOK3E_64
endchoice
+choice
+ prompt "CPU selection"
+ depends on PPC64
+ default GENERIC_CPU
+ help
+ This will create a kernel which is optimised for a particular CPU.
+ The resulting kernel may not run on other CPUs, so use this with care.
+
+ If unsure, select Generic.
+
+config GENERIC_CPU
+ bool "Generic"
+
+config CELL_CPU
+ bool "Cell Broadband Engine"
+
+config POWER4_CPU
+ bool "POWER4"
+
+config POWER5_CPU
+ bool "POWER5"
+
+config POWER6_CPU
+ bool "POWER6"
+
+config POWER7_CPU
+ bool "POWER7"
+
+endchoice
+
config PPC_BOOK3S
def_bool y
depends on PPC_BOOK3S_32 || PPC_BOOK3S_64
@@ -86,15 +116,6 @@ config PPC_BOOK3E
def_bool y
depends on PPC_BOOK3E_64
-config POWER4_ONLY
- bool "Optimize for POWER4"
- depends on PPC64 && PPC_BOOK3S
- default n
- ---help---
- Cause the compiler to optimize for POWER4/POWER5/PPC970 processors.
- The resulting binary will not work on POWER3 or RS64 processors
- when compiled with binutils 2.15 or later.
-
config 6xx
def_bool y
depends on PPC32 && PPC_BOOK3S
@@ -258,7 +279,7 @@ config PPC_ICSWX_PID
default y
---help---
The PID register in server is used explicitly for ICSWX. In
- embedded systems PID managment is done by the system.
+ embedded systems PID management is done by the system.
config PPC_ICSWX_USE_SIGILL
bool "Should a bad CT cause a SIGILL?"
@@ -266,7 +287,7 @@ config PPC_ICSWX_USE_SIGILL
default n
---help---
Should a bad CT used for "non-record form ICSWX" cause an
- illegal intruction signal or should it be silent as
+ illegal instruction signal or should it be silent as
architected.
If in doubt, say N here.
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index d09f3e8e686..85825b5401e 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -114,7 +114,7 @@ static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
pr_devel("axon_msi: woff %x roff %x msi %x\n",
write_offset, msic->read_offset, msi);
- if (msi < NR_IRQS && irq_get_chip_data(msi) == msic) {
+ if (msi < nr_irqs && irq_get_chip_data(msi) == msic) {
generic_handle_irq(msi);
msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
} else {
@@ -276,9 +276,6 @@ static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (rc)
return rc;
- /* We rely on being able to stash a virq in a u16 */
- BUILD_BUG_ON(NR_IRQS > 65536);
-
list_for_each_entry(entry, &dev->msi_list, list) {
virq = irq_create_direct_mapping(msic->irq_domain);
if (virq == NO_IRQ) {
@@ -392,7 +389,8 @@ static int axon_msi_probe(struct platform_device *device)
}
memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES);
- msic->irq_domain = irq_domain_add_nomap(dn, 0, &msic_host_ops, msic);
+ /* We rely on being able to stash a virq in a u16, so limit irqs to < 65536 */
+ msic->irq_domain = irq_domain_add_nomap(dn, 65536, &msic_host_ops, msic);
if (!msic->irq_domain) {
printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %s\n",
dn->full_name);
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
index f9a48af335c..8c6dc42ecf6 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.c
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -248,6 +248,6 @@ void beatic_deinit_IRQ(void)
{
int i;
- for (i = 1; i < NR_IRQS; i++)
+ for (i = 1; i < nr_irqs; i++)
beat_destruct_irq_plug(i);
}
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 1d75c92ea8f..66519d263da 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -151,7 +151,7 @@ static void
spufs_evict_inode(struct inode *inode)
{
struct spufs_inode_info *ei = SPUFS_I(inode);
- end_writeback(inode);
+ clear_inode(inode);
if (ei->i_ctx)
put_spu_context(ei->i_ctx);
if (ei->i_gang)
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 996c5ff7824..fc536f2971c 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -366,11 +366,20 @@ static void kw_i2c_timeout(unsigned long data)
unsigned long flags;
spin_lock_irqsave(&host->lock, flags);
+
+ /*
+ * If the timer is pending, that means we raced with the
+ * irq, in which case we just return
+ */
+ if (timer_pending(&host->timeout_timer))
+ goto skip;
+
kw_i2c_handle_interrupt(host, kw_read_reg(reg_isr));
if (host->state != state_idle) {
host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT;
add_timer(&host->timeout_timer);
}
+ skip:
spin_unlock_irqrestore(&host->lock, flags);
}
@@ -1494,6 +1503,7 @@ static int __init pmac_i2c_create_platform_devices(void)
if (bus->platform_dev == NULL)
return -ENOMEM;
bus->platform_dev->dev.platform_data = bus;
+ bus->platform_dev->dev.of_node = bus->busnode;
platform_device_add(bus->platform_dev);
}
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 66ad93de1d5..c4e630576ff 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -57,9 +57,9 @@ static int max_real_irqs;
static DEFINE_RAW_SPINLOCK(pmac_pic_lock);
-#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
-static unsigned long ppc_lost_interrupts[NR_MASK_WORDS];
-static unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
+/* The max irq number this driver deals with is 128; see max_irqs */
+static DECLARE_BITMAP(ppc_lost_interrupts, 128);
+static DECLARE_BITMAP(ppc_cached_irq_mask, 128);
static int pmac_irq_cascade = -1;
static struct irq_domain *pmac_pic_host;
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
index 476d9d9b240..46b7f023252 100644
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -7,7 +7,6 @@ config PPC_PS3
select USB_OHCI_BIG_ENDIAN_MMIO
select USB_ARCH_HAS_EHCI
select USB_EHCI_BIG_ENDIAN_MMIO
- select MEMORY_HOTPLUG
select PPC_PCI_CHOICE
help
This option enables support for the Sony PS3 game console
@@ -74,7 +73,7 @@ config PS3_PS3AV
help
Include support for the PS3 AV Settings driver.
- This support is required for graphics and sound. In
+ This support is required for PS3 graphics and sound. In
general, all users will say Y or M.
config PS3_SYS_MANAGER
@@ -85,9 +84,22 @@ config PS3_SYS_MANAGER
help
Include support for the PS3 System Manager.
- This support is required for system control. In
+ This support is required for PS3 system control. In
general, all users will say Y or M.
+config PS3_REPOSITORY_WRITE
+ bool "PS3 Repository write support" if PS3_ADVANCED
+ depends on PPC_PS3
+ default n
+ help
+ Enables support for writing to the PS3 System Repository.
+
+ This support is intended for bootloaders that need to store data
+ in the repository for later boot stages.
+
+ If in doubt, say N here and reduce the size of the kernel by a
+ small amount.
+
config PS3_STORAGE
depends on PPC_PS3
tristate
@@ -122,7 +134,7 @@ config PS3_FLASH
This support is required to access the PS3 FLASH ROM, which
contains the boot loader and some boot options.
- In general, all users will say Y or M.
+ In general, PS3 OtherOS users will say Y or M.
As this driver needs a fixed buffer of 256 KiB of memory, it can
be disabled on the kernel command line using "ps3flash=off", to
@@ -156,7 +168,7 @@ config PS3GELIC_UDBG
via the Ethernet port (UDP port number 18194).
This driver uses a trivial implementation and is independent
- from the main network driver.
+ from the main PS3 gelic network driver.
If in doubt, say N here.
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index de2aea42170..0c9f643d9e2 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -20,7 +20,6 @@
#include <linux/kernel.h>
#include <linux/export.h>
-#include <linux/memory_hotplug.h>
#include <linux/memblock.h>
#include <linux/slab.h>
@@ -79,12 +78,14 @@ enum {
* @base: base address
* @size: size in bytes
* @offset: difference between base and rm.size
+ * @destroy: flag if region should be destroyed upon shutdown
*/
struct mem_region {
u64 base;
u64 size;
unsigned long offset;
+ int destroy;
};
/**
@@ -96,7 +97,7 @@ struct mem_region {
* The HV virtual address space (vas) allows for hotplug memory regions.
* Memory regions can be created and destroyed in the vas at runtime.
* @rm: real mode (bootmem) region
- * @r1: hotplug memory region(s)
+ * @r1: highmem region(s)
*
* ps3 addresses
* virt_addr: a cpu 'translated' effective address
@@ -222,10 +223,6 @@ void ps3_mm_vas_destroy(void)
}
}
-/*============================================================================*/
-/* memory hotplug routines */
-/*============================================================================*/
-
/**
* ps3_mm_region_create - create a memory region in the vas
* @r: pointer to a struct mem_region to accept initialized values
@@ -262,6 +259,7 @@ static int ps3_mm_region_create(struct mem_region *r, unsigned long size)
goto zero_region;
}
+ r->destroy = 1;
r->offset = r->base - map.rm.size;
return result;
@@ -279,7 +277,14 @@ static void ps3_mm_region_destroy(struct mem_region *r)
{
int result;
+ if (!r->destroy) {
+ pr_info("%s:%d: Not destroying high region: %llxh %llxh\n",
+ __func__, __LINE__, r->base, r->size);
+ return;
+ }
+
DBG("%s:%d: r->base = %llxh\n", __func__, __LINE__, r->base);
+
if (r->base) {
result = lv1_release_memory(r->base);
BUG_ON(result);
@@ -288,50 +293,36 @@ static void ps3_mm_region_destroy(struct mem_region *r)
}
}
-/**
- * ps3_mm_add_memory - hot add memory
- */
-
-static int __init ps3_mm_add_memory(void)
+static int ps3_mm_get_repository_highmem(struct mem_region *r)
{
int result;
- unsigned long start_addr;
- unsigned long start_pfn;
- unsigned long nr_pages;
-
- if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
- return -ENODEV;
- BUG_ON(!mem_init_done);
+ /* Assume a single highmem region. */
- start_addr = map.rm.size;
- start_pfn = start_addr >> PAGE_SHIFT;
- nr_pages = (map.r1.size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ result = ps3_repository_read_highmem_info(0, &r->base, &r->size);
- DBG("%s:%d: start_addr %lxh, start_pfn %lxh, nr_pages %lxh\n",
- __func__, __LINE__, start_addr, start_pfn, nr_pages);
-
- result = add_memory(0, start_addr, map.r1.size);
+ if (result)
+ goto zero_region;
- if (result) {
- pr_err("%s:%d: add_memory failed: (%d)\n",
- __func__, __LINE__, result);
- return result;
+ if (!r->base || !r->size) {
+ result = -1;
+ goto zero_region;
}
- memblock_add(start_addr, map.r1.size);
+ r->offset = r->base - map.rm.size;
- result = online_pages(start_pfn, nr_pages);
+ DBG("%s:%d: Found high region in repository: %llxh %llxh\n",
+ __func__, __LINE__, r->base, r->size);
- if (result)
- pr_err("%s:%d: online_pages failed: (%d)\n",
- __func__, __LINE__, result);
+ return 0;
+zero_region:
+ DBG("%s:%d: No high region in repository.\n", __func__, __LINE__);
+
+ r->size = r->base = r->offset = 0;
return result;
}
-device_initcall(ps3_mm_add_memory);
-
/*============================================================================*/
/* dma routines */
/*============================================================================*/
@@ -1217,13 +1208,23 @@ void __init ps3_mm_init(void)
BUG_ON(map.rm.base);
BUG_ON(!map.rm.size);
+ /* Check if we got the highmem region from an earlier boot step */
- /* arrange to do this in ps3_mm_add_memory */
- ps3_mm_region_create(&map.r1, map.total - map.rm.size);
+ if (ps3_mm_get_repository_highmem(&map.r1))
+ ps3_mm_region_create(&map.r1, map.total - map.rm.size);
/* correct map.total for the real total amount of memory we use */
map.total = map.rm.size + map.r1.size;
+ if (!map.r1.size) {
+ DBG("%s:%d: No highmem region found\n", __func__, __LINE__);
+ } else {
+ DBG("%s:%d: Adding highmem region: %llxh %llxh\n",
+ __func__, __LINE__, map.rm.size,
+ map.total - map.rm.size);
+ memblock_add(map.rm.size, map.total - map.rm.size);
+ }
+
DBG(" <- %s:%d\n", __func__, __LINE__);
}
diff --git a/arch/powerpc/platforms/ps3/platform.h b/arch/powerpc/platforms/ps3/platform.h
index 1a633ed0fe9..d71329a8e32 100644
--- a/arch/powerpc/platforms/ps3/platform.h
+++ b/arch/powerpc/platforms/ps3/platform.h
@@ -188,6 +188,22 @@ int ps3_repository_read_rm_size(unsigned int ppe_id, u64 *rm_size);
int ps3_repository_read_region_total(u64 *region_total);
int ps3_repository_read_mm_info(u64 *rm_base, u64 *rm_size,
u64 *region_total);
+int ps3_repository_read_highmem_region_count(unsigned int *region_count);
+int ps3_repository_read_highmem_base(unsigned int region_index,
+ u64 *highmem_base);
+int ps3_repository_read_highmem_size(unsigned int region_index,
+ u64 *highmem_size);
+int ps3_repository_read_highmem_info(unsigned int region_index,
+ u64 *highmem_base, u64 *highmem_size);
+
+int ps3_repository_write_highmem_region_count(unsigned int region_count);
+int ps3_repository_write_highmem_base(unsigned int region_index,
+ u64 highmem_base);
+int ps3_repository_write_highmem_size(unsigned int region_index,
+ u64 highmem_size);
+int ps3_repository_write_highmem_info(unsigned int region_index,
+ u64 highmem_base, u64 highmem_size);
+int ps3_repository_delete_highmem_info(unsigned int region_index);
/* repository pme info */
diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c
index 7bdfea336f5..9b47ba7a5de 100644
--- a/arch/powerpc/platforms/ps3/repository.c
+++ b/arch/powerpc/platforms/ps3/repository.c
@@ -779,6 +779,72 @@ int ps3_repository_read_mm_info(u64 *rm_base, u64 *rm_size, u64 *region_total)
}
/**
+ * ps3_repository_read_highmem_region_count - Read the number of highmem regions
+ *
+ * Bootloaders must arrange the repository nodes such that regions are indexed
+ * with a region_index from 0 to region_count-1.
+ */
+
+int ps3_repository_read_highmem_region_count(unsigned int *region_count)
+{
+ int result;
+ u64 v1 = 0;
+
+ result = read_node(PS3_LPAR_ID_CURRENT,
+ make_first_field("highmem", 0),
+ make_field("region", 0),
+ make_field("count", 0),
+ 0,
+ &v1, NULL);
+ *region_count = v1;
+ return result;
+}
+
+
+int ps3_repository_read_highmem_base(unsigned int region_index,
+ u64 *highmem_base)
+{
+ return read_node(PS3_LPAR_ID_CURRENT,
+ make_first_field("highmem", 0),
+ make_field("region", region_index),
+ make_field("base", 0),
+ 0,
+ highmem_base, NULL);
+}
+
+int ps3_repository_read_highmem_size(unsigned int region_index,
+ u64 *highmem_size)
+{
+ return read_node(PS3_LPAR_ID_CURRENT,
+ make_first_field("highmem", 0),
+ make_field("region", region_index),
+ make_field("size", 0),
+ 0,
+ highmem_size, NULL);
+}
+
+/**
+ * ps3_repository_read_highmem_info - Read high memory region info
+ * @region_index: Region index, {0,..,region_count-1}.
+ * @highmem_base: High memory base address.
+ * @highmem_size: High memory size.
+ *
+ * Bootloaders that preallocate highmem regions must place the
+ * region info into the repository at these well known nodes.
+ */
+
+int ps3_repository_read_highmem_info(unsigned int region_index,
+ u64 *highmem_base, u64 *highmem_size)
+{
+ int result;
+
+ *highmem_base = 0;
+ result = ps3_repository_read_highmem_base(region_index, highmem_base);
+ return result ? result
+ : ps3_repository_read_highmem_size(region_index, highmem_size);
+}
+
+/**
* ps3_repository_read_num_spu_reserved - Number of physical spus reserved.
* @num_spu: Number of physical spus.
*/
@@ -1002,6 +1068,138 @@ int ps3_repository_read_lpm_privileges(unsigned int be_index, u64 *lpar,
lpar, rights);
}
+#if defined(CONFIG_PS3_REPOSITORY_WRITE)
+
+static int create_node(u64 n1, u64 n2, u64 n3, u64 n4, u64 v1, u64 v2)
+{
+ int result;
+
+ dump_node(0, n1, n2, n3, n4, v1, v2);
+
+ result = lv1_create_repository_node(n1, n2, n3, n4, v1, v2);
+
+ if (result) {
+ pr_devel("%s:%d: lv1_create_repository_node failed: %s\n",
+ __func__, __LINE__, ps3_result(result));
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int delete_node(u64 n1, u64 n2, u64 n3, u64 n4)
+{
+ int result;
+
+ dump_node(0, n1, n2, n3, n4, 0, 0);
+
+ result = lv1_delete_repository_node(n1, n2, n3, n4);
+
+ if (result) {
+ pr_devel("%s:%d: lv1_delete_repository_node failed: %s\n",
+ __func__, __LINE__, ps3_result(result));
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int write_node(u64 n1, u64 n2, u64 n3, u64 n4, u64 v1, u64 v2)
+{
+ int result;
+
+ result = create_node(n1, n2, n3, n4, v1, v2);
+
+ if (!result)
+ return 0;
+
+ result = lv1_write_repository_node(n1, n2, n3, n4, v1, v2);
+
+ if (result) {
+ pr_devel("%s:%d: lv1_write_repository_node failed: %s\n",
+ __func__, __LINE__, ps3_result(result));
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+int ps3_repository_write_highmem_region_count(unsigned int region_count)
+{
+ int result;
+ u64 v1 = (u64)region_count;
+
+ result = write_node(
+ make_first_field("highmem", 0),
+ make_field("region", 0),
+ make_field("count", 0),
+ 0,
+ v1, 0);
+ return result;
+}
+
+int ps3_repository_write_highmem_base(unsigned int region_index,
+ u64 highmem_base)
+{
+ return write_node(
+ make_first_field("highmem", 0),
+ make_field("region", region_index),
+ make_field("base", 0),
+ 0,
+ highmem_base, 0);
+}
+
+int ps3_repository_write_highmem_size(unsigned int region_index,
+ u64 highmem_size)
+{
+ return write_node(
+ make_first_field("highmem", 0),
+ make_field("region", region_index),
+ make_field("size", 0),
+ 0,
+ highmem_size, 0);
+}
+
+int ps3_repository_write_highmem_info(unsigned int region_index,
+ u64 highmem_base, u64 highmem_size)
+{
+ int result;
+
+ result = ps3_repository_write_highmem_base(region_index, highmem_base);
+ return result ? result
+ : ps3_repository_write_highmem_size(region_index, highmem_size);
+}
+
+static int ps3_repository_delete_highmem_base(unsigned int region_index)
+{
+ return delete_node(
+ make_first_field("highmem", 0),
+ make_field("region", region_index),
+ make_field("base", 0),
+ 0);
+}
+
+static int ps3_repository_delete_highmem_size(unsigned int region_index)
+{
+ return delete_node(
+ make_first_field("highmem", 0),
+ make_field("region", region_index),
+ make_field("size", 0),
+ 0);
+}
+
+int ps3_repository_delete_highmem_info(unsigned int region_index)
+{
+ int result;
+
+ result = ps3_repository_delete_highmem_base(region_index);
+ result += ps3_repository_delete_highmem_size(region_index);
+
+ return result ? -1 : 0;
+}
+
+#endif /* defined(CONFIG_PS3_WRITE_REPOSITORY) */
+
#if defined(DEBUG)
int ps3_repository_dump_resource_info(const struct ps3_repository_device *repo)
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index aadbe4f6d53..837cf49357e 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -30,9 +30,9 @@ config PPC_SPLPAR
two or more partitions.
config EEH
- bool "PCI Extended Error Handling (EEH)" if EXPERT
+ bool
depends on PPC_PSERIES && PCI
- default y if !EXPERT
+ default y
config PSERIES_MSI
bool
@@ -67,7 +67,7 @@ config IO_EVENT_IRQ
This option will only enable the IO event platform code. You
will still need to enable or compile the actual drivers
- that use this infrastruture to handle IO event interrupts.
+ that use this infrastructure to handle IO event interrupts.
Say Y if you are unsure.
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 309d38ef732..ecd394cf34e 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -489,7 +489,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
* a stack trace will help the device-driver authors figure
* out what happened. So print that out.
*/
- dump_stack();
+ WARN(1, "EEH: failure detected\n");
return 1;
dn_unlock:
@@ -1076,7 +1076,7 @@ static void eeh_add_device_late(struct pci_dev *dev)
pr_debug("EEH: Adding device %s\n", pci_name(dev));
dn = pci_device_to_OF_node(dev);
- edev = pci_dev_to_eeh_dev(dev);
+ edev = of_node_to_eeh_dev(dn);
if (edev->pdev == dev) {
pr_debug("EEH: Already referenced !\n");
return;
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 0915b1ad66c..2d311c0caf8 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -106,7 +106,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
tcep++;
}
- if (tbl->it_type == TCE_PCI_SWINV_CREATE)
+ if (tbl->it_type & TCE_PCI_SWINV_CREATE)
tce_invalidate_pSeries_sw(tbl, tces, tcep - 1);
return 0;
}
@@ -121,7 +121,7 @@ static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
while (npages--)
*(tcep++) = 0;
- if (tbl->it_type == TCE_PCI_SWINV_FREE)
+ if (tbl->it_type & TCE_PCI_SWINV_FREE)
tce_invalidate_pSeries_sw(tbl, tces, tcep - 1);
}
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 36f957f3184..8733a86ad52 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -68,9 +68,7 @@ static const char *pseries_nvram_os_partitions[] = {
};
static void oops_to_nvram(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason,
- const char *old_msgs, unsigned long old_len,
- const char *new_msgs, unsigned long new_len);
+ enum kmsg_dump_reason reason);
static struct kmsg_dumper nvram_kmsg_dumper = {
.dump = oops_to_nvram
@@ -504,28 +502,6 @@ int __init pSeries_nvram_init(void)
}
/*
- * Try to capture the last capture_len bytes of the printk buffer. Return
- * the amount actually captured.
- */
-static size_t capture_last_msgs(const char *old_msgs, size_t old_len,
- const char *new_msgs, size_t new_len,
- char *captured, size_t capture_len)
-{
- if (new_len >= capture_len) {
- memcpy(captured, new_msgs + (new_len - capture_len),
- capture_len);
- return capture_len;
- } else {
- /* Grab the end of old_msgs. */
- size_t old_tail_len = min(old_len, capture_len - new_len);
- memcpy(captured, old_msgs + (old_len - old_tail_len),
- old_tail_len);
- memcpy(captured + old_tail_len, new_msgs, new_len);
- return old_tail_len + new_len;
- }
-}
-
-/*
* Are we using the ibm,rtas-log for oops/panic reports? And if so,
* would logging this oops/panic overwrite an RTAS event that rtas_errd
* hasn't had a chance to read and process? Return 1 if so, else 0.
@@ -541,27 +517,6 @@ static int clobbering_unread_rtas_event(void)
NVRAM_RTAS_READ_TIMEOUT);
}
-/* Squeeze out each line's <n> severity prefix. */
-static size_t elide_severities(char *buf, size_t len)
-{
- char *in, *out, *buf_end = buf + len;
- /* Assume a <n> at the very beginning marks the start of a line. */
- int newline = 1;
-
- in = out = buf;
- while (in < buf_end) {
- if (newline && in+3 <= buf_end &&
- *in == '<' && isdigit(in[1]) && in[2] == '>') {
- in += 3;
- newline = 0;
- } else {
- newline = (*in == '\n');
- *out++ = *in++;
- }
- }
- return out - buf;
-}
-
/* Derived from logfs_compress() */
static int nvram_compress(const void *in, void *out, size_t inlen,
size_t outlen)
@@ -619,9 +574,7 @@ static int zip_oops(size_t text_len)
* partition. If that's too much, go back and capture uncompressed text.
*/
static void oops_to_nvram(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason,
- const char *old_msgs, unsigned long old_len,
- const char *new_msgs, unsigned long new_len)
+ enum kmsg_dump_reason reason)
{
static unsigned int oops_count = 0;
static bool panicking = false;
@@ -660,14 +613,14 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
return;
if (big_oops_buf) {
- text_len = capture_last_msgs(old_msgs, old_len,
- new_msgs, new_len, big_oops_buf, big_oops_buf_sz);
- text_len = elide_severities(big_oops_buf, text_len);
+ kmsg_dump_get_buffer(dumper, false,
+ big_oops_buf, big_oops_buf_sz, &text_len);
rc = zip_oops(text_len);
}
if (rc != 0) {
- text_len = capture_last_msgs(old_msgs, old_len,
- new_msgs, new_len, oops_data, oops_data_sz);
+ kmsg_dump_rewind(dumper);
+ kmsg_dump_get_buffer(dumper, true,
+ oops_data, oops_data_sz, &text_len);
err_type = ERR_TYPE_KERNEL_PANIC;
*oops_len = (u16) text_len;
}
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index 342797fc0f9..13e8cc43adf 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -22,12 +22,12 @@ static inline long poll_pending(void)
static inline u8 get_cede_latency_hint(void)
{
- return get_lppaca()->gpr5_dword.fields.cede_latency_hint;
+ return get_lppaca()->cede_latency_hint;
}
static inline void set_cede_latency_hint(u8 latency_hint)
{
- get_lppaca()->gpr5_dword.fields.cede_latency_hint = latency_hint;
+ get_lppaca()->cede_latency_hint = latency_hint;
}
static inline long cede_processor(void)
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
index 41a34bc4a9a..e61483e8e96 100644
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -106,7 +106,7 @@ static void check_and_cede_processor(void)
* we first hard disable then check.
*/
hard_irq_disable();
- if (get_paca()->irq_happened == 0)
+ if (!lazy_irq_pending())
cede_processor();
}
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 168651acdd8..7b3bf76ef83 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -103,11 +103,13 @@ int pSeries_reconfig_notifier_register(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&pSeries_reconfig_chain, nb);
}
+EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_register);
void pSeries_reconfig_notifier_unregister(struct notifier_block *nb)
{
blocking_notifier_chain_unregister(&pSeries_reconfig_chain, nb);
}
+EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_unregister);
int pSeries_reconfig_notify(unsigned long action, void *p)
{
@@ -426,6 +428,7 @@ static int do_remove_property(char *buf, size_t bufsize)
static int do_update_property(char *buf, size_t bufsize)
{
struct device_node *np;
+ struct pSeries_reconfig_prop_update upd_value;
unsigned char *value;
char *name, *end, *next_prop;
int rc, length;
@@ -454,6 +457,10 @@ static int do_update_property(char *buf, size_t bufsize)
return -ENODEV;
}
+ upd_value.node = np;
+ upd_value.property = newprop;
+ pSeries_reconfig_notify(PSERIES_UPDATE_PROPERTY, &upd_value);
+
rc = prom_update_property(np, newprop, oldprop);
if (rc)
return rc;
diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c
index d3be961e2ae..10386b676d8 100644
--- a/arch/powerpc/sysdev/cpm2_pic.c
+++ b/arch/powerpc/sysdev/cpm2_pic.c
@@ -51,8 +51,7 @@
static intctl_cpm2_t __iomem *cpm2_intctl;
static struct irq_domain *cpm2_pic_host;
-#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
-static unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
+static unsigned long ppc_cached_irq_mask[2]; /* 2 32-bit registers */
static const u_char irq_to_siureg[] = {
1, 1, 1, 1, 1, 1, 1, 1,
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c
index d5f5416be31..b724622c3a0 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.c
+++ b/arch/powerpc/sysdev/mpc8xx_pic.c
@@ -18,69 +18,45 @@
extern int cpm_get_irq(struct pt_regs *regs);
static struct irq_domain *mpc8xx_pic_host;
-#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
-static unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
+static unsigned long mpc8xx_cached_irq_mask;
static sysconf8xx_t __iomem *siu_reg;
-int cpm_get_irq(struct pt_regs *regs);
+static inline unsigned long mpc8xx_irqd_to_bit(struct irq_data *d)
+{
+ return 0x80000000 >> irqd_to_hwirq(d);
+}
static void mpc8xx_unmask_irq(struct irq_data *d)
{
- int bit, word;
- unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
-
- bit = irq_nr & 0x1f;
- word = irq_nr >> 5;
-
- ppc_cached_irq_mask[word] |= (1 << (31-bit));
- out_be32(&siu_reg->sc_simask, ppc_cached_irq_mask[word]);
+ mpc8xx_cached_irq_mask |= mpc8xx_irqd_to_bit(d);
+ out_be32(&siu_reg->sc_simask, mpc8xx_cached_irq_mask);
}
static void mpc8xx_mask_irq(struct irq_data *d)
{
- int bit, word;
- unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
-
- bit = irq_nr & 0x1f;
- word = irq_nr >> 5;
-
- ppc_cached_irq_mask[word] &= ~(1 << (31-bit));
- out_be32(&siu_reg->sc_simask, ppc_cached_irq_mask[word]);
+ mpc8xx_cached_irq_mask &= ~mpc8xx_irqd_to_bit(d);
+ out_be32(&siu_reg->sc_simask, mpc8xx_cached_irq_mask);
}
static void mpc8xx_ack(struct irq_data *d)
{
- int bit;
- unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
-
- bit = irq_nr & 0x1f;
- out_be32(&siu_reg->sc_sipend, 1 << (31-bit));
+ out_be32(&siu_reg->sc_sipend, mpc8xx_irqd_to_bit(d));
}
static void mpc8xx_end_irq(struct irq_data *d)
{
- int bit, word;
- unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
-
- bit = irq_nr & 0x1f;
- word = irq_nr >> 5;
-
- ppc_cached_irq_mask[word] |= (1 << (31-bit));
- out_be32(&siu_reg->sc_simask, ppc_cached_irq_mask[word]);
+ mpc8xx_cached_irq_mask |= mpc8xx_irqd_to_bit(d);
+ out_be32(&siu_reg->sc_simask, mpc8xx_cached_irq_mask);
}
static int mpc8xx_set_irq_type(struct irq_data *d, unsigned int flow_type)
{
- if (flow_type & IRQ_TYPE_EDGE_FALLING) {
- irq_hw_number_t hw = (unsigned int)irqd_to_hwirq(d);
+ /* only external IRQ senses are programmable */
+ if ((flow_type & IRQ_TYPE_EDGE_FALLING) && !(irqd_to_hwirq(d) & 1)) {
unsigned int siel = in_be32(&siu_reg->sc_siel);
-
- /* only external IRQ senses are programmable */
- if ((hw & 1) == 0) {
- siel |= (0x80000000 >> hw);
- out_be32(&siu_reg->sc_siel, siel);
- __irq_set_handler_locked(d->irq, handle_edge_irq);
- }
+ siel |= mpc8xx_irqd_to_bit(d);
+ out_be32(&siu_reg->sc_siel, siel);
+ __irq_set_handler_locked(d->irq, handle_edge_irq);
}
return 0;
}
@@ -132,6 +108,9 @@ static int mpc8xx_pic_host_xlate(struct irq_domain *h, struct device_node *ct,
IRQ_TYPE_EDGE_FALLING,
};
+ if (intspec[0] > 0x1f)
+ return 0;
+
*out_hwirq = intspec[0];
if (intsize > 1 && intspec[1] < 4)
*out_flags = map_pic_senses[intspec[1]];
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 9ac71ebd2c4..395af134774 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -604,18 +604,14 @@ static struct mpic *mpic_find(unsigned int irq)
}
/* Determine if the linux irq is an IPI */
-static unsigned int mpic_is_ipi(struct mpic *mpic, unsigned int irq)
+static unsigned int mpic_is_ipi(struct mpic *mpic, unsigned int src)
{
- unsigned int src = virq_to_hw(irq);
-
return (src >= mpic->ipi_vecs[0] && src <= mpic->ipi_vecs[3]);
}
/* Determine if the linux irq is a timer */
-static unsigned int mpic_is_tm(struct mpic *mpic, unsigned int irq)
+static unsigned int mpic_is_tm(struct mpic *mpic, unsigned int src)
{
- unsigned int src = virq_to_hw(irq);
-
return (src >= mpic->timer_vecs[0] && src <= mpic->timer_vecs[7]);
}
@@ -876,21 +872,45 @@ int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type)
if (src >= mpic->num_sources)
return -EINVAL;
+ vold = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI));
+
+ /* We don't support "none" type */
if (flow_type == IRQ_TYPE_NONE)
- if (mpic->senses && src < mpic->senses_count)
- flow_type = mpic->senses[src];
- if (flow_type == IRQ_TYPE_NONE)
- flow_type = IRQ_TYPE_LEVEL_LOW;
+ flow_type = IRQ_TYPE_DEFAULT;
+
+ /* Default: read HW settings */
+ if (flow_type == IRQ_TYPE_DEFAULT) {
+ switch(vold & (MPIC_INFO(VECPRI_POLARITY_MASK) |
+ MPIC_INFO(VECPRI_SENSE_MASK))) {
+ case MPIC_INFO(VECPRI_SENSE_EDGE) |
+ MPIC_INFO(VECPRI_POLARITY_POSITIVE):
+ flow_type = IRQ_TYPE_EDGE_RISING;
+ break;
+ case MPIC_INFO(VECPRI_SENSE_EDGE) |
+ MPIC_INFO(VECPRI_POLARITY_NEGATIVE):
+ flow_type = IRQ_TYPE_EDGE_FALLING;
+ break;
+ case MPIC_INFO(VECPRI_SENSE_LEVEL) |
+ MPIC_INFO(VECPRI_POLARITY_POSITIVE):
+ flow_type = IRQ_TYPE_LEVEL_HIGH;
+ break;
+ case MPIC_INFO(VECPRI_SENSE_LEVEL) |
+ MPIC_INFO(VECPRI_POLARITY_NEGATIVE):
+ flow_type = IRQ_TYPE_LEVEL_LOW;
+ break;
+ }
+ }
+ /* Apply to irq desc */
irqd_set_trigger_type(d, flow_type);
+ /* Apply to HW */
if (mpic_is_ht_interrupt(mpic, src))
vecpri = MPIC_VECPRI_POLARITY_POSITIVE |
MPIC_VECPRI_SENSE_EDGE;
else
vecpri = mpic_type_to_vecpri(mpic, flow_type);
- vold = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI));
vnew = vold & ~(MPIC_INFO(VECPRI_POLARITY_MASK) |
MPIC_INFO(VECPRI_SENSE_MASK));
vnew |= vecpri;
@@ -1026,7 +1046,7 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq,
irq_set_chip_and_handler(virq, chip, handle_fasteoi_irq);
/* Set default irq type */
- irq_set_irq_type(virq, IRQ_TYPE_NONE);
+ irq_set_irq_type(virq, IRQ_TYPE_DEFAULT);
/* If the MPIC was reset, then all vectors have already been
* initialized. Otherwise, a per source lazy initialization
@@ -1417,12 +1437,6 @@ void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
mpic->num_sources = isu_first + mpic->isu_size;
}
-void __init mpic_set_default_senses(struct mpic *mpic, u8 *senses, int count)
-{
- mpic->senses = senses;
- mpic->senses_count = count;
-}
-
void __init mpic_init(struct mpic *mpic)
{
int i, cpu;
@@ -1555,12 +1569,12 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
return;
raw_spin_lock_irqsave(&mpic_lock, flags);
- if (mpic_is_ipi(mpic, irq)) {
+ if (mpic_is_ipi(mpic, src)) {
reg = mpic_ipi_read(src - mpic->ipi_vecs[0]) &
~MPIC_VECPRI_PRIORITY_MASK;
mpic_ipi_write(src - mpic->ipi_vecs[0],
reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
- } else if (mpic_is_tm(mpic, irq)) {
+ } else if (mpic_is_tm(mpic, src)) {
reg = mpic_tm_read(src - mpic->timer_vecs[0]) &
~MPIC_VECPRI_PRIORITY_MASK;
mpic_tm_write(src - mpic->timer_vecs[0],
diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
index 6e7fa386e76..483d8fa72e8 100644
--- a/arch/powerpc/sysdev/mpic_msgr.c
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -27,6 +27,7 @@
static struct mpic_msgr **mpic_msgrs;
static unsigned int mpic_msgr_count;
+static DEFINE_RAW_SPINLOCK(msgrs_lock);
static inline void _mpic_msgr_mer_write(struct mpic_msgr *msgr, u32 value)
{
@@ -56,12 +57,11 @@ struct mpic_msgr *mpic_msgr_get(unsigned int reg_num)
if (reg_num >= mpic_msgr_count)
return ERR_PTR(-ENODEV);
- raw_spin_lock_irqsave(&msgr->lock, flags);
- if (mpic_msgrs[reg_num]->in_use == MSGR_FREE) {
- msgr = mpic_msgrs[reg_num];
+ raw_spin_lock_irqsave(&msgrs_lock, flags);
+ msgr = mpic_msgrs[reg_num];
+ if (msgr->in_use == MSGR_FREE)
msgr->in_use = MSGR_INUSE;
- }
- raw_spin_unlock_irqrestore(&msgr->lock, flags);
+ raw_spin_unlock_irqrestore(&msgrs_lock, flags);
return msgr;
}
@@ -228,7 +228,7 @@ static __devinit int mpic_msgr_probe(struct platform_device *dev)
reg_number = block_number * MPIC_MSGR_REGISTERS_PER_BLOCK + i;
msgr->base = msgr_block_addr + i * MPIC_MSGR_STRIDE;
- msgr->mer = msgr->base + MPIC_MSGR_MER_OFFSET;
+ msgr->mer = (u32 *)((u8 *)msgr->base + MPIC_MSGR_MER_OFFSET);
msgr->in_use = MSGR_FREE;
msgr->num = i;
raw_spin_lock_init(&msgr->lock);
diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c
index 1c2d7af17bb..82c6702dcba 100644
--- a/arch/powerpc/sysdev/ppc4xx_msi.c
+++ b/arch/powerpc/sysdev/ppc4xx_msi.c
@@ -28,10 +28,11 @@
#include <linux/of_platform.h>
#include <linux/interrupt.h>
#include <linux/export.h>
+#include <linux/kernel.h>
#include <asm/prom.h>
#include <asm/hw_irq.h>
#include <asm/ppc-pci.h>
-#include <boot/dcr.h>
+#include <asm/dcr.h>
#include <asm/dcr-regs.h>
#include <asm/msi_bitmap.h>
@@ -43,13 +44,14 @@
#define PEIH_FLUSH0 0x30
#define PEIH_FLUSH1 0x38
#define PEIH_CNTRST 0x48
-#define NR_MSI_IRQS 4
+
+static int msi_irqs;
struct ppc4xx_msi {
u32 msi_addr_lo;
u32 msi_addr_hi;
void __iomem *msi_regs;
- int msi_virqs[NR_MSI_IRQS];
+ int *msi_virqs;
struct msi_bitmap bitmap;
struct device_node *msi_dev;
};
@@ -61,7 +63,7 @@ static int ppc4xx_msi_init_allocator(struct platform_device *dev,
{
int err;
- err = msi_bitmap_alloc(&msi_data->bitmap, NR_MSI_IRQS,
+ err = msi_bitmap_alloc(&msi_data->bitmap, msi_irqs,
dev->dev.of_node);
if (err)
return err;
@@ -83,6 +85,11 @@ static int ppc4xx_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
struct msi_desc *entry;
struct ppc4xx_msi *msi_data = &ppc4xx_msi;
+ msi_data->msi_virqs = kmalloc((msi_irqs) * sizeof(int),
+ GFP_KERNEL);
+ if (!msi_data->msi_virqs)
+ return -ENOMEM;
+
list_for_each_entry(entry, &dev->msi_list, list) {
int_no = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1);
if (int_no >= 0)
@@ -150,12 +157,11 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev,
if (!sdr_addr)
return -1;
- SDR0_WRITE(sdr_addr, (u64)res.start >> 32); /*HIGH addr */
- SDR0_WRITE(sdr_addr + 1, res.start & 0xFFFFFFFF); /* Low addr */
-
+ mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start)); /*HIGH addr */
+ mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start)); /* Low addr */
msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi");
- if (msi->msi_dev)
+ if (!msi->msi_dev)
return -ENODEV;
msi->msi_regs = of_iomap(msi->msi_dev, 0);
@@ -167,9 +173,12 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev,
(u32) (msi->msi_regs + PEIH_TERMADH), (u32) (msi->msi_regs));
msi_virt = dma_alloc_coherent(&dev->dev, 64, &msi_phys, GFP_KERNEL);
- msi->msi_addr_hi = 0x0;
- msi->msi_addr_lo = (u32) msi_phys;
- dev_dbg(&dev->dev, "PCIE-MSI: msi address 0x%x\n", msi->msi_addr_lo);
+ if (!msi_virt)
+ return -ENOMEM;
+ msi->msi_addr_hi = upper_32_bits(msi_phys);
+ msi->msi_addr_lo = lower_32_bits(msi_phys & 0xffffffff);
+ dev_dbg(&dev->dev, "PCIE-MSI: msi address high 0x%x, low 0x%x\n",
+ msi->msi_addr_hi, msi->msi_addr_lo);
/* Progam the Interrupt handler Termination addr registers */
out_be32(msi->msi_regs + PEIH_TERMADH, msi->msi_addr_hi);
@@ -185,6 +194,8 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev,
out_be32(msi->msi_regs + PEIH_MSIED, *msi_data);
out_be32(msi->msi_regs + PEIH_MSIMK, *msi_mask);
+ dma_free_coherent(&dev->dev, 64, msi_virt, msi_phys);
+
return 0;
}
@@ -194,7 +205,7 @@ static int ppc4xx_of_msi_remove(struct platform_device *dev)
int i;
int virq;
- for (i = 0; i < NR_MSI_IRQS; i++) {
+ for (i = 0; i < msi_irqs; i++) {
virq = msi->msi_virqs[i];
if (virq != NO_IRQ)
irq_dispose_mapping(virq);
@@ -215,8 +226,6 @@ static int __devinit ppc4xx_msi_probe(struct platform_device *dev)
struct resource res;
int err = 0;
- msi = &ppc4xx_msi;/*keep the msi data for further use*/
-
dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n");
msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL);
@@ -234,6 +243,10 @@ static int __devinit ppc4xx_msi_probe(struct platform_device *dev)
goto error_out;
}
+ msi_irqs = of_irq_count(dev->dev.of_node);
+ if (!msi_irqs)
+ return -ENODEV;
+
if (ppc4xx_setup_pcieh_hw(dev, res, msi))
goto error_out;
@@ -242,6 +255,7 @@ static int __devinit ppc4xx_msi_probe(struct platform_device *dev)
dev_err(&dev->dev, "Error allocating MSI bitmap\n");
goto error_out;
}
+ ppc4xx_msi = *msi;
ppc_md.setup_msi_irqs = ppc4xx_setup_msi_irqs;
ppc_md.teardown_msi_irqs = ppc4xx_teardown_msi_irqs;
diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
index 49a3ece1c6b..702256a1ca1 100644
--- a/arch/powerpc/sysdev/scom.c
+++ b/arch/powerpc/sysdev/scom.c
@@ -22,6 +22,7 @@
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <asm/debug.h>
#include <asm/prom.h>
#include <asm/scom.h>
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index ea5e204e345..cd1d18db92c 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -188,6 +188,7 @@ void xics_migrate_irqs_away(void)
{
int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
unsigned int irq, virq;
+ struct irq_desc *desc;
/* If we used to be the default server, move to the new "boot_cpuid" */
if (hw_cpu == xics_default_server)
@@ -202,8 +203,7 @@ void xics_migrate_irqs_away(void)
/* Allow IPIs again... */
icp_ops->set_priority(DEFAULT_PRIORITY);
- for_each_irq(virq) {
- struct irq_desc *desc;
+ for_each_irq_desc(virq, desc) {
struct irq_chip *chip;
long server;
unsigned long flags;
@@ -212,9 +212,8 @@ void xics_migrate_irqs_away(void)
/* We can't set affinity on ISA interrupts */
if (virq < NUM_ISA_INTERRUPTS)
continue;
- desc = irq_to_desc(virq);
/* We only need to migrate enabled IRQS */
- if (!desc || !desc->action)
+ if (!desc->action)
continue;
if (desc->irq_data.domain != xics_host)
continue;