diff options
Diffstat (limited to 'arch/powerpc')
392 files changed, 10576 insertions, 6482 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 80b94b0add1..88eace4e28c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -111,6 +111,7 @@ config PPC select HAVE_DMA_API_DEBUG select HAVE_OPROFILE select HAVE_DEBUG_KMEMLEAK + select ARCH_HAS_SG_CHAIN select GENERIC_ATOMIC64 if PPC32 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_PERF_EVENTS @@ -146,6 +147,7 @@ config PPC select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW + select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN @@ -181,7 +183,7 @@ config SCHED_OMIT_FRAME_POINTER config ARCH_MAY_HAVE_PC_FDC bool - default !PPC_PSERIES || PCI + default PCI config PPC_OF def_bool y @@ -286,6 +288,10 @@ config PPC_EMULATE_SSTEP bool default y if KPROBES || UPROBES || XMON || HAVE_HW_BREAKPOINT +config ZONE_DMA32 + bool + default y if PPC64 + source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -602,6 +608,10 @@ config PPC_SUBPAGE_PROT to set access permissions (read/write, readonly, or no access) on the 4k subpages of each 64k page. +config PPC_COPRO_BASE + bool + default n + config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on PPC64 && SMP diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 35d16bd2760..ec2e40f2cc1 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -202,9 +202,7 @@ config PPC_EARLY_DEBUG_BEAT config PPC_EARLY_DEBUG_44x bool "Early serial debugging for IBM/AMCC 44x CPUs" - # PPC_EARLY_DEBUG on 440 leaves AS=1 mappings above the TLB high water - # mark, which doesn't work with current 440 KVM. - depends on 44x && !KVM + depends on 44x help Select this to enable early debugging for IBM 44x chips via the inbuilt serial port. If you enable this, ensure you set diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5687e299d0a..132d9c681d6 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -135,6 +135,7 @@ CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4) CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5) CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6) CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7) +CFLAGS-$(CONFIG_POWER8_CPU) += $(call cc-option,-mcpu=power8) # Altivec option not allowed with e500mc64 in GCC. ifeq ($(CONFIG_ALTIVEC),y) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index ccc25eddbcb..8a5bc1cfc6a 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -389,7 +389,12 @@ $(obj)/zImage: $(addprefix $(obj)/, $(image-y)) $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) @rm -f $@; ln $< $@ +# Only install the vmlinux install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) + sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" + +# Install the vmlinux and other built boot targets. +zInstall: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^ # anything not in $(targets) diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi index 5290df83ff3..69ce1026c94 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi @@ -359,6 +359,7 @@ compatible = "fsl,qoriq-core-mux-1.0"; clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; + clock-output-names = "cmux2"; }; mux3: mux3@60 { diff --git a/arch/powerpc/boot/dts/fsl/t2080si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2080si-post.dtsi new file mode 100644 index 00000000000..082ec204406 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t2080si-post.dtsi @@ -0,0 +1,69 @@ +/* + * T2080 Silicon/SoC Device Tree Source (post include) + * + * Copyright 2013 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "t2081si-post.dtsi" + +&soc { +/include/ "qoriq-sata2-0.dtsi" + sata@220000 { + fsl,iommu-parent = <&pamu1>; + fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */ + }; + +/include/ "qoriq-sata2-1.dtsi" + sata@221000 { + fsl,iommu-parent = <&pamu1>; + fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */ + }; +}; + +&rio { + compatible = "fsl,srio"; + interrupts = <16 2 1 11>; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + port1 { + #address-cells = <2>; + #size-cells = <2>; + cell-index = <1>; + }; + + port2 { + #address-cells = <2>; + #size-cells = <2>; + cell-index = <2>; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi new file mode 100644 index 00000000000..aecee9690a8 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -0,0 +1,435 @@ +/* + * T2081 Silicon/SoC Device Tree Source (post include) + * + * Copyright 2013 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +&ifc { + #address-cells = <2>; + #size-cells = <1>; + compatible = "fsl,ifc", "simple-bus"; + interrupts = <25 2 0 0>; +}; + +/* controller at 0x240000 */ +&pci0 { + compatible = "fsl,t2080-pcie", "fsl,qoriq-pcie-v3.0", "fsl,qoriq-pcie"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0x0 0xff>; + interrupts = <20 2 0 0>; + fsl,iommu-parent = <&pamu0>; + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <20 2 0 0>; + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 40 1 0 0 + 0000 0 0 2 &mpic 1 1 0 0 + 0000 0 0 3 &mpic 2 1 0 0 + 0000 0 0 4 &mpic 3 1 0 0 + >; + }; +}; + +/* controller at 0x250000 */ +&pci1 { + compatible = "fsl,t2080-pcie", "fsl,qoriq-pcie-v3.0", "fsl,qoriq-pcie"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0 0xff>; + interrupts = <21 2 0 0>; + fsl,iommu-parent = <&pamu0>; + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <21 2 0 0>; + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 41 1 0 0 + 0000 0 0 2 &mpic 5 1 0 0 + 0000 0 0 3 &mpic 6 1 0 0 + 0000 0 0 4 &mpic 7 1 0 0 + >; + }; +}; + +/* controller at 0x260000 */ +&pci2 { + compatible = "fsl,t2080-pcie", "fsl,qoriq-pcie-v3.0", "fsl,qoriq-pcie"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0x0 0xff>; + interrupts = <22 2 0 0>; + fsl,iommu-parent = <&pamu0>; + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <22 2 0 0>; + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 42 1 0 0 + 0000 0 0 2 &mpic 9 1 0 0 + 0000 0 0 3 &mpic 10 1 0 0 + 0000 0 0 4 &mpic 11 1 0 0 + >; + }; +}; + +/* controller at 0x270000 */ +&pci3 { + compatible = "fsl,t2080-pcie", "fsl,qoriq-pcie-v3.0", "fsl,qoriq-pcie"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0x0 0xff>; + interrupts = <23 2 0 0>; + fsl,iommu-parent = <&pamu0>; + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <23 2 0 0>; + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 43 1 0 0 + 0000 0 0 2 &mpic 0 1 0 0 + 0000 0 0 3 &mpic 4 1 0 0 + 0000 0 0 4 &mpic 8 1 0 0 + >; + }; +}; + +&dcsr { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,dcsr", "simple-bus"; + + dcsr-epu@0 { + compatible = "fsl,t2080-dcsr-epu", "fsl,dcsr-epu"; + interrupts = <52 2 0 0 + 84 2 0 0 + 85 2 0 0 + 94 2 0 0 + 95 2 0 0>; + reg = <0x0 0x1000>; + }; + dcsr-npc { + compatible = "fsl,t2080-dcsr-cnpc", "fsl,dcsr-cnpc"; + reg = <0x1000 0x1000 0x1002000 0x10000>; + }; + dcsr-nxc@2000 { + compatible = "fsl,dcsr-nxc"; + reg = <0x2000 0x1000>; + }; + dcsr-corenet { + compatible = "fsl,dcsr-corenet"; + reg = <0x8000 0x1000 0x1A000 0x1000>; + }; + dcsr-ocn@11000 { + compatible = "fsl,t2080-dcsr-ocn", "fsl,dcsr-ocn"; + reg = <0x11000 0x1000>; + }; + dcsr-ddr@12000 { + compatible = "fsl,dcsr-ddr"; + dev-handle = <&ddr1>; + reg = <0x12000 0x1000>; + }; + dcsr-nal@18000 { + compatible = "fsl,t2080-dcsr-nal", "fsl,dcsr-nal"; + reg = <0x18000 0x1000>; + }; + dcsr-rcpm@22000 { + compatible = "fsl,t2080-dcsr-rcpm", "fsl,dcsr-rcpm"; + reg = <0x22000 0x1000>; + }; + dcsr-snpc@30000 { + compatible = "fsl,t2080-dcsr-snpc", "fsl,dcsr-snpc"; + reg = <0x30000 0x1000 0x1022000 0x10000>; + }; + dcsr-snpc@31000 { + compatible = "fsl,t2080-dcsr-snpc", "fsl,dcsr-snpc"; + reg = <0x31000 0x1000 0x1042000 0x10000>; + }; + dcsr-snpc@32000 { + compatible = "fsl,t2080-dcsr-snpc", "fsl,dcsr-snpc"; + reg = <0x32000 0x1000 0x1062000 0x10000>; + }; + dcsr-cpu-sb-proxy@100000 { + compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy"; + cpu-handle = <&cpu0>; + reg = <0x100000 0x1000 0x101000 0x1000>; + }; + dcsr-cpu-sb-proxy@108000 { + compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy"; + cpu-handle = <&cpu1>; + reg = <0x108000 0x1000 0x109000 0x1000>; + }; + dcsr-cpu-sb-proxy@110000 { + compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy"; + cpu-handle = <&cpu2>; + reg = <0x110000 0x1000 0x111000 0x1000>; + }; + dcsr-cpu-sb-proxy@118000 { + compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy"; + cpu-handle = <&cpu3>; + reg = <0x118000 0x1000 0x119000 0x1000>; + }; +}; + +&soc { + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + compatible = "simple-bus"; + + soc-sram-error { + compatible = "fsl,soc-sram-error"; + interrupts = <16 2 1 29>; + }; + + corenet-law@0 { + compatible = "fsl,corenet-law"; + reg = <0x0 0x1000>; + fsl,num-laws = <32>; + }; + + ddr1: memory-controller@8000 { + compatible = "fsl,qoriq-memory-controller-v4.7", + "fsl,qoriq-memory-controller"; + reg = <0x8000 0x1000>; + interrupts = <16 2 1 23>; + }; + + cpc: l3-cache-controller@10000 { + compatible = "fsl,t2080-l3-cache-controller", "cache"; + reg = <0x10000 0x1000 + 0x11000 0x1000 + 0x12000 0x1000>; + interrupts = <16 2 1 27 + 16 2 1 26 + 16 2 1 25>; + }; + + corenet-cf@18000 { + compatible = "fsl,corenet2-cf", "fsl,corenet-cf"; + reg = <0x18000 0x1000>; + interrupts = <16 2 1 31>; + fsl,ccf-num-csdids = <32>; + fsl,ccf-num-snoopids = <32>; + }; + + iommu@20000 { + compatible = "fsl,pamu-v1.0", "fsl,pamu"; + reg = <0x20000 0x3000>; + fsl,portid-mapping = <0x8000>; + ranges = <0 0x20000 0x3000>; + #address-cells = <1>; + #size-cells = <1>; + interrupts = < + 24 2 0 0 + 16 2 1 30>; + + pamu0: pamu@0 { + reg = <0 0x1000>; + fsl,primary-cache-geometry = <32 1>; + fsl,secondary-cache-geometry = <128 2>; + }; + + pamu1: pamu@1000 { + reg = <0x1000 0x1000>; + fsl,primary-cache-geometry = <32 1>; + fsl,secondary-cache-geometry = <128 2>; + }; + + pamu2: pamu@2000 { + reg = <0x2000 0x1000>; + fsl,primary-cache-geometry = <32 1>; + fsl,secondary-cache-geometry = <128 2>; + }; + }; + +/include/ "qoriq-mpic4.3.dtsi" + + guts: global-utilities@e0000 { + compatible = "fsl,t2080-device-config", "fsl,qoriq-device-config-2.0"; + reg = <0xe0000 0xe00>; + fsl,has-rstcr; + fsl,liodn-bits = <12>; + }; + + clockgen: global-utilities@e1000 { + compatible = "fsl,t2080-clockgen", "fsl,qoriq-clockgen-2.0"; + ranges = <0x0 0xe1000 0x1000>; + reg = <0xe1000 0x1000>; + #address-cells = <1>; + #size-cells = <1>; + + sysclk: sysclk { + #clock-cells = <0>; + compatible = "fsl,qoriq-sysclk-2.0"; + clock-output-names = "sysclk", "fixed-clock"; + }; + + pll0: pll0@800 { + #clock-cells = <1>; + reg = <0x800 4>; + compatible = "fsl,qoriq-core-pll-2.0"; + clocks = <&sysclk>; + clock-output-names = "pll0", "pll0-div2", "pll0-div4"; + }; + + pll1: pll1@820 { + #clock-cells = <1>; + reg = <0x820 4>; + compatible = "fsl,qoriq-core-pll-2.0"; + clocks = <&sysclk>; + clock-output-names = "pll1", "pll1-div2", "pll1-div4"; + }; + + mux0: mux0@0 { + #clock-cells = <0>; + reg = <0x0 4>; + compatible = "fsl,qoriq-core-mux-2.0"; + clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>, + <&pll1 0>, <&pll1 1>, <&pll1 2>; + clock-names = "pll0", "pll0-div2", "pll1-div4", + "pll1", "pll1-div2", "pll1-div4"; + clock-output-names = "cmux0"; + }; + + mux1: mux1@20 { + #clock-cells = <0>; + reg = <0x20 4>; + compatible = "fsl,qoriq-core-mux-2.0"; + clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>, + <&pll1 0>, <&pll1 1>, <&pll1 2>; + clock-names = "pll0", "pll0-div2", "pll1-div4", + "pll1", "pll1-div2", "pll1-div4"; + clock-output-names = "cmux1"; + }; + }; + + rcpm: global-utilities@e2000 { + compatible = "fsl,t2080-rcpm", "fsl,qoriq-rcpm-2.0"; + reg = <0xe2000 0x1000>; + }; + + sfp: sfp@e8000 { + compatible = "fsl,t2080-sfp"; + reg = <0xe8000 0x1000>; + }; + + serdes: serdes@ea000 { + compatible = "fsl,t2080-serdes"; + reg = <0xea000 0x4000>; + }; + +/include/ "elo3-dma-0.dtsi" + dma@100300 { + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */ + }; +/include/ "elo3-dma-1.dtsi" + dma@101300 { + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */ + }; +/include/ "elo3-dma-2.dtsi" + dma@102300 { + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x588>; /* DMA3LIODNR */ + }; + +/include/ "qoriq-espi-0.dtsi" + spi@110000 { + fsl,espi-num-chipselects = <4>; + }; + +/include/ "qoriq-esdhc-0.dtsi" + sdhc@114000 { + compatible = "fsl,t2080-esdhc", "fsl,esdhc"; + fsl,iommu-parent = <&pamu1>; + fsl,liodn-reg = <&guts 0x530>; /* SDMMCLIODNR */ + sdhci,auto-cmd12; + }; +/include/ "qoriq-i2c-0.dtsi" +/include/ "qoriq-i2c-1.dtsi" +/include/ "qoriq-duart-0.dtsi" +/include/ "qoriq-duart-1.dtsi" +/include/ "qoriq-gpio-0.dtsi" +/include/ "qoriq-gpio-1.dtsi" +/include/ "qoriq-gpio-2.dtsi" +/include/ "qoriq-gpio-3.dtsi" +/include/ "qoriq-usb2-mph-0.dtsi" + usb0: usb@210000 { + compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph"; + fsl,iommu-parent = <&pamu1>; + fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */ + phy_type = "utmi"; + port0; + }; +/include/ "qoriq-usb2-dr-0.dtsi" + usb1: usb@211000 { + compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; + fsl,iommu-parent = <&pamu1>; + fsl,liodn-reg = <&guts 0x524>; /* USB1LIODNR */ + dr_mode = "host"; + phy_type = "utmi"; + }; +/include/ "qoriq-sec5.2-0.dtsi" + + L2_1: l2-cache-controller@c20000 { + /* Cluster 0 L2 cache */ + compatible = "fsl,t2080-l2-cache-controller"; + reg = <0xc20000 0x40000>; + next-level-cache = <&cpc>; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi new file mode 100644 index 00000000000..e71ceb0e110 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi @@ -0,0 +1,99 @@ +/* + * T2080/T2081 Silicon/SoC Device Tree Source (pre include) + * + * Copyright 2013 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/dts-v1/; + +/include/ "e6500_power_isa.dtsi" + +/ { + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + aliases { + ccsr = &soc; + dcsr = &dcsr; + + serial0 = &serial0; + serial1 = &serial1; + serial2 = &serial2; + serial3 = &serial3; + + crypto = &crypto; + pci0 = &pci0; + pci1 = &pci1; + pci2 = &pci2; + pci3 = &pci3; + usb0 = &usb0; + usb1 = &usb1; + dma0 = &dma0; + dma1 = &dma1; + dma2 = &dma2; + sdhc = &sdhc; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: PowerPC,e6500@0 { + device_type = "cpu"; + reg = <0 1>; + clocks = <&mux0>; + next-level-cache = <&L2_1>; + fsl,portid-mapping = <0x80000000>; + }; + cpu1: PowerPC,e6500@2 { + device_type = "cpu"; + reg = <2 3>; + clocks = <&mux0>; + next-level-cache = <&L2_1>; + fsl,portid-mapping = <0x80000000>; + }; + cpu2: PowerPC,e6500@4 { + device_type = "cpu"; + reg = <4 5>; + clocks = <&mux0>; + next-level-cache = <&L2_1>; + fsl,portid-mapping = <0x80000000>; + }; + cpu3: PowerPC,e6500@6 { + device_type = "cpu"; + reg = <6 7>; + clocks = <&mux0>; + next-level-cache = <&L2_1>; + fsl,portid-mapping = <0x80000000>; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi index 793669baa13..7e2fc7cdce4 100644 --- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi @@ -476,6 +476,7 @@ /include/ "elo3-dma-0.dtsi" /include/ "elo3-dma-1.dtsi" +/include/ "elo3-dma-2.dtsi" /include/ "qoriq-espi-0.dtsi" spi@110000 { @@ -497,13 +498,13 @@ /include/ "qoriq-gpio-3.dtsi" /include/ "qoriq-usb2-mph-0.dtsi" usb0: usb@210000 { - compatible = "fsl-usb2-mph-v2.4", "fsl-usb2-mph"; + compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph"; phy_type = "utmi"; port0; }; /include/ "qoriq-usb2-dr-0.dtsi" usb1: usb@211000 { - compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr"; + compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; dr_mode = "host"; phy_type = "utmi"; }; diff --git a/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi index d2f157edbe8..261a3abb1a5 100644 --- a/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi @@ -57,6 +57,7 @@ pci3 = &pci3; dma0 = &dma0; dma1 = &dma1; + dma2 = &dma2; sdhc = &sdhc; }; diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi index 2c0e1552d20..7f9d14f5c4d 100644 --- a/arch/powerpc/boot/dts/mpc5121.dtsi +++ b/arch/powerpc/boot/dts/mpc5121.dtsi @@ -498,6 +498,7 @@ compatible = "fsl,mpc5121-dma"; reg = <0x14000 0x1800>; interrupts = <65 0x8>; + #dma-cells = <1>; }; }; diff --git a/arch/powerpc/boot/dts/t1040rdb.dts b/arch/powerpc/boot/dts/t1040rdb.dts new file mode 100644 index 00000000000..79a0bed04c1 --- /dev/null +++ b/arch/powerpc/boot/dts/t1040rdb.dts @@ -0,0 +1,48 @@ +/* + * T1040RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t104xsi-pre.dtsi" +/include/ "t104xrdb.dtsi" + +/ { + model = "fsl,T1040RDB"; + compatible = "fsl,T1040RDB"; + ifc: localbus@ffe124000 { + cpld@3,0 { + compatible = "fsl,t1040rdb-cpld"; + }; + }; +}; + +/include/ "fsl/t1040si-post.dtsi" diff --git a/arch/powerpc/boot/dts/t1042rdb.dts b/arch/powerpc/boot/dts/t1042rdb.dts new file mode 100644 index 00000000000..738c23790e9 --- /dev/null +++ b/arch/powerpc/boot/dts/t1042rdb.dts @@ -0,0 +1,48 @@ +/* + * T1042RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t104xsi-pre.dtsi" +/include/ "t104xrdb.dtsi" + +/ { + model = "fsl,T1042RDB"; + compatible = "fsl,T1042RDB"; + ifc: localbus@ffe124000 { + cpld@3,0 { + compatible = "fsl,t1042rdb-cpld"; + }; + }; +}; + +/include/ "fsl/t1042si-post.dtsi" diff --git a/arch/powerpc/boot/dts/t1042rdb_pi.dts b/arch/powerpc/boot/dts/t1042rdb_pi.dts new file mode 100644 index 00000000000..634f751fa6d --- /dev/null +++ b/arch/powerpc/boot/dts/t1042rdb_pi.dts @@ -0,0 +1,57 @@ +/* + * T1042RDB_PI Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t104xsi-pre.dtsi" +/include/ "t104xrdb.dtsi" + +/ { + model = "fsl,T1042RDB_PI"; + compatible = "fsl,T1042RDB_PI"; + ifc: localbus@ffe124000 { + cpld@3,0 { + compatible = "fsl,t1042rdb_pi-cpld"; + }; + }; + soc: soc@ffe000000 { + i2c@118000 { + rtc@68 { + compatible = "dallas,ds1337"; + reg = <0x68>; + interrupts = <0x2 0x1 0 0>; + }; + }; + }; +}; + +/include/ "fsl/t1042si-post.dtsi" diff --git a/arch/powerpc/boot/dts/t104xrdb.dtsi b/arch/powerpc/boot/dts/t104xrdb.dtsi new file mode 100644 index 00000000000..1cf0f3c5f7e --- /dev/null +++ b/arch/powerpc/boot/dts/t104xrdb.dtsi @@ -0,0 +1,156 @@ +/* + * T1040RDB/T1042RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/ { + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x08000000 + 2 0 0xf 0xff800000 0x00010000 + 3 0 0xf 0xffdf0000 0x00008000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x8000000>; + bank-width = <2>; + device-width = <1>; + }; + + nand@2,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + + cpld@3,0 { + reg = <3 0 0x300>; + }; + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + + spi@110000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "micron,n25q512a"; + reg = <0>; + spi-max-frequency = <10000000>; /* input clock */ + }; + }; + + i2c@118100 { + pca9546@77 { + compatible = "nxp,pca9546"; + reg = <0x77>; + #address-cells = <1>; + #size-cells = <0>; + }; + }; + + }; + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + reg = <0xf 0xfe250000 0 0x10000>; + ranges = <0x02000000 0x0 0xe0000000 0xc 0x10000000 0x0 0x10000000 + 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci2: pcie@ffe260000 { + reg = <0xf 0xfe260000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci3: pcie@ffe270000 { + reg = <0xf 0xfe270000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/t2080qds.dts b/arch/powerpc/boot/dts/t2080qds.dts new file mode 100644 index 00000000000..aa1d6d8c169 --- /dev/null +++ b/arch/powerpc/boot/dts/t2080qds.dts @@ -0,0 +1,57 @@ +/* + * T2080QDS Device Tree Source + * + * Copyright 2013 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t208xsi-pre.dtsi" +/include/ "t208xqds.dtsi" + +/ { + model = "fsl,T2080QDS"; + compatible = "fsl,T2080QDS"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + rio: rapidio@ffe0c0000 { + reg = <0xf 0xfe0c0000 0 0x11000>; + + port1 { + ranges = <0 0 0xc 0x20000000 0 0x10000000>; + }; + port2 { + ranges = <0 0 0xc 0x30000000 0 0x10000000>; + }; + }; +}; + +/include/ "fsl/t2080si-post.dtsi" diff --git a/arch/powerpc/boot/dts/t2080rdb.dts b/arch/powerpc/boot/dts/t2080rdb.dts new file mode 100644 index 00000000000..e8891047600 --- /dev/null +++ b/arch/powerpc/boot/dts/t2080rdb.dts @@ -0,0 +1,57 @@ +/* + * T2080PCIe-RDB Board Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t208xsi-pre.dtsi" +/include/ "t208xrdb.dtsi" + +/ { + model = "fsl,T2080RDB"; + compatible = "fsl,T2080RDB"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + rio: rapidio@ffe0c0000 { + reg = <0xf 0xfe0c0000 0 0x11000>; + + port1 { + ranges = <0 0 0xc 0x20000000 0 0x10000000>; + }; + port2 { + ranges = <0 0 0xc 0x30000000 0 0x10000000>; + }; + }; +}; + +/include/ "fsl/t2080si-post.dtsi" diff --git a/arch/powerpc/boot/dts/t2081qds.dts b/arch/powerpc/boot/dts/t2081qds.dts new file mode 100644 index 00000000000..8ec80a71e10 --- /dev/null +++ b/arch/powerpc/boot/dts/t2081qds.dts @@ -0,0 +1,46 @@ +/* + * T2081QDS Device Tree Source + * + * Copyright 2013 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t208xsi-pre.dtsi" +/include/ "t208xqds.dtsi" + +/ { + model = "fsl,T2081QDS"; + compatible = "fsl,T2081QDS"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; +}; + +/include/ "fsl/t2081si-post.dtsi" diff --git a/arch/powerpc/boot/dts/t208xqds.dtsi b/arch/powerpc/boot/dts/t208xqds.dtsi new file mode 100644 index 00000000000..555dc6e03d8 --- /dev/null +++ b/arch/powerpc/boot/dts/t208xqds.dtsi @@ -0,0 +1,239 @@ +/* + * T2080/T2081 QDS Device Tree Source + * + * Copyright 2013 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/ { + model = "fsl,T2080QDS"; + compatible = "fsl,T2080QDS"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x08000000 + 2 0 0xf 0xff800000 0x00010000 + 3 0 0xf 0xffdf0000 0x00008000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x8000000>; + bank-width = <2>; + device-width = <1>; + }; + + nand@2,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + + boardctrl: board-control@3,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,fpga-qixis"; + reg = <3 0 0x300>; + ranges = <0 3 0 0x300>; + }; + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + spi@110000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "micron,n25q128a11"; /* 16MB */ + reg = <0>; + spi-max-frequency = <40000000>; /* input clock */ + }; + + flash@1 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "sst,sst25wf040"; + reg = <1>; + spi-max-frequency = <35000000>; + }; + + flash@2 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "eon,en25s64"; + reg = <2>; + spi-max-frequency = <35000000>; + }; + }; + + i2c@118000 { + pca9547@77 { + compatible = "nxp,pca9547"; + reg = <0x77>; + #address-cells = <1>; + #size-cells = <0>; + + i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0>; + + eeprom@50 { + compatible = "at24,24c512"; + reg = <0x50>; + }; + + eeprom@51 { + compatible = "at24,24c02"; + reg = <0x51>; + }; + + eeprom@57 { + compatible = "at24,24c02"; + reg = <0x57>; + }; + + rtc@68 { + compatible = "dallas,ds3232"; + reg = <0x68>; + interrupts = <0x1 0x1 0 0>; + }; + }; + + i2c@1 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x1>; + + eeprom@55 { + compatible = "at24,24c02"; + reg = <0x55>; + }; + }; + + i2c@2 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x2>; + + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + + ina220@41 { + compatible = "ti,ina220"; + reg = <0x41>; + shunt-resistor = <1000>; + }; + }; + }; + }; + + sdhc@114000 { + voltage-ranges = <1800 1800 3300 3300>; + }; + }; + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + reg = <0xf 0xfe250000 0 0x10000>; + ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x10000000 + 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci2: pcie@ffe260000 { + reg = <0xf 0xfe260000 0 0x1000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci3: pcie@ffe270000 { + reg = <0xf 0xfe270000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/t208xrdb.dtsi b/arch/powerpc/boot/dts/t208xrdb.dtsi new file mode 100644 index 00000000000..1481e192e78 --- /dev/null +++ b/arch/powerpc/boot/dts/t208xrdb.dtsi @@ -0,0 +1,184 @@ +/* + * T2080PCIe-RDB Board Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/ { + model = "fsl,T2080RDB"; + compatible = "fsl,T2080RDB"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x08000000 + 2 0 0xf 0xff800000 0x00010000 + 3 0 0xf 0xffdf0000 0x00008000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x8000000>; + + bank-width = <2>; + device-width = <1>; + }; + + nand@1,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + + boardctrl: board-control@2,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,t2080-cpld"; + reg = <3 0 0x300>; + ranges = <0 3 0 0x300>; + }; + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + spi@110000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "micron,n25q512a"; + reg = <0>; + spi-max-frequency = <10000000>; /* input clock */ + }; + }; + + i2c@118000 { + adt7481@4c { + compatible = "adi,adt7481"; + reg = <0x4c>; + }; + + rtc@68 { + compatible = "dallas,ds1339"; + reg = <0x68>; + interrupts = <0x1 0x1 0 0>; + }; + + eeprom@50 { + compatible = "atmel,24c256"; + reg = <0x50>; + }; + }; + + i2c@118100 { + pca9546@77 { + compatible = "nxp,pca9546"; + reg = <0x77>; + }; + }; + + sdhc@114000 { + voltage-ranges = <1800 1800 3300 3300>; + }; + }; + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + reg = <0xf 0xfe250000 0 0x10000>; + ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x10000000 + 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci2: pcie@ffe260000 { + reg = <0xf 0xfe260000 0 0x1000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci3: pcie@ffe270000 { + reg = <0xf 0xfe270000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/t4240rdb.dts b/arch/powerpc/boot/dts/t4240rdb.dts new file mode 100644 index 00000000000..53761d4e8c5 --- /dev/null +++ b/arch/powerpc/boot/dts/t4240rdb.dts @@ -0,0 +1,186 @@ +/* + * T4240RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t4240si-pre.dtsi" + +/ { + model = "fsl,T4240RDB"; + compatible = "fsl,T4240RDB"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x08000000 + 2 0 0xf 0xff800000 0x00010000 + 3 0 0xf 0xffdf0000 0x00008000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x8000000>; + + bank-width = <2>; + device-width = <1>; + }; + + nand@2,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + spi@110000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "sst,sst25wf040"; + reg = <0>; + spi-max-frequency = <40000000>; /* input clock */ + }; + }; + + i2c@118000 { + eeprom@52 { + compatible = "at24,24c256"; + reg = <0x52>; + }; + eeprom@54 { + compatible = "at24,24c256"; + reg = <0x54>; + }; + eeprom@56 { + compatible = "at24,24c256"; + reg = <0x56>; + }; + rtc@68 { + compatible = "dallas,ds1374"; + reg = <0x68>; + interrupts = <0x1 0x1 0 0>; + }; + }; + + sdhc@114000 { + voltage-ranges = <1800 1800 3300 3300>; + }; + }; + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + reg = <0xf 0xfe250000 0 0x10000>; + ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000 + 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci2: pcie@ffe260000 { + reg = <0xf 0xfe260000 0 0x1000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000 + 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci3: pcie@ffe270000 { + reg = <0xf 0xfe270000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000 + 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + rio: rapidio@ffe0c0000 { + reg = <0xf 0xfe0c0000 0 0x11000>; + + port1 { + ranges = <0 0 0xc 0x20000000 0 0x10000000>; + }; + port2 { + ranges = <0 0 0xc 0x30000000 0 0x10000000>; + }; + }; +}; + +/include/ "fsl/t4240si-post.dtsi" diff --git a/arch/powerpc/boot/gunzip_util.c b/arch/powerpc/boot/gunzip_util.c index ef2aed0f63c..9dc52501de8 100644 --- a/arch/powerpc/boot/gunzip_util.c +++ b/arch/powerpc/boot/gunzip_util.c @@ -112,10 +112,10 @@ int gunzip_partial(struct gunzip_state *state, void *dst, int dstlen) r = zlib_inflate(&state->s, Z_FULL_FLUSH); if (r != Z_OK && r != Z_STREAM_END) fatal("inflate returned %d msg: %s\n\r", r, state->s.msg); - len = state->s.next_out - (unsigned char *)dst; + len = state->s.next_out - (Byte *)dst; } else { /* uncompressed image */ - len = min(state->s.avail_in, (unsigned)dstlen); + len = min(state->s.avail_in, (uLong)dstlen); memcpy(dst, state->s.next_in, len); state->s.next_in += len; state->s.avail_in -= len; diff --git a/arch/powerpc/boot/io.h b/arch/powerpc/boot/io.h index 7c09f4861fe..394da550046 100644 --- a/arch/powerpc/boot/io.h +++ b/arch/powerpc/boot/io.h @@ -1,5 +1,5 @@ #ifndef _IO_H -#define __IO_H +#define _IO_H #include "types.h" diff --git a/arch/powerpc/boot/simpleboot.c b/arch/powerpc/boot/simpleboot.c index 21cd48074ec..9f8c678f0d9 100644 --- a/arch/powerpc/boot/simpleboot.c +++ b/arch/powerpc/boot/simpleboot.c @@ -61,7 +61,7 @@ void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, if (*reg++ != 0) fatal("Memory range is not based at address 0\n"); - /* get the memsize and trucate it to under 4G on 32 bit machines */ + /* get the memsize and truncate it to under 4G on 32 bit machines */ memsize64 = 0; for (i = 0; i < *ns; i++) memsize64 = (memsize64 << 32) | *reg++; diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig index c05310a913b..c936fab9ec4 100644 --- a/arch/powerpc/configs/52xx/motionpro_defconfig +++ b/arch/powerpc/configs/52xx/motionpro_defconfig @@ -43,7 +43,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_MISC_DEVICES=y CONFIG_EEPROM_LEGACY=y -CONFIG_SCSI_TGT=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y CONFIG_ATA=y diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig index e9a81e5ba27..e362d588dfb 100644 --- a/arch/powerpc/configs/85xx/kmp204x_defconfig +++ b/arch/powerpc/configs/85xx/kmp204x_defconfig @@ -192,7 +192,6 @@ CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=y CONFIG_STAGING=y -# CONFIG_NET_VENDOR_SILICOM is not set CONFIG_CLK_PPC_CORENET=y CONFIG_EXT2_FS=y CONFIG_NTFS_FS=y diff --git a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig b/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig index bcbe7471668..9b192bb6bd3 100644 --- a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig +++ b/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig @@ -54,7 +54,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 CONFIG_IDE=y -CONFIG_SCSI_TGT=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y CONFIG_ATA=y diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig index 5e2aa43562b..59734916986 100644 --- a/arch/powerpc/configs/c2k_defconfig +++ b/arch/powerpc/configs/c2k_defconfig @@ -29,6 +29,7 @@ CONFIG_PM=y CONFIG_PCI_MSI=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_SHPC=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=y diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 4bee1a6d41d..7a7b3c879f9 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -5,6 +5,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=4 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_FHANDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 @@ -17,6 +18,7 @@ CONFIG_OPROFILE=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_PS3=y diff --git a/arch/powerpc/configs/celleb_defconfig b/arch/powerpc/configs/celleb_defconfig index 6d7b22f41b5..acccbfde8a5 100644 --- a/arch/powerpc/configs/celleb_defconfig +++ b/arch/powerpc/configs/celleb_defconfig @@ -5,6 +5,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=4 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_FHANDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 @@ -14,6 +15,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_CELLEB=y diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig index c19ff057d0f..688e9e4d29a 100644 --- a/arch/powerpc/configs/corenet32_smp_defconfig +++ b/arch/powerpc/configs/corenet32_smp_defconfig @@ -139,8 +139,9 @@ CONFIG_EDAC=y CONFIG_EDAC_MM_EDAC=y CONFIG_EDAC_MPC85XX=y CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_DS1307=y +CONFIG_RTC_DRV_DS1374=y CONFIG_RTC_DRV_DS3232=y -CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=y CONFIG_STAGING=y CONFIG_VIRT_DRIVERS=y @@ -164,6 +165,8 @@ CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=m CONFIG_MAGIC_SYSRQ=y @@ -179,3 +182,4 @@ CONFIG_CRYPTO_SHA512=y CONFIG_CRYPTO_AES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_FSL_CAAM=y +CONFIG_FSL_CORENET_CF=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 5c7fa19ae4e..6db97e4414b 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -4,6 +4,7 @@ CONFIG_ALTIVEC=y CONFIG_SMP=y CONFIG_NR_CPUS=24 CONFIG_SYSVIPC=y +CONFIG_FHANDLE=y CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y @@ -49,7 +50,6 @@ CONFIG_NET_IPIP=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -CONFIG_ARPD=y CONFIG_INET_ESP=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_LRO is not set @@ -59,33 +59,17 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=y -CONFIG_MTD_OF_PARTS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLKDEVS=y CONFIG_MTD_BLOCK=y CONFIG_FTL=y CONFIG_MTD_CFI=y -CONFIG_MTD_GEN_PROBE=y -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_M25P80=y -CONFIG_MTD_CFI_UTIL=y -CONFIG_MTD_NAND_ECC=y CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_IDS=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y CONFIG_MTD_UBI=y -CONFIG_MTD_UBI_WL_THRESHOLD=4096 -CONFIG_MTD_UBI_BEB_RESERVE=1 -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 @@ -101,6 +85,7 @@ CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set CONFIG_SERIO_LIBPS2=y +CONFIG_PPC_EPAPR_HV_BYTECHAN=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_MANY_PORTS=y @@ -114,7 +99,6 @@ CONFIG_SPI_GPIO=y CONFIG_SPI_FSL_SPI=y CONFIG_SPI_FSL_ESPI=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_USB_HID=m CONFIG_USB=y CONFIG_USB_MON=y @@ -125,8 +109,15 @@ CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_EDAC=y CONFIG_EDAC_MM_EDAC=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_DS1307=y +CONFIG_RTC_DRV_DS1374=y +CONFIG_RTC_DRV_DS3232=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y +CONFIG_VIRT_DRIVERS=y +CONFIG_FSL_HV_MANAGER=y +CONFIG_FSL_CORENET_CF=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_ISO9660_FS=m @@ -139,35 +130,24 @@ CONFIG_NTFS_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y -CONFIG_MISC_FILESYSTEMS=y CONFIG_JFFS2_FS=y CONFIG_JFFS2_FS_DEBUG=1 -CONFIG_JFFS2_FS_WRITEBUFFER=y -CONFIG_JFFS2_ZLIB=y -CONFIG_JFFS2_RTIME=y CONFIG_UBIFS_FS=y -CONFIG_UBIFS_FS_XATTR=y -CONFIG_UBIFS_FS_LZO=y -CONFIG_UBIFS_FS_ZLIB=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=m CONFIG_CRC_T10DIF=y -CONFIG_CRC16=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_LZO_COMPRESS=y -CONFIG_LZO_DECOMPRESS=y -CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_LZO=y +CONFIG_DEBUG_INFO=y CONFIG_FRAME_WARN=1024 -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 3c72fa615bd..6fab06f7f41 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -5,6 +5,7 @@ CONFIG_NR_CPUS=4 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_BLK_DEV_INITRD=y @@ -15,6 +16,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 95e545d9f25..fbd9e416331 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -4,6 +4,7 @@ CONFIG_NR_CPUS=4 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_COMPAT_BRK is not set @@ -15,6 +16,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLK_DEV_BSG is not set +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_MAPLE=y diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig index 530601e8ccf..69fd8adf9f5 100644 --- a/arch/powerpc/configs/mpc5200_defconfig +++ b/arch/powerpc/configs/mpc5200_defconfig @@ -47,7 +47,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_EEPROM_AT24=y -CONFIG_SCSI_TGT=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y CONFIG_ATA=y diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index 55765c8cb08..d2c415489f7 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -209,8 +209,10 @@ CONFIG_MMC_SDHCI_OF_ESDHC=y CONFIG_EDAC=y CONFIG_EDAC_MM_EDAC=y CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1307=y +CONFIG_RTC_DRV_DS1374=y +CONFIG_RTC_DRV_DS3232=y +CONFIG_RTC_DRV_CMOS=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y # CONFIG_NET_DMA is not set @@ -224,6 +226,9 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index 5c6ecdc0f70..87460083dbc 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -210,8 +210,10 @@ CONFIG_MMC_SDHCI_OF_ESDHC=y CONFIG_EDAC=y CONFIG_EDAC_MM_EDAC=y CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1307=y +CONFIG_RTC_DRV_DS1374=y +CONFIG_RTC_DRV_DS3232=y +CONFIG_RTC_DRV_CMOS=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y # CONFIG_NET_DMA is not set @@ -225,6 +227,9 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/powerpc/configs/mpc86xx_defconfig b/arch/powerpc/configs/mpc86xx_defconfig index 35595ea74ff..fc58aa8a89e 100644 --- a/arch/powerpc/configs/mpc86xx_defconfig +++ b/arch/powerpc/configs/mpc86xx_defconfig @@ -145,6 +145,9 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_ADFS_FS=m diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index cec044a3ff6..3e72c8c06a0 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -3,6 +3,7 @@ CONFIG_ALTIVEC=y CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_SYSVIPC=y +CONFIG_FHANDLE=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y @@ -13,6 +14,7 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_MAC_PARTITION=y +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_PASEMI=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 553e6627801..0351b5ffdfe 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -31,6 +31,7 @@ CONFIG_HIBERNATION=y CONFIG_APM_EMULATION=y CONFIG_PCCARD=m CONFIG_YENTA=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=y diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index ccf66b9060a..924e10df184 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -127,4 +127,3 @@ CONFIG_CRYPTO_PCBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set CONFIG_VIRTUALIZATION=y -CONFIG_KVM_440=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index f26b267eb71..20bc5e2d368 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -4,6 +4,7 @@ CONFIG_VSX=y CONFIG_SMP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y @@ -49,6 +50,7 @@ CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_SCHED_SMT=y @@ -57,6 +59,7 @@ CONFIG_ELECTRA_CF=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_RPA=m CONFIG_HOTPLUG_PCI_RPA_DLPAR=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 438e813dc9c..c3a3269b086 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -3,6 +3,7 @@ CONFIG_PPC_BOOK3E_64=y CONFIG_SMP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_TASKSTATS=y @@ -32,6 +33,7 @@ CONFIG_SPARSEMEM_MANUAL=y CONFIG_PCI_MSI=y CONFIG_PCCARD=y CONFIG_HOTPLUG_PCI=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index c9106694484..fec5870f181 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -425,10 +425,8 @@ CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_SRP_ATTRS=m -CONFIG_SCSI_SRP_TGT_ATTRS=y CONFIG_SCSI_MESH=m CONFIG_SCSI_MAC53C94=m -CONFIG_SCSI_SRP=m CONFIG_SCSI_LOWLEVEL_PCMCIA=y CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index fdee37fab81..2e637c881d2 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -5,6 +5,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y CONFIG_RD_LZMA=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index a905063281c..dd2a9cab4b5 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -5,6 +5,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2048 CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_IRQ_DOMAIN_DEBUG=y @@ -52,6 +53,7 @@ CONFIG_SCHED_SMT=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_RPA=m CONFIG_HOTPLUG_PCI_RPA_DLPAR=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig index 58e3dbf43ca..63392f4b29a 100644 --- a/arch/powerpc/configs/pseries_le_defconfig +++ b/arch/powerpc/configs/pseries_le_defconfig @@ -6,6 +6,7 @@ CONFIG_NR_CPUS=2048 CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_IRQ_DOMAIN_DEBUG=y @@ -54,6 +55,7 @@ CONFIG_SCHED_SMT=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_RPA=m CONFIG_HOTPLUG_PCI_RPA_DLPAR=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 3fb1bc432f4..31e8f59aff3 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,8 +1,10 @@ generic-y += clkdev.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h +generic-y += scatterlist.h generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 4b237aa3566..21be8ae8f80 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -34,10 +34,14 @@ #define PPC_MIN_STKFRM 112 #ifdef __BIG_ENDIAN__ +#define LWZX_BE stringify_in_c(lwzx) #define LDX_BE stringify_in_c(ldx) +#define STWX_BE stringify_in_c(stwx) #define STDX_BE stringify_in_c(stdx) #else +#define LWZX_BE stringify_in_c(lwbrx) #define LDX_BE stringify_in_c(ldbrx) +#define STWX_BE stringify_in_c(stwbrx) #define STDX_BE stringify_in_c(stdbrx) #endif diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 3eb53d74107..3a39283333c 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -133,7 +133,6 @@ extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void die(const char *, struct pt_regs *, long); -extern void print_backtrace(unsigned long *); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index ed0afc1e44a..34a05a1a990 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -3,6 +3,7 @@ #ifdef __KERNEL__ +#include <asm/reg.h> /* bytes per L1 cache line */ #if defined(CONFIG_8xx) || defined(CONFIG_403GCX) @@ -39,6 +40,12 @@ struct ppc64_caches { }; extern struct ppc64_caches ppc64_caches; + +static inline void logmpp(u64 x) +{ + asm volatile(PPC_LOGMPP(R1) : : "r" (x)); +} + #endif /* __powerpc64__ && ! __ASSEMBLY__ */ #if defined(__ASSEMBLY__) diff --git a/arch/powerpc/include/asm/copro.h b/arch/powerpc/include/asm/copro.h new file mode 100644 index 00000000000..ce216df3138 --- /dev/null +++ b/arch/powerpc/include/asm/copro.h @@ -0,0 +1,29 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_POWERPC_COPRO_H +#define _ASM_POWERPC_COPRO_H + +struct copro_slb +{ + u64 esid, vsid; +}; + +int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, + unsigned long dsisr, unsigned *flt); + +int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb); + + +#ifdef CONFIG_PPC_COPRO_BASE +void copro_flush_all_slbs(struct mm_struct *mm); +#else +static inline void copro_flush_all_slbs(struct mm_struct *mm) {} +#endif +#endif /* _ASM_POWERPC_COPRO_H */ diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 0fdd7eece6d..daa5af91163 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -195,8 +195,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN) -#define MMU_FTR_PPCAS_ARCH_V2 (MMU_FTR_SLB | MMU_FTR_TLBIEL | \ - MMU_FTR_16M_PAGE) +#define MMU_FTR_PPCAS_ARCH_V2 (MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE) /* We only set the altivec features if the kernel was compiled with altivec * support @@ -268,10 +267,6 @@ extern const char *powerpc_base_platform; #define CPU_FTR_MAYBE_CAN_NAP 0 #endif -#define CLASSIC_PPC (!defined(CONFIG_8xx) && !defined(CONFIG_4xx) && \ - !defined(CONFIG_POWER3) && !defined(CONFIG_POWER4) && \ - !defined(CONFIG_BOOKE)) - #define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE) #define CPU_FTRS_603 (CPU_FTR_COMMON | \ @@ -396,15 +391,10 @@ extern const char *powerpc_base_platform; CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \ - CPU_FTR_CELL_TB_BUG) + CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT) #define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ -#define CPU_FTRS_POWER3 (CPU_FTR_USE_TB | \ - CPU_FTR_IABR | CPU_FTR_PPC_LE) -#define CPU_FTRS_RS64 (CPU_FTR_USE_TB | \ - CPU_FTR_IABR | \ - CPU_FTR_MMCRA | CPU_FTR_CTRL) #define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \ @@ -467,15 +457,15 @@ extern const char *powerpc_base_platform; #define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500 | CPU_FTRS_A2) #else #define CPU_FTRS_POSSIBLE \ - (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | \ - CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 | \ - CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \ - CPU_FTRS_CELL | CPU_FTRS_PA6T | CPU_FTR_VSX) + (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ + CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \ + CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \ + CPU_FTRS_PA6T | CPU_FTR_VSX) #endif #else enum { CPU_FTRS_POSSIBLE = -#if CLASSIC_PPC +#ifdef CONFIG_PPC_BOOK3S_32 CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU | CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 | CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX | @@ -518,14 +508,15 @@ enum { #define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500 & CPU_FTRS_A2) #else #define CPU_FTRS_ALWAYS \ - (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 & \ - CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & CPU_FTRS_POWER6 & \ - CPU_FTRS_POWER7 & CPU_FTRS_CELL & CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE) + (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ + CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \ + CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \ + CPU_FTRS_POWER8_DD1 & CPU_FTRS_POSSIBLE) #endif #else enum { CPU_FTRS_ALWAYS = -#if CLASSIC_PPC +#ifdef CONFIG_PPC_BOOK3S_32 CPU_FTRS_PPC601 & CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU & CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 & CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX & diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 150866b2a3f..894d538f356 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -135,6 +135,7 @@ static inline int dma_supported(struct device *dev, u64 mask) extern int dma_set_mask(struct device *dev, u64 dma_mask); extern int __dma_set_mask(struct device *dev, u64 dma_mask); +extern u64 __dma_get_required_mask(struct device *dev); #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index fab7743c264..3b260efbfbf 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -25,6 +25,7 @@ #include <linux/list.h> #include <linux/string.h> #include <linux/time.h> +#include <linux/atomic.h> struct pci_dev; struct pci_bus; @@ -33,10 +34,11 @@ struct device_node; #ifdef CONFIG_EEH /* EEH subsystem flags */ -#define EEH_ENABLED 0x1 /* EEH enabled */ -#define EEH_FORCE_DISABLED 0x2 /* EEH disabled */ -#define EEH_PROBE_MODE_DEV 0x4 /* From PCI device */ -#define EEH_PROBE_MODE_DEVTREE 0x8 /* From device tree */ +#define EEH_ENABLED 0x01 /* EEH enabled */ +#define EEH_FORCE_DISABLED 0x02 /* EEH disabled */ +#define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */ +#define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */ +#define EEH_ENABLE_IO_FOR_LOG 0x10 /* Enable IO for log */ /* * Delay for PE reset, all in ms @@ -84,7 +86,9 @@ struct eeh_pe { int freeze_count; /* Times of froze up */ struct timeval tstamp; /* Time on first-time freeze */ int false_positives; /* Times of reported #ff's */ + atomic_t pass_dev_cnt; /* Count of passed through devs */ struct eeh_pe *parent; /* Parent PE */ + void *data; /* PE auxillary data */ struct list_head child_list; /* Link PE to the child list */ struct list_head edevs; /* Link list of EEH devices */ struct list_head child; /* Child PEs */ @@ -93,6 +97,11 @@ struct eeh_pe { #define eeh_pe_for_each_dev(pe, edev, tmp) \ list_for_each_entry_safe(edev, tmp, &pe->edevs, list) +static inline bool eeh_pe_passed(struct eeh_pe *pe) +{ + return pe ? !!atomic_read(&pe->pass_dev_cnt) : false; +} + /* * The struct is used to trace EEH state for the associated * PCI device node or PCI device. In future, it might @@ -137,6 +146,11 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) return edev ? edev->pdev : NULL; } +static inline struct eeh_pe *eeh_dev_to_pe(struct eeh_dev* edev) +{ + return edev ? edev->pe : NULL; +} + /* Return values from eeh_ops::next_error */ enum { EEH_NEXT_ERR_NONE = 0, @@ -158,6 +172,7 @@ enum { #define EEH_OPT_ENABLE 1 /* EEH enable */ #define EEH_OPT_THAW_MMIO 2 /* MMIO enable */ #define EEH_OPT_THAW_DMA 3 /* DMA enable */ +#define EEH_OPT_FREEZE_PE 4 /* Freeze PE */ #define EEH_STATE_UNAVAILABLE (1 << 0) /* State unavailable */ #define EEH_STATE_NOT_SUPPORT (1 << 1) /* EEH not supported */ #define EEH_STATE_RESET_ACTIVE (1 << 2) /* Active reset */ @@ -165,6 +180,11 @@ enum { #define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */ #define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */ #define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */ +#define EEH_PE_STATE_NORMAL 0 /* Normal state */ +#define EEH_PE_STATE_RESET 1 /* PE reset asserted */ +#define EEH_PE_STATE_STOPPED_IO_DMA 2 /* Frozen PE */ +#define EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA, Enabled IO */ +#define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */ #define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */ #define EEH_RESET_HOT 1 /* Hot reset */ #define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */ @@ -184,6 +204,8 @@ struct eeh_ops { int (*wait_state)(struct eeh_pe *pe, int max_wait); int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); int (*configure_bridge)(struct eeh_pe *pe); + int (*err_inject)(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask); int (*read_config)(struct device_node *dn, int where, int size, u32 *val); int (*write_config)(struct device_node *dn, int where, int size, u32 val); int (*next_error)(struct eeh_pe **pe); @@ -194,36 +216,28 @@ extern int eeh_subsystem_flags; extern struct eeh_ops *eeh_ops; extern raw_spinlock_t confirm_error_lock; -static inline bool eeh_enabled(void) +static inline void eeh_add_flag(int flag) { - if ((eeh_subsystem_flags & EEH_FORCE_DISABLED) || - !(eeh_subsystem_flags & EEH_ENABLED)) - return false; - - return true; + eeh_subsystem_flags |= flag; } -static inline void eeh_set_enable(bool mode) +static inline void eeh_clear_flag(int flag) { - if (mode) - eeh_subsystem_flags |= EEH_ENABLED; - else - eeh_subsystem_flags &= ~EEH_ENABLED; + eeh_subsystem_flags &= ~flag; } -static inline void eeh_probe_mode_set(int flag) +static inline bool eeh_has_flag(int flag) { - eeh_subsystem_flags |= flag; + return !!(eeh_subsystem_flags & flag); } -static inline int eeh_probe_mode_devtree(void) +static inline bool eeh_enabled(void) { - return (eeh_subsystem_flags & EEH_PROBE_MODE_DEVTREE); -} + if (eeh_has_flag(EEH_FORCE_DISABLED) || + !eeh_has_flag(EEH_ENABLED)) + return false; -static inline int eeh_probe_mode_dev(void) -{ - return (eeh_subsystem_flags & EEH_PROBE_MODE_DEV); + return true; } static inline void eeh_serialize_lock(unsigned long *flags) @@ -243,6 +257,7 @@ static inline void eeh_serialize_unlock(unsigned long flags) #define EEH_MAX_ALLOWED_FREEZES 5 typedef void *(*eeh_traverse_func)(void *data, void *flag); +void eeh_set_pe_aux_size(int size); int eeh_phb_pe_create(struct pci_controller *phb); struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); struct eeh_pe *eeh_pe_get(struct eeh_dev *edev); @@ -262,8 +277,7 @@ void eeh_dev_phb_init_dynamic(struct pci_controller *phb); int eeh_init(void); int __init eeh_ops_register(struct eeh_ops *ops); int __exit eeh_ops_unregister(const char *name); -unsigned long eeh_check_failure(const volatile void __iomem *token, - unsigned long val); +int eeh_check_failure(const volatile void __iomem *token); int eeh_dev_check_failure(struct eeh_dev *edev); void eeh_addr_cache_build(void); void eeh_add_device_early(struct device_node *); @@ -272,6 +286,15 @@ void eeh_add_device_late(struct pci_dev *); void eeh_add_device_tree_late(struct pci_bus *); void eeh_add_sysfs_files(struct pci_bus *); void eeh_remove_device(struct pci_dev *); +int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state); +int eeh_pe_reset_and_recover(struct eeh_pe *pe); +int eeh_dev_open(struct pci_dev *pdev); +void eeh_dev_release(struct pci_dev *pdev); +struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group); +int eeh_pe_set_option(struct eeh_pe *pe, int option); +int eeh_pe_get_state(struct eeh_pe *pe); +int eeh_pe_reset(struct eeh_pe *pe, int option); +int eeh_pe_configure(struct eeh_pe *pe); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. @@ -295,8 +318,6 @@ static inline bool eeh_enabled(void) return false; } -static inline void eeh_set_enable(bool mode) { } - static inline int eeh_init(void) { return 0; @@ -309,9 +330,9 @@ static inline void *eeh_dev_init(struct device_node *dn, void *data) static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { } -static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) +static inline int eeh_check_failure(const volatile void __iomem *token) { - return val; + return 0; } #define eeh_dev_check_failure(x) (0) @@ -342,7 +363,7 @@ static inline u8 eeh_readb(const volatile void __iomem *addr) { u8 val = in_8(addr); if (EEH_POSSIBLE_ERROR(val, u8)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -350,7 +371,7 @@ static inline u16 eeh_readw(const volatile void __iomem *addr) { u16 val = in_le16(addr); if (EEH_POSSIBLE_ERROR(val, u16)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -358,7 +379,7 @@ static inline u32 eeh_readl(const volatile void __iomem *addr) { u32 val = in_le32(addr); if (EEH_POSSIBLE_ERROR(val, u32)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -366,7 +387,7 @@ static inline u64 eeh_readq(const volatile void __iomem *addr) { u64 val = in_le64(addr); if (EEH_POSSIBLE_ERROR(val, u64)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -374,7 +395,7 @@ static inline u16 eeh_readw_be(const volatile void __iomem *addr) { u16 val = in_be16(addr); if (EEH_POSSIBLE_ERROR(val, u16)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -382,7 +403,7 @@ static inline u32 eeh_readl_be(const volatile void __iomem *addr) { u32 val = in_be32(addr); if (EEH_POSSIBLE_ERROR(val, u32)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -390,7 +411,7 @@ static inline u64 eeh_readq_be(const volatile void __iomem *addr) { u64 val = in_be64(addr); if (EEH_POSSIBLE_ERROR(val, u64)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -404,7 +425,7 @@ static inline void eeh_memcpy_fromio(void *dest, const * were copied. Check all four bytes. */ if (n >= 4 && EEH_POSSIBLE_ERROR(*((u32 *)(dest + n - 4)), u32)) - eeh_check_failure(src, *((u32 *)(dest + n - 4))); + eeh_check_failure(src); } /* in-string eeh macros */ @@ -413,7 +434,7 @@ static inline void eeh_readsb(const volatile void __iomem *addr, void * buf, { _insb(addr, buf, ns); if (EEH_POSSIBLE_ERROR((*(((u8*)buf)+ns-1)), u8)) - eeh_check_failure(addr, *(u8*)buf); + eeh_check_failure(addr); } static inline void eeh_readsw(const volatile void __iomem *addr, void * buf, @@ -421,7 +442,7 @@ static inline void eeh_readsw(const volatile void __iomem *addr, void * buf, { _insw(addr, buf, ns); if (EEH_POSSIBLE_ERROR((*(((u16*)buf)+ns-1)), u16)) - eeh_check_failure(addr, *(u16*)buf); + eeh_check_failure(addr); } static inline void eeh_readsl(const volatile void __iomem *addr, void * buf, @@ -429,7 +450,7 @@ static inline void eeh_readsl(const volatile void __iomem *addr, void * buf, { _insl(addr, buf, nl); if (EEH_POSSIBLE_ERROR((*(((u32*)buf)+nl-1)), u32)) - eeh_check_failure(addr, *(u32*)buf); + eeh_check_failure(addr); } #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 8f35cd7d59c..77f52b26dad 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -425,6 +425,8 @@ label##_relon_hv: \ #define SOFTEN_VALUE_0xa00 PACA_IRQ_DBELL #define SOFTEN_VALUE_0xe80 PACA_IRQ_DBELL #define SOFTEN_VALUE_0xe82 PACA_IRQ_DBELL +#define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI +#define SOFTEN_VALUE_0xe62 PACA_IRQ_HMI #define __SOFTEN_TEST(h, vec) \ lbz r10,PACASOFTIRQEN(r13); \ @@ -513,8 +515,11 @@ label##_relon_hv: \ * runlatch, etc... */ -/* Exception addition: Hard disable interrupts */ -#define DISABLE_INTS RECONCILE_IRQ_STATE(r10,r11) +/* + * This addition reconciles our actual IRQ state with the various software + * flags that track it. This may call C code. + */ +#define ADD_RECONCILE RECONCILE_IRQ_STATE(r10,r11) #define ADD_NVGPRS \ bl save_nvgprs @@ -532,6 +537,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) .globl label##_common; \ label##_common: \ EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + /* Volatile regs are potentially clobbered here */ \ additions; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ @@ -539,7 +545,7 @@ label##_common: \ #define STD_EXCEPTION_COMMON(trap, label, hdlr) \ EXCEPTION_COMMON(trap, label, hdlr, ret_from_except, \ - ADD_NVGPRS;DISABLE_INTS) + ADD_NVGPRS;ADD_RECONCILE) /* * Like STD_EXCEPTION_COMMON, but for exceptions that can occur @@ -548,7 +554,7 @@ label##_common: \ */ #define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ EXCEPTION_COMMON(trap, label, hdlr, ret_from_except_lite, \ - FINISH_NAP;DISABLE_INTS;RUNLATCH_ON) + FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON) /* * When the idle code in power4_idle puts the CPU into NAP mode, diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h index 9361cd5342c..f79d6c74eb2 100644 --- a/arch/powerpc/include/asm/fs_pd.h +++ b/arch/powerpc/include/asm/fs_pd.h @@ -28,7 +28,6 @@ #ifdef CONFIG_8xx #include <asm/8xx_immap.h> -#include <asm/mpc8xx.h> extern immap_t __iomem *mpc8xx_immr; diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 418fb654370..1bbb3013d6a 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -11,6 +11,7 @@ typedef struct { unsigned int pmu_irqs; unsigned int mce_exceptions; unsigned int spurious_irqs; + unsigned int hmi_exceptions; #ifdef CONFIG_PPC_DOORBELL unsigned int doorbell_irqs; #endif diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 5dbbb29f5c3..85bc8c0d257 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -279,6 +279,12 @@ #define H_GET_24X7_DATA 0xF07C #define H_GET_PERF_COUNTER_INFO 0xF080 +/* Values for 2nd argument to H_SET_MODE */ +#define H_SET_MODE_RESOURCE_SET_CIABR 1 +#define H_SET_MODE_RESOURCE_SET_DAWR 2 +#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 +#define H_SET_MODE_RESOURCE_LE 4 + #ifndef __ASSEMBLY__ /** diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 10be1dd01c6..b59ac27a6b7 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -25,6 +25,7 @@ #define PACA_IRQ_EE 0x04 #define PACA_IRQ_DEC 0x08 /* Or FIT */ #define PACA_IRQ_EE_EDGE 0x10 /* BookE only */ +#define PACA_IRQ_HMI 0x20 #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index 5b0c98bd46a..1cb39c96d15 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -95,7 +95,6 @@ extern volatile struct Hydra __iomem *Hydra; #define HYDRA_INT_SPARE 19 extern int hydra_init(void); -extern void macio_adb_init(void); #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 41f13cec8a8..e8e3a0a04eb 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -31,11 +31,6 @@ extern atomic_t ppc_n_lost_interrupts; extern irq_hw_number_t virq_to_hw(unsigned int virq); -/** - * irq_early_init - Init irq remapping subsystem - */ -extern void irq_early_init(void); - static __inline__ int irq_canonicalize(int irq) { return irq; diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index e20eb95429a..f2149066fe5 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -32,9 +32,8 @@ #endif /* - * Most of the CPU's IRQ-state tracing is done from assembly code; we - * have to call a C function so call a wrapper that saves all the - * C-clobbered registers. + * These are calls to C code, so the caller must be prepared for volatiles to + * be clobbered. */ #define TRACE_ENABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_on) #define TRACE_DISABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_off) @@ -42,6 +41,9 @@ /* * This is used by assembly code to soft-disable interrupts first and * reconcile irq state. + * + * NB: This may call C code, so the caller must be prepared for volatiles to + * be clobbered. */ #define RECONCILE_IRQ_STATE(__rA, __rB) \ lbz __rA,PACASOFTIRQEN(r13); \ diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index f016bb699b5..efbf9a322a2 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -10,6 +10,7 @@ * 2 of the License, or (at your option) any later version. */ +#ifndef __ASSEMBLY__ #include <linux/types.h> #include <asm/feature-fixups.h> @@ -42,4 +43,12 @@ struct jump_entry { jump_label_t key; }; +#else +#define ARCH_STATIC_BRANCH(LABEL, KEY) \ +1098: nop; \ + .pushsection __jump_table, "aw"; \ + FTR_ENTRY_LONG 1098b, LABEL, KEY; \ + .popsection +#endif + #endif /* _ASM_POWERPC_JUMP_LABEL_H */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 16d7e33d35e..19c36cba37c 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -81,7 +81,6 @@ extern void default_machine_crash_shutdown(struct pt_regs *regs); extern int crash_shutdown_register(crash_shutdown_t handler); extern int crash_shutdown_unregister(crash_shutdown_t handler); -extern void machine_kexec_simple(struct kimage *image); extern void crash_kexec_secondary(struct pt_regs *regs); extern int overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h deleted file mode 100644 index a0e57618ff3..00000000000 --- a/arch/powerpc/include/asm/kvm_44x.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#ifndef __ASM_44X_H__ -#define __ASM_44X_H__ - -#include <linux/kvm_host.h> - -#define PPC44x_TLB_SIZE 64 - -/* If the guest is expecting it, this can be as large as we like; we'd just - * need to find some way of advertising it. */ -#define KVM44x_GUEST_TLB_SIZE 64 - -struct kvmppc_44x_tlbe { - u32 tid; /* Only the low 8 bits are used. */ - u32 word0; - u32 word1; - u32 word2; -}; - -struct kvmppc_44x_shadow_ref { - struct page *page; - u16 gtlb_index; - u8 writeable; - u8 tid; -}; - -struct kvmppc_vcpu_44x { - /* Unmodified copy of the guest's TLB. */ - struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE]; - - /* References to guest pages in the hardware TLB. */ - struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE]; - - /* State of the shadow TLB at guest context switch time. */ - struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE]; - u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; - - struct kvm_vcpu vcpu; -}; - -static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) -{ - return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); -} - -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); - -#endif /* __ASM_44X_H__ */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 9601741080e..5bca220bbb6 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -33,7 +33,6 @@ /* IVPR must be 64KiB-aligned. */ #define VCPU_SIZE_ORDER 4 #define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12) -#define VCPU_TLB_PGSZ PPC44x_TLB_64K #define VCPU_SIZE_BYTES (1<<VCPU_SIZE_LOG) #define BOOKE_INTERRUPT_CRITICAL 0 @@ -54,17 +53,17 @@ #define BOOKE_INTERRUPT_DEBUG 15 /* E500 */ -#define BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL 32 -#define BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST 33 -/* - * TODO: Unify 32-bit and 64-bit kernel exception handlers to use same defines - */ -#define BOOKE_INTERRUPT_SPE_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL -#define BOOKE_INTERRUPT_SPE_FP_DATA BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST -#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL -#define BOOKE_INTERRUPT_ALTIVEC_ASSIST \ - BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST +#ifdef CONFIG_SPE_POSSIBLE +#define BOOKE_INTERRUPT_SPE_UNAVAIL 32 +#define BOOKE_INTERRUPT_SPE_FP_DATA 33 #define BOOKE_INTERRUPT_SPE_FP_ROUND 34 +#endif + +#ifdef CONFIG_PPC_E500MC +#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL 32 +#define BOOKE_INTERRUPT_ALTIVEC_ASSIST 33 +#endif + #define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35 #define BOOKE_INTERRUPT_DOORBELL 36 #define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37 @@ -98,6 +97,7 @@ #define BOOK3S_INTERRUPT_H_DATA_STORAGE 0xe00 #define BOOK3S_INTERRUPT_H_INST_STORAGE 0xe20 #define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40 +#define BOOK3S_INTERRUPT_HMI 0xe60 #define BOOK3S_INTERRUPT_H_DOORBELL 0xe80 #define BOOK3S_INTERRUPT_PERFMON 0xf00 #define BOOK3S_INTERRUPT_ALTIVEC 0xf20 @@ -131,6 +131,7 @@ #define BOOK3S_HFLAG_NATIVE_PS 0x8 #define BOOK3S_HFLAG_MULTI_PGSIZE 0x10 #define BOOK3S_HFLAG_NEW_TLBIE 0x20 +#define BOOK3S_HFLAG_SPLIT_HACK 0x40 #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f52f6569452..6acf0c2a0f9 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -83,8 +83,6 @@ struct kvmppc_vcpu_book3s { u64 sdr1; u64 hior; u64 msr_mask; - u64 purr_offset; - u64 spurr_offset; #ifdef CONFIG_PPC_BOOK3S_32 u32 vsid_pool[VSID_POOL_SIZE]; u32 vsid_next; @@ -148,9 +146,10 @@ extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache * extern int kvmppc_mmu_hpte_sysinit(void); extern void kvmppc_mmu_hpte_sysexit(void); extern int kvmppc_mmu_hv_init(void); +extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); +/* XXX remove this export when load_last_inst() is generic */ extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); -extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); @@ -159,13 +158,13 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); -extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, +extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); -extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, +extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); -void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); @@ -183,12 +182,16 @@ extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask); +extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr); extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); +extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm); +extern int kvmppc_hcall_impl_pr(unsigned long cmd); +extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd); extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, @@ -274,32 +277,6 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) return (kvmppc_get_msr(vcpu) & MSR_LE) != (MSR_KERNEL & MSR_LE); } -static inline u32 kvmppc_get_last_inst_internal(struct kvm_vcpu *vcpu, ulong pc) -{ - /* Load the instruction manually if it failed to do so in the - * exit path */ - if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) - kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false); - - return kvmppc_need_byteswap(vcpu) ? swab32(vcpu->arch.last_inst) : - vcpu->arch.last_inst; -} - -static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) -{ - return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu)); -} - -/* - * Like kvmppc_get_last_inst(), but for fetching a sc instruction. - * Because the sc instruction sets SRR0 to point to the following - * instruction, we have to fetch from pc - 4. - */ -static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) -{ - return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu) - 4); -} - static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dar; @@ -310,6 +287,13 @@ static inline bool is_kvmppc_resume_guest(int r) return (r == RESUME_GUEST || r == RESUME_GUEST_NV); } +static inline bool is_kvmppc_hv_enabled(struct kvm *kvm); +static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu) +{ + /* Only PR KVM supports the magic page */ + return !is_kvmppc_hv_enabled(vcpu->kvm); +} + /* Magic register values loaded into r3 and r4 before the 'sc' assembly * instruction for the OSI hypercalls */ #define OSI_SC_MAGIC_R3 0x113724FA @@ -322,4 +306,7 @@ static inline bool is_kvmppc_resume_guest(int r) /* LPIDs we support with this build -- runtime limit may be lower */ #define KVMPPC_NR_LPIDS (LPID_RSVD + 1) +#define SPLIT_HACK_MASK 0xff000000 +#define SPLIT_HACK_OFFS 0xfb000000 + #endif /* __ASM_KVM_BOOK3S_H__ */ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index d645428a65a..0aa817933e6 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -59,20 +59,29 @@ extern unsigned long kvm_rma_pages; /* These bits are reserved in the guest view of the HPTE */ #define HPTE_GR_RESERVED HPTE_GR_MODIFIED -static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits) +static inline long try_lock_hpte(__be64 *hpte, unsigned long bits) { unsigned long tmp, old; + __be64 be_lockbit, be_bits; + + /* + * We load/store in native endian, but the HTAB is in big endian. If + * we byte swap all data we apply on the PTE we're implicitly correct + * again. + */ + be_lockbit = cpu_to_be64(HPTE_V_HVLOCK); + be_bits = cpu_to_be64(bits); asm volatile(" ldarx %0,0,%2\n" " and. %1,%0,%3\n" " bne 2f\n" - " ori %0,%0,%4\n" + " or %0,%0,%4\n" " stdcx. %0,0,%2\n" " beq+ 2f\n" " mr %1,%3\n" "2: isync" : "=&r" (tmp), "=&r" (old) - : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK) + : "r" (hpte), "r" (be_bits), "r" (be_lockbit) : "cc", "memory"); return old == 0; } @@ -110,16 +119,12 @@ static inline int __hpte_actual_psize(unsigned int lp, int psize) static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, unsigned long pte_index) { - int b_psize, a_psize; + int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; unsigned int penc; unsigned long rb = 0, va_low, sllp; unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1); - if (!(v & HPTE_V_LARGE)) { - /* both base and actual psize is 4k */ - b_psize = MMU_PAGE_4K; - a_psize = MMU_PAGE_4K; - } else { + if (v & HPTE_V_LARGE) { for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) { /* valid entries have a shift value */ @@ -142,6 +147,8 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, */ /* This covers 14..54 bits of va*/ rb = (v & ~0x7fUL) << 16; /* AVA field */ + + rb |= v >> (62 - 8); /* B field */ /* * AVA in v had cleared lower 23 bits. We need to derive * that from pteg index @@ -172,10 +179,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, { int aval_shift; /* - * remaining 7bits of AVA/LP fields + * remaining bits of AVA/LP fields * Also contain the rr bits of LP */ - rb |= (va_low & 0x7f) << 16; + rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000; /* * Now clear not needed LP bits based on actual psize */ diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index c7aed6105ff..3286f0d6a86 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -23,15 +23,16 @@ #include <linux/types.h> #include <linux/kvm_host.h> -/* LPIDs we support with this build -- runtime limit may be lower */ +/* + * Number of available lpids. Only the low-order 6 bits of LPID rgister are + * implemented on e500mc+ cores. + */ #define KVMPPC_NR_LPIDS 64 #define KVMPPC_INST_EHPRIV 0x7c00021c #define EHPRIV_OC_SHIFT 11 /* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */ #define EHPRIV_OC_DEBUG 1 -#define KVMPPC_INST_EHPRIV_DEBUG (KVMPPC_INST_EHPRIV | \ - (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT)) static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { @@ -69,11 +70,6 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) return false; } -static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.last_inst; -} - static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) { vcpu->arch.ctr = val; @@ -108,4 +104,14 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dear; } + +static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu) +{ + /* Magic page is only supported on e500v2 */ +#ifdef CONFIG_KVM_E500V2 + return true; +#else + return false; +#endif +} #endif /* __ASM_KVM_BOOKE_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bb66d8b8efd..047855619cc 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -34,6 +34,7 @@ #include <asm/processor.h> #include <asm/page.h> #include <asm/cacheflush.h> +#include <asm/hvcall.h> #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS @@ -48,20 +49,21 @@ #define KVM_NR_IRQCHIPS 1 #define KVM_IRQCHIP_NUM_PINS 256 -#if !defined(CONFIG_KVM_440) #include <linux/mmu_notifier.h> #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); -#endif +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 @@ -78,10 +80,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* Physical Address Mask - allowed range of real mode RAM access */ #define KVM_PAM 0x0fffffffffffffffULL -struct kvm; -struct kvm_run; -struct kvm_vcpu; - struct lppaca; struct slb_shadow; struct dtl_entry; @@ -96,7 +94,6 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u32 sum_exits; u32 mmio_exits; - u32 dcr_exits; u32 signal_exits; u32 light_exits; /* Account for special types of light exits: */ @@ -113,22 +110,21 @@ struct kvm_vcpu_stat { u32 halt_wakeup; u32 dbell_exits; u32 gdbell_exits; + u32 ld; + u32 st; #ifdef CONFIG_PPC_BOOK3S u32 pf_storage; u32 pf_instruc; u32 sp_storage; u32 sp_instruc; u32 queue_intr; - u32 ld; u32 ld_slow; - u32 st; u32 st_slow; #endif }; enum kvm_exit_types { MMIO_EXITS, - DCR_EXITS, SIGNAL_EXITS, ITLB_REAL_MISS_EXITS, ITLB_VIRT_MISS_EXITS, @@ -148,6 +144,7 @@ enum kvm_exit_types { EMULATED_TLBWE_EXITS, EMULATED_RFI_EXITS, EMULATED_RFCI_EXITS, + EMULATED_RFDI_EXITS, DEC_EXITS, EXT_INTR_EXITS, HALT_WAKEUP, @@ -254,7 +251,6 @@ struct kvm_arch { atomic_t hpte_mod_interest; spinlock_t slot_phys_lock; cpumask_t need_tlb_flush; - struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; int hpt_cma_alloc; #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -263,6 +259,7 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; struct list_head rtas_tokens; + DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); #endif #ifdef CONFIG_KVM_MPIC struct openpic *mpic; @@ -271,6 +268,10 @@ struct kvm_arch { struct kvmppc_xics *xics; #endif struct kvmppc_ops *kvm_ops; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* This array can grow quite large, keep it at the end */ + struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; +#endif }; /* @@ -305,6 +306,8 @@ struct kvmppc_vcore { u32 arch_compat; ulong pcr; ulong dpdes; /* doorbell state (POWER8) */ + void *mpp_buffer; /* Micro Partition Prefetch buffer */ + bool mpp_buffer_is_valid; }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) @@ -503,8 +506,10 @@ struct kvm_vcpu_arch { #ifdef CONFIG_BOOKE u32 decar; #endif - u32 tbl; - u32 tbu; + /* Time base value when we entered the guest */ + u64 entry_tb; + u64 entry_vtb; + u64 entry_ic; u32 tcr; ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ u32 ivor[64]; @@ -580,11 +585,11 @@ struct kvm_vcpu_arch { u32 mmucfg; u32 eptcfg; u32 epr; + u64 sprg9; + u32 pwrmgtcr0; u32 crit_save; /* guest debug registers*/ struct debug_reg dbg_reg; - /* hardware visible debug registers when in guest state */ - struct debug_reg shadow_dbg_reg; #endif gpa_t paddr_accessed; gva_t vaddr_accessed; @@ -593,8 +598,6 @@ struct kvm_vcpu_arch { u8 io_gpr; /* GPR used as IO source/target */ u8 mmio_is_bigendian; u8 mmio_sign_extend; - u8 dcr_needed; - u8 dcr_is_write; u8 osi_needed; u8 osi_enabled; u8 papr_enabled; @@ -608,7 +611,6 @@ struct kvm_vcpu_arch { u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ struct hrtimer dec_timer; - struct tasklet_struct tasklet; u64 dec_jiffies; u64 dec_expires; unsigned long pending_exceptions; @@ -683,4 +685,12 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE +static inline void kvm_arch_hardware_disable(void) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_exit(void) {} + #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9c89cdd067a..a6dcdb6d13c 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -38,15 +38,35 @@ #include <asm/paca.h> #endif +/* + * KVMPPC_INST_SW_BREAKPOINT is debug Instruction + * for supporting software breakpoint. + */ +#define KVMPPC_INST_SW_BREAKPOINT 0x00dddd00 + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ - EMULATE_DO_DCR, /* kvm_run filled with DCR request */ EMULATE_FAIL, /* can't emulate this instruction */ EMULATE_AGAIN, /* something went wrong. go again */ EMULATE_EXIT_USER, /* emulation requires exit to user-space */ }; +enum instruction_type { + INST_GENERIC, + INST_SC, /* system call */ +}; + +enum xlate_instdata { + XLATE_INST, /* translate instruction address */ + XLATE_DATA /* translate data address */ +}; + +enum xlate_readwrite { + XLATE_READ, /* check for read permissions */ + XLATE_WRITE /* check for write permissions */ +}; + extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern void kvmppc_handler_highmem(void); @@ -62,12 +82,20 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_default_endian); +extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, + enum instruction_type type, u32 *inst); + +extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data); +extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data); extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); -extern void kvmppc_decrementer_func(unsigned long data); +extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu); extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu); extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); @@ -86,6 +114,9 @@ extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, gva_t eaddr); extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu); +extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, + enum xlate_instdata xlid, enum xlate_readwrite xlrw, + struct kvmppc_pte *pte); extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id); @@ -106,6 +137,14 @@ extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, ulong dear_flags, + ulong esr_flags); +extern void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, + ulong dear_flags, + ulong esr_flags); +extern void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, + ulong esr_flags); extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu); @@ -173,6 +212,9 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); +void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu); +void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu); + union kvmppc_one_reg { u32 wval; u64 dval; @@ -210,7 +252,7 @@ struct kvmppc_ops { int (*unmap_hva)(struct kvm *kvm, unsigned long hva); int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, unsigned long end); - int (*age_hva)(struct kvm *kvm, unsigned long hva); + int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); int (*test_age_hva)(struct kvm *kvm, unsigned long hva); void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); void (*mmu_destroy)(struct kvm_vcpu *vcpu); @@ -228,12 +270,35 @@ struct kvmppc_ops { void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu); long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl, unsigned long arg); - + int (*hcall_implemented)(unsigned long hcall); }; extern struct kvmppc_ops *kvmppc_hv_ops; extern struct kvmppc_ops *kvmppc_pr_ops; +static inline int kvmppc_get_last_inst(struct kvm_vcpu *vcpu, + enum instruction_type type, u32 *inst) +{ + int ret = EMULATE_DONE; + u32 fetched_inst; + + /* Load the instruction manually if it failed to do so in the + * exit path */ + if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) + ret = kvmppc_load_last_inst(vcpu, type, &vcpu->arch.last_inst); + + /* Write fetch_failed unswapped if the fetch failed */ + if (ret == EMULATE_DONE) + fetched_inst = kvmppc_need_byteswap(vcpu) ? + swab32(vcpu->arch.last_inst) : + vcpu->arch.last_inst; + else + fetched_inst = vcpu->arch.last_inst; + + *inst = fetched_inst; + return ret; +} + static inline bool is_kvmppc_hv_enabled(struct kvm *kvm) { return kvm->arch.kvm_ops == kvmppc_hv_ops; @@ -392,6 +457,17 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) { return 0; } #endif +static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_BOOKE_HV + return mfspr(SPRN_GEPR); +#elif defined(CONFIG_BOOKE) + return vcpu->arch.epr; +#else + return 0; +#endif +} + static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) { #ifdef CONFIG_KVM_BOOKE_HV @@ -472,8 +548,20 @@ static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu) #endif } +#define SPRNG_WRAPPER_GET(reg, bookehv_spr) \ +static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ +{ \ + return mfspr(bookehv_spr); \ +} \ + +#define SPRNG_WRAPPER_SET(reg, bookehv_spr) \ +static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val) \ +{ \ + mtspr(bookehv_spr, val); \ +} \ + #define SHARED_WRAPPER_GET(reg, size) \ -static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ +static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ { \ if (kvmppc_shared_big_endian(vcpu)) \ return be##size##_to_cpu(vcpu->arch.shared->reg); \ @@ -494,14 +582,31 @@ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ SHARED_WRAPPER_GET(reg, size) \ SHARED_WRAPPER_SET(reg, size) \ +#define SPRNG_WRAPPER(reg, bookehv_spr) \ + SPRNG_WRAPPER_GET(reg, bookehv_spr) \ + SPRNG_WRAPPER_SET(reg, bookehv_spr) \ + +#ifdef CONFIG_KVM_BOOKE_HV + +#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ + SPRNG_WRAPPER(reg, bookehv_spr) \ + +#else + +#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ + SHARED_WRAPPER(reg, size) \ + +#endif + SHARED_WRAPPER(critical, 64) -SHARED_WRAPPER(sprg0, 64) -SHARED_WRAPPER(sprg1, 64) -SHARED_WRAPPER(sprg2, 64) -SHARED_WRAPPER(sprg3, 64) -SHARED_WRAPPER(srr0, 64) -SHARED_WRAPPER(srr1, 64) -SHARED_WRAPPER(dar, 64) +SHARED_SPRNG_WRAPPER(sprg0, 64, SPRN_GSPRG0) +SHARED_SPRNG_WRAPPER(sprg1, 64, SPRN_GSPRG1) +SHARED_SPRNG_WRAPPER(sprg2, 64, SPRN_GSPRG2) +SHARED_SPRNG_WRAPPER(sprg3, 64, SPRN_GSPRG3) +SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) +SHARED_SPRNG_WRAPPER(srr1, 64, SPRN_GSRR1) +SHARED_SPRNG_WRAPPER(dar, 64, SPRN_GDEAR) +SHARED_SPRNG_WRAPPER(esr, 64, SPRN_GESR) SHARED_WRAPPER_GET(msr, 64) static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val) { diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index f92b0b54e92..307347f8ddb 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -57,10 +57,10 @@ struct machdep_calls { void (*hpte_removebolted)(unsigned long ea, int psize, int ssize); void (*flush_hash_range)(unsigned long number, int local); - void (*hugepage_invalidate)(struct mm_struct *mm, + void (*hugepage_invalidate)(unsigned long vsid, + unsigned long addr, unsigned char *hpte_slot_array, - unsigned long addr, int psize); - + int psize, int ssize); /* special for kexec, to be called in real mode, linear mapping is * destroyed as well */ void (*hpte_clear_all)(void); @@ -136,8 +136,6 @@ struct machdep_calls { int (*pci_setup_phb)(struct pci_controller *host); #ifdef CONFIG_PCI_MSI - int (*msi_check_device)(struct pci_dev* dev, - int nvec, int type); int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); void (*teardown_msi_irqs)(struct pci_dev *dev); @@ -174,6 +172,10 @@ struct machdep_calls { /* Exception handlers */ int (*system_reset_exception)(struct pt_regs *regs); int (*machine_check_exception)(struct pt_regs *regs); + int (*handle_hmi_exception)(struct pt_regs *regs); + + /* Early exception handlers called in realmode */ + int (*hmi_exception_early)(struct pt_regs *regs); /* Called during machine check exception to retrive fixup address. */ bool (*mce_check_early_recovery)(struct pt_regs *regs); @@ -326,8 +328,6 @@ extern struct machdep_calls *machine_id; extern void probe_machine(void); -extern char cmd_line[COMMAND_LINE_SIZE]; - #ifdef CONFIG_PPC_PMAC /* * Power macintoshes have either a CUDA, PMU or SMU controlling @@ -366,6 +366,7 @@ static inline void log_error(char *buf, unsigned int err_type, int fatal) } \ __define_initcall(__machine_initcall_##mach##_##fn, id); +#define machine_early_initcall(mach, fn) __define_machine_initcall(mach, fn, early) #define machine_core_initcall(mach, fn) __define_machine_initcall(mach, fn, 1) #define machine_core_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 1s) #define machine_postcore_initcall(mach, fn) __define_machine_initcall(mach, fn, 2) diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index d0918e09557..cd4f04a7480 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -40,7 +40,11 @@ /* MAS registers bit definitions */ -#define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000) +#define MAS0_TLBSEL_MASK 0x30000000 +#define MAS0_TLBSEL_SHIFT 28 +#define MAS0_TLBSEL(x) (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK) +#define MAS0_GET_TLBSEL(mas0) (((mas0) & MAS0_TLBSEL_MASK) >> \ + MAS0_TLBSEL_SHIFT) #define MAS0_ESEL_MASK 0x0FFF0000 #define MAS0_ESEL_SHIFT 16 #define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK) @@ -58,6 +62,7 @@ #define MAS1_TSIZE_MASK 0x00000f80 #define MAS1_TSIZE_SHIFT 7 #define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK) +#define MAS1_GET_TSIZE(mas1) (((mas1) & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT) #define MAS2_EPN (~0xFFFUL) #define MAS2_X0 0x00000040 @@ -86,6 +91,7 @@ #define MAS3_SPSIZE 0x0000003e #define MAS3_SPSIZE_SHIFT 1 +#define MAS4_TLBSEL_MASK MAS0_TLBSEL_MASK #define MAS4_TLBSELD(x) MAS0_TLBSEL(x) #define MAS4_INDD 0x00008000 /* Default IND */ #define MAS4_TSIZED(x) MAS1_TSIZE(x) diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index c2b4dcf23d0..aeebc94b2bc 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -25,26 +25,6 @@ #include <asm/processor.h> /* - * Segment table - */ - -#define STE_ESID_V 0x80 -#define STE_ESID_KS 0x20 -#define STE_ESID_KP 0x10 -#define STE_ESID_N 0x08 - -#define STE_VSID_SHIFT 12 - -/* Location of cpu0's segment table */ -#define STAB0_PAGE 0x8 -#define STAB0_OFFSET (STAB0_PAGE << 12) -#define STAB0_PHYS_ADDR (STAB0_OFFSET + PHYSICAL_START) - -#ifndef __ASSEMBLY__ -extern char initial_stab[]; -#endif /* ! __ASSEMBLY */ - -/* * SLB */ @@ -210,6 +190,13 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) #ifndef __ASSEMBLY__ +static inline int slb_vsid_shift(int ssize) +{ + if (ssize == MMU_SEGSIZE_256M) + return SLB_VSID_SHIFT; + return SLB_VSID_SHIFT_1T; +} + static inline int segment_shift(int ssize) { if (ssize == MMU_SEGSIZE_256M) @@ -337,6 +324,7 @@ extern int __hash_page_64K(unsigned long ea, unsigned long access, unsigned int local, int ssize); struct mm_struct; unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); +extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap); extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, int local, int ssize, @@ -362,6 +350,8 @@ extern void hash_failure_debug(unsigned long ea, unsigned long access, extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long pstart, unsigned long prot, int psize, int ssize); +int htab_remove_mapping(unsigned long vstart, unsigned long vend, + int psize, int ssize); extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); @@ -370,10 +360,8 @@ extern void hpte_init_lpar(void); extern void hpte_init_beat(void); extern void hpte_init_beat_v3(void); -extern void stabs_alloc(void); extern void slb_initialize(void); extern void slb_flush_and_rebolt(void); -extern void stab_initialize(unsigned long stab); extern void slb_vmalloc_update(void); extern void slb_set_size(u16 size); diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e61f24ed4e6..3d5abfe6ba6 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -64,9 +64,9 @@ */ #define MMU_FTR_USE_PAIRED_MAS ASM_CONST(0x01000000) -/* MMU is SLB-based +/* Doesn't support the B bit (1T segment) in SLBIE */ -#define MMU_FTR_SLB ASM_CONST(0x02000000) +#define MMU_FTR_NO_SLBIE_B ASM_CONST(0x02000000) /* Support 16M large pages */ @@ -88,10 +88,6 @@ */ #define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) -/* Doesn't support the B bit (1T segment) in SLBIE - */ -#define MMU_FTR_NO_SLBIE_B ASM_CONST(0x80000000) - /* MMU feature bit sets for various CPUs */ #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index b467530e248..73382eba02d 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -18,7 +18,6 @@ extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm); extern void destroy_context(struct mm_struct *mm); extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); -extern void switch_stab(struct task_struct *tsk, struct mm_struct *mm); extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); extern void set_context(unsigned long id, pgd_t *pgd); @@ -77,10 +76,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * sub architectures. */ #ifdef CONFIG_PPC_STD_MMU_64 - if (mmu_has_feature(MMU_FTR_SLB)) - switch_slb(tsk, next); - else - switch_stab(tsk, next); + switch_slb(tsk, next); #else /* Out of line for now */ switch_mmu_context(prev, next); diff --git a/arch/powerpc/include/asm/mpc85xx.h b/arch/powerpc/include/asm/mpc85xx.h index 736d4acc05a..3bef74a9914 100644 --- a/arch/powerpc/include/asm/mpc85xx.h +++ b/arch/powerpc/include/asm/mpc85xx.h @@ -77,6 +77,8 @@ #define SVR_T1020 0x852100 #define SVR_T1021 0x852101 #define SVR_T1022 0x852102 +#define SVR_T2080 0x853000 +#define SVR_T2081 0x853100 #define SVR_8610 0x80A000 #define SVR_8641 0x809000 diff --git a/arch/powerpc/include/asm/mpc8xx.h b/arch/powerpc/include/asm/mpc8xx.h deleted file mode 100644 index 98f3c4f1732..00000000000 --- a/arch/powerpc/include/asm/mpc8xx.h +++ /dev/null @@ -1,12 +0,0 @@ -/* This is the single file included by all MPC8xx build options. - * Since there are many different boards and no standard configuration, - * we have a unique include file for each. Rather than change every - * file that has to include MPC8xx configuration, they all include - * this one and the configuration switching is done here. - */ -#ifndef __CONFIG_8xx_DEFS -#define __CONFIG_8xx_DEFS - -extern struct mpc8xx_pcmcia_ops m8xx_pcmcia_ops; - -#endif /* __CONFIG_8xx_DEFS */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 0da1dbd42e0..9124b0ede1f 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -135,6 +135,7 @@ struct opal_sg_list { #define OPAL_FLASH_MANAGE 77 #define OPAL_FLASH_UPDATE 78 #define OPAL_RESYNC_TIMEBASE 79 +#define OPAL_CHECK_TOKEN 80 #define OPAL_DUMP_INIT 81 #define OPAL_DUMP_INFO 82 #define OPAL_DUMP_READ 83 @@ -146,7 +147,13 @@ struct opal_sg_list { #define OPAL_GET_PARAM 89 #define OPAL_SET_PARAM 90 #define OPAL_DUMP_RESEND 91 +#define OPAL_PCI_SET_PHB_CXL_MODE 93 #define OPAL_DUMP_INFO2 94 +#define OPAL_PCI_ERR_INJECT 96 +#define OPAL_PCI_EEH_FREEZE_SET 97 +#define OPAL_HANDLE_HMI 98 +#define OPAL_REGISTER_DUMP_REGION 101 +#define OPAL_UNREGISTER_DUMP_REGION 102 #ifndef __ASSEMBLY__ @@ -170,7 +177,11 @@ enum OpalFreezeState { enum OpalEehFreezeActionToken { OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1, OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2, - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3 + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3, + + OPAL_EEH_ACTION_SET_FREEZE_MMIO = 1, + OPAL_EEH_ACTION_SET_FREEZE_DMA = 2, + OPAL_EEH_ACTION_SET_FREEZE_ALL = 3 }; enum OpalPciStatusToken { @@ -191,6 +202,35 @@ enum OpalPciErrorSeverity { OPAL_EEH_SEV_INF = 5 }; +enum OpalErrinjectType { + OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR = 0, + OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64 = 1, +}; + +enum OpalErrinjectFunc { + /* IOA bus specific errors */ + OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR = 0, + OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA = 1, + OPAL_ERR_INJECT_FUNC_IOA_LD_IO_ADDR = 2, + OPAL_ERR_INJECT_FUNC_IOA_LD_IO_DATA = 3, + OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR = 4, + OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA = 5, + OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR = 6, + OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA = 7, + OPAL_ERR_INJECT_FUNC_IOA_ST_IO_ADDR = 8, + OPAL_ERR_INJECT_FUNC_IOA_ST_IO_DATA = 9, + OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR = 10, + OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA = 11, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR = 12, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA = 13, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER = 14, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET = 15, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR = 16, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA = 17, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER = 18, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET = 19, +}; + enum OpalShpcAction { OPAL_SHPC_GET_LINK_STATE = 0, OPAL_SHPC_GET_SLOT_STATE = 1 @@ -240,6 +280,7 @@ enum OpalMessageType { OPAL_MSG_MEM_ERR, OPAL_MSG_EPOW, OPAL_MSG_SHUTDOWN, + OPAL_MSG_HMI_EVT, OPAL_MSG_TYPE_MAX, }; @@ -340,10 +381,19 @@ enum OpalMveEnableAction { OPAL_ENABLE_MVE = 1 }; +enum OpalM64EnableAction { + OPAL_DISABLE_M64 = 0, + OPAL_ENABLE_M64_SPLIT = 1, + OPAL_ENABLE_M64_NON_SPLIT = 2 +}; + enum OpalPciResetScope { - OPAL_PHB_COMPLETE = 1, OPAL_PCI_LINK = 2, OPAL_PHB_ERROR = 3, - OPAL_PCI_HOT_RESET = 4, OPAL_PCI_FUNDAMENTAL_RESET = 5, - OPAL_PCI_IODA_TABLE_RESET = 6, + OPAL_RESET_PHB_COMPLETE = 1, + OPAL_RESET_PCI_LINK = 2, + OPAL_RESET_PHB_ERROR = 3, + OPAL_RESET_PCI_HOT = 4, + OPAL_RESET_PCI_FUNDAMENTAL = 5, + OPAL_RESET_PCI_IODA_TABLE = 6 }; enum OpalPciReinitScope { @@ -502,6 +552,50 @@ struct OpalMemoryErrorData { } u; }; +/* HMI interrupt event */ +enum OpalHMI_Version { + OpalHMIEvt_V1 = 1, +}; + +enum OpalHMI_Severity { + OpalHMI_SEV_NO_ERROR = 0, + OpalHMI_SEV_WARNING = 1, + OpalHMI_SEV_ERROR_SYNC = 2, + OpalHMI_SEV_FATAL = 3, +}; + +enum OpalHMI_Disposition { + OpalHMI_DISPOSITION_RECOVERED = 0, + OpalHMI_DISPOSITION_NOT_RECOVERED = 1, +}; + +enum OpalHMI_ErrType { + OpalHMI_ERROR_MALFUNC_ALERT = 0, + OpalHMI_ERROR_PROC_RECOV_DONE, + OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN, + OpalHMI_ERROR_PROC_RECOV_MASKED, + OpalHMI_ERROR_TFAC, + OpalHMI_ERROR_TFMR_PARITY, + OpalHMI_ERROR_HA_OVERFLOW_WARN, + OpalHMI_ERROR_XSCOM_FAIL, + OpalHMI_ERROR_XSCOM_DONE, + OpalHMI_ERROR_SCOM_FIR, + OpalHMI_ERROR_DEBUG_TRIG_FIR, + OpalHMI_ERROR_HYP_RESOURCE, +}; + +struct OpalHMIEvent { + uint8_t version; /* 0x00 */ + uint8_t severity; /* 0x01 */ + uint8_t type; /* 0x02 */ + uint8_t disposition; /* 0x03 */ + uint8_t reserved_1[4]; /* 0x04 */ + + __be64 hmer; + /* TFMR register. Valid only for TFAC and TFMR_PARITY error type. */ + __be64 tfmr; +}; + enum { OPAL_P7IOC_DIAG_TYPE_NONE = 0, OPAL_P7IOC_DIAG_TYPE_RGC = 1, @@ -513,40 +607,40 @@ enum { }; struct OpalIoP7IOCErrorData { - uint16_t type; + __be16 type; /* GEM */ - uint64_t gemXfir; - uint64_t gemRfir; - uint64_t gemRirqfir; - uint64_t gemMask; - uint64_t gemRwof; + __be64 gemXfir; + __be64 gemRfir; + __be64 gemRirqfir; + __be64 gemMask; + __be64 gemRwof; /* LEM */ - uint64_t lemFir; - uint64_t lemErrMask; - uint64_t lemAction0; - uint64_t lemAction1; - uint64_t lemWof; + __be64 lemFir; + __be64 lemErrMask; + __be64 lemAction0; + __be64 lemAction1; + __be64 lemWof; union { struct OpalIoP7IOCRgcErrorData { - uint64_t rgcStatus; /* 3E1C10 */ - uint64_t rgcLdcp; /* 3E1C18 */ + __be64 rgcStatus; /* 3E1C10 */ + __be64 rgcLdcp; /* 3E1C18 */ }rgc; struct OpalIoP7IOCBiErrorData { - uint64_t biLdcp0; /* 3C0100, 3C0118 */ - uint64_t biLdcp1; /* 3C0108, 3C0120 */ - uint64_t biLdcp2; /* 3C0110, 3C0128 */ - uint64_t biFenceStatus; /* 3C0130, 3C0130 */ + __be64 biLdcp0; /* 3C0100, 3C0118 */ + __be64 biLdcp1; /* 3C0108, 3C0120 */ + __be64 biLdcp2; /* 3C0110, 3C0128 */ + __be64 biFenceStatus; /* 3C0130, 3C0130 */ - uint8_t biDownbound; /* BI Downbound or Upbound */ + u8 biDownbound; /* BI Downbound or Upbound */ }bi; struct OpalIoP7IOCCiErrorData { - uint64_t ciPortStatus; /* 3Dn008 */ - uint64_t ciPortLdcp; /* 3Dn010 */ + __be64 ciPortStatus; /* 3Dn008 */ + __be64 ciPortLdcp; /* 3Dn010 */ - uint8_t ciPort; /* Index of CI port: 0/1 */ + u8 ciPort; /* Index of CI port: 0/1 */ }ci; }; }; @@ -578,60 +672,60 @@ struct OpalIoPhbErrorCommon { struct OpalIoP7IOCPhbErrorData { struct OpalIoPhbErrorCommon common; - uint32_t brdgCtl; + __be32 brdgCtl; // P7IOC utl regs - uint32_t portStatusReg; - uint32_t rootCmplxStatus; - uint32_t busAgentStatus; + __be32 portStatusReg; + __be32 rootCmplxStatus; + __be32 busAgentStatus; // P7IOC cfg regs - uint32_t deviceStatus; - uint32_t slotStatus; - uint32_t linkStatus; - uint32_t devCmdStatus; - uint32_t devSecStatus; + __be32 deviceStatus; + __be32 slotStatus; + __be32 linkStatus; + __be32 devCmdStatus; + __be32 devSecStatus; // cfg AER regs - uint32_t rootErrorStatus; - uint32_t uncorrErrorStatus; - uint32_t corrErrorStatus; - uint32_t tlpHdr1; - uint32_t tlpHdr2; - uint32_t tlpHdr3; - uint32_t tlpHdr4; - uint32_t sourceId; + __be32 rootErrorStatus; + __be32 uncorrErrorStatus; + __be32 corrErrorStatus; + __be32 tlpHdr1; + __be32 tlpHdr2; + __be32 tlpHdr3; + __be32 tlpHdr4; + __be32 sourceId; - uint32_t rsv3; + __be32 rsv3; // Record data about the call to allocate a buffer. - uint64_t errorClass; - uint64_t correlator; + __be64 errorClass; + __be64 correlator; //P7IOC MMIO Error Regs - uint64_t p7iocPlssr; // n120 - uint64_t p7iocCsr; // n110 - uint64_t lemFir; // nC00 - uint64_t lemErrorMask; // nC18 - uint64_t lemWOF; // nC40 - uint64_t phbErrorStatus; // nC80 - uint64_t phbFirstErrorStatus; // nC88 - uint64_t phbErrorLog0; // nCC0 - uint64_t phbErrorLog1; // nCC8 - uint64_t mmioErrorStatus; // nD00 - uint64_t mmioFirstErrorStatus; // nD08 - uint64_t mmioErrorLog0; // nD40 - uint64_t mmioErrorLog1; // nD48 - uint64_t dma0ErrorStatus; // nD80 - uint64_t dma0FirstErrorStatus; // nD88 - uint64_t dma0ErrorLog0; // nDC0 - uint64_t dma0ErrorLog1; // nDC8 - uint64_t dma1ErrorStatus; // nE00 - uint64_t dma1FirstErrorStatus; // nE08 - uint64_t dma1ErrorLog0; // nE40 - uint64_t dma1ErrorLog1; // nE48 - uint64_t pestA[OPAL_P7IOC_NUM_PEST_REGS]; - uint64_t pestB[OPAL_P7IOC_NUM_PEST_REGS]; + __be64 p7iocPlssr; // n120 + __be64 p7iocCsr; // n110 + __be64 lemFir; // nC00 + __be64 lemErrorMask; // nC18 + __be64 lemWOF; // nC40 + __be64 phbErrorStatus; // nC80 + __be64 phbFirstErrorStatus; // nC88 + __be64 phbErrorLog0; // nCC0 + __be64 phbErrorLog1; // nCC8 + __be64 mmioErrorStatus; // nD00 + __be64 mmioFirstErrorStatus; // nD08 + __be64 mmioErrorLog0; // nD40 + __be64 mmioErrorLog1; // nD48 + __be64 dma0ErrorStatus; // nD80 + __be64 dma0FirstErrorStatus; // nD88 + __be64 dma0ErrorLog0; // nDC0 + __be64 dma0ErrorLog1; // nDC8 + __be64 dma1ErrorStatus; // nE00 + __be64 dma1FirstErrorStatus; // nE08 + __be64 dma1ErrorLog0; // nE40 + __be64 dma1ErrorLog1; // nE48 + __be64 pestA[OPAL_P7IOC_NUM_PEST_REGS]; + __be64 pestB[OPAL_P7IOC_NUM_PEST_REGS]; }; struct OpalIoPhb3ErrorData { @@ -758,6 +852,10 @@ int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number, __be64 *phb_status); int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number, uint64_t eeh_action_token); +int64_t opal_pci_eeh_freeze_set(uint64_t phb_id, uint64_t pe_number, + uint64_t eeh_action_token); +int64_t opal_pci_err_inject(uint64_t phb_id, uint32_t pe_no, uint32_t type, + uint32_t func, uint64_t addr, uint64_t mask); int64_t opal_pci_shpc(uint64_t phb_id, uint64_t shpc_action, uint8_t *state); @@ -768,7 +866,7 @@ int64_t opal_pci_set_phb_mem_window(uint64_t phb_id, uint16_t window_type, uint16_t window_num, uint64_t starting_real_address, uint64_t starting_pci_address, - uint16_t segment_size); + uint64_t size); int64_t opal_pci_map_pe_mmio_window(uint64_t phb_id, uint16_t pe_number, uint16_t window_type, uint16_t window_num, uint16_t segment_num); @@ -826,6 +924,7 @@ int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe, __be16 *pci_error_type, __be16 *severity); int64_t opal_pci_poll(uint64_t phb_id); int64_t opal_return_cpu(void); +int64_t opal_check_token(uint64_t token); int64_t opal_reinit_cpus(uint64_t flags); int64_t opal_xscom_read(uint32_t gcid, uint64_t pcb_addr, __be64 *val); @@ -860,6 +959,10 @@ int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer, int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer, uint64_t length); int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); +int64_t opal_handle_hmi(void); +int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); +int64_t opal_unregister_dump_region(uint32_t id); +int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, @@ -902,6 +1005,8 @@ extern void opal_msglog_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool opal_mce_check_early_recovery(struct pt_regs *regs); +extern int opal_hmi_exception_early(struct pt_regs *regs); +extern int opal_handle_hmi_exception(struct pt_regs *regs); extern void opal_shutdown(void); extern int opal_resync_timebase(void); @@ -912,6 +1017,13 @@ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, unsigned long vmalloc_size); void opal_free_sg_list(struct opal_sg_list *sg); +/* + * Dump region ID range usable by the OS + */ +#define OPAL_DUMP_REGION_HOST_START 0x80 +#define OPAL_DUMP_REGION_LOG_BUF 0x80 +#define OPAL_DUMP_REGION_HOST_END 0xFF + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_H */ diff --git a/arch/powerpc/include/asm/oprofile_impl.h b/arch/powerpc/include/asm/oprofile_impl.h index d697b08994c..61fe5d6f18e 100644 --- a/arch/powerpc/include/asm/oprofile_impl.h +++ b/arch/powerpc/include/asm/oprofile_impl.h @@ -61,7 +61,6 @@ struct op_powerpc_model { }; extern struct op_powerpc_model op_model_fsl_emb; -extern struct op_powerpc_model op_model_rs64; extern struct op_powerpc_model op_model_power4; extern struct op_powerpc_model op_model_7450; extern struct op_powerpc_model op_model_cell; diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index bb0bd25f20d..a5139ea6910 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -78,10 +78,6 @@ struct paca_struct { u64 kernel_toc; /* Kernel TOC address */ u64 kernelbase; /* Base address of kernel */ u64 kernel_msr; /* MSR while running in kernel */ -#ifdef CONFIG_PPC_STD_MMU_64 - u64 stab_real; /* Absolute address of segment table */ - u64 stab_addr; /* Virtual address of segment table */ -#endif /* CONFIG_PPC_STD_MMU_64 */ void *emergency_sp; /* pointer to emergency stack */ u64 data_offset; /* per cpu data offset */ s16 hw_cpu_id; /* Physical processor number */ @@ -171,6 +167,7 @@ struct paca_struct { * and already using emergency stack. */ u16 in_mce; + u8 hmi_event_available; /* HMI event is available */ #endif /* Stuff for accurate time accounting */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 32e4e212b9c..26fe1ae1521 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -48,9 +48,6 @@ extern unsigned int HPAGE_SHIFT; #define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1) #endif -/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ -#define __HAVE_ARCH_GATE_AREA 1 - /* * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we * assign PAGE_MASK to a larger type it gets extended the way we want diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 88693cef4f3..d908a46d05c 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -42,20 +42,40 @@ typedef unsigned long pte_basic_t; -static __inline__ void clear_page(void *addr) +static inline void clear_page(void *addr) { - unsigned long lines, line_size; - - line_size = ppc64_caches.dline_size; - lines = ppc64_caches.dlines_per_page; - - __asm__ __volatile__( + unsigned long iterations; + unsigned long onex, twox, fourx, eightx; + + iterations = ppc64_caches.dlines_per_page / 8; + + /* + * Some verisions of gcc use multiply instructions to + * calculate the offsets so lets give it a hand to + * do better. + */ + onex = ppc64_caches.dline_size; + twox = onex << 1; + fourx = onex << 2; + eightx = onex << 3; + + asm volatile( "mtctr %1 # clear_page\n\ -1: dcbz 0,%0\n\ - add %0,%0,%3\n\ + .balign 16\n\ +1: dcbz 0,%0\n\ + dcbz %3,%0\n\ + dcbz %4,%0\n\ + dcbz %5,%0\n\ + dcbz %6,%0\n\ + dcbz %7,%0\n\ + dcbz %8,%0\n\ + dcbz %9,%0\n\ + add %0,%0,%10\n\ bdnz+ 1b" - : "=r" (addr) - : "r" (lines), "0" (addr), "r" (line_size) + : "=&r" (addr) + : "r" (iterations), "0" (addr), "b" (onex), "b" (twox), + "b" (twox+onex), "b" (fourx), "b" (fourx+onex), + "b" (twox+fourx), "b" (eightx-onex), "r" (eightx) : "ctr", "memory"); } @@ -104,7 +124,6 @@ extern unsigned long slice_get_unmapped_area(unsigned long addr, extern unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr); -extern void slice_init_context(struct mm_struct *mm, unsigned int psize); extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize); extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize); diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index b3e936027b2..814622146d5 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -19,6 +19,8 @@ #define MAX_EVENT_ALTERNATIVES 8 #define MAX_LIMITED_HWCOUNTERS 2 +struct perf_event; + /* * This struct provides the constants and functions needed to * describe the PMU on a particular POWER-family CPU. @@ -30,7 +32,8 @@ struct power_pmu { unsigned long add_fields; unsigned long test_adder; int (*compute_mmcr)(u64 events[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]); + unsigned int hwc[], unsigned long mmcr[], + struct perf_event *pevents[]); int (*get_constraint)(u64 event_id, unsigned long *mskp, unsigned long *valp); int (*get_alternatives)(u64 event_id, unsigned int flags, diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 47edde8c355..945e47adf7d 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -8,8 +8,6 @@ #include <linux/threads.h> #include <asm/io.h> /* For sub-arch specific PPC_PIN_SIZE */ -extern unsigned long va_to_phys(unsigned long address); -extern pte_t *va_to_pte(unsigned long address); extern unsigned long ioremap_bot; #ifdef CONFIG_44x @@ -50,10 +48,10 @@ extern int icache_44x_need_flush; #define FIRST_USER_ADDRESS 0 #define pte_ERROR(e) \ - printk("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ + pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ (unsigned long long)pte_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h index 12798c9d4b4..7b935683f26 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h @@ -64,7 +64,7 @@ (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) #define pud_ERROR(e) \ - printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) + pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) /* * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index eb9261024f5..ae153c40ab7 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -328,11 +328,11 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) #define pte_ERROR(e) \ - printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) + pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) + pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) /* Encode and de-code a swap entry */ #define __swp_type(entry) (((entry).val >> 1) & 0x3f) @@ -413,7 +413,7 @@ static inline char *get_hpte_slot_array(pmd_t *pmdp) } extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp); + pmd_t *pmdp, unsigned long old_pmd); #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index d98c1ecc326..316f9a5da17 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ #include <linux/mmdebug.h> +#include <linux/mmzone.h> #include <asm/processor.h> /* For TASK_SIZE */ #include <asm/mmu.h> #include <asm/page.h> @@ -38,10 +39,9 @@ static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } #ifdef CONFIG_NUMA_BALANCING - static inline int pte_present(pte_t pte) { - return pte_val(pte) & (_PAGE_PRESENT | _PAGE_NUMA); + return pte_val(pte) & _PAGE_NUMA_MASK; } #define pte_present_nonuma pte_present_nonuma @@ -50,37 +50,6 @@ static inline int pte_present_nonuma(pte_t pte) return pte_val(pte) & (_PAGE_PRESENT); } -#define pte_numa pte_numa -static inline int pte_numa(pte_t pte) -{ - return (pte_val(pte) & - (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA; -} - -#define pte_mknonnuma pte_mknonnuma -static inline pte_t pte_mknonnuma(pte_t pte) -{ - pte_val(pte) &= ~_PAGE_NUMA; - pte_val(pte) |= _PAGE_PRESENT | _PAGE_ACCESSED; - return pte; -} - -#define pte_mknuma pte_mknuma -static inline pte_t pte_mknuma(pte_t pte) -{ - /* - * We should not set _PAGE_NUMA on non present ptes. Also clear the - * present bit so that hash_page will return 1 and we collect this - * as numa fault. - */ - if (pte_present(pte)) { - pte_val(pte) |= _PAGE_NUMA; - pte_val(pte) &= ~_PAGE_PRESENT; - } else - VM_BUG_ON(1); - return pte; -} - #define ptep_set_numa ptep_set_numa static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -92,12 +61,6 @@ static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, return; } -#define pmd_numa pmd_numa -static inline int pmd_numa(pmd_t pmd) -{ - return pte_numa(pmd_pte(pmd)); -} - #define pmdp_set_numa pmdp_set_numa static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) @@ -109,16 +72,21 @@ static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, return; } -#define pmd_mknonnuma pmd_mknonnuma -static inline pmd_t pmd_mknonnuma(pmd_t pmd) +/* + * Generic NUMA pte helpers expect pteval_t and pmdval_t types to exist + * which was inherited from x86. For the purposes of powerpc pte_basic_t and + * pmd_t are equivalent + */ +#define pteval_t pte_basic_t +#define pmdval_t pmd_t +static inline pteval_t ptenuma_flags(pte_t pte) { - return pte_pmd(pte_mknonnuma(pmd_pte(pmd))); + return pte_val(pte) & _PAGE_NUMA_MASK; } -#define pmd_mknuma pmd_mknuma -static inline pmd_t pmd_mknuma(pmd_t pmd) +static inline pmdval_t pmdnuma_flags(pmd_t pmd) { - return pte_pmd(pte_mknuma(pmd_pte(pmd))); + return pmd_val(pmd) & _PAGE_NUMA_MASK; } # else @@ -281,6 +249,8 @@ extern unsigned long empty_zero_page[]; extern pgd_t swapper_pg_dir[]; +void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn); +int dma_pfn_limit_to_zone(u64 pfn_limit); extern void paging_init(void); /* diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 12c32c5f533..67859edbf8f 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -273,7 +273,7 @@ static inline long plpar_set_mode(unsigned long mflags, unsigned long resource, static inline long enable_reloc_on_exceptions(void) { /* mflags = 3: Exceptions at 0xC000000000004000 */ - return plpar_set_mode(3, 3, 0, 0); + return plpar_set_mode(3, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0); } /* @@ -284,7 +284,7 @@ static inline long enable_reloc_on_exceptions(void) * returns H_SUCCESS. */ static inline long disable_reloc_on_exceptions(void) { - return plpar_set_mode(0, 3, 0, 0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0); } /* @@ -297,7 +297,7 @@ static inline long disable_reloc_on_exceptions(void) { static inline long enable_big_endian_exceptions(void) { /* mflags = 0: big endian exceptions */ - return plpar_set_mode(0, 4, 0, 0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_LE, 0, 0); } /* @@ -310,17 +310,17 @@ static inline long enable_big_endian_exceptions(void) static inline long enable_little_endian_exceptions(void) { /* mflags = 1: little endian exceptions */ - return plpar_set_mode(1, 4, 0, 0); + return plpar_set_mode(1, H_SET_MODE_RESOURCE_LE, 0, 0); } static inline long plapr_set_ciabr(unsigned long ciabr) { - return plpar_set_mode(0, 1, ciabr, 0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_CIABR, ciabr, 0); } static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0) { - return plpar_set_mode(0, 2, dawr0, dawrx0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); } #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h new file mode 100644 index 00000000000..f09a22fa1bd --- /dev/null +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -0,0 +1,31 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_PNV_PCI_H +#define _ASM_PNV_PCI_H + +#include <linux/pci.h> +#include <misc/cxl.h> + +int pnv_phb_to_cxl(struct pci_dev *dev); +int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, + unsigned int virq); +int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num); +void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num); +int pnv_cxl_get_irq_count(struct pci_dev *dev); +struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev); + +#ifdef CONFIG_CXL_BASE +int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev, int num); +void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev); +#endif + +#endif diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 3132bb9365f..6f853620804 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -139,6 +139,7 @@ #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 +#define PPC_INST_LOGMPP 0x7c0007e4 #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 0x7c00042a #define PPC_INST_LWARX 0x7c000028 @@ -150,8 +151,10 @@ #define PPC_INST_MCRXR_MASK 0xfc0007fe #define PPC_INST_MFSPR_PVR 0x7c1f42a6 #define PPC_INST_MFSPR_PVR_MASK 0xfc1fffff +#define PPC_INST_MFTMR 0x7c0002dc #define PPC_INST_MSGSND 0x7c00019c #define PPC_INST_MSGSNDP 0x7c00011c +#define PPC_INST_MTTMR 0x7c0003dc #define PPC_INST_NOP 0x60000000 #define PPC_INST_POPCNTB 0x7c0000f4 #define PPC_INST_POPCNTB_MASK 0xfc0007fe @@ -275,6 +278,20 @@ #define __PPC_EH(eh) 0 #endif +/* POWER8 Micro Partition Prefetch (MPP) parameters */ +/* Address mask is common for LOGMPP instruction and MPPR SPR */ +#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000 + +/* Bits 60 and 61 of MPP SPR should be set to one of the following */ +/* Aborting the fetch is indeed setting 00 in the table size bits */ +#define PPC_MPPR_FETCH_ABORT (0x0ULL << 60) +#define PPC_MPPR_FETCH_WHOLE_TABLE (0x2ULL << 60) + +/* Bits 54 and 55 of register for LOGMPP instruction should be set to: */ +#define PPC_LOGMPP_LOG_L2 (0x02ULL << 54) +#define PPC_LOGMPP_LOG_L2L3 (0x01ULL << 54) +#define PPC_LOGMPP_LOG_ABORT (0x03ULL << 54) + /* Deal with instructions that older assemblers aren't aware of */ #define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ __PPC_RA(a) | __PPC_RB(b)) @@ -283,6 +300,8 @@ #define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_LOGMPP(b) stringify_in_c(.long PPC_INST_LOGMPP | \ + __PPC_RB(b)) #define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) @@ -369,4 +388,11 @@ #define TABORT(r) stringify_in_c(.long PPC_INST_TABORT \ | __PPC_RA(r)) +/* book3e thread control instructions */ +#define TMRN(x) ((((x) & 0x1f) << 16) | (((x) & 0x3e0) << 6)) +#define MTTMR(tmr, r) stringify_in_c(.long PPC_INST_MTTMR | \ + TMRN(tmr) | ___PPC_RS(r)) +#define MFTMR(tmr, r) stringify_in_c(.long PPC_INST_MFTMR | \ + TMRN(tmr) | ___PPC_RT(r)) + #endif /* _ASM_POWERPC_PPC_OPCODE_H */ diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 74b79f07f04..7f436ba1b56 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -76,8 +76,6 @@ void of_parse_dma_window(struct device_node *dn, const __be32 *dma_window, unsigned long *busno, unsigned long *phys, unsigned long *size); -extern void kdump_move_device_tree(void); - extern void of_instantiate_rtc(void); extern int of_get_ibm_chip_id(struct device_node *np); diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index 8d1569c2904..e040c359512 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -98,6 +98,11 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); _PAGE_USER | _PAGE_ACCESSED | \ _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC) +#ifdef CONFIG_NUMA_BALANCING +/* Mask of bits that distinguish present and numa ptes */ +#define _PAGE_NUMA_MASK (_PAGE_NUMA|_PAGE_PRESENT) +#endif + /* * We define 2 sets of base prot bits, one for basic pages (ie, * cacheable kernel and user pages) and one for non cacheable diff --git a/arch/powerpc/include/asm/pte-fsl-booke.h b/arch/powerpc/include/asm/pte-fsl-booke.h index 2c12be5f677..e84dd7ed505 100644 --- a/arch/powerpc/include/asm/pte-fsl-booke.h +++ b/arch/powerpc/include/asm/pte-fsl-booke.h @@ -37,5 +37,7 @@ #define _PMD_PRESENT_MASK (PAGE_MASK) #define _PMD_BAD (~PAGE_MASK) +#define PTE_WIMGE_SHIFT (6) + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_PTE_FSL_BOOKE_H */ diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h index d836d945068..4f4ec2ab45c 100644 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ b/arch/powerpc/include/asm/pte-hash64-64k.h @@ -46,11 +46,31 @@ * in order to deal with 64K made of 4K HW pages. Thus we override the * generic accessors and iterators here */ -#define __real_pte(e,p) ((real_pte_t) { \ - (e), (pte_val(e) & _PAGE_COMBO) ? \ - (pte_val(*((p) + PTRS_PER_PTE))) : 0 }) -#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \ - (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf)) +#define __real_pte __real_pte +static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) +{ + real_pte_t rpte; + + rpte.pte = pte; + rpte.hidx = 0; + if (pte_val(pte) & _PAGE_COMBO) { + /* + * Make sure we order the hidx load against the _PAGE_COMBO + * check. The store side ordering is done in __hash_page_4K + */ + smp_rmb(); + rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE)); + } + return rpte; +} + +static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) +{ + if ((pte_val(rpte.pte) & _PAGE_COMBO)) + return (rpte.hidx >> (index<<2)) & 0xf; + return (pte_val(rpte.pte) >> 12) & 0xf; +} + #define __rpte_to_pte(r) ((r).pte) #define __rpte_sub_valid(rpte, index) \ (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index))) @@ -75,7 +95,8 @@ (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) #define remap_4k_pfn(vma, addr, pfn, prot) \ - remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \ - __pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)) + (WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL : \ + remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \ + __pgprot(pgprot_val((prot)) | _PAGE_4K_PFN))) #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 279b80f3bb2..c0c61fa9cd9 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -47,6 +47,12 @@ STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) #define STACK_FRAME_MARKER 12 +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define STACK_FRAME_MIN_SIZE 32 +#else +#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD +#endif + /* Size of dummy stack frame allocated when calling signal handler. */ #define __SIGNAL_FRAMESIZE 128 #define __SIGNAL_FRAMESIZE32 64 @@ -60,6 +66,7 @@ #define STACK_FRAME_REGS_MARKER ASM_CONST(0x72656773) #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_FRAME_MARKER 2 +#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD /* Size of stack frame allocated when calling signal handler. */ #define __SIGNAL_FRAMESIZE 64 diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index bffd89d2730..fe3f9488f32 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -213,9 +213,8 @@ #define SPRN_ACOP 0x1F /* Available Coprocessor Register */ #define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ #define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */ -#define TEXASR_FS __MASK(63-36) /* Transaction Failure Summary */ #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ -#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ +#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_CTRLF 0x088 #define SPRN_CTRLT 0x098 @@ -225,6 +224,7 @@ #define CTRL_TE 0x00c00000 /* thread enable */ #define CTRL_RUNLATCH 0x1 #define SPRN_DAWR 0xB4 +#define SPRN_MPPR 0xB8 /* Micro Partition Prefetch Register */ #define SPRN_RPR 0xBA /* Relative Priority Register */ #define SPRN_CIABR 0xBB #define CIABR_PRIV 0x3 @@ -254,7 +254,7 @@ #define DSISR_PROTFAULT 0x08000000 /* protection fault */ #define DSISR_ISSTORE 0x02000000 /* access was a store */ #define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ -#define DSISR_NOSEGMENT 0x00200000 /* STAB/SLB miss */ +#define DSISR_NOSEGMENT 0x00200000 /* SLB miss */ #define DSISR_KEYFAULT 0x00200000 /* Key fault */ #define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ #define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ @@ -944,13 +944,10 @@ * readable variant for reads, which can avoid a fault * with KVM type virtualization. * - * (*) Under KVM, the host SPRG1 is used to point to - * the current VCPU data structure - * * 32-bit 8xx: * - SPRG0 scratch for exception vectors * - SPRG1 scratch for exception vectors - * - SPRG2 apparently unused but initialized + * - SPRG2 scratch for exception vectors * */ #ifdef CONFIG_PPC64 @@ -1060,6 +1057,7 @@ #ifdef CONFIG_8xx #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 +#define SPRN_SPRG_SCRATCH2 SPRN_SPRG2 #endif @@ -1203,6 +1201,15 @@ : "r" ((unsigned long)(v)) \ : "memory") +static inline unsigned long mfvtb (void) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return mfspr(SPRN_VTB); +#endif + return 0; +} + #ifdef __powerpc64__ #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) #define mftb() ({unsigned long rval; \ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 464f1089b53..16547efa2d5 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -15,16 +15,28 @@ #ifndef __ASM_POWERPC_REG_BOOKE_H__ #define __ASM_POWERPC_REG_BOOKE_H__ +#include <asm/ppc-opcode.h> + /* Machine State Register (MSR) Fields */ -#define MSR_GS (1<<28) /* Guest state */ -#define MSR_UCLE (1<<26) /* User-mode cache lock enable */ -#define MSR_SPE (1<<25) /* Enable SPE */ -#define MSR_DWE (1<<10) /* Debug Wait Enable */ -#define MSR_UBLE (1<<10) /* BTB lock enable (e500) */ -#define MSR_IS MSR_IR /* Instruction Space */ -#define MSR_DS MSR_DR /* Data Space */ -#define MSR_PMM (1<<2) /* Performance monitor mark bit */ -#define MSR_CM (1<<31) /* Computation Mode (0=32-bit, 1=64-bit) */ +#define MSR_GS_LG 28 /* Guest state */ +#define MSR_UCLE_LG 26 /* User-mode cache lock enable */ +#define MSR_SPE_LG 25 /* Enable SPE */ +#define MSR_DWE_LG 10 /* Debug Wait Enable */ +#define MSR_UBLE_LG 10 /* BTB lock enable (e500) */ +#define MSR_IS_LG MSR_IR_LG /* Instruction Space */ +#define MSR_DS_LG MSR_DR_LG /* Data Space */ +#define MSR_PMM_LG 2 /* Performance monitor mark bit */ +#define MSR_CM_LG 31 /* Computation Mode (0=32-bit, 1=64-bit) */ + +#define MSR_GS __MASK(MSR_GS_LG) +#define MSR_UCLE __MASK(MSR_UCLE_LG) +#define MSR_SPE __MASK(MSR_SPE_LG) +#define MSR_DWE __MASK(MSR_DWE_LG) +#define MSR_UBLE __MASK(MSR_UBLE_LG) +#define MSR_IS __MASK(MSR_IS_LG) +#define MSR_DS __MASK(MSR_DS_LG) +#define MSR_PMM __MASK(MSR_PMM_LG) +#define MSR_CM __MASK(MSR_CM_LG) #if defined(CONFIG_PPC_BOOK3E_64) #define MSR_64BIT MSR_CM @@ -260,7 +272,7 @@ /* e500mc */ #define MCSR_DCPERR_MC 0x20000000UL /* D-Cache Parity Error */ -#define MCSR_L2MMU_MHIT 0x04000000UL /* Hit on multiple TLB entries */ +#define MCSR_L2MMU_MHIT 0x08000000UL /* Hit on multiple TLB entries */ #define MCSR_NMI 0x00100000UL /* Non-Maskable Interrupt */ #define MCSR_MAV 0x00080000UL /* MCAR address valid */ #define MCSR_MEA 0x00040000UL /* MCAR is effective address */ @@ -307,6 +319,8 @@ * DBSR bits which have conflicting definitions on true Book E versus IBM 40x. */ #ifdef CONFIG_BOOKE +#define DBSR_IDE 0x80000000 /* Imprecise Debug Event */ +#define DBSR_MRR 0x30000000 /* Most Recent Reset */ #define DBSR_IC 0x08000000 /* Instruction Completion */ #define DBSR_BT 0x04000000 /* Branch Taken */ #define DBSR_IRPT 0x02000000 /* Exception Debug Event */ @@ -598,6 +612,13 @@ /* Bit definitions for L1CSR2. */ #define L1CSR2_DCWS 0x40000000 /* Data Cache write shadow */ +/* Bit definitions for BUCSR. */ +#define BUCSR_STAC_EN 0x01000000 /* Segment Target Address Cache */ +#define BUCSR_LS_EN 0x00400000 /* Link Stack */ +#define BUCSR_BBFI 0x00000200 /* Branch Buffer flash invalidate */ +#define BUCSR_BPEN 0x00000001 /* Branch prediction enable */ +#define BUCSR_INIT (BUCSR_STAC_EN | BUCSR_LS_EN | BUCSR_BBFI | BUCSR_BPEN) + /* Bit definitions for L2CSR0. */ #define L2CSR0_L2E 0x80000000 /* L2 Cache Enable */ #define L2CSR0_L2PE 0x40000000 /* L2 Cache Parity/ECC Enable */ @@ -721,5 +742,23 @@ #define MMUBE1_VBE4 0x00000002 #define MMUBE1_VBE5 0x00000001 +#define TMRN_IMSR0 0x120 /* Initial MSR Register 0 (e6500) */ +#define TMRN_IMSR1 0x121 /* Initial MSR Register 1 (e6500) */ +#define TMRN_INIA0 0x140 /* Next Instruction Address Register 0 */ +#define TMRN_INIA1 0x141 /* Next Instruction Address Register 1 */ +#define SPRN_TENSR 0x1b5 /* Thread Enable Status Register */ +#define SPRN_TENS 0x1b6 /* Thread Enable Set Register */ +#define SPRN_TENC 0x1b7 /* Thread Enable Clear Register */ + +#define TEN_THREAD(x) (1 << (x)) + +#ifndef __ASSEMBLY__ +#define mftmr(rn) ({unsigned long rval; \ + asm volatile(MFTMR(rn, %0) : "=r" (rval)); rval;}) +#define mttmr(rn, v) asm volatile(MTTMR(rn, %0) : \ + : "r" ((unsigned long)(v)) \ + : "memory") +#endif /* !__ASSEMBLY__ */ + #endif /* __ASM_POWERPC_REG_BOOKE_H__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/rio.h b/arch/powerpc/include/asm/rio.h index b1d2deceeed..ec800f28fec 100644 --- a/arch/powerpc/include/asm/rio.h +++ b/arch/powerpc/include/asm/rio.h @@ -13,7 +13,6 @@ #ifndef ASM_PPC_RIO_H #define ASM_PPC_RIO_H -extern void platform_rio_init(void); #ifdef CONFIG_FSL_RIO extern int fsl_rio_mcheck_exception(struct pt_regs *); #else diff --git a/arch/powerpc/include/asm/scatterlist.h b/arch/powerpc/include/asm/scatterlist.h deleted file mode 100644 index de1f620bd5c..00000000000 --- a/arch/powerpc/include/asm/scatterlist.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _ASM_POWERPC_SCATTERLIST_H -#define _ASM_POWERPC_SCATTERLIST_H -/* - * Copyright (C) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/dma.h> -#include <asm-generic/scatterlist.h> - -#define ARCH_HAS_SG_CHAIN - -#endif /* _ASM_POWERPC_SCATTERLIST_H */ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 35aa339410b..4dbe072eecb 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -61,6 +61,7 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { + smp_mb(); return !arch_spin_value_unlocked(*lock); } diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h index 37b7ca39ec9..a6e6e2bf9d1 100644 --- a/arch/powerpc/include/asm/spu.h +++ b/arch/powerpc/include/asm/spu.h @@ -27,6 +27,8 @@ #include <linux/workqueue.h> #include <linux/device.h> #include <linux/mutex.h> +#include <asm/reg.h> +#include <asm/copro.h> #define LS_SIZE (256 * 1024) #define LS_ADDR_MASK (LS_SIZE - 1) @@ -277,9 +279,6 @@ void spu_remove_dev_attr(struct device_attribute *attr); int spu_add_dev_attr_group(struct attribute_group *attrs); void spu_remove_dev_attr_group(struct attribute_group *attrs); -int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, - unsigned long dsisr, unsigned *flt); - /* * Notifier blocks: * diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index f593b0f9b62..d3a42cc45a8 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -25,3 +25,65 @@ struct pt_regs; /* Emulate instructions that cause a transfer of control. */ extern int emulate_step(struct pt_regs *regs, unsigned int instr); + +enum instruction_type { + COMPUTE, /* arith/logical/CR op, etc. */ + LOAD, + LOAD_MULTI, + LOAD_FP, + LOAD_VMX, + LOAD_VSX, + STORE, + STORE_MULTI, + STORE_FP, + STORE_VMX, + STORE_VSX, + LARX, + STCX, + BRANCH, + MFSPR, + MTSPR, + CACHEOP, + BARRIER, + SYSCALL, + MFMSR, + MTMSR, + RFI, + INTERRUPT, + UNKNOWN +}; + +#define INSTR_TYPE_MASK 0x1f + +/* Load/store flags, ORed in with type */ +#define SIGNEXT 0x20 +#define UPDATE 0x40 /* matches bit in opcode 31 instructions */ +#define BYTEREV 0x80 + +/* Cacheop values, ORed in with type */ +#define CACHEOP_MASK 0x700 +#define DCBST 0 +#define DCBF 0x100 +#define DCBTST 0x200 +#define DCBT 0x300 +#define ICBI 0x400 + +/* Size field in type word */ +#define SIZE(n) ((n) << 8) +#define GETSIZE(w) ((w) >> 8) + +#define MKOP(t, f, s) ((t) | (f) | SIZE(s)) + +struct instruction_op { + int type; + int reg; + unsigned long val; + /* For LOAD/STORE/LARX/STCX */ + unsigned long ea; + int update_reg; + /* For MFSPR */ + int spr; +}; + +extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs, + unsigned int instr); diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index babbeca6850..7d8a6006880 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -77,10 +77,10 @@ SYSCALL_SPU(setreuid) SYSCALL_SPU(setregid) #define compat_sys_sigsuspend sys_sigsuspend SYS32ONLY(sigsuspend) -COMPAT_SYS(sigpending) +SYSX(sys_ni_syscall,compat_sys_sigpending,sys_sigpending) SYSCALL_SPU(sethostname) COMPAT_SYS_SPU(setrlimit) -COMPAT_SYS(old_getrlimit) +SYSX(sys_ni_syscall,compat_sys_old_getrlimit,sys_old_getrlimit) COMPAT_SYS_SPU(getrusage) COMPAT_SYS_SPU(gettimeofday) COMPAT_SYS_SPU(settimeofday) @@ -362,3 +362,6 @@ SYSCALL(ni_syscall) /* sys_kcmp */ SYSCALL_SPU(sched_setattr) SYSCALL_SPU(sched_getattr) SYSCALL_SPU(renameat2) +SYSCALL_SPU(seccomp) +SYSCALL_SPU(getrandom) +SYSCALL_SPU(memfd_create) diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 1d428e6007c..03cbada59d3 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -102,6 +102,15 @@ static inline u64 get_rtc(void) return (u64)hi * 1000000000 + lo; } +static inline u64 get_vtb(void) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return mfvtb(); +#endif + return 0; +} + #ifdef CONFIG_PPC64 static inline u64 get_tb(void) { diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 5712f06905a..c15da6073cb 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -99,6 +99,51 @@ TRACE_EVENT_FN(hcall_exit, ); #endif +#ifdef CONFIG_PPC_POWERNV +extern void opal_tracepoint_regfunc(void); +extern void opal_tracepoint_unregfunc(void); + +TRACE_EVENT_FN(opal_entry, + + TP_PROTO(unsigned long opcode, unsigned long *args), + + TP_ARGS(opcode, args), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + ), + + TP_fast_assign( + __entry->opcode = opcode; + ), + + TP_printk("opcode=%lu", __entry->opcode), + + opal_tracepoint_regfunc, opal_tracepoint_unregfunc +); + +TRACE_EVENT_FN(opal_exit, + + TP_PROTO(unsigned long opcode, unsigned long retval), + + TP_ARGS(opcode, retval), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + __field(unsigned long, retval) + ), + + TP_fast_assign( + __entry->opcode = opcode; + __entry->retval = retval; + ), + + TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval), + + opal_tracepoint_regfunc, opal_tracepoint_unregfunc +); +#endif + #endif /* _TRACE_POWERPC_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/powerpc/include/asm/tsi108.h b/arch/powerpc/include/asm/tsi108.h index f8b60793b7a..d531d9e173e 100644 --- a/arch/powerpc/include/asm/tsi108.h +++ b/arch/powerpc/include/asm/tsi108.h @@ -84,10 +84,6 @@ extern u32 tsi108_pci_cfg_base; /* Exported functions */ -extern int tsi108_bridge_init(struct pci_controller *hose, uint phys_csr_base); -extern unsigned long tsi108_get_mem_size(void); -extern unsigned long tsi108_get_cpu_clk(void); -extern unsigned long tsi108_get_sdc_clk(void); extern int tsi108_direct_write_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 val); extern int tsi108_direct_read_config(struct pci_bus *bus, unsigned int devfn, diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index b51fba10e73..78f2675f2aa 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -52,7 +52,6 @@ extern void __init udbg_init_44x_as1(void); extern void __init udbg_init_40x_realmode(void); extern void __init udbg_init_cpm(void); extern void __init udbg_init_usbgecko(void); -extern void __init udbg_init_wsp(void); extern void __init udbg_init_memcons(void); extern void __init udbg_init_ehv_bc(void); extern void __init udbg_init_ps3gelic(void); diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 5ce5552ab9f..4e9af3fd43e 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include <uapi/asm/unistd.h> -#define __NR_syscalls 358 +#define __NR_syscalls 361 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index 9a5c928bb3c..5b3a903adae 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -42,32 +42,65 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct #else +#ifdef CONFIG_64BIT + +/* unused */ struct word_at_a_time { - const unsigned long one_bits, high_bits; }; -#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } +#define WORD_AT_A_TIME_CONSTANTS { } -#ifdef CONFIG_64BIT +/* This will give us 0xff for a NULL char and 0x00 elsewhere */ +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) +{ + unsigned long ret; + unsigned long zero = 0; -/* Alan Modra's little-endian strlen tail for 64-bit */ -#define create_zero_mask(mask) (mask) + asm("cmpb %0,%1,%2" : "=r" (ret) : "r" (a), "r" (zero)); + *bits = ret; -static inline unsigned long find_zero(unsigned long mask) + return ret; +} + +static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c) +{ + return bits; +} + +/* Alan Modra's little-endian strlen tail for 64-bit */ +static inline unsigned long create_zero_mask(unsigned long bits) { unsigned long leading_zero_bits; long trailing_zero_bit_mask; - asm ("addi %1,%2,-1\n\t" - "andc %1,%1,%2\n\t" - "popcntd %0,%1" - : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask) - : "r" (mask)); - return leading_zero_bits >> 3; + asm("addi %1,%2,-1\n\t" + "andc %1,%1,%2\n\t" + "popcntd %0,%1" + : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask) + : "r" (bits)); + + return leading_zero_bits; +} + +static inline unsigned long find_zero(unsigned long mask) +{ + return mask >> 3; +} + +/* This assumes that we never ask for an all 1s bitmask */ +static inline unsigned long zero_bytemask(unsigned long mask) +{ + return (1UL << mask) - 1; } #else /* 32-bit case */ +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + /* * This is largely generic for little-endian machines, but the * optimal byte mask counting is probably going to be something @@ -96,8 +129,6 @@ static inline unsigned long find_zero(unsigned long mask) return count_masked_bytes(mask); } -#endif - /* Return nonzero if it has a zero */ static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) { @@ -114,6 +145,59 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, /* The mask we created is directly usable as a bytemask */ #define zero_bytemask(mask) (mask) +#endif /* CONFIG_64BIT */ + +#endif /* __BIG_ENDIAN__ */ + +/* + * We use load_unaligned_zero() in a selftest, which builds a userspace + * program. Some linker scripts seem to discard the .fixup section, so allow + * the test code to use a different section name. + */ +#ifndef FIXUP_SECTION +#define FIXUP_SECTION ".fixup" +#endif + +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, offset, tmp; + + asm( + "1: " PPC_LL "%[ret], 0(%[addr])\n" + "2:\n" + ".section " FIXUP_SECTION ",\"ax\"\n" + "3: " +#ifdef __powerpc64__ + "clrrdi %[tmp], %[addr], 3\n\t" + "clrlsldi %[offset], %[addr], 61, 3\n\t" + "ld %[ret], 0(%[tmp])\n\t" +#ifdef __BIG_ENDIAN__ + "sld %[ret], %[ret], %[offset]\n\t" +#else + "srd %[ret], %[ret], %[offset]\n\t" #endif +#else + "clrrwi %[tmp], %[addr], 2\n\t" + "clrlslwi %[offset], %[addr], 30, 3\n\t" + "lwz %[ret], 0(%[tmp])\n\t" +#ifdef __BIG_ENDIAN__ + "slw %[ret], %[ret], %[offset]\n\t" +#else + "srw %[ret], %[ret], %[offset]\n\t" +#endif +#endif + "b 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n\t" + PPC_LONG_ALIGN "\n\t" + PPC_LONG "1b,3b\n" + ".previous" + : [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret) + : [addr] "b" (addr), "m" (*(unsigned long *)addr)); + + return ret; +} + +#undef FIXUP_SECTION #endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 282d43a0c85..0d050ea37a0 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -29,6 +29,7 @@ /* Native ICP */ #ifdef CONFIG_PPC_ICP_NATIVE extern int icp_native_init(void); +extern void icp_native_flush_interrupt(void); #else static inline int icp_native_init(void) { return -ENODEV; } #endif diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 2bc4a9409a9..ab4d4732c49 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -476,6 +476,11 @@ struct kvm_get_htab_header { /* FP and vector status/control registers */ #define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80) +/* + * VSCR register is documented as a 32-bit register in the ISA, but it can + * only be accesses via a vector register. Expose VSCR as a 32-bit register + * even though the kernel represents it as a 128-bit vector. + */ #define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81) /* Virtual processor areas */ @@ -548,6 +553,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4) #define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5) +#define KVM_REG_PPC_LPCR_64 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb5) #define KVM_REG_PPC_PPR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6) /* Architecture compatibility level */ @@ -555,6 +561,8 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) #define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9) +#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) +#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 2d526f7b48d..0688fc06e18 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -380,5 +380,8 @@ #define __NR_sched_setattr 355 #define __NR_sched_getattr 356 #define __NR_renameat2 357 +#define __NR_seccomp 358 +#define __NR_getrandom 359 +#define __NR_memfd_create 360 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 670c312d914..502cf69b6c8 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -93,6 +93,9 @@ obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_MODULES) += ppc_ksyms.o +ifeq ($(CONFIG_PPC32),y) +obj-$(CONFIG_MODULES) += ppc_ksyms_32.o +endif obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f5995a91221..9d7dede2847 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -216,8 +216,6 @@ int main(void) #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_STD_MMU_64 - DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real)); - DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr)); DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp)); @@ -493,6 +491,7 @@ int main(void) DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); + DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor)); DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); @@ -667,6 +666,7 @@ int main(void) DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); + DEFINE(VCPU_SPRG9, offsetof(struct kvm_vcpu, arch.sprg9)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 4f1393d2007..dddba3e9426 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -91,6 +91,7 @@ _GLOBAL(setup_altivec_idle) blr +#ifdef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e6500) mflr r6 #ifdef CONFIG_PPC64 @@ -107,14 +108,20 @@ _GLOBAL(__setup_cpu_e6500) bl __setup_cpu_e5500 mtlr r6 blr +#endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 +#ifdef CONFIG_E200 _GLOBAL(__setup_cpu_e200) /* enable dedicated debug exception handling resources (Debug APU) */ mfspr r3,SPRN_HID0 ori r3,r3,HID0_DAPUEN@l mtspr SPRN_HID0,r3 b __setup_e200_ivors +#endif /* CONFIG_E200 */ + +#ifdef CONFIG_E500 +#ifndef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e500v1) _GLOBAL(__setup_cpu_e500v2) mflr r4 @@ -129,6 +136,7 @@ _GLOBAL(__setup_cpu_e500v2) #endif mtlr r4 blr +#else /* CONFIG_PPC_E500MC */ _GLOBAL(__setup_cpu_e500mc) _GLOBAL(__setup_cpu_e5500) mflr r5 @@ -159,7 +167,9 @@ _GLOBAL(__setup_cpu_e5500) 2: mtlr r5 blr -#endif +#endif /* CONFIG_PPC_E500MC */ +#endif /* CONFIG_E500 */ +#endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC_BOOK3E_64 _GLOBAL(__restore_cpu_e6500) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 0c157642c2a..80840590633 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -123,96 +123,6 @@ extern void __restore_cpu_e6500(void); static struct cpu_spec __initdata cpu_specs[] = { #ifdef CONFIG_PPC_BOOK3S_64 - { /* Power3 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00400000, - .cpu_name = "POWER3 (630)", - .cpu_features = CPU_FTRS_POWER3, - .cpu_user_features = COMMON_USER_PPC64|PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power3", - .oprofile_type = PPC_OPROFILE_RS64, - .platform = "power3", - }, - { /* Power3+ */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00410000, - .cpu_name = "POWER3 (630+)", - .cpu_features = CPU_FTRS_POWER3, - .cpu_user_features = COMMON_USER_PPC64|PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power3", - .oprofile_type = PPC_OPROFILE_RS64, - .platform = "power3", - }, - { /* Northstar */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00330000, - .cpu_name = "RS64-II (northstar)", - .cpu_features = CPU_FTRS_RS64, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/rs64", - .oprofile_type = PPC_OPROFILE_RS64, - .platform = "rs64", - }, - { /* Pulsar */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00340000, - .cpu_name = "RS64-III (pulsar)", - .cpu_features = CPU_FTRS_RS64, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/rs64", - .oprofile_type = PPC_OPROFILE_RS64, - .platform = "rs64", - }, - { /* I-star */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00360000, - .cpu_name = "RS64-III (icestar)", - .cpu_features = CPU_FTRS_RS64, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/rs64", - .oprofile_type = PPC_OPROFILE_RS64, - .platform = "rs64", - }, - { /* S-star */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00370000, - .cpu_name = "RS64-IV (sstar)", - .cpu_features = CPU_FTRS_RS64, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/rs64", - .oprofile_type = PPC_OPROFILE_RS64, - .platform = "rs64", - }, { /* Power4 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00350000, @@ -617,7 +527,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC32 -#if CLASSIC_PPC +#ifdef CONFIG_PPC_BOOK3S_32 { /* 601 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00010000, @@ -1257,7 +1167,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc603", }, -#endif /* CLASSIC_PPC */ +#endif /* CONFIG_PPC_BOOK3S_32 */ #ifdef CONFIG_8xx { /* 8xx */ .pvr_mask = 0xffff0000, @@ -2051,6 +1961,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 #ifdef CONFIG_PPC32 +#ifndef CONFIG_PPC_E500MC { /* e500 */ .pvr_mask = 0xffff0000, .pvr_value = 0x80200000, @@ -2090,6 +2001,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500, .platform = "ppc8548", }, +#else { /* e500mc */ .pvr_mask = 0xffff0000, .pvr_value = 0x80230000, @@ -2108,7 +2020,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500mc, .platform = "ppce500mc", }, +#endif /* CONFIG_PPC_E500MC */ #endif /* CONFIG_PPC32 */ +#ifdef CONFIG_PPC_E500MC { /* e5500 */ .pvr_mask = 0xffff0000, .pvr_value = 0x80240000, @@ -2152,6 +2066,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500mc, .platform = "ppce6500", }, +#endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 { /* default match */ .pvr_mask = 0x00000000, diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 7a13f378ca2..c78e6dac4d7 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -13,6 +13,7 @@ #include <linux/crash_dump.h> #include <linux/bootmem.h> +#include <linux/io.h> #include <linux/memblock.h> #include <asm/code-patching.h> #include <asm/kdump.h> diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index bd1a2aba599..735979764cd 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -106,10 +106,14 @@ int __init swiotlb_setup_bus_notifier(void) return 0; } -void swiotlb_detect_4g(void) +void __init swiotlb_detect_4g(void) { - if ((memblock_end_of_DRAM() - 1) > 0xffffffff) + if ((memblock_end_of_DRAM() - 1) > 0xffffffff) { ppc_swiotlb_enable = 1; +#ifdef CONFIG_ZONE_DMA32 + limit_zone_pfn(ZONE_DMA32, (1ULL << 32) >> PAGE_SHIFT); +#endif + } } static int __init swiotlb_late_init(void) diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index ee78f6e49d6..adac9dc54ae 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -15,6 +15,7 @@ #include <asm/vio.h> #include <asm/bug.h> #include <asm/machdep.h> +#include <asm/swiotlb.h> /* * Generic direct DMA implementation @@ -25,6 +26,18 @@ * default the offset is PCI_DRAM_OFFSET. */ +static u64 __maybe_unused get_pfn_limit(struct device *dev) +{ + u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1; + struct dev_archdata __maybe_unused *sd = &dev->archdata; + +#ifdef CONFIG_SWIOTLB + if (sd->max_direct_dma_addr && sd->dma_ops == &swiotlb_dma_ops) + pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT); +#endif + + return pfn; +} void *dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, @@ -40,6 +53,26 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, #else struct page *page; int node = dev_to_node(dev); + u64 pfn = get_pfn_limit(dev); + int zone; + + zone = dma_pfn_limit_to_zone(pfn); + if (zone < 0) { + dev_err(dev, "%s: No suitable zone for pfn %#llx\n", + __func__, pfn); + return NULL; + } + + switch (zone) { + case ZONE_DMA: + flag |= GFP_DMA; + break; +#ifdef CONFIG_ZONE_DMA32 + case ZONE_DMA32: + flag |= GFP_DMA32; + break; +#endif + }; /* ignore region specifiers */ flag &= ~(__GFP_HIGHMEM); @@ -202,6 +235,7 @@ int __dma_set_mask(struct device *dev, u64 dma_mask) *dev->dma_mask = dma_mask; return 0; } + int dma_set_mask(struct device *dev, u64 dma_mask) { if (ppc_md.dma_set_mask) @@ -210,13 +244,10 @@ int dma_set_mask(struct device *dev, u64 dma_mask) } EXPORT_SYMBOL(dma_set_mask); -u64 dma_get_required_mask(struct device *dev) +u64 __dma_get_required_mask(struct device *dev) { struct dma_map_ops *dma_ops = get_dma_ops(dev); - if (ppc_md.dma_get_required_mask) - return ppc_md.dma_get_required_mask(dev); - if (unlikely(dma_ops == NULL)) return 0; @@ -225,6 +256,14 @@ u64 dma_get_required_mask(struct device *dev) return DMA_BIT_MASK(8 * sizeof(dma_addr_t)); } + +u64 dma_get_required_mask(struct device *dev) +{ + if (ppc_md.dma_get_required_mask) + return ppc_md.dma_get_required_mask(dev); + + return __dma_get_required_mask(dev); +} EXPORT_SYMBOL_GPL(dma_get_required_mask); static int __init dma_init(void) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 86e25702aac..d543e4179c1 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/pci.h> +#include <linux/iommu.h> #include <linux/proc_fs.h> #include <linux/rbtree.h> #include <linux/reboot.h> @@ -40,6 +41,7 @@ #include <asm/eeh.h> #include <asm/eeh_event.h> #include <asm/io.h> +#include <asm/iommu.h> #include <asm/machdep.h> #include <asm/ppc-pci.h> #include <asm/rtas.h> @@ -108,11 +110,14 @@ struct eeh_ops *eeh_ops = NULL; /* Lock to avoid races due to multiple reports of an error */ DEFINE_RAW_SPINLOCK(confirm_error_lock); +/* Lock to protect passed flags */ +static DEFINE_MUTEX(eeh_dev_mutex); + /* Buffer for reporting pci register dumps. Its here in BSS, and * not dynamically alloced, so that it ends up in RMO where RTAS * can access it. */ -#define EEH_PCI_REGS_LOG_LEN 4096 +#define EEH_PCI_REGS_LOG_LEN 8192 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; /* @@ -137,27 +142,24 @@ static struct eeh_stats eeh_stats; static int __init eeh_setup(char *str) { if (!strcmp(str, "off")) - eeh_subsystem_flags |= EEH_FORCE_DISABLED; + eeh_add_flag(EEH_FORCE_DISABLED); return 1; } __setup("eeh=", eeh_setup); -/** - * eeh_gather_pci_data - Copy assorted PCI config space registers to buff - * @edev: device to report data for - * @buf: point to buffer in which to log - * @len: amount of room in buffer - * - * This routine captures assorted PCI configuration space data, - * and puts them into a buffer for RTAS error logging. +/* + * This routine captures assorted PCI configuration space data + * for the indicated PCI device, and puts them into a buffer + * for RTAS error logging. */ -static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) +static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) { struct device_node *dn = eeh_dev_to_of_node(edev); u32 cfg; int cap, i; - int n = 0; + int n = 0, l = 0; + char buffer[128]; n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); pr_warn("EEH: of node=%s\n", dn->full_name); @@ -202,8 +204,22 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) for (i=0; i<=8; i++) { eeh_ops->read_config(dn, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - pr_warn("EEH: PCI-E %02x: %08x\n", i, cfg); + + if ((i % 4) == 0) { + if (i != 0) + pr_warn("%s\n", buffer); + + l = scnprintf(buffer, sizeof(buffer), + "EEH: PCI-E %02x: %08x ", + 4*i, cfg); + } else { + l += scnprintf(buffer+l, sizeof(buffer)-l, + "%08x ", cfg); + } + } + + pr_warn("%s\n", buffer); } /* If AER capable, dump it */ @@ -212,16 +228,42 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) n += scnprintf(buf+n, len-n, "pci-e AER:\n"); pr_warn("EEH: PCI-E AER capability register set follows:\n"); - for (i=0; i<14; i++) { + for (i=0; i<=13; i++) { eeh_ops->read_config(dn, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg); + + if ((i % 4) == 0) { + if (i != 0) + pr_warn("%s\n", buffer); + + l = scnprintf(buffer, sizeof(buffer), + "EEH: PCI-E AER %02x: %08x ", + 4*i, cfg); + } else { + l += scnprintf(buffer+l, sizeof(buffer)-l, + "%08x ", cfg); + } } + + pr_warn("%s\n", buffer); } return n; } +static void *eeh_dump_pe_log(void *data, void *flag) +{ + struct eeh_pe *pe = data; + struct eeh_dev *edev, *tmp; + size_t *plen = flag; + + eeh_pe_for_each_dev(pe, edev, tmp) + *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, + EEH_PCI_REGS_LOG_LEN - *plen); + + return NULL; +} + /** * eeh_slot_error_detail - Generate combined log including driver log and error log * @pe: EEH PE @@ -235,7 +277,6 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; - struct eeh_dev *edev, *tmp; /* * When the PHB is fenced or dead, it's pointless to collect @@ -247,16 +288,13 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) * 0xFF's is always returned from PCI config space. */ if (!(pe->type & EEH_PE_PHB)) { - if (eeh_probe_mode_devtree()) + if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG)) eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); pci_regs_buf[0] = 0; - eeh_pe_for_each_dev(pe, edev, tmp) { - loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen, - EEH_PCI_REGS_LOG_LEN - loglen); - } + eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); } eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); @@ -298,14 +336,14 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) unsigned long flags; int ret; - if (!eeh_probe_mode_dev()) + if (!eeh_has_flag(EEH_PROBE_MODE_DEV)) return -EPERM; /* Find the PHB PE */ phb_pe = eeh_phb_pe_get(pe->phb); if (!phb_pe) { - pr_warning("%s Can't find PE for PHB#%d\n", - __func__, pe->phb->global_number); + pr_warn("%s Can't find PE for PHB#%d\n", + __func__, pe->phb->global_number); return -EEXIST; } @@ -377,7 +415,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) } dn = eeh_dev_to_of_node(edev); dev = eeh_dev_to_pci_dev(edev); - pe = edev->pe; + pe = eeh_dev_to_pe(edev); /* Access to IO BARs might get this far and still not want checking. */ if (!pe) { @@ -400,6 +438,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev) if (ret > 0) return ret; + /* + * If the PE isn't owned by us, we shouldn't check the + * state. Instead, let the owner handle it if the PE has + * been frozen. + */ + if (eeh_pe_passed(pe)) + return 0; + /* If we already have a pending isolation event for this * slot, we know it's bad already, we don't need to check. * Do this checking under a lock; as multiple PCI devices @@ -501,17 +547,16 @@ EXPORT_SYMBOL_GPL(eeh_dev_check_failure); /** * eeh_check_failure - Check if all 1's data is due to EEH slot freeze - * @token: I/O token, should be address in the form 0xA.... - * @val: value, should be all 1's (XXX why do we need this arg??) + * @token: I/O address * - * Check for an EEH failure at the given token address. Call this + * Check for an EEH failure at the given I/O address. Call this * routine if the result of a read was all 0xff's and you want to - * find out if this is due to an EEH slot freeze event. This routine + * find out if this is due to an EEH slot freeze event. This routine * will query firmware for the EEH status. * * Note this routine is safe to call in an interrupt context. */ -unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) +int eeh_check_failure(const volatile void __iomem *token) { unsigned long addr; struct eeh_dev *edev; @@ -521,13 +566,11 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon edev = eeh_addr_cache_get_dev(addr); if (!edev) { eeh_stats.no_device++; - return val; + return 0; } - eeh_dev_check_failure(edev); - return val; + return eeh_dev_check_failure(edev); } - EXPORT_SYMBOL(eeh_check_failure); @@ -541,25 +584,51 @@ EXPORT_SYMBOL(eeh_check_failure); */ int eeh_pci_enable(struct eeh_pe *pe, int function) { - int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + int active_flag, rc; /* * pHyp doesn't allow to enable IO or DMA on unfrozen PE. * Also, it's pointless to enable them on unfrozen PE. So - * we have the check here. + * we have to check before enabling IO or DMA. + */ + switch (function) { + case EEH_OPT_THAW_MMIO: + active_flag = EEH_STATE_MMIO_ACTIVE; + break; + case EEH_OPT_THAW_DMA: + active_flag = EEH_STATE_DMA_ACTIVE; + break; + case EEH_OPT_DISABLE: + case EEH_OPT_ENABLE: + case EEH_OPT_FREEZE_PE: + active_flag = 0; + break; + default: + pr_warn("%s: Invalid function %d\n", + __func__, function); + return -EINVAL; + } + + /* + * Check if IO or DMA has been enabled before + * enabling them. */ - if (function == EEH_OPT_THAW_MMIO || - function == EEH_OPT_THAW_DMA) { + if (active_flag) { rc = eeh_ops->get_state(pe, NULL); if (rc < 0) return rc; - /* Needn't to enable or already enabled */ - if ((rc == EEH_STATE_NOT_SUPPORT) || - ((rc & flags) == flags)) + /* Needn't enable it at all */ + if (rc == EEH_STATE_NOT_SUPPORT) + return 0; + + /* It's already enabled */ + if (rc & active_flag) return 0; } + + /* Issue the request */ rc = eeh_ops->set_option(pe, function); if (rc) pr_warn("%s: Unexpected state change %d on " @@ -567,17 +636,17 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) __func__, function, pe->phb->global_number, pe->addr, rc); - rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc <= 0) - return rc; + /* Check if the request is finished successfully */ + if (active_flag) { + rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); + if (rc <= 0) + return rc; - if ((function == EEH_OPT_THAW_MMIO) && - (rc & EEH_STATE_MMIO_ENABLED)) - return 0; + if (rc & active_flag) + return 0; - if ((function == EEH_OPT_THAW_DMA) && - (rc & EEH_STATE_DMA_ENABLED)) - return 0; + return -EIO; + } return rc; } @@ -593,7 +662,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) { struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - struct eeh_pe *pe = edev->pe; + struct eeh_pe *pe = eeh_dev_to_pe(edev); if (!pe) { pr_err("%s: No PE found on PCI device %s\n", @@ -604,14 +673,18 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat switch (state) { case pcie_deassert_reset: eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); + eeh_pe_state_clear(pe, EEH_PE_RESET); break; case pcie_hot_reset: + eeh_pe_state_mark(pe, EEH_PE_RESET); eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: + eeh_pe_state_mark(pe, EEH_PE_RESET); eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: + eeh_pe_state_clear(pe, EEH_PE_RESET); return -EINVAL; }; @@ -746,13 +819,13 @@ void eeh_save_bars(struct eeh_dev *edev) int __init eeh_ops_register(struct eeh_ops *ops) { if (!ops->name) { - pr_warning("%s: Invalid EEH ops name for %p\n", + pr_warn("%s: Invalid EEH ops name for %p\n", __func__, ops); return -EINVAL; } if (eeh_ops && eeh_ops != ops) { - pr_warning("%s: EEH ops of platform %s already existing (%s)\n", + pr_warn("%s: EEH ops of platform %s already existing (%s)\n", __func__, eeh_ops->name, ops->name); return -EEXIST; } @@ -772,7 +845,7 @@ int __init eeh_ops_register(struct eeh_ops *ops) int __exit eeh_ops_unregister(const char *name) { if (!name || !strlen(name)) { - pr_warning("%s: Invalid EEH ops name\n", + pr_warn("%s: Invalid EEH ops name\n", __func__); return -EINVAL; } @@ -788,7 +861,7 @@ int __exit eeh_ops_unregister(const char *name) static int eeh_reboot_notifier(struct notifier_block *nb, unsigned long action, void *unused) { - eeh_set_enable(false); + eeh_clear_flag(EEH_ENABLED); return NOTIFY_DONE; } @@ -837,11 +910,11 @@ int eeh_init(void) /* call platform initialization function */ if (!eeh_ops) { - pr_warning("%s: Platform EEH operation not found\n", + pr_warn("%s: Platform EEH operation not found\n", __func__); return -EEXIST; } else if ((ret = eeh_ops->init())) { - pr_warning("%s: Failed to call platform init function (%d)\n", + pr_warn("%s: Failed to call platform init function (%d)\n", __func__, ret); return ret; } @@ -852,13 +925,13 @@ int eeh_init(void) return ret; /* Enable EEH for all adapters */ - if (eeh_probe_mode_devtree()) { + if (eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) { list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->dn; traverse_pci_devices(phb, eeh_ops->of_probe, NULL); } - } else if (eeh_probe_mode_dev()) { + } else if (eeh_has_flag(EEH_PROBE_MODE_DEV)) { list_for_each_entry_safe(hose, tmp, &hose_list, list_node) pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); @@ -882,7 +955,7 @@ int eeh_init(void) if (eeh_enabled()) pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); else - pr_warning("EEH: No capable adapters found\n"); + pr_warn("EEH: No capable adapters found\n"); return ret; } @@ -910,7 +983,7 @@ void eeh_add_device_early(struct device_node *dn) * would delay the probe until late stage because * the PCI device isn't available this moment. */ - if (!eeh_probe_mode_devtree()) + if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) return; if (!of_node_to_eeh_dev(dn)) @@ -996,7 +1069,7 @@ void eeh_add_device_late(struct pci_dev *dev) * We have to do the EEH probe here because the PCI device * hasn't been created yet in the early stage. */ - if (eeh_probe_mode_dev()) + if (eeh_has_flag(EEH_PROBE_MODE_DEV)) eeh_ops->dev_probe(dev, NULL); eeh_addr_cache_insert_dev(dev); @@ -1100,6 +1173,404 @@ void eeh_remove_device(struct pci_dev *dev) edev->mode &= ~EEH_DEV_SYSFS; } +int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) +{ + int ret; + + ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (ret) { + pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n", + __func__, ret, pe->phb->global_number, pe->addr); + return ret; + } + + ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); + if (ret) { + pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n", + __func__, ret, pe->phb->global_number, pe->addr); + return ret; + } + + /* Clear software isolated state */ + if (sw_state && (pe->state & EEH_PE_ISOLATED)) + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + + return ret; +} + + +static struct pci_device_id eeh_reset_ids[] = { + { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ + { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ + { 0 } +}; + +static int eeh_pe_change_owner(struct eeh_pe *pe) +{ + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; + struct pci_device_id *id; + int flags, ret; + + /* Check PE state */ + flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + ret = eeh_ops->get_state(pe, NULL); + if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) + return 0; + + /* Unfrozen PE, nothing to do */ + if ((ret & flags) == flags) + return 0; + + /* Frozen PE, check if it needs PE level reset */ + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) { + if (id->vendor != PCI_ANY_ID && + id->vendor != pdev->vendor) + continue; + if (id->device != PCI_ANY_ID && + id->device != pdev->device) + continue; + if (id->subvendor != PCI_ANY_ID && + id->subvendor != pdev->subsystem_vendor) + continue; + if (id->subdevice != PCI_ANY_ID && + id->subdevice != pdev->subsystem_device) + continue; + + goto reset; + } + } + + return eeh_unfreeze_pe(pe, true); + +reset: + return eeh_pe_reset_and_recover(pe); +} + +/** + * eeh_dev_open - Increase count of pass through devices for PE + * @pdev: PCI device + * + * Increase count of passed through devices for the indicated + * PE. In the result, the EEH errors detected on the PE won't be + * reported. The PE owner will be responsible for detection + * and recovery. + */ +int eeh_dev_open(struct pci_dev *pdev) +{ + struct eeh_dev *edev; + int ret = -ENODEV; + + mutex_lock(&eeh_dev_mutex); + + /* No PCI device ? */ + if (!pdev) + goto out; + + /* No EEH device or PE ? */ + edev = pci_dev_to_eeh_dev(pdev); + if (!edev || !edev->pe) + goto out; + + /* + * The PE might have been put into frozen state, but we + * didn't detect that yet. The passed through PCI devices + * in frozen PE won't work properly. Clear the frozen state + * in advance. + */ + ret = eeh_pe_change_owner(edev->pe); + if (ret) + goto out; + + /* Increase PE's pass through count */ + atomic_inc(&edev->pe->pass_dev_cnt); + mutex_unlock(&eeh_dev_mutex); + + return 0; +out: + mutex_unlock(&eeh_dev_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(eeh_dev_open); + +/** + * eeh_dev_release - Decrease count of pass through devices for PE + * @pdev: PCI device + * + * Decrease count of pass through devices for the indicated PE. If + * there is no passed through device in PE, the EEH errors detected + * on the PE will be reported and handled as usual. + */ +void eeh_dev_release(struct pci_dev *pdev) +{ + struct eeh_dev *edev; + + mutex_lock(&eeh_dev_mutex); + + /* No PCI device ? */ + if (!pdev) + goto out; + + /* No EEH device ? */ + edev = pci_dev_to_eeh_dev(pdev); + if (!edev || !edev->pe || !eeh_pe_passed(edev->pe)) + goto out; + + /* Decrease PE's pass through count */ + atomic_dec(&edev->pe->pass_dev_cnt); + WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0); + eeh_pe_change_owner(edev->pe); +out: + mutex_unlock(&eeh_dev_mutex); +} +EXPORT_SYMBOL(eeh_dev_release); + +#ifdef CONFIG_IOMMU_API + +static int dev_has_iommu_table(struct device *dev, void *data) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev **ppdev = data; + struct iommu_table *tbl; + + if (!dev) + return 0; + + tbl = get_iommu_table_base(dev); + if (tbl && tbl->it_group) { + *ppdev = pdev; + return 1; + } + + return 0; +} + +/** + * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE + * @group: IOMMU group + * + * The routine is called to convert IOMMU group to EEH PE. + */ +struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group) +{ + struct pci_dev *pdev = NULL; + struct eeh_dev *edev; + int ret; + + /* No IOMMU group ? */ + if (!group) + return NULL; + + ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table); + if (!ret || !pdev) + return NULL; + + /* No EEH device or PE ? */ + edev = pci_dev_to_eeh_dev(pdev); + if (!edev || !edev->pe) + return NULL; + + return edev->pe; +} +EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe); + +#endif /* CONFIG_IOMMU_API */ + +/** + * eeh_pe_set_option - Set options for the indicated PE + * @pe: EEH PE + * @option: requested option + * + * The routine is called to enable or disable EEH functionality + * on the indicated PE, to enable IO or DMA for the frozen PE. + */ +int eeh_pe_set_option(struct eeh_pe *pe, int option) +{ + int ret = 0; + + /* Invalid PE ? */ + if (!pe) + return -ENODEV; + + /* + * EEH functionality could possibly be disabled, just + * return error for the case. And the EEH functinality + * isn't expected to be disabled on one specific PE. + */ + switch (option) { + case EEH_OPT_ENABLE: + if (eeh_enabled()) { + ret = eeh_pe_change_owner(pe); + break; + } + ret = -EIO; + break; + case EEH_OPT_DISABLE: + break; + case EEH_OPT_THAW_MMIO: + case EEH_OPT_THAW_DMA: + if (!eeh_ops || !eeh_ops->set_option) { + ret = -ENOENT; + break; + } + + ret = eeh_pci_enable(pe, option); + break; + default: + pr_debug("%s: Option %d out of range (%d, %d)\n", + __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA); + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(eeh_pe_set_option); + +/** + * eeh_pe_get_state - Retrieve PE's state + * @pe: EEH PE + * + * Retrieve the PE's state, which includes 3 aspects: enabled + * DMA, enabled IO and asserted reset. + */ +int eeh_pe_get_state(struct eeh_pe *pe) +{ + int result, ret = 0; + bool rst_active, dma_en, mmio_en; + + /* Existing PE ? */ + if (!pe) + return -ENODEV; + + if (!eeh_ops || !eeh_ops->get_state) + return -ENOENT; + + result = eeh_ops->get_state(pe, NULL); + rst_active = !!(result & EEH_STATE_RESET_ACTIVE); + dma_en = !!(result & EEH_STATE_DMA_ENABLED); + mmio_en = !!(result & EEH_STATE_MMIO_ENABLED); + + if (rst_active) + ret = EEH_PE_STATE_RESET; + else if (dma_en && mmio_en) + ret = EEH_PE_STATE_NORMAL; + else if (!dma_en && !mmio_en) + ret = EEH_PE_STATE_STOPPED_IO_DMA; + else if (!dma_en && mmio_en) + ret = EEH_PE_STATE_STOPPED_DMA; + else + ret = EEH_PE_STATE_UNAVAIL; + + return ret; +} +EXPORT_SYMBOL_GPL(eeh_pe_get_state); + +static int eeh_pe_reenable_devices(struct eeh_pe *pe) +{ + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; + int ret = 0; + + /* Restore config space */ + eeh_pe_restore_bars(pe); + + /* + * Reenable PCI devices as the devices passed + * through are always enabled before the reset. + */ + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + ret = pci_reenable_device(pdev); + if (ret) { + pr_warn("%s: Failure %d reenabling %s\n", + __func__, ret, pci_name(pdev)); + return ret; + } + } + + /* The PE is still in frozen state */ + return eeh_unfreeze_pe(pe, true); +} + +/** + * eeh_pe_reset - Issue PE reset according to specified type + * @pe: EEH PE + * @option: reset type + * + * The routine is called to reset the specified PE with the + * indicated type, either fundamental reset or hot reset. + * PE reset is the most important part for error recovery. + */ +int eeh_pe_reset(struct eeh_pe *pe, int option) +{ + int ret = 0; + + /* Invalid PE ? */ + if (!pe) + return -ENODEV; + + if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset) + return -ENOENT; + + switch (option) { + case EEH_RESET_DEACTIVATE: + ret = eeh_ops->reset(pe, option); + eeh_pe_state_clear(pe, EEH_PE_RESET); + if (ret) + break; + + ret = eeh_pe_reenable_devices(pe); + break; + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + /* + * Proactively freeze the PE to drop all MMIO access + * during reset, which should be banned as it's always + * cause recursive EEH error. + */ + eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); + + eeh_pe_state_mark(pe, EEH_PE_RESET); + ret = eeh_ops->reset(pe, option); + break; + default: + pr_debug("%s: Unsupported option %d\n", + __func__, option); + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(eeh_pe_reset); + +/** + * eeh_pe_configure - Configure PCI bridges after PE reset + * @pe: EEH PE + * + * The routine is called to restore the PCI config space for + * those PCI devices, especially PCI bridges affected by PE + * reset issued previously. + */ +int eeh_pe_configure(struct eeh_pe *pe) +{ + int ret = 0; + + /* Invalid PE ? */ + if (!pe) + return -ENODEV; + + return ret; +} +EXPORT_SYMBOL_GPL(eeh_pe_configure); + static int proc_eeh_show(struct seq_file *m, void *v) { if (!eeh_enabled()) { @@ -1143,9 +1614,9 @@ static const struct file_operations proc_eeh_operations = { static int eeh_enable_dbgfs_set(void *data, u64 val) { if (val) - eeh_subsystem_flags &= ~EEH_FORCE_DISABLED; + eeh_clear_flag(EEH_FORCE_DISABLED); else - eeh_subsystem_flags |= EEH_FORCE_DISABLED; + eeh_add_flag(EEH_FORCE_DISABLED); /* Notify the backend */ if (eeh_ops->post_init) diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index e8c9fd546a5..07d8a2423a6 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -143,7 +143,7 @@ eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo, } else { if (dev != piar->pcidev || alo != piar->addr_lo || ahi != piar->addr_hi) { - pr_warning("PIAR: overlapping address range\n"); + pr_warn("PIAR: overlapping address range\n"); } return piar; } @@ -177,19 +177,20 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); if (!dn) { - pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev)); + pr_warn("PCI: no pci dn found for dev=%s\n", + pci_name(dev)); return; } edev = of_node_to_eeh_dev(dn); if (!edev) { - pr_warning("PCI: no EEH dev found for dn=%s\n", + pr_warn("PCI: no EEH dev found for dn=%s\n", dn->full_name); return; } /* Skip any devices for which EEH is not enabled. */ - if (!eeh_probe_mode_dev() && !edev->pe) { + if (!edev->pe) { #ifdef DEBUG pr_info("PCI: skip building address cache for=%s - %s\n", pci_name(dev), dn->full_name); diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index 1efa28f5fc5..e5274ee9a75 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -57,7 +57,8 @@ void *eeh_dev_init(struct device_node *dn, void *data) /* Allocate EEH device */ edev = kzalloc(sizeof(*edev), GFP_KERNEL); if (!edev) { - pr_warning("%s: out of memory\n", __func__); + pr_warn("%s: out of memory\n", + __func__); return NULL; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 420da61d4ce..3fd514f8e4b 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -180,6 +180,22 @@ static bool eeh_dev_removed(struct eeh_dev *edev) return false; } +static void *eeh_dev_save_state(void *data, void *userdata) +{ + struct eeh_dev *edev = data; + struct pci_dev *pdev; + + if (!edev) + return NULL; + + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + return NULL; + + pci_save_state(pdev); + return NULL; +} + /** * eeh_report_error - Report pci error to each device driver * @data: eeh device @@ -303,6 +319,22 @@ static void *eeh_report_reset(void *data, void *userdata) return NULL; } +static void *eeh_dev_restore_state(void *data, void *userdata) +{ + struct eeh_dev *edev = data; + struct pci_dev *pdev; + + if (!edev) + return NULL; + + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + return NULL; + + pci_restore_state(pdev); + return NULL; +} + /** * eeh_report_resume - Tell device to resume normal operations * @data: eeh device @@ -450,38 +482,82 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; - int i, rc; + bool *clear_sw_state = flag; + int i, rc = 1; - for (i = 0; i < 3; i++) { - rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); - if (rc) - continue; - rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); - if (!rc) - break; - } + for (i = 0; rc && i < 3; i++) + rc = eeh_unfreeze_pe(pe, clear_sw_state); - /* The PE has been isolated, clear it */ + /* Stop immediately on any errors */ if (rc) { - pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n", - __func__, pe->phb->global_number, pe->addr, rc); + pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n", + __func__, rc, pe->phb->global_number, pe->addr); return (void *)pe; } return NULL; } -static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +static int eeh_clear_pe_frozen_state(struct eeh_pe *pe, + bool clear_sw_state) { void *rc; - rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL); + rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state); if (!rc) eeh_pe_state_clear(pe, EEH_PE_ISOLATED); return rc ? -EIO : 0; } +int eeh_pe_reset_and_recover(struct eeh_pe *pe) +{ + int result, ret; + + /* Bail if the PE is being recovered */ + if (pe->state & EEH_PE_RECOVERING) + return 0; + + /* Put the PE into recovery mode */ + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + + /* Save states */ + eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL); + + /* Report error */ + eeh_pe_dev_traverse(pe, eeh_report_error, &result); + + /* Issue reset */ + eeh_pe_state_mark(pe, EEH_PE_RESET); + ret = eeh_reset_pe(pe); + if (ret) { + eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_RESET); + return ret; + } + eeh_pe_state_clear(pe, EEH_PE_RESET); + + /* Unfreeze the PE */ + ret = eeh_clear_pe_frozen_state(pe, true); + if (ret) { + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + return ret; + } + + /* Notify completion of reset */ + eeh_pe_dev_traverse(pe, eeh_report_reset, &result); + + /* Restore device state */ + eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL); + + /* Resume */ + eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); + + /* Clear recovery mode */ + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + + return 0; +} + /** * eeh_reset_device - Perform actual reset of a pci slot * @pe: EEH PE @@ -540,7 +616,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) eeh_pe_state_clear(pe, EEH_PE_RESET); /* Clear frozen state */ - rc = eeh_clear_pe_frozen_state(pe); + rc = eeh_clear_pe_frozen_state(pe, false); if (rc) return rc; @@ -599,7 +675,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pe->freeze_count++; if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; - pr_warning("EEH: This PCI device has failed %d times in the last hour\n", + pr_warn("EEH: This PCI device has failed %d times in the last hour\n", pe->freeze_count); /* Walk the various device drivers attached to this slot through @@ -616,7 +692,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) */ rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { - pr_warning("EEH: Permanent failure\n"); + pr_warn("EEH: Permanent failure\n"); goto hard_fail; } @@ -635,8 +711,8 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Reset with hotplug activity\n"); rc = eeh_reset_device(pe, frozen_bus); if (rc) { - pr_warning("%s: Unable to reset, err=%d\n", - __func__, rc); + pr_warn("%s: Unable to reset, err=%d\n", + __func__, rc); goto hard_fail; } } @@ -678,7 +754,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) /* If any device has a hard failure, then shut off everything. */ if (result == PCI_ERS_RESULT_DISCONNECT) { - pr_warning("EEH: Device driver gave up\n"); + pr_warn("EEH: Device driver gave up\n"); goto hard_fail; } @@ -687,8 +763,8 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Reset without hotplug activity\n"); rc = eeh_reset_device(pe, NULL); if (rc) { - pr_warning("%s: Cannot reset, err=%d\n", - __func__, rc); + pr_warn("%s: Cannot reset, err=%d\n", + __func__, rc); goto hard_fail; } @@ -701,7 +777,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) /* All devices should claim they have recovered by now. */ if ((result != PCI_ERS_RESULT_RECOVERED) && (result != PCI_ERS_RESULT_NONE)) { - pr_warning("EEH: Not recovered\n"); + pr_warn("EEH: Not recovered\n"); goto hard_fail; } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index fbd01eba447..53dd0915e69 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -32,9 +32,24 @@ #include <asm/pci-bridge.h> #include <asm/ppc-pci.h> +static int eeh_pe_aux_size = 0; static LIST_HEAD(eeh_phb_pe); /** + * eeh_set_pe_aux_size - Set PE auxillary data size + * @size: PE auxillary data size + * + * Set PE auxillary data size + */ +void eeh_set_pe_aux_size(int size) +{ + if (size < 0) + return; + + eeh_pe_aux_size = size; +} + +/** * eeh_pe_alloc - Allocate PE * @phb: PCI controller * @type: PE type @@ -44,9 +59,16 @@ static LIST_HEAD(eeh_phb_pe); static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type) { struct eeh_pe *pe; + size_t alloc_size; + + alloc_size = sizeof(struct eeh_pe); + if (eeh_pe_aux_size) { + alloc_size = ALIGN(alloc_size, cache_line_size()); + alloc_size += eeh_pe_aux_size; + } /* Allocate PHB PE */ - pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL); + pe = kzalloc(alloc_size, GFP_KERNEL); if (!pe) return NULL; /* Initialize PHB PE */ @@ -56,6 +78,8 @@ static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type) INIT_LIST_HEAD(&pe->child); INIT_LIST_HEAD(&pe->edevs); + pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe), + cache_line_size()); return pe; } @@ -179,7 +203,8 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, void *ret; if (!root) { - pr_warning("%s: Invalid PE %p\n", __func__, root); + pr_warn("%s: Invalid PE %p\n", + __func__, root); return NULL; } @@ -351,17 +376,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) pe->config_addr = edev->config_addr; /* - * While doing PE reset, we probably hot-reset the - * upstream bridge. However, the PCI devices including - * the associated EEH devices might be removed when EEH - * core is doing recovery. So that won't safe to retrieve - * the bridge through downstream EEH device. We have to - * trace the parent PCI bus, then the upstream bridge. - */ - if (eeh_probe_mode_dev()) - pe->bus = eeh_dev_to_pci_dev(edev)->bus; - - /* * Put the new EEH PE into hierarchy tree. If the parent * can't be found, the newly created PE will be attached * to PHB directly. Otherwise, we have to associate the @@ -414,7 +428,7 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) } /* Remove the EEH device */ - pe = edev->pe; + pe = eeh_dev_to_pe(edev); edev->pe = NULL; list_del(&edev->list); @@ -570,6 +584,8 @@ static void *__eeh_pe_state_clear(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; /* Keep the state of permanently removed PE intact */ if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && @@ -578,9 +594,22 @@ static void *__eeh_pe_state_clear(void *data, void *flag) pe->state &= ~state; - /* Clear check count since last isolation */ - if (state & EEH_PE_ISOLATED) - pe->check_count = 0; + /* + * Special treatment on clearing isolated state. Clear + * check count since last isolation and put all affected + * devices to normal state. + */ + if (!(state & EEH_PE_ISOLATED)) + return NULL; + + pe->check_count = 0; + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + pdev->error_state = pci_channel_io_normal; + } return NULL; } @@ -802,53 +831,33 @@ void eeh_pe_restore_bars(struct eeh_pe *pe) */ const char *eeh_pe_loc_get(struct eeh_pe *pe) { - struct pci_controller *hose; struct pci_bus *bus = eeh_pe_bus_get(pe); - struct pci_dev *pdev; - struct device_node *dn; - const char *loc; + struct device_node *dn = pci_bus_to_OF_node(bus); + const char *loc = NULL; - if (!bus) - return "N/A"; + if (!dn) + goto out; /* PHB PE or root PE ? */ if (pci_is_root_bus(bus)) { - hose = pci_bus_to_host(bus); - loc = of_get_property(hose->dn, - "ibm,loc-code", NULL); + loc = of_get_property(dn, "ibm,loc-code", NULL); + if (!loc) + loc = of_get_property(dn, "ibm,io-base-loc-code", NULL); if (loc) - return loc; - loc = of_get_property(hose->dn, - "ibm,io-base-loc-code", NULL); - if (loc) - return loc; - - pdev = pci_get_slot(bus, 0x0); - } else { - pdev = bus->self; - } - - if (!pdev) { - loc = "N/A"; - goto out; - } + goto out; - dn = pci_device_to_OF_node(pdev); - if (!dn) { - loc = "N/A"; - goto out; + /* Check the root port */ + dn = dn->child; + if (!dn) + goto out; } loc = of_get_property(dn, "ibm,loc-code", NULL); if (!loc) loc = of_get_property(dn, "ibm,slot-location-code", NULL); - if (!loc) - loc = "N/A"; out: - if (pci_is_root_bus(bus) && pdev) - pci_dev_put(pdev); - return loc; + return loc ? loc : "N/A"; } /** diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index e2595ba4b72..f19b1e5cb06 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -54,6 +54,43 @@ EEH_SHOW_ATTR(eeh_mode, mode, "0x%x"); EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x"); EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); +static ssize_t eeh_pe_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + int state; + + if (!edev || !edev->pe) + return -ENODEV; + + state = eeh_ops->get_state(edev->pe, NULL); + return sprintf(buf, "%0x08x %0x08x\n", + state, edev->pe->state); +} + +static ssize_t eeh_pe_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + + if (!edev || !edev->pe) + return -ENODEV; + + /* Nothing to do if it's not frozen */ + if (!(edev->pe->state & EEH_PE_ISOLATED)) + return count; + + if (eeh_unfreeze_pe(edev->pe, true)) + return -EIO; + + return count; +} + +static DEVICE_ATTR_RW(eeh_pe_state); + void eeh_sysfs_add_device(struct pci_dev *pdev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); @@ -68,9 +105,10 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state); if (rc) - printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); + pr_warn("EEH: Unable to create sysfs entries\n"); else if (edev) edev->mode |= EEH_DEV_SYSFS; } @@ -92,6 +130,7 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) device_remove_file(&pdev->dev, &dev_attr_eeh_mode); device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state); if (edev) edev->mode &= ~EEH_DEV_SYSFS; diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6528c5e2cc4..5bbd1bc8c3b 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -482,16 +482,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS) ld r8,KSP(r4) /* new stack pointer */ #ifdef CONFIG_PPC_BOOK3S BEGIN_FTR_SECTION - BEGIN_FTR_SECTION_NESTED(95) clrrdi r6,r8,28 /* get its ESID */ clrrdi r9,r1,28 /* get current sp ESID */ - FTR_SECTION_ELSE_NESTED(95) +FTR_SECTION_ELSE clrrdi r6,r8,40 /* get its 1T ESID */ clrrdi r9,r1,40 /* get current sp 1T ESID */ - ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95) -FTR_SECTION_ELSE - b 2f -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_1T_SEGMENT) clrldi. r0,r6,2 /* is new ESID c00000000? */ cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */ cror eq,4*cr1+eq,eq @@ -919,6 +915,11 @@ restore_check_irq_replay: addi r3,r1,STACK_FRAME_OVERHEAD; bl do_IRQ b ret_from_except +1: cmpwi cr0,r3,0xe60 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl handle_hmi_exception + b ret_from_except 1: cmpwi cr0,r3,0x900 bne 1f addi r3,r1,STACK_FRAME_OVERHEAD; diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index bb9cac6c805..3e68d1c6971 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -635,7 +635,7 @@ interrupt_end_book3e: /* Altivec Unavailable Interrupt */ START_EXCEPTION(altivec_unavailable); - NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL, + NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, PROLOG_ADDITION_NONE) /* we can probably do a shorter exception entry for that one... */ EXCEPTION_COMMON(0x200) @@ -658,7 +658,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* AltiVec Assist */ START_EXCEPTION(altivec_assist); NORMAL_EXCEPTION_PROLOG(0x220, - BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST, + BOOKE_INTERRUPT_ALTIVEC_ASSIST, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x220) INTS_DISABLE diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a7d36b19221..050f79a4a16 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -188,10 +188,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) data_access_pSeries: HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) -BEGIN_FTR_SECTION - b data_access_check_stab -data_access_not_stab: -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD, KVMTEST, 0x300) @@ -339,7 +335,7 @@ emulation_assist_trampoline: hv_exception_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) - b hmi_exception_hv + b hmi_exception_early . = 0xe80 hv_doorbell_trampoline: @@ -514,34 +510,6 @@ machine_check_pSeries_0: EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200) EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD) KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200) - - /* moved from 0x300 */ -data_access_check_stab: - GET_PACA(r13) - std r9,PACA_EXSLB+EX_R9(r13) - std r10,PACA_EXSLB+EX_R10(r13) - mfspr r10,SPRN_DAR - mfspr r9,SPRN_DSISR - srdi r10,r10,60 - rlwimi r10,r9,16,0x20 -#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE - lbz r9,HSTATE_IN_GUEST(r13) - rlwimi r10,r9,8,0x300 -#endif - mfcr r9 - cmpwi r10,0x2c - beq do_stab_bolted_pSeries - mtcrf 0x80,r9 - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - b data_access_not_stab -do_stab_bolted_pSeries: - std r11,PACA_EXSLB+EX_R11(r13) - std r12,PACA_EXSLB+EX_R12(r13) - GET_SCRATCH0(r10) - std r10,PACA_EXSLB+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(do_stab_bolted, EXC_STD) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400) @@ -621,8 +589,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22) STD_EXCEPTION_HV_OOL(0xe42, emulation_assist) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42) - STD_EXCEPTION_HV_OOL(0xe62, hmi_exception) /* need to flush cache ? */ + MASKABLE_EXCEPTION_HV_OOL(0xe62, hmi_exception) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62) + MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82) @@ -643,6 +612,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) * - If it was a decrementer interrupt, we bump the dec to max and and return. * - If it was a doorbell we return immediately since doorbells are edge * triggered and won't automatically refire. + * - If it was a HMI we return immediately since we handled it in realmode + * and it won't refire. * - else we hard disable and return. * This is called with r10 containing the value to OR to the paca field. */ @@ -660,6 +631,8 @@ masked_##_H##interrupt: \ b 2f; \ 1: cmpwi r10,PACA_IRQ_DBELL; \ beq 2f; \ + cmpwi r10,PACA_IRQ_HMI; \ + beq 2f; \ mfspr r10,SPRN_##_H##SRR1; \ rldicl r10,r10,48,1; /* clear MSR_EE */ \ rotldi r10,r10,16; \ @@ -799,7 +772,7 @@ kvmppc_skip_Hinterrupt: STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception) STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception) STD_EXCEPTION_COMMON(0xe40, emulation_assist, emulation_assist_interrupt) - STD_EXCEPTION_COMMON(0xe60, hmi_exception, unknown_exception) + STD_EXCEPTION_COMMON_ASYNC(0xe60, hmi_exception, handle_hmi_exception) #ifdef CONFIG_PPC_DOORBELL STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, doorbell_exception) #else @@ -985,66 +958,6 @@ ppc64_runlatch_on_trampoline: b __ppc64_runlatch_on /* - * Here we have detected that the kernel stack pointer is bad. - * R9 contains the saved CR, r13 points to the paca, - * r10 contains the (bad) kernel stack pointer, - * r11 and r12 contain the saved SRR0 and SRR1. - * We switch to using an emergency stack, save the registers there, - * and call kernel_bad_stack(), which panics. - */ -bad_stack: - ld r1,PACAEMERGSP(r13) - subi r1,r1,64+INT_FRAME_SIZE - std r9,_CCR(r1) - std r10,GPR1(r1) - std r11,_NIP(r1) - std r12,_MSR(r1) - mfspr r11,SPRN_DAR - mfspr r12,SPRN_DSISR - std r11,_DAR(r1) - std r12,_DSISR(r1) - mflr r10 - mfctr r11 - mfxer r12 - std r10,_LINK(r1) - std r11,_CTR(r1) - std r12,_XER(r1) - SAVE_GPR(0,r1) - SAVE_GPR(2,r1) - ld r10,EX_R3(r3) - std r10,GPR3(r1) - SAVE_GPR(4,r1) - SAVE_4GPRS(5,r1) - ld r9,EX_R9(r3) - ld r10,EX_R10(r3) - SAVE_2GPRS(9,r1) - ld r9,EX_R11(r3) - ld r10,EX_R12(r3) - ld r11,EX_R13(r3) - std r9,GPR11(r1) - std r10,GPR12(r1) - std r11,GPR13(r1) -BEGIN_FTR_SECTION - ld r10,EX_CFAR(r3) - std r10,ORIG_GPR3(r1) -END_FTR_SECTION_IFSET(CPU_FTR_CFAR) - SAVE_8GPRS(14,r1) - SAVE_10GPRS(22,r1) - lhz r12,PACA_TRAP_SAVE(r13) - std r12,_TRAP(r1) - addi r11,r1,INT_FRAME_SIZE - std r11,0(r1) - li r12,0 - std r12,0(r11) - ld r2,PACATOC(r13) - ld r11,exception_marker@toc(r2) - std r12,RESULT(r1) - std r11,STACK_FRAME_OVERHEAD-16(r1) -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl kernel_bad_stack - b 1b - -/* * Here r13 points to the paca, r9 contains the saved CR, * SRR0 and SRR1 are saved in r11 and r12, * r9 - r13 are saved in paca->exgen. @@ -1057,7 +970,7 @@ data_access_common: mfspr r10,SPRN_DSISR stw r10,PACA_EXGEN+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) ld r12,_MSR(r1) ld r3,PACA_EXGEN+EX_DAR(r13) lwz r4,PACA_EXGEN+EX_DSISR(r13) @@ -1073,7 +986,7 @@ h_data_storage_common: stw r10,PACA_EXGEN+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception b ret_from_except @@ -1082,7 +995,7 @@ h_data_storage_common: .globl instruction_access_common instruction_access_common: EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) ld r12,_MSR(r1) ld r3,_NIP(r1) andis. r4,r12,0x5820 @@ -1146,7 +1059,7 @@ slb_miss_fault: unrecov_user_slb: EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN) - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) bl save_nvgprs 1: addi r3,r1,STACK_FRAME_OVERHEAD bl unrecoverable_exception @@ -1169,7 +1082,7 @@ machine_check_common: stw r10,PACA_EXGEN+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) FINISH_NAP - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) ld r3,PACA_EXGEN+EX_DAR(r13) lwz r4,PACA_EXGEN+EX_DSISR(r13) std r3,_DAR(r1) @@ -1192,7 +1105,7 @@ alignment_common: std r3,_DAR(r1) std r4,_DSISR(r1) bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl alignment_exception b ret_from_except @@ -1202,7 +1115,7 @@ alignment_common: program_check_common: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception b ret_from_except @@ -1213,7 +1126,7 @@ fp_unavailable_common: EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) bne 1f /* if from user, just load it up */ bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_fp_unavailable_exception BUG_OPCODE @@ -1232,7 +1145,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl fp_unavailable_tm b ret_from_except @@ -1258,7 +1171,7 @@ BEGIN_FTR_SECTION #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_tm b ret_from_except @@ -1267,7 +1180,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception b ret_from_except @@ -1292,7 +1205,7 @@ BEGIN_FTR_SECTION #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_tm b ret_from_except @@ -1301,7 +1214,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_exception b ret_from_except @@ -1338,11 +1251,60 @@ fwnmi_data_area: . = 0x8000 #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ -/* Space for CPU0's segment table */ - .balign 4096 - .globl initial_stab -initial_stab: - .space 4096 + .globl hmi_exception_early +hmi_exception_early: + EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60) + mr r10,r1 /* Save r1 */ + ld r1,PACAEMERGSP(r13) /* Use emergency stack */ + subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ + std r9,_CCR(r1) /* save CR in stackframe */ + mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ + std r11,_NIP(r1) /* save HSRR0 in stackframe */ + mfspr r12,SPRN_HSRR1 /* Save SRR1 */ + std r12,_MSR(r1) /* save SRR1 in stackframe */ + std r10,0(r1) /* make stack chain pointer */ + std r0,GPR0(r1) /* save r0 in stackframe */ + std r10,GPR1(r1) /* save r1 in stackframe */ + EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) + EXCEPTION_PROLOG_COMMON_3(0xe60) + addi r3,r1,STACK_FRAME_OVERHEAD + bl hmi_exception_realmode + /* Windup the stack. */ + /* Clear MSR_RI before setting SRR0 and SRR1. */ + li r0,MSR_RI + mfmsr r9 /* get MSR value */ + andc r9,r9,r0 + mtmsrd r9,1 /* Clear MSR_RI */ + /* Move original HSRR0 and HSRR1 into the respective regs */ + ld r9,_MSR(r1) + mtspr SPRN_HSRR1,r9 + ld r3,_NIP(r1) + mtspr SPRN_HSRR0,r3 + ld r9,_CTR(r1) + mtctr r9 + ld r9,_XER(r1) + mtxer r9 + ld r9,_LINK(r1) + mtlr r9 + REST_GPR(0, r1) + REST_8GPRS(2, r1) + REST_GPR(10, r1) + ld r11,_CCR(r1) + mtcr r11 + REST_GPR(11, r1) + REST_2GPRS(12, r1) + /* restore original r1. */ + ld r1,GPR1(r1) + + /* + * Go to virtual mode and pull the HMI event information from + * firmware. + */ + .globl hmi_exception_after_realmode +hmi_exception_after_realmode: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b hmi_exception_hv #ifdef CONFIG_PPC_POWERNV _GLOBAL(opal_mc_secondary_handler) @@ -1566,7 +1528,7 @@ slb_miss_realmode: unrecov_slb: EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) bl save_nvgprs 1: addi r3,r1,STACK_FRAME_OVERHEAD bl unrecoverable_exception @@ -1594,12 +1556,6 @@ do_hash_page: bne- handle_page_fault /* if not, try to insert a HPTE */ andis. r0,r4,DSISR_DABRMATCH@h bne- handle_dabr_fault - -BEGIN_FTR_SECTION - andis. r0,r4,0x0020 /* Is it a segment table fault? */ - bne- do_ste_alloc /* If so handle it */ -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) - CURRENT_THREAD_INFO(r11, r1) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ @@ -1681,113 +1637,62 @@ handle_dabr_fault: bl bad_page_fault b ret_from_except - /* here we have a segment miss */ -do_ste_alloc: - bl ste_allocate /* try to insert stab entry */ - cmpdi r3,0 - bne- handle_page_fault - b fast_exception_return - /* - * r13 points to the PACA, r9 contains the saved CR, + * Here we have detected that the kernel stack pointer is bad. + * R9 contains the saved CR, r13 points to the paca, + * r10 contains the (bad) kernel stack pointer, * r11 and r12 contain the saved SRR0 and SRR1. - * r9 - r13 are saved in paca->exslb. - * We assume we aren't going to take any exceptions during this procedure. - * We assume (DAR >> 60) == 0xc. + * We switch to using an emergency stack, save the registers there, + * and call kernel_bad_stack(), which panics. */ - .align 7 -do_stab_bolted: - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ - mfspr r11,SPRN_DAR /* ea */ - - /* - * check for bad kernel/user address - * (ea & ~REGION_MASK) >= PGTABLE_RANGE - */ - rldicr. r9,r11,4,(63 - 46 - 4) - li r9,0 /* VSID = 0 for bad address */ - bne- 0f - - /* - * Calculate VSID: - * This is the kernel vsid, we take the top for context from - * the range. context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 - * Here we know that (ea >> 60) == 0xc - */ - lis r9,(MAX_USER_CONTEXT + 1)@ha - addi r9,r9,(MAX_USER_CONTEXT + 1)@l - - srdi r10,r11,SID_SHIFT - rldimi r10,r9,ESID_BITS,0 /* proto vsid */ - ASM_VSID_SCRAMBLE(r10, r9, 256M) - rldic r9,r10,12,16 /* r9 = vsid << 12 */ - -0: - /* Hash to the primary group */ - ld r10,PACASTABVIRT(r13) - srdi r11,r11,SID_SHIFT - rldimi r10,r11,7,52 /* r10 = first ste of the group */ - - /* Search the primary group for a free entry */ -1: ld r11,0(r10) /* Test valid bit of the current ste */ - andi. r11,r11,0x80 - beq 2f - addi r10,r10,16 - andi. r11,r10,0x70 - bne 1b - - /* Stick for only searching the primary group for now. */ - /* At least for now, we use a very simple random castout scheme */ - /* Use the TB as a random number ; OR in 1 to avoid entry 0 */ - mftb r11 - rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */ - ori r11,r11,0x10 - - /* r10 currently points to an ste one past the group of interest */ - /* make it point to the randomly selected entry */ - subi r10,r10,128 - or r10,r10,r11 /* r10 is the entry to invalidate */ - - isync /* mark the entry invalid */ - ld r11,0(r10) - rldicl r11,r11,56,1 /* clear the valid bit */ - rotldi r11,r11,8 - std r11,0(r10) - sync - - clrrdi r11,r11,28 /* Get the esid part of the ste */ - slbie r11 - -2: std r9,8(r10) /* Store the vsid part of the ste */ - eieio - - mfspr r11,SPRN_DAR /* Get the new esid */ - clrrdi r11,r11,28 /* Permits a full 32b of ESID */ - ori r11,r11,0x90 /* Turn on valid and kp */ - std r11,0(r10) /* Put new entry back into the stab */ - - sync - - /* All done -- return from exception. */ - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ - ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */ - - andi. r10,r12,MSR_RI - beq- unrecov_slb - - mtcrf 0x80,r9 /* restore CR */ - - mfmsr r10 - clrrdi r10,r10,2 - mtmsrd r10,1 - - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ +bad_stack: + ld r1,PACAEMERGSP(r13) + subi r1,r1,64+INT_FRAME_SIZE + std r9,_CCR(r1) + std r10,GPR1(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + mfspr r11,SPRN_DAR + mfspr r12,SPRN_DSISR + std r11,_DAR(r1) + std r12,_DSISR(r1) + mflr r10 + mfctr r11 + mfxer r12 + std r10,_LINK(r1) + std r11,_CTR(r1) + std r12,_XER(r1) + SAVE_GPR(0,r1) + SAVE_GPR(2,r1) + ld r10,EX_R3(r3) + std r10,GPR3(r1) + SAVE_GPR(4,r1) + SAVE_4GPRS(5,r1) + ld r9,EX_R9(r3) + ld r10,EX_R10(r3) + SAVE_2GPRS(9,r1) + ld r9,EX_R11(r3) + ld r10,EX_R12(r3) + ld r11,EX_R13(r3) + std r9,GPR11(r1) + std r10,GPR12(r1) + std r11,GPR13(r1) +BEGIN_FTR_SECTION + ld r10,EX_CFAR(r3) + std r10,ORIG_GPR3(r1) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + SAVE_8GPRS(14,r1) + SAVE_10GPRS(22,r1) + lhz r12,PACA_TRAP_SAVE(r13) + std r12,_TRAP(r1) + addi r11,r1,INT_FRAME_SIZE + std r11,0(r1) + li r12,0 + std r12,0(r11) + ld r2,PACATOC(r13) + ld r11,exception_marker@toc(r2) + std r12,RESULT(r1) + std r11,STACK_FRAME_OVERHEAD-16(r1) +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl kernel_bad_stack + b 1b diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index c334f53453f..b5061abbd2e 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -1210,10 +1210,12 @@ clear_utlb_entry: /* We configure icbi to invalidate 128 bytes at a time since the * current 32-bit kernel code isn't too happy with icache != dcache - * block size + * block size. We also disable the BTAC as this can cause errors + * in some circumstances (see IBM Erratum 47). */ mfspr r3,SPRN_CCR0 oris r3,r3,0x0020 + ori r3,r3,0x0040 mtspr SPRN_CCR0,r3 isync diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index a95145d7f61..d48125d0c04 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -180,6 +180,28 @@ exception_marker: #include "exceptions-64s.S" #endif +#ifdef CONFIG_PPC_BOOK3E +_GLOBAL(fsl_secondary_thread_init) + /* Enable branch prediction */ + lis r3,BUCSR_INIT@h + ori r3,r3,BUCSR_INIT@l + mtspr SPRN_BUCSR,r3 + isync + + /* + * Fix PIR to match the linear numbering in the device tree. + * + * On e6500, the reset value of PIR uses the low three bits for + * the thread within a core, and the upper bits for the core + * number. There are two threads per core, so shift everything + * but the low bit right by two bits so that the cpu numbering is + * continuous. + */ + mfspr r3, SPRN_PIR + rlwimi r3, r3, 30, 2, 30 + mtspr SPRN_PIR, r3 +#endif + _GLOBAL(generic_secondary_thread_init) mr r24,r3 @@ -618,7 +640,7 @@ __secondary_start: addi r14,r14,THREAD_SIZE-STACK_FRAME_OVERHEAD std r14,PACAKSAVE(r13) - /* Do early setup for that CPU (stab, slb, hash table pointer) */ + /* Do early setup for that CPU (SLB and hash table pointer) */ bl early_setup_secondary /* @@ -771,8 +793,10 @@ start_here_multiplatform: li r0,0 stdu r0,-STACK_FRAME_OVERHEAD(r1) - /* Do very early kernel initializations, including initial hash table, - * stab and slb setup before we turn on relocation. */ + /* + * Do very early kernel initializations, including initial hash table + * and SLB setup before we turn on relocation. + */ /* Restore parameters passed from prom_init/kexec */ mr r3,r31 diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 7ee876d2adb..fafff8dbd5d 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -104,12 +104,15 @@ turn_on_mmu: * task's thread_struct. */ #define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcr r10; \ + EXCEPTION_PROLOG_0; \ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 +#define EXCEPTION_PROLOG_0 \ + mtspr SPRN_SPRG_SCRATCH0,r10; \ + mtspr SPRN_SPRG_SCRATCH1,r11; \ + mfcr r10 + #define EXCEPTION_PROLOG_1 \ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ andi. r11,r11,MSR_PR; \ @@ -145,6 +148,14 @@ turn_on_mmu: SAVE_2GPRS(7, r11) /* + * Exception exit code. + */ +#define EXCEPTION_EPILOG_0 \ + mtcr r10; \ + mfspr r10,SPRN_SPRG_SCRATCH0; \ + mfspr r11,SPRN_SPRG_SCRATCH1 + +/* * Note: code which follows this uses cr0.eq (set if from kernel), * r11, r12 (SRR0), and r9 (SRR1). * @@ -293,16 +304,8 @@ InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 stw r3, 8(r0) #endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcr r10 -#ifdef CONFIG_8xx_CPU6 - stw r10, 0(r0) - stw r11, 4(r0) -#else - mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG2, r11 -#endif + EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 addi r11, r10, 0x1000 @@ -359,18 +362,11 @@ InstructionTLBMiss: mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcr r10 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 -#else - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 rfi 2: mfspr r11, SPRN_SRR1 @@ -381,19 +377,11 @@ InstructionTLBMiss: mtspr SPRN_SRR1, r11 /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcr r10 - li r11, 0x00f0 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 -#else - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 b InstructionAccess . = 0x1200 @@ -401,16 +389,8 @@ DataStoreTLBMiss: #ifdef CONFIG_8xx_CPU6 stw r3, 8(r0) #endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcr r10 -#ifdef CONFIG_8xx_CPU6 - stw r10, 0(r0) - stw r11, 4(r0) -#else - mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG2, r11 -#endif + EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ /* If we are faulting a kernel address, we have to use the @@ -483,19 +463,12 @@ DataStoreTLBMiss: mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcr r10 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 -#else - mtspr SPRN_DAR, r11 /* Tag DAR */ - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 rfi /* This is an instruction TLB error on the MPC8xx. This could be due @@ -507,35 +480,18 @@ InstructionTLBError: b InstructionAccess /* This is the data TLB error on the MPC8xx. This could be due to - * many reasons, including a dirty update to a pte. We can catch that - * one here, but anything else is an error. First, we track down the - * Linux pte. If it is valid, write access is allowed, but the - * page dirty bit is not set, we will set it and reload the TLB. For - * any other case, we bail out to a higher level function that can - * handle it. + * many reasons, including a dirty update to a pte. We bail out to + * a higher level function that can handle it. */ . = 0x1400 DataTLBError: -#ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) -#endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcr r10 - stw r10, 0(r0) - stw r11, 4(r0) + EXCEPTION_PROLOG_0 - mfspr r10, SPRN_DAR - cmpwi cr0, r10, 0x00f0 + mfspr r11, SPRN_DAR + cmpwi cr0, r11, 0x00f0 beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ -DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */ - mfspr r10, SPRN_M_TW /* Restore registers */ - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) -#ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) -#endif +DARFixed:/* Return from dcbx instruction bug workaround */ + EXCEPTION_EPILOG_0 b DataAccess EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) @@ -559,11 +515,15 @@ DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions * by decoding the registers used by the dcbx instruction and adding them. - * DAR is set to the calculated address and r10 also holds the EA on exit. + * DAR is set to the calculated address. */ /* define if you don't want to use self modifying code */ #define NO_SELF_MODIFYING_CODE FixupDAR:/* Entry point for dcbx workaround. */ +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ @@ -579,16 +539,17 @@ FixupDAR:/* Entry point for dcbx workaround. */ mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ lwz r11, 0(r11) /* Get the pte */ +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) /* restore r3 from memory */ +#endif /* concat physical page address(r11) and page offset(r10) */ rlwimi r11, r10, 0, 20, 31 lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ /* dcbt and dcbtst does not generate DTLB Misses/Errors, * no need to include them here */ - srwi r10, r11, 26 /* check if major OP code is 31 */ - cmpwi cr0, r10, 31 - bne- 141f - rlwinm r10, r11, 0, 21, 30 + xoris r10, r11, 0x7c00 /* check if major OP code is 31 */ + rlwinm r10, r10, 0, 21, 5 cmpwi cr0, r10, 2028 /* Is dcbz? */ beq+ 142f cmpwi cr0, r10, 940 /* Is dcbi? */ @@ -599,16 +560,13 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq+ 142f cmpwi cr0, r10, 1964 /* Is icbi? */ beq+ 142f -141: mfspr r10, SPRN_DAR /* r10 must hold DAR at exit */ +141: mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Nope, go back to normal TLB processing */ 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 142: /* continue, it was a dcbx, dcbi instruction. */ -#ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) /* restore r3 from memory */ -#endif #ifndef NO_SELF_MODIFYING_CODE andis. r10,r11,0x1f /* test if reg RA is r0 */ li r10,modified_instr@l @@ -619,14 +577,15 @@ FixupDAR:/* Entry point for dcbx workaround. */ stw r11,0(r10) /* store add/and instruction */ dcbf 0,r10 /* flush new instr. to memory. */ icbi 0,r10 /* invalidate instr. cache line */ - lwz r11, 4(r0) /* restore r11 from memory */ - mfspr r10, SPRN_M_TW /* restore r10 from M_TW */ + mfspr r11, SPRN_SPRG_SCRATCH1 /* restore r11 */ + mfspr r10, SPRN_SPRG_SCRATCH0 /* restore r10 */ isync /* Wait until new instr is loaded from memory */ modified_instr: .space 4 /* this is where the add instr. is stored */ bne+ 143f subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */ 143: mtdar r10 /* store faulting EA in DAR */ + mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Go back to normal TLB handling */ #else mfctr r10 @@ -680,13 +639,16 @@ modified_instr: mfdar r11 mtctr r11 /* restore ctr reg from DAR */ mtdar r10 /* save fault EA to DAR */ + mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Go back to normal TLB handling */ /* special handling for r10,r11 since these are modified already */ -153: lwz r11, 4(r0) /* load r11 from memory */ - b 155f -154: mfspr r11, SPRN_M_TW /* load r10 from M_TW */ -155: add r10, r10, r11 /* add it */ +153: mfspr r11, SPRN_SPRG_SCRATCH1 /* load r11 from SPRN_SPRG_SCRATCH1 */ + add r10, r10, r11 /* add it */ + mfctr r11 /* restore r11 */ + b 151b +154: mfspr r11, SPRN_SPRG_SCRATCH0 /* load r10 from SPRN_SPRG_SCRATCH0 */ + add r10, r10, r11 /* add it */ mfctr r11 /* restore r11 */ b 151b #endif diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index b497188a94a..fffd1f96bb1 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -613,34 +613,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage +/* Define SPE handlers for e200 and e500v2 */ #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) - NORMAL_EXCEPTION_PROLOG(SPE_ALTIVEC_UNAVAIL) + NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) beq 1f bl load_up_spe b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) -#else - EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \ +#elif defined(CONFIG_SPE_POSSIBLE) + EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ unknown_exception, EXC_XFER_EE) -#endif /* CONFIG_SPE */ +#endif /* CONFIG_SPE_POSSIBLE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE - EXCEPTION(0x2030, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, + EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE) /* SPE Floating Point Round */ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ SPEFloatingPointRoundException, EXC_XFER_EE) -#else - EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, +#elif defined(CONFIG_SPE_POSSIBLE) + EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ unknown_exception, EXC_XFER_EE) -#endif /* CONFIG_SPE */ +#endif /* CONFIG_SPE_POSSIBLE */ + /* Performance Monitor */ EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \ @@ -947,6 +949,7 @@ get_phys_addr: * Global functions */ +#ifdef CONFIG_E200 /* Adjust or setup IVORs for e200 */ _GLOBAL(__setup_e200_ivors) li r3,DebugDebug@l @@ -959,7 +962,10 @@ _GLOBAL(__setup_e200_ivors) mtspr SPRN_IVOR34,r3 sync blr +#endif +#ifdef CONFIG_E500 +#ifndef CONFIG_PPC_E500MC /* Adjust or setup IVORs for e500v1/v2 */ _GLOBAL(__setup_e500_ivors) li r3,DebugCrit@l @@ -974,7 +980,7 @@ _GLOBAL(__setup_e500_ivors) mtspr SPRN_IVOR35,r3 sync blr - +#else /* Adjust or setup IVORs for e500mc */ _GLOBAL(__setup_e500mc_ivors) li r3,DebugDebug@l @@ -1000,6 +1006,8 @@ _GLOBAL(__setup_ehv_ivors) mtspr SPRN_IVOR41,r3 sync blr +#endif /* CONFIG_PPC_E500MC */ +#endif /* CONFIG_E500 */ #ifdef CONFIG_SPE /* diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 0bb5918faaa..1f7d84e2e8b 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -293,7 +293,7 @@ out: /* * Handle single-step exceptions following a DABR hit. */ -int __kprobes single_step_dabr_instruction(struct die_args *args) +static int __kprobes single_step_dabr_instruction(struct die_args *args) { struct pt_regs *regs = args->regs; struct perf_event *bp = NULL; diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 1114d13ac19..ac86c53e254 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -55,7 +55,7 @@ static struct device ibmebus_bus_device = { /* fake "parent" device */ struct bus_type ibmebus_bus_type; /* These devices will automatically be added to the bus during init */ -static struct of_device_id __initdata ibmebus_matches[] = { +static const struct of_device_id ibmebus_matches[] __initconst = { { .compatible = "IBM,lhca" }, { .compatible = "IBM,lhea" }, {}, diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 5cf3d367190..c0754bbf811 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -73,7 +73,7 @@ _GLOBAL(power7_powersave_common) /* Check if something happened while soft-disabled */ lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 + andi. r0,r0,~PACA_IRQ_HARD_DIS@l beq 1f cmpwi cr0,r4,0 beq 1f @@ -135,17 +135,68 @@ _GLOBAL(power7_sleep) b power7_powersave_common /* No return */ +/* + * Make opal call in realmode. This is a generic function to be called + * from realmode from reset vector. It handles endianess. + * + * r13 - paca pointer + * r1 - stack pointer + * r3 - opal token + */ +opal_call_realmode: + mflr r12 + std r12,_LINK(r1) + ld r2,PACATOC(r13) + /* Set opal return address */ + LOAD_REG_ADDR(r0,return_from_opal_call) + mtlr r0 + /* Handle endian-ness */ + li r0,MSR_LE + mfmsr r12 + andc r12,r12,r0 + mtspr SPRN_HSRR1,r12 + mr r0,r3 /* Move opal token to r0 */ + LOAD_REG_ADDR(r11,opal) + ld r12,8(r11) + ld r2,0(r11) + mtspr SPRN_HSRR0,r12 + hrfid + +return_from_opal_call: + FIXUP_ENDIAN + ld r0,_LINK(r1) + mtlr r0 + blr + +#define CHECK_HMI_INTERRUPT \ + mfspr r0,SPRN_SRR1; \ +BEGIN_FTR_SECTION_NESTED(66); \ + rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \ +FTR_SECTION_ELSE_NESTED(66); \ + rlwinm r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \ +ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ + cmpwi r0,0xa; /* Hypervisor maintenance ? */ \ + bne 20f; \ + /* Invoke opal call to handle hmi */ \ + ld r2,PACATOC(r13); \ + ld r1,PACAR1(r13); \ + std r3,ORIG_GPR3(r1); /* Save original r3 */ \ + li r3,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ + bl opal_call_realmode; \ + ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ +20: nop; + + _GLOBAL(power7_wakeup_tb_loss) ld r2,PACATOC(r13); ld r1,PACAR1(r13) +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) /* Time base re-sync */ - li r0,OPAL_RESYNC_TIMEBASE - LOAD_REG_ADDR(r11,opal); - ld r12,8(r11); - ld r2,0(r11); - mtctr r12 - bctrl + li r3,OPAL_RESYNC_TIMEBASE + bl opal_call_realmode; /* TODO: Check r3 for failure */ @@ -163,6 +214,9 @@ _GLOBAL(power7_wakeup_tb_loss) _GLOBAL(power7_wakeup_loss) ld r1,PACAR1(r13) +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) REST_NVGPRS(r1) REST_GPR(2, r1) ld r3,_CCR(r1) @@ -178,6 +232,9 @@ _GLOBAL(power7_wakeup_noloss) lbz r0,PACA_NAPSTATELOST(r13) cmpwi r0,0 bne power7_wakeup_loss +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) ld r1,PACAR1(r13) ld r4,_MSR(r1) ld r5,_NIP(r1) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 88e3ec6e1d9..a10642a0d86 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1037,7 +1037,7 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, /* if (unlikely(ret)) pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", - __func__, hwaddr, entry << IOMMU_PAGE_SHIFT(tbl), + __func__, hwaddr, entry << tbl->it_page_shift, hwaddr, ret); */ return ret; @@ -1056,7 +1056,7 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, direction != DMA_TO_DEVICE, &page); if (unlikely(ret != 1)) { /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n", - tce, entry << IOMMU_PAGE_SHIFT(tbl), ret); */ + tce, entry << tbl->it_page_shift, ret); */ return -EFAULT; } hwaddr = (unsigned long) page_address(page) + offset; @@ -1120,37 +1120,41 @@ EXPORT_SYMBOL_GPL(iommu_release_ownership); int iommu_add_device(struct device *dev) { struct iommu_table *tbl; - int ret = 0; - if (WARN_ON(dev->iommu_group)) { - pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n", - dev_name(dev), - iommu_group_id(dev->iommu_group)); + /* + * The sysfs entries should be populated before + * binding IOMMU group. If sysfs entries isn't + * ready, we simply bail. + */ + if (!device_is_registered(dev)) + return -ENOENT; + + if (dev->iommu_group) { + pr_debug("%s: Skipping device %s with iommu group %d\n", + __func__, dev_name(dev), + iommu_group_id(dev->iommu_group)); return -EBUSY; } tbl = get_iommu_table_base(dev); if (!tbl || !tbl->it_group) { - pr_debug("iommu_tce: skipping device %s with no tbl\n", - dev_name(dev)); + pr_debug("%s: Skipping device %s with no tbl\n", + __func__, dev_name(dev)); return 0; } - pr_debug("iommu_tce: adding %s to iommu group %d\n", - dev_name(dev), iommu_group_id(tbl->it_group)); + pr_debug("%s: Adding %s to iommu group %d\n", + __func__, dev_name(dev), + iommu_group_id(tbl->it_group)); if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) { - pr_err("iommu_tce: unsupported iommu page size."); - pr_err("%s has not been added\n", dev_name(dev)); + pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n", + __func__, IOMMU_PAGE_SIZE(tbl), + PAGE_SIZE, dev_name(dev)); return -EINVAL; } - ret = iommu_group_add_device(tbl->it_group, dev); - if (ret < 0) - pr_err("iommu_tce: %s has not been added, ret=%d\n", - dev_name(dev), ret); - - return ret; + return iommu_group_add_device(tbl->it_group, dev); } EXPORT_SYMBOL_GPL(iommu_add_device); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 248ee7e5beb..8eb857f216c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -189,6 +189,11 @@ notrace unsigned int __check_irq_replay(void) } #endif /* CONFIG_PPC_BOOK3E */ + /* Check if an hypervisor Maintenance interrupt happened */ + local_paca->irq_happened &= ~PACA_IRQ_HMI; + if (happened & PACA_IRQ_HMI) + return 0xe60; + /* There should be nothing left ! */ BUG_ON(local_paca->irq_happened != 0); @@ -377,6 +382,14 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); seq_printf(p, " Machine check exceptions\n"); + if (cpu_has_feature(CPU_FTR_HVMODE)) { + seq_printf(p, "%*s: ", prec, "HMI"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat, j).hmi_exceptions); + seq_printf(p, " Hypervisor Maintenance Interrupts\n"); + } + #ifdef CONFIG_PPC_DOORBELL if (cpu_has_feature(CPU_FTR_DBELL)) { seq_printf(p, "%*s: ", prec, "DBL"); @@ -400,6 +413,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += per_cpu(irq_stat, cpu).mce_exceptions; sum += per_cpu(irq_stat, cpu).spurious_irqs; sum += per_cpu(irq_stat, cpu).timer_irqs_others; + sum += per_cpu(irq_stat, cpu).hmi_exceptions; #ifdef CONFIG_PPC_DOORBELL sum += per_cpu(irq_stat, cpu).doorbell_irqs; #endif @@ -430,13 +444,13 @@ void migrate_irqs(void) cpumask_and(mask, data->affinity, map); if (cpumask_any(mask) >= nr_cpu_ids) { - printk("Breaking affinity for irq %i\n", irq); + pr_warn("Breaking affinity for irq %i\n", irq); cpumask_copy(mask, map); } if (chip->irq_set_affinity) chip->irq_set_affinity(data, mask, true); else if (desc->action && !(warned++)) - printk("Cannot set affinity for irq %i\n", irq); + pr_err("Cannot set affinity for irq %i\n", irq); } free_cpumask_var(mask); @@ -456,7 +470,7 @@ static inline void check_stack_overflow(void) /* check for stack overflow: is there less than 2KB free? */ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { - printk("do_IRQ: stack overflow: %ld\n", + pr_err("do_IRQ: stack overflow: %ld\n", sp - sizeof(struct thread_info)); dump_stack(); } diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 936258881c9..7b750c4ed5c 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -35,7 +35,7 @@ static struct legacy_serial_info { phys_addr_t taddr; } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS]; -static struct of_device_id legacy_serial_parents[] __initdata = { +static const struct of_device_id legacy_serial_parents[] __initconst = { {.type = "soc",}, {.type = "tsi-bridge",}, {.type = "opb", }, diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 6cff040bf45..c94d2e018d8 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -15,6 +15,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/moduleloader.h> #include <linux/elf.h> @@ -28,12 +31,6 @@ #include <linux/sort.h> #include <asm/setup.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt , ...) -#endif - /* Count how many different relocations (different symbol, different addend) */ static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num) @@ -121,8 +118,8 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr, continue; if (sechdrs[i].sh_type == SHT_RELA) { - DEBUGP("Found relocations in section %u\n", i); - DEBUGP("Ptr: %p. Number: %u\n", + pr_debug("Found relocations in section %u\n", i); + pr_debug("Ptr: %p. Number: %u\n", (void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size / sizeof(Elf32_Rela)); @@ -161,7 +158,7 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, me->arch.core_plt_section = i; } if (!me->arch.core_plt_section || !me->arch.init_plt_section) { - printk("Module doesn't contain .plt or .init.plt sections.\n"); + pr_err("Module doesn't contain .plt or .init.plt sections.\n"); return -ENOEXEC; } @@ -189,7 +186,7 @@ static uint32_t do_plt_call(void *location, { struct ppc_plt_entry *entry; - DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); + pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? */ if (location >= mod->module_core && location < mod->module_core + mod->core_size) @@ -208,7 +205,7 @@ static uint32_t do_plt_call(void *location, entry->jump[2] = 0x7d8903a6; /* mtctr r12 */ entry->jump[3] = 0x4e800420; /* bctr */ - DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); + pr_debug("Initialized plt for 0x%x at %p\n", val, entry); return (uint32_t)entry; } @@ -224,7 +221,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, uint32_t *location; uint32_t value; - DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + pr_debug("Applying ADD relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { /* This is where to make the change */ @@ -268,17 +265,17 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, sechdrs, module); /* Only replace bits 2 through 26 */ - DEBUGP("REL24 value = %08X. location = %08X\n", + pr_debug("REL24 value = %08X. location = %08X\n", value, (uint32_t)location); - DEBUGP("Location before: %08X.\n", + pr_debug("Location before: %08X.\n", *(uint32_t *)location); *(uint32_t *)location = (*(uint32_t *)location & ~0x03fffffc) | ((value - (uint32_t)location) & 0x03fffffc); - DEBUGP("Location after: %08X.\n", + pr_debug("Location after: %08X.\n", *(uint32_t *)location); - DEBUGP("ie. jump to %08X+%08X = %08X\n", + pr_debug("ie. jump to %08X+%08X = %08X\n", *(uint32_t *)location & 0x03fffffc, (uint32_t)location, (*(uint32_t *)location & 0x03fffffc) @@ -291,7 +288,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, break; default: - printk("%s: unknown ADD relocation: %u\n", + pr_err("%s: unknown ADD relocation: %u\n", module->name, ELF32_R_TYPE(rela[i].r_info)); return -ENOEXEC; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index d807ee626af..68384514506 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -15,6 +15,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/elf.h> #include <linux/moduleloader.h> @@ -36,11 +39,6 @@ Using a magic allocator which places modules within 32MB solves this, and makes other things simpler. Anton? --RR. */ -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt , ...) -#endif #if defined(_CALL_ELF) && _CALL_ELF == 2 #define R2_STACK_OFFSET 24 @@ -279,8 +277,8 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, /* Every relocated section... */ for (i = 1; i < hdr->e_shnum; i++) { if (sechdrs[i].sh_type == SHT_RELA) { - DEBUGP("Found relocations in section %u\n", i); - DEBUGP("Ptr: %p. Number: %lu\n", + pr_debug("Found relocations in section %u\n", i); + pr_debug("Ptr: %p. Number: %Lu\n", (void *)sechdrs[i].sh_addr, sechdrs[i].sh_size / sizeof(Elf64_Rela)); @@ -304,7 +302,7 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, relocs++; #endif - DEBUGP("Looks like a total of %lu stubs, max\n", relocs); + pr_debug("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); } @@ -390,7 +388,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, } if (!me->arch.stubs_section) { - printk("%s: doesn't contain .stubs.\n", me->name); + pr_err("%s: doesn't contain .stubs.\n", me->name); return -ENOEXEC; } @@ -434,11 +432,11 @@ static inline int create_stub(Elf64_Shdr *sechdrs, /* Stub uses address relative to r2. */ reladdr = (unsigned long)entry - my_r2(sechdrs, me); if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - printk("%s: Address %p of stub out of range of %p.\n", + pr_err("%s: Address %p of stub out of range of %p.\n", me->name, (void *)reladdr, (void *)my_r2); return 0; } - DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr); + pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); entry->jump[0] |= PPC_HA(reladdr); entry->jump[1] |= PPC_LO(reladdr); @@ -477,7 +475,7 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, static int restore_r2(u32 *instruction, struct module *me) { if (*instruction != PPC_INST_NOP) { - printk("%s: Expect noop after relocate, got %08x\n", + pr_err("%s: Expect noop after relocate, got %08x\n", me->name, *instruction); return 0; } @@ -498,7 +496,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, unsigned long *location; unsigned long value; - DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + pr_debug("Applying ADD relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); /* First time we're called, we can fix up .TOC. */ @@ -519,7 +517,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + ELF64_R_SYM(rela[i].r_info); - DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n", + pr_debug("RELOC at %p: %li-type as %s (0x%lx) + %li\n", location, (long)ELF64_R_TYPE(rela[i].r_info), strtab + sym->st_name, (unsigned long)sym->st_value, (long)rela[i].r_addend); @@ -546,7 +544,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Subtract TOC pointer */ value -= my_r2(sechdrs, me); if (value + 0x8000 > 0xffff) { - printk("%s: bad TOC16 relocation (%lu)\n", + pr_err("%s: bad TOC16 relocation (0x%lx)\n", me->name, value); return -ENOEXEC; } @@ -567,7 +565,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Subtract TOC pointer */ value -= my_r2(sechdrs, me); if ((value & 3) != 0 || value + 0x8000 > 0xffff) { - printk("%s: bad TOC16_DS relocation (%lu)\n", + pr_err("%s: bad TOC16_DS relocation (0x%lx)\n", me->name, value); return -ENOEXEC; } @@ -580,7 +578,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Subtract TOC pointer */ value -= my_r2(sechdrs, me); if ((value & 3) != 0) { - printk("%s: bad TOC16_LO_DS relocation (%lu)\n", + pr_err("%s: bad TOC16_LO_DS relocation (0x%lx)\n", me->name, value); return -ENOEXEC; } @@ -613,7 +611,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Convert value to relative */ value -= (unsigned long)location; if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){ - printk("%s: REL24 %li out of range!\n", + pr_err("%s: REL24 %li out of range!\n", me->name, (long int)value); return -ENOEXEC; } @@ -655,7 +653,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, break; default: - printk("%s: Unknown ADD relocation: %lu\n", + pr_err("%s: Unknown ADD relocation: %lu\n", me->name, (unsigned long)ELF64_R_TYPE(rela[i].r_info)); return -ENOEXEC; diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index 8bbc12d20f5..71bd161640c 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -13,7 +13,7 @@ #include <asm/machdep.h> -int arch_msi_check_device(struct pci_dev* dev, int nvec, int type) +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { if (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs) { pr_debug("msi: Platform doesn't provide MSI callbacks.\n"); @@ -24,16 +24,6 @@ int arch_msi_check_device(struct pci_dev* dev, int nvec, int type) if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; - if (ppc_md.msi_check_device) { - pr_debug("msi: Using platform check routine.\n"); - return ppc_md.msi_check_device(dev, nvec, type); - } - - return 0; -} - -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ return ppc_md.setup_msi_irqs(dev, nvec, type); } diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 28b898e6818..34f7c9b7cd9 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -567,7 +567,7 @@ static int __init nvram_init(void) return rc; } -void __exit nvram_cleanup(void) +static void __exit nvram_cleanup(void) { misc_deregister( &nvram_dev ); } diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index a7b74307672..f87bc1b4bdd 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -97,7 +97,7 @@ static int of_pci_phb_probe(struct platform_device *dev) return 0; } -static struct of_device_id of_pci_phb_ids[] = { +static const struct of_device_id of_pci_phb_ids[] = { { .type = "pci", }, { .type = "pcix", }, { .type = "pcie", }, diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index b2814e23e1e..bd70a51d574 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1140,7 +1140,7 @@ static int reparent_resources(struct resource *parent, * as well. */ -void pcibios_allocate_bus_resources(struct pci_bus *bus) +static void pcibios_allocate_bus_resources(struct pci_bus *bus) { struct pci_bus *b; int i; @@ -1561,7 +1561,6 @@ EARLY_PCI_OP(write, byte, u8) EARLY_PCI_OP(write, word, u16) EARLY_PCI_OP(write, dword, u32) -extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap); int early_find_capability(struct pci_controller *hose, int bus, int devfn, int cap) { diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 44562aa97f1..e6245e9c7d8 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -38,7 +38,7 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def) * @addr0: value of 1st cell of a device tree PCI address. * @bridge: Set this flag if the address is from a bridge 'ranges' property */ -unsigned int pci_parse_of_flags(u32 addr0, int bridge) +static unsigned int pci_parse_of_flags(u32 addr0, int bridge) { unsigned int flags = 0; diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 48d17d6fca5..c4dfff6c271 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -1,207 +1,41 @@ -#include <linux/export.h> -#include <linux/threads.h> -#include <linux/smp.h> -#include <linux/sched.h> -#include <linux/elfcore.h> -#include <linux/string.h> -#include <linux/interrupt.h> -#include <linux/screen_info.h> -#include <linux/vt_kern.h> -#include <linux/nvram.h> -#include <linux/irq.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/bitops.h> +#include <linux/ftrace.h> +#include <linux/mm.h> -#include <asm/page.h> #include <asm/processor.h> -#include <asm/cacheflush.h> -#include <asm/uaccess.h> -#include <asm/io.h> -#include <linux/atomic.h> -#include <asm/checksum.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include <linux/adb.h> -#include <linux/cuda.h> -#include <linux/pmu.h> -#include <asm/prom.h> -#include <asm/pci-bridge.h> -#include <asm/irq.h> -#include <asm/pmac_feature.h> -#include <asm/dma.h> -#include <asm/machdep.h> -#include <asm/hw_irq.h> -#include <asm/nvram.h> -#include <asm/mmu_context.h> -#include <asm/backlight.h> -#include <asm/time.h> -#include <asm/cputable.h> -#include <asm/btext.h> -#include <asm/div64.h> -#include <asm/signal.h> -#include <asm/dcr.h> -#include <asm/ftrace.h> #include <asm/switch_to.h> +#include <asm/cacheflush.h> #include <asm/epapr_hcalls.h> -#ifdef CONFIG_PPC32 -extern void transfer_to_handler(void); -extern void do_IRQ(struct pt_regs *regs); -extern void machine_check_exception(struct pt_regs *regs); -extern void alignment_exception(struct pt_regs *regs); -extern void program_check_exception(struct pt_regs *regs); -extern void single_step_exception(struct pt_regs *regs); -extern int sys_sigreturn(struct pt_regs *regs); +EXPORT_SYMBOL(flush_dcache_range); +EXPORT_SYMBOL(flush_icache_range); -EXPORT_SYMBOL(clear_pages); -EXPORT_SYMBOL(ISA_DMA_THRESHOLD); -EXPORT_SYMBOL(DMA_MODE_READ); -EXPORT_SYMBOL(DMA_MODE_WRITE); +EXPORT_SYMBOL(empty_zero_page); -EXPORT_SYMBOL(transfer_to_handler); -EXPORT_SYMBOL(do_IRQ); -EXPORT_SYMBOL(machine_check_exception); -EXPORT_SYMBOL(alignment_exception); -EXPORT_SYMBOL(program_check_exception); -EXPORT_SYMBOL(single_step_exception); -EXPORT_SYMBOL(sys_sigreturn); -#endif +long long __bswapdi2(long long); +EXPORT_SYMBOL(__bswapdi2); #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); #endif -EXPORT_SYMBOL(strcpy); -EXPORT_SYMBOL(strncpy); -EXPORT_SYMBOL(strcat); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strncmp); - -#ifndef CONFIG_GENERIC_CSUM -EXPORT_SYMBOL(csum_partial); -EXPORT_SYMBOL(csum_partial_copy_generic); -EXPORT_SYMBOL(ip_fast_csum); -EXPORT_SYMBOL(csum_tcpudp_magic); -#endif - -EXPORT_SYMBOL(__copy_tofrom_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(copy_page); - -#if defined(CONFIG_PCI) && defined(CONFIG_PPC32) -EXPORT_SYMBOL(isa_io_base); -EXPORT_SYMBOL(isa_mem_base); -EXPORT_SYMBOL(pci_dram_offset); -#endif /* CONFIG_PCI */ - -EXPORT_SYMBOL(start_thread); - #ifdef CONFIG_PPC_FPU EXPORT_SYMBOL(giveup_fpu); EXPORT_SYMBOL(load_fp_state); EXPORT_SYMBOL(store_fp_state); #endif + #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL(giveup_altivec); EXPORT_SYMBOL(load_vr_state); EXPORT_SYMBOL(store_vr_state); -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_VSX -EXPORT_SYMBOL(giveup_vsx); -EXPORT_SYMBOL_GPL(__giveup_vsx); -#endif /* CONFIG_VSX */ -#ifdef CONFIG_SPE -EXPORT_SYMBOL(giveup_spe); -#endif /* CONFIG_SPE */ - -#ifndef CONFIG_PPC64 -EXPORT_SYMBOL(flush_instruction_cache); #endif -EXPORT_SYMBOL(flush_dcache_range); -EXPORT_SYMBOL(flush_icache_range); -#ifdef CONFIG_SMP -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(smp_hw_index); -#endif -#endif - -#ifdef CONFIG_ADB -EXPORT_SYMBOL(adb_request); -EXPORT_SYMBOL(adb_register); -EXPORT_SYMBOL(adb_unregister); -EXPORT_SYMBOL(adb_poll); -EXPORT_SYMBOL(adb_try_handler_change); -#endif /* CONFIG_ADB */ -#ifdef CONFIG_ADB_CUDA -EXPORT_SYMBOL(cuda_request); -EXPORT_SYMBOL(cuda_poll); -#endif /* CONFIG_ADB_CUDA */ -EXPORT_SYMBOL(to_tm); - -#ifdef CONFIG_PPC32 -long long __ashrdi3(long long, int); -long long __ashldi3(long long, int); -long long __lshrdi3(long long, int); -EXPORT_SYMBOL(__ashrdi3); -EXPORT_SYMBOL(__ashldi3); -EXPORT_SYMBOL(__lshrdi3); -int __ucmpdi2(unsigned long long, unsigned long long); -EXPORT_SYMBOL(__ucmpdi2); -int __cmpdi2(long long, long long); -EXPORT_SYMBOL(__cmpdi2); -#endif -long long __bswapdi2(long long); -EXPORT_SYMBOL(__bswapdi2); -EXPORT_SYMBOL(memcpy); -EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(memcmp); -EXPORT_SYMBOL(memchr); - -#if defined(CONFIG_FB_VGA16_MODULE) -EXPORT_SYMBOL(screen_info); -#endif - -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(timer_interrupt); -EXPORT_SYMBOL(tb_ticks_per_jiffy); -EXPORT_SYMBOL(cacheable_memcpy); -EXPORT_SYMBOL(cacheable_memzero); -#endif - -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(switch_mmu_context); -#endif - -#ifdef CONFIG_PPC_STD_MMU_32 -extern long mol_trampoline; -EXPORT_SYMBOL(mol_trampoline); /* For MOL */ -EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ -#ifdef CONFIG_SMP -extern int mmu_hash_lock; -EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ -#endif /* CONFIG_SMP */ -extern long *intercept_table; -EXPORT_SYMBOL(intercept_table); -#endif /* CONFIG_PPC_STD_MMU_32 */ -#ifdef CONFIG_PPC_DCR_NATIVE -EXPORT_SYMBOL(__mtdcr); -EXPORT_SYMBOL(__mfdcr); -#endif -EXPORT_SYMBOL(empty_zero_page); - -#ifdef CONFIG_PPC64 -EXPORT_SYMBOL(__arch_hweight8); -EXPORT_SYMBOL(__arch_hweight16); -EXPORT_SYMBOL(__arch_hweight32); -EXPORT_SYMBOL(__arch_hweight64); +#ifdef CONFIG_VSX +EXPORT_SYMBOL_GPL(__giveup_vsx); #endif -#ifdef CONFIG_PPC_BOOK3S_64 -EXPORT_SYMBOL_GPL(mmu_psize_defs); +#ifdef CONFIG_SPE +EXPORT_SYMBOL(giveup_spe); #endif #ifdef CONFIG_EPAPR_PARAVIRT diff --git a/arch/powerpc/kernel/ppc_ksyms_32.c b/arch/powerpc/kernel/ppc_ksyms_32.c new file mode 100644 index 00000000000..30ddd8a24ee --- /dev/null +++ b/arch/powerpc/kernel/ppc_ksyms_32.c @@ -0,0 +1,61 @@ +#include <linux/export.h> +#include <linux/smp.h> + +#include <asm/page.h> +#include <asm/dma.h> +#include <asm/io.h> +#include <asm/hw_irq.h> +#include <asm/time.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/dcr.h> + +EXPORT_SYMBOL(clear_pages); +EXPORT_SYMBOL(ISA_DMA_THRESHOLD); +EXPORT_SYMBOL(DMA_MODE_READ); +EXPORT_SYMBOL(DMA_MODE_WRITE); + +#if defined(CONFIG_PCI) +EXPORT_SYMBOL(isa_io_base); +EXPORT_SYMBOL(isa_mem_base); +EXPORT_SYMBOL(pci_dram_offset); +#endif + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(smp_hw_index); +#endif + +long long __ashrdi3(long long, int); +long long __ashldi3(long long, int); +long long __lshrdi3(long long, int); +int __ucmpdi2(unsigned long long, unsigned long long); +int __cmpdi2(long long, long long); +EXPORT_SYMBOL(__ashrdi3); +EXPORT_SYMBOL(__ashldi3); +EXPORT_SYMBOL(__lshrdi3); +EXPORT_SYMBOL(__ucmpdi2); +EXPORT_SYMBOL(__cmpdi2); + +EXPORT_SYMBOL(timer_interrupt); +EXPORT_SYMBOL(tb_ticks_per_jiffy); + +EXPORT_SYMBOL(switch_mmu_context); + +#ifdef CONFIG_PPC_STD_MMU_32 +extern long mol_trampoline; +EXPORT_SYMBOL(mol_trampoline); /* For MOL */ +EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ +#ifdef CONFIG_SMP +extern int mmu_hash_lock; +EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ +#endif /* CONFIG_SMP */ +extern long *intercept_table; +EXPORT_SYMBOL(intercept_table); +#endif /* CONFIG_PPC_STD_MMU_32 */ + +#ifdef CONFIG_PPC_DCR_NATIVE +EXPORT_SYMBOL(__mtdcr); +EXPORT_SYMBOL(__mfdcr); +#endif + +EXPORT_SYMBOL(flush_instruction_cache); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index be99774d3f4..aa1df89c8b2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -228,6 +228,7 @@ void giveup_vsx(struct task_struct *tsk) giveup_altivec_maybe_transactional(tsk); __giveup_vsx(tsk); } +EXPORT_SYMBOL(giveup_vsx); void flush_vsx_to_thread(struct task_struct *tsk) { @@ -1095,6 +1096,23 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } +static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) +{ +#ifdef CONFIG_PPC_STD_MMU_64 + unsigned long sp_vsid; + unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; + + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) + sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T) + << SLB_VSID_SHIFT_1T; + else + sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M) + << SLB_VSID_SHIFT; + sp_vsid |= SLB_VSID_KERNEL | llp; + p->thread.ksp_vsid = sp_vsid; +#endif +} + /* * Copy a thread.. */ @@ -1174,21 +1192,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.vr_save_area = NULL; #endif -#ifdef CONFIG_PPC_STD_MMU_64 - if (mmu_has_feature(MMU_FTR_SLB)) { - unsigned long sp_vsid; - unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; + setup_ksp_vsid(p, sp); - if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) - sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T) - << SLB_VSID_SHIFT_1T; - else - sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M) - << SLB_VSID_SHIFT; - sp_vsid |= SLB_VSID_KERNEL | llp; - p->thread.ksp_vsid = sp_vsid; - } -#endif /* CONFIG_PPC_STD_MMU_64 */ #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_DSCR)) { p->thread.dscr_inherit = current->thread.dscr_inherit; @@ -1312,6 +1317,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.tm_tfiar = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ } +EXPORT_SYMBOL(start_thread); #define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \ | PR_FP_EXC_RES | PR_FP_EXC_INV) @@ -1577,7 +1583,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); lr = regs->link; - printk("--- Exception: %lx at %pS\n LR = %pS\n", + printk("--- interrupt: %lx at %pS\n LR = %pS\n", regs->trap, (void *)regs->nip, (void *)lr); firstframe = 1; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index b694b073097..099f27e6d1b 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -155,7 +155,6 @@ static struct ibm_pa_feature { } ibm_pa_features[] __initdata = { {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, - {0, MMU_FTR_SLB, 0, 0, 2, 0}, {CPU_FTR_CTRL, 0, 0, 0, 3, 0}, {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, @@ -309,12 +308,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node, /* Get physical cpuid */ intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len); - if (intserv) { - nthreads = len / sizeof(int); - } else { - intserv = of_get_flat_dt_prop(node, "reg", NULL); - nthreads = 1; - } + if (!intserv) + intserv = of_get_flat_dt_prop(node, "reg", &len); + + nthreads = len / sizeof(int); /* * Now see if any of these threads match our boot cpu. @@ -389,8 +386,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, - int depth, void *data) +static int __init early_init_dt_scan_chosen_ppc(unsigned long node, + const char *uname, + int depth, void *data) { const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */ @@ -644,6 +642,10 @@ void __init early_init_devtree(void *params) DBG(" -> early_init_devtree(%p)\n", params); + /* Too early to BUG_ON(), do it by hand */ + if (!early_init_dt_verify(params)) + panic("BUG: Failed verifying flat device tree, bad version?"); + /* Setup flat device-tree pointer */ initial_boot_params = params; @@ -666,14 +668,12 @@ void __init early_init_devtree(void *params) * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... */ - of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line); + of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); /* Scan memory nodes and rebuild MEMBLOCKs */ of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); - /* Save command line for /proc/cmdline and then parse parameters */ - strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); parse_early_param(); /* make sure we've parsed cmdline for mem= before this */ @@ -821,76 +821,6 @@ int cpu_to_chip_id(int cpu) } EXPORT_SYMBOL(cpu_to_chip_id); -#ifdef CONFIG_PPC_PSERIES -/* - * Fix up the uninitialized fields in a new device node: - * name, type and pci-specific fields - */ - -static int of_finish_dynamic_node(struct device_node *node) -{ - struct device_node *parent = of_get_parent(node); - int err = 0; - const phandle *ibm_phandle; - - node->name = of_get_property(node, "name", NULL); - node->type = of_get_property(node, "device_type", NULL); - - if (!node->name) - node->name = "<NULL>"; - if (!node->type) - node->type = "<NULL>"; - - if (!parent) { - err = -ENODEV; - goto out; - } - - /* We don't support that function on PowerMac, at least - * not yet - */ - if (machine_is(powermac)) - return -ENODEV; - - /* fix up new node's phandle field */ - if ((ibm_phandle = of_get_property(node, "ibm,phandle", NULL))) - node->phandle = *ibm_phandle; - -out: - of_node_put(parent); - return err; -} - -static int prom_reconfig_notifier(struct notifier_block *nb, - unsigned long action, void *node) -{ - int err; - - switch (action) { - case OF_RECONFIG_ATTACH_NODE: - err = of_finish_dynamic_node(node); - if (err < 0) - printk(KERN_ERR "finish_node returned %d\n", err); - break; - default: - err = 0; - break; - } - return notifier_from_errno(err); -} - -static struct notifier_block prom_reconfig_nb = { - .notifier_call = prom_reconfig_notifier, - .priority = 10, /* This one needs to run first */ -}; - -static int __init prom_reconfig_setup(void) -{ - return of_reconfig_notifier_register(&prom_reconfig_nb); -} -__initcall(prom_reconfig_setup); -#endif - bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { return (int)phys_id == get_hard_smp_processor_id(cpu); diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index fe8e54b9ef7..12640f7e726 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -50,24 +50,14 @@ do done # ignore register save/restore funcitons - if [ "${UNDEF:0:9}" = "_restgpr_" ]; then + case $UNDEF in + _restgpr_*|_restgpr0_*|_rest32gpr_*) OK=1 - fi - if [ "${UNDEF:0:10}" = "_restgpr0_" ]; then - OK=1 - fi - if [ "${UNDEF:0:11}" = "_rest32gpr_" ]; then - OK=1 - fi - if [ "${UNDEF:0:9}" = "_savegpr_" ]; then + ;; + _savegpr_*|_savegpr0_*|_save32gpr_*) OK=1 - fi - if [ "${UNDEF:0:10}" = "_savegpr0_" ]; then - OK=1 - fi - if [ "${UNDEF:0:11}" = "_save32gpr_" ]; then - OK=1 - fi + ;; + esac if [ $OK -eq 0 ]; then ERROR=1 diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 2e3d2bf536c..cdb404ea346 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -932,7 +932,7 @@ void ptrace_triggered(struct perf_event *bp, } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ -int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, +static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index e736387fee6..5a2c049c1c6 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -286,7 +286,7 @@ static void prrn_work_fn(struct work_struct *work) static DECLARE_WORK(prrn_work, prrn_work_fn); -void prrn_schedule_update(u32 scope) +static void prrn_schedule_update(u32 scope) { flush_work(&prrn_work); prrn_update_scope = scope; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index e5b022c55cc..1362cd62b3f 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -81,8 +81,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid); unsigned long klimit = (unsigned long) _end; -char cmd_line[COMMAND_LINE_SIZE]; - /* * This still seems to be needed... -- paulus */ @@ -94,6 +92,9 @@ struct screen_info screen_info = { .orig_video_isVGA = 1, .orig_video_points = 16 }; +#if defined(CONFIG_FB_VGA16_MODULE) +EXPORT_SYMBOL(screen_info); +#endif /* Variables required to store legacy IO irq routing */ int of_i8042_kbd_irq; @@ -382,7 +383,7 @@ void __init check_for_initrd(void) initrd_start = initrd_end = 0; if (initrd_start) - printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); + pr_info("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); DBG(" <- check_for_initrd()\n"); #endif /* CONFIG_BLK_DEV_INITRD */ @@ -456,18 +457,20 @@ void __init smp_setup_cpu_maps(void) intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (intserv) { - nthreads = len / sizeof(int); DBG(" ibm,ppc-interrupt-server#s -> %d threads\n", nthreads); } else { DBG(" no ibm,ppc-interrupt-server#s -> 1 thread\n"); - intserv = of_get_property(dn, "reg", NULL); + intserv = of_get_property(dn, "reg", &len); if (!intserv) { cpu_be = cpu_to_be32(cpu); intserv = &cpu_be; /* assume logical == phys */ + len = 4; } } + nthreads = len / sizeof(int); + for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { bool avail; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ea4fda60e57..07831ed0d9e 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -268,7 +268,7 @@ static void __init exc_lvl_early_init(void) /* Warning, IO base is not yet inited */ void __init setup_arch(char **cmdline_p) { - *cmdline_p = cmd_line; + *cmdline_p = boot_command_line; /* so udelay does something sensible, assume <= 1000 bogomips */ loops_per_jiffy = 500000000 / HZ; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index ee082d77117..cd07d79ad21 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -149,13 +149,13 @@ static void check_smt_enabled(void) else if (!strcmp(smt_enabled_cmdline, "off")) smt_enabled_at_boot = 0; else { - long smt; + int smt; int rc; - rc = strict_strtol(smt_enabled_cmdline, 10, &smt); + rc = kstrtoint(smt_enabled_cmdline, 10, &smt); if (!rc) smt_enabled_at_boot = - min(threads_per_core, (int)smt); + min(threads_per_core, smt); } } else { dn = of_find_node_by_path("/options"); @@ -201,7 +201,11 @@ static void cpu_ready_for_interrupts(void) /* Set IR and DR in PACA MSR */ get_paca()->kernel_msr = MSR_KERNEL; - /* Enable AIL if supported */ + /* + * Enable AIL if supported, and we are in hypervisor mode. If we are + * not in hypervisor mode, we enable relocation-on interrupts later + * in pSeries_setup_arch() using the H_SET_MODE hcall. + */ if (cpu_has_feature(CPU_FTR_HVMODE) && cpu_has_feature(CPU_FTR_ARCH_207S)) { unsigned long lpcr = mfspr(SPRN_LPCR); @@ -507,7 +511,11 @@ void __init setup_system(void) check_smt_enabled(); setup_tlb_core_data(); -#ifdef CONFIG_SMP + /* + * Freescale Book3e parts spin in a loop provided by firmware, + * so smp_release_cpus() does nothing for them + */ +#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_FSL_BOOK3E) /* Release secondary cpus out of their spinloops at 0x60 now that * we can map physical -> logical CPU ids */ @@ -517,21 +525,31 @@ void __init setup_system(void) printk("Starting Linux PPC64 %s\n", init_utsname()->version); printk("-----------------------------------------------------\n"); - printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - printk("physicalMemorySize = 0x%llx\n", memblock_phys_mem_size()); + printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); + printk("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); + if (ppc64_caches.dline_size != 0x80) - printk("ppc64_caches.dcache_line_size = 0x%x\n", - ppc64_caches.dline_size); + printk("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); if (ppc64_caches.iline_size != 0x80) - printk("ppc64_caches.icache_line_size = 0x%x\n", - ppc64_caches.iline_size); + printk("icache_line_size = 0x%x\n", ppc64_caches.iline_size); + + printk("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); + printk(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); + printk(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); + printk("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, + cur_cpu_spec->cpu_user_features2); + printk("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); + printk("firmware_features = 0x%016lx\n", powerpc_firmware_features); + #ifdef CONFIG_PPC_STD_MMU_64 if (htab_address) - printk("htab_address = 0x%p\n", htab_address); - printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); -#endif /* CONFIG_PPC_STD_MMU_64 */ + printk("htab_address = 0x%p\n", htab_address); + + printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); +#endif + if (PHYSICAL_START > 0) - printk("physical_start = 0x%llx\n", + printk("physical_start = 0x%llx\n", (unsigned long long)PHYSICAL_START); printk("-----------------------------------------------------\n"); @@ -649,7 +667,7 @@ void __init setup_arch(char **cmdline_p) { ppc64_boot_msg(0x12, "Setup Arch"); - *cmdline_p = cmd_line; + *cmdline_p = boot_command_line; /* * Set cache line size based on type of cpu as a default. @@ -673,9 +691,6 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); -#ifdef CONFIG_PPC_STD_MMU_64 - stabs_alloc(); -#endif /* set up the bootmem stuff with available memory */ do_init_bootmem(); sparse_init(); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 1c794cef288..cf8c7e4e0b2 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -31,20 +31,14 @@ int show_unhandled_signals = 1; /* * Allocate space for the signal frame */ -void __user * get_sigframe(struct k_sigaction *ka, unsigned long sp, +void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, size_t frame_size, int is_32) { unsigned long oldsp, newsp; /* Default to using normal stack */ oldsp = get_clean_sp(sp, is_32); - - /* Check for alt stack */ - if ((ka->sa.sa_flags & SA_ONSTACK) && - current->sas_ss_size && !on_sig_stack(oldsp)) - oldsp = (current->sas_ss_sp + current->sas_ss_size); - - /* Get aligned frame */ + oldsp = sigsp(oldsp, ksig); newsp = (oldsp - frame_size) & ~0xFUL; /* Check access */ @@ -105,25 +99,23 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, } } -static int do_signal(struct pt_regs *regs) +static void do_signal(struct pt_regs *regs) { sigset_t *oldset = sigmask_to_save(); - siginfo_t info; - int signr; - struct k_sigaction ka; + struct ksignal ksig; int ret; int is32 = is_32bit_task(); - signr = get_signal_to_deliver(&info, &ka, regs, NULL); + get_signal(&ksig); /* Is there any syscall restart business here ? */ - check_syscall_restart(regs, &ka, signr > 0); + check_syscall_restart(regs, &ksig.ka, ksig.sig > 0); - if (signr <= 0) { + if (ksig.sig <= 0) { /* No signal to deliver -- put the saved sigmask back */ restore_saved_sigmask(); regs->trap = 0; - return 0; /* no signals delivered */ + return; /* no signals delivered */ } #ifndef CONFIG_PPC_ADV_DEBUG_REGS @@ -140,23 +132,16 @@ static int do_signal(struct pt_regs *regs) thread_change_pc(current, regs); if (is32) { - if (ka.sa.sa_flags & SA_SIGINFO) - ret = handle_rt_signal32(signr, &ka, &info, oldset, - regs); + if (ksig.ka.sa.sa_flags & SA_SIGINFO) + ret = handle_rt_signal32(&ksig, oldset, regs); else - ret = handle_signal32(signr, &ka, &info, oldset, - regs); + ret = handle_signal32(&ksig, oldset, regs); } else { - ret = handle_rt_signal64(signr, &ka, &info, oldset, regs); + ret = handle_rt_signal64(&ksig, oldset, regs); } regs->trap = 0; - if (ret) { - signal_delivered(signr, &info, &ka, regs, - test_thread_flag(TIF_SINGLESTEP)); - } - - return ret; + signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP)); } void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index c69b9aeb9f2..51b274199dd 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -12,15 +12,13 @@ extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); -extern void __user * get_sigframe(struct k_sigaction *ka, unsigned long sp, +extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, size_t frame_size, int is_32); -extern int handle_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, +extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); -extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, +extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); extern unsigned long copy_fpr_to_user(void __user *to, @@ -44,14 +42,12 @@ extern unsigned long copy_transact_vsx_from_user(struct task_struct *task, #ifdef CONFIG_PPC64 -extern int handle_rt_signal64(int signr, struct k_sigaction *ka, - siginfo_t *info, sigset_t *set, +extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs); #else /* CONFIG_PPC64 */ -static inline int handle_rt_signal64(int signr, struct k_sigaction *ka, - siginfo_t *info, sigset_t *set, +static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { return -EFAULT; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 1bc5a1755ed..b171001698f 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -981,9 +981,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) * Set up a signal frame for a "real-time" signal handler * (one which gets siginfo). */ -int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, - struct pt_regs *regs) +int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, + struct pt_regs *regs) { struct rt_sigframe __user *rt_sf; struct mcontext __user *frame; @@ -995,13 +994,13 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ - rt_sf = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*rt_sf), 1); + rt_sf = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*rt_sf), 1); addr = rt_sf; if (unlikely(rt_sf == NULL)) goto badframe; /* Put the siginfo & fill in most of the ucontext */ - if (copy_siginfo_to_user(&rt_sf->info, info) + if (copy_siginfo_to_user(&rt_sf->info, &ksig->info) || __put_user(0, &rt_sf->uc.uc_flags) || __save_altstack(&rt_sf->uc.uc_stack, regs->gpr[1]) || __put_user(to_user_ptr(&rt_sf->uc.uc_mcontext), @@ -1051,15 +1050,15 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* Fill registers for signal handler */ regs->gpr[1] = newsp; - regs->gpr[3] = sig; + regs->gpr[3] = ksig->sig; regs->gpr[4] = (unsigned long) &rt_sf->info; regs->gpr[5] = (unsigned long) &rt_sf->uc; regs->gpr[6] = (unsigned long) rt_sf; - regs->nip = (unsigned long) ka->sa.sa_handler; + regs->nip = (unsigned long) ksig->ka.sa.sa_handler; /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; regs->msr |= (MSR_KERNEL & MSR_LE); - return 1; + return 0; badframe: if (show_unhandled_signals) @@ -1069,8 +1068,7 @@ badframe: current->comm, current->pid, addr, regs->nip, regs->link); - force_sigsegv(sig, current); - return 0; + return 1; } static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int sig) @@ -1409,8 +1407,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, /* * OK, we're invoking a handler */ -int handle_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) +int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs) { struct sigcontext __user *sc; struct sigframe __user *frame; @@ -1420,7 +1417,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, unsigned long tramp; /* Set up Signal Frame */ - frame = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*frame), 1); + frame = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*frame), 1); if (unlikely(frame == NULL)) goto badframe; sc = (struct sigcontext __user *) &frame->sctx; @@ -1428,7 +1425,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, #if _NSIG != 64 #error "Please adjust handle_signal()" #endif - if (__put_user(to_user_ptr(ka->sa.sa_handler), &sc->handler) + if (__put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler) || __put_user(oldset->sig[0], &sc->oldmask) #ifdef CONFIG_PPC64 || __put_user((oldset->sig[0] >> 32), &sc->_unused[3]) @@ -1436,7 +1433,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, || __put_user(oldset->sig[1], &sc->_unused[3]) #endif || __put_user(to_user_ptr(&frame->mctx), &sc->regs) - || __put_user(sig, &sc->signal)) + || __put_user(ksig->sig, &sc->signal)) goto badframe; if (vdso32_sigtramp && current->mm->context.vdso_base) { @@ -1471,12 +1468,12 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, goto badframe; regs->gpr[1] = newsp; - regs->gpr[3] = sig; + regs->gpr[3] = ksig->sig; regs->gpr[4] = (unsigned long) sc; - regs->nip = (unsigned long) ka->sa.sa_handler; + regs->nip = (unsigned long) (unsigned long)ksig->ka.sa.sa_handler; /* enter the signal handler in big-endian mode */ regs->msr &= ~MSR_LE; - return 1; + return 0; badframe: if (show_unhandled_signals) @@ -1486,8 +1483,7 @@ badframe: current->comm, current->pid, frame, regs->nip, regs->link); - force_sigsegv(sig, current); - return 0; + return 1; } /* diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 97c1e4b683f..2cb0c94cafa 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -708,20 +708,19 @@ badframe: return 0; } -int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs) +int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; unsigned long newsp = 0; long err = 0; - frame = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*frame), 0); + frame = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*frame), 0); if (unlikely(frame == NULL)) goto badframe; err |= __put_user(&frame->info, &frame->pinfo); err |= __put_user(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); + err |= copy_siginfo_to_user(&frame->info, &ksig->info); if (err) goto badframe; @@ -736,15 +735,15 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(&frame->uc_transact, &frame->uc.uc_link); err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, &frame->uc_transact.uc_mcontext, - regs, signr, + regs, ksig->sig, NULL, - (unsigned long)ka->sa.sa_handler); + (unsigned long)ksig->ka.sa.sa_handler); } else #endif { err |= __put_user(0, &frame->uc.uc_link); - err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, - NULL, (unsigned long)ka->sa.sa_handler, + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, ksig->sig, + NULL, (unsigned long)ksig->ka.sa.sa_handler, 1); } err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -770,7 +769,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, /* Set up "regs" so we "return" to the signal handler. */ if (is_elf2_task()) { - regs->nip = (unsigned long) ka->sa.sa_handler; + regs->nip = (unsigned long) ksig->ka.sa.sa_handler; regs->gpr[12] = regs->nip; } else { /* Handler is *really* a pointer to the function descriptor for @@ -779,7 +778,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, * entry is the TOC value we need to use. */ func_descr_t __user *funct_desc_ptr = - (func_descr_t __user *) ka->sa.sa_handler; + (func_descr_t __user *) ksig->ka.sa.sa_handler; err |= get_user(regs->nip, &funct_desc_ptr->entry); err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); @@ -789,9 +788,9 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, regs->msr &= ~MSR_LE; regs->msr |= (MSR_KERNEL & MSR_LE); regs->gpr[1] = newsp; - regs->gpr[3] = signr; + regs->gpr[3] = ksig->sig; regs->result = 0; - if (ka->sa.sa_flags & SA_SIGINFO) { + if (ksig->ka.sa.sa_flags & SA_SIGINFO) { err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo); err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc); regs->gpr[6] = (unsigned long) frame; @@ -801,7 +800,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, if (err) goto badframe; - return 1; + return 0; badframe: if (show_unhandled_signals) @@ -809,6 +808,5 @@ badframe: current->comm, current->pid, "setup_rt_frame", (long)frame, regs->nip, regs->link); - force_sigsegv(signr, current); - return 0; + return 1; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 1007fb802e6..71e186d5f33 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -52,6 +52,7 @@ #endif #include <asm/vdso.h> #include <asm/debug.h> +#include <asm/kexec.h> #ifdef DEBUG #include <asm/udbg.h> @@ -376,6 +377,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus) GFP_KERNEL, cpu_to_node(cpu)); zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), GFP_KERNEL, cpu_to_node(cpu)); + /* + * numa_node_id() works after this. + */ + if (cpu_present(cpu)) { + set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]); + set_cpu_numa_mem(cpu, + local_memory_node(numa_cpu_lookup_table[cpu])); + } } cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); @@ -723,9 +732,6 @@ void start_secondary(void *unused) } traverse_core_siblings(cpu, true); - /* - * numa_node_id() works after this. - */ set_numa_node(numa_cpu_lookup_table[cpu]); set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c index 0167d53da30..a531154cc0f 100644 --- a/arch/powerpc/kernel/suspend.c +++ b/arch/powerpc/kernel/suspend.c @@ -9,9 +9,7 @@ #include <linux/mm.h> #include <asm/page.h> - -/* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; +#include <asm/sections.h> /* * pfn_is_nosave - check if given pfn is in the 'nosave' section diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 895c50ca943..7ab5d434e2e 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -39,9 +39,6 @@ .section .rodata,"a" #ifdef CONFIG_PPC64 -#define sys_sigpending sys_ni_syscall -#define sys_old_getrlimit sys_ni_syscall - .p2align 3 #endif diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 9fff9cdcc51..7505599c259 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -479,7 +479,7 @@ void arch_irq_work_raise(void) #endif /* CONFIG_IRQ_WORK */ -void __timer_interrupt(void) +static void __timer_interrupt(void) { struct pt_regs *regs = get_irq_regs(); u64 *next_tb = &__get_cpu_var(decrementers_next_tb); @@ -643,7 +643,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val) return found; } -void start_cpu_decrementer(void) +static void start_cpu_decrementer(void) { #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) /* Clear any pending timer interrupts */ @@ -741,7 +741,7 @@ static cycle_t timebase_read(struct clocksource *cs) } void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm, - struct clocksource *clock, u32 mult) + struct clocksource *clock, u32 mult, cycle_t cycle_last) { u64 new_tb_to_xs, new_stamp_xsec; u32 frac_sec; @@ -774,7 +774,7 @@ void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm, * We expect the caller to have done the first increment of * vdso_data->tb_update_count already. */ - vdso_data->tb_orig_stamp = clock->cycle_last; + vdso_data->tb_orig_stamp = cycle_last; vdso_data->stamp_xsec = new_stamp_xsec; vdso_data->tb_to_xs = new_tb_to_xs; vdso_data->wtom_clock_sec = wtm->tv_sec; @@ -1024,6 +1024,7 @@ void to_tm(int tim, struct rtc_time * tm) */ GregorianDay(tm); } +EXPORT_SYMBOL(to_tm); /* * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 239f1cde3ff..0dc43f9932c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -302,6 +302,16 @@ long machine_check_early(struct pt_regs *regs) return handled; } +long hmi_exception_realmode(struct pt_regs *regs) +{ + __get_cpu_var(irq_stat).hmi_exceptions++; + + if (ppc_md.hmi_exception_early) + ppc_md.hmi_exception_early(regs); + + return 0; +} + #endif /* @@ -609,7 +619,7 @@ int machine_check_e500(struct pt_regs *regs) if (reason & MCSR_BUS_RBERR) printk("Bus - Read Data Bus Error\n"); if (reason & MCSR_BUS_WBERR) - printk("Bus - Read Data Bus Error\n"); + printk("Bus - Write Data Bus Error\n"); if (reason & MCSR_BUS_IPERR) printk("Bus - Instruction Parity Error\n"); if (reason & MCSR_BUS_RPERR) @@ -738,6 +748,20 @@ void SMIException(struct pt_regs *regs) die("System Management Interrupt", regs, SIGABRT); } +void handle_hmi_exception(struct pt_regs *regs) +{ + struct pt_regs *old_regs; + + old_regs = set_irq_regs(regs); + irq_enter(); + + if (ppc_md.handle_hmi_exception) + ppc_md.handle_hmi_exception(regs); + + irq_exit(); + set_irq_regs(old_regs); +} + void unknown_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index ce74c335a6a..f174351842c 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -840,19 +840,3 @@ static int __init vdso_init(void) return 0; } arch_initcall(vdso_init); - -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 904c66128fa..5bfdab9047b 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -977,7 +977,7 @@ static ssize_t viodev_cmo_desired_set(struct device *dev, size_t new_desired; int ret; - ret = strict_strtoul(buf, 10, &new_desired); + ret = kstrtoul(buf, 10, &new_desired); if (ret) return ret; diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c deleted file mode 100644 index 9cb4b0a3603..00000000000 --- a/arch/powerpc/kvm/44x.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#include <linux/kvm_host.h> -#include <linux/slab.h> -#include <linux/err.h> -#include <linux/export.h> -#include <linux/module.h> -#include <linux/miscdevice.h> - -#include <asm/reg.h> -#include <asm/cputable.h> -#include <asm/tlbflush.h> -#include <asm/kvm_44x.h> -#include <asm/kvm_ppc.h> - -#include "44x_tlb.h" -#include "booke.h" - -static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu) -{ - kvmppc_booke_vcpu_load(vcpu, cpu); - kvmppc_44x_tlb_load(vcpu); -} - -static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu) -{ - kvmppc_44x_tlb_put(vcpu); - kvmppc_booke_vcpu_put(vcpu); -} - -int kvmppc_core_check_processor_compat(void) -{ - int r; - - if (strncmp(cur_cpu_spec->platform, "ppc440", 6) == 0) - r = 0; - else - r = -ENOTSUPP; - - return r; -} - -int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0]; - int i; - - tlbe->tid = 0; - tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; - tlbe->word1 = 0; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; - - tlbe++; - tlbe->tid = 0; - tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; - tlbe->word1 = 0xef600000; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR - | PPC44x_TLB_I | PPC44x_TLB_G; - - /* Since the guest can directly access the timebase, it must know the - * real timebase frequency. Accordingly, it must see the state of - * CCR1[TCS]. */ - /* XXX CCR1 doesn't exist on all 440 SoCs. */ - vcpu->arch.ccr1 = mfspr(SPRN_CCR1); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) - vcpu_44x->shadow_refs[i].gtlb_index = -1; - - vcpu->arch.cpu_type = KVM_CPU_440; - vcpu->arch.pvr = mfspr(SPRN_PVR); - - return 0; -} - -/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ -int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr) -{ - int index; - gva_t eaddr; - u8 pid; - u8 as; - - eaddr = tr->linear_address; - pid = (tr->linear_address >> 32) & 0xff; - as = (tr->linear_address >> 40) & 0x1; - - index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); - if (index == -1) { - tr->valid = 0; - return 0; - } - - tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); - /* XXX what does "writeable" and "usermode" even mean? */ - tr->valid = 1; - - return 0; -} - -static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return kvmppc_get_sregs_ivor(vcpu, sregs); -} - -static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return kvmppc_set_sregs_ivor(vcpu, sregs); -} - -static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) -{ - return -EINVAL; -} - -static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) -{ - return -EINVAL; -} - -static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm, - unsigned int id) -{ - struct kvmppc_vcpu_44x *vcpu_44x; - struct kvm_vcpu *vcpu; - int err; - - vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu_44x) { - err = -ENOMEM; - goto out; - } - - vcpu = &vcpu_44x->vcpu; - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - - vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); - if (!vcpu->arch.shared) - goto uninit_vcpu; - - return vcpu; - -uninit_vcpu: - kvm_vcpu_uninit(vcpu); -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -out: - return ERR_PTR(err); -} - -static void kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - - free_page((unsigned long)vcpu->arch.shared); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -} - -static int kvmppc_core_init_vm_44x(struct kvm *kvm) -{ - return 0; -} - -static void kvmppc_core_destroy_vm_44x(struct kvm *kvm) -{ -} - -static struct kvmppc_ops kvm_ops_44x = { - .get_sregs = kvmppc_core_get_sregs_44x, - .set_sregs = kvmppc_core_set_sregs_44x, - .get_one_reg = kvmppc_get_one_reg_44x, - .set_one_reg = kvmppc_set_one_reg_44x, - .vcpu_load = kvmppc_core_vcpu_load_44x, - .vcpu_put = kvmppc_core_vcpu_put_44x, - .vcpu_create = kvmppc_core_vcpu_create_44x, - .vcpu_free = kvmppc_core_vcpu_free_44x, - .mmu_destroy = kvmppc_mmu_destroy_44x, - .init_vm = kvmppc_core_init_vm_44x, - .destroy_vm = kvmppc_core_destroy_vm_44x, - .emulate_op = kvmppc_core_emulate_op_44x, - .emulate_mtspr = kvmppc_core_emulate_mtspr_44x, - .emulate_mfspr = kvmppc_core_emulate_mfspr_44x, -}; - -static int __init kvmppc_44x_init(void) -{ - int r; - - r = kvmppc_booke_init(); - if (r) - goto err_out; - - r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); - if (r) - goto err_out; - kvm_ops_44x.owner = THIS_MODULE; - kvmppc_pr_ops = &kvm_ops_44x; - -err_out: - return r; -} - -static void __exit kvmppc_44x_exit(void) -{ - kvmppc_pr_ops = NULL; - kvmppc_booke_exit(); -} - -module_init(kvmppc_44x_init); -module_exit(kvmppc_44x_exit); -MODULE_ALIAS_MISCDEV(KVM_MINOR); -MODULE_ALIAS("devname:kvm"); diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c deleted file mode 100644 index 92c9ab4bcfe..00000000000 --- a/arch/powerpc/kvm/44x_emulate.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#include <asm/kvm_ppc.h> -#include <asm/dcr.h> -#include <asm/dcr-regs.h> -#include <asm/disassemble.h> -#include <asm/kvm_44x.h> -#include "timing.h" - -#include "booke.h" -#include "44x_tlb.h" - -#define XOP_MFDCRX 259 -#define XOP_MFDCR 323 -#define XOP_MTDCRX 387 -#define XOP_MTDCR 451 -#define XOP_TLBSX 914 -#define XOP_ICCCI 966 -#define XOP_TLBWE 978 - -static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn) -{ - /* emulate some access in kernel */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); - return EMULATE_DONE; - default: - vcpu->run->dcr.dcrn = dcrn; - vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs); - vcpu->run->dcr.is_write = 1; - vcpu->arch.dcr_is_write = 1; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - return EMULATE_DO_DCR; - } -} - -static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn) -{ - /* The guest may access CPR0 registers to determine the timebase - * frequency, and it must know the real host frequency because it - * can directly access the timebase registers. - * - * It would be possible to emulate those accesses in userspace, - * but userspace can really only figure out the end frequency. - * We could decompose that into the factors that compute it, but - * that's tricky math, and it's easier to just report the real - * CPR0 values. - */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); - break; - case DCRN_CPR0_CONFIG_DATA: - local_irq_disable(); - mtdcr(DCRN_CPR0_CONFIG_ADDR, - vcpu->arch.cpr0_cfgaddr); - kvmppc_set_gpr(vcpu, rt, - mfdcr(DCRN_CPR0_CONFIG_DATA)); - local_irq_enable(); - break; - default: - vcpu->run->dcr.dcrn = dcrn; - vcpu->run->dcr.data = 0; - vcpu->run->dcr.is_write = 0; - vcpu->arch.dcr_is_write = 0; - vcpu->arch.io_gpr = rt; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - return EMULATE_DO_DCR; - } - - return EMULATE_DONE; -} - -int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance) -{ - int emulated = EMULATE_DONE; - int dcrn = get_dcrn(inst); - int ra = get_ra(inst); - int rb = get_rb(inst); - int rc = get_rc(inst); - int rs = get_rs(inst); - int rt = get_rt(inst); - int ws = get_ws(inst); - - switch (get_op(inst)) { - case 31: - switch (get_xop(inst)) { - - case XOP_MFDCR: - emulated = emulate_mfdcr(vcpu, rt, dcrn); - break; - - case XOP_MFDCRX: - emulated = emulate_mfdcr(vcpu, rt, - kvmppc_get_gpr(vcpu, ra)); - break; - - case XOP_MTDCR: - emulated = emulate_mtdcr(vcpu, rs, dcrn); - break; - - case XOP_MTDCRX: - emulated = emulate_mtdcr(vcpu, rs, - kvmppc_get_gpr(vcpu, ra)); - break; - - case XOP_TLBWE: - emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); - break; - - case XOP_TLBSX: - emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); - break; - - case XOP_ICCCI: - break; - - default: - emulated = EMULATE_FAIL; - } - - break; - - default: - emulated = EMULATE_FAIL; - } - - if (emulated == EMULATE_FAIL) - emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance); - - return emulated; -} - -int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) -{ - int emulated = EMULATE_DONE; - - switch (sprn) { - case SPRN_PID: - kvmppc_set_pid(vcpu, spr_val); break; - case SPRN_MMUCR: - vcpu->arch.mmucr = spr_val; break; - case SPRN_CCR0: - vcpu->arch.ccr0 = spr_val; break; - case SPRN_CCR1: - vcpu->arch.ccr1 = spr_val; break; - default: - emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val); - } - - return emulated; -} - -int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) -{ - int emulated = EMULATE_DONE; - - switch (sprn) { - case SPRN_PID: - *spr_val = vcpu->arch.pid; break; - case SPRN_MMUCR: - *spr_val = vcpu->arch.mmucr; break; - case SPRN_CCR0: - *spr_val = vcpu->arch.ccr0; break; - case SPRN_CCR1: - *spr_val = vcpu->arch.ccr1; break; - default: - emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val); - } - - return emulated; -} - diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c deleted file mode 100644 index 0deef1082e0..00000000000 --- a/arch/powerpc/kvm/44x_tlb.c +++ /dev/null @@ -1,528 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2007 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#include <linux/types.h> -#include <linux/string.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/highmem.h> - -#include <asm/tlbflush.h> -#include <asm/mmu-44x.h> -#include <asm/kvm_ppc.h> -#include <asm/kvm_44x.h> -#include "timing.h" - -#include "44x_tlb.h" -#include "trace.h" - -#ifndef PPC44x_TLBE_SIZE -#define PPC44x_TLBE_SIZE PPC44x_TLB_4K -#endif - -#define PAGE_SIZE_4K (1<<12) -#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1)) - -#define PPC44x_TLB_UATTR_MASK \ - (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3) -#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) -#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) - -#ifdef DEBUG -void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe; - int i; - - printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); - printk("| %2s | %3s | %8s | %8s | %8s |\n", - "nr", "tid", "word0", "word1", "word2"); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - tlbe = &vcpu_44x->guest_tlb[i]; - if (tlbe->word0 & PPC44x_TLB_VALID) - printk(" G%2d | %02X | %08X | %08X | %08X |\n", - i, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - } -} -#endif - -static inline void kvmppc_44x_tlbie(unsigned int index) -{ - /* 0 <= index < 64, so the V bit is clear and we can use the index as - * word0. */ - asm volatile( - "tlbwe %[index], %[index], 0\n" - : - : [index] "r"(index) - ); -} - -static inline void kvmppc_44x_tlbre(unsigned int index, - struct kvmppc_44x_tlbe *tlbe) -{ - asm volatile( - "tlbre %[word0], %[index], 0\n" - "mfspr %[tid], %[sprn_mmucr]\n" - "andi. %[tid], %[tid], 0xff\n" - "tlbre %[word1], %[index], 1\n" - "tlbre %[word2], %[index], 2\n" - : [word0] "=r"(tlbe->word0), - [word1] "=r"(tlbe->word1), - [word2] "=r"(tlbe->word2), - [tid] "=r"(tlbe->tid) - : [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - : "cc" - ); -} - -static inline void kvmppc_44x_tlbwe(unsigned int index, - struct kvmppc_44x_tlbe *stlbe) -{ - unsigned long tmp; - - asm volatile( - "mfspr %[tmp], %[sprn_mmucr]\n" - "rlwimi %[tmp], %[tid], 0, 0xff\n" - "mtspr %[sprn_mmucr], %[tmp]\n" - "tlbwe %[word0], %[index], 0\n" - "tlbwe %[word1], %[index], 1\n" - "tlbwe %[word2], %[index], 2\n" - : [tmp] "=&r"(tmp) - : [word0] "r"(stlbe->word0), - [word1] "r"(stlbe->word1), - [word2] "r"(stlbe->word2), - [tid] "r"(stlbe->tid), - [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - ); -} - -static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) -{ - /* We only care about the guest's permission and user bits. */ - attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK; - - if (!usermode) { - /* Guest is in supervisor mode, so we need to translate guest - * supervisor permissions into user permissions. */ - attrib &= ~PPC44x_TLB_USER_PERM_MASK; - attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3; - } - - /* Make sure host can always access this memory. */ - attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; - - /* WIMGE = 0b00100 */ - attrib |= PPC44x_TLB_M; - - return attrib; -} - -/* Load shadow TLB back into hardware. */ -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbwe(i, stlbe); - } -} - -static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int i) -{ - vcpu_44x->shadow_tlb_mod[i] = 1; -} - -/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */ -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (vcpu_44x->shadow_tlb_mod[i]) - kvmppc_44x_tlbre(i, stlbe); - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbie(i); - } -} - - -/* Search the guest TLB for a matching entry. */ -int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, - unsigned int as) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - /* XXX Replace loop with fancy data structures. */ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i]; - unsigned int tid; - - if (eaddr < get_tlb_eaddr(tlbe)) - continue; - - if (eaddr > get_tlb_end(tlbe)) - continue; - - tid = get_tlb_tid(tlbe); - if (tid && (tid != pid)) - continue; - - if (!get_tlb_v(tlbe)) - continue; - - if (get_tlb_ts(tlbe) != as) - continue; - - return i; - } - - return -1; -} - -gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, - gva_t eaddr) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - unsigned int pgmask = get_tlb_bytes(gtlbe) - 1; - - return get_tlb_raddr(gtlbe) | (eaddr & pgmask); -} - -int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); - - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); -} - -int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); - - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); -} - -void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) -{ -} - -static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int stlb_index) -{ - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index]; - - if (!ref->page) - return; - - /* Discard from the TLB. */ - /* Note: we could actually invalidate a host mapping, if the host overwrote - * this TLB entry since we inserted a guest mapping. */ - kvmppc_44x_tlbie(stlb_index); - - /* Now release the page. */ - if (ref->writeable) - kvm_release_page_dirty(ref->page); - else - kvm_release_page_clean(ref->page); - - ref->page = NULL; - - /* XXX set tlb_44x_index to stlb_index? */ - - trace_kvm_stlb_inval(stlb_index); -} - -void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) - kvmppc_44x_shadow_release(vcpu_44x, i); -} - -/** - * kvmppc_mmu_map -- create a host mapping for guest memory - * - * If the guest wanted a larger page than the host supports, only the first - * host page is mapped here and the rest are demand faulted. - * - * If the guest wanted a smaller page than the host page size, we map only the - * guest-size page (i.e. not a full host page mapping). - * - * Caller must ensure that the specified guest TLB entry is safe to insert into - * the shadow TLB. - */ -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, - unsigned int gtlb_index) -{ - struct kvmppc_44x_tlbe stlbe; - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - struct kvmppc_44x_shadow_ref *ref; - struct page *new_page; - hpa_t hpaddr; - gfn_t gfn; - u32 asid = gtlbe->tid; - u32 flags = gtlbe->word2; - u32 max_bytes = get_tlb_bytes(gtlbe); - unsigned int victim; - - /* Select TLB entry to clobber. Indirectly guard against races with the TLB - * miss handler by disabling interrupts. */ - local_irq_disable(); - victim = ++tlb_44x_index; - if (victim > tlb_44x_hwater) - victim = 0; - tlb_44x_index = victim; - local_irq_enable(); - - /* Get reference to new page. */ - gfn = gpaddr >> PAGE_SHIFT; - new_page = gfn_to_page(vcpu->kvm, gfn); - if (is_error_page(new_page)) { - printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", - (unsigned long long)gfn); - return; - } - hpaddr = page_to_phys(new_page); - - /* Invalidate any previous shadow mappings. */ - kvmppc_44x_shadow_release(vcpu_44x, victim); - - /* XXX Make sure (va, size) doesn't overlap any other - * entries. 440x6 user manual says the result would be - * "undefined." */ - - /* XXX what about AS? */ - - /* Force TS=1 for all guest mappings. */ - stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS; - - if (max_bytes >= PAGE_SIZE) { - /* Guest mapping is larger than or equal to host page size. We can use - * a "native" host mapping. */ - stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE; - } else { - /* Guest mapping is smaller than host page size. We must restrict the - * size of the mapping to be at most the smaller of the two, but for - * simplicity we fall back to a 4K mapping (this is probably what the - * guest is using anyways). */ - stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K; - - /* 'hpaddr' is a host page, which is larger than the mapping we're - * inserting here. To compensate, we must add the in-page offset to the - * sub-page. */ - hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K); - } - - stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); - stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, - vcpu->arch.shared->msr & MSR_PR); - stlbe.tid = !(asid & 0xff); - - /* Keep track of the reference so we can properly release it later. */ - ref = &vcpu_44x->shadow_refs[victim]; - ref->page = new_page; - ref->gtlb_index = gtlb_index; - ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW); - ref->tid = stlbe.tid; - - /* Insert shadow mapping into hardware TLB. */ - kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); - kvmppc_44x_tlbwe(victim, &stlbe); - trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1, - stlbe.word2); -} - -/* For a particular guest TLB entry, invalidate the corresponding host TLB - * mappings and release the host pages. */ -static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu, - unsigned int gtlb_index) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - if (ref->gtlb_index == gtlb_index) - kvmppc_44x_shadow_release(vcpu_44x, i); - } -} - -void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) -{ - int usermode = vcpu->arch.shared->msr & MSR_PR; - - vcpu->arch.shadow_pid = !usermode; -} - -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - if (unlikely(vcpu->arch.pid == new_pid)) - return; - - vcpu->arch.pid = new_pid; - - /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it - * can't access guest kernel mappings (TID=1). When we switch to a new - * guest PID, which will also use host PID=0, we must discard the old guest - * userspace mappings. */ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - - if (ref->tid == 0) - kvmppc_44x_shadow_release(vcpu_44x, i); - } -} - -static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct kvmppc_44x_tlbe *tlbe) -{ - gpa_t gpa; - - if (!get_tlb_v(tlbe)) - return 0; - - /* Does it match current guest AS? */ - /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) - return 0; - - gpa = get_tlb_raddr(tlbe); - if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) - /* Mapping is not for RAM. */ - return 0; - - return 1; -} - -int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe; - unsigned int gtlb_index; - int idx; - - gtlb_index = kvmppc_get_gpr(vcpu, ra); - if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) { - printk("%s: index %d\n", __func__, gtlb_index); - kvmppc_dump_vcpu(vcpu); - return EMULATE_FAIL; - } - - tlbe = &vcpu_44x->guest_tlb[gtlb_index]; - - /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */ - if (tlbe->word0 & PPC44x_TLB_VALID) - kvmppc_44x_invalidate(vcpu, gtlb_index); - - switch (ws) { - case PPC44x_TLB_PAGEID: - tlbe->tid = get_mmucr_stid(vcpu); - tlbe->word0 = kvmppc_get_gpr(vcpu, rs); - break; - - case PPC44x_TLB_XLAT: - tlbe->word1 = kvmppc_get_gpr(vcpu, rs); - break; - - case PPC44x_TLB_ATTRIB: - tlbe->word2 = kvmppc_get_gpr(vcpu, rs); - break; - - default: - return EMULATE_FAIL; - } - - idx = srcu_read_lock(&vcpu->kvm->srcu); - - if (tlbe_is_host_safe(vcpu, tlbe)) { - gva_t eaddr; - gpa_t gpaddr; - u32 bytes; - - eaddr = get_tlb_eaddr(tlbe); - gpaddr = get_tlb_raddr(tlbe); - - /* Use the advertised page size to mask effective and real addrs. */ - bytes = get_tlb_bytes(tlbe); - eaddr &= ~(bytes - 1); - gpaddr &= ~(bytes - 1); - - kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); - } - - srcu_read_unlock(&vcpu->kvm->srcu, idx); - - trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - - kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); - return EMULATE_DONE; -} - -int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc) -{ - u32 ea; - int gtlb_index; - unsigned int as = get_mmucr_sts(vcpu); - unsigned int pid = get_mmucr_stid(vcpu); - - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - - gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); - if (rc) { - u32 cr = kvmppc_get_cr(vcpu); - - if (gtlb_index < 0) - kvmppc_set_cr(vcpu, cr & ~0x20000000); - else - kvmppc_set_cr(vcpu, cr | 0x20000000); - } - kvmppc_set_gpr(vcpu, rt, gtlb_index); - - kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); - return EMULATE_DONE; -} diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h deleted file mode 100644 index a9ff80e5152..00000000000 --- a/arch/powerpc/kvm/44x_tlb.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2007 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#ifndef __KVM_POWERPC_TLB_H__ -#define __KVM_POWERPC_TLB_H__ - -#include <linux/kvm_host.h> -#include <asm/mmu-44x.h> - -extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, - unsigned int pid, unsigned int as); - -extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, - u8 rc); -extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws); - -/* TLB helper functions */ -static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 4) & 0xf; -} - -static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe) -{ - return tlbe->word0 & 0xfffffc00; -} - -static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe) -{ - unsigned int pgsize = get_tlb_size(tlbe); - return 1 << 10 << (pgsize << 1); -} - -static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe) -{ - return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; -} - -static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe) -{ - u64 word1 = tlbe->word1; - return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); -} - -static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe) -{ - return tlbe->tid & 0xff; -} - -static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 8) & 0x1; -} - -static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 9) & 0x1; -} - -static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.mmucr & 0xff; -} - -static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.mmucr >> 16) & 0x1; -} - -#endif /* __KVM_POWERPC_TLB_H__ */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index d6a53b95de9..602eb51d20b 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -75,7 +75,6 @@ config KVM_BOOK3S_64 config KVM_BOOK3S_64_HV tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" depends on KVM_BOOK3S_64 - depends on !CPU_LITTLE_ENDIAN select KVM_BOOK3S_HV_POSSIBLE select MMU_NOTIFIER select CMA @@ -113,23 +112,9 @@ config KVM_BOOK3S_64_PR config KVM_BOOKE_HV bool -config KVM_440 - bool "KVM support for PowerPC 440 processors" - depends on 44x - select KVM - select KVM_MMIO - ---help--- - Support running unmodified 440 guest kernels in virtual machines on - 440 host processors. - - This module provides access to the hardware capabilities through - a character device node named /dev/kvm. - - If unsure, say N. - config KVM_EXIT_TIMING bool "Detailed exit timing" - depends on KVM_440 || KVM_E500V2 || KVM_E500MC + depends on KVM_E500V2 || KVM_E500MC ---help--- Calculate elapsed time for every exit/enter cycle. A per-vcpu report is available in debugfs kvm/vm#_vcpu#_timing. @@ -173,6 +158,7 @@ config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI help @@ -184,6 +170,8 @@ config KVM_MPIC config KVM_XICS bool "KVM in-kernel XICS emulation" depends on KVM_BOOK3S_64 && !KVM_MPIC + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD ---help--- Include support for the XICS (eXternal Interrupt Controller Specification) interrupt controller architecture used on diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index ce569b6bf4d..0570eef83fb 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -10,27 +10,17 @@ KVM := ../../../virt/kvm common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ $(KVM)/eventfd.o -CFLAGS_44x_tlb.o := -I. CFLAGS_e500_mmu.o := -I. CFLAGS_e500_mmu_host.o := -I. CFLAGS_emulate.o := -I. +CFLAGS_emulate_loadstore.o := -I. -common-objs-y += powerpc.o emulate.o +common-objs-y += powerpc.o emulate.o emulate_loadstore.o obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o AFLAGS_booke_interrupts.o := -I$(obj) -kvm-440-objs := \ - $(common-objs-y) \ - booke.o \ - booke_emulate.o \ - booke_interrupts.o \ - 44x.o \ - 44x_tlb.o \ - 44x_emulate.o -kvm-objs-$(CONFIG_KVM_440) := $(kvm-440-objs) - kvm-e500-objs := \ $(common-objs-y) \ booke.o \ @@ -58,6 +48,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \ kvm-pr-y := \ fpu.o \ + emulate.o \ book3s_paired_singles.o \ book3s_pr.o \ book3s_pr_papr.o \ @@ -90,7 +81,6 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ book3s_hv_builtin.o \ - book3s_hv_cma.o \ $(kvm-book3s_64-builtin-xics-objs-y) endif @@ -101,7 +91,7 @@ kvm-book3s_64-module-objs += \ $(KVM)/kvm_main.o \ $(KVM)/eventfd.o \ powerpc.o \ - emulate.o \ + emulate_loadstore.o \ book3s.o \ book3s_64_vio.o \ book3s_rtas.o \ @@ -127,7 +117,6 @@ kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o kvm-objs := $(kvm-objs-m) $(kvm-objs-y) -obj-$(CONFIG_KVM_440) += kvm.o obj-$(CONFIG_KVM_E500V2) += kvm.o obj-$(CONFIG_KVM_E500MC) += kvm.o obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index c254c27f240..b32db4b9536 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -72,6 +72,17 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) { } +void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { + ulong pc = kvmppc_get_pc(vcpu); + if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); + vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; + } +} +EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real); + static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) { if (!is_kvmppc_hv_enabled(vcpu->kvm)) @@ -118,6 +129,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { + kvmppc_unfixup_split_real(vcpu); kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags); kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); @@ -218,6 +230,23 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); } +void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar, + ulong flags) +{ + kvmppc_set_dar(vcpu, dar); + kvmppc_set_dsisr(vcpu, flags); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); +} + +void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags) +{ + u64 msr = kvmppc_get_msr(vcpu); + msr &= ~(SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); + msr |= flags & (SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); + kvmppc_set_msr_fast(vcpu, msr); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); +} + int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { int deliver = 1; @@ -342,18 +371,18 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter); -pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, +pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable) { - ulong mp_pa = vcpu->arch.magic_page_pa; + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM; + gfn_t gfn = gpa >> PAGE_SHIFT; if (!(kvmppc_get_msr(vcpu) & MSR_SF)) mp_pa = (uint32_t)mp_pa; /* Magic page override */ - if (unlikely(mp_pa) && - unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) == - ((mp_pa & PAGE_MASK) & KVM_PAM))) { + gpa &= ~0xFFFULL; + if (unlikely(mp_pa) && unlikely((gpa & KVM_PAM) == mp_pa)) { ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; pfn_t pfn; @@ -366,11 +395,13 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable); } -EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn); +EXPORT_SYMBOL_GPL(kvmppc_gpa_to_pfn); -static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, - bool iswrite, struct kvmppc_pte *pte) +int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, + enum xlate_readwrite xlrw, struct kvmppc_pte *pte) { + bool data = (xlid == XLATE_DATA); + bool iswrite = (xlrw == XLATE_WRITE); int relocated = (kvmppc_get_msr(vcpu) & (data ? MSR_DR : MSR_IR)); int r; @@ -384,88 +415,34 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, pte->may_write = true; pte->may_execute = true; r = 0; + + if ((kvmppc_get_msr(vcpu) & (MSR_IR | MSR_DR)) == MSR_DR && + !data) { + if ((vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) && + ((eaddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)) + pte->raddr &= ~SPLIT_HACK_MASK; + } } return r; } -static hva_t kvmppc_bad_hva(void) -{ - return PAGE_OFFSET; -} - -static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte, - bool read) +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *inst) { - hva_t hpage; - - if (read && !pte->may_read) - goto err; - - if (!read && !pte->may_write) - goto err; - - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpage)) - goto err; - - return hpage | (pte->raddr & ~PAGE_MASK); -err: - return kvmppc_bad_hva(); -} - -int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, - bool data) -{ - struct kvmppc_pte pte; - - vcpu->stat.st++; - - if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte)) - return -ENOENT; - - *eaddr = pte.raddr; - - if (!pte.may_write) - return -EPERM; - - if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) - return EMULATE_DO_MMIO; - - return EMULATE_DONE; -} -EXPORT_SYMBOL_GPL(kvmppc_st); - -int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, - bool data) -{ - struct kvmppc_pte pte; - hva_t hva = *eaddr; - - vcpu->stat.ld++; - - if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte)) - goto nopte; - - *eaddr = pte.raddr; - - hva = kvmppc_pte_to_hva(vcpu, &pte, true); - if (kvm_is_error_hva(hva)) - goto mmio; - - if (copy_from_user(ptr, (void __user *)hva, size)) { - printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); - goto mmio; - } + ulong pc = kvmppc_get_pc(vcpu); + int r; - return EMULATE_DONE; + if (type == INST_SC) + pc -= 4; -nopte: - return -ENOENT; -mmio: - return EMULATE_DO_MMIO; + r = kvmppc_ld(vcpu, &pc, sizeof(u32), inst, false); + if (r == EMULATE_DONE) + return r; + else + return EMULATE_AGAIN; } -EXPORT_SYMBOL_GPL(kvmppc_ld); +EXPORT_SYMBOL_GPL(kvmppc_load_last_inst); int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { @@ -558,168 +535,111 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -ENOTSUPP; } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { - int r; - union kvmppc_one_reg val; - int size; + int r = 0; long int i; - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - - r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val); if (r == -EINVAL) { r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_DAR: - val = get_reg_val(reg->id, kvmppc_get_dar(vcpu)); + *val = get_reg_val(id, kvmppc_get_dar(vcpu)); break; case KVM_REG_PPC_DSISR: - val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu)); + *val = get_reg_val(id, kvmppc_get_dsisr(vcpu)); break; case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: - i = reg->id - KVM_REG_PPC_FPR0; - val = get_reg_val(reg->id, VCPU_FPR(vcpu, i)); + i = id - KVM_REG_PPC_FPR0; + *val = get_reg_val(id, VCPU_FPR(vcpu, i)); break; case KVM_REG_PPC_FPSCR: - val = get_reg_val(reg->id, vcpu->arch.fp.fpscr); - break; -#ifdef CONFIG_ALTIVEC - case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; - break; - case KVM_REG_PPC_VSCR: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); + *val = get_reg_val(id, vcpu->arch.fp.fpscr); break; - case KVM_REG_PPC_VRSAVE: - val = get_reg_val(reg->id, vcpu->arch.vrsave); - break; -#endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: if (cpu_has_feature(CPU_FTR_VSX)) { - long int i = reg->id - KVM_REG_PPC_VSR0; - val.vsxval[0] = vcpu->arch.fp.fpr[i][0]; - val.vsxval[1] = vcpu->arch.fp.fpr[i][1]; + i = id - KVM_REG_PPC_VSR0; + val->vsxval[0] = vcpu->arch.fp.fpr[i][0]; + val->vsxval[1] = vcpu->arch.fp.fpr[i][1]; } else { r = -ENXIO; } break; #endif /* CONFIG_VSX */ - case KVM_REG_PPC_DEBUG_INST: { - u32 opcode = INS_TW; - r = copy_to_user((u32 __user *)(long)reg->addr, - &opcode, sizeof(u32)); + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, INS_TW); break; - } #ifdef CONFIG_KVM_XICS case KVM_REG_PPC_ICP_STATE: if (!vcpu->arch.icp) { r = -ENXIO; break; } - val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu)); + *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); break; #endif /* CONFIG_KVM_XICS */ case KVM_REG_PPC_FSCR: - val = get_reg_val(reg->id, vcpu->arch.fscr); + *val = get_reg_val(id, vcpu->arch.fscr); break; case KVM_REG_PPC_TAR: - val = get_reg_val(reg->id, vcpu->arch.tar); + *val = get_reg_val(id, vcpu->arch.tar); break; case KVM_REG_PPC_EBBHR: - val = get_reg_val(reg->id, vcpu->arch.ebbhr); + *val = get_reg_val(id, vcpu->arch.ebbhr); break; case KVM_REG_PPC_EBBRR: - val = get_reg_val(reg->id, vcpu->arch.ebbrr); + *val = get_reg_val(id, vcpu->arch.ebbrr); break; case KVM_REG_PPC_BESCR: - val = get_reg_val(reg->id, vcpu->arch.bescr); + *val = get_reg_val(id, vcpu->arch.bescr); + break; + case KVM_REG_PPC_VTB: + *val = get_reg_val(id, vcpu->arch.vtb); + break; + case KVM_REG_PPC_IC: + *val = get_reg_val(id, vcpu->arch.ic); break; default: r = -EINVAL; break; } } - if (r) - return r; - - if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) - r = -EFAULT; return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { - int r; - union kvmppc_one_reg val; - int size; + int r = 0; long int i; - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - - if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) - return -EFAULT; - - r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val); if (r == -EINVAL) { r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_DAR: - kvmppc_set_dar(vcpu, set_reg_val(reg->id, val)); + kvmppc_set_dar(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_DSISR: - kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val)); + kvmppc_set_dsisr(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: - i = reg->id - KVM_REG_PPC_FPR0; - VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val); + i = id - KVM_REG_PPC_FPR0; + VCPU_FPR(vcpu, i) = set_reg_val(id, *val); break; case KVM_REG_PPC_FPSCR: - vcpu->arch.fp.fpscr = set_reg_val(reg->id, val); + vcpu->arch.fp.fpscr = set_reg_val(id, *val); break; -#ifdef CONFIG_ALTIVEC - case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; - break; - case KVM_REG_PPC_VSCR: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); - break; - case KVM_REG_PPC_VRSAVE: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vrsave = set_reg_val(reg->id, val); - break; -#endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: if (cpu_has_feature(CPU_FTR_VSX)) { - long int i = reg->id - KVM_REG_PPC_VSR0; - vcpu->arch.fp.fpr[i][0] = val.vsxval[0]; - vcpu->arch.fp.fpr[i][1] = val.vsxval[1]; + i = id - KVM_REG_PPC_VSR0; + vcpu->arch.fp.fpr[i][0] = val->vsxval[0]; + vcpu->arch.fp.fpr[i][1] = val->vsxval[1]; } else { r = -ENXIO; } @@ -732,23 +652,29 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) break; } r = kvmppc_xics_set_icp(vcpu, - set_reg_val(reg->id, val)); + set_reg_val(id, *val)); break; #endif /* CONFIG_KVM_XICS */ case KVM_REG_PPC_FSCR: - vcpu->arch.fscr = set_reg_val(reg->id, val); + vcpu->arch.fscr = set_reg_val(id, *val); break; case KVM_REG_PPC_TAR: - vcpu->arch.tar = set_reg_val(reg->id, val); + vcpu->arch.tar = set_reg_val(id, *val); break; case KVM_REG_PPC_EBBHR: - vcpu->arch.ebbhr = set_reg_val(reg->id, val); + vcpu->arch.ebbhr = set_reg_val(id, *val); break; case KVM_REG_PPC_EBBRR: - vcpu->arch.ebbrr = set_reg_val(reg->id, val); + vcpu->arch.ebbrr = set_reg_val(id, *val); break; case KVM_REG_PPC_BESCR: - vcpu->arch.bescr = set_reg_val(reg->id, val); + vcpu->arch.bescr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_VTB: + vcpu->arch.vtb = set_reg_val(id, *val); + break; + case KVM_REG_PPC_IC: + vcpu->arch.ic = set_reg_val(id, *val); break; default: r = -EINVAL; @@ -789,13 +715,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - return -EINVAL; + vcpu->guest_debug = dbg->control; + return 0; } -void kvmppc_decrementer_func(unsigned long data) +void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - kvmppc_core_queue_dec(vcpu); kvm_vcpu_kick(vcpu); } @@ -862,9 +787,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - return kvm->arch.kvm_ops->age_hva(kvm, hva); + return kvm->arch.kvm_ops->age_hva(kvm, start, end); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) @@ -913,6 +838,11 @@ int kvmppc_core_check_processor_compat(void) return 0; } +int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall) +{ + return kvm->arch.kvm_ops->hcall_implemented(hcall); +} + static int kvmppc_book3s_init(void) { int r; diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 4bf956cf94d..d2b3ec088b8 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -17,7 +17,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end); -extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, + unsigned long end); extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 93503bbdae4..cd0b0730e29 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -335,7 +335,7 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, if (r < 0) r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, iswrite, true); - if (r < 0) + if (r == -ENOENT) r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, iswrite, false); diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 678e7537049..2035d16a926 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -156,11 +156,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, bool writable; /* Get host physical address for gpa */ - hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT, - iswrite, &writable); + hpaddr = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable); if (is_error_noslot_pfn(hpaddr)) { - printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", - orig_pte->eaddr); + printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n", + orig_pte->raddr); r = -EINVAL; goto out; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 0ac98392f36..b982d925c71 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -104,9 +104,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, smp_rmb(); /* Get host physical address for gpa */ - pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable); + pfn = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable); if (is_error_noslot_pfn(pfn)) { - printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn); + printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n", + orig_pte->raddr); r = -EINVAL; goto out; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 68468d695f1..d40770248b6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,8 +37,6 @@ #include <asm/ppc-opcode.h> #include <asm/cputable.h> -#include "book3s_hv_cma.h" - /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ #define MAX_LPID_970 63 @@ -64,10 +62,10 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) } kvm->arch.hpt_cma_alloc = 0; - VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER); - page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); + page = kvm_alloc_hpt(1ul << (order - PAGE_SHIFT)); if (page) { hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + memset((void *)hpt, 0, (1ul << order)); kvm->arch.hpt_cma_alloc = 1; } @@ -450,7 +448,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned long slb_v; unsigned long pp, key; unsigned long v, gr; - unsigned long *hptep; + __be64 *hptep; int index; int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); @@ -473,13 +471,13 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, preempt_enable(); return -ENOENT; } - hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); - v = hptep[0] & ~HPTE_V_HVLOCK; + hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; gr = kvm->arch.revmap[index].guest_rpte; /* Unlock the HPTE */ asm volatile("lwsync" : : : "memory"); - hptep[0] = v; + hptep[0] = cpu_to_be64(v); preempt_enable(); gpte->eaddr = eaddr; @@ -530,21 +528,14 @@ static int instruction_is_store(unsigned int instr) static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long gpa, gva_t ea, int is_store) { - int ret; u32 last_inst; - unsigned long srr0 = kvmppc_get_pc(vcpu); - /* We try to load the last instruction. We don't let - * emulate_instruction do it as it doesn't check what - * kvmppc_ld returns. + /* * If we fail, we just return to the guest and try executing it again. */ - if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) { - ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); - if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED) - return RESUME_GUEST; - vcpu->arch.last_inst = last_inst; - } + if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != + EMULATE_DONE) + return RESUME_GUEST; /* * WARNING: We do not know for sure whether the instruction we just @@ -558,7 +549,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, * we just return and retry the instruction. */ - if (instruction_is_store(kvmppc_get_last_inst(vcpu)) != !!is_store) + if (instruction_is_store(last_inst) != !!is_store) return RESUME_GUEST; /* @@ -583,7 +574,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; - unsigned long *hptep, hpte[3], r; + unsigned long hpte[3], r; + __be64 *hptep; unsigned long mmu_seq, psize, pte_size; unsigned long gpa_base, gfn_base; unsigned long gpa, gfn, hva, pfn; @@ -606,16 +598,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (ea != vcpu->arch.pgfault_addr) return RESUME_GUEST; index = vcpu->arch.pgfault_index; - hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); + hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); rev = &kvm->arch.revmap[index]; preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - hpte[0] = hptep[0] & ~HPTE_V_HVLOCK; - hpte[1] = hptep[1]; + hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; + hpte[1] = be64_to_cpu(hptep[1]); hpte[2] = r = rev->guest_rpte; asm volatile("lwsync" : : : "memory"); - hptep[0] = hpte[0]; + hptep[0] = cpu_to_be64(hpte[0]); preempt_enable(); if (hpte[0] != vcpu->arch.pgfault_hpte[0] || @@ -731,8 +723,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] || - rev->guest_rpte != hpte[2]) + if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] || + be64_to_cpu(hptep[1]) != hpte[1] || + rev->guest_rpte != hpte[2]) /* HPTE has been changed under us; let the guest retry */ goto out_unlock; hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; @@ -752,20 +745,20 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; r &= rcbits | ~(HPTE_R_R | HPTE_R_C); - if (hptep[0] & HPTE_V_VALID) { + if (be64_to_cpu(hptep[0]) & HPTE_V_VALID) { /* HPTE was previously valid, so we need to invalidate it */ unlock_rmap(rmap); - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, index); /* don't lose previous R and C bits */ - r |= hptep[1] & (HPTE_R_R | HPTE_R_C); + r |= be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); } else { kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); } - hptep[1] = r; + hptep[1] = cpu_to_be64(r); eieio(); - hptep[0] = hpte[0]; + hptep[0] = cpu_to_be64(hpte[0]); asm volatile("ptesync" : : : "memory"); preempt_enable(); if (page && hpte_is_writable(r)) @@ -784,7 +777,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, return ret; out_unlock: - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); preempt_enable(); goto out_put; } @@ -860,7 +853,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, { struct revmap_entry *rev = kvm->arch.revmap; unsigned long h, i, j; - unsigned long *hptep; + __be64 *hptep; unsigned long ptel, psize, rcbits; for (;;) { @@ -876,11 +869,11 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, * rmap chain lock. */ i = *rmapp & KVMPPC_RMAP_INDEX; - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK) cpu_relax(); continue; } @@ -899,14 +892,14 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Now check and modify the HPTE */ ptel = rev[i].guest_rpte; - psize = hpte_page_size(hptep[0], ptel); - if ((hptep[0] & HPTE_V_VALID) && + psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); + if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && hpte_rpn(ptel, psize) == gfn) { if (kvm->arch.using_mmu_notifiers) - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); /* Harvest R and C */ - rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); + rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; if (rcbits & ~rev[i].guest_rpte) { rev[i].guest_rpte = ptel | rcbits; @@ -914,7 +907,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, } } unlock_rmap(rmapp); - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); } return 0; } @@ -961,7 +954,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, { struct revmap_entry *rev = kvm->arch.revmap; unsigned long head, i, j; - unsigned long *hptep; + __be64 *hptep; int ret = 0; retry: @@ -977,23 +970,24 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; /* If this HPTE isn't referenced, ignore it */ - if (!(hptep[1] & HPTE_R_R)) + if (!(be64_to_cpu(hptep[1]) & HPTE_R_R)) continue; if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK) cpu_relax(); goto retry; } /* Now check and modify the HPTE */ - if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { + if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && + (be64_to_cpu(hptep[1]) & HPTE_R_R)) { kvmppc_clear_ref_hpte(kvm, hptep, i); if (!(rev[i].guest_rpte & HPTE_R_R)) { rev[i].guest_rpte |= HPTE_R_R; @@ -1001,18 +995,18 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, } ret = 1; } - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); } while ((i = j) != head); unlock_rmap(rmapp); return ret; } -int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva) +int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) { if (!kvm->arch.using_mmu_notifiers) return 0; - return kvm_handle_hva(kvm, hva, kvm_age_rmapp); + return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, @@ -1035,7 +1029,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, do { hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; - if (hp[1] & HPTE_R_R) + if (be64_to_cpu(hp[1]) & HPTE_R_R) goto out; } while ((i = j) != head); } @@ -1075,7 +1069,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) unsigned long head, i, j; unsigned long n; unsigned long v, r; - unsigned long *hptep; + __be64 *hptep; int npages_dirty = 0; retry: @@ -1091,7 +1085,8 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + unsigned long hptep1; + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; /* @@ -1108,29 +1103,30 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) * Otherwise we need to do the tlbie even if C==0 in * order to pick up any delayed writeback of C. */ - if (!(hptep[1] & HPTE_R_C) && - (!hpte_is_writable(hptep[1]) || vcpus_running(kvm))) + hptep1 = be64_to_cpu(hptep[1]); + if (!(hptep1 & HPTE_R_C) && + (!hpte_is_writable(hptep1) || vcpus_running(kvm))) continue; if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (hptep[0] & cpu_to_be64(HPTE_V_HVLOCK)) cpu_relax(); goto retry; } /* Now check and modify the HPTE */ - if (!(hptep[0] & HPTE_V_VALID)) + if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) continue; /* need to make it temporarily absent so C is stable */ - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); - v = hptep[0]; - r = hptep[1]; + v = be64_to_cpu(hptep[0]); + r = be64_to_cpu(hptep[1]); if (r & HPTE_R_C) { - hptep[1] = r & ~HPTE_R_C; + hptep[1] = cpu_to_be64(r & ~HPTE_R_C); if (!(rev[i].guest_rpte & HPTE_R_C)) { rev[i].guest_rpte |= HPTE_R_C; note_hpte_modification(kvm, &rev[i]); @@ -1143,7 +1139,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) } v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK); v |= HPTE_V_VALID; - hptep[0] = v; + hptep[0] = cpu_to_be64(v); } while ((i = j) != head); unlock_rmap(rmapp); @@ -1307,7 +1303,7 @@ struct kvm_htab_ctx { * Returns 1 if this HPT entry has been modified or has pending * R/C bit changes. */ -static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) +static int hpte_dirty(struct revmap_entry *revp, __be64 *hptp) { unsigned long rcbits_unset; @@ -1316,13 +1312,14 @@ static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) /* Also need to consider changes in reference and changed bits */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset)) + if ((be64_to_cpu(hptp[0]) & HPTE_V_VALID) && + (be64_to_cpu(hptp[1]) & rcbits_unset)) return 1; return 0; } -static long record_hpte(unsigned long flags, unsigned long *hptp, +static long record_hpte(unsigned long flags, __be64 *hptp, unsigned long *hpte, struct revmap_entry *revp, int want_valid, int first_pass) { @@ -1337,10 +1334,10 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, return 0; valid = 0; - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) { + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) { valid = 1; if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && - !(hptp[0] & HPTE_V_BOLTED)) + !(be64_to_cpu(hptp[0]) & HPTE_V_BOLTED)) valid = 0; } if (valid != want_valid) @@ -1352,7 +1349,7 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, preempt_disable(); while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) cpu_relax(); - v = hptp[0]; + v = be64_to_cpu(hptp[0]); /* re-evaluate valid and dirty from synchronized HPTE value */ valid = !!(v & HPTE_V_VALID); @@ -1360,9 +1357,9 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, /* Harvest R and C into guest view if necessary */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if (valid && (rcbits_unset & hptp[1])) { - revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) | - HPTE_GR_MODIFIED; + if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) { + revp->guest_rpte |= (be64_to_cpu(hptp[1]) & + (HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED; dirty = 1; } @@ -1381,13 +1378,13 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, revp->guest_rpte = r; } asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); - hptp[0] &= ~HPTE_V_HVLOCK; + hptp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); preempt_enable(); if (!(valid == want_valid && (first_pass || dirty))) ok = 0; } - hpte[0] = v; - hpte[1] = r; + hpte[0] = cpu_to_be64(v); + hpte[1] = cpu_to_be64(r); return ok; } @@ -1397,7 +1394,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, struct kvm_htab_ctx *ctx = file->private_data; struct kvm *kvm = ctx->kvm; struct kvm_get_htab_header hdr; - unsigned long *hptp; + __be64 *hptp; struct revmap_entry *revp; unsigned long i, nb, nw; unsigned long __user *lbuf; @@ -1413,7 +1410,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, flags = ctx->flags; i = ctx->index; - hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); revp = kvm->arch.revmap + i; lbuf = (unsigned long __user *)buf; @@ -1497,7 +1494,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, unsigned long i, j; unsigned long v, r; unsigned long __user *lbuf; - unsigned long *hptp; + __be64 *hptp; unsigned long tmp[2]; ssize_t nb; long int err, ret; @@ -1539,7 +1536,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) break; - hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); lbuf = (unsigned long __user *)buf; for (j = 0; j < hdr.n_valid; ++j) { err = -EFAULT; @@ -1551,7 +1548,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, lbuf += 2; nb += HPTE_SIZE; - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) kvmppc_do_h_remove(kvm, 0, i, 0, tmp); err = -EIO; ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, @@ -1577,7 +1574,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, } for (j = 0; j < hdr.n_invalid; ++j) { - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) kvmppc_do_h_remove(kvm, 0, i, 0, tmp); ++i; hptp += 2; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 3f295269af3..5a2bc4b0dfe 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -439,12 +439,6 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) (mfmsr() & MSR_HV)) vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; break; - case SPRN_PURR: - to_book3s(vcpu)->purr_offset = spr_val - get_tb(); - break; - case SPRN_SPURR: - to_book3s(vcpu)->spurr_offset = spr_val - get_tb(); - break; case SPRN_GQR0: case SPRN_GQR1: case SPRN_GQR2: @@ -455,10 +449,10 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_GQR7: to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val; break; +#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_FSCR: - vcpu->arch.fscr = spr_val; + kvmppc_set_fscr(vcpu, spr_val); break; -#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_BESCR: vcpu->arch.bescr = spr_val; break; @@ -572,10 +566,22 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val *spr_val = 0; break; case SPRN_PURR: - *spr_val = get_tb() + to_book3s(vcpu)->purr_offset; + /* + * On exit we would have updated purr + */ + *spr_val = vcpu->arch.purr; break; case SPRN_SPURR: - *spr_val = get_tb() + to_book3s(vcpu)->purr_offset; + /* + * On exit we would have updated spurr + */ + *spr_val = vcpu->arch.spurr; + break; + case SPRN_VTB: + *spr_val = vcpu->arch.vtb; + break; + case SPRN_IC: + *spr_val = vcpu->arch.ic; break; case SPRN_GQR0: case SPRN_GQR1: @@ -587,10 +593,10 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_GQR7: *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]; break; +#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_FSCR: *spr_val = vcpu->arch.fscr; break; -#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_BESCR: *spr_val = vcpu->arch.bescr; break; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7a12edbb61e..e63587d30b7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -35,6 +35,7 @@ #include <asm/reg.h> #include <asm/cputable.h> +#include <asm/cache.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/uaccess.h> @@ -67,6 +68,15 @@ /* Used as a "null" value for timebase values */ #define TB_NIL (~(u64)0) +static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); + +#if defined(CONFIG_PPC_64K_PAGES) +#define MPP_BUFFER_ORDER 0 +#elif defined(CONFIG_PPC_4K_PAGES) +#define MPP_BUFFER_ORDER 3 +#endif + + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -270,7 +280,7 @@ struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) { vpa->__old_status |= LPPACA_OLD_SHARED_PROC; - vpa->yield_count = 1; + vpa->yield_count = cpu_to_be32(1); } static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, @@ -293,8 +303,8 @@ static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, struct reg_vpa { u32 dummy; union { - u16 hword; - u32 word; + __be16 hword; + __be32 word; } length; }; @@ -333,9 +343,9 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, if (va == NULL) return H_PARAMETER; if (subfunc == H_VPA_REG_VPA) - len = ((struct reg_vpa *)va)->length.hword; + len = be16_to_cpu(((struct reg_vpa *)va)->length.hword); else - len = ((struct reg_vpa *)va)->length.word; + len = be32_to_cpu(((struct reg_vpa *)va)->length.word); kvmppc_unpin_guest_page(kvm, va, vpa, false); /* Check length */ @@ -540,21 +550,63 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, return; memset(dt, 0, sizeof(struct dtl_entry)); dt->dispatch_reason = 7; - dt->processor_id = vc->pcpu + vcpu->arch.ptid; - dt->timebase = now + vc->tb_offset; - dt->enqueue_to_dispatch_time = stolen; - dt->srr0 = kvmppc_get_pc(vcpu); - dt->srr1 = vcpu->arch.shregs.msr; + dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid); + dt->timebase = cpu_to_be64(now + vc->tb_offset); + dt->enqueue_to_dispatch_time = cpu_to_be32(stolen); + dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu)); + dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr); ++dt; if (dt == vcpu->arch.dtl.pinned_end) dt = vcpu->arch.dtl.pinned_addr; vcpu->arch.dtl_ptr = dt; /* order writing *dt vs. writing vpa->dtl_idx */ smp_wmb(); - vpa->dtl_idx = ++vcpu->arch.dtl_index; + vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index); vcpu->arch.dtl.dirty = true; } +static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207) + return true; + if ((!vcpu->arch.vcore->arch_compat) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + return true; + return false; +} + +static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, + unsigned long resource, unsigned long value1, + unsigned long value2) +{ + switch (resource) { + case H_SET_MODE_RESOURCE_SET_CIABR: + if (!kvmppc_power8_compatible(vcpu)) + return H_P2; + if (value2) + return H_P4; + if (mflags) + return H_UNSUPPORTED_FLAG_START; + /* Guests can't breakpoint the hypervisor */ + if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER) + return H_P3; + vcpu->arch.ciabr = value1; + return H_SUCCESS; + case H_SET_MODE_RESOURCE_SET_DAWR: + if (!kvmppc_power8_compatible(vcpu)) + return H_P2; + if (mflags) + return H_UNSUPPORTED_FLAG_START; + if (value2 & DABRX_HYP) + return H_P4; + vcpu->arch.dawr = value1; + vcpu->arch.dawrx = value2; + return H_SUCCESS; + default: + return H_TOO_HARD; + } +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { unsigned long req = kvmppc_get_gpr(vcpu, 3); @@ -562,6 +614,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) struct kvm_vcpu *tvcpu; int idx, rc; + if (req <= MAX_HCALL_OPCODE && + !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls)) + return RESUME_HOST; + switch (req) { case H_ENTER: idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -620,7 +676,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) /* Send the error out to userspace via KVM_RUN */ return rc; - + case H_SET_MODE: + ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6), + kvmppc_get_gpr(vcpu, 7)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; case H_XIRR: case H_CPPR: case H_EOI: @@ -639,6 +702,53 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static int kvmppc_hcall_impl_hv(unsigned long cmd) +{ + switch (cmd) { + case H_CEDE: + case H_PROD: + case H_CONFER: + case H_REGISTER_VPA: + case H_SET_MODE: +#ifdef CONFIG_KVM_XICS + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + case H_IPOLL: + case H_XIRR_X: +#endif + return 1; + } + + /* See if it's in the real-mode table */ + return kvmppc_hcall_impl_hv_realmode(cmd); +} + +static int kvmppc_emulate_debug_inst(struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + u32 last_inst; + + if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != + EMULATE_DONE) { + /* + * Fetch failed, so return to guest and + * try executing it again. + */ + return RESUME_GUEST; + } + + if (last_inst == KVMPPC_INST_SW_BREAKPOINT) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.address = kvmppc_get_pc(vcpu); + return RESUME_HOST; + } else { + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } +} + static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -721,12 +831,18 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, break; /* * This occurs if the guest executes an illegal instruction. - * We just generate a program interrupt to the guest, since - * we don't emulate any guest instructions at this stage. + * If the guest debug is disabled, generate a program interrupt + * to the guest. If guest debug is enabled, we need to check + * whether the instruction is a software breakpoint instruction. + * Accordingly return to Guest or Host. */ case BOOK3S_INTERRUPT_H_EMUL_ASSIST: - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - r = RESUME_GUEST; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { + r = kvmppc_emulate_debug_inst(run, vcpu); + } else { + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + r = RESUME_GUEST; + } break; /* * This occurs if the guest (kernel or userspace), does something that @@ -770,7 +886,9 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, { int i, j; - kvmppc_set_pvr_hv(vcpu, sregs->pvr); + /* Only accept the same PVR as the host's, since we can't spoof it */ + if (sregs->pvr != vcpu->arch.pvr) + return -EINVAL; j = 0; for (i = 0; i < vcpu->arch.slb_nr; i++) { @@ -785,7 +903,8 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, return 0; } -static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) +static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, + bool preserve_top32) { struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 mask; @@ -820,6 +939,10 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; if (cpu_has_feature(CPU_FTR_ARCH_207S)) mask |= LPCR_AIL; + + /* Broken 32-bit version of LPCR must not clear top bits */ + if (preserve_top32) + mask &= 0xFFFFFFFF; vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); } @@ -831,6 +954,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, long int i; switch (id) { + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); + break; case KVM_REG_PPC_HIOR: *val = get_reg_val(id, 0); break; @@ -894,12 +1020,6 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_CIABR: *val = get_reg_val(id, vcpu->arch.ciabr); break; - case KVM_REG_PPC_IC: - *val = get_reg_val(id, vcpu->arch.ic); - break; - case KVM_REG_PPC_VTB: - *val = get_reg_val(id, vcpu->arch.vtb); - break; case KVM_REG_PPC_CSIGR: *val = get_reg_val(id, vcpu->arch.csigr); break; @@ -939,6 +1059,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.vcore->tb_offset); break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: *val = get_reg_val(id, vcpu->arch.vcore->lpcr); break; case KVM_REG_PPC_PPR: @@ -1094,12 +1215,6 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */ break; - case KVM_REG_PPC_IC: - vcpu->arch.ic = set_reg_val(id, *val); - break; - case KVM_REG_PPC_VTB: - vcpu->arch.vtb = set_reg_val(id, *val); - break; case KVM_REG_PPC_CSIGR: vcpu->arch.csigr = set_reg_val(id, *val); break; @@ -1150,7 +1265,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, ALIGN(set_reg_val(id, *val), 1UL << 24); break; case KVM_REG_PPC_LPCR: - kvmppc_set_lpcr(vcpu, set_reg_val(id, *val)); + kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true); + break; + case KVM_REG_PPC_LPCR_64: + kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false); break; case KVM_REG_PPC_PPR: vcpu->arch.ppr = set_reg_val(id, *val); @@ -1228,6 +1346,33 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, return r; } +static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) +{ + struct kvmppc_vcore *vcore; + + vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); + + if (vcore == NULL) + return NULL; + + INIT_LIST_HEAD(&vcore->runnable_threads); + spin_lock_init(&vcore->lock); + init_waitqueue_head(&vcore->wq); + vcore->preempt_tb = TB_NIL; + vcore->lpcr = kvm->arch.lpcr; + vcore->first_vcpuid = core * threads_per_subcore; + vcore->kvm = kvm; + + vcore->mpp_buffer_is_valid = false; + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcore->mpp_buffer = (void *)__get_free_pages( + GFP_KERNEL|__GFP_ZERO, + MPP_BUFFER_ORDER); + + return vcore; +} + static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, unsigned int id) { @@ -1279,16 +1424,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_lock(&kvm->lock); vcore = kvm->arch.vcores[core]; if (!vcore) { - vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); - if (vcore) { - INIT_LIST_HEAD(&vcore->runnable_threads); - spin_lock_init(&vcore->lock); - init_waitqueue_head(&vcore->wq); - vcore->preempt_tb = TB_NIL; - vcore->lpcr = kvm->arch.lpcr; - vcore->first_vcpuid = core * threads_per_subcore; - vcore->kvm = kvm; - } + vcore = kvmppc_vcore_create(kvm, core); kvm->arch.vcores[core] = vcore; kvm->arch.online_vcores++; } @@ -1388,7 +1524,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, static int kvmppc_grab_hwthread(int cpu) { struct paca_struct *tpaca; - long timeout = 1000; + long timeout = 10000; tpaca = &paca[cpu]; @@ -1500,6 +1636,33 @@ static int on_primary_thread(void) return 1; } +static void kvmppc_start_saving_l2_cache(struct kvmppc_vcore *vc) +{ + phys_addr_t phy_addr, mpp_addr; + + phy_addr = (phys_addr_t)virt_to_phys(vc->mpp_buffer); + mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; + + mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_ABORT); + logmpp(mpp_addr | PPC_LOGMPP_LOG_L2); + + vc->mpp_buffer_is_valid = true; +} + +static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc) +{ + phys_addr_t phy_addr, mpp_addr; + + phy_addr = virt_to_phys(vc->mpp_buffer); + mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; + + /* We must abort any in-progress save operations to ensure + * the table is valid so that prefetch engine knows when to + * stop prefetching. */ + logmpp(mpp_addr | PPC_LOGMPP_LOG_ABORT); + mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE); +} + /* * Run a set of guest threads on a physical core. * Called with vc->lock held. @@ -1577,9 +1740,16 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) srcu_idx = srcu_read_lock(&vc->kvm->srcu); + if (vc->mpp_buffer_is_valid) + kvmppc_start_restoring_l2_cache(vc); + __kvmppc_vcore_entry(); spin_lock(&vc->lock); + + if (vc->mpp_buffer) + kvmppc_start_saving_l2_cache(vc); + /* disable sending of IPIs on virtual external irqs */ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) vcpu->cpu = -1; @@ -1929,12 +2099,6 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, (*sps)->page_shift = def->shift; (*sps)->slb_enc = def->sllp; (*sps)->enc[0].page_shift = def->shift; - /* - * Only return base page encoding. We don't want to return - * all the supporting pte_enc, because our H_ENTER doesn't - * support MPSS yet. Once they do, we can start passing all - * support pte_enc here - */ (*sps)->enc[0].pte_enc = def->penc[linux_psize]; /* * Add 16MB MPSS support if host supports it @@ -2281,6 +2445,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) */ cpumask_setall(&kvm->arch.need_tlb_flush); + /* Start out with the default set of hcalls enabled */ + memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, + sizeof(kvm->arch.enabled_hcalls)); + kvm->arch.rma = NULL; kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); @@ -2323,8 +2491,14 @@ static void kvmppc_free_vcores(struct kvm *kvm) { long int i; - for (i = 0; i < KVM_MAX_VCORES; ++i) + for (i = 0; i < KVM_MAX_VCORES; ++i) { + if (kvm->arch.vcores[i] && kvm->arch.vcores[i]->mpp_buffer) { + struct kvmppc_vcore *vc = kvm->arch.vcores[i]; + free_pages((unsigned long)vc->mpp_buffer, + MPP_BUFFER_ORDER); + } kfree(kvm->arch.vcores[i]); + } kvm->arch.online_vcores = 0; } @@ -2419,6 +2593,49 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, return r; } +/* + * List of hcall numbers to enable by default. + * For compatibility with old userspace, we enable by default + * all hcalls that were implemented before the hcall-enabling + * facility was added. Note this list should not include H_RTAS. + */ +static unsigned int default_hcall_list[] = { + H_REMOVE, + H_ENTER, + H_READ, + H_PROTECT, + H_BULK_REMOVE, + H_GET_TCE, + H_PUT_TCE, + H_SET_DABR, + H_SET_XDABR, + H_CEDE, + H_PROD, + H_CONFER, + H_REGISTER_VPA, +#ifdef CONFIG_KVM_XICS + H_EOI, + H_CPPR, + H_IPI, + H_IPOLL, + H_XIRR, + H_XIRR_X, +#endif + 0 +}; + +static void init_default_hcalls(void) +{ + int i; + unsigned int hcall; + + for (i = 0; default_hcall_list[i]; ++i) { + hcall = default_hcall_list[i]; + WARN_ON(!kvmppc_hcall_impl_hv(hcall)); + __set_bit(hcall / 4, default_enabled_hcalls); + } +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -2451,6 +2668,7 @@ static struct kvmppc_ops kvm_ops_hv = { .emulate_mfspr = kvmppc_core_emulate_mfspr_hv, .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, + .hcall_implemented = kvmppc_hcall_impl_hv, }; static int kvmppc_book3s_init_hv(void) @@ -2466,6 +2684,8 @@ static int kvmppc_book3s_init_hv(void) kvm_ops_hv.owner = THIS_MODULE; kvmppc_hv_ops = &kvm_ops_hv; + init_default_hcalls(); + r = kvmppc_mmu_hv_init(); return r; } diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7cde8a66520..4fdc27c80f4 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -16,12 +16,14 @@ #include <linux/init.h> #include <linux/memblock.h> #include <linux/sizes.h> +#include <linux/cma.h> #include <asm/cputable.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#include "book3s_hv_cma.h" +#define KVM_CMA_CHUNK_ORDER 18 + /* * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) * should be power of 2. @@ -43,6 +45,8 @@ static unsigned long kvm_cma_resv_ratio = 5; unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ EXPORT_SYMBOL_GPL(kvm_rma_pages); +static struct cma *kvm_cma; + /* Work out RMLS (real mode limit selector) field value for a given RMA size. Assumes POWER7 or PPC970. */ static inline int lpcr_rmls(unsigned long rma_size) @@ -97,7 +101,7 @@ struct kvm_rma_info *kvm_alloc_rma() ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); if (!ri) return NULL; - page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages); + page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages)); if (!page) goto err_out; atomic_set(&ri->use_count, 1); @@ -112,7 +116,7 @@ EXPORT_SYMBOL_GPL(kvm_alloc_rma); void kvm_release_rma(struct kvm_rma_info *ri) { if (atomic_dec_and_test(&ri->use_count)) { - kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages); + cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages); kfree(ri); } } @@ -131,16 +135,18 @@ struct page *kvm_alloc_hpt(unsigned long nr_pages) { unsigned long align_pages = HPT_ALIGN_PAGES; + VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); + /* Old CPUs require HPT aligned on a multiple of its size */ if (!cpu_has_feature(CPU_FTR_ARCH_206)) align_pages = nr_pages; - return kvm_alloc_cma(nr_pages, align_pages); + return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages)); } EXPORT_SYMBOL_GPL(kvm_alloc_hpt); void kvm_release_hpt(struct page *page, unsigned long nr_pages) { - kvm_release_cma(page, nr_pages); + cma_release(kvm_cma, page, nr_pages); } EXPORT_SYMBOL_GPL(kvm_release_hpt); @@ -157,6 +163,12 @@ void __init kvm_cma_reserve(void) unsigned long align_size; struct memblock_region *reg; phys_addr_t selected_size = 0; + + /* + * We need CMA reservation only when we are in HV mode + */ + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return; /* * We cannot use memblock_phys_mem_size() here, because * memblock_analyze() has not been called yet. @@ -179,7 +191,8 @@ void __init kvm_cma_reserve(void) align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); - kvm_cma_declare_contiguous(selected_size, align_size); + cma_declare_contiguous(0, selected_size, 0, align_size, + KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma); } } @@ -212,3 +225,16 @@ bool kvm_hv_mode_active(void) { return atomic_read(&hv_vm_count) != 0; } + +extern int hcall_real_table[], hcall_real_table_end[]; + +int kvmppc_hcall_impl_hv_realmode(unsigned long cmd) +{ + cmd /= 4; + if (cmd < hcall_real_table_end - hcall_real_table && + hcall_real_table[cmd]) + return 1; + + return 0; +} +EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode); diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c deleted file mode 100644 index d9d3d8553d5..00000000000 --- a/arch/powerpc/kvm/book3s_hv_cma.c +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA - * for DMA mapping framework - * - * Copyright IBM Corporation, 2013 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License or (at your optional) any later version of the license. - * - */ -#define pr_fmt(fmt) "kvm_cma: " fmt - -#ifdef CONFIG_CMA_DEBUG -#ifndef DEBUG -# define DEBUG -#endif -#endif - -#include <linux/memblock.h> -#include <linux/mutex.h> -#include <linux/sizes.h> -#include <linux/slab.h> - -#include "book3s_hv_cma.h" - -struct kvm_cma { - unsigned long base_pfn; - unsigned long count; - unsigned long *bitmap; -}; - -static DEFINE_MUTEX(kvm_cma_mutex); -static struct kvm_cma kvm_cma_area; - -/** - * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling - * for kvm hash pagetable - * @size: Size of the reserved memory. - * @alignment: Alignment for the contiguous memory area - * - * This function reserves memory for kvm cma area. It should be - * called by arch code when early allocator (memblock or bootmem) - * is still activate. - */ -long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment) -{ - long base_pfn; - phys_addr_t addr; - struct kvm_cma *cma = &kvm_cma_area; - - pr_debug("%s(size %lx)\n", __func__, (unsigned long)size); - - if (!size) - return -EINVAL; - /* - * Sanitise input arguments. - * We should be pageblock aligned for CMA. - */ - alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order)); - size = ALIGN(size, alignment); - /* - * Reserve memory - * Use __memblock_alloc_base() since - * memblock_alloc_base() panic()s. - */ - addr = __memblock_alloc_base(size, alignment, 0); - if (!addr) { - base_pfn = -ENOMEM; - goto err; - } else - base_pfn = PFN_DOWN(addr); - - /* - * Each reserved area must be initialised later, when more kernel - * subsystems (like slab allocator) are available. - */ - cma->base_pfn = base_pfn; - cma->count = size >> PAGE_SHIFT; - pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M); - return 0; -err: - pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); - return base_pfn; -} - -/** - * kvm_alloc_cma() - allocate pages from contiguous area - * @nr_pages: Requested number of pages. - * @align_pages: Requested alignment in number of pages - * - * This function allocates memory buffer for hash pagetable. - */ -struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages) -{ - int ret; - struct page *page = NULL; - struct kvm_cma *cma = &kvm_cma_area; - unsigned long chunk_count, nr_chunk; - unsigned long mask, pfn, pageno, start = 0; - - - if (!cma || !cma->count) - return NULL; - - pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__, - (void *)cma, nr_pages, align_pages); - - if (!nr_pages) - return NULL; - /* - * align mask with chunk size. The bit tracks pages in chunk size - */ - VM_BUG_ON(!is_power_of_2(align_pages)); - mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1; - BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER); - - chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); - nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); - - mutex_lock(&kvm_cma_mutex); - for (;;) { - pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count, - start, nr_chunk, mask); - if (pageno >= chunk_count) - break; - - pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)); - ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA); - if (ret == 0) { - bitmap_set(cma->bitmap, pageno, nr_chunk); - page = pfn_to_page(pfn); - memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT); - break; - } else if (ret != -EBUSY) { - break; - } - pr_debug("%s(): memory range at %p is busy, retrying\n", - __func__, pfn_to_page(pfn)); - /* try again with a bit different memory target */ - start = pageno + mask + 1; - } - mutex_unlock(&kvm_cma_mutex); - pr_debug("%s(): returned %p\n", __func__, page); - return page; -} - -/** - * kvm_release_cma() - release allocated pages for hash pagetable - * @pages: Allocated pages. - * @nr_pages: Number of allocated pages. - * - * This function releases memory allocated by kvm_alloc_cma(). - * It returns false when provided pages do not belong to contiguous area and - * true otherwise. - */ -bool kvm_release_cma(struct page *pages, unsigned long nr_pages) -{ - unsigned long pfn; - unsigned long nr_chunk; - struct kvm_cma *cma = &kvm_cma_area; - - if (!cma || !pages) - return false; - - pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages); - - pfn = page_to_pfn(pages); - - if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) - return false; - - VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count); - nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); - - mutex_lock(&kvm_cma_mutex); - bitmap_clear(cma->bitmap, - (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT), - nr_chunk); - free_contig_range(pfn, nr_pages); - mutex_unlock(&kvm_cma_mutex); - - return true; -} - -static int __init kvm_cma_activate_area(unsigned long base_pfn, - unsigned long count) -{ - unsigned long pfn = base_pfn; - unsigned i = count >> pageblock_order; - struct zone *zone; - - WARN_ON_ONCE(!pfn_valid(pfn)); - zone = page_zone(pfn_to_page(pfn)); - do { - unsigned j; - base_pfn = pfn; - for (j = pageblock_nr_pages; j; --j, pfn++) { - WARN_ON_ONCE(!pfn_valid(pfn)); - /* - * alloc_contig_range requires the pfn range - * specified to be in the same zone. Make this - * simple by forcing the entire CMA resv range - * to be in the same zone. - */ - if (page_zone(pfn_to_page(pfn)) != zone) - return -EINVAL; - } - init_cma_reserved_pageblock(pfn_to_page(base_pfn)); - } while (--i); - return 0; -} - -static int __init kvm_cma_init_reserved_areas(void) -{ - int bitmap_size, ret; - unsigned long chunk_count; - struct kvm_cma *cma = &kvm_cma_area; - - pr_debug("%s()\n", __func__); - if (!cma->count) - return 0; - chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); - bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long); - cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!cma->bitmap) - return -ENOMEM; - - ret = kvm_cma_activate_area(cma->base_pfn, cma->count); - if (ret) - goto error; - return 0; - -error: - kfree(cma->bitmap); - return ret; -} -core_initcall(kvm_cma_init_reserved_areas); diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h deleted file mode 100644 index 655144f75fa..00000000000 --- a/arch/powerpc/kvm/book3s_hv_cma.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA - * for DMA mapping framework - * - * Copyright IBM Corporation, 2013 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License or (at your optional) any later version of the license. - * - */ - -#ifndef __POWERPC_KVM_CMA_ALLOC_H__ -#define __POWERPC_KVM_CMA_ALLOC_H__ -/* - * Both RMA and Hash page allocation will be multiple of 256K. - */ -#define KVM_CMA_CHUNK_ORDER 18 - -extern struct page *kvm_alloc_cma(unsigned long nr_pages, - unsigned long align_pages); -extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages); -extern long kvm_cma_declare_contiguous(phys_addr_t size, - phys_addr_t alignment) __init; -#endif diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 3a5c568b1e8..d562c8e2bc3 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -45,14 +45,14 @@ static void reload_slb(struct kvm_vcpu *vcpu) return; /* Sanity check */ - n = min_t(u32, slb->persistent, SLB_MIN_SIZE); + n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE); if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end) return; /* Load up the SLB from that */ for (i = 0; i < n; ++i) { - unsigned long rb = slb->save_area[i].esid; - unsigned long rs = slb->save_area[i].vsid; + unsigned long rb = be64_to_cpu(slb->save_area[i].esid); + unsigned long rs = be64_to_cpu(slb->save_area[i].vsid); rb = (rb & ~0xFFFul) | i; /* insert entry number */ asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 5a24d3c2b6b..084ad54c73c 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -154,10 +154,10 @@ static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva, return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift); } -static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v) +static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v) { asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); - hpte[0] = hpte_v; + hpte[0] = cpu_to_be64(hpte_v); } long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, @@ -166,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, { unsigned long i, pa, gpa, gfn, psize; unsigned long slot_fn, hva; - unsigned long *hpte; + __be64 *hpte; struct revmap_entry *rev; unsigned long g_ptel; struct kvm_memory_slot *memslot; @@ -275,9 +275,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, return H_PARAMETER; if (likely((flags & H_EXACT) == 0)) { pte_index &= ~7UL; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); for (i = 0; i < 8; ++i) { - if ((*hpte & HPTE_V_VALID) == 0 && + if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 && try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT)) break; @@ -292,11 +292,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, */ hpte -= 16; for (i = 0; i < 8; ++i) { + u64 pte; while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT))) + pte = be64_to_cpu(*hpte); + if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT))) break; - *hpte &= ~HPTE_V_HVLOCK; + *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK); hpte += 2; } if (i == 8) @@ -304,14 +306,17 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } pte_index += i; } else { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT)) { /* Lock the slot and check again */ + u64 pte; + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) { - *hpte &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(*hpte); + if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) { + *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_PTEG_FULL; } } @@ -347,11 +352,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } } - hpte[1] = ptel; + hpte[1] = cpu_to_be64(ptel); /* Write the first HPTE dword, unlocking the HPTE and making it valid */ eieio(); - hpte[0] = pteh; + hpte[0] = cpu_to_be64(pteh); asm volatile("ptesync" : : : "memory"); *pte_idx_ret = pte_index; @@ -468,30 +473,35 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, unsigned long pte_index, unsigned long avpn, unsigned long *hpret) { - unsigned long *hpte; + __be64 *hpte; unsigned long v, r, rb; struct revmap_entry *rev; + u64 pte; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || - ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) || - ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) { - hpte[0] &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(hpte[0]); + if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || + ((flags & H_ANDCOND) && (pte & avpn) != 0)) { + hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_NOT_FOUND; } rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); - v = hpte[0] & ~HPTE_V_HVLOCK; + v = pte & ~HPTE_V_HVLOCK; if (v & HPTE_V_VALID) { - hpte[0] &= ~HPTE_V_VALID; - rb = compute_tlbie_rb(v, hpte[1], pte_index); + u64 pte1; + + pte1 = be64_to_cpu(hpte[1]); + hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); + rb = compute_tlbie_rb(v, pte1, pte_index); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* Read PTE low word after tlbie to get final R/C values */ - remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); + remove_revmap_chain(kvm, pte_index, rev, v, pte1); } r = rev->guest_rpte & ~HPTE_GR_RESERVED; note_hpte_modification(kvm, rev); @@ -514,12 +524,14 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; unsigned long *args = &vcpu->arch.gpr[4]; - unsigned long *hp, *hptes[4], tlbrb[4]; + __be64 *hp, *hptes[4]; + unsigned long tlbrb[4]; long int i, j, k, n, found, indexes[4]; unsigned long flags, req, pte_index, rcbits; int global; long int ret = H_SUCCESS; struct revmap_entry *rev, *revs[4]; + u64 hp0; global = global_invalidates(kvm, 0); for (i = 0; i < 4 && ret == H_SUCCESS; ) { @@ -542,8 +554,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) ret = H_PARAMETER; break; } - hp = (unsigned long *) - (kvm->arch.hpt_virt + (pte_index << 4)); + hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4)); /* to avoid deadlock, don't spin except for first */ if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { if (n) @@ -552,23 +563,24 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) cpu_relax(); } found = 0; - if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) { + hp0 = be64_to_cpu(hp[0]); + if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { switch (flags & 3) { case 0: /* absolute */ found = 1; break; case 1: /* andcond */ - if (!(hp[0] & args[j + 1])) + if (!(hp0 & args[j + 1])) found = 1; break; case 2: /* AVPN */ - if ((hp[0] & ~0x7fUL) == args[j + 1]) + if ((hp0 & ~0x7fUL) == args[j + 1]) found = 1; break; } } if (!found) { - hp[0] &= ~HPTE_V_HVLOCK; + hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); args[j] = ((0x90 | flags) << 56) + pte_index; continue; } @@ -577,7 +589,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); note_hpte_modification(kvm, rev); - if (!(hp[0] & HPTE_V_VALID)) { + if (!(hp0 & HPTE_V_VALID)) { /* insert R and C bits from PTE */ rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); @@ -585,8 +597,10 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) continue; } - hp[0] &= ~HPTE_V_VALID; /* leave it locked */ - tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index); + /* leave it locked */ + hp[0] &= ~cpu_to_be64(HPTE_V_VALID); + tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]), + be64_to_cpu(hp[1]), pte_index); indexes[n] = j; hptes[n] = hp; revs[n] = rev; @@ -605,7 +619,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) pte_index = args[j] & ((1ul << 56) - 1); hp = hptes[k]; rev = revs[k]; - remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]); + remove_revmap_chain(kvm, pte_index, rev, + be64_to_cpu(hp[0]), be64_to_cpu(hp[1])); rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); hp[0] = 0; @@ -620,23 +635,25 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long va) { struct kvm *kvm = vcpu->kvm; - unsigned long *hpte; + __be64 *hpte; struct revmap_entry *rev; unsigned long v, r, rb, mask, bits; + u64 pte; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || - ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) { - hpte[0] &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(hpte[0]); + if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) { + hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_NOT_FOUND; } - v = hpte[0]; + v = pte; bits = (flags << 55) & HPTE_R_PP0; bits |= (flags << 48) & HPTE_R_KEY_HI; bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); @@ -650,12 +667,12 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, rev->guest_rpte = r; note_hpte_modification(kvm, rev); } - r = (hpte[1] & ~mask) | bits; + r = (be64_to_cpu(hpte[1]) & ~mask) | bits; /* Update HPTE */ if (v & HPTE_V_VALID) { rb = compute_tlbie_rb(v, r, pte_index); - hpte[0] = v & ~HPTE_V_VALID; + hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* * If the host has this page as readonly but the guest @@ -681,9 +698,9 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, } } } - hpte[1] = r; + hpte[1] = cpu_to_be64(r); eieio(); - hpte[0] = v & ~HPTE_V_HVLOCK; + hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK); asm volatile("ptesync" : : : "memory"); return H_SUCCESS; } @@ -692,7 +709,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index) { struct kvm *kvm = vcpu->kvm; - unsigned long *hpte, v, r; + __be64 *hpte; + unsigned long v, r; int i, n = 1; struct revmap_entry *rev = NULL; @@ -704,9 +722,9 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, } rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); for (i = 0; i < n; ++i, ++pte_index) { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); - v = hpte[0] & ~HPTE_V_HVLOCK; - r = hpte[1]; + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[1]); if (v & HPTE_V_ABSENT) { v &= ~HPTE_V_ABSENT; v |= HPTE_V_VALID; @@ -721,25 +739,27 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, return H_SUCCESS; } -void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { unsigned long rb; - hptep[0] &= ~HPTE_V_VALID; - rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); + hptep[0] &= ~cpu_to_be64(HPTE_V_VALID); + rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), + pte_index); do_tlbies(kvm, &rb, 1, 1, true); } EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); -void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { unsigned long rb; unsigned char rbyte; - rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); - rbyte = (hptep[1] & ~HPTE_R_R) >> 8; + rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), + pte_index); + rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8; /* modify only the second-last byte, which contains the ref bit */ *((char *)hptep + 14) = rbyte; do_tlbies(kvm, &rb, 1, 1, false); @@ -765,7 +785,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, unsigned long somask; unsigned long vsid, hash; unsigned long avpn; - unsigned long *hpte; + __be64 *hpte; unsigned long mask, val; unsigned long v, r; @@ -797,11 +817,11 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, val |= avpn; for (;;) { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7)); for (i = 0; i < 16; i += 2) { /* Read the PTE racily */ - v = hpte[i] & ~HPTE_V_HVLOCK; + v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; /* Check valid/absent, hash, segment size and AVPN */ if (!(v & valid) || (v & mask) != val) @@ -810,8 +830,8 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, /* Lock the PTE and read it under the lock */ while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) cpu_relax(); - v = hpte[i] & ~HPTE_V_HVLOCK; - r = hpte[i+1]; + v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[i+1]); /* * Check the HPTE again, including base page size @@ -822,7 +842,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, return (hash << 3) + (i >> 1); /* Unlock and move on */ - hpte[i] = v; + hpte[i] = cpu_to_be64(v); } if (val & HPTE_V_SECONDARY) @@ -851,7 +871,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, struct kvm *kvm = vcpu->kvm; long int index; unsigned long v, r, gr; - unsigned long *hpte; + __be64 *hpte; unsigned long valid; struct revmap_entry *rev; unsigned long pp, key; @@ -867,9 +887,9 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, return status; /* there really was no HPTE */ return 0; /* for prot fault, HPTE disappeared */ } - hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); - v = hpte[0] & ~HPTE_V_HVLOCK; - r = hpte[1]; + hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[1]); rev = real_vmalloc_addr(&kvm->arch.revmap[index]); gr = rev->guest_rpte; diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index b4b0082f761..3ee38e6e884 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -401,6 +401,11 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) icp->rm_action |= XICS_RM_REJECT; icp->rm_reject = irq; } + + if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) { + icp->rm_action |= XICS_RM_NOTIFY_EOI; + icp->rm_eoied_irq = irq; + } bail: return check_too_hard(xics, icp); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 558a67df812..edb2ccdbb2b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -32,10 +32,6 @@ #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) -#ifdef __LITTLE_ENDIAN__ -#error Need to fix lppaca and SLB shadow accesses in little endian mode -#endif - /* Values in HSTATE_NAPPING(r13) */ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 @@ -159,6 +155,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL BEGIN_FTR_SECTION beq 11f + cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI + beq cr2, 14f /* HMI check */ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* RFI into the highmem handler, or branch to interrupt handler */ @@ -179,6 +177,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 13: b machine_check_fwnmi +14: mtspr SPRN_HSRR0, r8 + mtspr SPRN_HSRR1, r7 + b hmi_exception_after_realmode + kvmppc_primary_no_guest: /* We handle this much like a ceded vcpu */ /* set our bit in napping_threads */ @@ -353,6 +355,7 @@ kvmppc_hv_entry: * MSR = ~IR|DR * R13 = PACA * R1 = host R1 + * R2 = TOC * all other volatile GPRS = free */ mflr r0 @@ -501,7 +504,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) toc_tlbie_lock: .tc native_tlbie_lock[TC],native_tlbie_lock .previous - ld r3,toc_tlbie_lock@toc(2) + ld r3,toc_tlbie_lock@toc(r2) #ifdef __BIG_ENDIAN__ lwz r8,PACA_LOCK_TOKEN(r13) #else @@ -595,9 +598,10 @@ kvmppc_got_guest: ld r3, VCPU_VPA(r4) cmpdi r3, 0 beq 25f - lwz r5, LPPACA_YIELDCOUNT(r3) + li r6, LPPACA_YIELDCOUNT + LWZX_BE r5, r3, r6 addi r5, r5, 1 - stw r5, LPPACA_YIELDCOUNT(r3) + STWX_BE r5, r3, r6 li r6, 1 stb r6, VCPU_VPA_DIRTY(r4) 25: @@ -671,9 +675,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) mr r31, r4 addi r3, r31, VCPU_FPRS_TM - bl .load_fp_state + bl load_fp_state addi r3, r31, VCPU_VRS_TM - bl .load_vr_state + bl load_vr_state mr r4, r31 lwz r7, VCPU_VRSAVE_TM(r4) mtspr SPRN_VRSAVE, r7 @@ -1417,9 +1421,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) /* Save FP/VSX. */ addi r3, r9, VCPU_FPRS_TM - bl .store_fp_state + bl store_fp_state addi r3, r9, VCPU_VRS_TM - bl .store_vr_state + bl store_vr_state mfspr r6, SPRN_VRSAVE stw r6, VCPU_VRSAVE_TM(r9) 1: @@ -1442,9 +1446,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) ld r8, VCPU_VPA(r9) /* do they have a VPA? */ cmpdi r8, 0 beq 25f - lwz r3, LPPACA_YIELDCOUNT(r8) + li r4, LPPACA_YIELDCOUNT + LWZX_BE r3, r8, r4 addi r3, r3, 1 - stw r3, LPPACA_YIELDCOUNT(r8) + STWX_BE r3, r8, r4 li r3, 1 stb r3, VCPU_VPA_DIRTY(r9) 25: @@ -1757,8 +1762,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 33: ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED - ld r5,SLBSHADOW_SAVEAREA(r8) - ld r6,SLBSHADOW_SAVEAREA+8(r8) + li r3, SLBSHADOW_SAVEAREA + LDX_BE r5, r8, r3 + addi r3, r3, 8 + LDX_BE r6, r8, r3 andis. r7,r5,SLB_ESID_V@h beq 1f slbmte r6,r5 @@ -1909,12 +1916,23 @@ hcall_try_real_mode: clrrdi r3,r3,2 cmpldi r3,hcall_real_table_end - hcall_real_table bge guest_exit_cont + /* See if this hcall is enabled for in-kernel handling */ + ld r4, VCPU_KVM(r9) + srdi r0, r3, 8 /* r0 = (r3 / 4) >> 6 */ + sldi r0, r0, 3 /* index into kvm->arch.enabled_hcalls[] */ + add r4, r4, r0 + ld r0, KVM_ENABLED_HCALLS(r4) + rlwinm r4, r3, 32-2, 0x3f /* r4 = (r3 / 4) & 0x3f */ + srd r0, r0, r4 + andi. r0, r0, 1 + beq guest_exit_cont + /* Get pointer to handler, if any, and call it */ LOAD_REG_ADDR(r4, hcall_real_table) lwax r3,r3,r4 cmpwi r3,0 beq guest_exit_cont - add r3,r3,r4 - mtctr r3 + add r12,r3,r4 + mtctr r12 mr r3,r9 /* get vcpu pointer */ ld r4,VCPU_GPR(R4)(r9) bctrl @@ -2031,6 +2049,7 @@ hcall_real_table: .long 0 /* 0x12c */ .long 0 /* 0x130 */ .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table + .globl hcall_real_table_end hcall_real_table_end: ignore_hdec: @@ -2338,7 +2357,18 @@ kvmppc_read_intr: cmpdi r6, 0 beq- 1f lwzcix r0, r6, r7 - rlwinm. r3, r0, 0, 0xffffff + /* + * Save XIRR for later. Since we get in in reverse endian on LE + * systems, save it byte reversed and fetch it back in host endian. + */ + li r3, HSTATE_SAVED_XIRR + STWX_BE r0, r3, r13 +#ifdef __LITTLE_ENDIAN__ + lwz r3, HSTATE_SAVED_XIRR(r13) +#else + mr r3, r0 +#endif + rlwinm. r3, r3, 0, 0xffffff sync beq 1f /* if nothing pending in the ICP */ @@ -2370,10 +2400,9 @@ kvmppc_read_intr: li r3, -1 1: blr -42: /* It's not an IPI and it's for the host, stash it in the PACA - * before exit, it will be picked up by the host ICP driver +42: /* It's not an IPI and it's for the host. We saved a copy of XIRR in + * the PACA earlier, it will be picked up by the host ICP driver */ - stw r0, HSTATE_SAVED_XIRR(r13) li r3, 1 b 1b @@ -2408,11 +2437,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) mtmsrd r8 isync addi r3,r3,VCPU_FPRS - bl .store_fp_state + bl store_fp_state #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION addi r3,r31,VCPU_VRS - bl .store_vr_state + bl store_vr_state END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif mfspr r6,SPRN_VRSAVE @@ -2444,11 +2473,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) mtmsrd r8 isync addi r3,r4,VCPU_FPRS - bl .load_fp_state + bl load_fp_state #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION addi r3,r31,VCPU_VRS - bl .load_vr_state + bl load_vr_state END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif lwz r7,VCPU_VRSAVE(r31) diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index 6c8011fd57e..bfb8035314e 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -639,26 +639,36 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { - u32 inst = kvmppc_get_last_inst(vcpu); + u32 inst; enum emulation_result emulated = EMULATE_DONE; + int ax_rd, ax_ra, ax_rb, ax_rc; + short full_d; + u64 *fpr_d, *fpr_a, *fpr_b, *fpr_c; - int ax_rd = inst_get_field(inst, 6, 10); - int ax_ra = inst_get_field(inst, 11, 15); - int ax_rb = inst_get_field(inst, 16, 20); - int ax_rc = inst_get_field(inst, 21, 25); - short full_d = inst_get_field(inst, 16, 31); - - u64 *fpr_d = &VCPU_FPR(vcpu, ax_rd); - u64 *fpr_a = &VCPU_FPR(vcpu, ax_ra); - u64 *fpr_b = &VCPU_FPR(vcpu, ax_rb); - u64 *fpr_c = &VCPU_FPR(vcpu, ax_rc); - - bool rcomp = (inst & 1) ? true : false; - u32 cr = kvmppc_get_cr(vcpu); + bool rcomp; + u32 cr; #ifdef DEBUG int i; #endif + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); + if (emulated != EMULATE_DONE) + return emulated; + + ax_rd = inst_get_field(inst, 6, 10); + ax_ra = inst_get_field(inst, 11, 15); + ax_rb = inst_get_field(inst, 16, 20); + ax_rc = inst_get_field(inst, 21, 25); + full_d = inst_get_field(inst, 16, 31); + + fpr_d = &VCPU_FPR(vcpu, ax_rd); + fpr_a = &VCPU_FPR(vcpu, ax_ra); + fpr_b = &VCPU_FPR(vcpu, ax_rb); + fpr_c = &VCPU_FPR(vcpu, ax_rc); + + rcomp = (inst & 1) ? true : false; + cr = kvmppc_get_cr(vcpu); + if (!kvmppc_inst_is_paired_single(vcpu, inst)) return EMULATE_FAIL; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 8eef1e51907..cf2eb16846d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -62,6 +62,35 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); #define HW_PAGE_SIZE PAGE_SIZE #endif +static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu) +{ + ulong msr = kvmppc_get_msr(vcpu); + return (msr & (MSR_IR|MSR_DR)) == MSR_DR; +} + +static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu) +{ + ulong msr = kvmppc_get_msr(vcpu); + ulong pc = kvmppc_get_pc(vcpu); + + /* We are in DR only split real mode */ + if ((msr & (MSR_IR|MSR_DR)) != MSR_DR) + return; + + /* We have not fixed up the guest already */ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) + return; + + /* The code is in fixupable address space */ + if (pc & SPLIT_HACK_MASK) + return; + + vcpu->arch.hflags |= BOOK3S_HFLAG_SPLIT_HACK; + kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS); +} + +void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu); + static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_PPC_BOOK3S_64 @@ -71,10 +100,19 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) svcpu->in_use = 0; svcpu_put(svcpu); #endif + + /* Disable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL); + vcpu->cpu = smp_processor_id(); #ifdef CONFIG_PPC_BOOK3S_32 current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu; #endif + + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); } static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) @@ -89,8 +127,17 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) svcpu_put(svcpu); #endif + if (kvmppc_is_split_real(vcpu)) + kvmppc_unfixup_split_real(vcpu); + kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + + /* Enable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3); + vcpu->cpu = -1; } @@ -120,6 +167,14 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, #ifdef CONFIG_PPC_BOOK3S_64 svcpu->shadow_fscr = vcpu->arch.shadow_fscr; #endif + /* + * Now also save the current time base value. We use this + * to find the guest purr and spurr value. + */ + vcpu->arch.entry_tb = get_tb(); + vcpu->arch.entry_vtb = get_vtb(); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcpu->arch.entry_ic = mfspr(SPRN_IC); svcpu->in_use = true; } @@ -166,6 +221,14 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, #ifdef CONFIG_PPC_BOOK3S_64 vcpu->arch.shadow_fscr = svcpu->shadow_fscr; #endif + /* + * Update purr and spurr using time base on exit. + */ + vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; + vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; + vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb; + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic; svcpu->in_use = false; out: @@ -232,7 +295,8 @@ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, return 0; } -static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva) +static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start, + unsigned long end) { /* XXX could be more clever ;) */ return 0; @@ -294,6 +358,11 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) } } + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); + else + kvmppc_unfixup_split_real(vcpu); + if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) != (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { kvmppc_mmu_flush_segments(vcpu); @@ -443,19 +512,19 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) put_page(hpage); } -static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) +static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { ulong mp_pa = vcpu->arch.magic_page_pa; if (!(kvmppc_get_msr(vcpu) & MSR_SF)) mp_pa = (uint32_t)mp_pa; - if (unlikely(mp_pa) && - unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { + gpa &= ~0xFFFULL; + if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) { return 1; } - return kvm_is_visible_gfn(vcpu->kvm, gfn); + return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT); } int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -494,6 +563,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); break; case MSR_DR: + if (!data && + (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) && + ((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)) + pte.raddr &= ~SPLIT_HACK_MASK; + /* fall through */ case MSR_IR: vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); @@ -541,7 +615,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); } else if (!is_mmio && - kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { + kvmppc_visible_gpa(vcpu, pte.raddr)) { if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { /* * There is already a host HPTE there, presumably @@ -637,42 +711,6 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac) #endif } -static int kvmppc_read_inst(struct kvm_vcpu *vcpu) -{ - ulong srr0 = kvmppc_get_pc(vcpu); - u32 last_inst = kvmppc_get_last_inst(vcpu); - int ret; - - ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); - if (ret == -ENOENT) { - ulong msr = kvmppc_get_msr(vcpu); - - msr = kvmppc_set_field(msr, 33, 33, 1); - msr = kvmppc_set_field(msr, 34, 36, 0); - msr = kvmppc_set_field(msr, 42, 47, 0); - kvmppc_set_msr_fast(vcpu, msr); - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); - return EMULATE_AGAIN; - } - - return EMULATE_DONE; -} - -static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) -{ - - /* Need to do paired single emulation? */ - if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) - return EMULATE_DONE; - - /* Read out the instruction */ - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) - /* Need to emulate */ - return EMULATE_FAIL; - - return EMULATE_AGAIN; -} - /* Handle external providers (FPU, Altivec, VSX) */ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, ulong msr) @@ -834,6 +872,15 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac) return RESUME_GUEST; } + +void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr) +{ + if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) { + /* TAR got dropped, drop it in shadow too */ + kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + } + vcpu->arch.fscr = fscr; +} #endif int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -858,6 +905,9 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, ulong shadow_srr1 = vcpu->arch.shadow_srr1; vcpu->stat.pf_instruc++; + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); + #ifdef CONFIG_PPC_BOOK3S_32 /* We set segments as unused segments when invalidating them. So * treat the respective fault as segment fault. */ @@ -960,6 +1010,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOK3S_INTERRUPT_DECREMENTER: case BOOK3S_INTERRUPT_HV_DECREMENTER: case BOOK3S_INTERRUPT_DOORBELL: + case BOOK3S_INTERRUPT_H_DOORBELL: vcpu->stat.dec_exits++; r = RESUME_GUEST; break; @@ -977,15 +1028,24 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, { enum emulation_result er; ulong flags; + u32 last_inst; + int emul; program_interrupt: flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; + emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + if (emul != EMULATE_DONE) { + r = RESUME_GUEST; + break; + } + if (kvmppc_get_msr(vcpu) & MSR_PR) { #ifdef EXIT_DEBUG - printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); + pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n", + kvmppc_get_pc(vcpu), last_inst); #endif - if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != + if ((last_inst & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) { kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; @@ -1004,7 +1064,7 @@ program_interrupt: break; case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", - __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); + __func__, kvmppc_get_pc(vcpu), last_inst); kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; break; @@ -1021,8 +1081,23 @@ program_interrupt: break; } case BOOK3S_INTERRUPT_SYSCALL: + { + u32 last_sc; + int emul; + + /* Get last sc for papr */ + if (vcpu->arch.papr_enabled) { + /* The sc instuction points SRR0 to the next inst */ + emul = kvmppc_get_last_inst(vcpu, INST_SC, &last_sc); + if (emul != EMULATE_DONE) { + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) - 4); + r = RESUME_GUEST; + break; + } + } + if (vcpu->arch.papr_enabled && - (kvmppc_get_last_sc(vcpu) == 0x44000022) && + (last_sc == 0x44000022) && !(kvmppc_get_msr(vcpu) & MSR_PR)) { /* SC 1 papr hypercalls */ ulong cmd = kvmppc_get_gpr(vcpu, 3); @@ -1067,36 +1142,51 @@ program_interrupt: r = RESUME_GUEST; } break; + } case BOOK3S_INTERRUPT_FP_UNAVAIL: case BOOK3S_INTERRUPT_ALTIVEC: case BOOK3S_INTERRUPT_VSX: { int ext_msr = 0; + int emul; + u32 last_inst; + + if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) { + /* Do paired single instruction emulation */ + emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, + &last_inst); + if (emul == EMULATE_DONE) + goto program_interrupt; + else + r = RESUME_GUEST; - switch (exit_nr) { - case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; - case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; - case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; + break; } - switch (kvmppc_check_ext(vcpu, exit_nr)) { - case EMULATE_DONE: - /* everything ok - let's enable the ext */ - r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); + /* Enable external provider */ + switch (exit_nr) { + case BOOK3S_INTERRUPT_FP_UNAVAIL: + ext_msr = MSR_FP; break; - case EMULATE_FAIL: - /* we need to emulate this instruction */ - goto program_interrupt; + + case BOOK3S_INTERRUPT_ALTIVEC: + ext_msr = MSR_VEC; break; - default: - /* nothing to worry about - go again */ + + case BOOK3S_INTERRUPT_VSX: + ext_msr = MSR_VSX; break; } + + r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); break; } case BOOK3S_INTERRUPT_ALIGNMENT: - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { - u32 last_inst = kvmppc_get_last_inst(vcpu); + { + u32 last_inst; + int emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + + if (emul == EMULATE_DONE) { u32 dsisr; u64 dar; @@ -1110,6 +1200,7 @@ program_interrupt: } r = RESUME_GUEST; break; + } #ifdef CONFIG_PPC_BOOK3S_64 case BOOK3S_INTERRUPT_FAC_UNAVAIL: kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56); @@ -1229,10 +1320,14 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, int r = 0; switch (id) { + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); + break; case KVM_REG_PPC_HIOR: *val = get_reg_val(id, to_book3s(vcpu)->hior); break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: /* * We are only interested in the LPCR_ILE bit */ @@ -1268,6 +1363,7 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, to_book3s(vcpu)->hior_explicit = true; break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val)); break; default: @@ -1310,8 +1406,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, p = __get_free_page(GFP_KERNEL|__GFP_ZERO); if (!p) goto uninit_vcpu; - /* the real shared page fills the last 4k of our page */ - vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); + vcpu->arch.shared = (void *)p; #ifdef CONFIG_PPC_BOOK3S_64 /* Always start the shared struct in native endian mode */ #ifdef __BIG_ENDIAN__ @@ -1568,6 +1663,11 @@ static int kvmppc_core_init_vm_pr(struct kvm *kvm) { mutex_init(&kvm->arch.hpt_mutex); +#ifdef CONFIG_PPC_BOOK3S_64 + /* Start out with the default set of hcalls enabled */ + kvmppc_pr_init_default_hcalls(kvm); +#endif + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { spin_lock(&kvm_global_user_count_lock); if (++kvm_global_user_count == 1) @@ -1636,6 +1736,9 @@ static struct kvmppc_ops kvm_ops_pr = { .emulate_mfspr = kvmppc_core_emulate_mfspr_pr, .fast_vcpu_kick = kvm_vcpu_kick, .arch_vm_ioctl = kvm_arch_vm_ioctl_pr, +#ifdef CONFIG_PPC_BOOK3S_64 + .hcall_implemented = kvmppc_hcall_impl_pr, +#endif }; diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 52a63bfe3f0..ce3c893d509 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -40,8 +40,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) { long flags = kvmppc_get_gpr(vcpu, 4); long pte_index = kvmppc_get_gpr(vcpu, 5); - unsigned long pteg[2 * 8]; - unsigned long pteg_addr, i, *hpte; + __be64 pteg[2 * 8]; + __be64 *hpte; + unsigned long pteg_addr, i; long int ret; i = pte_index & 7; @@ -93,8 +94,8 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); ret = H_NOT_FOUND; if ((pte[0] & HPTE_V_VALID) == 0 || @@ -171,8 +172,8 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); /* tsl = AVPN */ flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26; @@ -211,8 +212,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); ret = H_NOT_FOUND; if ((pte[0] & HPTE_V_VALID) == 0 || @@ -231,8 +232,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) rb = compute_tlbie_rb(v, r, pte_index); vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); - pte[0] = cpu_to_be64(pte[0]); - pte[1] = cpu_to_be64(pte[1]); + pte[0] = (__force u64)cpu_to_be64(pte[0]); + pte[1] = (__force u64)cpu_to_be64(pte[1]); copy_to_user((void __user *)pteg, pte, sizeof(pte)); ret = H_SUCCESS; @@ -266,6 +267,12 @@ static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { + int rc, idx; + + if (cmd <= MAX_HCALL_OPCODE && + !test_bit(cmd/4, vcpu->kvm->arch.enabled_hcalls)) + return EMULATE_FAIL; + switch (cmd) { case H_ENTER: return kvmppc_h_pr_enter(vcpu); @@ -294,8 +301,11 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) break; case H_RTAS: if (list_empty(&vcpu->kvm->arch.rtas_tokens)) - return RESUME_HOST; - if (kvmppc_rtas_hcall(vcpu)) + break; + idx = srcu_read_lock(&vcpu->kvm->srcu); + rc = kvmppc_rtas_hcall(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (rc) break; kvmppc_set_gpr(vcpu, 3, 0); return EMULATE_DONE; @@ -303,3 +313,61 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return EMULATE_FAIL; } + +int kvmppc_hcall_impl_pr(unsigned long cmd) +{ + switch (cmd) { + case H_ENTER: + case H_REMOVE: + case H_PROTECT: + case H_BULK_REMOVE: + case H_PUT_TCE: + case H_CEDE: +#ifdef CONFIG_KVM_XICS + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + case H_IPOLL: + case H_XIRR_X: +#endif + return 1; + } + return 0; +} + +/* + * List of hcall numbers to enable by default. + * For compatibility with old userspace, we enable by default + * all hcalls that were implemented before the hcall-enabling + * facility was added. Note this list should not include H_RTAS. + */ +static unsigned int default_hcall_list[] = { + H_ENTER, + H_REMOVE, + H_PROTECT, + H_BULK_REMOVE, + H_PUT_TCE, + H_CEDE, +#ifdef CONFIG_KVM_XICS + H_XIRR, + H_CPPR, + H_EOI, + H_IPI, + H_IPOLL, + H_XIRR_X, +#endif + 0 +}; + +void kvmppc_pr_init_default_hcalls(struct kvm *kvm) +{ + int i; + unsigned int hcall; + + for (i = 0; default_hcall_list[i]; ++i) { + hcall = default_hcall_list[i]; + WARN_ON(!kvmppc_hcall_impl_pr(hcall)); + __set_bit(hcall / 4, kvm->arch.enabled_hcalls); + } +} diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index d1acd32a64c..eaeb78047fb 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -64,8 +64,12 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, u32 new_irq); -static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, - bool report_status) +/* + * Return value ideally indicates how the interrupt was handled, but no + * callers look at it (given that we don't implement KVM_IRQ_LINE_STATUS), + * so just return 0. + */ +static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) { struct ics_irq_state *state; struct kvmppc_ics *ics; @@ -82,17 +86,14 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, if (!state->exists) return -EINVAL; - if (report_status) - return state->asserted; - /* * We set state->asserted locklessly. This should be fine as * we are the only setter, thus concurrent access is undefined * to begin with. */ - if (level == KVM_INTERRUPT_SET_LEVEL) + if (level == 1 || level == KVM_INTERRUPT_SET_LEVEL) state->asserted = 1; - else if (level == KVM_INTERRUPT_UNSET) { + else if (level == 0 || level == KVM_INTERRUPT_UNSET) { state->asserted = 0; return 0; } @@ -100,7 +101,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, /* Attempt delivery */ icp_deliver_irq(xics, NULL, irq); - return state->asserted; + return 0; } static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, @@ -772,6 +773,8 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) if (state->asserted) icp_deliver_irq(xics, icp, irq); + kvm_notify_acked_irq(vcpu->kvm, 0, irq); + return H_SUCCESS; } @@ -789,6 +792,8 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) icp_check_resend(xics, icp); if (icp->rm_action & XICS_RM_REJECT) icp_deliver_irq(xics, icp, icp->rm_reject); + if (icp->rm_action & XICS_RM_NOTIFY_EOI) + kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq); icp->rm_action = 0; @@ -1170,7 +1175,16 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, { struct kvmppc_xics *xics = kvm->arch.xics; - return ics_deliver_irq(xics, irq, level, line_status); + return ics_deliver_irq(xics, irq, level); +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, + int irq_source_id, int level, bool line_status) +{ + if (!level) + return -1; + return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi, + level, line_status); } static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) @@ -1301,3 +1315,26 @@ void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) vcpu->arch.icp = NULL; vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; } + +static int xics_set_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status); +} + +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi) +{ + entries->gsi = gsi; + entries->type = KVM_IRQ_ROUTING_IRQCHIP; + entries->set = xics_set_irq; + entries->irqchip.irqchip = 0; + entries->irqchip.pin = gsi; + return 1; +} + +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + return pin; +} diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index dd9326c5c19..e8aaa7a3f20 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -71,9 +71,11 @@ struct kvmppc_icp { #define XICS_RM_KICK_VCPU 0x1 #define XICS_RM_CHECK_RESEND 0x2 #define XICS_RM_REJECT 0x4 +#define XICS_RM_NOTIFY_EOI 0x8 u32 rm_action; struct kvm_vcpu *rm_kick_target; u32 rm_reject; + u32 rm_eoied_irq; /* Debug stuff for real mode */ union kvmppc_icp_state rm_dbgstate; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ab62109fdfa..9b55dec2d6c 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -51,7 +51,6 @@ unsigned long kvmppc_booke_handlers; struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmio", VCPU_STAT(mmio_exits) }, - { "dcr", VCPU_STAT(dcr_exits) }, { "sig", VCPU_STAT(signal_exits) }, { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, @@ -125,6 +124,40 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu) } #endif +/* + * Load up guest vcpu FP state if it's needed. + * It also set the MSR_FP in thread so that host know + * we're holding FPU, and then host can help to save + * guest vcpu FP state if other threads require to use FPU. + * This simulates an FP unavailable fault. + * + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_FPU + if (!(current->thread.regs->msr & MSR_FP)) { + enable_kernel_fp(); + load_fp_state(&vcpu->arch.fp); + current->thread.fp_save_area = &vcpu->arch.fp; + current->thread.regs->msr |= MSR_FP; + } +#endif +} + +/* + * Save guest vcpu FP state into thread. + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_FPU + if (current->thread.regs->msr & MSR_FP) + giveup_fpu(current); + current->thread.fp_save_area = NULL; +#endif +} + static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) { #if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV) @@ -135,6 +168,40 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) #endif } +/* + * Simulate AltiVec unavailable fault to load guest state + * from thread to AltiVec unit. + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) { + if (!(current->thread.regs->msr & MSR_VEC)) { + enable_kernel_altivec(); + load_vr_state(&vcpu->arch.vr); + current->thread.vr_save_area = &vcpu->arch.vr; + current->thread.regs->msr |= MSR_VEC; + } + } +#endif +} + +/* + * Save guest vcpu AltiVec state into thread. + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_save_guest_altivec(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) { + if (current->thread.regs->msr & MSR_VEC) + giveup_altivec(current); + current->thread.vr_save_area = NULL; + } +#endif +} + static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) { /* Synchronize guest's desire to get debug interrupts into shadow MSR */ @@ -185,24 +252,28 @@ static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, set_bit(priority, &vcpu->arch.pending_exceptions); } -static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, - ulong dear_flags, ulong esr_flags) +void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, + ulong dear_flags, ulong esr_flags) { vcpu->arch.queued_dear = dear_flags; vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); } -static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, - ulong dear_flags, ulong esr_flags) +void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, + ulong dear_flags, ulong esr_flags) { vcpu->arch.queued_dear = dear_flags; vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); } -static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, - ulong esr_flags) +void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); +} + +void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong esr_flags) { vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); @@ -264,15 +335,20 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions); } +void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DEBUG); +} + +void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu) +{ + clear_bit(BOOKE_IRQPRIO_DEBUG, &vcpu->arch.pending_exceptions); +} + static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) { -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GSRR0, srr0); - mtspr(SPRN_GSRR1, srr1); -#else - vcpu->arch.shared->srr0 = srr0; - vcpu->arch.shared->srr1 = srr1; -#endif + kvmppc_set_srr0(vcpu, srr0); + kvmppc_set_srr1(vcpu, srr1); } static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) @@ -297,51 +373,6 @@ static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) vcpu->arch.mcsrr1 = srr1; } -static unsigned long get_guest_dear(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GDEAR); -#else - return vcpu->arch.shared->dar; -#endif -} - -static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear) -{ -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GDEAR, dear); -#else - vcpu->arch.shared->dar = dear; -#endif -} - -static unsigned long get_guest_esr(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GESR); -#else - return vcpu->arch.shared->esr; -#endif -} - -static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr) -{ -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GESR, esr); -#else - vcpu->arch.shared->esr = esr; -#endif -} - -static unsigned long get_guest_epr(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GEPR); -#else - return vcpu->arch.epr; -#endif -} - /* Deliver the interrupt of the corresponding priority, if possible. */ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) @@ -388,9 +419,15 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_ITLB_MISS: case BOOKE_IRQPRIO_SYSCALL: case BOOKE_IRQPRIO_FP_UNAVAIL: +#ifdef CONFIG_SPE_POSSIBLE case BOOKE_IRQPRIO_SPE_UNAVAIL: case BOOKE_IRQPRIO_SPE_FP_DATA: case BOOKE_IRQPRIO_SPE_FP_ROUND: +#endif +#ifdef CONFIG_ALTIVEC + case BOOKE_IRQPRIO_ALTIVEC_UNAVAIL: + case BOOKE_IRQPRIO_ALTIVEC_ASSIST: +#endif case BOOKE_IRQPRIO_AP_UNAVAIL: allowed = 1; msr_mask = MSR_CE | MSR_ME | MSR_DE; @@ -424,7 +461,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, allowed = vcpu->arch.shared->msr & MSR_DE; allowed = allowed && !crit; msr_mask = MSR_ME; - int_class = INT_CLASS_CRIT; + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) + int_class = INT_CLASS_DBG; + else + int_class = INT_CLASS_CRIT; + break; } @@ -450,9 +491,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; if (update_esr == true) - set_guest_esr(vcpu, vcpu->arch.queued_esr); + kvmppc_set_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) - set_guest_dear(vcpu, vcpu->arch.queued_dear); + kvmppc_set_dar(vcpu, vcpu->arch.queued_dear); if (update_epr == true) { if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); @@ -701,20 +742,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* * Since we can't trap on MSR_FP in GS-mode, we consider the guest - * as always using the FPU. Kernel usage of FP (via - * enable_kernel_fp()) in this thread must not occur while - * vcpu->fpu_active is set. + * as always using the FPU. */ - vcpu->fpu_active = 1; - kvmppc_load_guest_fp(vcpu); #endif +#ifdef CONFIG_ALTIVEC + /* Save userspace AltiVec state in stack */ + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + enable_kernel_altivec(); + /* + * Since we can't trap on MSR_VEC in GS-mode, we consider the guest + * as always using the AltiVec. + */ + kvmppc_load_guest_altivec(vcpu); +#endif + /* Switch to guest debug context */ - debug = vcpu->arch.shadow_dbg_reg; + debug = vcpu->arch.dbg_reg; switch_booke_debug_regs(&debug); debug = current->thread.debug; - current->thread.debug = vcpu->arch.shadow_dbg_reg; + current->thread.debug = vcpu->arch.dbg_reg; vcpu->arch.pgdir = current->mm->pgd; kvmppc_fix_ee_before_entry(); @@ -730,8 +778,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #ifdef CONFIG_PPC_FPU kvmppc_save_guest_fp(vcpu); +#endif - vcpu->fpu_active = 0; +#ifdef CONFIG_ALTIVEC + kvmppc_save_guest_altivec(vcpu); #endif out: @@ -752,9 +802,8 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) * they were actually modified by emulation. */ return RESUME_GUEST_NV; - case EMULATE_DO_DCR: - run->exit_reason = KVM_EXIT_DCR; - return RESUME_HOST; + case EMULATE_AGAIN: + return RESUME_GUEST; case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", @@ -776,9 +825,36 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) { - struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg); + struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg); u32 dbsr = vcpu->arch.dbsr; + if (vcpu->guest_debug == 0) { + /* + * Debug resources belong to Guest. + * Imprecise debug event is not injected + */ + if (dbsr & DBSR_IDE) { + dbsr &= ~DBSR_IDE; + if (!dbsr) + return RESUME_GUEST; + } + + if (dbsr && (vcpu->arch.shared->msr & MSR_DE) && + (vcpu->arch.dbg_reg.dbcr0 & DBCR0_IDM)) + kvmppc_core_queue_debug(vcpu); + + /* Inject a program interrupt if trap debug is not allowed */ + if ((dbsr & DBSR_TIE) && !(vcpu->arch.shared->msr & MSR_DE)) + kvmppc_core_queue_program(vcpu, ESR_PTR); + + return RESUME_GUEST; + } + + /* + * Debug resource owned by userspace. + * Clear guest dbsr (vcpu->arch.dbsr) + */ + vcpu->arch.dbsr = 0; run->debug.arch.status = 0; run->debug.arch.address = vcpu->arch.pc; @@ -866,6 +942,28 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, } } +static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + enum emulation_result emulated, u32 last_inst) +{ + switch (emulated) { + case EMULATE_AGAIN: + return RESUME_GUEST; + + case EMULATE_FAIL: + pr_debug("%s: load instruction from guest address %lx failed\n", + __func__, vcpu->arch.pc); + /* For debugging, encode the failing instruction and + * report it to userspace. */ + run->hw.hardware_exit_reason = ~0ULL << 32; + run->hw.hardware_exit_reason |= last_inst; + kvmppc_core_queue_program(vcpu, ESR_PIL); + return RESUME_HOST; + + default: + BUG(); + } +} + /** * kvmppc_handle_exit * @@ -877,6 +975,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, int r = RESUME_HOST; int s; int idx; + u32 last_inst = KVM_INST_FETCH_FAILED; + enum emulation_result emulated = EMULATE_DONE; /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); @@ -884,6 +984,25 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* restart interrupts if they were meant for the host */ kvmppc_restart_interrupt(vcpu, exit_nr); + /* + * get last instruction before beeing preempted + * TODO: for e6500 check also BOOKE_INTERRUPT_LRAT_ERROR & ESR_DATA + */ + switch (exit_nr) { + case BOOKE_INTERRUPT_DATA_STORAGE: + case BOOKE_INTERRUPT_DTLB_MISS: + case BOOKE_INTERRUPT_HV_PRIV: + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + break; + case BOOKE_INTERRUPT_PROGRAM: + /* SW breakpoints arrive as illegal instructions on HV */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + break; + default: + break; + } + local_irq_enable(); trace_kvm_exit(exit_nr, vcpu); @@ -892,6 +1011,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; + if (emulated != EMULATE_DONE) { + r = kvmppc_resume_inst_load(run, vcpu, emulated, last_inst); + goto out; + } + switch (exit_nr) { case BOOKE_INTERRUPT_MACHINE_CHECK: printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); @@ -952,6 +1076,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_PROGRAM: + if ((vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) && + (last_inst == KVMPPC_INST_SW_BREAKPOINT)) { + /* + * We are here because of an SW breakpoint instr, + * so lets return to host to handle. + */ + r = kvmppc_handle_debug(run, vcpu); + run->exit_reason = KVM_EXIT_DEBUG; + kvmppc_account_exit(vcpu, DEBUG_EXITS); + break; + } + if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) { /* * Program traps generated by user-level software must @@ -996,7 +1132,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND); r = RESUME_GUEST; break; -#else +#elif defined(CONFIG_SPE_POSSIBLE) case BOOKE_INTERRUPT_SPE_UNAVAIL: /* * Guest wants SPE, but host kernel doesn't support it. Send @@ -1017,6 +1153,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->hw.hardware_exit_reason = exit_nr; r = RESUME_HOST; break; +#endif /* CONFIG_SPE_POSSIBLE */ + +/* + * On cores with Vector category, KVM is loaded only if CONFIG_ALTIVEC, + * see kvmppc_core_check_processor_compat(). + */ +#ifdef CONFIG_ALTIVEC + case BOOKE_INTERRUPT_ALTIVEC_UNAVAIL: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_ALTIVEC_ASSIST: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_ASSIST); + r = RESUME_GUEST; + break; #endif case BOOKE_INTERRUPT_DATA_STORAGE: @@ -1181,6 +1333,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, BUG(); } +out: /* * To avoid clobbering exit_reason, only check for signals if we * aren't already exiting to userspace for some other reason. @@ -1192,6 +1345,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, else { /* interrupts now hard-disabled */ kvmppc_fix_ee_before_entry(); + kvmppc_load_guest_fp(vcpu); + kvmppc_load_guest_altivec(vcpu); } } @@ -1247,6 +1402,11 @@ int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, (unsigned long)vcpu); + /* + * Clear DBSR.MRR to avoid guest debug interrupt as + * this is of host interest + */ + mtspr(SPRN_DBSR, DBSR_MRR); return 0; } @@ -1265,17 +1425,17 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->lr = vcpu->arch.lr; regs->xer = kvmppc_get_xer(vcpu); regs->msr = vcpu->arch.shared->msr; - regs->srr0 = vcpu->arch.shared->srr0; - regs->srr1 = vcpu->arch.shared->srr1; + regs->srr0 = kvmppc_get_srr0(vcpu); + regs->srr1 = kvmppc_get_srr1(vcpu); regs->pid = vcpu->arch.pid; - regs->sprg0 = vcpu->arch.shared->sprg0; - regs->sprg1 = vcpu->arch.shared->sprg1; - regs->sprg2 = vcpu->arch.shared->sprg2; - regs->sprg3 = vcpu->arch.shared->sprg3; - regs->sprg4 = vcpu->arch.shared->sprg4; - regs->sprg5 = vcpu->arch.shared->sprg5; - regs->sprg6 = vcpu->arch.shared->sprg6; - regs->sprg7 = vcpu->arch.shared->sprg7; + regs->sprg0 = kvmppc_get_sprg0(vcpu); + regs->sprg1 = kvmppc_get_sprg1(vcpu); + regs->sprg2 = kvmppc_get_sprg2(vcpu); + regs->sprg3 = kvmppc_get_sprg3(vcpu); + regs->sprg4 = kvmppc_get_sprg4(vcpu); + regs->sprg5 = kvmppc_get_sprg5(vcpu); + regs->sprg6 = kvmppc_get_sprg6(vcpu); + regs->sprg7 = kvmppc_get_sprg7(vcpu); for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); @@ -1293,17 +1453,17 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.lr = regs->lr; kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); - vcpu->arch.shared->srr0 = regs->srr0; - vcpu->arch.shared->srr1 = regs->srr1; + kvmppc_set_srr0(vcpu, regs->srr0); + kvmppc_set_srr1(vcpu, regs->srr1); kvmppc_set_pid(vcpu, regs->pid); - vcpu->arch.shared->sprg0 = regs->sprg0; - vcpu->arch.shared->sprg1 = regs->sprg1; - vcpu->arch.shared->sprg2 = regs->sprg2; - vcpu->arch.shared->sprg3 = regs->sprg3; - vcpu->arch.shared->sprg4 = regs->sprg4; - vcpu->arch.shared->sprg5 = regs->sprg5; - vcpu->arch.shared->sprg6 = regs->sprg6; - vcpu->arch.shared->sprg7 = regs->sprg7; + kvmppc_set_sprg0(vcpu, regs->sprg0); + kvmppc_set_sprg1(vcpu, regs->sprg1); + kvmppc_set_sprg2(vcpu, regs->sprg2); + kvmppc_set_sprg3(vcpu, regs->sprg3); + kvmppc_set_sprg4(vcpu, regs->sprg4); + kvmppc_set_sprg5(vcpu, regs->sprg5); + kvmppc_set_sprg6(vcpu, regs->sprg6); + kvmppc_set_sprg7(vcpu, regs->sprg7); for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); @@ -1321,8 +1481,8 @@ static void get_sregs_base(struct kvm_vcpu *vcpu, sregs->u.e.csrr0 = vcpu->arch.csrr0; sregs->u.e.csrr1 = vcpu->arch.csrr1; sregs->u.e.mcsr = vcpu->arch.mcsr; - sregs->u.e.esr = get_guest_esr(vcpu); - sregs->u.e.dear = get_guest_dear(vcpu); + sregs->u.e.esr = kvmppc_get_esr(vcpu); + sregs->u.e.dear = kvmppc_get_dar(vcpu); sregs->u.e.tsr = vcpu->arch.tsr; sregs->u.e.tcr = vcpu->arch.tcr; sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); @@ -1339,8 +1499,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.csrr0 = sregs->u.e.csrr0; vcpu->arch.csrr1 = sregs->u.e.csrr1; vcpu->arch.mcsr = sregs->u.e.mcsr; - set_guest_esr(vcpu, sregs->u.e.esr); - set_guest_dear(vcpu, sregs->u.e.dear); + kvmppc_set_esr(vcpu, sregs->u.e.esr); + kvmppc_set_dar(vcpu, sregs->u.e.dear); vcpu->arch.vrsave = sregs->u.e.vrsave; kvmppc_set_tcr(vcpu, sregs->u.e.tcr); @@ -1461,144 +1621,125 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = 0; - union kvmppc_one_reg val; - int size; - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - - switch (reg->id) { + switch (id) { case KVM_REG_PPC_IAC1: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac1); break; case KVM_REG_PPC_IAC2: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac2); break; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 case KVM_REG_PPC_IAC3: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac3); break; case KVM_REG_PPC_IAC4: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac4); break; #endif case KVM_REG_PPC_DAC1: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1); + *val = get_reg_val(id, vcpu->arch.dbg_reg.dac1); break; case KVM_REG_PPC_DAC2: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); + *val = get_reg_val(id, vcpu->arch.dbg_reg.dac2); break; case KVM_REG_PPC_EPR: { - u32 epr = get_guest_epr(vcpu); - val = get_reg_val(reg->id, epr); + u32 epr = kvmppc_get_epr(vcpu); + *val = get_reg_val(id, epr); break; } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: - val = get_reg_val(reg->id, vcpu->arch.epcr); + *val = get_reg_val(id, vcpu->arch.epcr); break; #endif case KVM_REG_PPC_TCR: - val = get_reg_val(reg->id, vcpu->arch.tcr); + *val = get_reg_val(id, vcpu->arch.tcr); break; case KVM_REG_PPC_TSR: - val = get_reg_val(reg->id, vcpu->arch.tsr); + *val = get_reg_val(id, vcpu->arch.tsr); break; case KVM_REG_PPC_DEBUG_INST: - val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG); + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); break; case KVM_REG_PPC_VRSAVE: - val = get_reg_val(reg->id, vcpu->arch.vrsave); + *val = get_reg_val(id, vcpu->arch.vrsave); break; default: - r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val); break; } - if (r) - return r; - - if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) - r = -EFAULT; - return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = 0; - union kvmppc_one_reg val; - int size; - - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) - return -EFAULT; - - switch (reg->id) { + switch (id) { case KVM_REG_PPC_IAC1: - vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac1 = set_reg_val(id, *val); break; case KVM_REG_PPC_IAC2: - vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac2 = set_reg_val(id, *val); break; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 case KVM_REG_PPC_IAC3: - vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac3 = set_reg_val(id, *val); break; case KVM_REG_PPC_IAC4: - vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac4 = set_reg_val(id, *val); break; #endif case KVM_REG_PPC_DAC1: - vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.dac1 = set_reg_val(id, *val); break; case KVM_REG_PPC_DAC2: - vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.dac2 = set_reg_val(id, *val); break; case KVM_REG_PPC_EPR: { - u32 new_epr = set_reg_val(reg->id, val); + u32 new_epr = set_reg_val(id, *val); kvmppc_set_epr(vcpu, new_epr); break; } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: { - u32 new_epcr = set_reg_val(reg->id, val); + u32 new_epcr = set_reg_val(id, *val); kvmppc_set_epcr(vcpu, new_epcr); break; } #endif case KVM_REG_PPC_OR_TSR: { - u32 tsr_bits = set_reg_val(reg->id, val); + u32 tsr_bits = set_reg_val(id, *val); kvmppc_set_tsr_bits(vcpu, tsr_bits); break; } case KVM_REG_PPC_CLEAR_TSR: { - u32 tsr_bits = set_reg_val(reg->id, val); + u32 tsr_bits = set_reg_val(id, *val); kvmppc_clr_tsr_bits(vcpu, tsr_bits); break; } case KVM_REG_PPC_TSR: { - u32 tsr = set_reg_val(reg->id, val); + u32 tsr = set_reg_val(id, *val); kvmppc_set_tsr(vcpu, tsr); break; } case KVM_REG_PPC_TCR: { - u32 tcr = set_reg_val(reg->id, val); + u32 tcr = set_reg_val(id, *val); kvmppc_set_tcr(vcpu, tcr); break; } case KVM_REG_PPC_VRSAVE: - vcpu->arch.vrsave = set_reg_val(reg->id, val); + vcpu->arch.vrsave = set_reg_val(id, *val); break; default: - r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val); break; } @@ -1698,10 +1839,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) update_timer_ints(vcpu); } -void kvmppc_decrementer_func(unsigned long data) +void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - if (vcpu->arch.tcr & TCR_ARE) { vcpu->arch.dec = vcpu->arch.decar; kvmppc_emulate_dec(vcpu); @@ -1788,6 +1927,57 @@ void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set) #endif } +int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, + enum xlate_readwrite xlrw, struct kvmppc_pte *pte) +{ + int gtlb_index; + gpa_t gpaddr; + +#ifdef CONFIG_KVM_E500V2 + if (!(vcpu->arch.shared->msr & MSR_PR) && + (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) { + pte->eaddr = eaddr; + pte->raddr = (vcpu->arch.magic_page_pa & PAGE_MASK) | + (eaddr & ~PAGE_MASK); + pte->vpage = eaddr >> PAGE_SHIFT; + pte->may_read = true; + pte->may_write = true; + pte->may_execute = true; + + return 0; + } +#endif + + /* Check the guest TLB. */ + switch (xlid) { + case XLATE_INST: + gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr); + break; + case XLATE_DATA: + gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); + break; + default: + BUG(); + } + + /* Do we have a TLB entry at all? */ + if (gtlb_index < 0) + return -ENOENT; + + gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); + + pte->eaddr = eaddr; + pte->raddr = (gpaddr & PAGE_MASK) | (eaddr & ~PAGE_MASK); + pte->vpage = eaddr >> PAGE_SHIFT; + + /* XXX read permissions from the guest TLB */ + pte->may_read = true; + pte->may_write = true; + pte->may_execute = true; + + return 0; +} + int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { @@ -1795,7 +1985,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int n, b = 0, w = 0; if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { - vcpu->arch.shadow_dbg_reg.dbcr0 = 0; + vcpu->arch.dbg_reg.dbcr0 = 0; vcpu->guest_debug = 0; kvm_guest_protect_msr(vcpu, MSR_DE, false); return 0; @@ -1803,15 +1993,13 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, kvm_guest_protect_msr(vcpu, MSR_DE, true); vcpu->guest_debug = dbg->control; - vcpu->arch.shadow_dbg_reg.dbcr0 = 0; - /* Set DBCR0_EDM in guest visible DBCR0 register. */ - vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM; + vcpu->arch.dbg_reg.dbcr0 = 0; if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; + vcpu->arch.dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; /* Code below handles only HW breakpoints */ - dbg_reg = &(vcpu->arch.shadow_dbg_reg); + dbg_reg = &(vcpu->arch.dbg_reg); #ifdef CONFIG_KVM_BOOKE_HV /* diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index b632cd35919..22ba08ea68e 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -32,9 +32,15 @@ #define BOOKE_IRQPRIO_ALIGNMENT 2 #define BOOKE_IRQPRIO_PROGRAM 3 #define BOOKE_IRQPRIO_FP_UNAVAIL 4 +#ifdef CONFIG_SPE_POSSIBLE #define BOOKE_IRQPRIO_SPE_UNAVAIL 5 #define BOOKE_IRQPRIO_SPE_FP_DATA 6 #define BOOKE_IRQPRIO_SPE_FP_ROUND 7 +#endif +#ifdef CONFIG_PPC_E500MC +#define BOOKE_IRQPRIO_ALTIVEC_UNAVAIL 5 +#define BOOKE_IRQPRIO_ALTIVEC_ASSIST 6 +#endif #define BOOKE_IRQPRIO_SYSCALL 8 #define BOOKE_IRQPRIO_AP_UNAVAIL 9 #define BOOKE_IRQPRIO_DTLB_MISS 10 @@ -99,13 +105,6 @@ enum int_class { void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); -extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu); -extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance); -extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, - ulong spr_val); -extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, - ulong *spr_val); extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -123,40 +122,6 @@ extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); -/* - * Load up guest vcpu FP state if it's needed. - * It also set the MSR_FP in thread so that host know - * we're holding FPU, and then host can help to save - * guest vcpu FP state if other threads require to use FPU. - * This simulates an FP unavailable fault. - * - * It requires to be called with preemption disabled. - */ -static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_PPC_FPU - if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) { - enable_kernel_fp(); - load_fp_state(&vcpu->arch.fp); - current->thread.fp_save_area = &vcpu->arch.fp; - current->thread.regs->msr |= MSR_FP; - } -#endif -} - -/* - * Save guest vcpu FP state into thread. - * It requires to be called with preemption disabled. - */ -static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_PPC_FPU - if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP)) - giveup_fpu(current); - current->thread.fp_save_area = NULL; -#endif -} - static inline void kvmppc_clear_dbsr(void) { mtspr(SPRN_DBSR, mfspr(SPRN_DBSR)); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 27a4b2877c1..a82f64502de 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -25,6 +25,7 @@ #define OP_19_XOP_RFI 50 #define OP_19_XOP_RFCI 51 +#define OP_19_XOP_RFDI 39 #define OP_31_XOP_MFMSR 83 #define OP_31_XOP_WRTEE 131 @@ -37,6 +38,12 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); } +static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.dsrr0; + kvmppc_set_msr(vcpu, vcpu->arch.dsrr1); +} + static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu) { vcpu->arch.pc = vcpu->arch.csrr0; @@ -65,6 +72,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, *advance = 0; break; + case OP_19_XOP_RFDI: + kvmppc_emul_rfdi(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_RFDI_EXITS); + *advance = 0; + break; + default: emulated = EMULATE_FAIL; break; @@ -118,6 +131,7 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { int emulated = EMULATE_DONE; + bool debug_inst = false; switch (sprn) { case SPRN_DEAR: @@ -132,14 +146,128 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_CSRR1: vcpu->arch.csrr1 = spr_val; break; + case SPRN_DSRR0: + vcpu->arch.dsrr0 = spr_val; + break; + case SPRN_DSRR1: + vcpu->arch.dsrr1 = spr_val; + break; + case SPRN_IAC1: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac1 = spr_val; + break; + case SPRN_IAC2: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac2 = spr_val; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case SPRN_IAC3: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac3 = spr_val; + break; + case SPRN_IAC4: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac4 = spr_val; + break; +#endif + case SPRN_DAC1: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.dac1 = spr_val; + break; + case SPRN_DAC2: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.dac2 = spr_val; + break; case SPRN_DBCR0: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + spr_val &= (DBCR0_IDM | DBCR0_IC | DBCR0_BT | DBCR0_TIE | + DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 | + DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W); + vcpu->arch.dbg_reg.dbcr0 = spr_val; break; case SPRN_DBCR1: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; vcpu->arch.dbg_reg.dbcr1 = spr_val; break; + case SPRN_DBCR2: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.dbcr2 = spr_val; + break; case SPRN_DBSR: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + vcpu->arch.dbsr &= ~spr_val; + if (!(vcpu->arch.dbsr & ~DBSR_IDE)) + kvmppc_core_dequeue_debug(vcpu); break; case SPRN_TSR: kvmppc_clr_tsr_bits(vcpu, spr_val); @@ -165,16 +293,16 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) * guest (PR-mode only). */ case SPRN_SPRG4: - vcpu->arch.shared->sprg4 = spr_val; + kvmppc_set_sprg4(vcpu, spr_val); break; case SPRN_SPRG5: - vcpu->arch.shared->sprg5 = spr_val; + kvmppc_set_sprg5(vcpu, spr_val); break; case SPRN_SPRG6: - vcpu->arch.shared->sprg6 = spr_val; + kvmppc_set_sprg6(vcpu, spr_val); break; case SPRN_SPRG7: - vcpu->arch.shared->sprg7 = spr_val; + kvmppc_set_sprg7(vcpu, spr_val); break; case SPRN_IVPR: @@ -252,6 +380,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) emulated = EMULATE_FAIL; } + if (debug_inst) { + current->thread.debug = vcpu->arch.dbg_reg; + switch_booke_debug_regs(&vcpu->arch.dbg_reg); + } return emulated; } @@ -278,12 +410,43 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_CSRR1: *spr_val = vcpu->arch.csrr1; break; + case SPRN_DSRR0: + *spr_val = vcpu->arch.dsrr0; + break; + case SPRN_DSRR1: + *spr_val = vcpu->arch.dsrr1; + break; + case SPRN_IAC1: + *spr_val = vcpu->arch.dbg_reg.iac1; + break; + case SPRN_IAC2: + *spr_val = vcpu->arch.dbg_reg.iac2; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case SPRN_IAC3: + *spr_val = vcpu->arch.dbg_reg.iac3; + break; + case SPRN_IAC4: + *spr_val = vcpu->arch.dbg_reg.iac4; + break; +#endif + case SPRN_DAC1: + *spr_val = vcpu->arch.dbg_reg.dac1; + break; + case SPRN_DAC2: + *spr_val = vcpu->arch.dbg_reg.dac2; + break; case SPRN_DBCR0: *spr_val = vcpu->arch.dbg_reg.dbcr0; + if (vcpu->guest_debug) + *spr_val = *spr_val | DBCR0_EDM; break; case SPRN_DBCR1: *spr_val = vcpu->arch.dbg_reg.dbcr1; break; + case SPRN_DBCR2: + *spr_val = vcpu->arch.dbg_reg.dbcr2; + break; case SPRN_DBSR: *spr_val = vcpu->arch.dbsr; break; diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 2c6deb5ef2f..84c308a9a37 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -21,7 +21,6 @@ #include <asm/ppc_asm.h> #include <asm/kvm_asm.h> #include <asm/reg.h> -#include <asm/mmu-44x.h> #include <asm/page.h> #include <asm/asm-offsets.h> @@ -424,10 +423,6 @@ lightweight_exit: mtspr SPRN_PID1, r3 #endif -#ifdef CONFIG_44x - iccci 0, 0 /* XXX hack */ -#endif - /* Load some guest volatiles. */ lwz r0, VCPU_GPR(R0)(r4) lwz r2, VCPU_GPR(R2)(r4) diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index a1712b818a5..81bd8a07aa5 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -24,12 +24,10 @@ #include <asm/ppc_asm.h> #include <asm/kvm_asm.h> #include <asm/reg.h> -#include <asm/mmu-44x.h> #include <asm/page.h> #include <asm/asm-compat.h> #include <asm/asm-offsets.h> #include <asm/bitsperlong.h> -#include <asm/thread_info.h> #ifdef CONFIG_64BIT #include <asm/exception-64e.h> @@ -122,38 +120,14 @@ 1: .if \flags & NEED_EMU - /* - * This assumes you have external PID support. - * To support a bookehv CPU without external PID, you'll - * need to look up the TLB entry and create a temporary mapping. - * - * FIXME: we don't currently handle if the lwepx faults. PR-mode - * booke doesn't handle it either. Since Linux doesn't use - * broadcast tlbivax anymore, the only way this should happen is - * if the guest maps its memory execute-but-not-read, or if we - * somehow take a TLB miss in the middle of this entry code and - * evict the relevant entry. On e500mc, all kernel lowmem is - * bolted into TLB1 large page mappings, and we don't use - * broadcast invalidates, so we should not take a TLB miss here. - * - * Later we'll need to deal with faults here. Disallowing guest - * mappings that are execute-but-not-read could be an option on - * e500mc, but not on chips with an LRAT if it is used. - */ - - mfspr r3, SPRN_EPLC /* will already have correct ELPID and EGS */ PPC_STL r15, VCPU_GPR(R15)(r4) PPC_STL r16, VCPU_GPR(R16)(r4) PPC_STL r17, VCPU_GPR(R17)(r4) PPC_STL r18, VCPU_GPR(R18)(r4) PPC_STL r19, VCPU_GPR(R19)(r4) - mr r8, r3 PPC_STL r20, VCPU_GPR(R20)(r4) - rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS PPC_STL r21, VCPU_GPR(R21)(r4) - rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR PPC_STL r22, VCPU_GPR(R22)(r4) - rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID PPC_STL r23, VCPU_GPR(R23)(r4) PPC_STL r24, VCPU_GPR(R24)(r4) PPC_STL r25, VCPU_GPR(R25)(r4) @@ -163,33 +137,15 @@ PPC_STL r29, VCPU_GPR(R29)(r4) PPC_STL r30, VCPU_GPR(R30)(r4) PPC_STL r31, VCPU_GPR(R31)(r4) - mtspr SPRN_EPLC, r8 - - /* disable preemption, so we are sure we hit the fixup handler */ - CURRENT_THREAD_INFO(r8, r1) - li r7, 1 - stw r7, TI_PREEMPT(r8) - - isync /* - * In case the read goes wrong, we catch it and write an invalid value - * in LAST_INST instead. + * We don't use external PID support. lwepx faults would need to be + * handled by KVM and this implies aditional code in DO_KVM (for + * DTB_MISS, DSI and LRAT) to check ESR[EPID] and EPLC[EGS] which + * is too intrusive for the host. Get last instuction in + * kvmppc_get_last_inst(). */ -1: lwepx r9, 0, r5 -2: -.section .fixup, "ax" -3: li r9, KVM_INST_FETCH_FAILED - b 2b -.previous -.section __ex_table,"a" - PPC_LONG_ALIGN - PPC_LONG 1b,3b -.previous - - mtspr SPRN_EPLC, r3 - li r7, 0 - stw r7, TI_PREEMPT(r8) + li r9, KVM_INST_FETCH_FAILED stw r9, VCPU_LAST_INST(r4) .endif @@ -282,7 +238,7 @@ kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \ kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR) kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \ - SPRN_SRR0, SPRN_SRR1,NEED_ESR + SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU) kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \ @@ -300,11 +256,9 @@ kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \ SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \ - SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \ +kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \ +kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 @@ -394,7 +348,7 @@ kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \ SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR) -kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR +kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU) kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 @@ -405,9 +359,6 @@ kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \ kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0 kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \ @@ -441,6 +392,7 @@ _GLOBAL(kvmppc_resume_host) #ifdef CONFIG_64BIT PPC_LL r3, PACA_SPRG_VDSO(r13) #endif + mfspr r5, SPRN_SPRG9 PPC_STD(r6, VCPU_SHARED_SPRG4, r11) mfspr r8, SPRN_SPRG6 PPC_STD(r7, VCPU_SHARED_SPRG5, r11) @@ -448,6 +400,7 @@ _GLOBAL(kvmppc_resume_host) #ifdef CONFIG_64BIT mtspr SPRN_SPRG_VDSO_WRITE, r3 #endif + PPC_STD(r5, VCPU_SPRG9, r4) PPC_STD(r8, VCPU_SHARED_SPRG6, r11) mfxer r3 PPC_STD(r9, VCPU_SHARED_SPRG7, r11) @@ -682,7 +635,9 @@ lightweight_exit: mtspr SPRN_SPRG5W, r6 PPC_LD(r8, VCPU_SHARED_SPRG7, r11) mtspr SPRN_SPRG6W, r7 + PPC_LD(r5, VCPU_SPRG9, r4) mtspr SPRN_SPRG7W, r8 + mtspr SPRN_SPRG9, r5 /* Load some guest volatiles. */ PPC_LL r3, VCPU_LR(r4) diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index a326178bdea..72920bed3ac 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -22,6 +22,7 @@ #include <linux/kvm_host.h> #include <asm/mmu-book3e.h> #include <asm/tlb.h> +#include <asm/cputhreads.h> enum vcpu_ftr { VCPU_FTR_MMU_V2 @@ -289,6 +290,25 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500); #define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe) #define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu) #define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS) + +/* + * These functions should be called with preemption disabled + * and the returned value is valid only in that context + */ +static inline int get_thread_specific_lpid(int vm_lpid) +{ + int vcpu_lpid = vm_lpid; + + if (threads_per_core == 2) + vcpu_lpid |= smp_processor_id() & 1; + + return vcpu_lpid; +} + +static inline int get_lpid(struct kvm_vcpu *vcpu) +{ + return get_thread_specific_lpid(vcpu->kvm->arch.lpid); +} #else unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu, struct kvm_book3e_206_tlb_entry *gtlbe); diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 002d5176414..ce7291c79f6 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -250,7 +250,16 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va spr_val); break; + case SPRN_PWRMGTCR0: + /* + * Guest relies on host power management configurations + * Treat the request as a general store + */ + vcpu->arch.pwrmgtcr0 = spr_val; + break; + /* extra exceptions */ +#ifdef CONFIG_SPE_POSSIBLE case SPRN_IVOR32: vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; break; @@ -260,6 +269,15 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va case SPRN_IVOR34: vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val; break; +#endif +#ifdef CONFIG_ALTIVEC + case SPRN_IVOR32: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL] = spr_val; + break; + case SPRN_IVOR33: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST] = spr_val; + break; +#endif case SPRN_IVOR35: vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; break; @@ -368,7 +386,12 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v *spr_val = vcpu->arch.eptcfg; break; + case SPRN_PWRMGTCR0: + *spr_val = vcpu->arch.pwrmgtcr0; + break; + /* extra exceptions */ +#ifdef CONFIG_SPE_POSSIBLE case SPRN_IVOR32: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; break; @@ -378,6 +401,15 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v case SPRN_IVOR34: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; break; +#endif +#ifdef CONFIG_ALTIVEC + case SPRN_IVOR32: + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL]; + break; + case SPRN_IVOR33: + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST]; + break; +#endif case SPRN_IVOR35: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; break; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 86903d3f5a0..769778f855b 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -69,7 +69,8 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) * writing shadow tlb entry to host TLB */ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, - uint32_t mas0) + uint32_t mas0, + uint32_t lpid) { unsigned long flags; @@ -80,7 +81,7 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); #ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_MAS8, stlbe->mas8); + mtspr(SPRN_MAS8, MAS8_TGS | get_thread_specific_lpid(lpid)); #endif asm volatile("isync; tlbwe" : : : "memory"); @@ -107,11 +108,15 @@ static u32 get_host_mas0(unsigned long eaddr) { unsigned long flags; u32 mas0; + u32 mas4; local_irq_save(flags); mtspr(SPRN_MAS6, 0); + mas4 = mfspr(SPRN_MAS4); + mtspr(SPRN_MAS4, mas4 & ~MAS4_TLBSEL_MASK); asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET)); mas0 = mfspr(SPRN_MAS0); + mtspr(SPRN_MAS4, mas4); local_irq_restore(flags); return mas0; @@ -125,11 +130,12 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, if (tlbsel == 0) { mas0 = get_host_mas0(stlbe->mas2); - __write_host_tlbe(stlbe, mas0); + __write_host_tlbe(stlbe, mas0, vcpu_e500->vcpu.kvm->arch.lpid); } else { __write_host_tlbe(stlbe, MAS0_TLBSEL(1) | - MAS0_ESEL(to_htlb1_esel(sesel))); + MAS0_ESEL(to_htlb1_esel(sesel)), + vcpu_e500->vcpu.kvm->arch.lpid); } } @@ -172,7 +178,7 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu) MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; magic.mas8 = 0; - __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); + __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index), 0); preempt_enable(); } #endif @@ -313,10 +319,6 @@ static void kvmppc_e500_setup_stlbe( stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR); stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); - -#ifdef CONFIG_KVM_BOOKE_HV - stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; -#endif } static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, @@ -607,6 +609,104 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, } } +#ifdef CONFIG_KVM_BOOKE_HV +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *instr) +{ + gva_t geaddr; + hpa_t addr; + hfn_t pfn; + hva_t eaddr; + u32 mas1, mas2, mas3; + u64 mas7_mas3; + struct page *page; + unsigned int addr_space, psize_shift; + bool pr; + unsigned long flags; + + /* Search TLB for guest pc to get the real address */ + geaddr = kvmppc_get_pc(vcpu); + + addr_space = (vcpu->arch.shared->msr & MSR_IS) >> MSR_IR_LG; + + local_irq_save(flags); + mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space); + mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(vcpu)); + asm volatile("tlbsx 0, %[geaddr]\n" : : + [geaddr] "r" (geaddr)); + mtspr(SPRN_MAS5, 0); + mtspr(SPRN_MAS8, 0); + mas1 = mfspr(SPRN_MAS1); + mas2 = mfspr(SPRN_MAS2); + mas3 = mfspr(SPRN_MAS3); +#ifdef CONFIG_64BIT + mas7_mas3 = mfspr(SPRN_MAS7_MAS3); +#else + mas7_mas3 = ((u64)mfspr(SPRN_MAS7) << 32) | mas3; +#endif + local_irq_restore(flags); + + /* + * If the TLB entry for guest pc was evicted, return to the guest. + * There are high chances to find a valid TLB entry next time. + */ + if (!(mas1 & MAS1_VALID)) + return EMULATE_AGAIN; + + /* + * Another thread may rewrite the TLB entry in parallel, don't + * execute from the address if the execute permission is not set + */ + pr = vcpu->arch.shared->msr & MSR_PR; + if (unlikely((pr && !(mas3 & MAS3_UX)) || + (!pr && !(mas3 & MAS3_SX)))) { + pr_err_ratelimited( + "%s: Instuction emulation from guest addres %08lx without execute permission\n", + __func__, geaddr); + return EMULATE_AGAIN; + } + + /* + * The real address will be mapped by a cacheable, memory coherent, + * write-back page. Check for mismatches when LRAT is used. + */ + if (has_feature(vcpu, VCPU_FTR_MMU_V2) && + unlikely((mas2 & MAS2_I) || (mas2 & MAS2_W) || !(mas2 & MAS2_M))) { + pr_err_ratelimited( + "%s: Instuction emulation from guest addres %08lx mismatches storage attributes\n", + __func__, geaddr); + return EMULATE_AGAIN; + } + + /* Get pfn */ + psize_shift = MAS1_GET_TSIZE(mas1) + 10; + addr = (mas7_mas3 & (~0ULL << psize_shift)) | + (geaddr & ((1ULL << psize_shift) - 1ULL)); + pfn = addr >> PAGE_SHIFT; + + /* Guard against emulation from devices area */ + if (unlikely(!page_is_ram(pfn))) { + pr_err_ratelimited("%s: Instruction emulation from non-RAM host addres %08llx is not supported\n", + __func__, addr); + return EMULATE_AGAIN; + } + + /* Map a page and get guest's instruction */ + page = pfn_to_page(pfn); + eaddr = (unsigned long)kmap_atomic(page); + *instr = *(u32 *)(eaddr | (unsigned long)(addr & ~PAGE_MASK)); + kunmap_atomic((u32 *)eaddr); + + return EMULATE_DONE; +} +#else +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *instr) +{ + return EMULATE_AGAIN; +} +#endif + /************* MMU Notifiers *************/ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) @@ -630,7 +730,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return 0; } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { /* XXX could be more clever ;) */ return 0; diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 17e45627922..2fdc8722e32 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -48,10 +48,11 @@ void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type) return; } - - tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id; + preempt_disable(); + tag = PPC_DBELL_LPID(get_lpid(vcpu)) | vcpu->vcpu_id; mb(); ppc_msgsnd(dbell_type, 0, tag); + preempt_enable(); } /* gtlbe must not be mapped by more than one host tlb entry */ @@ -60,12 +61,11 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, { unsigned int tid, ts; gva_t eaddr; - u32 val, lpid; + u32 val; unsigned long flags; ts = get_tlb_ts(gtlbe); tid = get_tlb_tid(gtlbe); - lpid = vcpu_e500->vcpu.kvm->arch.lpid; /* We search the host TLB to invalidate its shadow TLB entry */ val = (tid << 16) | ts; @@ -74,7 +74,7 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, local_irq_save(flags); mtspr(SPRN_MAS6, val); - mtspr(SPRN_MAS5, MAS5_SGS | lpid); + mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu)); asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr)); val = mfspr(SPRN_MAS1); @@ -95,7 +95,7 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500) unsigned long flags; local_irq_save(flags); - mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid); + mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu)); asm volatile("tlbilxlpid"); mtspr(SPRN_MAS5, 0); local_irq_restore(flags); @@ -110,7 +110,8 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) { } -static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu); +/* We use two lpids per VM */ +static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid); static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) { @@ -118,10 +119,12 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) kvmppc_booke_vcpu_load(vcpu, cpu); - mtspr(SPRN_LPID, vcpu->kvm->arch.lpid); + mtspr(SPRN_LPID, get_lpid(vcpu)); mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr); mtspr(SPRN_GPIR, vcpu->vcpu_id); mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp); + vcpu->arch.eplc = EPC_EGS | (get_lpid(vcpu) << EPC_ELPID_SHIFT); + vcpu->arch.epsc = vcpu->arch.eplc; mtspr(SPRN_EPLC, vcpu->arch.eplc); mtspr(SPRN_EPSC, vcpu->arch.epsc); @@ -141,12 +144,10 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __get_cpu_var(last_vcpu_on_cpu) != vcpu) { + __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __get_cpu_var(last_vcpu_on_cpu) = vcpu; + __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] = vcpu; } - - kvmppc_load_guest_fp(vcpu); } static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu) @@ -179,6 +180,16 @@ int kvmppc_core_check_processor_compat(void) r = 0; else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) r = 0; +#ifdef CONFIG_ALTIVEC + /* + * Since guests have the priviledge to enable AltiVec, we need AltiVec + * support in the host to save/restore their context. + * Don't use CPU_FTR_ALTIVEC to identify cores with AltiVec unit + * because it's cleared in the absence of CONFIG_ALTIVEC! + */ + else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0) + r = 0; +#endif else r = -ENOTSUPP; @@ -194,9 +205,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) #ifdef CONFIG_64BIT vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM; #endif - vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP; - vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT); - vcpu->arch.epsc = vcpu->arch.eplc; + vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_PMMP; vcpu->arch.pvr = mfspr(SPRN_PVR); vcpu_e500->svr = mfspr(SPRN_SVR); @@ -267,14 +276,32 @@ static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu, static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + int r = 0; + + switch (id) { + case KVM_REG_PPC_SPRG9: + *val = get_reg_val(id, vcpu->arch.sprg9); + break; + default: + r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + } + return r; } static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); + int r = 0; + + switch (id) { + case KVM_REG_PPC_SPRG9: + vcpu->arch.sprg9 = set_reg_val(id, *val); + break; + default: + r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); + } + return r; } @@ -338,13 +365,26 @@ static int kvmppc_core_init_vm_e500mc(struct kvm *kvm) if (lpid < 0) return lpid; + /* + * Use two lpids per VM on cores with two threads like e6500. Use + * even numbers to speedup vcpu lpid computation with consecutive lpids + * per VM. vm1 will use lpids 2 and 3, vm2 lpids 4 and 5, and so on. + */ + if (threads_per_core == 2) + lpid <<= 1; + kvm->arch.lpid = lpid; return 0; } static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm) { - kvmppc_free_lpid(kvm->arch.lpid); + int lpid = kvm->arch.lpid; + + if (threads_per_core == 2) + lpid >>= 1; + + kvmppc_free_lpid(lpid); } static struct kvmppc_ops kvm_ops_e500mc = { @@ -372,7 +412,13 @@ static int __init kvmppc_e500mc_init(void) if (r) goto err_out; - kvmppc_init_lpid(64); + /* + * Use two lpids per VM on dual threaded processors like e6500 + * to workarround the lack of tlb write conditional instruction. + * Expose half the number of available hardware lpids to the lpid + * allocator. + */ + kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core); kvmppc_claim_lpid(0); /* host */ r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index da86d9ba347..5cc2e7af3a7 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -207,36 +207,28 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) return emulated; } -/* XXX to do: - * lhax - * lhaux - * lswx - * lswi - * stswx - * stswi - * lha - * lhau - * lmw - * stmw - * - */ /* XXX Should probably auto-generate instruction decoding for a particular core * from opcode tables in the future. */ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { - u32 inst = kvmppc_get_last_inst(vcpu); - int ra = get_ra(inst); - int rs = get_rs(inst); - int rt = get_rt(inst); - int sprn = get_sprn(inst); - enum emulation_result emulated = EMULATE_DONE; + u32 inst; + int rs, rt, sprn; + enum emulation_result emulated; int advance = 1; /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); + if (emulated != EMULATE_DONE) + return emulated; + pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); + rs = get_rs(inst); + rt = get_rt(inst); + sprn = get_sprn(inst); + switch (get_op(inst)) { case OP_TRAP: #ifdef CONFIG_PPC_BOOK3S @@ -264,198 +256,37 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) #endif advance = 0; break; - case OP_31_XOP_LWZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; - - case OP_31_XOP_LBZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; - - case OP_31_XOP_LBZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_STWX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - break; - - case OP_31_XOP_STBX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - break; - - case OP_31_XOP_STBUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LHAX: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; case OP_31_XOP_MFSPR: emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt); break; - case OP_31_XOP_STHX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - break; - - case OP_31_XOP_STHUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - case OP_31_XOP_MTSPR: emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); break; - case OP_31_XOP_DCBST: - case OP_31_XOP_DCBF: - case OP_31_XOP_DCBI: - /* Do nothing. The guest is performing dcbi because - * hardware DMA is not snooped by the dcache, but - * emulated DMA either goes through the dcache as - * normal writes, or the host kernel has handled dcache - * coherence. */ - break; - - case OP_31_XOP_LWBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); - break; - case OP_31_XOP_TLBSYNC: break; - case OP_31_XOP_STWBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 0); - break; - - case OP_31_XOP_LHBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); - break; - - case OP_31_XOP_STHBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 0); - break; - default: /* Attempt core-specific emulation below. */ emulated = EMULATE_FAIL; } break; - case OP_LWZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; - - /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */ - case OP_LD: - rt = get_rt(inst); - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); - break; - - case OP_LWZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LBZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; - - case OP_LBZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STW: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - break; - - /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */ - case OP_STD: - rs = get_rs(inst); - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 8, 1); - break; - - case OP_STWU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STB: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - break; - - case OP_STBU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_LHZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHA: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_LHAU: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STH: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - break; + case 0: + /* + * Instruction with primary opcode 0. Based on PowerISA + * these are illegal instructions. + */ + if (inst == KVMPPC_INST_SW_BREAKPOINT) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.address = kvmppc_get_pc(vcpu); + emulated = EMULATE_EXIT_USER; + advance = 0; + } else + emulated = EMULATE_FAIL; - case OP_STHU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; default: diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c new file mode 100644 index 00000000000..6d3c0ee1d74 --- /dev/null +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -0,0 +1,272 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * Copyright 2011 Freescale Semiconductor, Inc. + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/jiffies.h> +#include <linux/hrtimer.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm_host.h> +#include <linux/clockchips.h> + +#include <asm/reg.h> +#include <asm/time.h> +#include <asm/byteorder.h> +#include <asm/kvm_ppc.h> +#include <asm/disassemble.h> +#include <asm/ppc-opcode.h> +#include "timing.h" +#include "trace.h" + +/* XXX to do: + * lhax + * lhaux + * lswx + * lswi + * stswx + * stswi + * lha + * lhau + * lmw + * stmw + * + */ +int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 inst; + int ra, rs, rt; + enum emulation_result emulated; + int advance = 1; + + /* this default type might be overwritten by subcategories */ + kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); + if (emulated != EMULATE_DONE) + return emulated; + + ra = get_ra(inst); + rs = get_rs(inst); + rt = get_rt(inst); + + switch (get_op(inst)) { + case 31: + switch (get_xop(inst)) { + case OP_31_XOP_LWZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + case OP_31_XOP_LBZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case OP_31_XOP_LBZUX: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_STWX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + break; + + case OP_31_XOP_STBX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + break; + + case OP_31_XOP_STBUX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_LHAX: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + + case OP_31_XOP_LHZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case OP_31_XOP_LHZUX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_STHX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + break; + + case OP_31_XOP_STHUX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_DCBST: + case OP_31_XOP_DCBF: + case OP_31_XOP_DCBI: + /* Do nothing. The guest is performing dcbi because + * hardware DMA is not snooped by the dcache, but + * emulated DMA either goes through the dcache as + * normal writes, or the host kernel has handled dcache + * coherence. */ + break; + + case OP_31_XOP_LWBRX: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); + break; + + case OP_31_XOP_STWBRX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 0); + break; + + case OP_31_XOP_LHBRX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); + break; + + case OP_31_XOP_STHBRX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 0); + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + + case OP_LWZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */ + case OP_LD: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); + break; + + case OP_LWZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LBZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case OP_LBZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STW: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + break; + + /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */ + case OP_STD: + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 8, 1); + break; + + case OP_STWU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STB: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + break; + + case OP_STBU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LHZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case OP_LHZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LHA: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + + case OP_LHAU: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STH: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + break; + + case OP_STHU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + default: + emulated = EMULATE_FAIL; + break; + } + + if (emulated == EMULATE_FAIL) { + advance = 0; + kvmppc_core_queue_program(vcpu, 0); + } + + trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated); + + /* Advance past emulated instruction. */ + if (advance) + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); + + return emulated; +} diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index b68d0dc9479..39b3a8f816f 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1826,8 +1826,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return 0; } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -1839,7 +1838,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin; if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 61c738ab128..c1f8f53cd31 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -190,6 +190,25 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) vcpu->arch.magic_page_pa = param1 & ~0xfffULL; vcpu->arch.magic_page_ea = param2 & ~0xfffULL; +#ifdef CONFIG_PPC_64K_PAGES + /* + * Make sure our 4k magic page is in the same window of a 64k + * page within the guest and within the host's page. + */ + if ((vcpu->arch.magic_page_pa & 0xf000) != + ((ulong)vcpu->arch.shared & 0xf000)) { + void *old_shared = vcpu->arch.shared; + ulong shared = (ulong)vcpu->arch.shared; + void *new_shared; + + shared &= PAGE_MASK; + shared |= vcpu->arch.magic_page_pa & 0xf000; + new_shared = (void*)shared; + memcpy(new_shared, old_shared, 0x1000); + vcpu->arch.shared = new_shared; + } +#endif + r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; r = EV_SUCCESS; @@ -198,7 +217,6 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) case KVM_HCALL_TOKEN(KVM_HC_FEATURES): r = EV_SUCCESS; #if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2) - /* XXX Missing magic page on 44x */ r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); #endif @@ -254,13 +272,16 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) enum emulation_result er; int r; - er = kvmppc_emulate_instruction(run, vcpu); + er = kvmppc_emulate_loadstore(vcpu); switch (er) { case EMULATE_DONE: /* Future optimization: only reload non-volatiles if they were * actually modified. */ r = RESUME_GUEST_NV; break; + case EMULATE_AGAIN: + r = RESUME_GUEST; + break; case EMULATE_DO_MMIO: run->exit_reason = KVM_EXIT_MMIO; /* We must reload nonvolatiles because "update" load/store @@ -270,11 +291,15 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) r = RESUME_HOST_NV; break; case EMULATE_FAIL: + { + u32 last_inst; + + kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); /* XXX Deliver Program interrupt to guest. */ - printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, - kvmppc_get_last_inst(vcpu)); + pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); r = RESUME_HOST; break; + } default: WARN_ON(1); r = RESUME_GUEST; @@ -284,22 +309,89 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); -int kvm_arch_hardware_enable(void *garbage) +int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) { - return 0; + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; + struct kvmppc_pte pte; + int r; + + vcpu->stat.st++; + + r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_WRITE, &pte); + if (r < 0) + return r; + + *eaddr = pte.raddr; + + if (!pte.may_write) + return -EPERM; + + /* Magic page override */ + if (kvmppc_supports_magic_page(vcpu) && mp_pa && + ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) && + !(kvmppc_get_msr(vcpu) & MSR_PR)) { + void *magic = vcpu->arch.shared; + magic += pte.eaddr & 0xfff; + memcpy(magic, ptr, size); + return EMULATE_DONE; + } + + if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; + + return EMULATE_DONE; } +EXPORT_SYMBOL_GPL(kvmppc_st); -void kvm_arch_hardware_disable(void *garbage) +int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) { + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; + struct kvmppc_pte pte; + int rc; + + vcpu->stat.ld++; + + rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_READ, &pte); + if (rc) + return rc; + + *eaddr = pte.raddr; + + if (!pte.may_read) + return -EPERM; + + if (!data && !pte.may_execute) + return -ENOEXEC; + + /* Magic page override */ + if (kvmppc_supports_magic_page(vcpu) && mp_pa && + ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) && + !(kvmppc_get_msr(vcpu) & MSR_PR)) { + void *magic = vcpu->arch.shared; + magic += pte.eaddr & 0xfff; + memcpy(ptr, magic, size); + return EMULATE_DONE; + } + + if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; + + return EMULATE_DONE; } +EXPORT_SYMBOL_GPL(kvmppc_ld); -int kvm_arch_hardware_setup(void) +int kvm_arch_hardware_enable(void) { return 0; } -void kvm_arch_hardware_unsetup(void) +int kvm_arch_hardware_setup(void) { + return 0; } void kvm_arch_check_processor_compat(void *rtn) @@ -362,18 +454,20 @@ void kvm_arch_destroy_vm(struct kvm *kvm) module_put(kvm->arch.kvm_ops->owner); } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; - /* FIXME!! - * Should some of this be vm ioctl ? is it possible now ? - */ + /* Assume we're using HV mode when the HV module is loaded */ int hv_enabled = kvmppc_hv_ops ? 1 : 0; + if (kvm) { + /* + * Hooray - we know which VM type we're running on. Depend on + * that rather than the guess above. + */ + hv_enabled = is_kvmppc_hv_enabled(kvm); + } + switch (ext) { #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_SREGS: @@ -387,6 +481,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: + case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: @@ -417,6 +512,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_ALLOC_HTAB: case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: + case KVM_CAP_PPC_ENABLE_HCALL: #ifdef CONFIG_KVM_XICS case KVM_CAP_IRQ_XICS: #endif @@ -500,10 +596,6 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, return kvmppc_core_create_memslot(kvm, slot, npages); } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, @@ -520,10 +612,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvmppc_core_commit_memory_region(kvm, mem, old); } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { @@ -550,7 +638,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { /* Make sure we're not using the vcpu anymore */ hrtimer_cancel(&vcpu->arch.dec_timer); - tasklet_kill(&vcpu->arch.tasklet); kvmppc_remove_vcpu_debugfs(vcpu); @@ -576,16 +663,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvmppc_core_pending_dec(vcpu); } -/* - * low level hrtimer wake routine. Because this runs in hardirq context - * we schedule a tasklet to do the real work. - */ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); - tasklet_schedule(&vcpu->arch.tasklet); + kvmppc_decrementer_func(vcpu); return HRTIMER_NORESTART; } @@ -595,7 +678,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) int ret; hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; vcpu->arch.dec_expires = ~(u64)0; @@ -635,12 +717,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #endif } -static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, - struct kvm_run *run) -{ - kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data); -} - static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { @@ -825,6 +901,103 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvmppc_handle_store); +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = 0; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + r = kvmppc_get_one_reg(vcpu, reg->id, &val); + if (r == -EINVAL) { + r = 0; + switch (reg->id) { +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); + break; + case KVM_REG_PPC_VRSAVE: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vrsave = set_reg_val(reg->id, val); + break; +#endif /* CONFIG_ALTIVEC */ + default: + r = -EINVAL; + break; + } + } + + if (r) + return r; + + if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) + r = -EFAULT; + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) + return -EFAULT; + + r = kvmppc_set_one_reg(vcpu, reg->id, &val); + if (r == -EINVAL) { + r = 0; + switch (reg->id) { +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); + break; + case KVM_REG_PPC_VRSAVE: + val = get_reg_val(reg->id, vcpu->arch.vrsave); + break; +#endif /* CONFIG_ALTIVEC */ + default: + r = -EINVAL; + break; + } + } + + return r; +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r; @@ -837,10 +1010,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (!vcpu->mmio_is_write) kvmppc_complete_mmio_load(vcpu, run); vcpu->mmio_needed = 0; - } else if (vcpu->arch.dcr_needed) { - if (!vcpu->arch.dcr_is_write) - kvmppc_complete_dcr_load(vcpu, run); - vcpu->arch.dcr_needed = 0; } else if (vcpu->arch.osi_needed) { u64 *gprs = run->osi.gprs; int i; @@ -1099,6 +1268,42 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, return 0; } + +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + case KVM_CAP_PPC_ENABLE_HCALL: { + unsigned long hcall = cap->args[0]; + + r = -EINVAL; + if (hcall > MAX_HCALL_OPCODE || (hcall & 3) || + cap->args[1] > 1) + break; + if (!kvmppc_book3s_hcall_implemented(kvm, hcall)) + break; + if (cap->args[1]) + set_bit(hcall / 4, kvm->arch.enabled_hcalls); + else + clear_bit(hcall / 4, kvm->arch.enabled_hcalls); + r = 0; + break; + } +#endif + default: + r = -EINVAL; + break; + } + + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1118,6 +1323,15 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } + case KVM_ENABLE_CAP: + { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vm_ioctl_enable_cap(kvm, &cap); + break; + } #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CREATE_SPAPR_TCE: { struct kvm_create_spapr_tce create_tce; @@ -1200,7 +1414,4 @@ int kvm_arch_init(void *opaque) return 0; } -void kvm_arch_exit(void) -{ - -} +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index 07b6110a4bb..e44d2b2ea97 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -110,7 +110,6 @@ void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { [MMIO_EXITS] = "MMIO", - [DCR_EXITS] = "DCR", [SIGNAL_EXITS] = "SIGNAL", [ITLB_REAL_MISS_EXITS] = "ITLBREAL", [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT", diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index bf191e72b2d..3123690c82d 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -63,9 +63,6 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) case EMULATED_INST_EXITS: vcpu->stat.emulated_inst_exits++; break; - case DCR_EXITS: - vcpu->stat.dcr_exits++; - break; case DSI_EXITS: vcpu->stat.dsi_exits++; break; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 59fa2de9546..9f342f134ae 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -10,7 +10,7 @@ CFLAGS_REMOVE_code-patching.o = -pg CFLAGS_REMOVE_feature-fixups.o = -pg obj-y := string.o alloc.o \ - crtsavres.o + crtsavres.o ppc_ksyms.o obj-$(CONFIG_PPC32) += div64.o copy_32.o obj-$(CONFIG_HAS_IOMEM) += devres.o diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 0860ee46013..f09899e3599 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -461,8 +461,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) /* * Routine to copy a whole page of data, optimized for POWER4. * On POWER4 it is more than 50% faster than the simple loop - * above (following the .Ldst_aligned label) but it runs slightly - * slower on POWER3. + * above (following the .Ldst_aligned label). */ .Lcopy_page_4K: std r31,-32(1) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 7a8a7487cee..7ce3870d7dd 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -164,7 +164,7 @@ static long calc_offset(struct fixup_entry *entry, unsigned int *p) return (unsigned long)p - (unsigned long)entry; } -void test_basic_patching(void) +static void test_basic_patching(void) { extern unsigned int ftr_fixup_test1; extern unsigned int end_ftr_fixup_test1; diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 0c9c8d7d073..170a0346f75 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -70,12 +70,16 @@ void __rw_yield(arch_rwlock_t *rw) void arch_spin_unlock_wait(arch_spinlock_t *lock) { + smp_mb(); + while (lock->slock) { HMT_low(); if (SHARED_PROCESSOR) __spin_yield(lock); } HMT_medium(); + + smp_mb(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c new file mode 100644 index 00000000000..f993959647b --- /dev/null +++ b/arch/powerpc/lib/ppc_ksyms.c @@ -0,0 +1,39 @@ +#include <linux/string.h> +#include <linux/uaccess.h> +#include <linux/bitops.h> +#include <net/checksum.h> + +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memchr); +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(cacheable_memcpy); +EXPORT_SYMBOL(cacheable_memzero); +#endif + +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strcat); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); + +#ifndef CONFIG_GENERIC_CSUM +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(ip_fast_csum); +EXPORT_SYMBOL(csum_tcpudp_magic); +#endif + +EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(copy_page); + +#ifdef CONFIG_PPC64 +EXPORT_SYMBOL(__arch_hweight8); +EXPORT_SYMBOL(__arch_hweight16); +EXPORT_SYMBOL(__arch_hweight32); +EXPORT_SYMBOL(__arch_hweight64); +#endif diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 5c09f365c84..54651fc2d41 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -98,13 +98,8 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs ra = (instr >> 16) & 0x1f; ea = (signed short) instr; /* sign-extend */ - if (ra) { + if (ra) ea += regs->gpr[ra]; - if (instr & 0x04000000) { /* update forms */ - if ((instr>>26) != 47) /* stmw is not an update form */ - regs->gpr[ra] = ea; - } - } return truncate_if_32bit(regs->msr, ea); } @@ -120,11 +115,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg ra = (instr >> 16) & 0x1f; ea = (signed short) (instr & ~3); /* sign-extend */ - if (ra) { + if (ra) ea += regs->gpr[ra]; - if ((instr & 3) == 1) /* update forms */ - regs->gpr[ra] = ea; - } return truncate_if_32bit(regs->msr, ea); } @@ -133,8 +125,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg /* * Calculate effective address for an X-form instruction */ -static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs, - int do_update) +static unsigned long __kprobes xform_ea(unsigned int instr, + struct pt_regs *regs) { int ra, rb; unsigned long ea; @@ -142,11 +134,8 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs ra = (instr >> 16) & 0x1f; rb = (instr >> 11) & 0x1f; ea = regs->gpr[rb]; - if (ra) { + if (ra) ea += regs->gpr[ra]; - if (do_update) /* update forms */ - regs->gpr[ra] = ea; - } return truncate_if_32bit(regs->msr, ea); } @@ -611,6 +600,23 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift); } +static int __kprobes trap_compare(long v1, long v2) +{ + int ret = 0; + + if (v1 < v2) + ret |= 0x10; + else if (v1 > v2) + ret |= 0x08; + else + ret |= 0x04; + if ((unsigned long)v1 < (unsigned long)v2) + ret |= 0x02; + else if ((unsigned long)v1 > (unsigned long)v2) + ret |= 0x01; + return ret; +} + /* * Elements of 32-bit rotate and mask instructions. */ @@ -627,26 +633,27 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, #define ROTATE(x, n) ((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x)) /* - * Emulate instructions that cause a transfer of control, - * loads and stores, and a few other instructions. - * Returns 1 if the step was emulated, 0 if not, - * or -1 if the instruction is one that should not be stepped, - * such as an rfid, or a mtmsrd that would clear MSR_RI. + * Decode an instruction, and execute it if that can be done just by + * modifying *regs (i.e. integer arithmetic and logical instructions, + * branches, and barrier instructions). + * Returns 1 if the instruction has been executed, or 0 if not. + * Sets *op to indicate what the instruction does. */ -int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) +int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, + unsigned int instr) { unsigned int opcode, ra, rb, rd, spr, u; unsigned long int imm; unsigned long int val, val2; - unsigned long int ea; - unsigned int cr, mb, me, sh; - int err; - unsigned long old_ra, val3; + unsigned int mb, me, sh; long ival; + op->type = COMPUTE; + opcode = instr >> 26; switch (opcode) { case 16: /* bc */ + op->type = BRANCH; imm = (signed short)(instr & 0xfffc); if ((instr & 2) == 0) imm += regs->nip; @@ -659,26 +666,14 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; #ifdef CONFIG_PPC64 case 17: /* sc */ - /* - * N.B. this uses knowledge about how the syscall - * entry code works. If that is changed, this will - * need to be changed also. - */ - if (regs->gpr[0] == 0x1ebe && - cpu_has_feature(CPU_FTR_REAL_LE)) { - regs->msr ^= MSR_LE; - goto instr_done; - } - regs->gpr[9] = regs->gpr[13]; - regs->gpr[10] = MSR_KERNEL; - regs->gpr[11] = regs->nip + 4; - regs->gpr[12] = regs->msr & MSR_MASK; - regs->gpr[13] = (unsigned long) get_paca(); - regs->nip = (unsigned long) &system_call_common; - regs->msr = MSR_KERNEL; - return 1; + if ((instr & 0xfe2) == 2) + op->type = SYSCALL; + else + op->type = UNKNOWN; + return 0; #endif case 18: /* b */ + op->type = BRANCH; imm = instr & 0x03fffffc; if (imm & 0x02000000) imm -= 0x04000000; @@ -691,8 +686,16 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; case 19: switch ((instr >> 1) & 0x3ff) { + case 0: /* mcrf */ + rd = (instr >> 21) & 0x1c; + ra = (instr >> 16) & 0x1c; + val = (regs->ccr >> ra) & 0xf; + regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd); + goto instr_done; + case 16: /* bclr */ case 528: /* bcctr */ + op->type = BRANCH; imm = (instr & 0x400)? regs->ctr: regs->link; regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); imm = truncate_if_32bit(regs->msr, imm); @@ -703,9 +706,13 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; case 18: /* rfid, scary */ - return -1; + if (regs->msr & MSR_PR) + goto priv; + op->type = RFI; + return 0; case 150: /* isync */ + op->type = BARRIER; isync(); goto instr_done; @@ -731,6 +738,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 31: switch ((instr >> 1) & 0x3ff) { case 598: /* sync */ + op->type = BARRIER; #ifdef __powerpc64__ switch ((instr >> 21) & 3) { case 1: /* lwsync */ @@ -745,6 +753,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case 854: /* eieio */ + op->type = BARRIER; eieio(); goto instr_done; } @@ -760,6 +769,17 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) rb = (instr >> 11) & 0x1f; switch (opcode) { +#ifdef __powerpc64__ + case 2: /* tdi */ + if (rd & trap_compare(regs->gpr[ra], (short) instr)) + goto trap; + goto instr_done; +#endif + case 3: /* twi */ + if (rd & trap_compare((int)regs->gpr[ra], (short) instr)) + goto trap; + goto instr_done; + case 7: /* mulli */ regs->gpr[rd] = regs->gpr[ra] * (short) instr; goto instr_done; @@ -908,35 +928,44 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 31: switch ((instr >> 1) & 0x3ff) { + case 4: /* tw */ + if (rd == 0x1f || + (rd & trap_compare((int)regs->gpr[ra], + (int)regs->gpr[rb]))) + goto trap; + goto instr_done; +#ifdef __powerpc64__ + case 68: /* td */ + if (rd & trap_compare(regs->gpr[ra], regs->gpr[rb])) + goto trap; + goto instr_done; +#endif case 83: /* mfmsr */ if (regs->msr & MSR_PR) - break; - regs->gpr[rd] = regs->msr & MSR_MASK; - goto instr_done; + goto priv; + op->type = MFMSR; + op->reg = rd; + return 0; case 146: /* mtmsr */ if (regs->msr & MSR_PR) - break; - imm = regs->gpr[rd]; - if ((imm & MSR_RI) == 0) - /* can't step mtmsr that would clear MSR_RI */ - return -1; - regs->msr = imm; - goto instr_done; + goto priv; + op->type = MTMSR; + op->reg = rd; + op->val = 0xffffffff & ~(MSR_ME | MSR_LE); + return 0; #ifdef CONFIG_PPC64 case 178: /* mtmsrd */ - /* only MSR_EE and MSR_RI get changed if bit 15 set */ - /* mtmsrd doesn't change MSR_HV and MSR_ME */ if (regs->msr & MSR_PR) - break; - imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL; - imm = (regs->msr & MSR_MASK & ~imm) - | (regs->gpr[rd] & imm); - if ((imm & MSR_RI) == 0) - /* can't step mtmsrd that would clear MSR_RI */ - return -1; - regs->msr = imm; - goto instr_done; + goto priv; + op->type = MTMSR; + op->reg = rd; + /* only MSR_EE and MSR_RI get changed if bit 15 set */ + /* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */ + imm = (instr & 0x10000)? 0x8002: 0xefffffffffffeffeUL; + op->val = imm; + return 0; #endif + case 19: /* mfcr */ regs->gpr[rd] = regs->ccr; regs->gpr[rd] &= 0xffffffffUL; @@ -954,33 +983,43 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case 339: /* mfspr */ - spr = (instr >> 11) & 0x3ff; + spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); switch (spr) { - case 0x20: /* mfxer */ + case SPRN_XER: /* mfxer */ regs->gpr[rd] = regs->xer; regs->gpr[rd] &= 0xffffffffUL; goto instr_done; - case 0x100: /* mflr */ + case SPRN_LR: /* mflr */ regs->gpr[rd] = regs->link; goto instr_done; - case 0x120: /* mfctr */ + case SPRN_CTR: /* mfctr */ regs->gpr[rd] = regs->ctr; goto instr_done; + default: + op->type = MFSPR; + op->reg = rd; + op->spr = spr; + return 0; } break; case 467: /* mtspr */ - spr = (instr >> 11) & 0x3ff; + spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); switch (spr) { - case 0x20: /* mtxer */ + case SPRN_XER: /* mtxer */ regs->xer = (regs->gpr[rd] & 0xffffffffUL); goto instr_done; - case 0x100: /* mtlr */ + case SPRN_LR: /* mtlr */ regs->link = regs->gpr[rd]; goto instr_done; - case 0x120: /* mtctr */ + case SPRN_CTR: /* mtctr */ regs->ctr = regs->gpr[rd]; goto instr_done; + default: + op->type = MTSPR; + op->val = regs->gpr[rd]; + op->spr = spr; + return 0; } break; @@ -1257,294 +1296,242 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) * Cache instructions */ case 54: /* dcbst */ - ea = xform_ea(instr, regs, 0); - if (!address_ok(regs, ea, 8)) - return 0; - err = 0; - __cacheop_user_asmx(ea, err, "dcbst"); - if (err) - return 0; - goto instr_done; + op->type = MKOP(CACHEOP, DCBST, 0); + op->ea = xform_ea(instr, regs); + return 0; case 86: /* dcbf */ - ea = xform_ea(instr, regs, 0); - if (!address_ok(regs, ea, 8)) - return 0; - err = 0; - __cacheop_user_asmx(ea, err, "dcbf"); - if (err) - return 0; - goto instr_done; + op->type = MKOP(CACHEOP, DCBF, 0); + op->ea = xform_ea(instr, regs); + return 0; case 246: /* dcbtst */ - if (rd == 0) { - ea = xform_ea(instr, regs, 0); - prefetchw((void *) ea); - } - goto instr_done; + op->type = MKOP(CACHEOP, DCBTST, 0); + op->ea = xform_ea(instr, regs); + op->reg = rd; + return 0; case 278: /* dcbt */ - if (rd == 0) { - ea = xform_ea(instr, regs, 0); - prefetch((void *) ea); - } - goto instr_done; + op->type = MKOP(CACHEOP, DCBTST, 0); + op->ea = xform_ea(instr, regs); + op->reg = rd; + return 0; + case 982: /* icbi */ + op->type = MKOP(CACHEOP, ICBI, 0); + op->ea = xform_ea(instr, regs); + return 0; } break; } /* - * Following cases are for loads and stores, so bail out - * if we're in little-endian mode. + * Loads and stores. */ - if (regs->msr & MSR_LE) - return 0; - - /* - * Save register RA in case it's an update form load or store - * and the access faults. - */ - old_ra = regs->gpr[ra]; + op->type = UNKNOWN; + op->update_reg = ra; + op->reg = rd; + op->val = regs->gpr[rd]; + u = (instr >> 20) & UPDATE; switch (opcode) { case 31: - u = instr & 0x40; + u = instr & UPDATE; + op->ea = xform_ea(instr, regs); switch ((instr >> 1) & 0x3ff) { case 20: /* lwarx */ - ea = xform_ea(instr, regs, 0); - if (ea & 3) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 4)) - goto ldst_done; - err = 0; - __get_user_asmx(val, ea, err, "lwarx"); - if (!err) - regs->gpr[rd] = val; - goto ldst_done; + op->type = MKOP(LARX, 0, 4); + break; case 150: /* stwcx. */ - ea = xform_ea(instr, regs, 0); - if (ea & 3) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 4)) - goto ldst_done; - err = 0; - __put_user_asmx(regs->gpr[rd], ea, err, "stwcx.", cr); - if (!err) - regs->ccr = (regs->ccr & 0x0fffffff) | - (cr & 0xe0000000) | - ((regs->xer >> 3) & 0x10000000); - goto ldst_done; + op->type = MKOP(STCX, 0, 4); + break; #ifdef __powerpc64__ case 84: /* ldarx */ - ea = xform_ea(instr, regs, 0); - if (ea & 7) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 8)) - goto ldst_done; - err = 0; - __get_user_asmx(val, ea, err, "ldarx"); - if (!err) - regs->gpr[rd] = val; - goto ldst_done; + op->type = MKOP(LARX, 0, 8); + break; case 214: /* stdcx. */ - ea = xform_ea(instr, regs, 0); - if (ea & 7) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 8)) - goto ldst_done; - err = 0; - __put_user_asmx(regs->gpr[rd], ea, err, "stdcx.", cr); - if (!err) - regs->ccr = (regs->ccr & 0x0fffffff) | - (cr & 0xe0000000) | - ((regs->xer >> 3) & 0x10000000); - goto ldst_done; + op->type = MKOP(STCX, 0, 8); + break; case 21: /* ldx */ case 53: /* ldux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 8); + break; #endif case 23: /* lwzx */ case 55: /* lwzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 4, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 4); + break; case 87: /* lbzx */ case 119: /* lbzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 1, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 1); + break; #ifdef CONFIG_ALTIVEC case 103: /* lvx */ case 359: /* lvxl */ if (!(regs->msr & MSR_VEC)) - break; - ea = xform_ea(instr, regs, 0); - err = do_vec_load(rd, do_lvx, ea, regs); - goto ldst_done; + goto vecunavail; + op->type = MKOP(LOAD_VMX, 0, 16); + break; case 231: /* stvx */ case 487: /* stvxl */ if (!(regs->msr & MSR_VEC)) - break; - ea = xform_ea(instr, regs, 0); - err = do_vec_store(rd, do_stvx, ea, regs); - goto ldst_done; + goto vecunavail; + op->type = MKOP(STORE_VMX, 0, 16); + break; #endif /* CONFIG_ALTIVEC */ #ifdef __powerpc64__ case 149: /* stdx */ case 181: /* stdux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 8); + break; #endif case 151: /* stwx */ case 183: /* stwux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 4, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 4); + break; case 215: /* stbx */ case 247: /* stbux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 1, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 1); + break; case 279: /* lhzx */ case 311: /* lhzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 2, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 2); + break; #ifdef __powerpc64__ case 341: /* lwax */ case 373: /* lwaux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 4, regs); - if (!err) - regs->gpr[rd] = (signed int) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 4); + break; #endif case 343: /* lhax */ case 375: /* lhaux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 2, regs); - if (!err) - regs->gpr[rd] = (signed short) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 2); + break; case 407: /* sthx */ case 439: /* sthux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 2); + break; #ifdef __powerpc64__ case 532: /* ldbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 8, regs); - if (!err) - regs->gpr[rd] = byterev_8(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 8); + break; #endif + case 533: /* lswx */ + op->type = MKOP(LOAD_MULTI, 0, regs->xer & 0x7f); + break; case 534: /* lwbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 4, regs); - if (!err) - regs->gpr[rd] = byterev_4(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 4); + break; + + case 597: /* lswi */ + if (rb == 0) + rb = 32; /* # bytes to load */ + op->type = MKOP(LOAD_MULTI, 0, rb); + op->ea = 0; + if (ra) + op->ea = truncate_if_32bit(regs->msr, + regs->gpr[ra]); + break; #ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_load(rd, do_lfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 4); + break; case 599: /* lfdx */ case 631: /* lfdux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_load(rd, do_lfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 8); + break; case 663: /* stfsx */ case 695: /* stfsux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_store(rd, do_stfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 4); + break; case 727: /* stfdx */ case 759: /* stfdux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_store(rd, do_stfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 8); + break; #endif #ifdef __powerpc64__ case 660: /* stdbrx */ - val = byterev_8(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 8); + op->val = byterev_8(regs->gpr[rd]); + break; #endif + case 661: /* stswx */ + op->type = MKOP(STORE_MULTI, 0, regs->xer & 0x7f); + break; + case 662: /* stwbrx */ - val = byterev_4(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 4, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 4); + op->val = byterev_4(regs->gpr[rd]); + break; + + case 725: + if (rb == 0) + rb = 32; /* # bytes to store */ + op->type = MKOP(STORE_MULTI, 0, rb); + op->ea = 0; + if (ra) + op->ea = truncate_if_32bit(regs->msr, + regs->gpr[ra]); + break; case 790: /* lhbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 2, regs); - if (!err) - regs->gpr[rd] = byterev_2(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 2); + break; case 918: /* sthbrx */ - val = byterev_2(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 2); + op->val = byterev_2(regs->gpr[rd]); + break; #ifdef CONFIG_VSX case 844: /* lxvd2x */ case 876: /* lxvd2ux */ if (!(regs->msr & MSR_VSX)) - break; - rd |= (instr & 1) << 5; - ea = xform_ea(instr, regs, u); - err = do_vsx_load(rd, do_lxvd2x, ea, regs); - goto ldst_done; + goto vsxunavail; + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, u, 16); + break; case 972: /* stxvd2x */ case 1004: /* stxvd2ux */ if (!(regs->msr & MSR_VSX)) - break; - rd |= (instr & 1) << 5; - ea = xform_ea(instr, regs, u); - err = do_vsx_store(rd, do_stxvd2x, ea, regs); - goto ldst_done; + goto vsxunavail; + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, u, 16); + break; #endif /* CONFIG_VSX */ } @@ -1552,178 +1539,123 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 32: /* lwz */ case 33: /* lwzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 4, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 4); + op->ea = dform_ea(instr, regs); + break; case 34: /* lbz */ case 35: /* lbzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 1, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 1); + op->ea = dform_ea(instr, regs); + break; case 36: /* stw */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 4, regs); - goto ldst_done; - case 37: /* stwu */ - val = regs->gpr[rd]; - val3 = dform_ea(instr, regs); - /* - * For PPC32 we always use stwu to change stack point with r1. So - * this emulated store may corrupt the exception frame, now we - * have to provide the exception frame trampoline, which is pushed - * below the kprobed function stack. So we only update gpr[1] but - * don't emulate the real store operation. We will do real store - * operation safely in exception return code by checking this flag. - */ - if ((ra == 1) && !(regs->msr & MSR_PR) \ - && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { -#ifdef CONFIG_PPC32 - /* - * Check if we will touch kernel sack overflow - */ - if (val3 - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { - printk(KERN_CRIT "Can't kprobe this since Kernel stack overflow.\n"); - err = -EINVAL; - break; - } -#endif /* CONFIG_PPC32 */ - /* - * Check if we already set since that means we'll - * lose the previous value. - */ - WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); - set_thread_flag(TIF_EMULATE_STACK_STORE); - err = 0; - } else - err = write_mem(val, val3, 4, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 4); + op->ea = dform_ea(instr, regs); + break; case 38: /* stb */ case 39: /* stbu */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 1, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 1); + op->ea = dform_ea(instr, regs); + break; case 40: /* lhz */ case 41: /* lhzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 2, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 2); + op->ea = dform_ea(instr, regs); + break; case 42: /* lha */ case 43: /* lhau */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 2, regs); - if (!err) - regs->gpr[rd] = (signed short) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 2); + op->ea = dform_ea(instr, regs); + break; case 44: /* sth */ case 45: /* sthu */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 2); + op->ea = dform_ea(instr, regs); + break; case 46: /* lmw */ - ra = (instr >> 16) & 0x1f; if (ra >= rd) break; /* invalid form, ra in range to load */ - ea = dform_ea(instr, regs); - do { - err = read_mem(®s->gpr[rd], ea, 4, regs); - if (err) - return 0; - ea += 4; - } while (++rd < 32); - goto instr_done; + op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd)); + op->ea = dform_ea(instr, regs); + break; case 47: /* stmw */ - ea = dform_ea(instr, regs); - do { - err = write_mem(regs->gpr[rd], ea, 4, regs); - if (err) - return 0; - ea += 4; - } while (++rd < 32); - goto instr_done; + op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd)); + op->ea = dform_ea(instr, regs); + break; #ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_load(rd, do_lfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 4); + op->ea = dform_ea(instr, regs); + break; case 50: /* lfd */ case 51: /* lfdu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_load(rd, do_lfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 8); + op->ea = dform_ea(instr, regs); + break; case 52: /* stfs */ case 53: /* stfsu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_store(rd, do_stfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 4); + op->ea = dform_ea(instr, regs); + break; case 54: /* stfd */ case 55: /* stfdu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_store(rd, do_stfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 8); + op->ea = dform_ea(instr, regs); + break; #endif #ifdef __powerpc64__ case 58: /* ld[u], lwa */ + op->ea = dsform_ea(instr, regs); switch (instr & 3) { case 0: /* ld */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, 0, 8); + break; case 1: /* ldu */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, UPDATE, 8); + break; case 2: /* lwa */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 4, regs); - if (!err) - regs->gpr[rd] = (signed int) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT, 4); + break; } break; case 62: /* std[u] */ - val = regs->gpr[rd]; + op->ea = dsform_ea(instr, regs); switch (instr & 3) { case 0: /* std */ - err = write_mem(val, dsform_ea(instr, regs), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, 0, 8); + break; case 1: /* stdu */ - err = write_mem(val, dsform_ea(instr, regs), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, UPDATE, 8); + break; } break; #endif /* __powerpc64__ */ } - err = -EINVAL; - - ldst_done: - if (err) { - regs->gpr[ra] = old_ra; - return 0; /* invoke DSI if -EFAULT? */ - } - instr_done: - regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); - return 1; + return 0; logical_done: if (instr & 1) @@ -1733,5 +1665,349 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) arith_done: if (instr & 1) set_cr0(regs, rd); - goto instr_done; + + instr_done: + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + return 1; + + priv: + op->type = INTERRUPT | 0x700; + op->val = SRR1_PROGPRIV; + return 0; + + trap: + op->type = INTERRUPT | 0x700; + op->val = SRR1_PROGTRAP; + return 0; + +#ifdef CONFIG_PPC_FPU + fpunavail: + op->type = INTERRUPT | 0x800; + return 0; +#endif + +#ifdef CONFIG_ALTIVEC + vecunavail: + op->type = INTERRUPT | 0xf20; + return 0; +#endif + +#ifdef CONFIG_VSX + vsxunavail: + op->type = INTERRUPT | 0xf40; + return 0; +#endif +} +EXPORT_SYMBOL_GPL(analyse_instr); + +/* + * For PPC32 we always use stwu with r1 to change the stack pointer. + * So this emulated store may corrupt the exception frame, now we + * have to provide the exception frame trampoline, which is pushed + * below the kprobed function stack. So we only update gpr[1] but + * don't emulate the real store operation. We will do real store + * operation safely in exception return code by checking this flag. + */ +static __kprobes int handle_stack_update(unsigned long ea, struct pt_regs *regs) +{ +#ifdef CONFIG_PPC32 + /* + * Check if we will touch kernel stack overflow + */ + if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { + printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n"); + return -EINVAL; + } +#endif /* CONFIG_PPC32 */ + /* + * Check if we already set since that means we'll + * lose the previous value. + */ + WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); + set_thread_flag(TIF_EMULATE_STACK_STORE); + return 0; +} + +static __kprobes void do_signext(unsigned long *valp, int size) +{ + switch (size) { + case 2: + *valp = (signed short) *valp; + break; + case 4: + *valp = (signed int) *valp; + break; + } +} + +static __kprobes void do_byterev(unsigned long *valp, int size) +{ + switch (size) { + case 2: + *valp = byterev_2(*valp); + break; + case 4: + *valp = byterev_4(*valp); + break; +#ifdef __powerpc64__ + case 8: + *valp = byterev_8(*valp); + break; +#endif + } +} + +/* + * Emulate instructions that cause a transfer of control, + * loads and stores, and a few other instructions. + * Returns 1 if the step was emulated, 0 if not, + * or -1 if the instruction is one that should not be stepped, + * such as an rfid, or a mtmsrd that would clear MSR_RI. + */ +int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) +{ + struct instruction_op op; + int r, err, size; + unsigned long val; + unsigned int cr; + int i, rd, nb; + + r = analyse_instr(&op, regs, instr); + if (r != 0) + return r; + + err = 0; + size = GETSIZE(op.type); + switch (op.type & INSTR_TYPE_MASK) { + case CACHEOP: + if (!address_ok(regs, op.ea, 8)) + return 0; + switch (op.type & CACHEOP_MASK) { + case DCBST: + __cacheop_user_asmx(op.ea, err, "dcbst"); + break; + case DCBF: + __cacheop_user_asmx(op.ea, err, "dcbf"); + break; + case DCBTST: + if (op.reg == 0) + prefetchw((void *) op.ea); + break; + case DCBT: + if (op.reg == 0) + prefetch((void *) op.ea); + break; + case ICBI: + __cacheop_user_asmx(op.ea, err, "icbi"); + break; + } + if (err) + return 0; + goto instr_done; + + case LARX: + if (regs->msr & MSR_LE) + return 0; + if (op.ea & (size - 1)) + break; /* can't handle misaligned */ + err = -EFAULT; + if (!address_ok(regs, op.ea, size)) + goto ldst_done; + err = 0; + switch (size) { + case 4: + __get_user_asmx(val, op.ea, err, "lwarx"); + break; + case 8: + __get_user_asmx(val, op.ea, err, "ldarx"); + break; + default: + return 0; + } + if (!err) + regs->gpr[op.reg] = val; + goto ldst_done; + + case STCX: + if (regs->msr & MSR_LE) + return 0; + if (op.ea & (size - 1)) + break; /* can't handle misaligned */ + err = -EFAULT; + if (!address_ok(regs, op.ea, size)) + goto ldst_done; + err = 0; + switch (size) { + case 4: + __put_user_asmx(op.val, op.ea, err, "stwcx.", cr); + break; + case 8: + __put_user_asmx(op.val, op.ea, err, "stdcx.", cr); + break; + default: + return 0; + } + if (!err) + regs->ccr = (regs->ccr & 0x0fffffff) | + (cr & 0xe0000000) | + ((regs->xer >> 3) & 0x10000000); + goto ldst_done; + + case LOAD: + if (regs->msr & MSR_LE) + return 0; + err = read_mem(®s->gpr[op.reg], op.ea, size, regs); + if (!err) { + if (op.type & SIGNEXT) + do_signext(®s->gpr[op.reg], size); + if (op.type & BYTEREV) + do_byterev(®s->gpr[op.reg], size); + } + goto ldst_done; + + case LOAD_FP: + if (regs->msr & MSR_LE) + return 0; + if (size == 4) + err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); + else + err = do_fp_load(op.reg, do_lfd, op.ea, size, regs); + goto ldst_done; + +#ifdef CONFIG_ALTIVEC + case LOAD_VMX: + if (regs->msr & MSR_LE) + return 0; + err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); + goto ldst_done; +#endif +#ifdef CONFIG_VSX + case LOAD_VSX: + if (regs->msr & MSR_LE) + return 0; + err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); + goto ldst_done; +#endif + case LOAD_MULTI: + if (regs->msr & MSR_LE) + return 0; + rd = op.reg; + for (i = 0; i < size; i += 4) { + nb = size - i; + if (nb > 4) + nb = 4; + err = read_mem(®s->gpr[rd], op.ea, nb, regs); + if (err) + return 0; + if (nb < 4) /* left-justify last bytes */ + regs->gpr[rd] <<= 32 - 8 * nb; + op.ea += 4; + ++rd; + } + goto instr_done; + + case STORE: + if (regs->msr & MSR_LE) + return 0; + if ((op.type & UPDATE) && size == sizeof(long) && + op.reg == 1 && op.update_reg == 1 && + !(regs->msr & MSR_PR) && + op.ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) { + err = handle_stack_update(op.ea, regs); + goto ldst_done; + } + err = write_mem(op.val, op.ea, size, regs); + goto ldst_done; + + case STORE_FP: + if (regs->msr & MSR_LE) + return 0; + if (size == 4) + err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); + else + err = do_fp_store(op.reg, do_stfd, op.ea, size, regs); + goto ldst_done; + +#ifdef CONFIG_ALTIVEC + case STORE_VMX: + if (regs->msr & MSR_LE) + return 0; + err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); + goto ldst_done; +#endif +#ifdef CONFIG_VSX + case STORE_VSX: + if (regs->msr & MSR_LE) + return 0; + err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); + goto ldst_done; +#endif + case STORE_MULTI: + if (regs->msr & MSR_LE) + return 0; + rd = op.reg; + for (i = 0; i < size; i += 4) { + val = regs->gpr[rd]; + nb = size - i; + if (nb > 4) + nb = 4; + else + val >>= 32 - 8 * nb; + err = write_mem(val, op.ea, nb, regs); + if (err) + return 0; + op.ea += 4; + ++rd; + } + goto instr_done; + + case MFMSR: + regs->gpr[op.reg] = regs->msr & MSR_MASK; + goto instr_done; + + case MTMSR: + val = regs->gpr[op.reg]; + if ((val & MSR_RI) == 0) + /* can't step mtmsr[d] that would clear MSR_RI */ + return -1; + /* here op.val is the mask of bits to change */ + regs->msr = (regs->msr & ~op.val) | (val & op.val); + goto instr_done; + +#ifdef CONFIG_PPC64 + case SYSCALL: /* sc */ + /* + * N.B. this uses knowledge about how the syscall + * entry code works. If that is changed, this will + * need to be changed also. + */ + if (regs->gpr[0] == 0x1ebe && + cpu_has_feature(CPU_FTR_REAL_LE)) { + regs->msr ^= MSR_LE; + goto instr_done; + } + regs->gpr[9] = regs->gpr[13]; + regs->gpr[10] = MSR_KERNEL; + regs->gpr[11] = regs->nip + 4; + regs->gpr[12] = regs->msr & MSR_MASK; + regs->gpr[13] = (unsigned long) get_paca(); + regs->nip = (unsigned long) &system_call_common; + regs->msr = MSR_KERNEL; + return 1; + + case RFI: + return -1; +#endif + } + return 0; + + ldst_done: + if (err) + return 0; + if (op.type & UPDATE) + regs->gpr[op.update_reg] = op.ea; + + instr_done: + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + return 1; } diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 51230ee6a40..325e861616a 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -13,9 +13,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ tlb_nohash_low.o obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o -obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ - slb_low.o slb.o stab.o \ - $(hash64-y) +obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o slb_low.o slb.o $(hash64-y) obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ tlb_hash$(CONFIG_WORD_SIZE).o \ @@ -36,3 +34,4 @@ obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o diff --git a/arch/powerpc/platforms/cell/spu_fault.c b/arch/powerpc/mm/copro_fault.c index 641e7273d75..0f9939e693d 100644 --- a/arch/powerpc/platforms/cell/spu_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -1,5 +1,5 @@ /* - * SPU mm fault handler + * CoProcessor (SPU/AFU) mm fault handler * * (C) Copyright IBM Deutschland Entwicklung GmbH 2007 * @@ -23,16 +23,17 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/export.h> - +#include <asm/reg.h> +#include <asm/copro.h> #include <asm/spu.h> -#include <asm/spu_csa.h> +#include <misc/cxl.h> /* * This ought to be kept in sync with the powerpc specific do_page_fault * function. Currently, there are a few corner cases that we haven't had * to handle fortunately. */ -int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, +int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr, unsigned *flt) { struct vm_area_struct *vma; @@ -58,12 +59,12 @@ int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, goto out_unlock; } - is_write = dsisr & MFC_DSISR_ACCESS_PUT; + is_write = dsisr & DSISR_ISSTORE; if (is_write) { if (!(vma->vm_flags & VM_WRITE)) goto out_unlock; } else { - if (dsisr & MFC_DSISR_ACCESS_DENIED) + if (dsisr & DSISR_PROTFAULT) goto out_unlock; if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto out_unlock; @@ -91,4 +92,58 @@ out_unlock: up_read(&mm->mmap_sem); return ret; } -EXPORT_SYMBOL_GPL(spu_handle_mm_fault); +EXPORT_SYMBOL_GPL(copro_handle_mm_fault); + +int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) +{ + u64 vsid; + int psize, ssize; + + slb->esid = (ea & ESID_MASK) | SLB_ESID_V; + + switch (REGION_ID(ea)) { + case USER_REGION_ID: + pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea); + psize = get_slice_psize(mm, ea); + ssize = user_segment_size(ea); + vsid = get_vsid(mm->context.id, ea, ssize); + break; + case VMALLOC_REGION_ID: + pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea); + if (ea < VMALLOC_END) + psize = mmu_vmalloc_psize; + else + psize = mmu_io_psize; + ssize = mmu_kernel_ssize; + vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + break; + case KERNEL_REGION_ID: + pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea); + psize = mmu_linear_psize; + ssize = mmu_kernel_ssize; + vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + break; + default: + pr_debug("%s: invalid region access at %016llx\n", __func__, ea); + return 1; + } + + vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER; + + vsid |= mmu_psize_defs[psize].sllp | + ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0); + + slb->vsid = vsid; + + return 0; +} +EXPORT_SYMBOL_GPL(copro_calculate_slb); + +void copro_flush_all_slbs(struct mm_struct *mm) +{ +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif + cxl_slbia(mm); +} +EXPORT_SYMBOL_GPL(copro_flush_all_slbs); diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 7b6c1075017..d85e86aac7f 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -33,6 +33,7 @@ #include <linux/export.h> #include <asm/tlbflush.h> +#include <asm/dma.h> #include "mmu_decl.h" diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 51ab9e7e6c3..24b3f4949df 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -33,6 +33,7 @@ #include <linux/magic.h> #include <linux/ratelimit.h> #include <linux/context_tracking.h> +#include <linux/hugetlb.h> #include <asm/firmware.h> #include <asm/page.h> @@ -114,22 +115,37 @@ static int store_updates_sp(struct pt_regs *regs) #define MM_FAULT_CONTINUE -1 #define MM_FAULT_ERR(sig) (sig) -static int do_sigbus(struct pt_regs *regs, unsigned long address) +static int do_sigbus(struct pt_regs *regs, unsigned long address, + unsigned int fault) { siginfo_t info; + unsigned int lsb = 0; up_read(¤t->mm->mmap_sem); - if (user_mode(regs)) { - current->thread.trap_nr = BUS_ADRERR; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); - return MM_FAULT_RETURN; + if (!user_mode(regs)) + return MM_FAULT_ERR(SIGBUS); + + current->thread.trap_nr = BUS_ADRERR; + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; +#ifdef CONFIG_MEMORY_FAILURE + if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { + pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", + current->comm, current->pid, address); + info.si_code = BUS_MCEERR_AR; } - return MM_FAULT_ERR(SIGBUS); + + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; +#endif + info.si_addr_lsb = lsb; + force_sig_info(SIGBUS, &info, current); + return MM_FAULT_RETURN; } static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) @@ -170,11 +186,8 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) return MM_FAULT_RETURN; } - /* Bus error. x86 handles HWPOISON here, we'll add this if/when - * we support the feature in HW - */ - if (fault & VM_FAULT_SIGBUS) - return do_sigbus(regs, addr); + if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) + return do_sigbus(regs, addr, fault); /* We don't understand the fault code, this is fatal */ BUG(); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index cf1d325eae8..ae4962a0647 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -29,6 +29,8 @@ #include <asm/kexec.h> #include <asm/ppc-opcode.h> +#include <misc/cxl.h> + #ifdef DEBUG_LOW #define DBG_LOW(fmt...) udbg_printf(fmt) #else @@ -149,9 +151,11 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) static inline void tlbie(unsigned long vpn, int psize, int apsize, int ssize, int local) { - unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL); + unsigned int use_local; int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use(); + if (use_local) use_local = mmu_psize_defs[psize].tlbiel; if (lock_tlbie && !use_local) @@ -412,18 +416,18 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, local_irq_restore(flags); } -static void native_hugepage_invalidate(struct mm_struct *mm, +static void native_hugepage_invalidate(unsigned long vsid, + unsigned long addr, unsigned char *hpte_slot_array, - unsigned long addr, int psize) + int psize, int ssize) { - int ssize = 0, i; - int lock_tlbie; + int i; struct hash_pte *hptep; int actual_psize = MMU_PAGE_16M; unsigned int max_hpte_count, valid; unsigned long flags, s_addr = addr; unsigned long hpte_v, want_v, shift; - unsigned long hidx, vpn = 0, vsid, hash, slot; + unsigned long hidx, vpn = 0, hash, slot; shift = mmu_psize_defs[psize].shift; max_hpte_count = 1U << (PMD_SHIFT - shift); @@ -437,15 +441,6 @@ static void native_hugepage_invalidate(struct mm_struct *mm, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) @@ -465,22 +460,13 @@ static void native_hugepage_invalidate(struct mm_struct *mm, else /* Invalidate the hpte. NOTE: this also unlocks it */ hptep->v = 0; + /* + * We need to do tlb invalidate for all the address, tlbie + * instruction compares entry_VA in tlb with the VA specified + * here + */ + tlbie(vpn, psize, actual_psize, ssize, 0); } - /* - * Since this is a hugepage, we just need a single tlbie. - * use the last vpn. - */ - lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); - - asm volatile("ptesync":::"memory"); - __tlbie(vpn, psize, actual_psize, ssize); - asm volatile("eieio; tlbsync; ptesync":::"memory"); - - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); - local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 88fdd9d2507..d5339a3b994 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -51,7 +51,7 @@ #include <asm/cacheflush.h> #include <asm/cputable.h> #include <asm/sections.h> -#include <asm/spu.h> +#include <asm/copro.h> #include <asm/udbg.h> #include <asm/code-patching.h> #include <asm/fadump.h> @@ -92,12 +92,14 @@ extern unsigned long dart_tablebase; static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; +EXPORT_SYMBOL_GPL(mmu_psize_defs); struct hash_pte *htab_address; unsigned long htab_size_bytes; unsigned long htab_hash_mask; EXPORT_SYMBOL_GPL(htab_hash_mask); int mmu_linear_psize = MMU_PAGE_4K; +EXPORT_SYMBOL_GPL(mmu_linear_psize); int mmu_virtual_psize = MMU_PAGE_4K; int mmu_vmalloc_psize = MMU_PAGE_4K; #ifdef CONFIG_SPARSEMEM_VMEMMAP @@ -105,6 +107,7 @@ int mmu_vmemmap_psize = MMU_PAGE_4K; #endif int mmu_io_psize = MMU_PAGE_4K; int mmu_kernel_ssize = MMU_SEGSIZE_256M; +EXPORT_SYMBOL_GPL(mmu_kernel_ssize); int mmu_highuser_ssize = MMU_SEGSIZE_256M; u16 mmu_slb_size = 64; EXPORT_SYMBOL_GPL(mmu_slb_size); @@ -243,7 +246,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, } #ifdef CONFIG_MEMORY_HOTPLUG -static int htab_remove_mapping(unsigned long vstart, unsigned long vend, +int htab_remove_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize) { unsigned long vaddr; @@ -333,70 +336,69 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, return 0; prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size); - if (prop != NULL) { - pr_info("Page sizes from device-tree:\n"); - size /= 4; - cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); - while(size > 0) { - unsigned int base_shift = be32_to_cpu(prop[0]); - unsigned int slbenc = be32_to_cpu(prop[1]); - unsigned int lpnum = be32_to_cpu(prop[2]); - struct mmu_psize_def *def; - int idx, base_idx; - - size -= 3; prop += 3; - base_idx = get_idx_from_shift(base_shift); - if (base_idx < 0) { - /* - * skip the pte encoding also - */ - prop += lpnum * 2; size -= lpnum * 2; + if (!prop) + return 0; + + pr_info("Page sizes from device-tree:\n"); + size /= 4; + cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); + while(size > 0) { + unsigned int base_shift = be32_to_cpu(prop[0]); + unsigned int slbenc = be32_to_cpu(prop[1]); + unsigned int lpnum = be32_to_cpu(prop[2]); + struct mmu_psize_def *def; + int idx, base_idx; + + size -= 3; prop += 3; + base_idx = get_idx_from_shift(base_shift); + if (base_idx < 0) { + /* skip the pte encoding also */ + prop += lpnum * 2; size -= lpnum * 2; + continue; + } + def = &mmu_psize_defs[base_idx]; + if (base_idx == MMU_PAGE_16M) + cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; + + def->shift = base_shift; + if (base_shift <= 23) + def->avpnm = 0; + else + def->avpnm = (1 << (base_shift - 23)) - 1; + def->sllp = slbenc; + /* + * We don't know for sure what's up with tlbiel, so + * for now we only set it for 4K and 64K pages + */ + if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K) + def->tlbiel = 1; + else + def->tlbiel = 0; + + while (size > 0 && lpnum) { + unsigned int shift = be32_to_cpu(prop[0]); + int penc = be32_to_cpu(prop[1]); + + prop += 2; size -= 2; + lpnum--; + + idx = get_idx_from_shift(shift); + if (idx < 0) continue; - } - def = &mmu_psize_defs[base_idx]; - if (base_idx == MMU_PAGE_16M) - cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; - - def->shift = base_shift; - if (base_shift <= 23) - def->avpnm = 0; - else - def->avpnm = (1 << (base_shift - 23)) - 1; - def->sllp = slbenc; - /* - * We don't know for sure what's up with tlbiel, so - * for now we only set it for 4K and 64K pages - */ - if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K) - def->tlbiel = 1; - else - def->tlbiel = 0; - - while (size > 0 && lpnum) { - unsigned int shift = be32_to_cpu(prop[0]); - int penc = be32_to_cpu(prop[1]); - - prop += 2; size -= 2; - lpnum--; - - idx = get_idx_from_shift(shift); - if (idx < 0) - continue; - - if (penc == -1) - pr_err("Invalid penc for base_shift=%d " - "shift=%d\n", base_shift, shift); - - def->penc[idx] = penc; - pr_info("base_shift=%d: shift=%d, sllp=0x%04lx," - " avpnm=0x%08lx, tlbiel=%d, penc=%d\n", - base_shift, shift, def->sllp, - def->avpnm, def->tlbiel, def->penc[idx]); - } + + if (penc == -1) + pr_err("Invalid penc for base_shift=%d " + "shift=%d\n", base_shift, shift); + + def->penc[idx] = penc; + pr_info("base_shift=%d: shift=%d, sllp=0x%04lx," + " avpnm=0x%08lx, tlbiel=%d, penc=%d\n", + base_shift, shift, def->sllp, + def->avpnm, def->tlbiel, def->penc[idx]); } - return 1; } - return 0; + + return 1; } #ifdef CONFIG_HUGETLB_PAGE @@ -821,21 +823,14 @@ static void __init htab_initialize(void) void __init early_init_mmu(void) { - /* Setup initial STAB address in the PACA */ - get_paca()->stab_real = __pa((u64)&initial_stab); - get_paca()->stab_addr = (u64)&initial_stab; - /* Initialize the MMU Hash table and create the linear mapping - * of memory. Has to be done before stab/slb initialization as - * this is currently where the page size encoding is obtained + * of memory. Has to be done before SLB initialization as this is + * currently where the page size encoding is obtained. */ htab_initialize(); - /* Initialize stab / SLB management */ - if (mmu_has_feature(MMU_FTR_SLB)) - slb_initialize(); - else - stab_initialize(get_paca()->stab_real); + /* Initialize SLB management */ + slb_initialize(); } #ifdef CONFIG_SMP @@ -845,13 +840,8 @@ void early_init_mmu_secondary(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) mtspr(SPRN_SDR1, _SDR1); - /* Initialize STAB/SLB. We use a virtual address as it works - * in real mode on pSeries. - */ - if (mmu_has_feature(MMU_FTR_SLB)) - slb_initialize(); - else - stab_initialize(get_paca()->stab_addr); + /* Initialize SLB */ + slb_initialize(); } #endif /* CONFIG_SMP */ @@ -879,7 +869,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) } #ifdef CONFIG_PPC_MM_SLICES -unsigned int get_paca_psize(unsigned long addr) +static unsigned int get_paca_psize(unsigned long addr) { u64 lpsizes; unsigned char *hpsizes; @@ -913,10 +903,8 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) if (get_slice_psize(mm, addr) == MMU_PAGE_4K) return; slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif - if (get_paca_psize(addr) != MMU_PAGE_4K) { + copro_flush_all_slbs(mm); + if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { get_paca()->context = mm->context; slb_flush_and_rebolt(); } @@ -1001,12 +989,11 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, * -1 - critical hash insertion error * -2 - access not permitted by subpage protection mechanism */ -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) { enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; unsigned long vsid; - struct mm_struct *mm; pte_t *ptep; unsigned hugeshift; const struct cpumask *tmp; @@ -1020,7 +1007,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) switch (REGION_ID(ea)) { case USER_REGION_ID: user_region = 1; - mm = current->mm; if (! mm) { DBG_LOW(" user region with no mm !\n"); rc = 1; @@ -1031,7 +1017,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) vsid = get_vsid(mm->context.id, ea, ssize); break; case VMALLOC_REGION_ID: - mm = &init_mm; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); if (ea < VMALLOC_END) psize = mmu_vmalloc_psize; @@ -1116,7 +1101,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) WARN_ON(1); } #endif - check_paca_psize(ea, mm, psize, user_region); + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); goto bail; } @@ -1153,13 +1139,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) "to 4kB pages because of " "non-cacheable mapping\n"); psize = mmu_vmalloc_psize = MMU_PAGE_4K; -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif + copro_flush_all_slbs(mm); } } - check_paca_psize(ea, mm, psize, user_region); + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); #endif /* CONFIG_PPC_64K_PAGES */ #ifdef CONFIG_PPC_HAS_HASH_64K @@ -1194,6 +1179,17 @@ bail: exception_exit(prev_state); return rc; } +EXPORT_SYMBOL_GPL(hash_page_mm); + +int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +{ + struct mm_struct *mm = current->mm; + + if (REGION_ID(ea) == VMALLOC_REGION_ID) + mm = &init_mm; + + return hash_page_mm(mm, ea, access, trap); +} EXPORT_SYMBOL_GPL(hash_page); void hash_preload(struct mm_struct *mm, unsigned long ea, diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 826893fcb3a..5f5e6328c21 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -18,6 +18,57 @@ #include <linux/mm.h> #include <asm/machdep.h> +static void invalidate_old_hpte(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize) +{ + int i, max_hpte_count, valid; + unsigned long s_addr; + unsigned char *hpte_slot_array; + unsigned long hidx, shift, vpn, hash, slot; + + s_addr = addr & HPAGE_PMD_MASK; + hpte_slot_array = get_hpte_slot_array(pmdp); + /* + * IF we try to do a HUGE PTE update after a withdraw is done. + * we will find the below NULL. This happens when we do + * split_huge_page_pmd + */ + if (!hpte_slot_array) + return; + + if (ppc_md.hugepage_invalidate) + return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize); + /* + * No bluk hpte removal support, invalidate each entry + */ + shift = mmu_psize_defs[psize].shift; + max_hpte_count = HPAGE_PMD_SIZE >> shift; + for (i = 0; i < max_hpte_count; i++) { + /* + * 8 bits per each hpte entries + * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] + */ + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + ppc_md.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, 0); + } +} + + int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, int local, int ssize, unsigned int psize) @@ -33,7 +84,9 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * atomically mark the linux large page PMD busy and dirty */ do { - old_pmd = pmd_val(*pmdp); + pmd_t pmd = ACCESS_ONCE(*pmdp); + + old_pmd = pmd_val(pmd); /* If PMD busy, retry the access */ if (unlikely(old_pmd & _PAGE_BUSY)) return 0; @@ -85,6 +138,15 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, vpn = hpt_vpn(ea, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); + if (psize == MMU_PAGE_4K) { + /* + * invalidate the old hpte entry if we have that mapped via 64K + * base page size. This is because demote_segment won't flush + * hash page table entries. + */ + if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) + invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize); + } valid = hpte_valid(hpte_slot_array, index); if (valid) { @@ -107,11 +169,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * safely update this here. */ valid = 0; - new_pmd &= ~_PAGE_HPTEFLAGS; hpte_slot_array[index] = 0; - } else - /* clear the busy bits and set the hash pte bits */ - new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + } } if (!valid) { @@ -119,11 +178,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; -repeat: - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - - /* clear the busy bits and set the hash pte bits */ - new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + new_pmd |= _PAGE_HASHPTE; /* Add in WIMG bits */ rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | @@ -132,6 +187,8 @@ repeat: * enable the memory coherence always */ rflags |= HPTE_R_M; +repeat: + hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, @@ -172,8 +229,17 @@ repeat: mark_hpte_slot_valid(hpte_slot_array, index, slot); } /* - * No need to use ldarx/stdcx here + * Mark the pte with _PAGE_COMBO, if we are trying to hash it with + * base page size 4k. + */ + if (psize == MMU_PAGE_4K) + new_pmd |= _PAGE_COMBO; + /* + * The hpte valid is stored in the pgtable whose address is in the + * second half of the PMD. Order this against clearing of the busy bit in + * huge pmd. */ + smp_wmb(); *pmdp = __pmd(new_pmd & ~_PAGE_BUSY); return 0; } diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index cff59f1bec2..cad68ff8eca 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -106,11 +106,11 @@ unsigned long __max_low_memory = MAX_LOW_MEM; void MMU_setup(void) { /* Check for nobats option (used in mapin_ram). */ - if (strstr(cmd_line, "nobats")) { + if (strstr(boot_command_line, "nobats")) { __map_without_bats = 1; } - if (strstr(cmd_line, "noltlbs")) { + if (strstr(boot_command_line, "noltlbs")) { __map_without_ltlbs = 1; } #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index e3734edffa6..3481556a188 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -175,9 +175,10 @@ static unsigned long __meminit vmemmap_section_start(unsigned long page) static int __meminit vmemmap_populated(unsigned long start, int page_size) { unsigned long end = start + page_size; + start = (unsigned long)(pfn_to_page(vmemmap_section_start(start))); for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page))) - if (pfn_valid(vmemmap_section_start(start))) + if (pfn_valid(page_to_pfn((struct page *)start))) return 1; return 0; @@ -212,6 +213,13 @@ static void __meminit vmemmap_create_mapping(unsigned long start, for (i = 0; i < page_size; i += PAGE_SIZE) BUG_ON(map_kernel_page(start + i, phys, flags)); } + +#ifdef CONFIG_MEMORY_HOTPLUG +static void vmemmap_remove_mapping(unsigned long start, + unsigned long page_size) +{ +} +#endif #else /* CONFIG_PPC_BOOK3E */ static void __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, @@ -223,17 +231,39 @@ static void __meminit vmemmap_create_mapping(unsigned long start, mmu_kernel_ssize); BUG_ON(mapped < 0); } + +#ifdef CONFIG_MEMORY_HOTPLUG +static void vmemmap_remove_mapping(unsigned long start, + unsigned long page_size) +{ + int mapped = htab_remove_mapping(start, start + page_size, + mmu_vmemmap_psize, + mmu_kernel_ssize); + BUG_ON(mapped < 0); +} +#endif + #endif /* CONFIG_PPC_BOOK3E */ struct vmemmap_backing *vmemmap_list; +static struct vmemmap_backing *next; +static int num_left; +static int num_freed; static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) { - static struct vmemmap_backing *next; - static int num_left; + struct vmemmap_backing *vmem_back; + /* get from freed entries first */ + if (num_freed) { + num_freed--; + vmem_back = next; + next = next->list; + + return vmem_back; + } /* allocate a page when required and hand out chunks */ - if (!next || !num_left) { + if (!num_left) { next = vmemmap_alloc_block(PAGE_SIZE, node); if (unlikely(!next)) { WARN_ON(1); @@ -296,10 +326,85 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) return 0; } -void vmemmap_free(unsigned long start, unsigned long end) +#ifdef CONFIG_MEMORY_HOTPLUG +static unsigned long vmemmap_list_free(unsigned long start) { + struct vmemmap_backing *vmem_back, *vmem_back_prev; + + vmem_back_prev = vmem_back = vmemmap_list; + + /* look for it with prev pointer recorded */ + for (; vmem_back; vmem_back = vmem_back->list) { + if (vmem_back->virt_addr == start) + break; + vmem_back_prev = vmem_back; + } + + if (unlikely(!vmem_back)) { + WARN_ON(1); + return 0; + } + + /* remove it from vmemmap_list */ + if (vmem_back == vmemmap_list) /* remove head */ + vmemmap_list = vmem_back->list; + else + vmem_back_prev->list = vmem_back->list; + + /* next point to this freed entry */ + vmem_back->list = next; + next = vmem_back; + num_freed++; + + return vmem_back->phys; } +void __ref vmemmap_free(unsigned long start, unsigned long end) +{ + unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; + + start = _ALIGN_DOWN(start, page_size); + + pr_debug("vmemmap_free %lx...%lx\n", start, end); + + for (; start < end; start += page_size) { + unsigned long addr; + + /* + * the section has already be marked as invalid, so + * vmemmap_populated() true means some other sections still + * in this page, so skip it. + */ + if (vmemmap_populated(start, page_size)) + continue; + + addr = vmemmap_list_free(start); + if (addr) { + struct page *page = pfn_to_page(addr >> PAGE_SHIFT); + + if (PageReserved(page)) { + /* allocated from bootmem */ + if (page_size < PAGE_SIZE) { + /* + * this shouldn't happen, but if it is + * the case, leave the memory there + */ + WARN_ON_ONCE(1); + } else { + unsigned int nr_pages = + 1 << get_order(page_size); + while (nr_pages--) + free_reserved_page(page++); + } + } else + free_pages((unsigned long)(__va(addr)), + get_order(page_size)); + + vmemmap_remove_mapping(start, page_size); + } + } +} +#endif void register_page_bootmem_memmap(unsigned long section_nr, struct page *start_page, unsigned long size) { @@ -331,16 +436,16 @@ struct page *realmode_pfn_to_page(unsigned long pfn) if (pg_va < vmem_back->virt_addr) continue; - /* Check that page struct is not split between real pages */ - if ((pg_va + sizeof(struct page)) > - (vmem_back->virt_addr + page_size)) - return NULL; - - page = (struct page *) (vmem_back->phys + pg_va - + /* After vmemmap_list entry free is possible, need check all */ + if ((pg_va + sizeof(struct page)) <= + (vmem_back->virt_addr + page_size)) { + page = (struct page *) (vmem_back->phys + pg_va - vmem_back->virt_addr); - return page; + return page; + } } + /* Probably that page struct is split between real pages */ return NULL; } EXPORT_SYMBOL_GPL(realmode_pfn_to_page); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2c8e90f5789..8ebaac75c94 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -128,7 +128,8 @@ int arch_add_memory(int nid, u64 start, u64 size) return -EINVAL; /* this should work for most non-highmem platforms */ - zone = pgdata->node_zones; + zone = pgdata->node_zones + + zone_for_memory(nid, start, size, 0); return __add_pages(nid, zone, start_pfn, nr_pages); } @@ -259,6 +260,60 @@ static int __init mark_nonram_nosave(void) } return 0; } +#else /* CONFIG_NEED_MULTIPLE_NODES */ +static int __init mark_nonram_nosave(void) +{ + return 0; +} +#endif + +static bool zone_limits_final; + +static unsigned long max_zone_pfns[MAX_NR_ZONES] = { + [0 ... MAX_NR_ZONES - 1] = ~0UL +}; + +/* + * Restrict the specified zone and all more restrictive zones + * to be below the specified pfn. May not be called after + * paging_init(). + */ +void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit) +{ + int i; + + if (WARN_ON(zone_limits_final)) + return; + + for (i = zone; i >= 0; i--) { + if (max_zone_pfns[i] > pfn_limit) + max_zone_pfns[i] = pfn_limit; + } +} + +/* + * Find the least restrictive zone that is entirely below the + * specified pfn limit. Returns < 0 if no suitable zone is found. + * + * pfn_limit must be u64 because it can exceed 32 bits even on 32-bit + * systems -- the DMA limit can be higher than any possible real pfn. + */ +int dma_pfn_limit_to_zone(u64 pfn_limit) +{ + enum zone_type top_zone = ZONE_NORMAL; + int i; + +#ifdef CONFIG_HIGHMEM + top_zone = ZONE_HIGHMEM; +#endif + + for (i = top_zone; i >= 0; i--) { + if (max_zone_pfns[i] <= pfn_limit) + return i; + } + + return -EPERM; +} /* * paging_init() sets up the page tables - in fact we've already done this. @@ -267,7 +322,7 @@ void __init paging_init(void) { unsigned long long total_ram = memblock_phys_mem_size(); phys_addr_t top_of_ram = memblock_end_of_DRAM(); - unsigned long max_zone_pfns[MAX_NR_ZONES]; + enum zone_type top_zone; #ifdef CONFIG_PPC32 unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1); @@ -289,18 +344,20 @@ void __init paging_init(void) (unsigned long long)top_of_ram, total_ram); printk(KERN_DEBUG "Memory hole size: %ldMB\n", (long int)((top_of_ram - total_ram) >> 20)); - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + #ifdef CONFIG_HIGHMEM - max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT; - max_zone_pfns[ZONE_HIGHMEM] = top_of_ram >> PAGE_SHIFT; + top_zone = ZONE_HIGHMEM; + limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT); #else - max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; + top_zone = ZONE_NORMAL; #endif + + limit_zone_pfn(top_zone, top_of_ram >> PAGE_SHIFT); + zone_limits_final = true; free_area_init_nodes(max_zone_pfns); mark_nonram_nosave(); } -#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ static void __init register_page_bootmem_info(void) { diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c index 78fef6726e1..aa5a7fd8946 100644 --- a/arch/powerpc/mm/mmu_context_hash32.c +++ b/arch/powerpc/mm/mmu_context_hash32.c @@ -2,7 +2,7 @@ * This file contains the routines for handling the MMU on those * PowerPC implementations where the MMU substantially follows the * architecture specification. This includes the 6xx, 7xx, 7xxx, - * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * and 8260 implementations but excludes the 8xx and 4xx. * -- paulus * * Derived from arch/ppc/mm/init.c: diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3b181b22cd4..649666d5d1c 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -538,7 +538,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, */ static int numa_setup_cpu(unsigned long lcpu) { - int nid; + int nid = -1; struct device_node *cpu; /* @@ -555,19 +555,21 @@ static int numa_setup_cpu(unsigned long lcpu) if (!cpu) { WARN_ON(1); - nid = 0; - goto out; + if (cpu_present(lcpu)) + goto out_present; + else + goto out; } nid = of_node_to_nid_single(cpu); +out_present: if (nid < 0 || !node_online(nid)) nid = first_online_node; -out: - map_cpu_to_node(lcpu, nid); + map_cpu_to_node(lcpu, nid); of_node_put(cpu); - +out: return nid; } @@ -611,8 +613,8 @@ static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: unmap_cpu_from_node(lcpu); - break; ret = NOTIFY_OK; + break; #endif } return ret; @@ -1049,7 +1051,7 @@ static void __init mark_reserved_regions_for_nid(int nid) void __init do_init_bootmem(void) { - int nid; + int nid, cpu; min_low_pfn = 0; max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; @@ -1122,16 +1124,14 @@ void __init do_init_bootmem(void) reset_numa_cpu_lookup_table(); register_cpu_notifier(&ppc64_numa_nb); - cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, - (void *)(unsigned long)boot_cpuid); -} - -void __init paging_init(void) -{ - unsigned long max_zone_pfns[MAX_NR_ZONES]; - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT; - free_area_init_nodes(max_zone_pfns); + /* + * We need the numa_cpu_lookup_table to be accurate for all CPUs, + * even before we online them, so that we can use cpu_to_{node,mem} + * early in boot, cf. smp_prepare_cpus(). + */ + for_each_present_cpu(cpu) { + numa_setup_cpu((unsigned long)cpu); + } } static int __init early_numa(char *p) diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index c695943a513..c90e602677c 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -48,7 +48,7 @@ static inline int pte_looks_normal(pte_t pte) (_PAGE_PRESENT | _PAGE_USER); } -struct page * maybe_pte_to_page(pte_t pte) +static struct page *maybe_pte_to_page(pte_t pte) { unsigned long pfn = pte_pfn(pte); struct page *page; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 343a87fa78b..cf11342bf51 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -41,7 +41,7 @@ unsigned long ioremap_base; unsigned long ioremap_bot; EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ -#if defined(CONFIG_6xx) || defined(CONFIG_POWER3) +#ifdef CONFIG_6xx #define HAVE_BATS 1 #endif diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index f6ce1f111f5..c8d709ab489 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -54,6 +54,9 @@ #include "mmu_decl.h" +#define CREATE_TRACE_POINTS +#include <trace/events/thp.h> + /* Some sanity checking */ #if TASK_SIZE_USER64 > PGTABLE_RANGE #error TASK_SIZE_USER64 exceeds pagetable range @@ -68,7 +71,7 @@ unsigned long ioremap_bot = IOREMAP_BASE; #ifdef CONFIG_PPC_MMU_NOHASH -static void *early_alloc_pgtable(unsigned long size) +static __ref void *early_alloc_pgtable(unsigned long size) { void *pt; @@ -537,8 +540,9 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, old = pmd_val(*pmdp); *pmdp = __pmd((old & ~clr) | set); #endif + trace_hugepage_update(addr, old, clr, set); if (old & _PAGE_HASHPTE) - hpte_do_hugepage_flush(mm, addr, pmdp); + hpte_do_hugepage_flush(mm, addr, pmdp, old); return old; } @@ -642,10 +646,11 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, * If we didn't had the splitting flag set, go and flush the * HPTE entries. */ + trace_hugepage_splitting(address, old); if (!(old & _PAGE_SPLITTING)) { /* We need to flush the hpte */ if (old & _PAGE_HASHPTE) - hpte_do_hugepage_flush(vma->vm_mm, address, pmdp); + hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old); } /* * This ensures that generic code that rely on IRQ disabling @@ -709,6 +714,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, assert_spin_locked(&mm->page_table_lock); WARN_ON(!pmd_trans_huge(pmd)); #endif + trace_hugepage_set_pmd(addr, pmd); return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } @@ -723,7 +729,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, * neesd to be flushed. */ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) + pmd_t *pmdp, unsigned long old_pmd) { int ssize, i; unsigned long s_addr; @@ -745,12 +751,29 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, if (!hpte_slot_array) return; - /* get the base page size */ + /* get the base page size,vsid and segment size */ +#ifdef CONFIG_DEBUG_VM psize = get_slice_psize(mm, s_addr); + BUG_ON(psize == MMU_PAGE_16M); +#endif + if (old_pmd & _PAGE_COMBO) + psize = MMU_PAGE_4K; + else + psize = MMU_PAGE_64K; + + if (!is_kernel_addr(s_addr)) { + ssize = user_segment_size(s_addr); + vsid = get_vsid(mm->context.id, s_addr, ssize); + WARN_ON(vsid == 0); + } else { + vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize); + ssize = mmu_kernel_ssize; + } if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(mm, hpte_slot_array, - s_addr, psize); + return ppc_md.hugepage_invalidate(vsid, s_addr, + hpte_slot_array, + psize, ssize); /* * No bluk hpte removal support, invalidate each entry */ @@ -768,15 +791,6 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 11571e11883..5029dc19b51 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -2,7 +2,7 @@ * This file contains the routines for handling the MMU on those * PowerPC implementations where the MMU substantially follows the * architecture specification. This includes the 6xx, 7xx, 7xxx, - * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * and 8260 implementations but excludes the 8xx and 4xx. * -- paulus * * Derived from arch/ppc/mm/init.c: diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 0399a670295..6e450ca6652 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -46,9 +46,6 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize, return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot; } -#define slb_vsid_shift(ssize) \ - ((ssize) == MMU_SEGSIZE_256M? SLB_VSID_SHIFT: SLB_VSID_SHIFT_1T) - static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, unsigned long flags) { diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index b0c75cc15ef..8d7bda94d19 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -30,9 +30,11 @@ #include <linux/err.h> #include <linux/spinlock.h> #include <linux/export.h> +#include <linux/hugetlb.h> #include <asm/mman.h> #include <asm/mmu.h> -#include <asm/spu.h> +#include <asm/copro.h> +#include <asm/hugetlb.h> /* some sanity checks */ #if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE @@ -232,9 +234,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz spin_unlock_irqrestore(&slice_convert_lock, flags); -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif + copro_flush_all_slbs(mm); } /* @@ -671,9 +671,7 @@ void slice_set_psize(struct mm_struct *mm, unsigned long address, spin_unlock_irqrestore(&slice_convert_lock, flags); -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif + copro_flush_all_slbs(mm); } void slice_set_range_psize(struct mm_struct *mm, unsigned long start, diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c deleted file mode 100644 index 3f8efa6f299..00000000000 --- a/arch/powerpc/mm/stab.c +++ /dev/null @@ -1,286 +0,0 @@ -/* - * PowerPC64 Segment Translation Support. - * - * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com - * Copyright (c) 2001 Dave Engebretsen - * - * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/memblock.h> - -#include <asm/pgtable.h> -#include <asm/mmu.h> -#include <asm/mmu_context.h> -#include <asm/paca.h> -#include <asm/cputable.h> -#include <asm/prom.h> - -struct stab_entry { - unsigned long esid_data; - unsigned long vsid_data; -}; - -#define NR_STAB_CACHE_ENTRIES 8 -static DEFINE_PER_CPU(long, stab_cache_ptr); -static DEFINE_PER_CPU(long [NR_STAB_CACHE_ENTRIES], stab_cache); - -/* - * Create a segment table entry for the given esid/vsid pair. - */ -static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) -{ - unsigned long esid_data, vsid_data; - unsigned long entry, group, old_esid, castout_entry, i; - unsigned int global_entry; - struct stab_entry *ste, *castout_ste; - unsigned long kernel_segment = (esid << SID_SHIFT) >= PAGE_OFFSET; - - vsid_data = vsid << STE_VSID_SHIFT; - esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; - if (! kernel_segment) - esid_data |= STE_ESID_KS; - - /* Search the primary group first. */ - global_entry = (esid & 0x1f) << 3; - ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); - - /* Find an empty entry, if one exists. */ - for (group = 0; group < 2; group++) { - for (entry = 0; entry < 8; entry++, ste++) { - if (!(ste->esid_data & STE_ESID_V)) { - ste->vsid_data = vsid_data; - eieio(); - ste->esid_data = esid_data; - return (global_entry | entry); - } - } - /* Now search the secondary group. */ - global_entry = ((~esid) & 0x1f) << 3; - ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); - } - - /* - * Could not find empty entry, pick one with a round robin selection. - * Search all entries in the two groups. - */ - castout_entry = get_paca()->stab_rr; - for (i = 0; i < 16; i++) { - if (castout_entry < 8) { - global_entry = (esid & 0x1f) << 3; - ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); - castout_ste = ste + castout_entry; - } else { - global_entry = ((~esid) & 0x1f) << 3; - ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); - castout_ste = ste + (castout_entry - 8); - } - - /* Dont cast out the first kernel segment */ - if ((castout_ste->esid_data & ESID_MASK) != PAGE_OFFSET) - break; - - castout_entry = (castout_entry + 1) & 0xf; - } - - get_paca()->stab_rr = (castout_entry + 1) & 0xf; - - /* Modify the old entry to the new value. */ - - /* Force previous translations to complete. DRENG */ - asm volatile("isync" : : : "memory"); - - old_esid = castout_ste->esid_data >> SID_SHIFT; - castout_ste->esid_data = 0; /* Invalidate old entry */ - - asm volatile("sync" : : : "memory"); /* Order update */ - - castout_ste->vsid_data = vsid_data; - eieio(); /* Order update */ - castout_ste->esid_data = esid_data; - - asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); - /* Ensure completion of slbie */ - asm volatile("sync" : : : "memory"); - - return (global_entry | (castout_entry & 0x7)); -} - -/* - * Allocate a segment table entry for the given ea and mm - */ -static int __ste_allocate(unsigned long ea, struct mm_struct *mm) -{ - unsigned long vsid; - unsigned char stab_entry; - unsigned long offset; - - /* Kernel or user address? */ - if (is_kernel_addr(ea)) { - vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M); - } else { - if ((ea >= TASK_SIZE_USER64) || (! mm)) - return 1; - - vsid = get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M); - } - - stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); - - if (!is_kernel_addr(ea)) { - offset = __get_cpu_var(stab_cache_ptr); - if (offset < NR_STAB_CACHE_ENTRIES) - __get_cpu_var(stab_cache[offset++]) = stab_entry; - else - offset = NR_STAB_CACHE_ENTRIES+1; - __get_cpu_var(stab_cache_ptr) = offset; - - /* Order update */ - asm volatile("sync":::"memory"); - } - - return 0; -} - -int ste_allocate(unsigned long ea) -{ - return __ste_allocate(ea, current->mm); -} - -/* - * Do the segment table work for a context switch: flush all user - * entries from the table, then preload some probably useful entries - * for the new task - */ -void switch_stab(struct task_struct *tsk, struct mm_struct *mm) -{ - struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; - struct stab_entry *ste; - unsigned long offset; - unsigned long pc = KSTK_EIP(tsk); - unsigned long stack = KSTK_ESP(tsk); - unsigned long unmapped_base; - - /* Force previous translations to complete. DRENG */ - asm volatile("isync" : : : "memory"); - - /* - * We need interrupts hard-disabled here, not just soft-disabled, - * so that a PMU interrupt can't occur, which might try to access - * user memory (to get a stack trace) and possible cause an STAB miss - * which would update the stab_cache/stab_cache_ptr per-cpu variables. - */ - hard_irq_disable(); - - offset = __get_cpu_var(stab_cache_ptr); - if (offset <= NR_STAB_CACHE_ENTRIES) { - int i; - - for (i = 0; i < offset; i++) { - ste = stab + __get_cpu_var(stab_cache[i]); - ste->esid_data = 0; /* invalidate entry */ - } - } else { - unsigned long entry; - - /* Invalidate all entries. */ - ste = stab; - - /* Never flush the first entry. */ - ste += 1; - for (entry = 1; - entry < (HW_PAGE_SIZE / sizeof(struct stab_entry)); - entry++, ste++) { - unsigned long ea; - ea = ste->esid_data & ESID_MASK; - if (!is_kernel_addr(ea)) { - ste->esid_data = 0; - } - } - } - - asm volatile("sync; slbia; sync":::"memory"); - - __get_cpu_var(stab_cache_ptr) = 0; - - /* Now preload some entries for the new task */ - if (test_tsk_thread_flag(tsk, TIF_32BIT)) - unmapped_base = TASK_UNMAPPED_BASE_USER32; - else - unmapped_base = TASK_UNMAPPED_BASE_USER64; - - __ste_allocate(pc, mm); - - if (GET_ESID(pc) == GET_ESID(stack)) - return; - - __ste_allocate(stack, mm); - - if ((GET_ESID(pc) == GET_ESID(unmapped_base)) - || (GET_ESID(stack) == GET_ESID(unmapped_base))) - return; - - __ste_allocate(unmapped_base, mm); - - /* Order update */ - asm volatile("sync" : : : "memory"); -} - -/* - * Allocate segment tables for secondary CPUs. These must all go in - * the first (bolted) segment, so that do_stab_bolted won't get a - * recursive segment miss on the segment table itself. - */ -void __init stabs_alloc(void) -{ - int cpu; - - if (mmu_has_feature(MMU_FTR_SLB)) - return; - - for_each_possible_cpu(cpu) { - unsigned long newstab; - - if (cpu == 0) - continue; /* stab for CPU 0 is statically allocated */ - - newstab = memblock_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE, - 1<<SID_SHIFT); - newstab = (unsigned long)__va(newstab); - - memset((void *)newstab, 0, HW_PAGE_SIZE); - - paca[cpu].stab_addr = newstab; - paca[cpu].stab_real = __pa(newstab); - printk(KERN_INFO "Segment table for CPU %d at 0x%llx " - "virtual, 0x%llx absolute\n", - cpu, paca[cpu].stab_addr, paca[cpu].stab_real); - } -} - -/* - * Build an entry for the base kernel segment and put it into - * the segment table or SLB. All other segment table or SLB - * entries are faulted in. - */ -void stab_initialize(unsigned long stab) -{ - unsigned long vsid = get_kernel_vsid(PAGE_OFFSET, MMU_SEGSIZE_256M); - unsigned long stabreal; - - asm volatile("isync; slbia; isync":::"memory"); - make_ste(stab, GET_ESID(PAGE_OFFSET), vsid); - - /* Order update */ - asm volatile("sync":::"memory"); - - /* Set ASR */ - stabreal = get_paca()->stab_real | 0x1ul; - - mtspr(SPRN_ASR, stabreal); -} diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index c99f6510a0b..d2a94b85dbc 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -30,6 +30,8 @@ #include <asm/tlb.h> #include <asm/bug.h> +#include <trace/events/thp.h> + DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); /* @@ -213,10 +215,12 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, if (ptep == NULL) continue; pte = pte_val(*ptep); + if (hugepage_shift) + trace_hugepage_invalidate(start, pte_val(pte)); if (!(pte & _PAGE_HASHPTE)) continue; if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) - hpte_do_hugepage_flush(mm, start, (pmd_t *)pte); + hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte); else hpte_need_flush(mm, start, ptep, pte, 0); } diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 356e8b41fb0..89bf95bd63b 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -296,9 +296,12 @@ itlb_miss_fault_bolted: * r14 = page table base * r13 = PACA * r11 = tlb_per_core ptr - * r10 = cpu number + * r10 = crap (free to use) */ tlb_miss_common_e6500: + crmove cr2*4+2,cr0*4+2 /* cr2.eq != 0 if kernel address */ + +BEGIN_FTR_SECTION /* CPU_FTR_SMT */ /* * Search if we already have an indirect entry for that virtual * address, and if we do, bail out. @@ -309,6 +312,7 @@ tlb_miss_common_e6500: lhz r10,PACAPACAINDEX(r13) cmpdi r15,0 cmpdi cr1,r15,1 /* set cr1.eq = 0 for non-recursive */ + addi r10,r10,1 bne 2f stbcx. r10,0,r11 bne 1b @@ -322,18 +326,62 @@ tlb_miss_common_e6500: b 1b .previous + /* + * Erratum A-008139 says that we can't use tlbwe to change + * an indirect entry in any way (including replacing or + * invalidating) if the other thread could be in the process + * of a lookup. The workaround is to invalidate the entry + * with tlbilx before overwriting. + */ + + lbz r15,TCD_ESEL_NEXT(r11) + rlwinm r10,r15,16,0xff0000 + oris r10,r10,MAS0_TLBSEL(1)@h + mtspr SPRN_MAS0,r10 + isync + tlbre + mfspr r15,SPRN_MAS1 + andis. r15,r15,MAS1_VALID@h + beq 5f + +BEGIN_FTR_SECTION_NESTED(532) + mfspr r10,SPRN_MAS8 + rlwinm r10,r10,0,0x80000fff /* tgs,tlpid -> sgs,slpid */ + mtspr SPRN_MAS5,r10 +END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) + + mfspr r10,SPRN_MAS1 + rlwinm r15,r10,0,0x3fff0000 /* tid -> spid */ + rlwimi r15,r10,20,0x00000003 /* ind,ts -> sind,sas */ + mfspr r10,SPRN_MAS6 + mtspr SPRN_MAS6,r15 + mfspr r15,SPRN_MAS2 + isync + tlbilxva 0,r15 + isync + + mtspr SPRN_MAS6,r10 + +5: +BEGIN_FTR_SECTION_NESTED(532) + li r10,0 + mtspr SPRN_MAS8,r10 + mtspr SPRN_MAS5,r10 +END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) tlbsx 0,r16 mfspr r10,SPRN_MAS1 - andis. r10,r10,MAS1_VALID@h + andis. r15,r10,MAS1_VALID@h bne tlb_miss_done_e6500 - - /* Undo MAS-damage from the tlbsx */ +FTR_SECTION_ELSE mfspr r10,SPRN_MAS1 +ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) + oris r10,r10,MAS1_VALID@h - mtspr SPRN_MAS1,r10 - mtspr SPRN_MAS2,r15 + beq cr2,4f + rlwinm r10,r10,0,16,1 /* Clear TID */ +4: mtspr SPRN_MAS1,r10 /* Now, we need to walk the page tables. First check if we are in * range. @@ -394,11 +442,13 @@ tlb_miss_common_e6500: tlb_miss_done_e6500: .macro tlb_unlock_e6500 +BEGIN_FTR_SECTION beq cr1,1f /* no unlock if lock was recursively grabbed */ li r15,0 isync stb r15,0(r11) 1: +END_FTR_SECTION_IFSET(CPU_FTR_SMT) .endm tlb_unlock_e6500 @@ -407,12 +457,9 @@ tlb_miss_done_e6500: rfi tlb_miss_kernel_e6500: - mfspr r10,SPRN_MAS1 ld r14,PACA_KERNELPGD(r13) - cmpldi cr0,r15,8 /* Check for vmalloc region */ - rlwinm r10,r10,0,16,1 /* Clear TID */ - mtspr SPRN_MAS1,r10 - beq+ tlb_miss_common_e6500 + cmpldi cr1,r15,8 /* Check for vmalloc region */ + beq+ cr1,tlb_miss_common_e6500 tlb_miss_fault_e6500: tlb_unlock_e6500 diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 92cb18d52ea..f38ea4df6a8 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -581,42 +581,10 @@ static void setup_mmu_htw(void) /* * Early initialization of the MMU TLB code */ -static void __early_init_mmu(int boot_cpu) +static void early_init_this_mmu(void) { unsigned int mas4; - /* XXX This will have to be decided at runtime, but right - * now our boot and TLB miss code hard wires it. Ideally - * we should find out a suitable page size and patch the - * TLB miss code (either that or use the PACA to store - * the value we want) - */ - mmu_linear_psize = MMU_PAGE_1G; - - /* XXX This should be decided at runtime based on supported - * page sizes in the TLB, but for now let's assume 16M is - * always there and a good fit (which it probably is) - * - * Freescale booke only supports 4K pages in TLB0, so use that. - */ - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) - mmu_vmemmap_psize = MMU_PAGE_4K; - else - mmu_vmemmap_psize = MMU_PAGE_16M; - - /* XXX This code only checks for TLB 0 capabilities and doesn't - * check what page size combos are supported by the HW. It - * also doesn't handle the case where a separate array holds - * the IND entries from the array loaded by the PT. - */ - if (boot_cpu) { - /* Look for supported page sizes */ - setup_page_sizes(); - - /* Look for HW tablewalk support */ - setup_mmu_htw(); - } - /* Set MAS4 based on page table setting */ mas4 = 0x4 << MAS4_WIMGED_SHIFT; @@ -650,11 +618,6 @@ static void __early_init_mmu(int boot_cpu) } mtspr(SPRN_MAS4, mas4); - /* Set the global containing the top of the linear mapping - * for use by the TLB miss code - */ - linear_map_top = memblock_end_of_DRAM(); - #ifdef CONFIG_PPC_FSL_BOOK3E if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { unsigned int num_cams; @@ -662,10 +625,49 @@ static void __early_init_mmu(int boot_cpu) /* use a quarter of the TLBCAM for bolted linear map */ num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; linear_map_top = map_mem_in_cams(linear_map_top, num_cams); + } +#endif - /* limit memory so we dont have linear faults */ - memblock_enforce_memory_limit(linear_map_top); + /* A sync won't hurt us after mucking around with + * the MMU configuration + */ + mb(); +} +static void __init early_init_mmu_global(void) +{ + /* XXX This will have to be decided at runtime, but right + * now our boot and TLB miss code hard wires it. Ideally + * we should find out a suitable page size and patch the + * TLB miss code (either that or use the PACA to store + * the value we want) + */ + mmu_linear_psize = MMU_PAGE_1G; + + /* XXX This should be decided at runtime based on supported + * page sizes in the TLB, but for now let's assume 16M is + * always there and a good fit (which it probably is) + * + * Freescale booke only supports 4K pages in TLB0, so use that. + */ + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) + mmu_vmemmap_psize = MMU_PAGE_4K; + else + mmu_vmemmap_psize = MMU_PAGE_16M; + + /* XXX This code only checks for TLB 0 capabilities and doesn't + * check what page size combos are supported by the HW. It + * also doesn't handle the case where a separate array holds + * the IND entries from the array loaded by the PT. + */ + /* Look for supported page sizes */ + setup_page_sizes(); + + /* Look for HW tablewalk support */ + setup_mmu_htw(); + +#ifdef CONFIG_PPC_FSL_BOOK3E + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { if (book3e_htw_mode == PPC_HTW_NONE) { extlb_level_exc = EX_TLB_SIZE; patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); @@ -675,22 +677,41 @@ static void __early_init_mmu(int boot_cpu) } #endif - /* A sync won't hurt us after mucking around with - * the MMU configuration + /* Set the global containing the top of the linear mapping + * for use by the TLB miss code */ - mb(); + linear_map_top = memblock_end_of_DRAM(); +} + +static void __init early_mmu_set_memory_limit(void) +{ +#ifdef CONFIG_PPC_FSL_BOOK3E + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + /* + * Limit memory so we dont have linear faults. + * Unlike memblock_set_current_limit, which limits + * memory available during early boot, this permanently + * reduces the memory available to Linux. We need to + * do this because highmem is not supported on 64-bit. + */ + memblock_enforce_memory_limit(linear_map_top); + } +#endif memblock_set_current_limit(linear_map_top); } +/* boot cpu only */ void __init early_init_mmu(void) { - __early_init_mmu(1); + early_init_mmu_global(); + early_init_this_mmu(); + early_mmu_set_memory_limit(); } void early_init_mmu_secondary(void) { - __early_init_mmu(0); + early_init_this_mmu(); } void setup_initial_memory_limit(phys_addr_t first_memblock_base, diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 82e82cadcde..cbae2dfd053 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -25,7 +25,7 @@ static inline void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } -static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image, +static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx) { int i; @@ -121,7 +121,7 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) /* Assemble the body code between the prologue & epilogue. */ -static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, +static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, unsigned int *addrs) { @@ -569,7 +569,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, return 0; } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { unsigned int proglen; unsigned int alloclen; @@ -686,16 +686,17 @@ void bpf_jit_compile(struct sk_filter *fp) ((u64 *)image)[0] = (u64)code_base; ((u64 *)image)[1] = local_paca->kernel_toc; fp->bpf_func = (void *)image; - fp->jited = 1; + fp->jited = true; } out: kfree(addrs); return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile index 751ec7bd501..cedbbeced63 100644 --- a/arch/powerpc/oprofile/Makefile +++ b/arch/powerpc/oprofile/Makefile @@ -14,6 +14,6 @@ oprofile-y := $(DRIVER_OBJS) common.o backtrace.o oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \ cell/spu_profiler.o cell/vma_map.o \ cell/spu_task_sync.o -oprofile-$(CONFIG_PPC_BOOK3S_64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o +oprofile-$(CONFIG_PPC_BOOK3S_64) += op_model_power4.o op_model_pa6t.o oprofile-$(CONFIG_FSL_EMB_PERFMON) += op_model_fsl_emb.o oprofile-$(CONFIG_6xx) += op_model_7450.o diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index f75301f2c85..6adf55fa5d8 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -12,6 +12,7 @@ #include <asm/processor.h> #include <asm/uaccess.h> #include <asm/compat.h> +#include <asm/oprofile_impl.h> #define STACK_SP(STACK) *(STACK) diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c index c77348c5d46..bf094c5a4bd 100644 --- a/arch/powerpc/oprofile/common.c +++ b/arch/powerpc/oprofile/common.c @@ -205,9 +205,6 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) ops->sync_stop = model->sync_stop; break; #endif - case PPC_OPROFILE_RS64: - model = &op_model_rs64; - break; case PPC_OPROFILE_POWER4: model = &op_model_power4; break; diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c deleted file mode 100644 index 7e5b8ed3a1b..00000000000 --- a/arch/powerpc/oprofile/op_model_rs64.c +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/oprofile.h> -#include <linux/smp.h> -#include <asm/ptrace.h> -#include <asm/processor.h> -#include <asm/cputable.h> -#include <asm/oprofile_impl.h> - -#define dbg(args...) - -static void ctrl_write(unsigned int i, unsigned int val) -{ - unsigned int tmp = 0; - unsigned long shift = 0, mask = 0; - - dbg("ctrl_write %d %x\n", i, val); - - switch(i) { - case 0: - tmp = mfspr(SPRN_MMCR0); - shift = 6; - mask = 0x7F; - break; - case 1: - tmp = mfspr(SPRN_MMCR0); - shift = 0; - mask = 0x3F; - break; - case 2: - tmp = mfspr(SPRN_MMCR1); - shift = 31 - 4; - mask = 0x1F; - break; - case 3: - tmp = mfspr(SPRN_MMCR1); - shift = 31 - 9; - mask = 0x1F; - break; - case 4: - tmp = mfspr(SPRN_MMCR1); - shift = 31 - 14; - mask = 0x1F; - break; - case 5: - tmp = mfspr(SPRN_MMCR1); - shift = 31 - 19; - mask = 0x1F; - break; - case 6: - tmp = mfspr(SPRN_MMCR1); - shift = 31 - 24; - mask = 0x1F; - break; - case 7: - tmp = mfspr(SPRN_MMCR1); - shift = 31 - 28; - mask = 0xF; - break; - } - - tmp = tmp & ~(mask << shift); - tmp |= val << shift; - - switch(i) { - case 0: - case 1: - mtspr(SPRN_MMCR0, tmp); - break; - default: - mtspr(SPRN_MMCR1, tmp); - } - - dbg("ctrl_write mmcr0 %lx mmcr1 %lx\n", mfspr(SPRN_MMCR0), - mfspr(SPRN_MMCR1)); -} - -static unsigned long reset_value[OP_MAX_COUNTER]; - -static int num_counters; - -static int rs64_reg_setup(struct op_counter_config *ctr, - struct op_system_config *sys, - int num_ctrs) -{ - int i; - - num_counters = num_ctrs; - - for (i = 0; i < num_counters; ++i) - reset_value[i] = 0x80000000UL - ctr[i].count; - - /* XXX setup user and kernel profiling */ - return 0; -} - -static int rs64_cpu_setup(struct op_counter_config *ctr) -{ - unsigned int mmcr0; - - /* reset MMCR0 and set the freeze bit */ - mmcr0 = MMCR0_FC; - mtspr(SPRN_MMCR0, mmcr0); - - /* reset MMCR1, MMCRA */ - mtspr(SPRN_MMCR1, 0); - - if (cpu_has_feature(CPU_FTR_MMCRA)) - mtspr(SPRN_MMCRA, 0); - - mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE; - /* Only applies to POWER3, but should be safe on RS64 */ - mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE; - mtspr(SPRN_MMCR0, mmcr0); - - dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(), - mfspr(SPRN_MMCR0)); - dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), - mfspr(SPRN_MMCR1)); - - return 0; -} - -static int rs64_start(struct op_counter_config *ctr) -{ - int i; - unsigned int mmcr0; - - /* set the PMM bit (see comment below) */ - mtmsrd(mfmsr() | MSR_PMM); - - for (i = 0; i < num_counters; ++i) { - if (ctr[i].enabled) { - classic_ctr_write(i, reset_value[i]); - ctrl_write(i, ctr[i].event); - } else { - classic_ctr_write(i, 0); - } - } - - mmcr0 = mfspr(SPRN_MMCR0); - - /* - * now clear the freeze bit, counting will not start until we - * rfid from this excetion, because only at that point will - * the PMM bit be cleared - */ - mmcr0 &= ~MMCR0_FC; - mtspr(SPRN_MMCR0, mmcr0); - - dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); - return 0; -} - -static void rs64_stop(void) -{ - unsigned int mmcr0; - - /* freeze counters */ - mmcr0 = mfspr(SPRN_MMCR0); - mmcr0 |= MMCR0_FC; - mtspr(SPRN_MMCR0, mmcr0); - - dbg("stop on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); - - mb(); -} - -static void rs64_handle_interrupt(struct pt_regs *regs, - struct op_counter_config *ctr) -{ - unsigned int mmcr0; - int is_kernel; - int val; - int i; - unsigned long pc = mfspr(SPRN_SIAR); - - is_kernel = is_kernel_addr(pc); - - /* set the PMM bit (see comment below) */ - mtmsrd(mfmsr() | MSR_PMM); - - for (i = 0; i < num_counters; ++i) { - val = classic_ctr_read(i); - if (val < 0) { - if (ctr[i].enabled) { - oprofile_add_ext_sample(pc, regs, i, is_kernel); - classic_ctr_write(i, reset_value[i]); - } else { - classic_ctr_write(i, 0); - } - } - } - - mmcr0 = mfspr(SPRN_MMCR0); - - /* reset the perfmon trigger */ - mmcr0 |= MMCR0_PMXE; - - /* - * now clear the freeze bit, counting will not start until we - * rfid from this exception, because only at that point will - * the PMM bit be cleared - */ - mmcr0 &= ~MMCR0_FC; - mtspr(SPRN_MMCR0, mmcr0); -} - -struct op_powerpc_model op_model_rs64 = { - .reg_setup = rs64_reg_setup, - .cpu_setup = rs64_cpu_setup, - .start = rs64_start, - .stop = rs64_stop, - .handle_interrupt = rs64_handle_interrupt, -}; diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 74d1e780748..2396dda282c 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -35,7 +35,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; /* must be 16-byte aligned */ if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) return 0; - if (sp >= prev_sp + STACK_FRAME_OVERHEAD) + if (sp >= prev_sp + STACK_FRAME_MIN_SIZE) return 1; /* * sp could decrease when we jump off an interrupt stack diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index fe52db2eea6..a6995d4e93d 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -36,7 +36,12 @@ struct cpu_hw_events { struct perf_event *event[MAX_HWEVENTS]; u64 events[MAX_HWEVENTS]; unsigned int flags[MAX_HWEVENTS]; - unsigned long mmcr[3]; + /* + * The order of the MMCR array is: + * - 64-bit, MMCR0, MMCR1, MMCRA, MMCR2 + * - 32-bit, MMCR0, MMCR1, MMCR2 + */ + unsigned long mmcr[4]; struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS]; u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; @@ -54,9 +59,9 @@ struct cpu_hw_events { struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES]; }; -DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); -struct power_pmu *ppmu; +static struct power_pmu *ppmu; /* * Normally, to ignore kernel events we set the FCS (freeze counters @@ -112,14 +117,14 @@ static bool is_ebb_event(struct perf_event *event) { return false; } static int ebb_event_check(struct perf_event *event) { return 0; } static void ebb_event_add(struct perf_event *event) { } static void ebb_switch_out(unsigned long mmcr0) { } -static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0) +static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) { - return mmcr0; + return cpuhw->mmcr[0]; } static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} -void power_pmu_flush_branch_stack(void) {} +static void power_pmu_flush_branch_stack(void) {} static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } #endif /* CONFIG_PPC32 */ @@ -370,7 +375,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event) /* Called from ctxsw to prevent one process's branch entries to * mingle with the other process's entries during context switch. */ -void power_pmu_flush_branch_stack(void) +static void power_pmu_flush_branch_stack(void) { if (ppmu->bhrb_nr) power_pmu_bhrb_reset(); @@ -403,7 +408,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Processing BHRB entries */ -void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) +static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) { u64 val; u64 addr; @@ -542,8 +547,10 @@ static void ebb_switch_out(unsigned long mmcr0) current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK; } -static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0) +static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) { + unsigned long mmcr0 = cpuhw->mmcr[0]; + if (!ebb) goto out; @@ -568,7 +575,15 @@ static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0) mtspr(SPRN_SIAR, current->thread.siar); mtspr(SPRN_SIER, current->thread.sier); mtspr(SPRN_SDAR, current->thread.sdar); - mtspr(SPRN_MMCR2, current->thread.mmcr2); + + /* + * Merge the kernel & user values of MMCR2. The semantics we implement + * are that the user MMCR2 can set bits, ie. cause counters to freeze, + * but not clear bits. If a task wants to be able to clear bits, ie. + * unfreeze counters, it should not set exclude_xxx in its events and + * instead manage the MMCR2 entirely by itself. + */ + mtspr(SPRN_MMCR2, cpuhw->mmcr[3] | current->thread.mmcr2); out: return mmcr0; } @@ -915,6 +930,14 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[], int i, n, first; struct perf_event *event; + /* + * If the PMU we're on supports per event exclude settings then we + * don't need to do any of this logic. NB. This assumes no PMU has both + * per event exclude and limited PMCs. + */ + if (ppmu->flags & PPMU_ARCH_207S) + return 0; + n = n_prev + n_new; if (n <= 1) return 0; @@ -1219,28 +1242,31 @@ static void power_pmu_enable(struct pmu *pmu) } /* - * Compute MMCR* values for the new set of events + * Clear all MMCR settings and recompute them for the new set of events. */ + memset(cpuhw->mmcr, 0, sizeof(cpuhw->mmcr)); + if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, - cpuhw->mmcr)) { + cpuhw->mmcr, cpuhw->event)) { /* shouldn't ever get here */ printk(KERN_ERR "oops compute_mmcr failed\n"); goto out; } - /* - * Add in MMCR0 freeze bits corresponding to the - * attr.exclude_* bits for the first event. - * We have already checked that all events have the - * same values for these bits as the first event. - */ - event = cpuhw->event[0]; - if (event->attr.exclude_user) - cpuhw->mmcr[0] |= MMCR0_FCP; - if (event->attr.exclude_kernel) - cpuhw->mmcr[0] |= freeze_events_kernel; - if (event->attr.exclude_hv) - cpuhw->mmcr[0] |= MMCR0_FCHV; + if (!(ppmu->flags & PPMU_ARCH_207S)) { + /* + * Add in MMCR0 freeze bits corresponding to the attr.exclude_* + * bits for the first event. We have already checked that all + * events have the same value for these bits as the first event. + */ + event = cpuhw->event[0]; + if (event->attr.exclude_user) + cpuhw->mmcr[0] |= MMCR0_FCP; + if (event->attr.exclude_kernel) + cpuhw->mmcr[0] |= freeze_events_kernel; + if (event->attr.exclude_hv) + cpuhw->mmcr[0] |= MMCR0_FCHV; + } /* * Write the new configuration to MMCR* with the freeze @@ -1252,6 +1278,8 @@ static void power_pmu_enable(struct pmu *pmu) mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) | MMCR0_FC); + if (ppmu->flags & PPMU_ARCH_207S) + mtspr(SPRN_MMCR2, cpuhw->mmcr[3]); /* * Read off any pre-existing events that need to move @@ -1307,10 +1335,7 @@ static void power_pmu_enable(struct pmu *pmu) out_enable: pmao_restore_workaround(ebb); - if (ppmu->flags & PPMU_ARCH_207S) - mtspr(SPRN_MMCR2, 0); - - mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]); + mmcr0 = ebb_switch_in(ebb, cpuhw); mb(); if (cpuhw->bhrb_users) @@ -1548,7 +1573,7 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -void power_pmu_start_txn(struct pmu *pmu) +static void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1562,7 +1587,7 @@ void power_pmu_start_txn(struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -void power_pmu_cancel_txn(struct pmu *pmu) +static void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1575,7 +1600,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -int power_pmu_commit_txn(struct pmu *pmu) +static int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; @@ -1863,7 +1888,7 @@ ssize_t power_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%02llx\n", pmu_attr->id); } -struct pmu power_pmu = { +static struct pmu power_pmu = { .pmu_enable = power_pmu_enable, .pmu_disable = power_pmu_disable, .event_init = power_pmu_event_init, diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 66d0f179650..6c8710dd90c 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -75,86 +75,6 @@ static struct attribute_group format_group = { static struct kmem_cache *hv_page_cache; -/* - * read_offset_data - copy data from one buffer to another while treating the - * source buffer as a small view on the total avaliable - * source data. - * - * @dest: buffer to copy into - * @dest_len: length of @dest in bytes - * @requested_offset: the offset within the source data we want. Must be > 0 - * @src: buffer to copy data from - * @src_len: length of @src in bytes - * @source_offset: the offset in the sorce data that (src,src_len) refers to. - * Must be > 0 - * - * returns the number of bytes copied. - * - * The following ascii art shows the various buffer possitioning we need to - * handle, assigns some arbitrary varibles to points on the buffer, and then - * shows how we fiddle with those values to get things we care about (copy - * start in src and copy len) - * - * s = @src buffer - * d = @dest buffer - * '.' areas in d are written to. - * - * u - * x w v z - * d |.........| - * s |----------------------| - * - * u - * x w z v - * d |........------| - * s |------------------| - * - * x w u,z,v - * d |........| - * s |------------------| - * - * x,w u,v,z - * d |..................| - * s |------------------| - * - * x u - * w v z - * d |........| - * s |------------------| - * - * x z w v - * d |------| - * s |------| - * - * x = source_offset - * w = requested_offset - * z = source_offset + src_len - * v = requested_offset + dest_len - * - * w_offset_in_s = w - x = requested_offset - source_offset - * z_offset_in_s = z - x = src_len - * v_offset_in_s = v - x = request_offset + dest_len - src_len - */ -static ssize_t read_offset_data(void *dest, size_t dest_len, - loff_t requested_offset, void *src, - size_t src_len, loff_t source_offset) -{ - size_t w_offset_in_s = requested_offset - source_offset; - size_t z_offset_in_s = src_len; - size_t v_offset_in_s = requested_offset + dest_len - src_len; - size_t u_offset_in_s = min(z_offset_in_s, v_offset_in_s); - size_t copy_len = u_offset_in_s - w_offset_in_s; - - if (requested_offset < 0 || source_offset < 0) - return -EINVAL; - - if (z_offset_in_s <= w_offset_in_s) - return 0; - - memcpy(dest, src + w_offset_in_s, copy_len); - return copy_len; -} - static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096, unsigned long version, unsigned long index) @@ -183,8 +103,10 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, { unsigned long hret; ssize_t ret = 0; - size_t catalog_len = 0, catalog_page_len = 0, page_count = 0; + size_t catalog_len = 0, catalog_page_len = 0; loff_t page_offset = 0; + loff_t offset_in_page; + size_t copy_len; uint64_t catalog_version_num = 0; void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); struct hv_24x7_catalog_page_0 *page_0 = page; @@ -202,7 +124,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, catalog_len = catalog_page_len * 4096; page_offset = offset / 4096; - page_count = count / 4096; + offset_in_page = offset % 4096; if (page_offset >= catalog_page_len) goto e_free; @@ -216,18 +138,23 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, } } - ret = read_offset_data(buf, count, offset, - page, 4096, page_offset * 4096); + copy_len = 4096 - offset_in_page; + if (copy_len > count) + copy_len = count; + + memcpy(buf, page+offset_in_page, copy_len); + ret = copy_len; + e_free: if (hret) pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:" " rc=%ld\n", catalog_version_num, page_offset, hret); - kfree(page); + kmem_cache_free(hv_page_cache, page); - pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n", - offset, page_offset, count, page_count, catalog_len, - catalog_page_len, ret); + pr_devel("catalog_read: offset=%lld(%lld) count=%zu " + "catalog_len=%zu(%zu) => %zd\n", offset, page_offset, + count, catalog_len, catalog_page_len, ret); return ret; } @@ -294,7 +221,7 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, u16 lpar, u64 *res, bool success_expected) { - unsigned long ret; + unsigned long ret = -ENOMEM; /* * request_buffer and result_buffer are not required to be 4k aligned, @@ -304,7 +231,27 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, struct reqb { struct hv_24x7_request_buffer buf; struct hv_24x7_request req; - } __packed __aligned(4096) request_buffer = { + } __packed *request_buffer; + + struct { + struct hv_24x7_data_result_buffer buf; + struct hv_24x7_result res; + struct hv_24x7_result_element elem; + __be64 result; + } __packed *result_buffer; + + BUILD_BUG_ON(sizeof(*request_buffer) > 4096); + BUILD_BUG_ON(sizeof(*result_buffer) > 4096); + + request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); + if (!request_buffer) + goto out; + + result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); + if (!result_buffer) + goto out_free_request_buffer; + + *request_buffer = (struct reqb) { .buf = { .interface_version = HV_24X7_IF_VERSION_CURRENT, .num_requests = 1, @@ -320,28 +267,27 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, } }; - struct resb { - struct hv_24x7_data_result_buffer buf; - struct hv_24x7_result res; - struct hv_24x7_result_element elem; - __be64 result; - } __packed __aligned(4096) result_buffer = {}; - ret = plpar_hcall_norets(H_GET_24X7_DATA, - virt_to_phys(&request_buffer), sizeof(request_buffer), - virt_to_phys(&result_buffer), sizeof(result_buffer)); + virt_to_phys(request_buffer), sizeof(*request_buffer), + virt_to_phys(result_buffer), sizeof(*result_buffer)); if (ret) { if (success_expected) - pr_err_ratelimited("hcall failed: %d %#x %#x %d => 0x%lx (%ld) detail=0x%x failing ix=%x\n", - domain, offset, ix, lpar, - ret, ret, - result_buffer.buf.detailed_rc, - result_buffer.buf.failing_request_ix); - return ret; + pr_err_ratelimited("hcall failed: %d %#x %#x %d => " + "0x%lx (%ld) detail=0x%x failing ix=%x\n", + domain, offset, ix, lpar, ret, ret, + result_buffer->buf.detailed_rc, + result_buffer->buf.failing_request_ix); + goto out_free_result_buffer; } - *res = be64_to_cpu(result_buffer.result); + *res = be64_to_cpu(result_buffer->result); + +out_free_result_buffer: + kfree(result_buffer); +out_free_request_buffer: + kfree(request_buffer); +out: return ret; } diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c index fe21b515ca4..d115c5635bf 100644 --- a/arch/powerpc/perf/mpc7450-pmu.c +++ b/arch/powerpc/perf/mpc7450-pmu.c @@ -260,8 +260,9 @@ static const u32 pmcsel_mask[N_COUNTER] = { /* * Compute MMCR0/1/2 values for a set of events. */ -static int mpc7450_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) +static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], + unsigned long mmcr[], + struct perf_event *pevents[]) { u8 event_index[N_CLASSES][N_COUNTER]; int n_classevent[N_CLASSES]; diff --git a/arch/powerpc/perf/power4-pmu.c b/arch/powerpc/perf/power4-pmu.c index 9103a1de864..ce6072fa481 100644 --- a/arch/powerpc/perf/power4-pmu.c +++ b/arch/powerpc/perf/power4-pmu.c @@ -356,7 +356,7 @@ static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[]) } static int p4_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) { unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel, lower; diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index b03b6dc0172..0526dac6600 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -452,7 +452,7 @@ static int power5p_marked_instr_event(u64 event) } static int power5p_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = 0; diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index 1e8ce423c3a..4dc99f9f796 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -383,7 +383,7 @@ static int power5_marked_instr_event(u64 event) } static int power5_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 31128e086fe..9c9d646b68a 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -175,7 +175,7 @@ static int power6_marked_instr_event(u64 event) * Assign PMC numbers and compute MMCR1 value for a set of events */ static int p6_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 56c67bca2f7..5b62f238929 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -245,7 +245,7 @@ static int power7_marked_instr_event(u64 event) } static int power7_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 639cd915658..396351db601 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/perf_event.h> #include <asm/firmware.h> +#include <asm/cputable.h> /* @@ -266,6 +267,11 @@ #define MMCRA_SDAR_MODE_TLB (1ull << 42) #define MMCRA_IFM_SHIFT 30 +/* Bits in MMCR2 for POWER8 */ +#define MMCR2_FCS(pmc) (1ull << (63 - (((pmc) - 1) * 9))) +#define MMCR2_FCP(pmc) (1ull << (62 - (((pmc) - 1) * 9))) +#define MMCR2_FCH(pmc) (1ull << (57 - (((pmc) - 1) * 9))) + static inline bool event_is_fab_match(u64 event) { @@ -393,9 +399,10 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long } static int power8_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], + struct perf_event *pevents[]) { - unsigned long mmcra, mmcr1, unit, combine, psel, cache, val; + unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val; unsigned int pmc, pmc_inuse; int i; @@ -410,7 +417,7 @@ static int power8_compute_mmcr(u64 event[], int n_ev, /* In continous sampling mode, update SDAR on TLB miss */ mmcra = MMCRA_SDAR_MODE_TLB; - mmcr1 = 0; + mmcr1 = mmcr2 = 0; /* Second pass: assign PMCs, set all MMCR1 fields */ for (i = 0; i < n_ev; ++i) { @@ -472,6 +479,19 @@ static int power8_compute_mmcr(u64 event[], int n_ev, mmcra |= val << MMCRA_IFM_SHIFT; } + if (pevents[i]->attr.exclude_user) + mmcr2 |= MMCR2_FCP(pmc); + + if (pevents[i]->attr.exclude_hv) + mmcr2 |= MMCR2_FCH(pmc); + + if (pevents[i]->attr.exclude_kernel) { + if (cpu_has_feature(CPU_FTR_HVMODE)) + mmcr2 |= MMCR2_FCH(pmc); + else + mmcr2 |= MMCR2_FCS(pmc); + } + hwc[i] = pmc - 1; } @@ -491,6 +511,7 @@ static int power8_compute_mmcr(u64 event[], int n_ev, mmcr[1] = mmcr1; mmcr[2] = mmcra; + mmcr[3] = mmcr2; return 0; } diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 20139ceeacf..8b6a8a36fa3 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -257,7 +257,7 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) } static int p970_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) { unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel; diff --git a/arch/powerpc/platforms/40x/ep405.c b/arch/powerpc/platforms/40x/ep405.c index b0389bbe4f9..ddc12a1926e 100644 --- a/arch/powerpc/platforms/40x/ep405.c +++ b/arch/powerpc/platforms/40x/ep405.c @@ -49,7 +49,7 @@ static void __iomem *bcsr_regs; /* there's more, can't be bothered typing them tho */ -static __initdata struct of_device_id ep405_of_bus[] = { +static const struct of_device_id ep405_of_bus[] __initconst = { { .compatible = "ibm,plb3", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/40x/ppc40x_simple.c b/arch/powerpc/platforms/40x/ppc40x_simple.c index 8f3920e5a04..b0c46375dd9 100644 --- a/arch/powerpc/platforms/40x/ppc40x_simple.c +++ b/arch/powerpc/platforms/40x/ppc40x_simple.c @@ -24,7 +24,7 @@ #include <linux/init.h> #include <linux/of_platform.h> -static __initdata struct of_device_id ppc40x_of_bus[] = { +static const struct of_device_id ppc40x_of_bus[] __initconst = { { .compatible = "ibm,plb3", }, { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c index d0fc6866b00..9aa7ae2f416 100644 --- a/arch/powerpc/platforms/40x/virtex.c +++ b/arch/powerpc/platforms/40x/virtex.c @@ -17,7 +17,7 @@ #include <asm/xilinx_pci.h> #include <asm/ppc4xx.h> -static struct of_device_id xilinx_of_bus_ids[] __initdata = { +static const struct of_device_id xilinx_of_bus_ids[] __initconst = { { .compatible = "xlnx,plb-v46-1.00.a", }, { .compatible = "xlnx,plb-v34-1.01.a", }, { .compatible = "xlnx,plb-v34-1.02.a", }, diff --git a/arch/powerpc/platforms/40x/walnut.c b/arch/powerpc/platforms/40x/walnut.c index 8b691df72f7..f7ac2d0fcb4 100644 --- a/arch/powerpc/platforms/40x/walnut.c +++ b/arch/powerpc/platforms/40x/walnut.c @@ -28,7 +28,7 @@ #include <asm/pci-bridge.h> #include <asm/ppc4xx.h> -static __initdata struct of_device_id walnut_of_bus[] = { +static const struct of_device_id walnut_of_bus[] __initconst = { { .compatible = "ibm,plb3", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 4d88f6a1905..82f2da28cd2 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -215,9 +215,9 @@ config AKEBONO select NET_VENDOR_IBM select IBM_EMAC_EMAC4 select IBM_EMAC_RGMII_WOL - select USB - select USB_OHCI_HCD_PLATFORM - select USB_EHCI_HCD_PLATFORM + select USB if USB_SUPPORT + select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD + select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD select MMC_SDHCI select MMC_SDHCI_PLTFM select MMC_SDHCI_OF_476GTR diff --git a/arch/powerpc/platforms/44x/canyonlands.c b/arch/powerpc/platforms/44x/canyonlands.c index e300dd4c89b..22ca5430c9c 100644 --- a/arch/powerpc/platforms/44x/canyonlands.c +++ b/arch/powerpc/platforms/44x/canyonlands.c @@ -33,7 +33,7 @@ #define BCSR_USB_EN 0x11 -static __initdata struct of_device_id ppc460ex_of_bus[] = { +static const struct of_device_id ppc460ex_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/ebony.c b/arch/powerpc/platforms/44x/ebony.c index 6a4232bbdf8..ae893226392 100644 --- a/arch/powerpc/platforms/44x/ebony.c +++ b/arch/powerpc/platforms/44x/ebony.c @@ -28,7 +28,7 @@ #include <asm/pci-bridge.h> #include <asm/ppc4xx.h> -static __initdata struct of_device_id ebony_of_bus[] = { +static const struct of_device_id ebony_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c index 4241bc82580..c7c6758b3cf 100644 --- a/arch/powerpc/platforms/44x/iss4xx.c +++ b/arch/powerpc/platforms/44x/iss4xx.c @@ -32,7 +32,7 @@ #include <asm/mpic.h> #include <asm/mmu.h> -static __initdata struct of_device_id iss4xx_of_bus[] = { +static const struct of_device_id iss4xx_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,plb6", }, { .compatible = "ibm,opb", }, diff --git a/arch/powerpc/platforms/44x/ppc44x_simple.c b/arch/powerpc/platforms/44x/ppc44x_simple.c index 3ffb915446e..573c3d2689c 100644 --- a/arch/powerpc/platforms/44x/ppc44x_simple.c +++ b/arch/powerpc/platforms/44x/ppc44x_simple.c @@ -24,7 +24,7 @@ #include <linux/init.h> #include <linux/of_platform.h> -static __initdata struct of_device_id ppc44x_of_bus[] = { +static const struct of_device_id ppc44x_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index 33986c1a05d..58db9d08396 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -38,7 +38,7 @@ #include <linux/pci.h> #include <linux/i2c.h> -static struct of_device_id ppc47x_of_bus[] __initdata = { +static const struct of_device_id ppc47x_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,plb6", }, { .compatible = "ibm,opb", }, diff --git a/arch/powerpc/platforms/44x/sam440ep.c b/arch/powerpc/platforms/44x/sam440ep.c index 9e09b835758..3ee4a03c149 100644 --- a/arch/powerpc/platforms/44x/sam440ep.c +++ b/arch/powerpc/platforms/44x/sam440ep.c @@ -29,7 +29,7 @@ #include <asm/ppc4xx.h> #include <linux/i2c.h> -static __initdata struct of_device_id sam440ep_of_bus[] = { +static const struct of_device_id sam440ep_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c index cf96ccaa760..ad272c17c64 100644 --- a/arch/powerpc/platforms/44x/virtex.c +++ b/arch/powerpc/platforms/44x/virtex.c @@ -21,7 +21,7 @@ #include <asm/ppc4xx.h> #include "44x.h" -static struct of_device_id xilinx_of_bus_ids[] __initdata = { +static const struct of_device_id xilinx_of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "xlnx,plb-v46-1.00.a", }, { .compatible = "xlnx,plb-v46-1.02.a", }, diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 534574a97ec..501333cf42c 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -25,9 +25,10 @@ #include <asm/time.h> #include <asm/uic.h> #include <asm/ppc4xx.h> +#include <asm/dma.h> -static __initdata struct of_device_id warp_of_bus[] = { +static const struct of_device_id warp_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index adb95f03d4d..e996e007bc4 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -337,7 +337,7 @@ void __init mpc512x_init_IRQ(void) /* * Nodes to do bus probe on, soc and localbus */ -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "fsl,mpc5121-immr", }, { .compatible = "fsl,mpc5121-localbus", }, { .compatible = "fsl,mpc5121-mbx", }, diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 6e19b0ad5d2..3feffde9128 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -13,6 +13,7 @@ #include <generated/utsrelease.h> #include <linux/pci.h> #include <linux/of.h> +#include <asm/dma.h> #include <asm/prom.h> #include <asm/time.h> #include <asm/machdep.h> diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c index 1843bc93201..7492de3cf6d 100644 --- a/arch/powerpc/platforms/52xx/lite5200.c +++ b/arch/powerpc/platforms/52xx/lite5200.c @@ -34,13 +34,13 @@ */ /* mpc5200 device tree match tables */ -static struct of_device_id mpc5200_cdm_ids[] __initdata = { +static const struct of_device_id mpc5200_cdm_ids[] __initconst = { { .compatible = "fsl,mpc5200-cdm", }, { .compatible = "mpc5200-cdm", }, {} }; -static struct of_device_id mpc5200_gpio_ids[] __initdata = { +static const struct of_device_id mpc5200_gpio_ids[] __initconst = { { .compatible = "fsl,mpc5200-gpio", }, { .compatible = "mpc5200-gpio", }, {} diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 070d315dd6c..32cae33c426 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -30,7 +30,7 @@ #include <asm/machdep.h> #include <asm/mpc52xx.h> -static struct of_device_id mpc5200_gpio_ids[] __initdata = { +static const struct of_device_id mpc5200_gpio_ids[] __initconst = { { .compatible = "fsl,mpc5200-gpio", }, { .compatible = "mpc5200-gpio", }, {} diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index d7e94f49532..26993826a79 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -23,12 +23,12 @@ #include <asm/mpc52xx.h> /* MPC5200 device tree match tables */ -static struct of_device_id mpc52xx_xlb_ids[] __initdata = { +static const struct of_device_id mpc52xx_xlb_ids[] __initconst = { { .compatible = "fsl,mpc5200-xlb", }, { .compatible = "mpc5200-xlb", }, {} }; -static struct of_device_id mpc52xx_bus_ids[] __initdata = { +static const struct of_device_id mpc52xx_bus_ids[] __initconst = { { .compatible = "fsl,mpc5200-immr", }, { .compatible = "fsl,mpc5200b-immr", }, { .compatible = "simple-bus", }, @@ -108,21 +108,21 @@ void __init mpc52xx_declare_of_platform_devices(void) /* * match tables used by mpc52xx_map_common_devices() */ -static struct of_device_id mpc52xx_gpt_ids[] __initdata = { +static const struct of_device_id mpc52xx_gpt_ids[] __initconst = { { .compatible = "fsl,mpc5200-gpt", }, { .compatible = "mpc5200-gpt", }, /* old */ {} }; -static struct of_device_id mpc52xx_cdm_ids[] __initdata = { +static const struct of_device_id mpc52xx_cdm_ids[] __initconst = { { .compatible = "fsl,mpc5200-cdm", }, { .compatible = "mpc5200-cdm", }, /* old */ {} }; -static const struct of_device_id mpc52xx_gpio_simple[] = { +static const struct of_device_id mpc52xx_gpio_simple[] __initconst = { { .compatible = "fsl,mpc5200-gpio", }, {} }; -static const struct of_device_id mpc52xx_gpio_wkup[] = { +static const struct of_device_id mpc52xx_gpio_wkup[] __initconst = { { .compatible = "fsl,mpc5200-gpio-wkup", }, {} }; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index 37f7a89c10f..f8f0081759f 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -564,7 +564,7 @@ static int mpc52xx_lpbfifo_remove(struct platform_device *op) return 0; } -static struct of_device_id mpc52xx_lpbfifo_match[] = { +static const struct of_device_id mpc52xx_lpbfifo_match[] = { { .compatible = "fsl,mpc5200-lpbfifo", }, {}, }; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index 2898b737deb..2944bc84b9d 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -119,12 +119,12 @@ /* MPC5200 device tree match tables */ -static struct of_device_id mpc52xx_pic_ids[] __initdata = { +static const struct of_device_id mpc52xx_pic_ids[] __initconst = { { .compatible = "fsl,mpc5200-pic", }, { .compatible = "mpc5200-pic", }, {} }; -static struct of_device_id mpc52xx_sdma_ids[] __initdata = { +static const struct of_device_id mpc52xx_sdma_ids[] __initconst = { { .compatible = "fsl,mpc5200-bestcomm", }, { .compatible = "mpc5200-bestcomm", }, {} diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index 79799b29ffe..3d0c3a01143 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -298,7 +298,7 @@ static void __init ep8248e_setup_arch(void) ppc_md.progress("ep8248e_setup_arch(), finish", 0); } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "fsl,ep8248e-bcsr", }, {}, diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c index 058cc1895c8..387b446f416 100644 --- a/arch/powerpc/platforms/82xx/km82xx.c +++ b/arch/powerpc/platforms/82xx/km82xx.c @@ -180,7 +180,7 @@ static void __init km82xx_setup_arch(void) ppc_md.progress("km82xx_setup_arch(), finish", 0); } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/82xx/mpc8272_ads.c b/arch/powerpc/platforms/82xx/mpc8272_ads.c index 6a14cf50f4a..d24deacf07d 100644 --- a/arch/powerpc/platforms/82xx/mpc8272_ads.c +++ b/arch/powerpc/platforms/82xx/mpc8272_ads.c @@ -181,7 +181,7 @@ static void __init mpc8272_ads_setup_arch(void) ppc_md.progress("mpc8272_ads_setup_arch(), finish", 0); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/82xx/pq2fads.c b/arch/powerpc/platforms/82xx/pq2fads.c index e5f82ec8df1..3a5164ad10a 100644 --- a/arch/powerpc/platforms/82xx/pq2fads.c +++ b/arch/powerpc/platforms/82xx/pq2fads.c @@ -168,7 +168,7 @@ static int __init pq2fads_probe(void) return of_flat_dt_is_compatible(root, "fsl,pq2fads"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index e238b6a55b1..463fa91ee5b 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -141,7 +141,8 @@ static int mcu_gpiochip_add(struct mcu *mcu) static int mcu_gpiochip_remove(struct mcu *mcu) { - return gpiochip_remove(&mcu->gc); + gpiochip_remove(&mcu->gc); + return 0; } static int mcu_probe(struct i2c_client *client, const struct i2c_device_id *id) @@ -213,7 +214,7 @@ static const struct i2c_device_id mcu_ids[] = { }; MODULE_DEVICE_TABLE(i2c, mcu_ids); -static struct of_device_id mcu_of_match_table[] = { +static const struct of_device_id mcu_of_match_table[] = { { .compatible = "fsl,mcu-mpc8349emitx", }, { }, }; diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index 125336f750c..ef9d01a049c 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -114,7 +114,7 @@ void __init mpc83xx_ipic_and_qe_init_IRQ(void) } #endif /* CONFIG_QUICC_ENGINE */ -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .type = "soc", }, { .compatible = "soc", }, { .compatible = "simple-bus" }, diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c index a494fa57bdf..80aea8c4b5a 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_itx.c +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -38,7 +38,7 @@ #include "mpc83xx.h" -static struct of_device_id __initdata mpc834x_itx_ids[] = { +static const struct of_device_id mpc834x_itx_ids[] __initconst = { { .compatible = "fsl,pq2pro-localbus", }, {}, }; diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 4b4c081df94..eeb80e25214 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -321,7 +321,7 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = { .end = mpc83xx_suspend_end, }; -static struct of_device_id pmc_match[]; +static const struct of_device_id pmc_match[]; static int pmc_probe(struct platform_device *ofdev) { const struct of_device_id *match; @@ -420,7 +420,7 @@ static struct pmc_type pmc_types[] = { } }; -static struct of_device_id pmc_match[] = { +static const struct of_device_id pmc_match[] = { { .compatible = "fsl,mpc8313-pmc", .data = &pmc_types[0], diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index f442120e003..f22635a71d0 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -274,9 +274,9 @@ config CORENET_GENERIC For 32bit kernel, the following boards are supported: P2041 RDB, P3041 DS, P4080 DS, kmcoge4, and OCA4080 For 64bit kernel, the following boards are supported: - T4240 QDS and B4 QDS + T208x QDS/RDB, T4240 QDS/RDB and B4 QDS The following boards are supported for both 32bit and 64bit kernel: - P5020 DS, P5040 DS and T104xQDS + P5020 DS, P5040 DS and T104xQDS/RDB endif # FSL_SOC_BOOKE diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index b564b5e23f7..4a9ad871a16 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -14,7 +14,7 @@ #include "mpc85xx.h" -static struct of_device_id __initdata mpc85xx_common_ids[] = { +static const struct of_device_id mpc85xx_common_ids[] __initconst = { { .type = "soc", }, { .compatible = "soc", }, { .compatible = "simple-bus", }, diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 5db1e117fdd..e56b89a792e 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -20,6 +20,7 @@ #include <asm/time.h> #include <asm/machdep.h> #include <asm/pci-bridge.h> +#include <asm/pgtable.h> #include <asm/ppc-pci.h> #include <mm/mmu_decl.h> #include <asm/prom.h> @@ -67,6 +68,16 @@ void __init corenet_gen_setup_arch(void) swiotlb_detect_4g(); +#if defined(CONFIG_FSL_PCI) && defined(CONFIG_ZONE_DMA32) + /* + * Inbound windows don't cover the full lower 4 GiB + * due to conflicts with PCICSRBAR and outbound windows, + * so limit the DMA32 zone to 2 GiB, to allow consistent + * allocations to succeed. + */ + limit_zone_pfn(ZONE_DMA32, 1UL << (31 - PAGE_SHIFT)); +#endif + pr_info("%s board\n", ppc_md.name); mpc85xx_qe_init(); @@ -119,38 +130,31 @@ static const char * const boards[] __initconst = { "fsl,P4080DS", "fsl,P5020DS", "fsl,P5040DS", + "fsl,T2080QDS", + "fsl,T2080RDB", + "fsl,T2081QDS", "fsl,T4240QDS", + "fsl,T4240RDB", "fsl,B4860QDS", "fsl,B4420QDS", "fsl,B4220QDS", "fsl,T1040QDS", "fsl,T1042QDS", + "fsl,T1040RDB", + "fsl,T1042RDB", + "fsl,T1042RDB_PI", "keymile,kmcoge4", NULL }; -static const char * const hv_boards[] __initconst = { - "fsl,P2041RDB-hv", - "fsl,P3041DS-hv", - "fsl,OCA4080-hv", - "fsl,P4080DS-hv", - "fsl,P5020DS-hv", - "fsl,P5040DS-hv", - "fsl,T4240QDS-hv", - "fsl,B4860QDS-hv", - "fsl,B4420QDS-hv", - "fsl,B4220QDS-hv", - "fsl,T1040QDS-hv", - "fsl,T1042QDS-hv", - NULL -}; - /* * Called very early, device-tree isn't unflattened */ static int __init corenet_generic_probe(void) { unsigned long root = of_get_flat_dt_root(); + char hv_compat[24]; + int i; #ifdef CONFIG_SMP extern struct smp_ops_t smp_85xx_ops; #endif @@ -159,21 +163,26 @@ static int __init corenet_generic_probe(void) return 1; /* Check if we're running under the Freescale hypervisor */ - if (of_flat_dt_match(root, hv_boards)) { - ppc_md.init_IRQ = ehv_pic_init; - ppc_md.get_irq = ehv_pic_get_irq; - ppc_md.restart = fsl_hv_restart; - ppc_md.power_off = fsl_hv_halt; - ppc_md.halt = fsl_hv_halt; + for (i = 0; boards[i]; i++) { + snprintf(hv_compat, sizeof(hv_compat), "%s-hv", boards[i]); + if (of_flat_dt_is_compatible(root, hv_compat)) { + ppc_md.init_IRQ = ehv_pic_init; + + ppc_md.get_irq = ehv_pic_get_irq; + ppc_md.restart = fsl_hv_restart; + ppc_md.power_off = fsl_hv_halt; + ppc_md.halt = fsl_hv_halt; #ifdef CONFIG_SMP - /* - * Disable the timebase sync operations because we can't write - * to the timebase registers under the hypervisor. - */ - smp_85xx_ops.give_timebase = NULL; - smp_85xx_ops.take_timebase = NULL; + /* + * Disable the timebase sync operations because we + * can't write to the timebase registers under the + * hypervisor. + */ + smp_85xx_ops.give_timebase = NULL; + smp_85xx_ops.take_timebase = NULL; #endif - return 1; + return 1; + } } return 0; diff --git a/arch/powerpc/platforms/85xx/ppa8548.c b/arch/powerpc/platforms/85xx/ppa8548.c index 3daff7c6356..12019f17f29 100644 --- a/arch/powerpc/platforms/85xx/ppa8548.c +++ b/arch/powerpc/platforms/85xx/ppa8548.c @@ -59,7 +59,7 @@ static void ppa8548_show_cpuinfo(struct seq_file *m) seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .type = "soc", }, { .compatible = "simple-bus", }, diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index 7f267329354..8ad2fe6f200 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -18,6 +18,7 @@ #include <linux/kernel.h> #include <linux/of_fdt.h> #include <asm/machdep.h> +#include <asm/pgtable.h> #include <asm/time.h> #include <asm/udbg.h> #include <asm/mpic.h> @@ -44,6 +45,15 @@ static void __init qemu_e500_setup_arch(void) fsl_pci_assign_primary(); swiotlb_detect_4g(); +#if defined(CONFIG_FSL_PCI) && defined(CONFIG_ZONE_DMA32) + /* + * Inbound windows don't cover the full lower 4 GiB + * due to conflicts with PCICSRBAR and outbound windows, + * so limit the DMA32 zone to 2 GiB, to allow consistent + * allocations to succeed. + */ + limit_zone_pfn(ZONE_DMA32, 1UL << (31 - PAGE_SHIFT)); +#endif mpc85xx_smp_init(); } diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index bb75add6708..8162b041211 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -24,7 +24,7 @@ static struct device_node *halt_node; -static struct of_device_id child_match[] = { +static const struct of_device_id child_match[] = { { .compatible = "sgy,gpio-halt", }, @@ -147,7 +147,7 @@ static int gpio_halt_remove(struct platform_device *pdev) return 0; } -static struct of_device_id gpio_halt_match[] = { +static const struct of_device_id gpio_halt_match[] = { /* We match on the gpio bus itself and scan the children since they * wont be matched against us. We know the bus wont match until it * has been registered too. */ diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index ba093f55367..d7c1e69f307 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -28,6 +28,7 @@ #include <asm/dbell.h> #include <asm/fsl_guts.h> #include <asm/code-patching.h> +#include <asm/cputhreads.h> #include <sysdev/fsl_soc.h> #include <sysdev/mpic.h> @@ -168,6 +169,24 @@ static inline u32 read_spin_table_addr_l(void *spin_table) return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l); } +#ifdef CONFIG_PPC64 +static void wake_hw_thread(void *info) +{ + void fsl_secondary_thread_init(void); + unsigned long imsr1, inia1; + int nr = *(const int *)info; + + imsr1 = MSR_KERNEL; + inia1 = *(unsigned long *)fsl_secondary_thread_init; + + mttmr(TMRN_IMSR1, imsr1); + mttmr(TMRN_INIA1, inia1); + mtspr(SPRN_TENS, TEN_THREAD(1)); + + smp_generic_kick_cpu(nr); +} +#endif + static int smp_85xx_kick_cpu(int nr) { unsigned long flags; @@ -183,6 +202,31 @@ static int smp_85xx_kick_cpu(int nr) pr_debug("smp_85xx_kick_cpu: kick CPU #%d\n", nr); +#ifdef CONFIG_PPC64 + /* Threads don't use the spin table */ + if (cpu_thread_in_core(nr) != 0) { + int primary = cpu_first_thread_sibling(nr); + + if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT))) + return -ENOENT; + + if (cpu_thread_in_core(nr) != 1) { + pr_err("%s: cpu %d: invalid hw thread %d\n", + __func__, nr, cpu_thread_in_core(nr)); + return -ENOENT; + } + + if (!cpu_online(primary)) { + pr_err("%s: cpu %d: primary %d not online\n", + __func__, nr, primary); + return -ENOENT; + } + + smp_call_function_single(primary, wake_hw_thread, &nr, 0); + return 0; + } +#endif + np = of_get_cpu_node(nr, NULL); cpu_rel_addr = of_get_property(np, "cpu-release-addr", NULL); diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c index c23f3443880..bf17933b20f 100644 --- a/arch/powerpc/platforms/86xx/gef_ppc9a.c +++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c @@ -213,7 +213,7 @@ static long __init mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c index 8a6ac20686e..8facf587386 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc310.c +++ b/arch/powerpc/platforms/86xx/gef_sbc310.c @@ -200,7 +200,7 @@ static long __init mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c index 06c72636f29..8c9058df564 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc610.c +++ b/arch/powerpc/platforms/86xx/gef_sbc610.c @@ -190,7 +190,7 @@ static long __init mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c index d479d68fbb2..55413a547ea 100644 --- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c +++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c @@ -85,7 +85,7 @@ static void __init mpc8610_suspend_init(void) static inline void mpc8610_suspend_init(void) { } #endif /* CONFIG_SUSPEND */ -static struct of_device_id __initdata mpc8610_ids[] = { +static const struct of_device_id mpc8610_ids[] __initconst = { { .compatible = "fsl,mpc8610-immr", }, { .compatible = "fsl,mpc8610-guts", }, { .compatible = "simple-bus", }, diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index e8bf3fae560..07ccb1b0cc7 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -127,7 +127,7 @@ mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "fsl,srio", }, { .compatible = "gianfar", }, diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c index b47a8fd0f3d..6810b71d54a 100644 --- a/arch/powerpc/platforms/86xx/sbc8641d.c +++ b/arch/powerpc/platforms/86xx/sbc8641d.c @@ -92,7 +92,7 @@ mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c index 82363e98f50..61cae4c1edb 100644 --- a/arch/powerpc/platforms/8xx/adder875.c +++ b/arch/powerpc/platforms/8xx/adder875.c @@ -92,7 +92,7 @@ static int __init adder875_probe(void) return of_flat_dt_is_compatible(root, "analogue-and-micro,adder875"); } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/8xx/ep88xc.c b/arch/powerpc/platforms/8xx/ep88xc.c index e62166681d0..2bedeb7d5f8 100644 --- a/arch/powerpc/platforms/8xx/ep88xc.c +++ b/arch/powerpc/platforms/8xx/ep88xc.c @@ -147,7 +147,7 @@ static int __init ep88xc_probe(void) return of_flat_dt_is_compatible(root, "fsl,ep88xc"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 587a2828b06..d3037747031 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -18,7 +18,6 @@ #include <linux/fsl_devices.h> #include <asm/io.h> -#include <asm/mpc8xx.h> #include <asm/8xx_immap.h> #include <asm/prom.h> #include <asm/fs_pd.h> @@ -28,8 +27,6 @@ #include "mpc8xx.h" -struct mpc8xx_pcmcia_ops m8xx_pcmcia_ops; - extern int cpm_pic_init(void); extern int cpm_get_irq(void); diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c index 63084640c5c..78180c5e73f 100644 --- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c @@ -122,7 +122,7 @@ static int __init mpc86xads_probe(void) return of_flat_dt_is_compatible(root, "fsl,mpc866ads"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c index c1262581b63..4d62bf9dc78 100644 --- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c @@ -35,7 +35,6 @@ #include <asm/page.h> #include <asm/processor.h> #include <asm/time.h> -#include <asm/mpc8xx.h> #include <asm/8xx_immap.h> #include <asm/cpm1.h> #include <asm/fs_pd.h> @@ -46,61 +45,6 @@ static u32 __iomem *bcsr, *bcsr5; -#ifdef CONFIG_PCMCIA_M8XX -static void pcmcia_hw_setup(int slot, int enable) -{ - if (enable) - clrbits32(&bcsr[1], BCSR1_PCCEN); - else - setbits32(&bcsr[1], BCSR1_PCCEN); -} - -static int pcmcia_set_voltage(int slot, int vcc, int vpp) -{ - u32 reg = 0; - - switch (vcc) { - case 0: - break; - case 33: - reg |= BCSR1_PCCVCC0; - break; - case 50: - reg |= BCSR1_PCCVCC1; - break; - default: - return 1; - } - - switch (vpp) { - case 0: - break; - case 33: - case 50: - if (vcc == vpp) - reg |= BCSR1_PCCVPP1; - else - return 1; - break; - case 120: - if ((vcc == 33) || (vcc == 50)) - reg |= BCSR1_PCCVPP0; - else - return 1; - default: - return 1; - } - - /* first, turn off all power */ - clrbits32(&bcsr[1], 0x00610000); - - /* enable new powersettings */ - setbits32(&bcsr[1], reg); - - return 0; -} -#endif - struct cpm_pin { int port, pin, flags; }; @@ -245,12 +189,6 @@ static void __init mpc885ads_setup_arch(void) of_detach_node(np); of_node_put(np); } - -#ifdef CONFIG_PCMCIA_M8XX - /* Set up board specific hook-ups.*/ - m8xx_pcmcia_ops.hw_ctrl = pcmcia_hw_setup; - m8xx_pcmcia_ops.voltage_set = pcmcia_set_voltage; -#endif } static int __init mpc885ads_probe(void) @@ -259,7 +197,7 @@ static int __init mpc885ads_probe(void) return of_flat_dt_is_compatible(root, "fsl,mpc885ads"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c index 251aba8759e..bee47a2b23e 100644 --- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -37,7 +37,6 @@ #include <asm/page.h> #include <asm/processor.h> #include <asm/time.h> -#include <asm/mpc8xx.h> #include <asm/8xx_immap.h> #include <asm/cpm1.h> #include <asm/fs_pd.h> @@ -125,7 +124,7 @@ static int __init tqm8xx_probe(void) return of_flat_dt_is_compatible(node, "tqc,tqm8xx"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index a41bd023647..76483e3acd6 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -61,7 +61,7 @@ choice help There are two families of 64 bit PowerPC chips supported. The most common ones are the desktop and server CPUs - (POWER3, RS64, POWER4, POWER5, POWER5+, POWER6, ...) + (POWER4, POWER5, 970, POWER5+, POWER6, POWER7, POWER8 ...) The other are the "embedded" processors compliant with the "Book 3E" variant of the architecture @@ -116,6 +116,12 @@ config POWER6_CPU config POWER7_CPU bool "POWER7" depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER + +config POWER8_CPU + bool "POWER8" + depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER config E5500_CPU bool "Freescale e5500" @@ -140,14 +146,6 @@ config 6xx depends on PPC32 && PPC_BOOK3S select PPC_HAVE_PMU_SUPPORT -config POWER3 - depends on PPC64 && PPC_BOOK3S - def_bool y - -config POWER4 - depends on PPC64 && PPC_BOOK3S - def_bool y - config TUNE_CELL bool "Optimize for Cell Broadband Engine" depends on PPC64 && PPC_BOOK3S @@ -244,7 +242,7 @@ config PHYS_64BIT config ALTIVEC bool "AltiVec Support" - depends on 6xx || POWER4 || (PPC_E500MC && PPC64) + depends on 6xx || PPC_BOOK3S_64 || (PPC_E500MC && PPC64) ---help--- This option enables kernel support for the Altivec extensions to the PowerPC processor. The kernel currently supports saving and restoring @@ -260,7 +258,7 @@ config ALTIVEC config VSX bool "VSX Support" - depends on POWER4 && ALTIVEC && PPC_FPU + depends on PPC_BOOK3S_64 && ALTIVEC && PPC_FPU ---help--- This option enables kernel support for the Vector Scaler extensions @@ -276,7 +274,7 @@ config VSX config PPC_ICSWX bool "Support for PowerPC icswx coprocessor instruction" - depends on POWER4 + depends on PPC_BOOK3S_64 default n ---help--- @@ -294,7 +292,7 @@ config PPC_ICSWX config PPC_ICSWX_PID bool "icswx requires direct PID management" - depends on PPC_ICSWX && POWER4 + depends on PPC_ICSWX default y ---help--- The PID register in server is used explicitly for ICSWX. In @@ -311,9 +309,13 @@ config PPC_ICSWX_USE_SIGILL If in doubt, say N here. +config SPE_POSSIBLE + def_bool y + depends on E200 || (E500 && !PPC_E500MC) + config SPE bool "SPE Support" - depends on E200 || (E500 && !PPC_E500MC) + depends on SPE_POSSIBLE default y ---help--- This option enables kernel support for the Signal Processing diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index 03aabc0e16a..2fe12046279 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -24,6 +24,7 @@ #include <asm/i8259.h> #include <asm/time.h> #include <asm/udbg.h> +#include <asm/dma.h> extern void __flush_disable_L1(void); diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 9978f594cac..870b6dbd4d1 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -86,6 +86,7 @@ config SPU_FS_64K_LS config SPU_BASE bool default n + select PPC_COPRO_BASE config CBE_RAS bool "RAS features for bare metal Cell BE" diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index fe053e7c73e..2d16884f67b 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -20,7 +20,7 @@ spu-manage-$(CONFIG_PPC_CELL_COMMON) += spu_manage.o obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ spu_notify.o \ - spu_syscalls.o spu_fault.o \ + spu_syscalls.o \ $(spu-priv1-y) \ $(spu-manage-y) \ spufs/ diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 85825b5401e..862b32702d2 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -199,14 +199,6 @@ out_error: return msic; } -static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type) -{ - if (!find_msi_translator(dev)) - return -ENODEV; - - return 0; -} - static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) { struct device_node *dn; @@ -416,7 +408,6 @@ static int axon_msi_probe(struct platform_device *device) ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs; ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs; - ppc_md.msi_check_device = axon_msi_check_device; axon_msi_debug_setup(dn, msic); diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c index 173568140a3..2b98a36ef8f 100644 --- a/arch/powerpc/platforms/cell/celleb_pci.c +++ b/arch/powerpc/platforms/cell/celleb_pci.c @@ -454,7 +454,7 @@ static struct celleb_phb_spec celleb_fake_pci_spec __initdata = { .setup = celleb_setup_fake_pci, }; -static struct of_device_id celleb_phb_match[] __initdata = { +static const struct of_device_id celleb_phb_match[] __initconst = { { .name = "pci-pseudo", .data = &celleb_fake_pci_spec, diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c index 1d5a4d8ddad..34e8ce2976a 100644 --- a/arch/powerpc/platforms/cell/celleb_setup.c +++ b/arch/powerpc/platforms/cell/celleb_setup.c @@ -102,7 +102,7 @@ static void __init celleb_setup_arch_common(void) #endif } -static struct of_device_id celleb_bus_ids[] __initdata = { +static const struct of_device_id celleb_bus_ids[] __initconst = { { .type = "scc", }, { .type = "ioif", }, /* old style */ {}, diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index f85db3a69b4..ffcbd242e66 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -76,10 +76,6 @@ static LIST_HEAD(spu_full_list); static DEFINE_SPINLOCK(spu_full_list_lock); static DEFINE_MUTEX(spu_full_list_mutex); -struct spu_slb { - u64 esid, vsid; -}; - void spu_invalidate_slbs(struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; @@ -149,7 +145,7 @@ static void spu_restart_dma(struct spu *spu) } } -static inline void spu_load_slb(struct spu *spu, int slbe, struct spu_slb *slb) +static inline void spu_load_slb(struct spu *spu, int slbe, struct copro_slb *slb) { struct spu_priv2 __iomem *priv2 = spu->priv2; @@ -167,45 +163,12 @@ static inline void spu_load_slb(struct spu *spu, int slbe, struct spu_slb *slb) static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) { - struct mm_struct *mm = spu->mm; - struct spu_slb slb; - int psize; - - pr_debug("%s\n", __func__); - - slb.esid = (ea & ESID_MASK) | SLB_ESID_V; + struct copro_slb slb; + int ret; - switch(REGION_ID(ea)) { - case USER_REGION_ID: -#ifdef CONFIG_PPC_MM_SLICES - psize = get_slice_psize(mm, ea); -#else - psize = mm->context.user_psize; -#endif - slb.vsid = (get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M) - << SLB_VSID_SHIFT) | SLB_VSID_USER; - break; - case VMALLOC_REGION_ID: - if (ea < VMALLOC_END) - psize = mmu_vmalloc_psize; - else - psize = mmu_io_psize; - slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) - << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; - break; - case KERNEL_REGION_ID: - psize = mmu_linear_psize; - slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) - << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; - break; - default: - /* Future: support kernel segments so that drivers - * can use SPUs. - */ - pr_debug("invalid region access at %016lx\n", ea); - return 1; - } - slb.vsid |= mmu_psize_defs[psize].sllp; + ret = copro_calculate_slb(spu->mm, ea, &slb); + if (ret) + return ret; spu_load_slb(spu, spu->slb_replace, &slb); @@ -253,7 +216,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) return 0; } -static void __spu_kernel_slb(void *addr, struct spu_slb *slb) +static void __spu_kernel_slb(void *addr, struct copro_slb *slb) { unsigned long ea = (unsigned long)addr; u64 llp; @@ -272,7 +235,7 @@ static void __spu_kernel_slb(void *addr, struct spu_slb *slb) * Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the * address @new_addr is present. */ -static inline int __slb_present(struct spu_slb *slbs, int nr_slbs, +static inline int __slb_present(struct copro_slb *slbs, int nr_slbs, void *new_addr) { unsigned long ea = (unsigned long)new_addr; @@ -297,7 +260,7 @@ static inline int __slb_present(struct spu_slb *slbs, int nr_slbs, void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa, void *code, int code_size) { - struct spu_slb slbs[4]; + struct copro_slb slbs[4]; int i, nr_slbs = 0; /* start and end addresses of both mappings */ void *addrs[] = { @@ -611,7 +574,6 @@ static int __init create_spu(void *data) int ret; static int number; unsigned long flags; - struct timespec ts; ret = -ENOMEM; spu = kzalloc(sizeof (*spu), GFP_KERNEL); @@ -652,8 +614,7 @@ static int __init create_spu(void *data) mutex_unlock(&spu_full_list_mutex); spu->stats.util_state = SPU_UTIL_IDLE_LOADED; - ktime_get_ts(&ts); - spu->stats.tstamp = timespec_to_ns(&ts); + spu->stats.tstamp = ktime_get_ns(); INIT_LIST_HEAD(&spu->aff_list); @@ -676,7 +637,6 @@ static const char *spu_state_names[] = { static unsigned long long spu_acct_time(struct spu *spu, enum spu_utilization_state state) { - struct timespec ts; unsigned long long time = spu->stats.times[state]; /* @@ -684,10 +644,8 @@ static unsigned long long spu_acct_time(struct spu *spu, * statistics are not updated. Apply the time delta from the * last recorded state of the spu. */ - if (spu->stats.util_state == state) { - ktime_get_ts(&ts); - time += timespec_to_ns(&ts) - spu->stats.tstamp; - } + if (spu->stats.util_state == state) + time += ktime_get_ns() - spu->stats.tstamp; return time / NSEC_PER_MSEC; } diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 9c6790d17ed..3b4152faeb1 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -36,7 +36,6 @@ atomic_t nr_spu_contexts = ATOMIC_INIT(0); struct spu_context *alloc_spu_context(struct spu_gang *gang) { struct spu_context *ctx; - struct timespec ts; ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) @@ -67,8 +66,7 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) __spu_update_sched_info(ctx); spu_set_timeslice(ctx); ctx->stats.util_state = SPU_UTIL_IDLE_LOADED; - ktime_get_ts(&ts); - ctx->stats.tstamp = timespec_to_ns(&ts); + ctx->stats.tstamp = ktime_get_ns(); atomic_inc(&nr_spu_contexts); goto out; diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index 8cb6260cc80..e45894a0811 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -138,7 +138,7 @@ int spufs_handle_class1(struct spu_context *ctx) if (ctx->state == SPU_STATE_RUNNABLE) ctx->spu->stats.hash_flt++; - /* we must not hold the lock when entering spu_handle_mm_fault */ + /* we must not hold the lock when entering copro_handle_mm_fault */ spu_release(ctx); access = (_PAGE_PRESENT | _PAGE_USER); @@ -149,7 +149,7 @@ int spufs_handle_class1(struct spu_context *ctx) /* hashing failed, so try the actual fault handler */ if (ret) - ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt); + ret = copro_handle_mm_fault(current->mm, ea, dsisr, &flt); /* * This is nasty: we need the state_mutex for all the bookkeeping even diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 90986923a53..d966bbe58b8 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -2338,7 +2338,6 @@ static const char *ctx_state_names[] = { static unsigned long long spufs_acct_time(struct spu_context *ctx, enum spu_utilization_state state) { - struct timespec ts; unsigned long long time = ctx->stats.times[state]; /* @@ -2351,8 +2350,7 @@ static unsigned long long spufs_acct_time(struct spu_context *ctx, * of the spu context. */ if (ctx->spu && ctx->stats.util_state == state) { - ktime_get_ts(&ts); - time += timespec_to_ns(&ts) - ctx->stats.tstamp; + time += ktime_get_ns() - ctx->stats.tstamp; } return time / NSEC_PER_MSEC; diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 4a0a64fe25d..998f632e7cc 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -1039,13 +1039,11 @@ void spuctx_switch_state(struct spu_context *ctx, { unsigned long long curtime; signed long long delta; - struct timespec ts; struct spu *spu; enum spu_utilization_state old_state; int node; - ktime_get_ts(&ts); - curtime = timespec_to_ns(&ts); + curtime = ktime_get_ns(); delta = curtime - ctx->stats.tstamp; WARN_ON(!mutex_is_locked(&ctx->state_mutex)); diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 7044fd36197..5b77b1919fd 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -258,7 +258,7 @@ static void chrp_init_early(void) struct device_node *node; const char *property; - if (strstr(cmd_line, "console=")) + if (strstr(boot_command_line, "console=")) return; /* find the boot console from /chosen/stdout */ if (!of_chosen) diff --git a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c index a138e14bad2..bd4ba5d7d56 100644 --- a/arch/powerpc/platforms/embedded6xx/gamecube.c +++ b/arch/powerpc/platforms/embedded6xx/gamecube.c @@ -90,7 +90,7 @@ define_machine(gamecube) { }; -static struct of_device_id gamecube_of_bus[] = { +static const struct of_device_id gamecube_of_bus[] = { { .compatible = "nintendo,flipper", }, { }, }; diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index 455e7c08742..168e1d80b2e 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -21,7 +21,7 @@ #include "mpc10x.h" -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .type = "soc", }, { .compatible = "simple-bus", }, {}, diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 25e3bfb64ef..1613303177e 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -149,7 +149,7 @@ static int __init mvme5100_add_bridge(struct device_node *dev) return 0; } -static struct of_device_id mvme5100_of_bus_ids[] __initdata = { +static const struct of_device_id mvme5100_of_bus_ids[] __initconst = { { .compatible = "hawk-bridge", }, {}, }; diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c index c458b60d14c..d572833ebd0 100644 --- a/arch/powerpc/platforms/embedded6xx/storcenter.c +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -24,7 +24,7 @@ #include "mpc10x.h" -static __initdata struct of_device_id storcenter_of_bus[] = { +static const struct of_device_id storcenter_of_bus[] __initconst = { { .name = "soc", }, {}, }; diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 6d8dadf19f0..388e29bab8f 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -235,7 +235,7 @@ define_machine(wii) { .machine_shutdown = wii_shutdown, }; -static struct of_device_id wii_of_bus[] = { +static const struct of_device_id wii_of_bus[] = { { .compatible = "nintendo,hollywood", }, { }, }; diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index 15adee54463..ada33358950 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -290,7 +290,7 @@ static int gpio_mdio_remove(struct platform_device *dev) return 0; } -static struct of_device_id gpio_mdio_match[] = +static const struct of_device_id gpio_mdio_match[] = { { .compatible = "gpio-mdio", diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 8c54de6d8ec..d71b2c7e840 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -393,7 +393,7 @@ static inline void pasemi_pcmcia_init(void) #endif -static struct of_device_id pasemi_bus_ids[] = { +static const struct of_device_id pasemi_bus_ids[] = { /* Unfortunately needed for legacy firmwares */ { .type = "localbus", }, { .type = "sdc", }, diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig index 1afd10f6785..607124bae2e 100644 --- a/arch/powerpc/platforms/powermac/Kconfig +++ b/arch/powerpc/platforms/powermac/Kconfig @@ -10,7 +10,7 @@ config PPC_PMAC config PPC_PMAC64 bool - depends on PPC_PMAC && POWER4 + depends on PPC_PMAC && PPC64 select MPIC select U3_DART select MPIC_U3_HT_IRQS diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 63d82bbc05e..4882bfd90e2 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -158,7 +158,7 @@ static inline int simple_feature_tweak(struct device_node *node, int type, return 0; } -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 static long ohare_htw_scc_enable(struct device_node *node, long param, long value) @@ -1318,7 +1318,7 @@ intrepid_aack_delay_enable(struct device_node *node, long param, long value) } -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ static long core99_read_gpio(struct device_node *node, long param, long value) @@ -1338,7 +1338,7 @@ core99_write_gpio(struct device_node *node, long param, long value) return 0; } -#ifdef CONFIG_POWER4 +#ifdef CONFIG_PPC64 static long g5_gmac_enable(struct device_node *node, long param, long value) { struct macio_chip *macio = &macio_chips[0]; @@ -1550,9 +1550,9 @@ void g5_phy_disable_cpu1(void) if (uninorth_maj == 3) UN_OUT(U3_API_PHY_CONFIG_1, 0); } -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 #ifdef CONFIG_PM @@ -1864,7 +1864,7 @@ core99_sleep_state(struct device_node *node, long param, long value) return 0; } -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ static long generic_dev_can_wake(struct device_node *node, long param, long value) @@ -1906,7 +1906,7 @@ static struct feature_table_entry any_features[] = { { 0, NULL } }; -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 /* OHare based motherboards. Currently, we only use these on the * 2400,3400 and 3500 series powerbooks. Some older desktops seem @@ -2056,7 +2056,7 @@ static struct feature_table_entry intrepid_features[] = { { 0, NULL } }; -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ /* G5 features */ @@ -2074,10 +2074,10 @@ static struct feature_table_entry g5_features[] = { { 0, NULL } }; -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ static struct pmac_mb_def pmac_mb_defs[] = { -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 /* * Desktops */ @@ -2342,7 +2342,7 @@ static struct pmac_mb_def pmac_mb_defs[] = { PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, }, -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ { "PowerMac7,2", "PowerMac G5", PMAC_TYPE_POWERMAC_G5, g5_features, 0, @@ -2373,7 +2373,7 @@ static struct pmac_mb_def pmac_mb_defs[] = { 0, }, #endif /* CONFIG_PPC64 */ -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ }; /* @@ -2441,7 +2441,7 @@ static int __init probe_motherboard(void) /* Fallback to selection depending on mac-io chip type */ switch(macio->type) { -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 case macio_grand_central: pmac_mb.model_id = PMAC_TYPE_PSURGE; pmac_mb.model_name = "Unknown PowerSurge"; @@ -2475,7 +2475,7 @@ static int __init probe_motherboard(void) pmac_mb.model_name = "Unknown Intrepid-based"; pmac_mb.features = intrepid_features; break; -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ case macio_keylargo2: pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2; pmac_mb.model_name = "Unknown K2-based"; @@ -2486,13 +2486,13 @@ static int __init probe_motherboard(void) pmac_mb.model_name = "Unknown Shasta-based"; pmac_mb.features = g5_features; break; -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ default: ret = -ENODEV; goto done; } found: -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 /* Fixup Hooper vs. Comet */ if (pmac_mb.model_id == PMAC_TYPE_HOOPER) { u32 __iomem * mach_id_ptr = ioremap(0xf3000034, 4); @@ -2546,9 +2546,9 @@ found: */ powersave_lowspeed = 1; -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ powersave_nap = 1; -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ /* Check for "mobile" machine */ if (model && (strncmp(model, "PowerBook", 9) == 0 @@ -2786,7 +2786,7 @@ set_initial_features(void) MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); } -#ifdef CONFIG_POWER4 +#ifdef CONFIG_PPC64 if (macio_chips[0].type == macio_keylargo2 || macio_chips[0].type == macio_shasta) { #ifndef CONFIG_SMP @@ -2805,28 +2805,23 @@ set_initial_features(void) /* Enable GMAC for now for PCI probing. It will be disabled * later on after PCI probe */ - np = of_find_node_by_name(NULL, "ethernet"); - while(np) { + for_each_node_by_name(np, "ethernet") if (of_device_is_compatible(np, "K2-GMAC")) g5_gmac_enable(np, 0, 1); - np = of_find_node_by_name(np, "ethernet"); - } /* Enable FW before PCI probe. Will be disabled later on * Note: We should have a batter way to check that we are * dealing with uninorth internal cell and not a PCI cell * on the external PCI. The code below works though. */ - np = of_find_node_by_name(NULL, "firewire"); - while(np) { + for_each_node_by_name(np, "firewire") { if (of_device_is_compatible(np, "pci106b,5811")) { macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; g5_fw_enable(np, 0, 1); } - np = of_find_node_by_name(np, "firewire"); } } -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ if (macio_chips[0].type == macio_keylargo || macio_chips[0].type == macio_pangea || @@ -2834,13 +2829,11 @@ set_initial_features(void) /* Enable GMAC for now for PCI probing. It will be disabled * later on after PCI probe */ - np = of_find_node_by_name(NULL, "ethernet"); - while(np) { + for_each_node_by_name(np, "ethernet") { if (np->parent && of_device_is_compatible(np->parent, "uni-north") && of_device_is_compatible(np, "gmac")) core99_gmac_enable(np, 0, 1); - np = of_find_node_by_name(np, "ethernet"); } /* Enable FW before PCI probe. Will be disabled later on @@ -2848,8 +2841,7 @@ set_initial_features(void) * dealing with uninorth internal cell and not a PCI cell * on the external PCI. The code below works though. */ - np = of_find_node_by_name(NULL, "firewire"); - while(np) { + for_each_node_by_name(np, "firewire") { if (np->parent && of_device_is_compatible(np->parent, "uni-north") && (of_device_is_compatible(np, "pci106b,18") || @@ -2858,18 +2850,16 @@ set_initial_features(void) macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; core99_firewire_enable(np, 0, 1); } - np = of_find_node_by_name(np, "firewire"); } /* Enable ATA-100 before PCI probe. */ np = of_find_node_by_name(NULL, "ata-6"); - while(np) { + for_each_node_by_name(np, "ata-6") { if (np->parent && of_device_is_compatible(np->parent, "uni-north") && of_device_is_compatible(np, "kauai-ata")) { core99_ata100_enable(np, 1); } - np = of_find_node_by_name(np, "ata-6"); } /* Switch airport off */ @@ -2895,7 +2885,7 @@ set_initial_features(void) MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N); } -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ /* On all machines, switch modem & serial ports off */ for_each_node_by_name(np, "ch-a") diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index cf7009b8c7b..7e868ccf3b0 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -698,7 +698,7 @@ static void __init fixup_nec_usb2(void) { struct device_node *nec; - for (nec = NULL; (nec = of_find_node_by_name(nec, "usb")) != NULL;) { + for_each_node_by_name(nec, "usb") { struct pci_controller *hose; u32 data; const u32 *prop; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 141f8899a63..b127a29ac52 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -336,7 +336,7 @@ static void __init pmac_setup_arch(void) #endif #ifdef CONFIG_ADB - if (strstr(cmd_line, "adb_sync")) { + if (strstr(boot_command_line, "adb_sync")) { extern int __adb_probe_sync; __adb_probe_sync = 1; } @@ -460,7 +460,7 @@ pmac_halt(void) static void __init pmac_init_early(void) { /* Enable early btext debug if requested */ - if (strstr(cmd_line, "btextdbg")) { + if (strstr(boot_command_line, "btextdbg")) { udbg_adb_init_early(); register_early_udbg_console(); } @@ -469,8 +469,8 @@ static void __init pmac_init_early(void) pmac_feature_init(); /* Initialize debug stuff */ - udbg_scc_init(!!strstr(cmd_line, "sccdbg")); - udbg_adb_init(!!strstr(cmd_line, "btextdbg")); + udbg_scc_init(!!strstr(boot_command_line, "sccdbg")); + udbg_adb_init(!!strstr(boot_command_line, "btextdbg")); #ifdef CONFIG_PPC64 iommu_init_early_dart(); diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 5cbd4d67d5c..af094ae03db 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -577,7 +577,7 @@ static void __init smp_core99_setup_i2c_hwsync(int ncpus) int ok; /* Look for the clock chip */ - while ((cc = of_find_node_by_name(cc, "i2c-hwclock")) != NULL) { + for_each_node_by_name(cc, "i2c-hwclock") { p = of_get_parent(cc); ok = p && of_device_is_compatible(p, "uni-n-i2c"); of_node_put(p); diff --git a/arch/powerpc/platforms/powermac/udbg_adb.c b/arch/powerpc/platforms/powermac/udbg_adb.c index 44e0b55a2a0..366bd221ede 100644 --- a/arch/powerpc/platforms/powermac/udbg_adb.c +++ b/arch/powerpc/platforms/powermac/udbg_adb.c @@ -191,7 +191,7 @@ int __init udbg_adb_init(int force_btext) * of type "adb". If not, we return a failure, but we keep the * bext output set for now */ - for (np = NULL; (np = of_find_node_by_name(np, "keyboard")) != NULL;) { + for_each_node_by_name(np, "keyboard") { struct device_node *parent = of_get_parent(np); int found = (parent && strcmp(parent->type, "adb") == 0); of_node_put(parent); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 4ad227d04c1..f241accc053 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,10 +1,11 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o -obj-y += opal-msglog.o +obj-y += opal-msglog.o opal-hmi.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o +obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 8ad0c5b891f..426814a2ede 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -66,6 +66,54 @@ static struct notifier_block ioda_eeh_nb = { }; #ifdef CONFIG_DEBUG_FS +static ssize_t ioda_eeh_ei_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_controller *hose = filp->private_data; + struct pnv_phb *phb = hose->private_data; + struct eeh_dev *edev; + struct eeh_pe *pe; + int pe_no, type, func; + unsigned long addr, mask; + char buf[50]; + int ret; + + if (!phb->eeh_ops || !phb->eeh_ops->err_inject) + return -ENXIO; + + ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (!ret) + return -EFAULT; + + /* Retrieve parameters */ + ret = sscanf(buf, "%x:%x:%x:%lx:%lx", + &pe_no, &type, &func, &addr, &mask); + if (ret != 5) + return -EINVAL; + + /* Retrieve PE */ + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return -ENOMEM; + edev->phb = hose; + edev->pe_config_addr = pe_no; + pe = eeh_pe_get(edev); + kfree(edev); + if (!pe) + return -ENODEV; + + /* Do error injection */ + ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask); + return ret < 0 ? ret : count; +} + +static const struct file_operations ioda_eeh_ei_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = ioda_eeh_ei_write, +}; + static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val) { struct pci_controller *hose = data; @@ -152,6 +200,10 @@ static int ioda_eeh_post_init(struct pci_controller *hose) if (!phb->has_dbgfs && phb->dbgfs) { phb->has_dbgfs = 1; + debugfs_create_file("err_injct", 0200, + phb->dbgfs, hose, + &ioda_eeh_ei_fops); + debugfs_create_file("err_injct_outbound", 0600, phb->dbgfs, hose, &ioda_eeh_outb_dbgfs_ops); @@ -187,10 +239,11 @@ static int ioda_eeh_post_init(struct pci_controller *hose) */ static int ioda_eeh_set_option(struct eeh_pe *pe, int option) { - s64 ret; - u32 pe_no; struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; + bool freeze_pe = false; + int enable, ret = 0; + s64 rc; /* Check on PE number */ if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { @@ -201,184 +254,236 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option) return -EINVAL; } - pe_no = pe->addr; switch (option) { case EEH_OPT_DISABLE: - ret = -EEXIST; - break; + return -EPERM; case EEH_OPT_ENABLE: - ret = 0; - break; + return 0; case EEH_OPT_THAW_MMIO: - ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, - OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO); - if (ret) { - pr_warning("%s: Failed to enable MMIO for " - "PHB#%x-PE#%x, err=%lld\n", - __func__, hose->global_number, pe_no, ret); - return -EIO; - } - + enable = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO; break; case EEH_OPT_THAW_DMA: - ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, - OPAL_EEH_ACTION_CLEAR_FREEZE_DMA); - if (ret) { - pr_warning("%s: Failed to enable DMA for " - "PHB#%x-PE#%x, err=%lld\n", - __func__, hose->global_number, pe_no, ret); - return -EIO; - } - + enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA; + break; + case EEH_OPT_FREEZE_PE: + freeze_pe = true; + enable = OPAL_EEH_ACTION_SET_FREEZE_ALL; break; default: - pr_warning("%s: Invalid option %d\n", __func__, option); + pr_warn("%s: Invalid option %d\n", + __func__, option); return -EINVAL; } + /* If PHB supports compound PE, to handle it */ + if (freeze_pe) { + if (phb->freeze_pe) { + phb->freeze_pe(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_set(phb->opal_id, + pe->addr, + enable); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing " + "PHB#%x-PE#%x\n", + __func__, rc, + phb->hose->global_number, pe->addr); + ret = -EIO; + } + } + } else { + if (phb->unfreeze_pe) { + ret = phb->unfreeze_pe(phb, pe->addr, enable); + } else { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + pe->addr, + enable); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld enable %d " + "for PHB#%x-PE#%x\n", + __func__, rc, option, + phb->hose->global_number, pe->addr); + ret = -EIO; + } + } + } + return ret; } -static void ioda_eeh_phb_diag(struct pci_controller *hose) +static void ioda_eeh_phb_diag(struct eeh_pe *pe) { - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pe->phb->private_data; long rc; - rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, + rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data, PNV_PCI_DIAG_BUF_SIZE); - if (rc != OPAL_SUCCESS) { - pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n", - __func__, hose->global_number, rc); - return; - } - - pnv_pci_dump_phb_diag_data(hose, phb->diag.blob); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Failed to get diag-data for PHB#%x (%ld)\n", + __func__, pe->phb->global_number, rc); } -/** - * ioda_eeh_get_state - Retrieve the state of PE - * @pe: EEH PE - * - * The PE's state should be retrieved from the PEEV, PEST - * IODA tables. Since the OPAL has exported the function - * to do it, it'd better to use that. - */ -static int ioda_eeh_get_state(struct eeh_pe *pe) +static int ioda_eeh_get_phb_state(struct eeh_pe *pe) { - s64 ret = 0; + struct pnv_phb *phb = pe->phb->private_data; u8 fstate; __be16 pcierr; - u32 pe_no; - int result; - struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; + s64 rc; + int result = 0; + + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x state\n", + __func__, rc, phb->hose->global_number); + return EEH_STATE_NOT_SUPPORT; + } /* - * Sanity check on PE address. The PHB PE address should - * be zero. + * Check PHB state. If the PHB is frozen for the + * first time, to dump the PHB diag-data. */ - if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { - pr_err("%s: PE address %x out of range [0, %x] " - "on PHB#%x\n", - __func__, pe->addr, phb->ioda.total_pe, - hose->global_number); - return EEH_STATE_NOT_SUPPORT; + if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + } else if (!(pe->state & EEH_PE_ISOLATED)) { + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + ioda_eeh_phb_diag(pe); } + return result; +} + +static int ioda_eeh_get_pe_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + u8 fstate; + __be16 pcierr; + s64 rc; + int result; + /* - * If we're in middle of PE reset, return normal - * state to keep EEH core going. For PHB reset, we - * still expect to have fenced PHB cleared with - * PHB reset. + * We don't clobber hardware frozen state until PE + * reset is completed. In order to keep EEH core + * moving forward, we have to return operational + * state during PE reset. */ - if (!(pe->type & EEH_PE_PHB) && - (pe->state & EEH_PE_RESET)) { - result = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE | + if (pe->state & EEH_PE_RESET) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | EEH_STATE_MMIO_ENABLED | EEH_STATE_DMA_ENABLED); return result; } - /* Retrieve PE status through OPAL */ - pe_no = pe->addr; - ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, - &fstate, &pcierr, NULL); - if (ret) { - pr_err("%s: Failed to get EEH status on " - "PHB#%x-PE#%x\n, err=%lld\n", - __func__, hose->global_number, pe_no, ret); - return EEH_STATE_NOT_SUPPORT; - } - - /* Check PHB status */ - if (pe->type & EEH_PE_PHB) { - result = 0; - result &= ~EEH_STATE_RESET_ACTIVE; - - if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { - result |= EEH_STATE_MMIO_ACTIVE; - result |= EEH_STATE_DMA_ACTIVE; - result |= EEH_STATE_MMIO_ENABLED; - result |= EEH_STATE_DMA_ENABLED; - } else if (!(pe->state & EEH_PE_ISOLATED)) { - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(hose); + /* + * Fetch PE state from hardware. If the PHB + * supports compound PE, let it handle that. + */ + if (phb->get_pe_state) { + fstate = phb->get_pe_state(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n", + __func__, rc, phb->hose->global_number, pe->addr); + return EEH_STATE_NOT_SUPPORT; } - - return result; } - /* Parse result out */ - result = 0; + /* Figure out state */ switch (fstate) { case OPAL_EEH_STOPPED_NOT_FROZEN: - result &= ~EEH_STATE_RESET_ACTIVE; - result |= EEH_STATE_MMIO_ACTIVE; - result |= EEH_STATE_DMA_ACTIVE; - result |= EEH_STATE_MMIO_ENABLED; - result |= EEH_STATE_DMA_ENABLED; + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); break; case OPAL_EEH_STOPPED_MMIO_FREEZE: - result &= ~EEH_STATE_RESET_ACTIVE; - result |= EEH_STATE_DMA_ACTIVE; - result |= EEH_STATE_DMA_ENABLED; + result = (EEH_STATE_DMA_ACTIVE | + EEH_STATE_DMA_ENABLED); break; case OPAL_EEH_STOPPED_DMA_FREEZE: - result &= ~EEH_STATE_RESET_ACTIVE; - result |= EEH_STATE_MMIO_ACTIVE; - result |= EEH_STATE_MMIO_ENABLED; + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_MMIO_ENABLED); break; case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: - result &= ~EEH_STATE_RESET_ACTIVE; + result = 0; break; case OPAL_EEH_STOPPED_RESET: - result |= EEH_STATE_RESET_ACTIVE; + result = EEH_STATE_RESET_ACTIVE; break; case OPAL_EEH_STOPPED_TEMP_UNAVAIL: - result |= EEH_STATE_UNAVAILABLE; + result = EEH_STATE_UNAVAILABLE; break; case OPAL_EEH_STOPPED_PERM_UNAVAIL: - result |= EEH_STATE_NOT_SUPPORT; + result = EEH_STATE_NOT_SUPPORT; break; default: - pr_warning("%s: Unexpected EEH status 0x%x " - "on PHB#%x-PE#%x\n", - __func__, fstate, hose->global_number, pe_no); + result = EEH_STATE_NOT_SUPPORT; + pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n", + __func__, phb->hose->global_number, + pe->addr, fstate); } - /* Dump PHB diag-data for frozen PE */ - if (result != EEH_STATE_NOT_SUPPORT && - (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) != - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) && + /* + * If PHB supports compound PE, to freeze all + * slave PEs for consistency. + * + * If the PE is switching to frozen state for the + * first time, to dump the PHB diag-data. + */ + if (!(result & EEH_STATE_NOT_SUPPORT) && + !(result & EEH_STATE_UNAVAILABLE) && + !(result & EEH_STATE_MMIO_ACTIVE) && + !(result & EEH_STATE_DMA_ACTIVE) && !(pe->state & EEH_PE_ISOLATED)) { + if (phb->freeze_pe) + phb->freeze_pe(phb, pe->addr); + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(hose); + ioda_eeh_phb_diag(pe); } return result; } +/** + * ioda_eeh_get_state - Retrieve the state of PE + * @pe: EEH PE + * + * The PE's state should be retrieved from the PEEV, PEST + * IODA tables. Since the OPAL has exported the function + * to do it, it'd better to use that. + */ +static int ioda_eeh_get_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + + /* Sanity check on PE number. PHB PE should have 0 */ + if (pe->addr < 0 || + pe->addr >= phb->ioda.total_pe) { + pr_warn("%s: PHB#%x-PE#%x out of range [0, %x]\n", + __func__, phb->hose->global_number, + pe->addr, phb->ioda.total_pe); + return EEH_STATE_NOT_SUPPORT; + } + + if (pe->type & EEH_PE_PHB) + return ioda_eeh_get_phb_state(pe); + + return ioda_eeh_get_pe_state(pe); +} + static s64 ioda_eeh_phb_poll(struct pnv_phb *phb) { s64 rc = OPAL_HARDWARE; @@ -409,11 +514,11 @@ int ioda_eeh_phb_reset(struct pci_controller *hose, int option) if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT) rc = opal_pci_reset(phb->opal_id, - OPAL_PHB_COMPLETE, + OPAL_RESET_PHB_COMPLETE, OPAL_ASSERT_RESET); else if (option == EEH_RESET_DEACTIVATE) rc = opal_pci_reset(phb->opal_id, - OPAL_PHB_COMPLETE, + OPAL_RESET_PHB_COMPLETE, OPAL_DEASSERT_RESET); if (rc < 0) goto out; @@ -453,15 +558,15 @@ static int ioda_eeh_root_reset(struct pci_controller *hose, int option) */ if (option == EEH_RESET_FUNDAMENTAL) rc = opal_pci_reset(phb->opal_id, - OPAL_PCI_FUNDAMENTAL_RESET, + OPAL_RESET_PCI_FUNDAMENTAL, OPAL_ASSERT_RESET); else if (option == EEH_RESET_HOT) rc = opal_pci_reset(phb->opal_id, - OPAL_PCI_HOT_RESET, + OPAL_RESET_PCI_HOT, OPAL_ASSERT_RESET); else if (option == EEH_RESET_DEACTIVATE) rc = opal_pci_reset(phb->opal_id, - OPAL_PCI_HOT_RESET, + OPAL_RESET_PCI_HOT, OPAL_DEASSERT_RESET); if (rc < 0) goto out; @@ -577,6 +682,31 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) if (pe->type & EEH_PE_PHB) { ret = ioda_eeh_phb_reset(hose, option); } else { + struct pnv_phb *phb; + s64 rc; + + /* + * The frozen PE might be caused by PAPR error injection + * registers, which are expected to be cleared after hitting + * frozen PE as stated in the hardware spec. Unfortunately, + * that's not true on P7IOC. So we have to clear it manually + * to avoid recursive EEH errors during recovery. + */ + phb = hose->private_data; + if (phb->model == PNV_PHB_MODEL_P7IOC && + (option == EEH_RESET_HOT || + option == EEH_RESET_FUNDAMENTAL)) { + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_ERROR, + OPAL_ASSERT_RESET); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clearing " + "error injection registers\n", + __func__, rc); + return -EIO; + } + } + bus = eeh_pe_bus_get(pe); if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent)) @@ -589,6 +719,24 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) } /** + * ioda_eeh_get_log - Retrieve error log + * @pe: frozen PE + * @severity: permanent or temporary error + * @drv_log: device driver log + * @len: length of device driver log + * + * Retrieve error log, which contains log from device driver + * and firmware. + */ +static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) +{ + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + + return 0; +} + +/** * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE * @pe: EEH PE * @@ -602,21 +750,70 @@ static int ioda_eeh_configure_bridge(struct eeh_pe *pe) return 0; } +static int ioda_eeh_err_inject(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + s64 ret; + + /* Sanity check on error type */ + if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && + type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) { + pr_warn("%s: Invalid error type %d\n", + __func__, type); + return -ERANGE; + } + + if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR || + func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) { + pr_warn("%s: Invalid error function %d\n", + __func__, func); + return -ERANGE; + } + + /* Firmware supports error injection ? */ + if (!opal_check_token(OPAL_PCI_ERR_INJECT)) { + pr_warn("%s: Firmware doesn't support error injection\n", + __func__); + return -ENXIO; + } + + /* Do error injection */ + ret = opal_pci_err_inject(phb->opal_id, pe->addr, + type, func, addr, mask); + if (ret != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld injecting error " + "%d-%d to PHB#%x-PE#%x\n", + __func__, ret, type, func, + hose->global_number, pe->addr); + return -EIO; + } + + return 0; +} + static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) { /* GEM */ - pr_info(" GEM XFIR: %016llx\n", data->gemXfir); - pr_info(" GEM RFIR: %016llx\n", data->gemRfir); - pr_info(" GEM RIRQFIR: %016llx\n", data->gemRirqfir); - pr_info(" GEM Mask: %016llx\n", data->gemMask); - pr_info(" GEM RWOF: %016llx\n", data->gemRwof); + if (data->gemXfir || data->gemRfir || + data->gemRirqfir || data->gemMask || data->gemRwof) + pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->gemXfir), + be64_to_cpu(data->gemRfir), + be64_to_cpu(data->gemRirqfir), + be64_to_cpu(data->gemMask), + be64_to_cpu(data->gemRwof)); /* LEM */ - pr_info(" LEM FIR: %016llx\n", data->lemFir); - pr_info(" LEM Error Mask: %016llx\n", data->lemErrMask); - pr_info(" LEM Action 0: %016llx\n", data->lemAction0); - pr_info(" LEM Action 1: %016llx\n", data->lemAction1); - pr_info(" LEM WOF: %016llx\n", data->lemWof); + if (data->lemFir || data->lemErrMask || + data->lemAction0 || data->lemAction1 || data->lemWof) + pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrMask), + be64_to_cpu(data->lemAction0), + be64_to_cpu(data->lemAction1), + be64_to_cpu(data->lemWof)); } static void ioda_eeh_hub_diag(struct pci_controller *hose) @@ -627,8 +824,8 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose) rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); if (rc != OPAL_SUCCESS) { - pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n", - __func__, phb->hub_id, rc); + pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n", + __func__, phb->hub_id, rc); return; } @@ -636,24 +833,31 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose) case OPAL_P7IOC_DIAG_TYPE_RGC: pr_info("P7IOC diag-data for RGC\n\n"); ioda_eeh_hub_diag_common(data); - pr_info(" RGC Status: %016llx\n", data->rgc.rgcStatus); - pr_info(" RGC LDCP: %016llx\n", data->rgc.rgcLdcp); + if (data->rgc.rgcStatus || data->rgc.rgcLdcp) + pr_info(" RGC: %016llx %016llx\n", + be64_to_cpu(data->rgc.rgcStatus), + be64_to_cpu(data->rgc.rgcLdcp)); break; case OPAL_P7IOC_DIAG_TYPE_BI: pr_info("P7IOC diag-data for BI %s\n\n", data->bi.biDownbound ? "Downbound" : "Upbound"); ioda_eeh_hub_diag_common(data); - pr_info(" BI LDCP 0: %016llx\n", data->bi.biLdcp0); - pr_info(" BI LDCP 1: %016llx\n", data->bi.biLdcp1); - pr_info(" BI LDCP 2: %016llx\n", data->bi.biLdcp2); - pr_info(" BI Fence Status: %016llx\n", data->bi.biFenceStatus); + if (data->bi.biLdcp0 || data->bi.biLdcp1 || + data->bi.biLdcp2 || data->bi.biFenceStatus) + pr_info(" BI: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->bi.biLdcp0), + be64_to_cpu(data->bi.biLdcp1), + be64_to_cpu(data->bi.biLdcp2), + be64_to_cpu(data->bi.biFenceStatus)); break; case OPAL_P7IOC_DIAG_TYPE_CI: - pr_info("P7IOC diag-data for CI Port %d\\nn", + pr_info("P7IOC diag-data for CI Port %d\n\n", data->ci.ciPort); ioda_eeh_hub_diag_common(data); - pr_info(" CI Port Status: %016llx\n", data->ci.ciPortStatus); - pr_info(" CI Port LDCP: %016llx\n", data->ci.ciPortLdcp); + if (data->ci.ciPortStatus || data->ci.ciPortLdcp) + pr_info(" CI: %016llx %016llx\n", + be64_to_cpu(data->ci.ciPortStatus), + be64_to_cpu(data->ci.ciPortLdcp)); break; case OPAL_P7IOC_DIAG_TYPE_MISC: pr_info("P7IOC diag-data for MISC\n\n"); @@ -664,30 +868,71 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose) ioda_eeh_hub_diag_common(data); break; default: - pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n", - __func__, phb->hub_id, data->type); + pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n", + __func__, phb->hub_id, data->type); } } static int ioda_eeh_get_pe(struct pci_controller *hose, u16 pe_no, struct eeh_pe **pe) { - struct eeh_pe *phb_pe, *dev_pe; - struct eeh_dev dev; + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pnv_pe; + struct eeh_pe *dev_pe; + struct eeh_dev edev; - /* Find the PHB PE */ - phb_pe = eeh_phb_pe_get(hose); - if (!phb_pe) - return -EEXIST; + /* + * If PHB supports compound PE, to fetch + * the master PE because slave PE is invisible + * to EEH core. + */ + pnv_pe = &phb->ioda.pe_array[pe_no]; + if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { + pnv_pe = pnv_pe->master; + WARN_ON(!pnv_pe || + !(pnv_pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pnv_pe->pe_number; + } /* Find the PE according to PE# */ - memset(&dev, 0, sizeof(struct eeh_dev)); - dev.phb = hose; - dev.pe_config_addr = pe_no; - dev_pe = eeh_pe_get(&dev); - if (!dev_pe) return -EEXIST; + memset(&edev, 0, sizeof(struct eeh_dev)); + edev.phb = hose; + edev.pe_config_addr = pe_no; + dev_pe = eeh_pe_get(&edev); + if (!dev_pe) + return -EEXIST; + /* Freeze the (compound) PE */ *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, pe_no); + + /* + * At this point, we're sure the (compound) PE should + * have been frozen. However, we still need poke until + * hitting the frozen PE on top level. + */ + dev_pe = dev_pe->parent; + while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { + int ret; + int active_flags = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE); + + ret = eeh_ops->get_state(dev_pe, NULL); + if (ret <= 0 || (ret & active_flags) == active_flags) { + dev_pe = dev_pe->parent; + continue; + } + + /* Frozen parent PE */ + *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, dev_pe->addr); + + /* Next one */ + dev_pe = dev_pe->parent; + } + return 0; } @@ -792,7 +1037,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) "detected, location: %s\n", hose->global_number, eeh_pe_loc_get(phb_pe)); - ioda_eeh_phb_diag(hose); + ioda_eeh_phb_diag(phb_pe); + pnv_pci_dump_phb_diag_data(hose, phb_pe->data); ret = EEH_NEXT_ERR_NONE; } @@ -812,7 +1058,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); ret = EEH_NEXT_ERR_NONE; - } else if ((*pe)->state & EEH_PE_ISOLATED) { + } else if ((*pe)->state & EEH_PE_ISOLATED || + eeh_pe_passed(*pe)) { ret = EEH_NEXT_ERR_NONE; } else { pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", @@ -839,7 +1086,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) ret == EEH_NEXT_ERR_FENCED_PHB) && !((*pe)->state & EEH_PE_ISOLATED)) { eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(hose); + ioda_eeh_phb_diag(*pe); } /* @@ -885,6 +1132,8 @@ struct pnv_eeh_ops ioda_eeh_ops = { .set_option = ioda_eeh_set_option, .get_state = ioda_eeh_get_state, .reset = ioda_eeh_reset, + .get_log = ioda_eeh_get_log, .configure_bridge = ioda_eeh_configure_bridge, + .err_inject = ioda_eeh_err_inject, .next_error = ioda_eeh_next_error }; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 56a206f32f7..3e89cbf5588 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -45,14 +45,31 @@ */ static int powernv_eeh_init(void) { + struct pci_controller *hose; + struct pnv_phb *phb; + /* We require OPALv3 */ if (!firmware_has_feature(FW_FEATURE_OPALv3)) { - pr_warning("%s: OPALv3 is required !\n", __func__); + pr_warn("%s: OPALv3 is required !\n", + __func__); return -EINVAL; } - /* Set EEH probe mode */ - eeh_probe_mode_set(EEH_PROBE_MODE_DEV); + /* Set probe mode */ + eeh_add_flag(EEH_PROBE_MODE_DEV); + + /* + * P7IOC blocks PCI config access to frozen PE, but PHB3 + * doesn't do that. So we have to selectively enable I/O + * prior to collecting error log. + */ + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + + if (phb->model == PNV_PHB_MODEL_P7IOC) + eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); + break; + } return 0; } @@ -107,6 +124,7 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) struct pnv_phb *phb = hose->private_data; struct device_node *dn = pci_device_to_OF_node(dev); struct eeh_dev *edev = of_node_to_eeh_dev(dn); + int ret; /* * When probing the root bridge, which doesn't have any @@ -143,13 +161,27 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff); /* Create PE */ - eeh_add_to_parent_pe(edev); + ret = eeh_add_to_parent_pe(edev); + if (ret) { + pr_warn("%s: Can't add PCI dev %s to parent PE (%d)\n", + __func__, pci_name(dev), ret); + return ret; + } + + /* + * Cache the PE primary bus, which can't be fetched when + * full hotplug is in progress. In that case, all child + * PCI devices of the PE are expected to be removed prior + * to PE reset. + */ + if (!edev->pe->bus) + edev->pe->bus = dev->bus; /* * Enable EEH explicitly so that we will do EEH check * while accessing I/O stuff */ - eeh_set_enable(true); + eeh_add_flag(EEH_ENABLED); /* Save memory bars */ eeh_save_bars(edev); @@ -273,8 +305,8 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) max_wait -= mwait; if (max_wait <= 0) { - pr_warning("%s: Timeout getting PE#%x's state (%d)\n", - __func__, pe->addr, max_wait); + pr_warn("%s: Timeout getting PE#%x's state (%d)\n", + __func__, pe->addr, max_wait); return EEH_STATE_NOT_SUPPORT; } @@ -294,7 +326,7 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) * Retrieve the temporary or permanent error from the PE. */ static int powernv_eeh_get_log(struct eeh_pe *pe, int severity, - char *drv_log, unsigned long len) + char *drv_log, unsigned long len) { struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; @@ -327,6 +359,31 @@ static int powernv_eeh_configure_bridge(struct eeh_pe *pe) } /** + * powernv_pe_err_inject - Inject specified error to the indicated PE + * @pe: the indicated PE + * @type: error type + * @func: specific error type + * @addr: address + * @mask: address mask + * + * The routine is called to inject specified error, which is + * determined by @type and @func, to the indicated PE for + * testing purpose. + */ +static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = -EEXIST; + + if (phb->eeh_ops && phb->eeh_ops->err_inject) + ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask); + + return ret; +} + +/** * powernv_eeh_next_error - Retrieve next EEH error to handle * @pe: Affected PE * @@ -382,6 +439,7 @@ static struct eeh_ops powernv_eeh_ops = { .wait_state = powernv_eeh_wait_state, .get_log = powernv_eeh_get_log, .configure_bridge = powernv_eeh_configure_bridge, + .err_inject = powernv_eeh_err_inject, .read_config = pnv_pci_cfg_read, .write_config = pnv_pci_cfg_write, .next_error = powernv_eeh_next_error, @@ -398,9 +456,7 @@ static int __init eeh_powernv_init(void) { int ret = -EINVAL; - if (!machine_is(powernv)) - return ret; - + eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE); ret = eeh_ops_register(&powernv_eeh_ops); if (!ret) pr_info("EEH: PowerNV platform initialized\n"); @@ -409,5 +465,4 @@ static int __init eeh_powernv_init(void) return ret; } - -early_initcall(eeh_powernv_init); +machine_early_initcall(powernv, eeh_powernv_init); diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index 32e2adfa532..e462ab947d1 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -20,6 +20,7 @@ #include <linux/wait.h> #include <linux/gfp.h> #include <linux/of.h> +#include <asm/machdep.h> #include <asm/opal.h> #define N_ASYNC_COMPLETIONS 64 @@ -201,4 +202,4 @@ out_opal_node: out: return err; } -subsys_initcall(opal_async_comp_init); +machine_subsys_initcall(powernv, opal_async_comp_init); diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 788a1977b9a..23260f7dfa7 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -102,9 +102,9 @@ static ssize_t dump_ack_store(struct dump_obj *dump_obj, * due to the dynamic size of the dump */ static struct dump_attribute id_attribute = - __ATTR(id, 0666, dump_id_show, NULL); + __ATTR(id, S_IRUGO, dump_id_show, NULL); static struct dump_attribute type_attribute = - __ATTR(type, 0666, dump_type_show, NULL); + __ATTR(type, S_IRUGO, dump_type_show, NULL); static struct dump_attribute ack_attribute = __ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store); @@ -112,7 +112,7 @@ static ssize_t init_dump_show(struct dump_obj *dump_obj, struct dump_attribute *attr, char *buf) { - return sprintf(buf, "1 - initiate dump\n"); + return sprintf(buf, "1 - initiate Service Processor(FSP) dump\n"); } static int64_t dump_fips_init(uint8_t type) @@ -121,7 +121,7 @@ static int64_t dump_fips_init(uint8_t type) rc = opal_dump_init(type); if (rc) - pr_warn("%s: Failed to initiate FipS dump (%d)\n", + pr_warn("%s: Failed to initiate FSP dump (%d)\n", __func__, rc); return rc; } @@ -131,8 +131,12 @@ static ssize_t init_dump_store(struct dump_obj *dump_obj, const char *buf, size_t count) { - dump_fips_init(DUMP_TYPE_FSP); - pr_info("%s: Initiated FSP dump\n", __func__); + int rc; + + rc = dump_fips_init(DUMP_TYPE_FSP); + if (rc == OPAL_SUCCESS) + pr_info("%s: Initiated FSP dump\n", __func__); + return count; } @@ -297,7 +301,7 @@ static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, * and rely on userspace to ask us to try * again. */ - pr_info("%s: Platform dump partially read.ID = 0x%x\n", + pr_info("%s: Platform dump partially read. ID = 0x%x\n", __func__, dump->id); return -EIO; } @@ -423,6 +427,10 @@ void __init opal_platform_dump_init(void) { int rc; + /* ELOG not supported by firmware */ + if (!opal_check_token(OPAL_DUMP_READ)) + return; + dump_kset = kset_create_and_add("dump", NULL, opal_kobj); if (!dump_kset) { pr_warn("%s: Failed to create dump kset\n", __func__); diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 0ad533b617f..518fe95dbf2 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -82,9 +82,9 @@ static ssize_t elog_ack_store(struct elog_obj *elog_obj, } static struct elog_attribute id_attribute = - __ATTR(id, 0666, elog_id_show, NULL); + __ATTR(id, S_IRUGO, elog_id_show, NULL); static struct elog_attribute type_attribute = - __ATTR(type, 0666, elog_type_show, NULL); + __ATTR(type, S_IRUGO, elog_type_show, NULL); static struct elog_attribute ack_attribute = __ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store); @@ -295,6 +295,10 @@ int __init opal_elog_init(void) { int rc = 0; + /* ELOG not supported by firmware */ + if (!opal_check_token(OPAL_ELOG_READ)) + return -1; + elog_kset = kset_create_and_add("elog", NULL, opal_kobj); if (!elog_kset) { pr_warn("%s: failed to create elog kset\n", __func__); diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c new file mode 100644 index 00000000000..5e1ed1575aa --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -0,0 +1,189 @@ +/* + * OPAL hypervisor Maintenance interrupt handling support in PowreNV. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; If not, see <http://www.gnu.org/licenses/>. + * + * Copyright 2014 IBM Corporation + * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/of.h> +#include <linux/mm.h> +#include <linux/slab.h> + +#include <asm/opal.h> +#include <asm/cputable.h> +#include <asm/machdep.h> + +static int opal_hmi_handler_nb_init; +struct OpalHmiEvtNode { + struct list_head list; + struct OpalHMIEvent hmi_evt; +}; +static LIST_HEAD(opal_hmi_evt_list); +static DEFINE_SPINLOCK(opal_hmi_evt_lock); + +static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) +{ + const char *level, *sevstr, *error_info; + static const char *hmi_error_types[] = { + "Malfunction Alert", + "Processor Recovery done", + "Processor recovery occurred again", + "Processor recovery occurred for masked error", + "Timer facility experienced an error", + "TFMR SPR is corrupted", + "UPS (Uniterrupted Power System) Overflow indication", + "An XSCOM operation failure", + "An XSCOM operation completed", + "SCOM has set a reserved FIR bit to cause recovery", + "Debug trigger has set a reserved FIR bit to cause recovery", + "A hypervisor resource error occurred" + }; + + /* Print things out */ + if (hmi_evt->version != OpalHMIEvt_V1) { + pr_err("HMI Interrupt, Unknown event version %d !\n", + hmi_evt->version); + return; + } + switch (hmi_evt->severity) { + case OpalHMI_SEV_NO_ERROR: + level = KERN_INFO; + sevstr = "Harmless"; + break; + case OpalHMI_SEV_WARNING: + level = KERN_WARNING; + sevstr = ""; + break; + case OpalHMI_SEV_ERROR_SYNC: + level = KERN_ERR; + sevstr = "Severe"; + break; + case OpalHMI_SEV_FATAL: + default: + level = KERN_ERR; + sevstr = "Fatal"; + break; + } + + printk("%s%s Hypervisor Maintenance interrupt [%s]\n", + level, sevstr, + hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ? + "Recovered" : "Not recovered"); + error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ? + hmi_error_types[hmi_evt->type] + : "Unknown"; + printk("%s Error detail: %s\n", level, error_info); + printk("%s HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer)); + if ((hmi_evt->type == OpalHMI_ERROR_TFAC) || + (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) + printk("%s TFMR: %016llx\n", level, + be64_to_cpu(hmi_evt->tfmr)); +} + +static void hmi_event_handler(struct work_struct *work) +{ + unsigned long flags; + struct OpalHMIEvent *hmi_evt; + struct OpalHmiEvtNode *msg_node; + uint8_t disposition; + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + while (!list_empty(&opal_hmi_evt_list)) { + msg_node = list_entry(opal_hmi_evt_list.next, + struct OpalHmiEvtNode, list); + list_del(&msg_node->list); + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); + + hmi_evt = (struct OpalHMIEvent *) &msg_node->hmi_evt; + print_hmi_event_info(hmi_evt); + disposition = hmi_evt->disposition; + kfree(msg_node); + + /* + * Check if HMI event has been recovered or not. If not + * then we can't continue, invoke panic. + */ + if (disposition != OpalHMI_DISPOSITION_RECOVERED) + panic("Unrecoverable HMI exception"); + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + } + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); +} + +static DECLARE_WORK(hmi_event_work, hmi_event_handler); +/* + * opal_handle_hmi_event - notifier handler that queues up HMI events + * to be preocessed later. + */ +static int opal_handle_hmi_event(struct notifier_block *nb, + unsigned long msg_type, void *msg) +{ + unsigned long flags; + struct OpalHMIEvent *hmi_evt; + struct opal_msg *hmi_msg = msg; + struct OpalHmiEvtNode *msg_node; + + /* Sanity Checks */ + if (msg_type != OPAL_MSG_HMI_EVT) + return 0; + + /* HMI event info starts from param[0] */ + hmi_evt = (struct OpalHMIEvent *)&hmi_msg->params[0]; + + /* Delay the logging of HMI events to workqueue. */ + msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC); + if (!msg_node) { + pr_err("HMI: out of memory, Opal message event not handled\n"); + return -ENOMEM; + } + memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent)); + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + list_add(&msg_node->list, &opal_hmi_evt_list); + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); + + schedule_work(&hmi_event_work); + return 0; +} + +static struct notifier_block opal_hmi_handler_nb = { + .notifier_call = opal_handle_hmi_event, + .next = NULL, + .priority = 0, +}; + +static int __init opal_hmi_handler_init(void) +{ + int ret; + + if (!opal_hmi_handler_nb_init) { + ret = opal_message_notifier_register( + OPAL_MSG_HMI_EVT, &opal_hmi_handler_nb); + if (ret) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + opal_hmi_handler_nb_init = 1; + } + return 0; +} +machine_subsys_initcall(powernv, opal_hmi_handler_init); diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index f04b4d8aca5..dd2c285ad17 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -191,6 +191,7 @@ static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf, { struct lpc_debugfs_entry *lpc = filp->private_data; u32 data, pos, len, todo; + __be32 bedata; int rc; if (!access_ok(VERIFY_WRITE, ubuf, count)) @@ -213,9 +214,10 @@ static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf, len = 2; } rc = opal_lpc_read(opal_lpc_chip_id, lpc->lpc_type, pos, - &data, len); + &bedata, len); if (rc) return -ENXIO; + data = be32_to_cpu(bedata); switch(len) { case 4: rc = __put_user((u32)data, (u32 __user *)ubuf); @@ -324,7 +326,7 @@ static int opal_lpc_init_debugfs(void) rc |= opal_lpc_debugfs_create_type(root, "fw", OPAL_LPC_FW); return rc; } -device_initcall(opal_lpc_init_debugfs); +machine_device_initcall(powernv, opal_lpc_init_debugfs); #endif /* CONFIG_DEBUG_FS */ void opal_lpc_init(void) diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c index b17a34b695e..43db2136dbf 100644 --- a/arch/powerpc/platforms/powernv/opal-memory-errors.c +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -27,6 +27,7 @@ #include <linux/mm.h> #include <linux/slab.h> +#include <asm/machdep.h> #include <asm/opal.h> #include <asm/cputable.h> @@ -143,4 +144,4 @@ static int __init opal_mem_err_init(void) } return 0; } -subsys_initcall(opal_mem_err_init); +machine_subsys_initcall(powernv, opal_mem_err_init); diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index acd9f7e9667..f9896fd5d04 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -78,7 +78,7 @@ void __init opal_nvram_init(void) } nvram_size = be32_to_cpup(nbytes_p); - printk(KERN_INFO "OPAL nvram setup, %u bytes\n", nvram_size); + pr_info("OPAL nvram setup, %u bytes\n", nvram_size); of_node_put(np); ppc_md.nvram_read = opal_nvram_read; diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index b1885db8fdf..499707ddaa9 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -42,6 +42,9 @@ unsigned long __init opal_get_boot_time(void) __be64 __h_m_s_ms; long rc = OPAL_BUSY; + if (!opal_check_token(OPAL_RTC_READ)) + goto out; + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); if (rc == OPAL_BUSY_EVENT) @@ -49,16 +52,18 @@ unsigned long __init opal_get_boot_time(void) else mdelay(10); } - if (rc != OPAL_SUCCESS) { - ppc_md.get_rtc_time = NULL; - ppc_md.set_rtc_time = NULL; - return 0; - } + if (rc != OPAL_SUCCESS) + goto out; + y_m_d = be32_to_cpu(__y_m_d); h_m_s_ms = be64_to_cpu(__h_m_s_ms); opal_to_tm(y_m_d, h_m_s_ms, &tm); return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); +out: + ppc_md.get_rtc_time = NULL; + ppc_md.set_rtc_time = NULL; + return 0; } void opal_get_rtc_time(struct rtc_time *tm) diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c new file mode 100644 index 00000000000..ae14c40b4b1 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -0,0 +1,84 @@ +#include <linux/percpu.h> +#include <linux/jump_label.h> +#include <asm/trace.h> + +#ifdef HAVE_JUMP_LABEL +struct static_key opal_tracepoint_key = STATIC_KEY_INIT; + +void opal_tracepoint_regfunc(void) +{ + static_key_slow_inc(&opal_tracepoint_key); +} + +void opal_tracepoint_unregfunc(void) +{ + static_key_slow_dec(&opal_tracepoint_key); +} +#else +/* + * We optimise OPAL calls by placing opal_tracepoint_refcount + * directly in the TOC so we can check if the opal tracepoints are + * enabled via a single load. + */ + +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ +extern long opal_tracepoint_refcount; + +void opal_tracepoint_regfunc(void) +{ + opal_tracepoint_refcount++; +} + +void opal_tracepoint_unregfunc(void) +{ + opal_tracepoint_refcount--; +} +#endif + +/* + * Since the tracing code might execute OPAL calls we need to guard against + * recursion. + */ +static DEFINE_PER_CPU(unsigned int, opal_trace_depth); + +void __trace_opal_entry(unsigned long opcode, unsigned long *args) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = &__get_cpu_var(opal_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + preempt_disable(); + trace_opal_entry(opcode, args); + (*depth)--; + +out: + local_irq_restore(flags); +} + +void __trace_opal_exit(long opcode, unsigned long retval) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = &__get_cpu_var(opal_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + trace_opal_exit(opcode, retval); + preempt_enable(); + (*depth)--; + +out: + local_irq_restore(flags); +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 4abbff22a61..e9e2450c1fd 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -13,30 +13,69 @@ #include <asm/hvcall.h> #include <asm/asm-offsets.h> #include <asm/opal.h> +#include <asm/jump_label.h> + + .section ".text" + +#ifdef CONFIG_TRACEPOINTS +#ifdef CONFIG_JUMP_LABEL +#define OPAL_BRANCH(LABEL) \ + ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key) +#else + + .section ".toc","aw" + + .globl opal_tracepoint_refcount +opal_tracepoint_refcount: + .llong 0 + + .section ".text" + +/* + * We branch around this in early init by using an unconditional cpu + * feature. + */ +#define OPAL_BRANCH(LABEL) \ +BEGIN_FTR_SECTION; \ + b 1f; \ +END_FTR_SECTION(0, 1); \ + ld r12,opal_tracepoint_refcount@toc(r2); \ + std r12,32(r1); \ + cmpdi r12,0; \ + bne- LABEL; \ +1: + +#endif + +#else +#define OPAL_BRANCH(LABEL) +#endif /* TODO: * * - Trace irqs in/off (needs saving/restoring all args, argh...) * - Get r11 feed up by Dave so I can have better register usage */ + #define OPAL_CALL(name, token) \ _GLOBAL(name); \ mflr r0; \ - mfcr r12; \ std r0,16(r1); \ + li r0,token; \ + OPAL_BRANCH(opal_tracepoint_entry) \ + mfcr r12; \ stw r12,8(r1); \ std r1,PACAR1(r13); \ - li r0,0; \ + li r11,0; \ mfmsr r12; \ - ori r0,r0,MSR_EE; \ + ori r11,r11,MSR_EE; \ std r12,PACASAVEDMSR(r13); \ - andc r12,r12,r0; \ + andc r12,r12,r11; \ mtmsrd r12,1; \ - LOAD_REG_ADDR(r0,opal_return); \ - mtlr r0; \ - li r0,MSR_DR|MSR_IR|MSR_LE;\ - andc r12,r12,r0; \ - li r0,token; \ + LOAD_REG_ADDR(r11,opal_return); \ + mtlr r11; \ + li r11,MSR_DR|MSR_IR|MSR_LE;\ + andc r12,r12,r11; \ mtspr SPRN_HSRR1,r12; \ LOAD_REG_ADDR(r11,opal); \ ld r12,8(r11); \ @@ -61,6 +100,64 @@ opal_return: mtcr r4; rfid +#ifdef CONFIG_TRACEPOINTS +opal_tracepoint_entry: + stdu r1,-STACKFRAMESIZE(r1) + std r0,STK_REG(R23)(r1) + std r3,STK_REG(R24)(r1) + std r4,STK_REG(R25)(r1) + std r5,STK_REG(R26)(r1) + std r6,STK_REG(R27)(r1) + std r7,STK_REG(R28)(r1) + std r8,STK_REG(R29)(r1) + std r9,STK_REG(R30)(r1) + std r10,STK_REG(R31)(r1) + mr r3,r0 + addi r4,r1,STK_REG(R24) + bl __trace_opal_entry + ld r0,STK_REG(R23)(r1) + ld r3,STK_REG(R24)(r1) + ld r4,STK_REG(R25)(r1) + ld r5,STK_REG(R26)(r1) + ld r6,STK_REG(R27)(r1) + ld r7,STK_REG(R28)(r1) + ld r8,STK_REG(R29)(r1) + ld r9,STK_REG(R30)(r1) + ld r10,STK_REG(R31)(r1) + LOAD_REG_ADDR(r11,opal_tracepoint_return) + mfcr r12 + std r11,16(r1) + stw r12,8(r1) + std r1,PACAR1(r13) + li r11,0 + mfmsr r12 + ori r11,r11,MSR_EE + std r12,PACASAVEDMSR(r13) + andc r12,r12,r11 + mtmsrd r12,1 + LOAD_REG_ADDR(r11,opal_return) + mtlr r11 + li r11,MSR_DR|MSR_IR|MSR_LE + andc r12,r12,r11 + mtspr SPRN_HSRR1,r12 + LOAD_REG_ADDR(r11,opal) + ld r12,8(r11) + ld r2,0(r11) + mtspr SPRN_HSRR0,r12 + hrfid + +opal_tracepoint_return: + std r3,STK_REG(R31)(r1) + mr r4,r3 + ld r0,STK_REG(R23)(r1) + bl __trace_opal_exit + ld r3,STK_REG(R31)(r1) + addi r1,r1,STACKFRAMESIZE + ld r0,16(r1) + mtlr r0 + blr +#endif + OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ); @@ -86,6 +183,8 @@ OPAL_CALL(opal_get_xive, OPAL_GET_XIVE); OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER); OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR); +OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET); +OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT); OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC); OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE); OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW); @@ -134,6 +233,7 @@ OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE); OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE); +OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN); OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT); OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO); OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2); @@ -146,3 +246,7 @@ OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT); OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); +OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); +OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); +OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE); diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c index 4cd2ea6c0db..7634d1c6229 100644 --- a/arch/powerpc/platforms/powernv/opal-xscom.c +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -130,4 +130,4 @@ static int opal_xscom_init(void) scom_init(&opal_scom_controller); return 0; } -arch_initcall(opal_xscom_init); +machine_arch_initcall(powernv, opal_xscom_init); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 199975613fe..b642b0562f5 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -22,6 +22,8 @@ #include <linux/kobject.h> #include <linux/delay.h> #include <linux/memblock.h> + +#include <asm/machdep.h> #include <asm/opal.h> #include <asm/firmware.h> #include <asm/mce.h> @@ -103,12 +105,12 @@ int __init early_init_dt_scan_opal(unsigned long node, if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { powerpc_firmware_features |= FW_FEATURE_OPALv2; powerpc_firmware_features |= FW_FEATURE_OPALv3; - printk("OPAL V3 detected !\n"); + pr_info("OPAL V3 detected !\n"); } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) { powerpc_firmware_features |= FW_FEATURE_OPALv2; - printk("OPAL V2 detected !\n"); + pr_info("OPAL V2 detected !\n"); } else { - printk("OPAL V1 detected !\n"); + pr_info("OPAL V1 detected !\n"); } /* Reinit all cores with the right endian */ @@ -192,16 +194,12 @@ static int __init opal_register_exception_handlers(void) * fwnmi area at 0x7000 to provide the glue space to OPAL */ glue = 0x7000; - opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER, - 0, glue); - glue += 128; opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue); #endif return 0; } - -early_initcall(opal_register_exception_handlers); +machine_early_initcall(powernv, opal_register_exception_handlers); int opal_notifier_register(struct notifier_block *nb) { @@ -324,7 +322,7 @@ static void opal_handle_message(void) /* check for errors. */ if (ret) { - pr_warning("%s: Failed to retrive opal message, err=%lld\n", + pr_warning("%s: Failed to retrieve opal message, err=%lld\n", __func__, ret); return; } @@ -368,7 +366,7 @@ static int __init opal_message_init(void) } return 0; } -early_initcall(opal_message_init); +machine_early_initcall(powernv, opal_message_init); int opal_get_chars(uint32_t vtermno, char *buf, int count) { @@ -513,6 +511,46 @@ int opal_machine_check(struct pt_regs *regs) return 0; } +/* Early hmi handler called in real mode. */ +int opal_hmi_exception_early(struct pt_regs *regs) +{ + s64 rc; + + /* + * call opal hmi handler. Pass paca address as token. + * The return value OPAL_SUCCESS is an indication that there is + * an HMI event generated waiting to pull by Linux. + */ + rc = opal_handle_hmi(); + if (rc == OPAL_SUCCESS) { + local_paca->hmi_event_available = 1; + return 1; + } + return 0; +} + +/* HMI exception handler called in virtual mode during check_irq_replay. */ +int opal_handle_hmi_exception(struct pt_regs *regs) +{ + s64 rc; + __be64 evt = 0; + + /* + * Check if HMI event is available. + * if Yes, then call opal_poll_events to pull opal messages and + * process them. + */ + if (!local_paca->hmi_event_available) + return 0; + + local_paca->hmi_event_available = 0; + rc = opal_poll_events(&evt); + if (rc == OPAL_SUCCESS && evt) + opal_do_notifier(be64_to_cpu(evt)); + + return 1; +} + static uint64_t find_recovery_address(uint64_t nip) { int i; @@ -567,6 +605,24 @@ static int opal_sysfs_init(void) return 0; } +static void __init opal_dump_region_init(void) +{ + void *addr; + uint64_t size; + int rc; + + /* Register kernel log buffer */ + addr = log_buf_addr_get(); + size = log_buf_len_get(); + rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF, + __pa(addr), size); + /* Don't warn if this is just an older OPAL that doesn't + * know about that call + */ + if (rc && rc != OPAL_UNSUPPORTED) + pr_warn("DUMP: Failed to register kernel log buffer. " + "rc = %d\n", rc); +} static int __init opal_init(void) { struct device_node *np, *consoles; @@ -616,6 +672,8 @@ static int __init opal_init(void) /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); if (rc == 0) { + /* Setup dump region interface */ + opal_dump_region_init(); /* Setup error log interface */ rc = opal_elog_init(); /* Setup code update interface */ @@ -630,7 +688,7 @@ static int __init opal_init(void) return 0; } -subsys_initcall(opal_init); +machine_subsys_initcall(powernv, opal_init); void opal_shutdown(void) { @@ -656,6 +714,9 @@ void opal_shutdown(void) else mdelay(10); } + + /* Unregister memory dump region */ + opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF); } /* Export this so that test modules can use it */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index de19edeaa7a..468a0f23c7f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -36,41 +36,44 @@ #include <asm/tce.h> #include <asm/xics.h> #include <asm/debug.h> +#include <asm/firmware.h> +#include <asm/pnv-pci.h> + +#include <misc/cxl.h> #include "powernv.h" #include "pci.h" -#define define_pe_printk_level(func, kern_level) \ -static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \ -{ \ - struct va_format vaf; \ - va_list args; \ - char pfix[32]; \ - int r; \ - \ - va_start(args, fmt); \ - \ - vaf.fmt = fmt; \ - vaf.va = &args; \ - \ - if (pe->pdev) \ - strlcpy(pfix, dev_name(&pe->pdev->dev), \ - sizeof(pfix)); \ - else \ - sprintf(pfix, "%04x:%02x ", \ - pci_domain_nr(pe->pbus), \ - pe->pbus->number); \ - r = printk(kern_level "pci %s: [PE# %.3d] %pV", \ - pfix, pe->pe_number, &vaf); \ - \ - va_end(args); \ - \ - return r; \ -} \ - -define_pe_printk_level(pe_err, KERN_ERR); -define_pe_printk_level(pe_warn, KERN_WARNING); -define_pe_printk_level(pe_info, KERN_INFO); +static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + char pfix[32]; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (pe->pdev) + strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); + else + sprintf(pfix, "%04x:%02x ", + pci_domain_nr(pe->pbus), pe->pbus->number); + + printk("%spci %s: [PE# %.3d] %pV", + level, pfix, pe->pe_number, &vaf); + + va_end(args); +} + +#define pe_err(pe, fmt, ...) \ + pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__) +#define pe_warn(pe, fmt, ...) \ + pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__) +#define pe_info(pe, fmt, ...) \ + pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) /* * stdcix is only supposed to be used in hypervisor real mode as per @@ -82,6 +85,12 @@ static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) : : "r" (val), "r" (paddr) : "memory"); } +static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) +{ + return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) == + (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); +} + static int pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; @@ -106,6 +115,380 @@ static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) clear_bit(pe, phb->ioda.pe_alloc); } +/* The default M64 BAR is shared by all PEs */ +static int pnv_ioda2_init_m64(struct pnv_phb *phb) +{ + const char *desc; + struct resource *r; + s64 rc; + + /* Configure the default M64 BAR */ + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_bar_idx, + phb->ioda.m64_base, + 0, /* unused */ + phb->ioda.m64_size); + if (rc != OPAL_SUCCESS) { + desc = "configuring"; + goto fail; + } + + /* Enable the default M64 BAR */ + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_bar_idx, + OPAL_ENABLE_M64_SPLIT); + if (rc != OPAL_SUCCESS) { + desc = "enabling"; + goto fail; + } + + /* Mark the M64 BAR assigned */ + set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc); + + /* + * Strip off the segment used by the reserved PE, which is + * expected to be 0 or last one of PE capabicity. + */ + r = &phb->hose->mem_resources[1]; + if (phb->ioda.reserved_pe == 0) + r->start += phb->ioda.m64_segsize; + else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1)) + r->end -= phb->ioda.m64_segsize; + else + pr_warn(" Cannot strip M64 segment for reserved PE#%d\n", + phb->ioda.reserved_pe); + + return 0; + +fail: + pr_warn(" Failure %lld %s M64 BAR#%d\n", + rc, desc, phb->ioda.m64_bar_idx); + opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_bar_idx, + OPAL_DISABLE_M64); + return -EIO; +} + +static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb) +{ + resource_size_t sgsz = phb->ioda.m64_segsize; + struct pci_dev *pdev; + struct resource *r; + int base, step, i; + + /* + * Root bus always has full M64 range and root port has + * M64 range used in reality. So we're checking root port + * instead of root bus. + */ + list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) { + for (i = PCI_BRIDGE_RESOURCES; + i <= PCI_BRIDGE_RESOURCE_END; i++) { + r = &pdev->resource[i]; + if (!r->parent || + !pnv_pci_is_mem_pref_64(r->flags)) + continue; + + base = (r->start - phb->ioda.m64_base) / sgsz; + for (step = 0; step < resource_size(r) / sgsz; step++) + set_bit(base + step, phb->ioda.pe_alloc); + } + } +} + +static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb, + struct pci_bus *bus, int all) +{ + resource_size_t segsz = phb->ioda.m64_segsize; + struct pci_dev *pdev; + struct resource *r; + struct pnv_ioda_pe *master_pe, *pe; + unsigned long size, *pe_alloc; + bool found; + int start, i, j; + + /* Root bus shouldn't use M64 */ + if (pci_is_root_bus(bus)) + return IODA_INVALID_PE; + + /* We support only one M64 window on each bus */ + found = false; + pci_bus_for_each_resource(bus, r, i) { + if (r && r->parent && + pnv_pci_is_mem_pref_64(r->flags)) { + found = true; + break; + } + } + + /* No M64 window found ? */ + if (!found) + return IODA_INVALID_PE; + + /* Allocate bitmap */ + size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); + pe_alloc = kzalloc(size, GFP_KERNEL); + if (!pe_alloc) { + pr_warn("%s: Out of memory !\n", + __func__); + return IODA_INVALID_PE; + } + + /* + * Figure out reserved PE numbers by the PE + * the its child PEs. + */ + start = (r->start - phb->ioda.m64_base) / segsz; + for (i = 0; i < resource_size(r) / segsz; i++) + set_bit(start + i, pe_alloc); + + if (all) + goto done; + + /* + * If the PE doesn't cover all subordinate buses, + * we need subtract from reserved PEs for children. + */ + list_for_each_entry(pdev, &bus->devices, bus_list) { + if (!pdev->subordinate) + continue; + + pci_bus_for_each_resource(pdev->subordinate, r, i) { + if (!r || !r->parent || + !pnv_pci_is_mem_pref_64(r->flags)) + continue; + + start = (r->start - phb->ioda.m64_base) / segsz; + for (j = 0; j < resource_size(r) / segsz ; j++) + clear_bit(start + j, pe_alloc); + } + } + + /* + * the current bus might not own M64 window and that's all + * contributed by its child buses. For the case, we needn't + * pick M64 dependent PE#. + */ + if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) { + kfree(pe_alloc); + return IODA_INVALID_PE; + } + + /* + * Figure out the master PE and put all slave PEs to master + * PE's list to form compound PE. + */ +done: + master_pe = NULL; + i = -1; + while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) < + phb->ioda.total_pe) { + pe = &phb->ioda.pe_array[i]; + pe->phb = phb; + pe->pe_number = i; + + if (!master_pe) { + pe->flags |= PNV_IODA_PE_MASTER; + INIT_LIST_HEAD(&pe->slaves); + master_pe = pe; + } else { + pe->flags |= PNV_IODA_PE_SLAVE; + pe->master = master_pe; + list_add_tail(&pe->list, &master_pe->slaves); + } + } + + kfree(pe_alloc); + return master_pe->pe_number; +} + +static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) +{ + struct pci_controller *hose = phb->hose; + struct device_node *dn = hose->dn; + struct resource *res; + const u32 *r; + u64 pci_addr; + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) { + pr_info(" Firmware too old to support M64 window\n"); + return; + } + + r = of_get_property(dn, "ibm,opal-m64-window", NULL); + if (!r) { + pr_info(" No <ibm,opal-m64-window> on %s\n", + dn->full_name); + return; + } + + /* FIXME: Support M64 for P7IOC */ + if (phb->type != PNV_PHB_IODA2) { + pr_info(" Not support M64 window\n"); + return; + } + + res = &hose->mem_resources[1]; + res->start = of_translate_address(dn, r + 2); + res->end = res->start + of_read_number(r + 4, 2) - 1; + res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + pci_addr = of_read_number(r, 2); + hose->mem_offset[1] = res->start - pci_addr; + + phb->ioda.m64_size = resource_size(res); + phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe; + phb->ioda.m64_base = pci_addr; + + /* Use last M64 BAR to cover M64 window */ + phb->ioda.m64_bar_idx = 15; + phb->init_m64 = pnv_ioda2_init_m64; + phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe; + phb->pick_m64_pe = pnv_ioda2_pick_m64_pe; +} + +static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) +{ + struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no]; + struct pnv_ioda_pe *slave; + s64 rc; + + /* Fetch master PE */ + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pe->pe_number; + } + + /* Freeze master PE */ + rc = opal_pci_eeh_freeze_set(phb->opal_id, + pe_no, + OPAL_EEH_ACTION_SET_FREEZE_ALL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, pe_no); + return; + } + + /* Freeze slave PEs */ + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return; + + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_set(phb->opal_id, + slave->pe_number, + OPAL_EEH_ACTION_SET_FREEZE_ALL); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + slave->pe_number); + } +} + +static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt) +{ + struct pnv_ioda_pe *pe, *slave; + s64 rc; + + /* Find master PE */ + pe = &phb->ioda.pe_array[pe_no]; + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pe->pe_number; + } + + /* Clear frozen state for master PE */ + rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n", + __func__, rc, opt, phb->hose->global_number, pe_no); + return -EIO; + } + + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return 0; + + /* Clear frozen state for slave PEs */ + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + slave->pe_number, + opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n", + __func__, rc, opt, phb->hose->global_number, + slave->pe_number); + return -EIO; + } + } + + return 0; +} + +static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) +{ + struct pnv_ioda_pe *slave, *pe; + u8 fstate, state; + __be16 pcierr; + s64 rc; + + /* Sanity check on PE number */ + if (pe_no < 0 || pe_no >= phb->ioda.total_pe) + return OPAL_EEH_STOPPED_PERM_UNAVAIL; + + /* + * Fetch the master PE and the PE instance might be + * not initialized yet. + */ + pe = &phb->ioda.pe_array[pe_no]; + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pe->pe_number; + } + + /* Check the master PE */ + rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, + &state, &pcierr, NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting " + "PHB#%x-PE#%x state\n", + __func__, rc, + phb->hose->global_number, pe_no); + return OPAL_EEH_STOPPED_TEMP_UNAVAIL; + } + + /* Check the slave PE */ + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return state; + + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + slave->pe_number, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting " + "PHB#%x-PE#%x state\n", + __func__, rc, + phb->hose->global_number, slave->pe_number); + return OPAL_EEH_STOPPED_TEMP_UNAVAIL; + } + + /* + * Override the result based on the ascending + * priority. + */ + if (fstate > state) + state = fstate; + } + + return state; +} + /* Currently those 2 are only used when MSIs are enabled, this will change * but in the meantime, we need to protect them to avoid warnings */ @@ -363,9 +746,16 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; struct pnv_ioda_pe *pe; - int pe_num; + int pe_num = IODA_INVALID_PE; + + /* Check if PE is determined by M64 */ + if (phb->pick_m64_pe) + pe_num = phb->pick_m64_pe(phb, bus, all); + + /* The PE number isn't pinned by M64 */ + if (pe_num == IODA_INVALID_PE) + pe_num = pnv_ioda_alloc_pe(phb); - pe_num = pnv_ioda_alloc_pe(phb); if (pe_num == IODA_INVALID_PE) { pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", __func__, pci_domain_nr(bus), bus->number); @@ -373,7 +763,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) } pe = &phb->ioda.pe_array[pe_num]; - pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); + pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); pe->pbus = bus; pe->pdev = NULL; pe->tce32_seg = -1; @@ -441,8 +831,15 @@ static void pnv_ioda_setup_PEs(struct pci_bus *bus) static void pnv_pci_ioda_setup_PEs(void) { struct pci_controller *hose, *tmp; + struct pnv_phb *phb; list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + phb = hose->private_data; + + /* M64 layout might affect PE allocation */ + if (phb->alloc_m64_pe) + phb->alloc_m64_pe(phb); + pnv_ioda_setup_PEs(hose->bus); } } @@ -462,7 +859,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); - set_iommu_table_base(&pdev->dev, &pe->tce32_table); + set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table); } static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, @@ -491,17 +888,48 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, set_dma_ops(&pdev->dev, &dma_iommu_ops); set_iommu_table_base(&pdev->dev, &pe->tce32_table); } + *pdev->dev.dma_mask = dma_mask; return 0; } -static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) +static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb, + struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *pe; + u64 end, mask; + + if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) + return 0; + + pe = &phb->ioda.pe_array[pdn->pe_number]; + if (!pe->tce_bypass_enabled) + return __dma_get_required_mask(&pdev->dev); + + + end = pe->tce_bypass_base + memblock_end_of_DRAM(); + mask = 1ULL << (fls64(end) - 1); + mask += mask - 1; + + return mask; +} + +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, + struct pci_bus *bus, + bool add_to_iommu_group) { struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table); + if (add_to_iommu_group) + set_iommu_table_base_and_group(&dev->dev, + &pe->tce32_table); + else + set_iommu_table_base(&dev->dev, &pe->tce32_table); + if (dev->subordinate) - pnv_ioda_setup_bus_dma(pe, dev->subordinate); + pnv_ioda_setup_bus_dma(pe, dev->subordinate, + add_to_iommu_group); } } @@ -513,15 +941,16 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, (__be64 __iomem *)pe->tce_inval_reg_phys : (__be64 __iomem *)tbl->it_index; unsigned long start, end, inc; + const unsigned shift = tbl->it_page_shift; start = __pa(startp); end = __pa(endp); /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ if (tbl->it_busno) { - start <<= 12; - end <<= 12; - inc = 128 << 12; + start <<= shift; + end <<= shift; + inc = 128ull << shift; start |= tbl->it_busno; end |= tbl->it_busno; } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) { @@ -559,18 +988,19 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->tce_inval_reg_phys : (__be64 __iomem *)tbl->it_index; + const unsigned shift = tbl->it_page_shift; /* We'll invalidate DMA address in PE scope */ - start = 0x2ul << 60; + start = 0x2ull << 60; start |= (pe->pe_number & 0xFF); end = start; /* Figure out the start, end and step */ inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64)); - start |= (inc << 12); + start |= (inc << shift); inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64)); - end |= (inc << 12); - inc = (0x1ul << 12); + end |= (inc << shift); + inc = (0x1ull << shift); mb(); while (start <= end) { @@ -654,7 +1084,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, /* Setup linux iommu table */ tbl = &pe->tce32_table; pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, - base << 28); + base << 28, IOMMU_PAGE_SHIFT_4K); /* OPAL variant of P7IOC SW invalidated TCEs */ swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); @@ -677,7 +1107,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, if (pe->pdev) set_iommu_table_base_and_group(&pe->pdev->dev, tbl); else - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); return; fail: @@ -713,11 +1143,15 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) 0); /* - * We might want to reset the DMA ops of all devices on - * this PE. However in theory, that shouldn't be necessary - * as this is used for VFIO/KVM pass-through and the device - * hasn't yet been returned to its kernel driver + * EEH needs the mapping between IOMMU table and group + * of those VFIO/KVM pass-through devices. We can postpone + * resetting DMA ops until the DMA mask is configured in + * host side. */ + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus, false); } if (rc) pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); @@ -784,7 +1218,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, /* Setup linux iommu table */ tbl = &pe->tce32_table; - pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0); + pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, + IOMMU_PAGE_SHIFT_4K); /* OPAL variant of PHB3 invalidated TCEs */ swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); @@ -805,7 +1240,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (pe->pdev) set_iommu_table_base_and_group(&pe->pdev->dev, tbl); else - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); /* Also create a bypass window */ pnv_pci_ioda2_setup_bypass_pe(phb, pe); @@ -895,14 +1330,186 @@ static void pnv_ioda2_msi_eoi(struct irq_data *d) icp_native_eoi(d); } + +static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) +{ + struct irq_data *idata; + struct irq_chip *ichip; + + if (phb->type != PNV_PHB_IODA2) + return; + + if (!phb->ioda.irq_chip_init) { + /* + * First time we setup an MSI IRQ, we need to setup the + * corresponding IRQ chip to route correctly. + */ + idata = irq_get_irq_data(virq); + ichip = irq_data_get_irq_chip(idata); + phb->ioda.irq_chip_init = 1; + phb->ioda.irq_chip = *ichip; + phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; + } + irq_set_chip(virq, &phb->ioda.irq_chip); +} + +#ifdef CONFIG_CXL_BASE + +struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + return hose->dn; +} +EXPORT_SYMBOL(pnv_pci_to_phb_node); + +int pnv_phb_to_cxl(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pe; + int rc; + + pe = pnv_ioda_get_pe(dev); + if (!pe) + return -ENODEV; + + pe_info(pe, "Switching PHB to CXL\n"); + + rc = opal_pci_set_phb_cxl_mode(phb->opal_id, 1, pe->pe_number); + if (rc) + dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); + + return rc; +} +EXPORT_SYMBOL(pnv_phb_to_cxl); + +/* Find PHB for cxl dev and allocate MSI hwirqs? + * Returns the absolute hardware IRQ number + */ +int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num); + + if (hwirq < 0) { + dev_warn(&dev->dev, "Failed to find a free MSI\n"); + return -ENOSPC; + } + + return phb->msi_base + hwirq; +} +EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs); + +void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num); +} +EXPORT_SYMBOL(pnv_cxl_release_hwirqs); + +void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int i, hwirq; + + for (i = 1; i < CXL_IRQ_RANGES; i++) { + if (!irqs->range[i]) + continue; + pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n", + i, irqs->offset[i], + irqs->range[i]); + hwirq = irqs->offset[i] - phb->msi_base; + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, + irqs->range[i]); + } +} +EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges); + +int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int i, hwirq, try; + + memset(irqs, 0, sizeof(struct cxl_irq_ranges)); + + /* 0 is reserved for the multiplexed PSL DSI interrupt */ + for (i = 1; i < CXL_IRQ_RANGES && num; i++) { + try = num; + while (try) { + hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try); + if (hwirq >= 0) + break; + try /= 2; + } + if (!try) + goto fail; + + irqs->offset[i] = phb->msi_base + hwirq; + irqs->range[i] = try; + pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n", + i, irqs->offset[i], irqs->range[i]); + num -= try; + } + if (num) + goto fail; + + return 0; +fail: + pnv_cxl_release_hwirq_ranges(irqs, dev); + return -ENOSPC; +} +EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges); + +int pnv_cxl_get_irq_count(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + return phb->msi_bmp.irq_count; +} +EXPORT_SYMBOL(pnv_cxl_get_irq_count); + +int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, + unsigned int virq) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + unsigned int xive_num = hwirq - phb->msi_base; + struct pnv_ioda_pe *pe; + int rc; + + if (!(pe = pnv_ioda_get_pe(dev))) + return -ENODEV; + + /* Assign XIVE to PE */ + rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); + if (rc) { + pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x " + "hwirq 0x%x XIVE 0x%x PE\n", + pci_name(dev), rc, phb->msi_base, hwirq, xive_num); + return -EIO; + } + set_msi_irq_chip(phb, virq); + + return 0; +} +EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); +#endif + static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg) { struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); struct pci_dn *pdn = pci_get_pdn(dev); - struct irq_data *idata; - struct irq_chip *ichip; unsigned int xive_num = hwirq - phb->msi_base; __be32 data; int rc; @@ -954,22 +1561,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, } msg->data = be32_to_cpu(data); - /* - * Change the IRQ chip for the MSI interrupts on PHB3. - * The corresponding IRQ chip should be populated for - * the first time. - */ - if (phb->type == PNV_PHB_IODA2) { - if (!phb->ioda.irq_chip_init) { - idata = irq_get_irq_data(virq); - ichip = irq_data_get_irq_chip(idata); - phb->ioda.irq_chip_init = 1; - phb->ioda.irq_chip = *ichip; - phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; - } - - irq_set_chip(virq, &phb->ioda.irq_chip); - } + set_msi_irq_chip(phb, virq); pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," " address=%x_%08x data=%x PE# %d\n", @@ -1055,9 +1647,6 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, index++; } } else if (res->flags & IORESOURCE_MEM) { - /* WARNING: Assumes M32 is mem region 0 in PHB. We need to - * harden that algorithm when we start supporting M64 - */ region.start = res->start - hose->mem_offset[0] - phb->ioda.m32_pci_base; @@ -1141,9 +1730,8 @@ static void pnv_pci_ioda_fixup(void) pnv_pci_ioda_create_dbgfs(); #ifdef CONFIG_EEH - eeh_probe_mode_set(EEH_PROBE_MODE_DEV); - eeh_addr_cache_build(); eeh_init(); + eeh_addr_cache_build(); #endif } @@ -1178,7 +1766,10 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, bridge = bridge->bus->self; } - /* We need support prefetchable memory window later */ + /* We fail back to M32 if M64 isn't supported */ + if (phb->ioda.m64_segsize && + pnv_pci_is_mem_pref_64(type)) + return phb->ioda.m64_segsize; if (type & IORESOURCE_MEM) return phb->ioda.m32_segsize; @@ -1217,12 +1808,12 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) { - opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET, + opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); } -void __init pnv_pci_init_ioda_phb(struct device_node *np, - u64 hub_id, int ioda_type) +static void __init pnv_pci_init_ioda_phb(struct device_node *np, + u64 hub_id, int ioda_type) { struct pci_controller *hose; struct pnv_phb *phb; @@ -1299,6 +1890,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL); if (prop32) phb->ioda.reserved_pe = be32_to_cpup(prop32); + + /* Parse 64-bit MMIO range */ + pnv_ioda_parse_m64_window(phb); + phb->ioda.m32_size = resource_size(&hose->mem_resources[0]); /* FW Has already off top 64k of M32 space (MSI space) */ phb->ioda.m32_size += 0x10000; @@ -1334,14 +1929,6 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Calculate how many 32-bit TCE segments we have */ phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28; - /* Clear unusable m64 */ - hose->mem_resources[1].flags = 0; - hose->mem_resources[1].start = 0; - hose->mem_resources[1].end = 0; - hose->mem_resources[2].flags = 0; - hose->mem_resources[2].start = 0; - hose->mem_resources[2].end = 0; - #if 0 /* We should really do that ... */ rc = opal_pci_set_phb_mem_window(opal->phb_id, window_type, @@ -1351,14 +1938,21 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, segment_size); #endif - pr_info(" %d (%d) PE's M32: 0x%x [segment=0x%x]" - " IO: 0x%x [segment=0x%x]\n", - phb->ioda.total_pe, - phb->ioda.reserved_pe, - phb->ioda.m32_size, phb->ioda.m32_segsize, - phb->ioda.io_size, phb->ioda.io_segsize); + pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n", + phb->ioda.total_pe, phb->ioda.reserved_pe, + phb->ioda.m32_size, phb->ioda.m32_segsize); + if (phb->ioda.m64_size) + pr_info(" M64: 0x%lx [segment=0x%lx]\n", + phb->ioda.m64_size, phb->ioda.m64_segsize); + if (phb->ioda.io_size) + pr_info(" IO: 0x%x [segment=0x%x]\n", + phb->ioda.io_size, phb->ioda.io_segsize); + phb->hose->ops = &pnv_pci_ops; + phb->get_pe_state = pnv_ioda_get_pe_state; + phb->freeze_pe = pnv_ioda_freeze_pe; + phb->unfreeze_pe = pnv_ioda_unfreeze_pe; #ifdef CONFIG_EEH phb->eeh_ops = &ioda_eeh_ops; #endif @@ -1369,6 +1963,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; phb->dma_set_mask = pnv_pci_ioda_dma_set_mask; + phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask; /* Setup shutdown function for kexec */ phb->shutdown = pnv_pci_ioda_shutdown; @@ -1390,7 +1985,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, pci_add_flags(PCI_REASSIGN_ALL_RSRC); /* Reset IODA tables to a clean state */ - rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); + rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); if (rc) pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); @@ -1404,6 +1999,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET); } + + /* Configure M64 window */ + if (phb->init_m64 && phb->init_m64(phb)) + hose->mem_resources[1].flags = 0; } void __init pnv_pci_init_ioda2_phb(struct device_node *np) diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index e3807d69393..94ce3481490 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -172,7 +172,8 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup; pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table, - tce_mem, tce_size, 0); + tce_mem, tce_size, 0, + IOMMU_PAGE_SHIFT_4K); } void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index f91a4e5d872..b3ca77ddf36 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -46,29 +46,21 @@ //#define cfg_dbg(fmt...) printk(fmt) #ifdef CONFIG_PCI_MSI -static int pnv_msi_check_device(struct pci_dev* pdev, int nvec, int type) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - struct pci_dn *pdn = pci_get_pdn(pdev); - - if (pdn && pdn->force_32bit_msi && !phb->msi32_support) - return -ENODEV; - - return (phb && phb->msi_bmp.bitmap) ? 0 : -ENODEV; -} - static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn = pci_get_pdn(pdev); struct msi_desc *entry; struct msi_msg msg; int hwirq; unsigned int virq; int rc; - if (WARN_ON(!phb)) + if (WARN_ON(!phb) || !phb->msi_bmp.bitmap) + return -ENODEV; + + if (pdn && pdn->force_32bit_msi && !phb->msi32_support) return -ENODEV; list_for_each_entry(entry, &pdev->msi_list, list) { @@ -132,61 +124,78 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, data = (struct OpalIoP7IOCPhbErrorData *)common; pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n", - hose->global_number, common->version); + hose->global_number, be32_to_cpu(common->version)); if (data->brdgCtl) pr_info("brdgCtl: %08x\n", - data->brdgCtl); + be32_to_cpu(data->brdgCtl)); if (data->portStatusReg || data->rootCmplxStatus || data->busAgentStatus) pr_info("UtlSts: %08x %08x %08x\n", - data->portStatusReg, data->rootCmplxStatus, - data->busAgentStatus); + be32_to_cpu(data->portStatusReg), + be32_to_cpu(data->rootCmplxStatus), + be32_to_cpu(data->busAgentStatus)); if (data->deviceStatus || data->slotStatus || data->linkStatus || data->devCmdStatus || data->devSecStatus) pr_info("RootSts: %08x %08x %08x %08x %08x\n", - data->deviceStatus, data->slotStatus, - data->linkStatus, data->devCmdStatus, - data->devSecStatus); + be32_to_cpu(data->deviceStatus), + be32_to_cpu(data->slotStatus), + be32_to_cpu(data->linkStatus), + be32_to_cpu(data->devCmdStatus), + be32_to_cpu(data->devSecStatus)); if (data->rootErrorStatus || data->uncorrErrorStatus || data->corrErrorStatus) pr_info("RootErrSts: %08x %08x %08x\n", - data->rootErrorStatus, data->uncorrErrorStatus, - data->corrErrorStatus); + be32_to_cpu(data->rootErrorStatus), + be32_to_cpu(data->uncorrErrorStatus), + be32_to_cpu(data->corrErrorStatus)); if (data->tlpHdr1 || data->tlpHdr2 || data->tlpHdr3 || data->tlpHdr4) pr_info("RootErrLog: %08x %08x %08x %08x\n", - data->tlpHdr1, data->tlpHdr2, - data->tlpHdr3, data->tlpHdr4); + be32_to_cpu(data->tlpHdr1), + be32_to_cpu(data->tlpHdr2), + be32_to_cpu(data->tlpHdr3), + be32_to_cpu(data->tlpHdr4)); if (data->sourceId || data->errorClass || data->correlator) pr_info("RootErrLog1: %08x %016llx %016llx\n", - data->sourceId, data->errorClass, - data->correlator); + be32_to_cpu(data->sourceId), + be64_to_cpu(data->errorClass), + be64_to_cpu(data->correlator)); if (data->p7iocPlssr || data->p7iocCsr) pr_info("PhbSts: %016llx %016llx\n", - data->p7iocPlssr, data->p7iocCsr); + be64_to_cpu(data->p7iocPlssr), + be64_to_cpu(data->p7iocCsr)); if (data->lemFir) pr_info("Lem: %016llx %016llx %016llx\n", - data->lemFir, data->lemErrorMask, - data->lemWOF); + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrorMask), + be64_to_cpu(data->lemWOF)); if (data->phbErrorStatus) pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", - data->phbErrorStatus, data->phbFirstErrorStatus, - data->phbErrorLog0, data->phbErrorLog1); + be64_to_cpu(data->phbErrorStatus), + be64_to_cpu(data->phbFirstErrorStatus), + be64_to_cpu(data->phbErrorLog0), + be64_to_cpu(data->phbErrorLog1)); if (data->mmioErrorStatus) pr_info("OutErr: %016llx %016llx %016llx %016llx\n", - data->mmioErrorStatus, data->mmioFirstErrorStatus, - data->mmioErrorLog0, data->mmioErrorLog1); + be64_to_cpu(data->mmioErrorStatus), + be64_to_cpu(data->mmioFirstErrorStatus), + be64_to_cpu(data->mmioErrorLog0), + be64_to_cpu(data->mmioErrorLog1)); if (data->dma0ErrorStatus) pr_info("InAErr: %016llx %016llx %016llx %016llx\n", - data->dma0ErrorStatus, data->dma0FirstErrorStatus, - data->dma0ErrorLog0, data->dma0ErrorLog1); + be64_to_cpu(data->dma0ErrorStatus), + be64_to_cpu(data->dma0FirstErrorStatus), + be64_to_cpu(data->dma0ErrorLog0), + be64_to_cpu(data->dma0ErrorLog1)); if (data->dma1ErrorStatus) pr_info("InBErr: %016llx %016llx %016llx %016llx\n", - data->dma1ErrorStatus, data->dma1FirstErrorStatus, - data->dma1ErrorLog0, data->dma1ErrorLog1); + be64_to_cpu(data->dma1ErrorStatus), + be64_to_cpu(data->dma1FirstErrorStatus), + be64_to_cpu(data->dma1ErrorLog0), + be64_to_cpu(data->dma1ErrorLog1)); for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { if ((data->pestA[i] >> 63) == 0 && @@ -194,7 +203,8 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, continue; pr_info("PE[%3d] A/B: %016llx %016llx\n", - i, data->pestA[i], data->pestB[i]); + i, be64_to_cpu(data->pestA[i]), + be64_to_cpu(data->pestB[i])); } } @@ -319,43 +329,52 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) { unsigned long flags, rc; - int has_diag; + int has_diag, ret = 0; spin_lock_irqsave(&phb->lock, flags); + /* Fetch PHB diag-data */ rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); has_diag = (rc == OPAL_SUCCESS); - rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, + /* If PHB supports compound PE, to handle it */ + if (phb->unfreeze_pe) { + ret = phb->unfreeze_pe(phb, + pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); - if (rc) { - pr_warning("PCI %d: Failed to clear EEH freeze state" - " for PE#%d, err %ld\n", - phb->hose->global_number, pe_no, rc); - - /* For now, let's only display the diag buffer when we fail to clear - * the EEH status. We'll do more sensible things later when we have - * proper EEH support. We need to make sure we don't pollute ourselves - * with the normal errors generated when probing empty slots - */ - if (has_diag) - pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob); - else - pr_warning("PCI %d: No diag data available\n", - phb->hose->global_number); + } else { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (rc) { + pr_warn("%s: Failure %ld clearing frozen " + "PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + pe_no); + ret = -EIO; + } } + /* + * For now, let's only display the diag buffer when we fail to clear + * the EEH status. We'll do more sensible things later when we have + * proper EEH support. We need to make sure we don't pollute ourselves + * with the normal errors generated when probing empty slots + */ + if (has_diag && ret) + pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob); + spin_unlock_irqrestore(&phb->lock, flags); } static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct device_node *dn) { - s64 rc; u8 fstate; __be16 pcierr; - u32 pe_no; + int pe_no; + s64 rc; /* * Get the PE#. During the PCI probe stage, we might not @@ -370,20 +389,42 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, pe_no = phb->ioda.reserved_pe; } - /* Read freeze status */ - rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr, - NULL); - if (rc) { - pr_warning("%s: Can't read EEH status (PE#%d) for " - "%s, err %lld\n", - __func__, pe_no, dn->full_name, rc); - return; + /* + * Fetch frozen state. If the PHB support compound PE, + * we need handle that case. + */ + if (phb->get_pe_state) { + fstate = phb->get_pe_state(phb, pe_no); + } else { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe_no, + &fstate, + &pcierr, + NULL); + if (rc) { + pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n", + __func__, rc, phb->hose->global_number, pe_no); + return; + } } + cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn), pe_no, fstate); - if (fstate != 0) + + /* Clear the frozen state if applicable */ + if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE || + fstate == OPAL_EEH_STOPPED_DMA_FREEZE || + fstate == OPAL_EEH_STOPPED_MMIO_DMA_FREEZE) { + /* + * If PHB supports compound PE, freeze it for + * consistency. + */ + if (phb->freeze_pe) + phb->freeze_pe(phb, pe_no); + pnv_pci_handle_eeh_config(phb, pe_no); + } } int pnv_pci_cfg_read(struct device_node *dn, @@ -564,10 +605,11 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, proto_tce |= TCE_PCI_WRITE; tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; - rpn = __pa(uaddr) >> TCE_SHIFT; + rpn = __pa(uaddr) >> tbl->it_page_shift; while (npages--) - *(tcep++) = cpu_to_be64(proto_tce | (rpn++ << TCE_RPN_SHIFT)); + *(tcep++) = cpu_to_be64(proto_tce | + (rpn++ << tbl->it_page_shift)); /* Some implementations won't cache invalid TCEs and thus may not * need that flush. We'll probably turn it_type into a bit mask @@ -627,11 +669,11 @@ static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages) void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, - u64 dma_offset) + u64 dma_offset, unsigned page_shift) { tbl->it_blocksize = 16; tbl->it_base = (unsigned long)tce_mem; - tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; + tbl->it_page_shift = page_shift; tbl->it_offset = dma_offset >> tbl->it_page_shift; tbl->it_index = 0; tbl->it_size = tce_size >> 3; @@ -656,7 +698,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose) if (WARN_ON(!tbl)) return NULL; pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), - be32_to_cpup(sizep), 0); + be32_to_cpup(sizep), 0, IOMMU_PAGE_SHIFT_4K); iommu_init_table(tbl, hose->node); iommu_register_group(tbl, pci_domain_nr(hose->bus), 0); @@ -711,6 +753,17 @@ int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) return __dma_set_mask(&pdev->dev, dma_mask); } +u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + + if (phb && phb->dma_get_required_mask) + return phb->dma_get_required_mask(phb, pdev); + + return __dma_get_required_mask(&pdev->dev); +} + void pnv_pci_shutdown(void) { struct pci_controller *hose; @@ -810,7 +863,6 @@ void __init pnv_pci_init(void) /* Configure MSIs */ #ifdef CONFIG_PCI_MSI - ppc_md.msi_check_device = pnv_msi_check_device; ppc_md.setup_msi_irqs = pnv_setup_msi_irqs; ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs; #endif @@ -842,5 +894,4 @@ static int __init tce_iommu_bus_notifier_init(void) bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); return 0; } - -subsys_initcall_sync(tce_iommu_bus_notifier_init); +machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 676232c3432..34d29eb2a4d 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -21,6 +21,8 @@ enum pnv_phb_model { #define PNV_IODA_PE_DEV (1 << 0) /* PE has single PCI device */ #define PNV_IODA_PE_BUS (1 << 1) /* PE has primary PCI bus */ #define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */ +#define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */ +#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ /* Data associated with a PE, including IOMMU tracking etc.. */ struct pnv_phb; @@ -64,6 +66,10 @@ struct pnv_ioda_pe { */ int mve_number; + /* PEs in compound case */ + struct pnv_ioda_pe *master; + struct list_head slaves; + /* Link in list of PE#s */ struct list_head dma_link; struct list_head list; @@ -79,6 +85,8 @@ struct pnv_eeh_ops { int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); int (*configure_bridge)(struct eeh_pe *pe); + int (*err_inject)(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask); int (*next_error)(struct eeh_pe **pe); }; #endif /* CONFIG_EEH */ @@ -116,9 +124,17 @@ struct pnv_phb { void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev, u64 dma_mask); + u64 (*dma_get_required_mask)(struct pnv_phb *phb, + struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); void (*shutdown)(struct pnv_phb *phb); + int (*init_m64)(struct pnv_phb *phb); + void (*alloc_m64_pe)(struct pnv_phb *phb); + int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all); + int (*get_pe_state)(struct pnv_phb *phb, int pe_no); + void (*freeze_pe)(struct pnv_phb *phb, int pe_no); + int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt); union { struct { @@ -129,9 +145,20 @@ struct pnv_phb { /* Global bridge info */ unsigned int total_pe; unsigned int reserved_pe; + + /* 32-bit MMIO window */ unsigned int m32_size; unsigned int m32_segsize; unsigned int m32_pci_base; + + /* 64-bit MMIO window */ + unsigned int m64_bar_idx; + unsigned long m64_size; + unsigned long m64_segsize; + unsigned long m64_base; + unsigned long m64_bar_alloc; + + /* IO ports */ unsigned int io_size; unsigned int io_segsize; unsigned int io_pci_base; @@ -198,7 +225,7 @@ int pnv_pci_cfg_write(struct device_node *dn, int where, int size, u32 val); extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, - u64 dma_offset); + u64 dma_offset, unsigned page_shift); extern void pnv_pci_init_p5ioc2_hub(struct device_node *np); extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 75501bfede7..6c8e2d188cd 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -13,6 +13,7 @@ struct pci_dev; extern void pnv_pci_init(void); extern void pnv_pci_shutdown(void); extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask); +extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev); #else static inline void pnv_pci_init(void) { } static inline void pnv_pci_shutdown(void) { } @@ -21,6 +22,11 @@ static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) { return -ENODEV; } + +static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) +{ + return 0; +} #endif extern void pnv_lpc_init(void); diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 1cb160dc160..80db43944af 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -123,4 +123,4 @@ static __init int rng_init(void) return 0; } -subsys_initcall(rng_init); +machine_subsys_initcall(powernv, rng_init); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index d9b88fa7c5a..3f9546d8a51 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -173,6 +173,14 @@ static int pnv_dma_set_mask(struct device *dev, u64 dma_mask) return __dma_set_mask(dev, dma_mask); } +static u64 pnv_dma_get_required_mask(struct device *dev) +{ + if (dev_is_pci(dev)) + return pnv_pci_dma_get_required_mask(to_pci_dev(dev)); + + return __dma_get_required_mask(dev); +} + static void pnv_shutdown(void) { /* Let the PCI code clear up IODA tables */ @@ -264,6 +272,8 @@ static void __init pnv_setup_machdep_opal(void) ppc_md.halt = pnv_halt; ppc_md.machine_check_exception = opal_machine_check; ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; + ppc_md.hmi_exception_early = opal_hmi_exception_early; + ppc_md.handle_hmi_exception = opal_handle_hmi_exception; } #ifdef CONFIG_PPC_POWERNV_RTAS @@ -305,7 +315,7 @@ static int __init pnv_probe(void) * Returns the cpu frequency for 'cpu' in Hz. This is used by * /proc/cpuinfo */ -unsigned long pnv_get_proc_freq(unsigned int cpu) +static unsigned long pnv_get_proc_freq(unsigned int cpu) { unsigned long ret_freq; @@ -333,6 +343,7 @@ define_machine(powernv) { .power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, .dma_set_mask = pnv_dma_set_mask, + .dma_get_required_mask = pnv_dma_get_required_mask, #ifdef CONFIG_KEXEC .kexec_cpu_down = pnv_kexec_cpu_down, #endif diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 5fcfcf44e3a..4753958cd50 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -54,7 +54,7 @@ static void pnv_smp_setup_cpu(int cpu) #endif } -int pnv_smp_kick_cpu(int nr) +static int pnv_smp_kick_cpu(int nr) { unsigned int pcpu = get_hard_smp_processor_id(nr); unsigned long start_here = @@ -168,9 +168,9 @@ static void pnv_smp_cpu_kill_self(void) power7_nap(1); ppc64_runlatch_on(); - /* Reenable IRQs briefly to clear the IPI that woke us */ - local_irq_enable(); - local_irq_disable(); + /* Clear the IPI that woke us up */ + icp_native_flush_interrupt(); + local_paca->irq_happened &= PACA_IRQ_HARD_DIS; mb(); if (cpu_core_split_required()) diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 894ecb3eb59..c87f96b79d1 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -24,6 +24,7 @@ #include <asm/smp.h> #include "subcore.h" +#include "powernv.h" /* diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 2d8bf15879f..fc44ad0475f 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -555,7 +555,6 @@ static int cmm_mem_going_offline(void *arg) pa_last = pa_last->next; free_page((unsigned long)cmm_page_list); cmm_page_list = pa_last; - continue; } } pa_curr = pa_curr->next; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 2d0b4d68a40..fdf01b660d5 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -17,6 +17,7 @@ #include <linux/slab.h> #include <linux/of.h> #include "offline_states.h" +#include "pseries.h" #include <asm/prom.h> #include <asm/machdep.h> @@ -363,7 +364,8 @@ static int dlpar_online_cpu(struct device_node *dn) int rc = 0; unsigned int cpu; int len, nthreads, i; - const u32 *intserv; + const __be32 *intserv; + u32 thread; intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -373,8 +375,9 @@ static int dlpar_online_cpu(struct device_node *dn) cpu_maps_update_begin(); for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != intserv[i]) + if (get_hard_smp_processor_id(cpu) != thread) continue; BUG_ON(get_cpu_current_state(cpu) != CPU_STATE_OFFLINE); @@ -388,7 +391,7 @@ static int dlpar_online_cpu(struct device_node *dn) } if (cpu == num_possible_cpus()) printk(KERN_WARNING "Could not find cpu to online " - "with physical id 0x%x\n", intserv[i]); + "with physical id 0x%x\n", thread); } cpu_maps_update_done(); @@ -400,10 +403,10 @@ out: static ssize_t dlpar_cpu_probe(const char *buf, size_t count) { struct device_node *dn, *parent; - unsigned long drc_index; + u32 drc_index; int rc; - rc = strict_strtoul(buf, 0, &drc_index); + rc = kstrtou32(buf, 0, &drc_index); if (rc) return -EINVAL; @@ -442,7 +445,8 @@ static int dlpar_offline_cpu(struct device_node *dn) int rc = 0; unsigned int cpu; int len, nthreads, i; - const u32 *intserv; + const __be32 *intserv; + u32 thread; intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -452,8 +456,9 @@ static int dlpar_offline_cpu(struct device_node *dn) cpu_maps_update_begin(); for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != intserv[i]) + if (get_hard_smp_processor_id(cpu) != thread) continue; if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE) @@ -475,14 +480,14 @@ static int dlpar_offline_cpu(struct device_node *dn) * Upgrade it's state to CPU_STATE_OFFLINE. */ set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); - BUG_ON(plpar_hcall_norets(H_PROD, intserv[i]) + BUG_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS); __cpu_die(cpu); break; } if (cpu == num_possible_cpus()) printk(KERN_WARNING "Could not find cpu to offline " - "with physical id 0x%x\n", intserv[i]); + "with physical id 0x%x\n", thread); } cpu_maps_update_done(); @@ -494,15 +499,15 @@ out: static ssize_t dlpar_cpu_release(const char *buf, size_t count) { struct device_node *dn; - const u32 *drc_index; + u32 drc_index; int rc; dn = of_find_node_by_path(buf); if (!dn) return -EINVAL; - drc_index = of_get_property(dn, "ibm,my-drc-index", NULL); - if (!drc_index) { + rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index); + if (rc) { of_node_put(dn); return -EINVAL; } @@ -513,7 +518,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count) return -EINVAL; } - rc = dlpar_release_drc(*drc_index); + rc = dlpar_release_drc(drc_index); if (rc) { of_node_put(dn); return rc; @@ -521,7 +526,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count) rc = dlpar_detach_node(dn); if (rc) { - dlpar_acquire_drc(*drc_index); + dlpar_acquire_drc(drc_index); return rc; } diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 7d61498e45c..1062f71f5a8 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -29,6 +29,7 @@ #include <asm/lppaca.h> #include <asm/debug.h> #include <asm/plpar_wrappers.h> +#include <asm/machdep.h> struct dtl { struct dtl_entry *buf; @@ -391,4 +392,4 @@ err_remove_dir: err: return rc; } -arch_initcall(dtl_init); +machine_arch_initcall(pseries, dtl_init); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 0bec0c02c5e..a6c7e19f5eb 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -88,29 +88,14 @@ static int pseries_eeh_init(void) * and its variant since the old firmware probably support address * of domain/bus/slot/function for EEH RTAS operations. */ - if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,set-eeh-option> invalid\n", - __func__); - return -EINVAL; - } else if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,set-slot-reset> invalid\n", - __func__); - return -EINVAL; - } else if (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && - ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,read-slot-reset-state2> and " - "<ibm,read-slot-reset-state> invalid\n", - __func__); - return -EINVAL; - } else if (ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,slot-error-detail> invalid\n", - __func__); - return -EINVAL; - } else if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE && - ibm_configure_bridge == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,configure-pe> and " - "<ibm,configure-bridge> invalid\n", - __func__); + if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE || + ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || + (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && + ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) || + ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE || + (ibm_configure_pe == RTAS_UNKNOWN_SERVICE && + ibm_configure_bridge == RTAS_UNKNOWN_SERVICE)) { + pr_info("EEH functionality not supported\n"); return -EINVAL; } @@ -118,17 +103,17 @@ static int pseries_eeh_init(void) spin_lock_init(&slot_errbuf_lock); eeh_error_buf_size = rtas_token("rtas-error-log-max"); if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: unknown EEH error log size\n", + pr_info("%s: unknown EEH error log size\n", __func__); eeh_error_buf_size = 1024; } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { - pr_warning("%s: EEH error log size %d exceeds the maximal %d\n", + pr_info("%s: EEH error log size %d exceeds the maximal %d\n", __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); eeh_error_buf_size = RTAS_ERROR_LOG_MAX; } /* Set EEH probe mode */ - eeh_probe_mode_set(EEH_PROBE_MODE_DEVTREE); + eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); return 0; } @@ -270,7 +255,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) /* Retrieve the device address */ regs = of_get_property(dn, "reg", NULL); if (!regs) { - pr_warning("%s: OF node property %s::reg not found\n", + pr_warn("%s: OF node property %s::reg not found\n", __func__, dn->full_name); return NULL; } @@ -297,7 +282,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) enable = 1; if (enable) { - eeh_set_enable(true); + eeh_add_flag(EEH_ENABLED); eeh_add_to_parent_pe(edev); pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n", @@ -349,7 +334,9 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) if (pe->addr) config_addr = pe->addr; break; - + case EEH_OPT_FREEZE_PE: + /* Not support */ + return 0; default: pr_err("%s: Invalid option %d\n", __func__, option); @@ -398,7 +385,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) pe->config_addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n", + pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n", __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -411,7 +398,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) pe->config_addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n", + pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n", __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -584,17 +571,17 @@ static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait) return ret; if (max_wait <= 0) { - pr_warning("%s: Timeout when getting PE's state (%d)\n", + pr_warn("%s: Timeout when getting PE's state (%d)\n", __func__, max_wait); return EEH_STATE_NOT_SUPPORT; } if (mwait <= 0) { - pr_warning("%s: Firmware returned bad wait value %d\n", + pr_warn("%s: Firmware returned bad wait value %d\n", __func__, mwait); mwait = EEH_STATE_MIN_WAIT_TIME; } else if (mwait > EEH_STATE_MAX_WAIT_TIME) { - pr_warning("%s: Firmware returned too long wait value %d\n", + pr_warn("%s: Firmware returned too long wait value %d\n", __func__, mwait); mwait = EEH_STATE_MAX_WAIT_TIME; } @@ -675,7 +662,7 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) } if (ret) - pr_warning("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", + pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", __func__, pe->phb->global_number, pe->addr, ret); return ret; @@ -729,6 +716,7 @@ static struct eeh_ops pseries_eeh_ops = { .wait_state = pseries_eeh_wait_state, .get_log = pseries_eeh_get_log, .configure_bridge = pseries_eeh_configure_bridge, + .err_inject = NULL, .read_config = pseries_eeh_read_config, .write_config = pseries_eeh_write_config, .next_error = NULL, @@ -743,10 +731,7 @@ static struct eeh_ops pseries_eeh_ops = { */ static int __init eeh_pseries_init(void) { - int ret = -EINVAL; - - if (!machine_is(pseries)) - return ret; + int ret; ret = eeh_ops_register(&pseries_eeh_ops); if (!ret) @@ -757,5 +742,4 @@ static int __init eeh_pseries_init(void) return ret; } - -early_initcall(eeh_pseries_init); +machine_early_initcall(pseries, eeh_pseries_init); diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 20d62975856..b174fa751d2 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -90,7 +90,7 @@ static void rtas_stop_self(void) { static struct rtas_args args = { .nargs = 0, - .nret = 1, + .nret = cpu_to_be32(1), .rets = &args.args[0], }; @@ -312,7 +312,8 @@ static void pseries_remove_processor(struct device_node *np) { unsigned int cpu; int len, nthreads, i; - const u32 *intserv; + const __be32 *intserv; + u32 thread; intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -322,8 +323,9 @@ static void pseries_remove_processor(struct device_node *np) cpu_maps_update_begin(); for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != intserv[i]) + if (get_hard_smp_processor_id(cpu) != thread) continue; BUG_ON(cpu_online(cpu)); set_cpu_present(cpu, false); @@ -332,7 +334,7 @@ static void pseries_remove_processor(struct device_node *np) } if (cpu >= nr_cpu_ids) printk(KERN_WARNING "Could not find cpu to remove " - "with physical id 0x%x\n", intserv[i]); + "with physical id 0x%x\n", thread); } cpu_maps_update_done(); } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 7995135170a..3c4c0dcd90d 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -20,6 +20,7 @@ #include <asm/machdep.h> #include <asm/prom.h> #include <asm/sparsemem.h> +#include "pseries.h" unsigned long pseries_memory_block_size(void) { @@ -113,7 +114,7 @@ out: static int pseries_remove_mem_node(struct device_node *np) { const char *type; - const unsigned int *regs; + const __be32 *regs; unsigned long base; unsigned int lmb_size; int ret = -EINVAL; @@ -126,14 +127,14 @@ static int pseries_remove_mem_node(struct device_node *np) return 0; /* - * Find the bae address and size of the memblock + * Find the base address and size of the memblock */ regs = of_get_property(np, "reg", NULL); if (!regs) return ret; - base = *(unsigned long *)regs; - lmb_size = regs[3]; + base = be64_to_cpu(*(unsigned long *)regs); + lmb_size = be32_to_cpu(regs[3]); pseries_remove_memblock(base, lmb_size); return 0; @@ -146,14 +147,14 @@ static inline int pseries_remove_memblock(unsigned long base, } static inline int pseries_remove_mem_node(struct device_node *np) { - return -EOPNOTSUPP; + return 0; } #endif /* CONFIG_MEMORY_HOTREMOVE */ static int pseries_add_mem_node(struct device_node *np) { const char *type; - const unsigned int *regs; + const __be32 *regs; unsigned long base; unsigned int lmb_size; int ret = -EINVAL; @@ -172,8 +173,8 @@ static int pseries_add_mem_node(struct device_node *np) if (!regs) return ret; - base = *(unsigned long *)regs; - lmb_size = regs[3]; + base = be64_to_cpu(*(unsigned long *)regs); + lmb_size = be32_to_cpu(regs[3]); /* * Update memory region to represent the memory add @@ -187,44 +188,46 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr) struct of_drconf_cell *new_drmem, *old_drmem; unsigned long memblock_size; u32 entries; - u32 *p; + __be32 *p; int i, rc = -EINVAL; memblock_size = pseries_memory_block_size(); if (!memblock_size) return -EINVAL; - p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL); + p = (__be32 *) pr->old_prop->value; if (!p) return -EINVAL; /* The first int of the property is the number of lmb's described * by the property. This is followed by an array of of_drconf_cell - * entries. Get the niumber of entries and skip to the array of + * entries. Get the number of entries and skip to the array of * of_drconf_cell's. */ - entries = *p++; + entries = be32_to_cpu(*p++); old_drmem = (struct of_drconf_cell *)p; - p = (u32 *)pr->prop->value; + p = (__be32 *)pr->prop->value; p++; new_drmem = (struct of_drconf_cell *)p; for (i = 0; i < entries; i++) { - if ((old_drmem[i].flags & DRCONF_MEM_ASSIGNED) && - (!(new_drmem[i].flags & DRCONF_MEM_ASSIGNED))) { - rc = pseries_remove_memblock(old_drmem[i].base_addr, + if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) && + (!(be32_to_cpu(new_drmem[i].flags) & DRCONF_MEM_ASSIGNED))) { + rc = pseries_remove_memblock( + be64_to_cpu(old_drmem[i].base_addr), memblock_size); break; - } else if ((!(old_drmem[i].flags & DRCONF_MEM_ASSIGNED)) && - (new_drmem[i].flags & DRCONF_MEM_ASSIGNED)) { - rc = memblock_add(old_drmem[i].base_addr, + } else if ((!(be32_to_cpu(old_drmem[i].flags) & + DRCONF_MEM_ASSIGNED)) && + (be32_to_cpu(new_drmem[i].flags) & + DRCONF_MEM_ASSIGNED)) { + rc = memblock_add(be64_to_cpu(old_drmem[i].base_addr), memblock_size); rc = (rc < 0) ? -EINVAL : 0; break; } } - return rc; } diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 99ecf0a5a92..3fda3f17b84 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -12,9 +12,13 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> +#include <asm/jump_label.h> + + .section ".text" #ifdef CONFIG_TRACEPOINTS +#ifndef CONFIG_JUMP_LABEL .section ".toc","aw" .globl hcall_tracepoint_refcount @@ -22,21 +26,13 @@ hcall_tracepoint_refcount: .llong 0 .section ".text" +#endif /* * precall must preserve all registers. use unused STK_PARAM() - * areas to save snapshots and opcode. We branch around this - * in early init (eg when populating the MMU hashtable) by using an - * unconditional cpu feature. + * areas to save snapshots and opcode. */ #define HCALL_INST_PRECALL(FIRST_REG) \ -BEGIN_FTR_SECTION; \ - b 1f; \ -END_FTR_SECTION(0, 1); \ - ld r12,hcall_tracepoint_refcount@toc(r2); \ - std r12,32(r1); \ - cmpdi r12,0; \ - beq+ 1f; \ mflr r0; \ std r3,STK_PARAM(R3)(r1); \ std r4,STK_PARAM(R4)(r1); \ @@ -50,45 +46,29 @@ END_FTR_SECTION(0, 1); \ addi r4,r1,STK_PARAM(FIRST_REG); \ stdu r1,-STACK_FRAME_OVERHEAD(r1); \ bl __trace_hcall_entry; \ - addi r1,r1,STACK_FRAME_OVERHEAD; \ - ld r0,16(r1); \ - ld r3,STK_PARAM(R3)(r1); \ - ld r4,STK_PARAM(R4)(r1); \ - ld r5,STK_PARAM(R5)(r1); \ - ld r6,STK_PARAM(R6)(r1); \ - ld r7,STK_PARAM(R7)(r1); \ - ld r8,STK_PARAM(R8)(r1); \ - ld r9,STK_PARAM(R9)(r1); \ - ld r10,STK_PARAM(R10)(r1); \ - mtlr r0; \ -1: + ld r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ + ld r4,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1); \ + ld r5,STACK_FRAME_OVERHEAD+STK_PARAM(R5)(r1); \ + ld r6,STACK_FRAME_OVERHEAD+STK_PARAM(R6)(r1); \ + ld r7,STACK_FRAME_OVERHEAD+STK_PARAM(R7)(r1); \ + ld r8,STACK_FRAME_OVERHEAD+STK_PARAM(R8)(r1); \ + ld r9,STACK_FRAME_OVERHEAD+STK_PARAM(R9)(r1); \ + ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R10)(r1) /* * postcall is performed immediately before function return which - * allows liberal use of volatile registers. We branch around this - * in early init (eg when populating the MMU hashtable) by using an - * unconditional cpu feature. + * allows liberal use of volatile registers. */ #define __HCALL_INST_POSTCALL \ -BEGIN_FTR_SECTION; \ - b 1f; \ -END_FTR_SECTION(0, 1); \ - ld r12,32(r1); \ - cmpdi r12,0; \ - beq+ 1f; \ - mflr r0; \ - ld r6,STK_PARAM(R3)(r1); \ - std r3,STK_PARAM(R3)(r1); \ + ld r0,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ + std r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ mr r4,r3; \ - mr r3,r6; \ - std r0,16(r1); \ - stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + mr r3,r0; \ bl __trace_hcall_exit; \ + ld r0,STACK_FRAME_OVERHEAD+16(r1); \ addi r1,r1,STACK_FRAME_OVERHEAD; \ - ld r0,16(r1); \ ld r3,STK_PARAM(R3)(r1); \ - mtlr r0; \ -1: + mtlr r0 #define HCALL_INST_POSTCALL_NORETS \ li r5,0; \ @@ -98,37 +78,62 @@ END_FTR_SECTION(0, 1); \ mr r5,BUFREG; \ __HCALL_INST_POSTCALL +#ifdef CONFIG_JUMP_LABEL +#define HCALL_BRANCH(LABEL) \ + ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key) +#else + +/* + * We branch around this in early init (eg when populating the MMU + * hashtable) by using an unconditional cpu feature. + */ +#define HCALL_BRANCH(LABEL) \ +BEGIN_FTR_SECTION; \ + b 1f; \ +END_FTR_SECTION(0, 1); \ + ld r12,hcall_tracepoint_refcount@toc(r2); \ + std r12,32(r1); \ + cmpdi r12,0; \ + bne- LABEL; \ +1: +#endif + #else #define HCALL_INST_PRECALL(FIRST_ARG) #define HCALL_INST_POSTCALL_NORETS #define HCALL_INST_POSTCALL(BUFREG) +#define HCALL_BRANCH(LABEL) #endif - .text - _GLOBAL_TOC(plpar_hcall_norets) HMT_MEDIUM mfcr r0 stw r0,8(r1) - - HCALL_INST_PRECALL(R4) - + HCALL_BRANCH(plpar_hcall_norets_trace) HVSC /* invoke the hypervisor */ - HCALL_INST_POSTCALL_NORETS - lwz r0,8(r1) mtcrf 0xff,r0 blr /* return r3 = status */ +#ifdef CONFIG_TRACEPOINTS +plpar_hcall_norets_trace: + HCALL_INST_PRECALL(R4) + HVSC + HCALL_INST_POSTCALL_NORETS + lwz r0,8(r1) + mtcrf 0xff,r0 + blr +#endif + _GLOBAL_TOC(plpar_hcall) HMT_MEDIUM mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL(R5) + HCALL_BRANCH(plpar_hcall_trace) std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ @@ -147,12 +152,40 @@ _GLOBAL_TOC(plpar_hcall) std r6, 16(r12) std r7, 24(r12) + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +#ifdef CONFIG_TRACEPOINTS +plpar_hcall_trace: + HCALL_INST_PRECALL(R5) + + std r4,STK_PARAM(R4)(r1) + mr r0,r4 + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + + HVSC + + ld r12,STK_PARAM(R4)(r1) + std r4,0(r12) + std r5,8(r12) + std r6,16(r12) + std r7,24(r12) + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 - blr /* return r3 = status */ + blr +#endif /* * plpar_hcall_raw can be called in real mode. kexec/kdump need some @@ -194,7 +227,7 @@ _GLOBAL_TOC(plpar_hcall9) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL(R5) + HCALL_BRANCH(plpar_hcall9_trace) std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ @@ -222,12 +255,49 @@ _GLOBAL_TOC(plpar_hcall9) std r11,56(r12) std r0, 64(r12) + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +#ifdef CONFIG_TRACEPOINTS +plpar_hcall9_trace: + HCALL_INST_PRECALL(R5) + + std r4,STK_PARAM(R4)(r1) + mr r0,r4 + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R11)(r1) + ld r11,STACK_FRAME_OVERHEAD+STK_PARAM(R12)(r1) + ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R13)(r1) + + HVSC + + mr r0,r12 + ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1) + std r4,0(r12) + std r5,8(r12) + std r6,16(r12) + std r7,24(r12) + std r8,32(r12) + std r9,40(r12) + std r10,48(r12) + std r11,56(r12) + std r0,64(r12) + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 - blr /* return r3 = status */ + blr +#endif /* See plpar_hcall_raw to see why this is needed */ _GLOBAL(plpar_hcall9_raw) diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index cf4e7736e4f..4575f0c9e52 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -27,6 +27,7 @@ #include <asm/firmware.h> #include <asm/cputable.h> #include <asm/trace.h> +#include <asm/machdep.h> DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); @@ -162,4 +163,4 @@ static int __init hcall_inst_init(void) return 0; } -__initcall(hcall_inst_init); +machine_device_initcall(pseries, hcall_inst_init); diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c index 4557e91626c..eedb64594dc 100644 --- a/arch/powerpc/platforms/pseries/hvcserver.c +++ b/arch/powerpc/platforms/pseries/hvcserver.c @@ -163,8 +163,8 @@ int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head, return retval; } - last_p_partition_ID = pi_buff[0]; - last_p_unit_address = pi_buff[1]; + last_p_partition_ID = be64_to_cpu(pi_buff[0]); + last_p_unit_address = be64_to_cpu(pi_buff[1]); /* This indicates that there are no further partners */ if (last_p_partition_ID == ~0UL diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 33b552ffbe5..de1ec54a2a5 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -329,16 +329,16 @@ struct direct_window { /* Dynamic DMA Window support */ struct ddw_query_response { - __be32 windows_available; - __be32 largest_available_block; - __be32 page_size; - __be32 migration_capable; + u32 windows_available; + u32 largest_available_block; + u32 page_size; + u32 migration_capable; }; struct ddw_create_response { - __be32 liobn; - __be32 addr_hi; - __be32 addr_lo; + u32 liobn; + u32 addr_hi; + u32 addr_lo; }; static LIST_HEAD(direct_window_list); @@ -721,20 +721,22 @@ static int __init disable_ddw_setup(char *str) early_param("disable_ddw", disable_ddw_setup); -static void remove_ddw(struct device_node *np) +static void remove_ddw(struct device_node *np, bool remove_prop) { struct dynamic_dma_window_prop *dwp; struct property *win64; - const u32 *ddw_avail; + u32 ddw_avail[3]; u64 liobn; - int len, ret; + int ret = 0; + + ret = of_property_read_u32_array(np, "ibm,ddw-applicable", + &ddw_avail[0], 3); - ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len); win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); if (!win64) return; - if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp)) + if (ret || win64->length < sizeof(*dwp)) goto delprop; dwp = win64->value; @@ -761,7 +763,8 @@ static void remove_ddw(struct device_node *np) np->full_name, ret, ddw_avail[2], liobn); delprop: - ret = of_remove_property(np, win64); + if (remove_prop) + ret = of_remove_property(np, win64); if (ret) pr_warning("%s: failed to remove direct window property: %d\n", np->full_name, ret); @@ -805,7 +808,7 @@ static int find_existing_ddw_windows(void) window = kzalloc(sizeof(*window), GFP_KERNEL); if (!window || len < sizeof(struct dynamic_dma_window_prop)) { kfree(window); - remove_ddw(pdn); + remove_ddw(pdn, true); continue; } @@ -871,8 +874,9 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ - ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr, - BUID_HI(buid), BUID_LO(buid), page_shift, window_shift); + ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, + cfg_addr, BUID_HI(buid), BUID_LO(buid), + page_shift, window_shift); } while (rtas_busy_delay(ret)); dev_info(&dev->dev, "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " @@ -909,7 +913,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) int page_shift; u64 dma_addr, max_addr; struct device_node *dn; - const u32 *uninitialized_var(ddw_avail); + u32 ddw_avail[3]; struct direct_window *window; struct property *win64; struct dynamic_dma_window_prop *ddwprop; @@ -941,8 +945,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * for the given node in that order. * the property is actually in the parent, not the PE */ - ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); - if (!ddw_avail || len < 3 * sizeof(u32)) + ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", + &ddw_avail[0], 3); + if (ret) goto out_failed; /* @@ -965,11 +970,11 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) dev_dbg(&dev->dev, "no free dynamic windows"); goto out_failed; } - if (be32_to_cpu(query.page_size) & 4) { + if (query.page_size & 4) { page_shift = 24; /* 16MB */ - } else if (be32_to_cpu(query.page_size) & 2) { + } else if (query.page_size & 2) { page_shift = 16; /* 64kB */ - } else if (be32_to_cpu(query.page_size) & 1) { + } else if (query.page_size & 1) { page_shift = 12; /* 4kB */ } else { dev_dbg(&dev->dev, "no supported direct page size in mask %x", @@ -979,7 +984,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) /* verify the window * number of ptes will map the partition */ /* check largest block * page size > max memory hotplug addr */ max_addr = memory_hotplug_max(); - if (be32_to_cpu(query.largest_available_block) < (max_addr >> page_shift)) { + if (query.largest_available_block < (max_addr >> page_shift)) { dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u " "%llu-sized pages\n", max_addr, query.largest_available_block, 1ULL << page_shift); @@ -1005,8 +1010,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (ret != 0) goto out_free_prop; - ddwprop->liobn = create.liobn; - ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2)); + ddwprop->liobn = cpu_to_be32(create.liobn); + ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) | + create.addr_lo); ddwprop->tce_shift = cpu_to_be32(page_shift); ddwprop->window_shift = cpu_to_be32(len); @@ -1038,14 +1044,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); - dma_addr = of_read_number(&create.addr_hi, 2); + dma_addr = be64_to_cpu(ddwprop->dma_base); goto out_unlock; out_free_window: kfree(window); out_clear_window: - remove_ddw(pdn); + remove_ddw(pdn, true); out_free_prop: kfree(win64->name); @@ -1255,7 +1261,14 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti switch (action) { case OF_RECONFIG_DETACH_NODE: - remove_ddw(np); + /* + * Removing the property will invoke the reconfig + * notifier again, which causes dead-lock on the + * read-write semaphore of the notifier chain. So + * we have to remove the property when releasing + * the device node. + */ + remove_ddw(np, false); if (pci && pci->iommu_table) iommu_free_table(pci->iommu_table, np->full_name); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index b02af9ef3ff..8c509d5397c 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -26,6 +26,7 @@ #include <linux/dma-mapping.h> #include <linux/console.h> #include <linux/export.h> +#include <linux/static_key.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/page.h> @@ -58,8 +59,6 @@ EXPORT_SYMBOL(plpar_hcall); EXPORT_SYMBOL(plpar_hcall9); EXPORT_SYMBOL(plpar_hcall_norets); -extern void pSeries_find_serial_port(void); - void vpa_init(int cpu) { int hwcpu = get_hard_smp_processor_id(cpu); @@ -430,16 +429,17 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); } -static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm, - unsigned char *hpte_slot_array, - unsigned long addr, int psize) +static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, + unsigned long addr, + unsigned char *hpte_slot_array, + int psize, int ssize) { - int ssize = 0, i, index = 0; + int i, index = 0; unsigned long s_addr = addr; unsigned int max_hpte_count, valid; unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH]; unsigned long slot_array[PPC64_HUGE_HPTE_BATCH]; - unsigned long shift, hidx, vpn = 0, vsid, hash, slot; + unsigned long shift, hidx, vpn = 0, hash, slot; shift = mmu_psize_defs[psize].shift; max_hpte_count = 1U << (PMD_SHIFT - shift); @@ -452,15 +452,6 @@ static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) @@ -649,6 +640,19 @@ EXPORT_SYMBOL(arch_free_page); #endif #ifdef CONFIG_TRACEPOINTS +#ifdef HAVE_JUMP_LABEL +struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; + +void hcall_tracepoint_regfunc(void) +{ + static_key_slow_inc(&hcall_tracepoint_key); +} + +void hcall_tracepoint_unregfunc(void) +{ + static_key_slow_dec(&hcall_tracepoint_key); +} +#else /* * We optimise our hcall path by placing hcall_tracepoint_refcount * directly in the TOC so we can check if the hcall tracepoints are @@ -658,13 +662,6 @@ EXPORT_SYMBOL(arch_free_page); /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ extern long hcall_tracepoint_refcount; -/* - * Since the tracing code might execute hcalls we need to guard against - * recursion. One example of this are spinlocks calling H_YIELD on - * shared processor partitions. - */ -static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); - void hcall_tracepoint_regfunc(void) { hcall_tracepoint_refcount++; @@ -674,6 +671,15 @@ void hcall_tracepoint_unregfunc(void) { hcall_tracepoint_refcount--; } +#endif + +/* + * Since the tracing code might execute hcalls we need to guard against + * recursion. One example of this are spinlocks calling H_YIELD on + * shared processor partitions. + */ +static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); + void __trace_hcall_entry(unsigned long opcode, unsigned long *args) { diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index bde7ebad394..e7cb6d4a871 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -18,6 +18,7 @@ #include <linux/delay.h> #include <linux/slab.h> +#include <asm/machdep.h> #include <asm/rtas.h> #include "pseries.h" @@ -319,7 +320,7 @@ static ssize_t migrate_store(struct class *class, struct class_attribute *attr, u64 streamid; int rc; - rc = strict_strtoull(buf, 0, &streamid); + rc = kstrtou64(buf, 0, &streamid); if (rc) return rc; @@ -362,4 +363,4 @@ static int __init mobility_sysfs_init(void) return rc; } -device_initcall(mobility_sysfs_init); +machine_device_initcall(pseries, mobility_sysfs_init); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 0c882e83c4c..8ab5add4ac8 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -16,6 +16,7 @@ #include <asm/rtas.h> #include <asm/hw_irq.h> #include <asm/ppc-pci.h> +#include <asm/machdep.h> static int query_token, change_token; @@ -335,26 +336,6 @@ out: return request; } -static int rtas_msi_check_device(struct pci_dev *pdev, int nvec, int type) -{ - int quota, rc; - - if (type == PCI_CAP_ID_MSIX) - rc = check_req_msix(pdev, nvec); - else - rc = check_req_msi(pdev, nvec); - - if (rc) - return rc; - - quota = msi_quota_for_device(pdev, nvec); - - if (quota && quota < nvec) - return quota; - - return 0; -} - static int check_msix_entries(struct pci_dev *pdev) { struct msi_desc *entry; @@ -396,15 +377,24 @@ static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev) static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) { struct pci_dn *pdn; - int hwirq, virq, i, rc; + int hwirq, virq, i, quota, rc; struct msi_desc *entry; struct msi_msg msg; int nvec = nvec_in; int use_32bit_msi_hack = 0; - pdn = pci_get_pdn(pdev); - if (!pdn) - return -ENODEV; + if (type == PCI_CAP_ID_MSIX) + rc = check_req_msix(pdev, nvec); + else + rc = check_req_msi(pdev, nvec); + + if (rc) + return rc; + + quota = msi_quota_for_device(pdev, nvec); + + if (quota && quota < nvec) + return quota; if (type == PCI_CAP_ID_MSIX && check_msix_entries(pdev)) return -EINVAL; @@ -415,12 +405,14 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) */ if (type == PCI_CAP_ID_MSIX) { int m = roundup_pow_of_two(nvec); - int quota = msi_quota_for_device(pdev, m); + quota = msi_quota_for_device(pdev, m); if (quota >= m) nvec = m; } + pdn = pci_get_pdn(pdev); + /* * Try the new more explicit firmware interface, if that fails fall * back to the old interface. The old interface is known to never @@ -484,7 +476,7 @@ again: irq_set_msi_desc(virq, entry); /* Read config space back so we can restore after reset */ - read_msi_msg(virq, &msg); + __read_msi_msg(entry, &msg); entry->msg = msg; } @@ -525,12 +517,10 @@ static int rtas_msi_init(void) WARN_ON(ppc_md.setup_msi_irqs); ppc_md.setup_msi_irqs = rtas_setup_msi_irqs; ppc_md.teardown_msi_irqs = rtas_teardown_msi_irqs; - ppc_md.msi_check_device = rtas_msi_check_device; WARN_ON(ppc_md.pci_irq_fixup); ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup; return 0; } -arch_initcall(rtas_msi_init); - +machine_arch_initcall(pseries, rtas_msi_init); diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 0cc240b7f69..11a3b617ef5 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -276,8 +276,10 @@ static ssize_t pSeries_nvram_get_size(void) * sequence #: The unique sequence # for each event. (until it wraps) * error log: The error log from event_scan */ -int nvram_write_os_partition(struct nvram_os_partition *part, char * buff, - int length, unsigned int err_type, unsigned int error_log_cnt) +static int nvram_write_os_partition(struct nvram_os_partition *part, + char *buff, int length, + unsigned int err_type, + unsigned int error_log_cnt) { int rc; loff_t tmp_index; @@ -330,9 +332,9 @@ int nvram_write_error_log(char * buff, int length, * * Reads nvram partition for at most 'length' */ -int nvram_read_partition(struct nvram_os_partition *part, char *buff, - int length, unsigned int *err_type, - unsigned int *error_log_cnt) +static int nvram_read_partition(struct nvram_os_partition *part, char *buff, + int length, unsigned int *err_type, + unsigned int *error_log_cnt) { int rc; loff_t tmp_index; diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index c413ec158ff..67e48594040 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -29,6 +29,7 @@ #include <asm/pci-bridge.h> #include <asm/prom.h> #include <asm/ppc-pci.h> +#include "pseries.h" #if 0 void pcibios_name_device(struct pci_dev *dev) diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 203cbf0dc10..89e23811199 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -118,10 +118,10 @@ int remove_phb_dynamic(struct pci_controller *phb) } } - /* Unregister the bridge device from sysfs and remove the PCI bus */ - device_unregister(b->bridge); + /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); + device_unregister(b->bridge); /* Now release the IO resource */ if (res->flags & IORESOURCE_IO) diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c index 6d626623644..c26eadde434 100644 --- a/arch/powerpc/platforms/pseries/power.c +++ b/arch/powerpc/platforms/pseries/power.c @@ -25,6 +25,7 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/init.h> +#include <asm/machdep.h> unsigned long rtas_poweron_auto; /* default and normal state is 0 */ @@ -71,11 +72,11 @@ static int __init pm_init(void) return -ENOMEM; return sysfs_create_group(power_kobj, &attr_group); } -core_initcall(pm_init); +machine_core_initcall(pseries, pm_init); #else static int __init apo_pm_init(void) { return (sysfs_create_file(power_kobj, &auto_poweron_attr.attr)); } -__initcall(apo_pm_init); +machine_device_initcall(pseries, apo_pm_init); #endif diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 9c5778e6ed4..5a4d0fc03b0 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -71,7 +71,7 @@ static int __init init_ras_IRQ(void) return 0; } -subsys_initcall(init_ras_IRQ); +machine_subsys_initcall(pseries, init_ras_IRQ); #define EPOW_SHUTDOWN_NORMAL 1 #define EPOW_SHUTDOWN_ON_UPS 2 @@ -126,7 +126,7 @@ struct epow_errorlog { #define EPOW_MAIN_ENCLOSURE 5 #define EPOW_POWER_OFF 7 -void rtas_parse_epow_errlog(struct rtas_error_log *log) +static void rtas_parse_epow_errlog(struct rtas_error_log *log) { struct pseries_errorlog *pseries_log; struct epow_errorlog *epow_log; diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 1c0a60d9886..0f319521e00 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -446,13 +446,10 @@ static int proc_ppc64_create_ofdt(void) { struct proc_dir_entry *ent; - if (!machine_is(pseries)) - return 0; - ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops); if (ent) proc_set_size(ent, 0); return 0; } -__initcall(proc_ppc64_create_ofdt); +machine_device_initcall(pseries, proc_ppc64_create_ofdt); diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index 72a102758d4..e0960877090 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -42,4 +42,4 @@ static __init int rng_init(void) return 0; } -subsys_initcall(rng_init); +machine_subsys_initcall(pseries, rng_init); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index f2f40e64658..125c589eeef 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -232,8 +232,7 @@ static void __init pseries_discover_pic(void) struct device_node *np; const char *typep; - for (np = NULL; (np = of_find_node_by_name(np, - "interrupt-controller"));) { + for_each_node_by_name(np, "interrupt-controller") { typep = of_get_property(np, "compatible", NULL); if (strstr(typep, "open-pic")) { pSeries_mpic_node = of_node_get(np); @@ -351,7 +350,7 @@ static int alloc_dispatch_log_kmem_cache(void) return alloc_dispatch_logs(); } -early_initcall(alloc_dispatch_log_kmem_cache); +machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache); static void pseries_lpar_idle(void) { @@ -562,7 +561,7 @@ void pSeries_coalesce_init(void) * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, * handle that here. (Stolen from parse_system_parameter_string) */ -void pSeries_cmo_feature_init(void) +static void pSeries_cmo_feature_init(void) { char *ptr, *key, *value, *end; int call_status; diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index b87b97849d4..e76aefae2aa 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -265,7 +265,7 @@ static int __init pseries_suspend_init(void) { int rc; - if (!machine_is(pseries) || !firmware_has_feature(FW_FEATURE_LPAR)) + if (!firmware_has_feature(FW_FEATURE_LPAR)) return 0; suspend_data.token = rtas_token("ibm,suspend-me"); @@ -280,5 +280,4 @@ static int __init pseries_suspend_init(void) suspend_set_ops(&pseries_suspend_ops); return 0; } - -__initcall(pseries_suspend_init); +machine_device_initcall(pseries, pseries_suspend_init); diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 47b6b9f81d4..ad56edc3991 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -314,7 +314,7 @@ axon_ram_remove(struct platform_device *device) return 0; } -static struct of_device_id axon_ram_device_id[] = { +static const struct of_device_id axon_ram_device_id[] = { { .type = "dma-memory" }, diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c index e9056e43857..2d8a101b6b9 100644 --- a/arch/powerpc/sysdev/dcr.c +++ b/arch/powerpc/sysdev/dcr.c @@ -230,5 +230,6 @@ EXPORT_SYMBOL_GPL(dcr_unmap_mmio); #ifdef CONFIG_PPC_DCR_NATIVE DEFINE_SPINLOCK(dcr_ind_lock); +EXPORT_SYMBOL_GPL(dcr_ind_lock); #endif /* defined(CONFIG_PPC_DCR_NATIVE) */ diff --git a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c index afc2dbf3701..90545ad1626 100644 --- a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c +++ b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c @@ -171,7 +171,7 @@ static int mpc85xx_l2ctlr_of_remove(struct platform_device *dev) return 0; } -static struct of_device_id mpc85xx_l2ctlr_of_match[] = { +static const struct of_device_id mpc85xx_l2ctlr_of_match[] = { { .compatible = "fsl,p2020-l2-cache-controller", }, diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 77efbaec7b9..de40b48b460 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -18,6 +18,8 @@ #include <linux/pci.h> #include <linux/slab.h> #include <linux/of_platform.h> +#include <linux/interrupt.h> +#include <linux/seq_file.h> #include <sysdev/fsl_soc.h> #include <asm/prom.h> #include <asm/hw_irq.h> @@ -50,6 +52,7 @@ struct fsl_msi_feature { struct fsl_msi_cascade_data { struct fsl_msi *msi_data; int index; + int virq; }; static inline u32 fsl_msi_read(u32 __iomem *base, unsigned int reg) @@ -65,11 +68,24 @@ static void fsl_msi_end_irq(struct irq_data *d) { } +static void fsl_msi_print_chip(struct irq_data *irqd, struct seq_file *p) +{ + struct fsl_msi *msi_data = irqd->domain->host_data; + irq_hw_number_t hwirq = irqd_to_hwirq(irqd); + int cascade_virq, srs; + + srs = (hwirq >> msi_data->srs_shift) & MSI_SRS_MASK; + cascade_virq = msi_data->cascade_array[srs]->virq; + + seq_printf(p, " fsl-msi-%d", cascade_virq); +} + + static struct irq_chip fsl_msi_chip = { .irq_mask = mask_msi_irq, .irq_unmask = unmask_msi_irq, .irq_ack = fsl_msi_end_irq, - .name = "FSL-MSI", + .irq_print_chip = fsl_msi_print_chip, }; static int fsl_msi_host_map(struct irq_domain *h, unsigned int virq, @@ -109,14 +125,6 @@ static int fsl_msi_init_allocator(struct fsl_msi *msi_data) return 0; } -static int fsl_msi_check_device(struct pci_dev *pdev, int nvec, int type) -{ - if (type == PCI_CAP_ID_MSIX) - pr_debug("fslmsi: MSI-X untested, trying anyway.\n"); - - return 0; -} - static void fsl_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; @@ -173,6 +181,9 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) struct msi_msg msg; struct fsl_msi *msi_data; + if (type == PCI_CAP_ID_MSIX) + pr_debug("fslmsi: MSI-X untested, trying anyway.\n"); + /* * If the PCI node has an fsl,msi property, then we need to use it * to find the specific MSI. @@ -180,7 +191,8 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) np = of_parse_phandle(hose->dn, "fsl,msi", 0); if (np) { if (of_device_is_compatible(np, "fsl,mpic-msi") || - of_device_is_compatible(np, "fsl,vmpic-msi")) + of_device_is_compatible(np, "fsl,vmpic-msi") || + of_device_is_compatible(np, "fsl,vmpic-msi-v4.3")) phandle = np->phandle; else { dev_err(&pdev->dev, @@ -239,40 +251,24 @@ out_free: return rc; } -static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) +static irqreturn_t fsl_msi_cascade(int irq, void *data) { - struct irq_chip *chip = irq_desc_get_chip(desc); - struct irq_data *idata = irq_desc_get_irq_data(desc); unsigned int cascade_irq; struct fsl_msi *msi_data; int msir_index = -1; u32 msir_value = 0; u32 intr_index; u32 have_shift = 0; - struct fsl_msi_cascade_data *cascade_data; + struct fsl_msi_cascade_data *cascade_data = data; + irqreturn_t ret = IRQ_NONE; - cascade_data = irq_get_handler_data(irq); msi_data = cascade_data->msi_data; - raw_spin_lock(&desc->lock); - if ((msi_data->feature & FSL_PIC_IP_MASK) == FSL_PIC_IP_IPIC) { - if (chip->irq_mask_ack) - chip->irq_mask_ack(idata); - else { - chip->irq_mask(idata); - chip->irq_ack(idata); - } - } - - if (unlikely(irqd_irq_inprogress(idata))) - goto unlock; - msir_index = cascade_data->index; if (msir_index >= NR_MSI_REG_MAX) cascade_irq = NO_IRQ; - irqd_set_chained_irq_inprogress(idata); switch (msi_data->feature & FSL_PIC_IP_MASK) { case FSL_PIC_IP_MPIC: msir_value = fsl_msi_read(msi_data->msi_regs, @@ -301,40 +297,32 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) cascade_irq = irq_linear_revmap(msi_data->irqhost, msi_hwirq(msi_data, msir_index, intr_index + have_shift)); - if (cascade_irq != NO_IRQ) + if (cascade_irq != NO_IRQ) { generic_handle_irq(cascade_irq); + ret = IRQ_HANDLED; + } have_shift += intr_index + 1; msir_value = msir_value >> (intr_index + 1); } - irqd_clr_chained_irq_inprogress(idata); - switch (msi_data->feature & FSL_PIC_IP_MASK) { - case FSL_PIC_IP_MPIC: - case FSL_PIC_IP_VMPIC: - chip->irq_eoi(idata); - break; - case FSL_PIC_IP_IPIC: - if (!irqd_irq_disabled(idata) && chip->irq_unmask) - chip->irq_unmask(idata); - break; - } -unlock: - raw_spin_unlock(&desc->lock); + return ret; } static int fsl_of_msi_remove(struct platform_device *ofdev) { struct fsl_msi *msi = platform_get_drvdata(ofdev); int virq, i; - struct fsl_msi_cascade_data *cascade_data; if (msi->list.prev != NULL) list_del(&msi->list); for (i = 0; i < NR_MSI_REG_MAX; i++) { - virq = msi->msi_virqs[i]; - if (virq != NO_IRQ) { - cascade_data = irq_get_handler_data(virq); - kfree(cascade_data); + if (msi->cascade_array[i]) { + virq = msi->cascade_array[i]->virq; + + BUG_ON(virq == NO_IRQ); + + free_irq(virq, msi->cascade_array[i]); + kfree(msi->cascade_array[i]); irq_dispose_mapping(virq); } } @@ -353,7 +341,7 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, int offset, int irq_index) { struct fsl_msi_cascade_data *cascade_data = NULL; - int virt_msir, i; + int virt_msir, i, ret; virt_msir = irq_of_parse_and_map(dev->dev.of_node, irq_index); if (virt_msir == NO_IRQ) { @@ -368,11 +356,18 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, return -ENOMEM; } irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class); - msi->msi_virqs[irq_index] = virt_msir; cascade_data->index = offset; cascade_data->msi_data = msi; - irq_set_handler_data(virt_msir, cascade_data); - irq_set_chained_handler(virt_msir, fsl_msi_cascade); + cascade_data->virq = virt_msir; + msi->cascade_array[irq_index] = cascade_data; + + ret = request_irq(virt_msir, fsl_msi_cascade, 0, + "fsl-msi-cascade", cascade_data); + if (ret) { + dev_err(&dev->dev, "failed to request_irq(%d), ret = %d\n", + virt_msir, ret); + return ret; + } /* Release the hwirqs corresponding to this MSI register */ for (i = 0; i < IRQS_PER_MSI_REG; i++) @@ -466,7 +461,8 @@ static int fsl_of_msi_probe(struct platform_device *dev) p = of_get_property(dev->dev.of_node, "msi-available-ranges", &len); - if (of_device_is_compatible(dev->dev.of_node, "fsl,mpic-msi-v4.3")) { + if (of_device_is_compatible(dev->dev.of_node, "fsl,mpic-msi-v4.3") || + of_device_is_compatible(dev->dev.of_node, "fsl,vmpic-msi-v4.3")) { msi->srs_shift = MSIIR1_SRS_SHIFT; msi->ibs_shift = MSIIR1_IBS_SHIFT; if (p) @@ -527,7 +523,6 @@ static int fsl_of_msi_probe(struct platform_device *dev) if (!ppc_md.setup_msi_irqs) { ppc_md.setup_msi_irqs = fsl_setup_msi_irqs; ppc_md.teardown_msi_irqs = fsl_teardown_msi_irqs; - ppc_md.msi_check_device = fsl_msi_check_device; } else if (ppc_md.setup_msi_irqs != fsl_setup_msi_irqs) { dev_err(&dev->dev, "Different MSI driver already installed!\n"); err = -ENODEV; @@ -572,6 +567,10 @@ static const struct of_device_id fsl_of_msi_ids[] = { .compatible = "fsl,vmpic-msi", .data = &vmpic_msi_feature, }, + { + .compatible = "fsl,vmpic-msi-v4.3", + .data = &vmpic_msi_feature, + }, #endif {} }; diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h index df9aa9fe093..420cfcbdac0 100644 --- a/arch/powerpc/sysdev/fsl_msi.h +++ b/arch/powerpc/sysdev/fsl_msi.h @@ -27,6 +27,8 @@ #define FSL_PIC_IP_IPIC 0x00000002 #define FSL_PIC_IP_VMPIC 0x00000003 +struct fsl_msi_cascade_data; + struct fsl_msi { struct irq_domain *irqhost; @@ -37,7 +39,7 @@ struct fsl_msi { u32 srs_shift; /* Shift of the shared interrupt register select */ void __iomem *msi_regs; u32 feature; - int msi_virqs[NR_MSI_REG_MAX]; + struct fsl_msi_cascade_data *cascade_array[NR_MSI_REG_MAX]; struct msi_bitmap bitmap; diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 4bd091a0558..65d2ed4549e 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -522,7 +522,8 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) } else { /* For PCI read PROG to identify controller mode */ early_read_config_byte(hose, 0, 0, PCI_CLASS_PROG, &progif); - if ((progif & 1) == 1) + if ((progif & 1) && + !of_property_read_bool(dev, "fsl,pci-agent-force-enum")) goto no_bridge; } @@ -853,8 +854,8 @@ u64 fsl_pci_immrbar_base(struct pci_controller *hose) in = pcie->cfg_type0 + PEX_RC_INWIN_BASE; for (i = 0; i < 4; i++) { /* not enabled, skip */ - if (!in_le32(&in[i].ar) & PEX_RCIWARn_EN) - continue; + if (!(in_le32(&in[i].ar) & PEX_RCIWARn_EN)) + continue; if (get_immrbase() == in_le32(&in[i].tar)) return (u64)in_le32(&in[i].barh) << 32 | diff --git a/arch/powerpc/sysdev/micropatch.c b/arch/powerpc/sysdev/micropatch.c index c0bb76ef724..6727dc54d54 100644 --- a/arch/powerpc/sysdev/micropatch.c +++ b/arch/powerpc/sysdev/micropatch.c @@ -13,7 +13,6 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <asm/irq.h> -#include <asm/mpc8xx.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/8xx_immap.h> diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index be33c9768ea..89cec0ed6a5 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -960,7 +960,7 @@ void mpic_set_vector(unsigned int virq, unsigned int vector) mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), vecpri); } -void mpic_set_destination(unsigned int virq, unsigned int cpuid) +static void mpic_set_destination(unsigned int virq, unsigned int cpuid) { struct mpic *mpic = mpic_from_irq(virq); unsigned int src = virq_to_hw(virq); diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index 2c9b52aa266..7bdf3cc741e 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -184,7 +184,7 @@ static int mpic_msgr_probe(struct platform_device *dev) dev_info(&dev->dev, "Found %d message registers\n", mpic_msgr_count); - mpic_msgrs = kzalloc(sizeof(struct mpic_msgr) * mpic_msgr_count, + mpic_msgrs = kcalloc(mpic_msgr_count, sizeof(*mpic_msgrs), GFP_KERNEL); if (!mpic_msgrs) { dev_err(&dev->dev, diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c index 38e62382070..15dccd35fa1 100644 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c @@ -63,14 +63,6 @@ static struct irq_chip mpic_pasemi_msi_chip = { .name = "PASEMI-MSI", }; -static int pasemi_msi_check_device(struct pci_dev *pdev, int nvec, int type) -{ - if (type == PCI_CAP_ID_MSIX) - pr_debug("pasemi_msi: MSI-X untested, trying anyway\n"); - - return 0; -} - static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; @@ -97,6 +89,8 @@ static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) struct msi_msg msg; int hwirq; + if (type == PCI_CAP_ID_MSIX) + pr_debug("pasemi_msi: MSI-X untested, trying anyway\n"); pr_debug("pasemi_msi_setup_msi_irqs, pdev %p nvec %d type %d\n", pdev, nvec, type); @@ -169,7 +163,6 @@ int mpic_pasemi_msi_init(struct mpic *mpic) WARN_ON(ppc_md.setup_msi_irqs); ppc_md.setup_msi_irqs = pasemi_msi_setup_msi_irqs; ppc_md.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs; - ppc_md.msi_check_device = pasemi_msi_check_device; return 0; } diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index 9a7aa0ed9c1..623d7fba15b 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -105,22 +105,6 @@ static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq) return 0; } -static int u3msi_msi_check_device(struct pci_dev *pdev, int nvec, int type) -{ - if (type == PCI_CAP_ID_MSIX) - pr_debug("u3msi: MSI-X untested, trying anyway.\n"); - - /* If we can't find a magic address then MSI ain't gonna work */ - if (find_ht_magic_addr(pdev, 0) == 0 && - find_u4_magic_addr(pdev, 0) == 0) { - pr_debug("u3msi: no magic address found for %s\n", - pci_name(pdev)); - return -ENXIO; - } - - return 0; -} - static void u3msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; @@ -146,6 +130,17 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) u64 addr; int hwirq; + if (type == PCI_CAP_ID_MSIX) + pr_debug("u3msi: MSI-X untested, trying anyway.\n"); + + /* If we can't find a magic address then MSI ain't gonna work */ + if (find_ht_magic_addr(pdev, 0) == 0 && + find_u4_magic_addr(pdev, 0) == 0) { + pr_debug("u3msi: no magic address found for %s\n", + pci_name(pdev)); + return -ENXIO; + } + list_for_each_entry(entry, &pdev->msi_list, list) { hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap, 1); if (hwirq < 0) { @@ -202,7 +197,6 @@ int mpic_u3msi_init(struct mpic *mpic) WARN_ON(ppc_md.setup_msi_irqs); ppc_md.setup_msi_irqs = u3msi_setup_msi_irqs; ppc_md.teardown_msi_irqs = u3msi_teardown_msi_irqs; - ppc_md.msi_check_device = u3msi_msi_check_device; return 0; } diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 2ff630267e9..0c75214b6f9 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -20,32 +20,37 @@ int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int num) int offset, order = get_count_order(num); spin_lock_irqsave(&bmp->lock, flags); - /* - * This is fast, but stricter than we need. We might want to add - * a fallback routine which does a linear search with no alignment. - */ - offset = bitmap_find_free_region(bmp->bitmap, bmp->irq_count, order); + + offset = bitmap_find_next_zero_area(bmp->bitmap, bmp->irq_count, 0, + num, (1 << order) - 1); + if (offset > bmp->irq_count) + goto err; + + bitmap_set(bmp->bitmap, offset, num); spin_unlock_irqrestore(&bmp->lock, flags); - pr_debug("msi_bitmap: allocated 0x%x (2^%d) at offset 0x%x\n", - num, order, offset); + pr_debug("msi_bitmap: allocated 0x%x at offset 0x%x\n", num, offset); return offset; +err: + spin_unlock_irqrestore(&bmp->lock, flags); + return -ENOMEM; } +EXPORT_SYMBOL(msi_bitmap_alloc_hwirqs); void msi_bitmap_free_hwirqs(struct msi_bitmap *bmp, unsigned int offset, unsigned int num) { unsigned long flags; - int order = get_count_order(num); - pr_debug("msi_bitmap: freeing 0x%x (2^%d) at offset 0x%x\n", - num, order, offset); + pr_debug("msi_bitmap: freeing 0x%x at offset 0x%x\n", + num, offset); spin_lock_irqsave(&bmp->lock, flags); - bitmap_release_region(bmp->bitmap, offset, order); + bitmap_clear(bmp->bitmap, offset, num); spin_unlock_irqrestore(&bmp->lock, flags); } +EXPORT_SYMBOL(msi_bitmap_free_hwirqs); void msi_bitmap_reserve_hwirq(struct msi_bitmap *bmp, unsigned int hwirq) { @@ -143,7 +148,7 @@ void msi_bitmap_free(struct msi_bitmap *bmp) #define check(x) \ if (!(x)) printk("msi_bitmap: test failed at line %d\n", __LINE__); -void __init test_basics(void) +static void __init test_basics(void) { struct msi_bitmap bmp; int i, size = 512; @@ -180,6 +185,15 @@ void __init test_basics(void) msi_bitmap_free_hwirqs(&bmp, size / 2, 1); check(msi_bitmap_alloc_hwirqs(&bmp, 1) == size / 2); + /* Check we get a naturally aligned offset */ + check(msi_bitmap_alloc_hwirqs(&bmp, 2) % 2 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 4) % 4 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 8) % 8 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 9) % 16 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 3) % 4 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 7) % 8 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 121) % 128 == 0); + msi_bitmap_free(&bmp); /* Clients may check bitmap == NULL for "not-allocated" */ @@ -188,7 +202,7 @@ void __init test_basics(void) kfree(bmp.bitmap); } -void __init test_of_node(void) +static void __init test_of_node(void) { u32 prop_data[] = { 10, 10, 25, 3, 40, 1, 100, 100, 200, 20 }; const char *expected_str = "0-9,20-24,28-39,41-99,220-255"; @@ -236,7 +250,7 @@ void __init test_of_node(void) kfree(bmp.bitmap); } -int __init msi_bitmap_selftest(void) +static int __init msi_bitmap_selftest(void) { printk(KERN_DEBUG "Running MSI bitmap self-tests ...\n"); diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c index c2dba7db71a..026bbc3b2c4 100644 --- a/arch/powerpc/sysdev/mv64x60_dev.c +++ b/arch/powerpc/sysdev/mv64x60_dev.c @@ -23,7 +23,7 @@ /* These functions provide the necessary setup for the mv64x60 drivers. */ -static struct of_device_id __initdata of_mv64x60_devices[] = { +static const struct of_device_id of_mv64x60_devices[] __initconst = { { .compatible = "marvell,mv64306-devctrl", }, {} }; diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c index 5aaf86c0389..13e67d93a7c 100644 --- a/arch/powerpc/sysdev/pmi.c +++ b/arch/powerpc/sysdev/pmi.c @@ -101,7 +101,7 @@ out: } -static struct of_device_id pmi_match[] = { +static const struct of_device_id pmi_match[] = { { .type = "ibm,pmi", .name = "ibm,pmi" }, { .type = "ibm,pmi" }, {}, diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c index 11c888416f0..a6a4dbda907 100644 --- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c @@ -44,6 +44,12 @@ static int hsta_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int irq, hwirq; u64 addr; + /* We don't support MSI-X */ + if (type == PCI_CAP_ID_MSIX) { + pr_debug("%s: MSI-X not supported.\n", __func__); + return -EINVAL; + } + list_for_each_entry(entry, &dev->msi_list, list) { irq = msi_bitmap_alloc_hwirqs(&ppc4xx_hsta_msi.bmp, 1); if (irq < 0) { @@ -117,17 +123,6 @@ static void hsta_teardown_msi_irqs(struct pci_dev *dev) } } -static int hsta_msi_check_device(struct pci_dev *pdev, int nvec, int type) -{ - /* We don't support MSI-X */ - if (type == PCI_CAP_ID_MSIX) { - pr_debug("%s: MSI-X not supported.\n", __func__); - return -EINVAL; - } - - return 0; -} - static int hsta_msi_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -178,7 +173,6 @@ static int hsta_msi_probe(struct platform_device *pdev) ppc_md.setup_msi_irqs = hsta_setup_msi_irqs; ppc_md.teardown_msi_irqs = hsta_teardown_msi_irqs; - ppc_md.msi_check_device = hsta_msi_check_device; return 0; out2: diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 43948da837a..22b5200636e 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -85,8 +85,12 @@ static int ppc4xx_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) struct msi_desc *entry; struct ppc4xx_msi *msi_data = &ppc4xx_msi; - msi_data->msi_virqs = kmalloc((msi_irqs) * sizeof(int), - GFP_KERNEL); + dev_dbg(&dev->dev, "PCIE-MSI:%s called. vec %x type %d\n", + __func__, nvec, type); + if (type == PCI_CAP_ID_MSIX) + pr_debug("ppc4xx msi: MSI-X untested, trying anyway.\n"); + + msi_data->msi_virqs = kmalloc((msi_irqs) * sizeof(int), GFP_KERNEL); if (!msi_data->msi_virqs) return -ENOMEM; @@ -134,16 +138,6 @@ void ppc4xx_teardown_msi_irqs(struct pci_dev *dev) } } -static int ppc4xx_msi_check_device(struct pci_dev *pdev, int nvec, int type) -{ - dev_dbg(&pdev->dev, "PCIE-MSI:%s called. vec %x type %d\n", - __func__, nvec, type); - if (type == PCI_CAP_ID_MSIX) - pr_debug("ppc4xx msi: MSI-X untested, trying anyway.\n"); - - return 0; -} - static int ppc4xx_setup_pcieh_hw(struct platform_device *dev, struct resource res, struct ppc4xx_msi *msi) { @@ -259,7 +253,6 @@ static int ppc4xx_msi_probe(struct platform_device *dev) ppc_md.setup_msi_irqs = ppc4xx_setup_msi_irqs; ppc_md.teardown_msi_irqs = ppc4xx_teardown_msi_irqs; - ppc_md.msi_check_device = ppc4xx_msi_check_device; return err; error_out: diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index de8d9483bbe..2fc4cf1b755 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -155,6 +155,31 @@ static void icp_native_cause_ipi(int cpu, unsigned long data) icp_native_set_qirr(cpu, IPI_PRIORITY); } +/* + * Called when an interrupt is received on an off-line CPU to + * clear the interrupt, so that the CPU can go back to nap mode. + */ +void icp_native_flush_interrupt(void) +{ + unsigned int xirr = icp_native_get_xirr(); + unsigned int vec = xirr & 0x00ffffff; + + if (vec == XICS_IRQ_SPURIOUS) + return; + if (vec == XICS_IPI) { + /* Clear pending IPI */ + int cpu = smp_processor_id(); + kvmppc_set_host_ipi(cpu, 0); + icp_native_set_qirr(cpu, 0xff); + } else { + pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n", + vec); + xics_mask_unknown_vec(vec); + } + /* EOI the interrupt */ + icp_native_set_xirr(xirr); +} + void xics_wake_cpu(int cpu) { icp_native_set_qirr(cpu, IPI_PRIORITY); diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 83f943a8e0d..56f0524e47a 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -265,7 +265,7 @@ static void __init xilinx_i8259_setup_cascade(void) static inline void xilinx_i8259_setup_cascade(void) { return; } #endif /* defined(CONFIG_PPC_I8259) */ -static struct of_device_id xilinx_intc_match[] __initconst = { +static const struct of_device_id xilinx_intc_match[] __initconst = { { .compatible = "xlnx,opb-intc-1.00.c", }, { .compatible = "xlnx,xps-intc-1.00.a", }, {} diff --git a/arch/powerpc/sysdev/xilinx_pci.c b/arch/powerpc/sysdev/xilinx_pci.c index 1453b0eed22..fea5667699e 100644 --- a/arch/powerpc/sysdev/xilinx_pci.c +++ b/arch/powerpc/sysdev/xilinx_pci.c @@ -27,7 +27,7 @@ #define PCI_HOST_ENABLE_CMD PCI_COMMAND_SERR | PCI_COMMAND_PARITY | PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY -static struct of_device_id xilinx_pci_match[] = { +static const struct of_device_id xilinx_pci_match[] = { { .compatible = "xlnx,plbv46-pci-1.03.a", }, {} }; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index d199bfa2f1f..b988b5addf8 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -24,6 +24,7 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/bug.h> +#include <linux/nmi.h> #include <asm/ptrace.h> #include <asm/string.h> @@ -374,6 +375,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) #endif local_irq_save(flags); + hard_irq_disable(); bp = in_breakpoint_table(regs->nip, &offset); if (bp != NULL) { @@ -558,6 +560,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) #endif insert_cpu_bpts(); + touch_nmi_watchdog(); local_irq_restore(flags); return cmd != 'X' && cmd != EOF; @@ -2058,10 +2061,6 @@ static void dump_one_paca(int cpu) DUMP(p, kernel_toc, "lx"); DUMP(p, kernelbase, "lx"); DUMP(p, kernel_msr, "lx"); -#ifdef CONFIG_PPC_STD_MMU_64 - DUMP(p, stab_real, "lx"); - DUMP(p, stab_addr, "lx"); -#endif DUMP(p, emergency_sp, "p"); #ifdef CONFIG_PPC_BOOK3S_64 DUMP(p, mc_emergency_sp, "p"); @@ -2694,7 +2693,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid, } #ifdef CONFIG_PPC_BOOK3S_64 -static void dump_slb(void) +void dump_segments(void) { int i; unsigned long esid,vsid,valid; @@ -2726,34 +2725,6 @@ static void dump_slb(void) } } } - -static void dump_stab(void) -{ - int i; - unsigned long *tmp = (unsigned long *)local_paca->stab_addr; - - printf("Segment table contents of cpu 0x%x\n", smp_processor_id()); - - for (i = 0; i < PAGE_SIZE/16; i++) { - unsigned long a, b; - - a = *tmp++; - b = *tmp++; - - if (a || b) { - printf("%03d %016lx ", i, a); - printf("%016lx\n", b); - } - } -} - -void dump_segments(void) -{ - if (mmu_has_feature(MMU_FTR_SLB)) - dump_slb(); - else - dump_stab(); -} #endif #ifdef CONFIG_PPC_STD_MMU_32 |