diff options
author | David Woodhouse <David.Woodhouse@intel.com> | 2010-10-30 12:35:11 +0100 |
---|---|---|
committer | David Woodhouse <David.Woodhouse@intel.com> | 2010-10-30 12:35:11 +0100 |
commit | 67577927e8d7a1f4b09b4992df640eadc6aacb36 (patch) | |
tree | 2e9efe6b5745965faf0dcc084d4613d9356263f9 /arch/powerpc | |
parent | 6fe4c590313133ebd5dadb769031489ff178ece1 (diff) | |
parent | 51f00a471ce8f359627dd99aeac322947a0e491b (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Conflicts:
drivers/mtd/mtd_blkdevs.c
Merge Grant's device-tree bits so that we can apply the subsequent fixes.
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Diffstat (limited to 'arch/powerpc')
191 files changed, 6329 insertions, 1711 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 44df1bac970..29158913a66 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -138,6 +138,7 @@ config PPC select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c index b1e5611b2ab..349b5530d2c 100644 --- a/arch/powerpc/boot/addnote.c +++ b/arch/powerpc/boot/addnote.c @@ -20,7 +20,7 @@ #include <string.h> /* CHRP note section */ -char arch[] = "PowerPC"; +static const char arch[] = "PowerPC"; #define N_DESCR 6 unsigned int descr[N_DESCR] = { @@ -33,7 +33,7 @@ unsigned int descr[N_DESCR] = { }; /* RPA note section */ -char rpaname[] = "IBM,RPA-Client-Config"; +static const char rpaname[] = "IBM,RPA-Client-Config"; /* * Note: setting ignore_my_client_config *should* mean that OF ignores diff --git a/arch/powerpc/boot/dts/bluestone.dts b/arch/powerpc/boot/dts/bluestone.dts new file mode 100644 index 00000000000..9bb3d72c0e5 --- /dev/null +++ b/arch/powerpc/boot/dts/bluestone.dts @@ -0,0 +1,254 @@ +/* + * Device Tree for Bluestone (APM821xx) board. + * + * Copyright (c) 2010, Applied Micro Circuits Corporation + * Author: Tirumala R Marri <tmarri@apm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + */ + +/dts-v1/; + +/ { + #address-cells = <2>; + #size-cells = <1>; + model = "apm,bluestone"; + compatible = "apm,bluestone"; + dcr-parent = <&{/cpus/cpu@0}>; + + aliases { + ethernet0 = &EMAC0; + serial0 = &UART0; + serial1 = &UART1; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + model = "PowerPC,apm821xx"; + reg = <0x00000000>; + clock-frequency = <0>; /* Filled in by U-Boot */ + timebase-frequency = <0>; /* Filled in by U-Boot */ + i-cache-line-size = <32>; + d-cache-line-size = <32>; + i-cache-size = <32768>; + d-cache-size = <32768>; + dcr-controller; + dcr-access-method = "native"; + next-level-cache = <&L2C0>; + }; + }; + + memory { + device_type = "memory"; + reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by U-Boot */ + }; + + UIC0: interrupt-controller0 { + compatible = "ibm,uic"; + interrupt-controller; + cell-index = <0>; + dcr-reg = <0x0c0 0x009>; + #address-cells = <0>; + #size-cells = <0>; + #interrupt-cells = <2>; + }; + + UIC1: interrupt-controller1 { + compatible = "ibm,uic"; + interrupt-controller; + cell-index = <1>; + dcr-reg = <0x0d0 0x009>; + #address-cells = <0>; + #size-cells = <0>; + #interrupt-cells = <2>; + interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */ + interrupt-parent = <&UIC0>; + }; + + UIC2: interrupt-controller2 { + compatible = "ibm,uic"; + interrupt-controller; + cell-index = <2>; + dcr-reg = <0x0e0 0x009>; + #address-cells = <0>; + #size-cells = <0>; + #interrupt-cells = <2>; + interrupts = <0xa 0x4 0xb 0x4>; /* cascade */ + interrupt-parent = <&UIC0>; + }; + + UIC3: interrupt-controller3 { + compatible = "ibm,uic"; + interrupt-controller; + cell-index = <3>; + dcr-reg = <0x0f0 0x009>; + #address-cells = <0>; + #size-cells = <0>; + #interrupt-cells = <2>; + interrupts = <0x10 0x4 0x11 0x4>; /* cascade */ + interrupt-parent = <&UIC0>; + }; + + SDR0: sdr { + compatible = "ibm,sdr-apm821xx"; + dcr-reg = <0x00e 0x002>; + }; + + CPR0: cpr { + compatible = "ibm,cpr-apm821xx"; + dcr-reg = <0x00c 0x002>; + }; + + plb { + compatible = "ibm,plb4"; + #address-cells = <2>; + #size-cells = <1>; + ranges; + clock-frequency = <0>; /* Filled in by U-Boot */ + + SDRAM0: sdram { + compatible = "ibm,sdram-apm821xx"; + dcr-reg = <0x010 0x002>; + }; + + MAL0: mcmal { + compatible = "ibm,mcmal2"; + descriptor-memory = "ocm"; + dcr-reg = <0x180 0x062>; + num-tx-chans = <1>; + num-rx-chans = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-parent = <&UIC2>; + interrupts = < /*TXEOB*/ 0x6 0x4 + /*RXEOB*/ 0x7 0x4 + /*SERR*/ 0x3 0x4 + /*TXDE*/ 0x4 0x4 + /*RXDE*/ 0x5 0x4 + }; + + POB0: opb { + compatible = "ibm,opb"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0xb0000000 0x00000004 0xb0000000 0x50000000>; + clock-frequency = <0>; /* Filled in by U-Boot */ + + EBC0: ebc { + compatible = "ibm,ebc"; + dcr-reg = <0x012 0x002>; + #address-cells = <2>; + #size-cells = <1>; + clock-frequency = <0>; /* Filled in by U-Boot */ + /* ranges property is supplied by U-Boot */ + ranges = < 0x00000003 0x00000000 0xe0000000 0x8000000>; + interrupts = <0x6 0x4>; + interrupt-parent = <&UIC1>; + + nor_flash@0,0 { + compatible = "amd,s29gl512n", "cfi-flash"; + bank-width = <2>; + reg = <0x00000000 0x00000000 0x00400000>; + #address-cells = <1>; + #size-cells = <1>; + partition@0 { + label = "kernel"; + reg = <0x00000000 0x00180000>; + }; + partition@180000 { + label = "env"; + reg = <0x00180000 0x00020000>; + }; + partition@1a0000 { + label = "u-boot"; + reg = <0x001a0000 0x00060000>; + }; + }; + } + + UART0: serial@ef600300 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0xef600300 0x00000008>; + virtual-reg = <0xef600300>; + clock-frequency = <0>; /* Filled in by U-Boot */ + current-speed = <0>; /* Filled in by U-Boot */ + interrupt-parent = <&UIC1>; + interrupts = <0x1 0x4>; + }; + + IIC0: i2c@ef600700 { + compatible = "ibm,iic"; + reg = <0xef600700 0x00000014>; + interrupt-parent = <&UIC0>; + interrupts = <0x2 0x4>; + }; + + IIC1: i2c@ef600800 { + compatible = "ibm,iic"; + reg = <0xef600800 0x00000014>; + interrupt-parent = <&UIC0>; + interrupts = <0x3 0x4>; + }; + + RGMII0: emac-rgmii@ef601500 { + compatible = "ibm,rgmii"; + reg = <0xef601500 0x00000008>; + has-mdio; + }; + + TAH0: emac-tah@ef601350 { + compatible = "ibm,tah"; + reg = <0xef601350 0x00000030>; + }; + + EMAC0: ethernet@ef600c00 { + device_type = "network"; + compatible = "ibm,emac4sync"; + interrupt-parent = <&EMAC0>; + interrupts = <0x0 0x1>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-map = </*Status*/ 0x0 &UIC2 0x10 0x4 + /*Wake*/ 0x1 &UIC2 0x14 0x4>; + reg = <0xef600c00 0x000000c4>; + local-mac-address = [000000000000]; /* Filled in by U-Boot */ + mal-device = <&MAL0>; + mal-tx-channel = <0>; + mal-rx-channel = <0>; + cell-index = <0>; + max-frame-size = <9000>; + rx-fifo-size = <16384>; + tx-fifo-size = <2048>; + phy-mode = "rgmii"; + phy-map = <0x00000000>; + rgmii-device = <&RGMII0>; + rgmii-channel = <0>; + tah-device = <&TAH0>; + tah-channel = <0>; + has-inverted-stacr-oc; + has-new-stacr-staopc; + }; + }; + + }; +}; diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts new file mode 100644 index 00000000000..05a76ccfd49 --- /dev/null +++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts @@ -0,0 +1,332 @@ +/* + * mpc8308_p1m Device Tree Source + * + * Copyright 2010 Ilya Yanok, Emcraft Systems, yanok@emcraft.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/dts-v1/; + +/ { + compatible = "denx,mpc8308_p1m"; + #address-cells = <1>; + #size-cells = <1>; + + aliases { + ethernet0 = &enet0; + ethernet1 = &enet1; + serial0 = &serial0; + serial1 = &serial1; + pci0 = &pci0; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + PowerPC,8308@0 { + device_type = "cpu"; + reg = <0x0>; + d-cache-line-size = <32>; + i-cache-line-size = <32>; + d-cache-size = <16384>; + i-cache-size = <16384>; + timebase-frequency = <0>; // from bootloader + bus-frequency = <0>; // from bootloader + clock-frequency = <0>; // from bootloader + }; + }; + + memory { + device_type = "memory"; + reg = <0x00000000 0x08000000>; // 128MB at 0 + }; + + localbus@e0005000 { + #address-cells = <2>; + #size-cells = <1>; + compatible = "fsl,mpc8315-elbc", "fsl,elbc", "simple-bus"; + reg = <0xe0005000 0x1000>; + interrupts = <77 0x8>; + interrupt-parent = <&ipic>; + + ranges = <0x0 0x0 0xfc000000 0x04000000 + 0x1 0x0 0xfbff0000 0x00008000 + 0x2 0x0 0xfbff8000 0x00008000>; + + flash@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x4000000>; + bank-width = <2>; + device-width = <1>; + + u-boot@0 { + reg = <0x0 0x60000>; + read-only; + }; + env@60000 { + reg = <0x60000 0x20000>; + }; + env1@80000 { + reg = <0x80000 0x20000>; + }; + kernel@a0000 { + reg = <0xa0000 0x200000>; + }; + dtb@2a0000 { + reg = <0x2a0000 0x20000>; + }; + ramdisk@2c0000 { + reg = <0x2c0000 0x640000>; + }; + user@700000 { + reg = <0x700000 0x3900000>; + }; + }; + + can@1,0 { + compatible = "nxp,sja1000"; + reg = <0x1 0x0 0x80>; + interrupts = <18 0x8>; + interrups-parent = <&ipic>; + }; + + cpld@2,0 { + compatible = "denx,mpc8308_p1m-cpld"; + reg = <0x2 0x0 0x8>; + interrupts = <48 0x8>; + interrups-parent = <&ipic>; + }; + }; + + immr@e0000000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + compatible = "fsl,mpc8308-immr", "simple-bus"; + ranges = <0 0xe0000000 0x00100000>; + reg = <0xe0000000 0x00000200>; + bus-frequency = <0>; + + i2c@3000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl-i2c"; + reg = <0x3000 0x100>; + interrupts = <14 0x8>; + interrupt-parent = <&ipic>; + dfsrr; + fram@50 { + compatible = "ramtron,24c64"; + reg = <0x50>; + }; + }; + + i2c@3100 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl-i2c"; + reg = <0x3100 0x100>; + interrupts = <15 0x8>; + interrupt-parent = <&ipic>; + dfsrr; + pwm@28 { + compatible = "maxim,ds1050"; + reg = <0x28>; + }; + sensor@48 { + compatible = "maxim,max6625"; + reg = <0x48>; + }; + sensor@49 { + compatible = "maxim,max6625"; + reg = <0x49>; + }; + sensor@4b { + compatible = "maxim,max6625"; + reg = <0x4b>; + }; + }; + + usb@23000 { + compatible = "fsl-usb2-dr"; + reg = <0x23000 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <&ipic>; + interrupts = <38 0x8>; + dr_mode = "peripheral"; + phy_type = "ulpi"; + }; + + enet0: ethernet@24000 { + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x24000 0x1000>; + + cell-index = <0>; + device_type = "network"; + model = "eTSEC"; + compatible = "gianfar"; + reg = <0x24000 0x1000>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <32 0x8 33 0x8 34 0x8>; + interrupt-parent = <&ipic>; + phy-handle = < &phy1 >; + + mdio@520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-mdio"; + reg = <0x520 0x20>; + phy1: ethernet-phy@1 { + interrupt-parent = <&ipic>; + interrupts = <17 0x8>; + reg = <0x1>; + device_type = "ethernet-phy"; + }; + phy2: ethernet-phy@2 { + interrupt-parent = <&ipic>; + interrupts = <19 0x8>; + reg = <0x2>; + device_type = "ethernet-phy"; + }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + }; + + enet1: ethernet@25000 { + #address-cells = <1>; + #size-cells = <1>; + cell-index = <1>; + device_type = "network"; + model = "eTSEC"; + compatible = "gianfar"; + reg = <0x25000 0x1000>; + ranges = <0x0 0x25000 0x1000>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <35 0x8 36 0x8 37 0x8>; + interrupt-parent = <&ipic>; + phy-handle = < &phy2 >; + + mdio@520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x520 0x20>; + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + }; + + serial0: serial@4500 { + cell-index = <0>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4500 0x100>; + clock-frequency = <133333333>; + interrupts = <9 0x8>; + interrupt-parent = <&ipic>; + }; + + serial1: serial@4600 { + cell-index = <1>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4600 0x100>; + clock-frequency = <133333333>; + interrupts = <10 0x8>; + interrupt-parent = <&ipic>; + }; + + gpio@c00 { + #gpio-cells = <2>; + compatible = "fsl,mpc8308-gpio", "fsl,mpc8349-gpio"; + reg = <0xc00 0x18>; + interrupts = <74 0x8>; + interrupt-parent = <&ipic>; + gpio-controller; + }; + + timer@500 { + compatible = "fsl,mpc8308-gtm", "fsl,gtm"; + reg = <0x500 0x100>; + interrupts = <90 8 78 8 84 8 72 8>; + interrupt-parent = <&ipic>; + clock-frequency = <133333333>; + }; + + /* IPIC + * interrupts cell = <intr #, sense> + * sense values match linux IORESOURCE_IRQ_* defines: + * sense == 8: Level, low assertion + * sense == 2: Edge, high-to-low change + */ + ipic: interrupt-controller@700 { + compatible = "fsl,ipic"; + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <2>; + reg = <0x700 0x100>; + device_type = "ipic"; + }; + + ipic-msi@7c0 { + compatible = "fsl,ipic-msi"; + reg = <0x7c0 0x40>; + msi-available-ranges = <0x0 0x100>; + interrupts = < 0x43 0x8 + 0x4 0x8 + 0x51 0x8 + 0x52 0x8 + 0x56 0x8 + 0x57 0x8 + 0x58 0x8 + 0x59 0x8 >; + interrupt-parent = < &ipic >; + }; + + }; + + pci0: pcie@e0009000 { + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + device_type = "pci"; + compatible = "fsl,mpc8308-pcie", "fsl,mpc8314-pcie"; + reg = <0xe0009000 0x00001000 + 0xb0000000 0x01000000>; + ranges = <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000 + 0x01000000 0 0x00000000 0xb1000000 0 0x00800000>; + bus-range = <0 0>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &ipic 1 8>; + interrupts = <0x1 0x8>; + interrupt-parent = <&ipic>; + clock-frequency = <0>; + + pcie@0 { + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + reg = <0 0 0 0 0>; + ranges = <0x02000000 0 0xa0000000 + 0x02000000 0 0xa0000000 + 0 0x10000000 + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00800000>; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/mpc8536ds.dts b/arch/powerpc/boot/dts/mpc8536ds.dts index 815cebb2e3e..a75c10eed26 100644 --- a/arch/powerpc/boot/dts/mpc8536ds.dts +++ b/arch/powerpc/boot/dts/mpc8536ds.dts @@ -108,6 +108,58 @@ }; }; + spi@7000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,mpc8536-espi"; + reg = <0x7000 0x1000>; + interrupts = <59 0x2>; + interrupt-parent = <&mpic>; + fsl,espi-num-chipselects = <4>; + + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "spansion,s25sl12801"; + reg = <0>; + spi-max-frequency = <40000000>; + partition@u-boot { + label = "u-boot"; + reg = <0x00000000 0x00100000>; + read-only; + }; + partition@kernel { + label = "kernel"; + reg = <0x00100000 0x00500000>; + read-only; + }; + partition@dtb { + label = "dtb"; + reg = <0x00600000 0x00100000>; + read-only; + }; + partition@fs { + label = "file system"; + reg = <0x00700000 0x00900000>; + }; + }; + flash@1 { + compatible = "spansion,s25sl12801"; + reg = <1>; + spi-max-frequency = <40000000>; + }; + flash@2 { + compatible = "spansion,s25sl12801"; + reg = <2>; + spi-max-frequency = <40000000>; + }; + flash@3 { + compatible = "spansion,s25sl12801"; + reg = <3>; + spi-max-frequency = <40000000>; + }; + }; + dma@21300 { #address-cells = <1>; #size-cells = <1>; diff --git a/arch/powerpc/boot/dts/p1022ds.dts b/arch/powerpc/boot/dts/p1022ds.dts index 8bcb10b9267..2bbecbb4cbf 100644 --- a/arch/powerpc/boot/dts/p1022ds.dts +++ b/arch/powerpc/boot/dts/p1022ds.dts @@ -148,6 +148,17 @@ label = "reserved-nand"; }; }; + + board-control@3,0 { + compatible = "fsl,p1022ds-pixis"; + reg = <3 0 0x30>; + interrupt-parent = <&mpic>; + /* + * IRQ8 is generated if the "EVENT" switch is pressed + * and PX_CTL[EVESEL] is set to 00. + */ + interrupts = <8 8>; + }; }; soc@fffe00000 { diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts index 2f0de24e382..5b7fc29dd6c 100644 --- a/arch/powerpc/boot/dts/p4080ds.dts +++ b/arch/powerpc/boot/dts/p4080ds.dts @@ -236,22 +236,19 @@ }; spi@110000 { - cell-index = <0>; #address-cells = <1>; #size-cells = <0>; - compatible = "fsl,espi"; + compatible = "fsl,p4080-espi", "fsl,mpc8536-espi"; reg = <0x110000 0x1000>; interrupts = <53 0x2>; interrupt-parent = <&mpic>; - espi,num-ss-bits = <4>; - mode = "cpu"; + fsl,espi-num-chipselects = <4>; - fsl_m25p80@0 { + flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "fsl,espi-flash"; + compatible = "spansion,s25sl12801"; reg = <0>; - linux,modalias = "fsl_m25p80"; spi-max-frequency = <40000000>; /* input clock */ partition@u-boot { label = "u-boot"; diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig new file mode 100644 index 00000000000..ac65b48b8cc --- /dev/null +++ b/arch/powerpc/configs/44x/bluestone_defconfig @@ -0,0 +1,68 @@ +CONFIG_44x=y +CONFIG_EXPERIMENTAL=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_BLK_DEV_INITRD=y +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_PCI_QUIRKS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_BLUESTONE=y +# CONFIG_EBONY is not set +# CONFIG_KVM_GUEST is not set +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_SPARSE_IRQ=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="" +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_CONNECTOR=y +CONFIG_MTD=y +CONFIG_MTD_PARTITIONS=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_OF_PARTS=y +CONFIG_MTD_CHAR=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_AMDSTD=y +CONFIG_MTD_PHYSMAP_OF=y +CONFIG_PROC_DEVICETREE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=35000 +CONFIG_NETDEVICES=y +CONFIG_NET_ETHERNET=y +CONFIG_IBM_NEW_EMAC=y +CONFIG_IBM_NEW_EMAC_RXB=256 +CONFIG_IBM_NEW_EMAC_TXB=256 +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=2 +CONFIG_SERIAL_8250_RUNTIME_UARTS=2 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_IBM_IIC=y +CONFIG_SENSORS_AD7414=y +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_M41T80=y +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_CRAMFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_ROOT_NFS=y +CONFIG_NLS=y diff --git a/arch/powerpc/configs/e55xx_smp_defconfig b/arch/powerpc/configs/e55xx_smp_defconfig new file mode 100644 index 00000000000..94d120ef99c --- /dev/null +++ b/arch/powerpc/configs/e55xx_smp_defconfig @@ -0,0 +1,84 @@ +CONFIG_PPC64=y +CONFIG_PPC_BOOK3E_64=y +# CONFIG_VIRT_CPU_ACCOUNTING is not set +CONFIG_SMP=y +CONFIG_NR_CPUS=2 +CONFIG_EXPERIMENTAL=y +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_EMBEDDED=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_P5020_DS=y +# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BINFMT_MISC=m +CONFIG_SPARSE_IRQ=y +# CONFIG_PCI is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_PROC_DEVICETREE=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=131072 +CONFIG_EEPROM_LEGACY=y +CONFIG_INPUT_FF_MEMLESS=m +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_SERIO_LIBPS2=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y +CONFIG_I2C=y +# CONFIG_HWMON is not set +CONFIG_VIDEO_OUTPUT_CONTROL=y +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_DMADEVICES=y +CONFIG_FSL_DMA=y +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_MAC_PARTITION=y +CONFIG_NLS=y +CONFIG_NLS_UTF8=m +CONFIG_CRC_T10DIF=y +CONFIG_CRC_ITU_T=m +CONFIG_LIBCRC32C=m +CONFIG_FRAME_WARN=1024 +CONFIG_DEBUG_FS=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_DEBUG_BUGVERBOSE is not set +CONFIG_DEBUG_INFO=y +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_VIRQ_DEBUG=y +CONFIG_CRYPTO=y +CONFIG_CRYPTO_CBC=y +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_SHA1=m +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRYPTO_DEV_TALITOS=y diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index cd446fba3fa..2fa05f7be4c 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -12,6 +12,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_BAMBOO=y +CONFIG_BLUESTONE=y CONFIG_SAM440EP=y CONFIG_SEQUOIA=y CONFIG_TAISHAN=y @@ -97,14 +98,17 @@ CONFIG_USB_STORAGE=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=m # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=m CONFIG_UBIFS_FS_XATTR=y +CONFIG_LOGFS=m CONFIG_CRAMFS=y +CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZO=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y @@ -116,11 +120,8 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set CONFIG_VIRTUALIZATION=y diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 04ae0740b6d..7bd1763877b 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -18,6 +18,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_P5020_DS=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_USERSPACE=y @@ -256,7 +257,6 @@ CONFIG_HID_ZEROPLUS=y CONFIG_USB=y CONFIG_USB_DEVICEFS=y CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_TT_NEWSCHED=y # CONFIG_USB_EHCI_HCD_PPC_OF is not set CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=m @@ -290,7 +290,6 @@ CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_POSIX_ACL=y -CONFIG_INOTIFY=y CONFIG_AUTOFS4_FS=m CONFIG_ISO9660_FS=y CONFIG_UDF_FS=m @@ -384,7 +383,6 @@ CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 7cdf358337c..ce0c28495f9 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -52,12 +52,22 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum); extern __wsum csum_partial_copy_generic(const void *src, void *dst, int len, __wsum sum, int *src_err, int *dst_err); + +#ifdef __powerpc64__ +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER +extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr); +#define HAVE_CSUM_COPY_USER +extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, + int len, __wsum sum, int *err_ptr); +#else /* * the same as csum_partial, but copies from src to dst while it * checksums. */ #define csum_partial_copy_from_user(src, dst, len, sum, errp) \ csum_partial_copy_generic((__force const void *)(src), (dst), (len), (sum), (errp), NULL) +#endif #define csum_partial_copy_nocheck(src, dst, len, sum) \ csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL) diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index a11d4eac4f9..2296112e247 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -143,7 +143,7 @@ static inline void __user *arch_compat_alloc_user_space(long len) * We cant access below the stack pointer in the 32bit ABI and * can access 288 bytes in the 64bit ABI */ - if (!(test_thread_flag(TIF_32BIT))) + if (!is_32bit_task()) usp -= 288; return (void __user *) (usp - len); @@ -213,7 +213,7 @@ struct compat_shmid64_ds { static inline int is_compat_task(void) { - return test_thread_flag(TIF_32BIT); + return is_32bit_task(); } #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 3a40a992e59..f3a1fdd9cf0 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -198,6 +198,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0040000000000000) #define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0080000000000000) #define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0100000000000000) +#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0200000000000000) #ifndef __ASSEMBLY__ @@ -392,28 +393,31 @@ extern const char *powerpc_base_platform; CPU_FTR_MMCRA | CPU_FTR_CTRL) #define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ - CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ) + CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \ + CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \ - CPU_FTR_CP_USE_DCBTZ) + CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ - CPU_FTR_PURR) + CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD) + CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \ + CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT) + CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ + CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 8c9c6ad2004..6d2416a8570 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -127,19 +127,7 @@ static inline int dma_supported(struct device *dev, u64 mask) return dma_ops->dma_supported(dev, mask); } -static inline int dma_set_mask(struct device *dev, u64 dma_mask) -{ - struct dma_map_ops *dma_ops = get_dma_ops(dev); - - if (unlikely(dma_ops == NULL)) - return -EIO; - if (dma_ops->set_dma_mask != NULL) - return dma_ops->set_dma_mask(dev, dma_mask); - if (!dev->dma_mask || !dma_supported(dev, dma_mask)) - return -EIO; - *dev->dma_mask = dma_mask; - return 0; -} +extern int dma_set_mask(struct device *dev, u64 dma_mask); static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index c376eda1531..2b917c69ed1 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -250,7 +250,7 @@ do { \ * the 64bit ABI has never had these issues dont enable the workaround * even if we have an executable stack. */ -# define elf_read_implies_exec(ex, exec_stk) (test_thread_flag(TIF_32BIT) ? \ +# define elf_read_implies_exec(ex, exec_stk) (is_32bit_task() ? \ (exec_stk == EXSTACK_DEFAULT) : 0) #else # define SET_PERSONALITY(ex) \ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 57c40007199..7778d6f0c87 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -137,7 +137,8 @@ li r10,0; \ ld r11,exception_marker@toc(r2); \ std r10,RESULT(r1); /* clear regs->result */ \ - std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ + std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ + ACCOUNT_STOLEN_TIME /* * Exception vectors. diff --git a/arch/powerpc/include/asm/fsl_85xx_cache_sram.h b/arch/powerpc/include/asm/fsl_85xx_cache_sram.h new file mode 100644 index 00000000000..2af2bdc37b2 --- /dev/null +++ b/arch/powerpc/include/asm/fsl_85xx_cache_sram.h @@ -0,0 +1,48 @@ +/* + * Copyright 2009 Freescale Semiconductor, Inc. + * + * Cache SRAM handling for QorIQ platform + * + * Author: Vivek Mahajan <vivek.mahajan@freescale.com> + + * This file is derived from the original work done + * by Sylvain Munaut for the Bestcomm SRAM allocator. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __ASM_POWERPC_FSL_85XX_CACHE_SRAM_H__ +#define __ASM_POWERPC_FSL_85XX_CACHE_SRAM_H__ + +#include <asm/rheap.h> +#include <linux/spinlock.h> + +/* + * Cache-SRAM + */ + +struct mpc85xx_cache_sram { + phys_addr_t base_phys; + void *base_virt; + unsigned int size; + rh_info_t *rh; + spinlock_t lock; +}; + +extern void mpc85xx_cache_sram_free(void *ptr); +extern void *mpc85xx_cache_sram_alloc(unsigned int size, + phys_addr_t *phys, unsigned int align); + +#endif /* __AMS_POWERPC_FSL_85XX_CACHE_SRAM_H__ */ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index bd100fcf40d..ff08b70b36d 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -16,42 +16,57 @@ extern void timer_interrupt(struct pt_regs *); #ifdef CONFIG_PPC64 #include <asm/paca.h> -static inline unsigned long local_get_flags(void) +static inline unsigned long arch_local_save_flags(void) { unsigned long flags; - __asm__ __volatile__("lbz %0,%1(13)" - : "=r" (flags) - : "i" (offsetof(struct paca_struct, soft_enabled))); + asm volatile( + "lbz %0,%1(13)" + : "=r" (flags) + : "i" (offsetof(struct paca_struct, soft_enabled))); return flags; } -static inline unsigned long raw_local_irq_disable(void) +static inline unsigned long arch_local_irq_disable(void) { unsigned long flags, zero; - __asm__ __volatile__("li %1,0; lbz %0,%2(13); stb %1,%2(13)" - : "=r" (flags), "=&r" (zero) - : "i" (offsetof(struct paca_struct, soft_enabled)) - : "memory"); + asm volatile( + "li %1,0; lbz %0,%2(13); stb %1,%2(13)" + : "=r" (flags), "=&r" (zero) + : "i" (offsetof(struct paca_struct, soft_enabled)) + : "memory"); return flags; } -extern void raw_local_irq_restore(unsigned long); +extern void arch_local_irq_restore(unsigned long); extern void iseries_handle_interrupts(void); -#define raw_local_irq_enable() raw_local_irq_restore(1) -#define raw_local_save_flags(flags) ((flags) = local_get_flags()) -#define raw_local_irq_save(flags) ((flags) = raw_local_irq_disable()) +static inline void arch_local_irq_enable(void) +{ + arch_local_irq_restore(1); +} + +static inline unsigned long arch_local_irq_save(void) +{ + return arch_local_irq_disable(); +} + +static inline bool arch_irqs_disabled_flags(unsigned long flags) +{ + return flags == 0; +} -#define raw_irqs_disabled() (local_get_flags() == 0) -#define raw_irqs_disabled_flags(flags) ((flags) == 0) +static inline bool arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} #ifdef CONFIG_PPC_BOOK3E -#define __hard_irq_enable() __asm__ __volatile__("wrteei 1": : :"memory"); -#define __hard_irq_disable() __asm__ __volatile__("wrteei 0": : :"memory"); +#define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory"); +#define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory"); #else #define __hard_irq_enable() __mtmsrd(mfmsr() | MSR_EE, 1) #define __hard_irq_disable() __mtmsrd(mfmsr() & ~MSR_EE, 1) @@ -64,64 +79,66 @@ extern void iseries_handle_interrupts(void); get_paca()->hard_enabled = 0; \ } while(0) -#else +#else /* CONFIG_PPC64 */ -#if defined(CONFIG_BOOKE) #define SET_MSR_EE(x) mtmsr(x) -#define raw_local_irq_restore(flags) __asm__ __volatile__("wrtee %0" : : "r" (flags) : "memory") + +static inline unsigned long arch_local_save_flags(void) +{ + return mfmsr(); +} + +static inline void arch_local_irq_restore(unsigned long flags) +{ +#if defined(CONFIG_BOOKE) + asm volatile("wrtee %0" : : "r" (flags) : "memory"); #else -#define SET_MSR_EE(x) mtmsr(x) -#define raw_local_irq_restore(flags) mtmsr(flags) + mtmsr(flags); #endif +} -static inline void raw_local_irq_disable(void) +static inline unsigned long arch_local_irq_save(void) { + unsigned long flags = arch_local_save_flags(); #ifdef CONFIG_BOOKE - __asm__ __volatile__("wrteei 0": : :"memory"); + asm volatile("wrteei 0" : : : "memory"); #else - unsigned long msr; - - msr = mfmsr(); - SET_MSR_EE(msr & ~MSR_EE); + SET_MSR_EE(flags & ~MSR_EE); #endif + return flags; } -static inline void raw_local_irq_enable(void) +static inline void arch_local_irq_disable(void) { #ifdef CONFIG_BOOKE - __asm__ __volatile__("wrteei 1": : :"memory"); + asm volatile("wrteei 0" : : : "memory"); #else - unsigned long msr; - - msr = mfmsr(); - SET_MSR_EE(msr | MSR_EE); + arch_local_irq_save(); #endif } -static inline void raw_local_irq_save_ptr(unsigned long *flags) +static inline void arch_local_irq_enable(void) { - unsigned long msr; - msr = mfmsr(); - *flags = msr; #ifdef CONFIG_BOOKE - __asm__ __volatile__("wrteei 0": : :"memory"); + asm volatile("wrteei 1" : : : "memory"); #else - SET_MSR_EE(msr & ~MSR_EE); + unsigned long msr = mfmsr(); + SET_MSR_EE(msr | MSR_EE); #endif } -#define raw_local_save_flags(flags) ((flags) = mfmsr()) -#define raw_local_irq_save(flags) raw_local_irq_save_ptr(&flags) -#define raw_irqs_disabled() ((mfmsr() & MSR_EE) == 0) -#define raw_irqs_disabled_flags(flags) (((flags) & MSR_EE) == 0) - -#define hard_irq_disable() raw_local_irq_disable() - -static inline int irqs_disabled_flags(unsigned long flags) +static inline bool arch_irqs_disabled_flags(unsigned long flags) { return (flags & MSR_EE) == 0; } +static inline bool arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +#define hard_irq_disable() arch_local_irq_disable() + #endif /* CONFIG_PPC64 */ /* diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index 1ad4eed07fb..5b0c98bd46a 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -10,7 +10,7 @@ * * © Copyright 1995 Apple Computer, Inc. All rights reserved. * - * It's available online from http://chrp.apple.com/MacTech.pdf. + * It's available online from http://www.cpu.lu/~mlan/ftp/MacTech.pdf * You can obtain paper copies of this book from computer bookstores or by * writing Morgan Kaufmann Publishers, Inc., 340 Pine Street, Sixth Floor, San * Francisco, CA 94104. Reference ISBN 1-55860-393-X. diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index 5f68ecfdf51..b85d8ddbb66 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -6,7 +6,7 @@ #ifndef __ASSEMBLY__ /* - * Get definitions for raw_local_save_flags(x), etc. + * Get definitions for arch_local_save_flags(x), etc. */ #include <asm/hw_irq.h> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 076327f2eff..f54408d995b 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -91,6 +91,7 @@ extern void machine_kexec_simple(struct kimage *image); extern void crash_kexec_secondary(struct pt_regs *regs); extern int overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); +extern void machine_kexec_mask_interrupts(void); #else /* !CONFIG_KEXEC */ static inline int kexec_sr_activated(int cpu) { return 0; } diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 6c5547d82bb..18ea6963ad7 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -86,5 +86,6 @@ struct kvm_guest_debug_arch { #define KVM_INTERRUPT_SET -1U #define KVM_INTERRUPT_UNSET -2U +#define KVM_INTERRUPT_SET_LEVEL -3U #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index c5ea4cda34b..5b750467439 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -58,6 +58,7 @@ #define BOOK3S_INTERRUPT_INST_STORAGE 0x400 #define BOOK3S_INTERRUPT_INST_SEGMENT 0x480 #define BOOK3S_INTERRUPT_EXTERNAL 0x500 +#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501 #define BOOK3S_INTERRUPT_ALIGNMENT 0x600 #define BOOK3S_INTERRUPT_PROGRAM 0x700 #define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800 @@ -84,7 +85,8 @@ #define BOOK3S_IRQPRIO_EXTERNAL 13 #define BOOK3S_IRQPRIO_DECREMENTER 14 #define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 15 -#define BOOK3S_IRQPRIO_MAX 16 +#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 16 +#define BOOK3S_IRQPRIO_MAX 17 #define BOOK3S_HFLAG_DCBZ32 0x1 #define BOOK3S_HFLAG_SLB 0x2 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 8274a2d4392..d62e703f121 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -38,15 +38,6 @@ struct kvmppc_slb { bool class : 1; }; -struct kvmppc_sr { - u32 raw; - u32 vsid; - bool Ks : 1; - bool Kp : 1; - bool nx : 1; - bool valid : 1; -}; - struct kvmppc_bat { u64 raw; u32 bepi; @@ -69,6 +60,13 @@ struct kvmppc_sid_map { #define SID_MAP_NUM (1 << SID_MAP_BITS) #define SID_MAP_MASK (SID_MAP_NUM - 1) +#ifdef CONFIG_PPC_BOOK3S_64 +#define SID_CONTEXTS 1 +#else +#define SID_CONTEXTS 128 +#define VSID_POOL_SIZE (SID_CONTEXTS * 16) +#endif + struct kvmppc_vcpu_book3s { struct kvm_vcpu vcpu; struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; @@ -79,20 +77,22 @@ struct kvmppc_vcpu_book3s { u64 vsid; } slb_shadow[64]; u8 slb_shadow_max; - struct kvmppc_sr sr[16]; struct kvmppc_bat ibat[8]; struct kvmppc_bat dbat[8]; u64 hid[6]; u64 gqr[8]; int slb_nr; - u32 dsisr; u64 sdr1; u64 hior; u64 msr_mask; - u64 vsid_first; u64 vsid_next; +#ifdef CONFIG_PPC_BOOK3S_32 + u32 vsid_pool[VSID_POOL_SIZE]; +#else + u64 vsid_first; u64 vsid_max; - int context_id; +#endif + int context_id[SID_CONTEXTS]; ulong prog_flags; /* flags to inject when giving a 700 trap */ }; @@ -131,9 +131,10 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); -extern u32 kvmppc_trampoline_lowmem; -extern u32 kvmppc_trampoline_enter; +extern ulong kvmppc_trampoline_lowmem; +extern ulong kvmppc_trampoline_enter; extern void kvmppc_rmcall(ulong srr0, ulong srr1); extern void kvmppc_load_up_fpu(void); extern void kvmppc_load_up_altivec(void); diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h index c3d4f0518a6..92daae13249 100644 --- a/arch/powerpc/include/asm/kvm_fpu.h +++ b/arch/powerpc/include/asm/kvm_fpu.h @@ -82,7 +82,7 @@ FPD_THREE_IN(fmadd) FPD_THREE_IN(fnmsub) FPD_THREE_IN(fnmadd) -extern void kvm_cvt_fd(u32 *from, u64 *to, u64 *fpscr); -extern void kvm_cvt_df(u64 *from, u32 *to, u64 *fpscr); +extern void kvm_cvt_fd(u32 *from, u64 *to); +extern void kvm_cvt_df(u64 *from, u32 *to); #endif diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index b0b23c007d6..bba3b9b72a3 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include <linux/interrupt.h> #include <linux/types.h> #include <linux/kvm_types.h> +#include <linux/kvm_para.h> #include <asm/kvm_asm.h> #define KVM_MAX_VCPUS 1 @@ -41,12 +42,17 @@ #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 +#define HPTEG_HASH_BITS_PTE_LONG 12 #define HPTEG_HASH_BITS_VPTE 13 #define HPTEG_HASH_BITS_VPTE_LONG 5 #define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE) +#define HPTEG_HASH_NUM_PTE_LONG (1 << HPTEG_HASH_BITS_PTE_LONG) #define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE) #define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG) +/* Physical Address Mask - allowed range of real mode RAM access */ +#define KVM_PAM 0x0fffffffffffffffULL + struct kvm; struct kvm_run; struct kvm_vcpu; @@ -159,8 +165,10 @@ struct kvmppc_mmu { struct hpte_cache { struct hlist_node list_pte; + struct hlist_node list_pte_long; struct hlist_node list_vpte; struct hlist_node list_vpte_long; + struct rcu_head rcu_head; u64 host_va; u64 pfn; ulong slot; @@ -210,28 +218,20 @@ struct kvm_vcpu_arch { u32 cr; #endif - ulong msr; #ifdef CONFIG_PPC_BOOK3S ulong shadow_msr; ulong hflags; ulong guest_owned_ext; #endif u32 mmucr; - ulong sprg0; - ulong sprg1; - ulong sprg2; - ulong sprg3; ulong sprg4; ulong sprg5; ulong sprg6; ulong sprg7; - ulong srr0; - ulong srr1; ulong csrr0; ulong csrr1; ulong dsrr0; ulong dsrr1; - ulong dear; ulong esr; u32 dec; u32 decar; @@ -290,12 +290,17 @@ struct kvm_vcpu_arch { struct tasklet_struct tasklet; u64 dec_jiffies; unsigned long pending_exceptions; + struct kvm_vcpu_arch_shared *shared; + unsigned long magic_page_pa; /* phys addr to map the magic page to */ + unsigned long magic_page_ea; /* effect. addr to map the magic page to */ #ifdef CONFIG_PPC_BOOK3S struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; + struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; int hpte_cache_count; + spinlock_t mmu_lock; #endif }; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 2d48f6a63d0..50533f9adf4 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -20,16 +20,153 @@ #ifndef __POWERPC_KVM_PARA_H__ #define __POWERPC_KVM_PARA_H__ +#include <linux/types.h> + +struct kvm_vcpu_arch_shared { + __u64 scratch1; + __u64 scratch2; + __u64 scratch3; + __u64 critical; /* Guest may not get interrupts if == r1 */ + __u64 sprg0; + __u64 sprg1; + __u64 sprg2; + __u64 sprg3; + __u64 srr0; + __u64 srr1; + __u64 dar; + __u64 msr; + __u32 dsisr; + __u32 int_pending; /* Tells the guest if we have an interrupt */ + __u32 sr[16]; +}; + +#define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */ +#define HC_VENDOR_KVM (42 << 16) +#define HC_EV_SUCCESS 0 +#define HC_EV_UNIMPLEMENTED 12 + +#define KVM_FEATURE_MAGIC_PAGE 1 + +#define KVM_MAGIC_FEAT_SR (1 << 0) + #ifdef __KERNEL__ +#ifdef CONFIG_KVM_GUEST + +#include <linux/of.h> + +static inline int kvm_para_available(void) +{ + struct device_node *hyper_node; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return 0; + + if (!of_device_is_compatible(hyper_node, "linux,kvm")) + return 0; + + return 1; +} + +extern unsigned long kvm_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr); + +#else + static inline int kvm_para_available(void) { return 0; } +static unsigned long kvm_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr) +{ + return HC_EV_UNIMPLEMENTED; +} + +#endif + +static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2) +{ + unsigned long in[8]; + unsigned long out[8]; + unsigned long r; + + r = kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + *r2 = out[0]; + + return r; +} + +static inline long kvm_hypercall0(unsigned int nr) +{ + unsigned long in[8]; + unsigned long out[8]; + + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, + unsigned long p2) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + in[2] = p3; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + in[2] = p3; + in[3] = p4; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + + static inline unsigned int kvm_arch_para_features(void) { - return 0; + unsigned long r; + + if (!kvm_para_available()) + return 0; + + if(kvm_hypercall0_1(KVM_HC_FEATURES, &r)) + return 0; + + return r; } #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 18d139ec2d2..ecb3bc74c34 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -107,6 +107,7 @@ extern int kvmppc_booke_init(void); extern void kvmppc_booke_exit(void); extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); +extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu); /* * Cuts out inst bits with ordering according to spec. diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 14b592dfb4e..7f5e0fefebb 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -153,6 +153,8 @@ struct lppaca { extern struct lppaca lppaca[]; +#define lppaca_of(cpu) (*paca[cpu].lppaca_ptr) + /* * SLB shadow buffer structure as defined in the PAPR. The save_area * contains adjacent ESID and VSID pairs for each shadowed SLB. The @@ -170,6 +172,33 @@ struct slb_shadow { extern struct slb_shadow slb_shadow[]; +/* + * Layout of entries in the hypervisor's dispatch trace log buffer. + */ +struct dtl_entry { + u8 dispatch_reason; + u8 preempt_reason; + u16 processor_id; + u32 enqueue_to_dispatch_time; + u32 ready_to_enqueue_time; + u32 waiting_to_ready_time; + u64 timebase; + u64 fault_addr; + u64 srr0; + u64 srr1; +}; + +#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */ +#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry)) + +/* + * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls + * reading from the dispatch trace log. If other code wants to consume + * DTL entries, it can set this pointer to a function that will get + * called once for each DTL entry that gets processed. + */ +extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index); + #endif /* CONFIG_PPC_BOOK3S */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_LPPACA_H */ diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index adc8e6cdf33..d045b014553 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -102,6 +102,9 @@ struct machdep_calls { void (*pci_dma_dev_setup)(struct pci_dev *dev); void (*pci_dma_bus_setup)(struct pci_bus *bus); + /* Platform set_dma_mask override */ + int (*dma_set_mask)(struct device *dev, u64 dma_mask); + int (*probe)(void); void (*setup_arch)(void); /* Optional, may be NULL */ void (*init_early)(void); diff --git a/arch/powerpc/include/asm/memblock.h b/arch/powerpc/include/asm/memblock.h index 3c29728b56b..43efc345065 100644 --- a/arch/powerpc/include/asm/memblock.h +++ b/arch/powerpc/include/asm/memblock.h @@ -5,11 +5,4 @@ #define MEMBLOCK_DBG(fmt...) udbg_printf(fmt) -#ifdef CONFIG_PPC32 -extern phys_addr_t lowmem_end_addr; -#define MEMBLOCK_REAL_LIMIT lowmem_end_addr -#else -#define MEMBLOCK_REAL_LIMIT 0 -#endif - #endif /* _ASM_POWERPC_MEMBLOCK_H */ diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 87a1d787c5b..8eaed81ea64 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -114,6 +114,17 @@ #define MAS7_RPN 0xFFFFFFFF +/* Bit definitions for MMUCFG */ +#define MMUCFG_MAVN 0x00000003 /* MMU Architecture Version Number */ +#define MMUCFG_MAVN_V1 0x00000000 /* v1.0 */ +#define MMUCFG_MAVN_V2 0x00000001 /* v2.0 */ +#define MMUCFG_NTLBS 0x0000000c /* Number of TLBs */ +#define MMUCFG_PIDSIZE 0x000007c0 /* PID Reg Size */ +#define MMUCFG_TWC 0x00008000 /* TLB Write Conditional (v2.0) */ +#define MMUCFG_LRAT 0x00010000 /* LRAT Supported (v2.0) */ +#define MMUCFG_RASIZE 0x00fe0000 /* Real Addr Size */ +#define MMUCFG_LPIDSIZE 0x0f000000 /* LPID Reg Size */ + /* Bit definitions for MMUCSR0 */ #define MMUCSR0_TLB1FI 0x00000002 /* TLB1 Flash invalidate */ #define MMUCSR0_TLB0FI 0x00000004 /* TLB0 Flash invalidate */ @@ -133,6 +144,10 @@ #define TLBnCFG_GTWE 0x00010000 /* Guest can write */ #define TLBnCFG_IND 0x00020000 /* IND entries supported */ #define TLBnCFG_PT 0x00040000 /* Can load from page table */ +#define TLBnCFG_MINSIZE 0x00f00000 /* Minimum Page Size (v1.0) */ +#define TLBnCFG_MINSIZE_SHIFT 20 +#define TLBnCFG_MAXSIZE 0x000f0000 /* Maximum Page Size (v1.0) */ +#define TLBnCFG_MAXSIZE_SHIFT 16 #define TLBnCFG_ASSOC 0xff000000 /* Associativity */ /* TLBnPS encoding */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 7ebf42ed84a..bb40a06d3b7 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -2,6 +2,8 @@ #define _ASM_POWERPC_MMU_H_ #ifdef __KERNEL__ +#include <linux/types.h> + #include <asm/asm-compat.h> #include <asm/feature-fixups.h> @@ -82,6 +84,16 @@ extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup; extern void early_init_mmu(void); extern void early_init_mmu_secondary(void); +extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size); + +#ifdef CONFIG_PPC64 +/* This is our real memory area size on ppc64 server, on embedded, we + * make it match the size our of bolted TLB area + */ +extern u64 ppc64_rma_size; +#endif /* CONFIG_PPC64 */ + #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 1ff6662f7fa..ec57540cd7a 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -85,6 +85,8 @@ struct paca_struct { u8 kexec_state; /* set when kexec down has irqs off */ #ifdef CONFIG_PPC_STD_MMU_64 struct slb_shadow *slb_shadow_ptr; + struct dtl_entry *dispatch_log; + struct dtl_entry *dispatch_log_end; /* * Now, starting in cacheline 2, the exception save areas @@ -129,13 +131,19 @@ struct paca_struct { u8 soft_enabled; /* irq soft-enable flag */ u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 io_sync; /* writel() needs spin_unlock sync */ - u8 perf_event_pending; /* PM interrupt while soft-disabled */ + u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ u64 system_time; /* accumulated system TB ticks */ - u64 startpurr; /* PURR/TB value snapshot */ + u64 user_time_scaled; /* accumulated usermode SPURR ticks */ + u64 starttime; /* TB value snapshot */ + u64 starttime_user; /* TB value on exit to usermode */ u64 startspurr; /* SPURR value snapshot */ + u64 utime_sspurr; /* ->user_time when ->startspurr set */ + u64 stolen_time; /* TB ticks taken by hypervisor */ + u64 dtl_ridx; /* read index in dispatch log */ + struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */ #ifdef CONFIG_KVM_BOOK3S_HANDLER /* We use this to store guest state in */ diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 358ff14ea25..932f88dcf6f 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -163,7 +163,7 @@ do { \ #endif /* !CONFIG_HUGETLB_PAGE */ #define VM_DATA_DEFAULT_FLAGS \ - (test_thread_flag(TIF_32BIT) ? \ + (is_32bit_task() ? \ VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64) /* @@ -179,7 +179,7 @@ do { \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define VM_STACK_DEFAULT_FLAGS \ - (test_thread_flag(TIF_32BIT) ? \ + (is_32bit_task() ? \ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) #include <asm-generic/getorder.h> diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 42fdff0e4b3..43268f15004 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -28,8 +28,8 @@ extern void find_and_init_phbs(void); extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */ /** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */ -#define BUID_HI(buid) ((buid) >> 32) -#define BUID_LO(buid) ((buid) & 0xffffffff) +#define BUID_HI(buid) upper_32_bits(buid) +#define BUID_LO(buid) lower_32_bits(buid) /* PCI device_node operations */ struct device_node; diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 498fe09263d..98210067c1c 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -9,6 +9,7 @@ #include <asm/asm-compat.h> #include <asm/processor.h> #include <asm/ppc-opcode.h> +#include <asm/firmware.h> #ifndef __ASSEMBLY__ #error __FILE__ should only be used in assembler files @@ -26,17 +27,13 @@ #ifndef CONFIG_VIRT_CPU_ACCOUNTING #define ACCOUNT_CPU_USER_ENTRY(ra, rb) #define ACCOUNT_CPU_USER_EXIT(ra, rb) +#define ACCOUNT_STOLEN_TIME #else #define ACCOUNT_CPU_USER_ENTRY(ra, rb) \ beq 2f; /* if from kernel mode */ \ -BEGIN_FTR_SECTION; \ - mfspr ra,SPRN_PURR; /* get processor util. reg */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ -BEGIN_FTR_SECTION; \ - MFTB(ra); /* or get TB if no PURR */ \ -END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ - ld rb,PACA_STARTPURR(r13); \ - std ra,PACA_STARTPURR(r13); \ + MFTB(ra); /* get timebase */ \ + ld rb,PACA_STARTTIME_USER(r13); \ + std ra,PACA_STARTTIME(r13); \ subf rb,rb,ra; /* subtract start value */ \ ld ra,PACA_USER_TIME(r13); \ add ra,ra,rb; /* add on to user time */ \ @@ -44,19 +41,34 @@ END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ 2: #define ACCOUNT_CPU_USER_EXIT(ra, rb) \ -BEGIN_FTR_SECTION; \ - mfspr ra,SPRN_PURR; /* get processor util. reg */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ -BEGIN_FTR_SECTION; \ - MFTB(ra); /* or get TB if no PURR */ \ -END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ - ld rb,PACA_STARTPURR(r13); \ - std ra,PACA_STARTPURR(r13); \ + MFTB(ra); /* get timebase */ \ + ld rb,PACA_STARTTIME(r13); \ + std ra,PACA_STARTTIME_USER(r13); \ subf rb,rb,ra; /* subtract start value */ \ ld ra,PACA_SYSTEM_TIME(r13); \ - add ra,ra,rb; /* add on to user time */ \ - std ra,PACA_SYSTEM_TIME(r13); -#endif + add ra,ra,rb; /* add on to system time */ \ + std ra,PACA_SYSTEM_TIME(r13) + +#ifdef CONFIG_PPC_SPLPAR +#define ACCOUNT_STOLEN_TIME \ +BEGIN_FW_FTR_SECTION; \ + beq 33f; \ + /* from user - see if there are any DTL entries to process */ \ + ld r10,PACALPPACAPTR(r13); /* get ptr to VPA */ \ + ld r11,PACA_DTL_RIDX(r13); /* get log read index */ \ + ld r10,LPPACA_DTLIDX(r10); /* get log write index */ \ + cmpd cr1,r11,r10; \ + beq+ cr1,33f; \ + bl .accumulate_stolen_time; \ +33: \ +END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) + +#else /* CONFIG_PPC_SPLPAR */ +#define ACCOUNT_STOLEN_TIME + +#endif /* CONFIG_PPC_SPLPAR */ + +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ /* * Macros for storing registers into and loading registers from diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 19c05b0f74b..4c14187ba02 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -118,7 +118,7 @@ extern struct task_struct *last_task_used_spe; #define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4)) #define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_USER64 / 4)) -#define TASK_UNMAPPED_BASE ((test_thread_flag(TIF_32BIT)) ? \ +#define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \ TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 ) #endif @@ -128,7 +128,7 @@ extern struct task_struct *last_task_used_spe; #define STACK_TOP_USER64 TASK_SIZE_USER64 #define STACK_TOP_USER32 TASK_SIZE_USER32 -#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ +#define STACK_TOP (is_32bit_task() ? \ STACK_TOP_USER32 : STACK_TOP_USER64) #define STACK_TOP_MAX STACK_TOP_USER64 diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index f2b370180a0..76bb195e4f2 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -171,6 +171,13 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); /* Make modules code happy. We don't set RO yet */ #define PAGE_KERNEL_EXEC PAGE_KERNEL_X +/* + * Don't just check for any non zero bits in __PAGE_USER, since for book3e + * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in + * _PAGE_USER. Need to explictly match _PAGE_BAP_UR bit in that case too. + */ +#define pte_user(val) ((val & _PAGE_USER) == _PAGE_USER) + /* Advertise special mapping type for AGP */ #define PAGE_AGP (PAGE_KERNEL_NC) #define HAVE_PAGE_AGP diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 3d35f8ae377..9a1193e30f2 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -187,6 +187,7 @@ extern void rtas_progress(char *s, unsigned short hex); extern void rtas_initialize(void); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); +extern int rtas_ibm_suspend_me(struct rtas_args *); struct rtc_time; extern unsigned long rtas_get_boot_time(void); diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 3d212669a13..aa0f1ebb4aa 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -329,3 +329,22 @@ COMPAT_SYS(rt_tgsigqueueinfo) SYSCALL(fanotify_init) COMPAT_SYS(fanotify_mark) SYSCALL_SPU(prlimit64) +SYSCALL_SPU(socket) +SYSCALL_SPU(bind) +SYSCALL_SPU(connect) +SYSCALL_SPU(listen) +SYSCALL_SPU(accept) +SYSCALL_SPU(getsockname) +SYSCALL_SPU(getpeername) +SYSCALL_SPU(socketpair) +SYSCALL_SPU(send) +SYSCALL_SPU(sendto) +COMPAT_SYS_SPU(recv) +COMPAT_SYS_SPU(recvfrom) +SYSCALL_SPU(shutdown) +COMPAT_SYS_SPU(setsockopt) +COMPAT_SYS_SPU(getsockopt) +COMPAT_SYS_SPU(sendmsg) +COMPAT_SYS_SPU(recvmsg) +COMPAT_SYS_SPU(recvmmsg) +SYSCALL_SPU(accept4) diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index 6c294acac84..5e474ddd227 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -154,8 +154,8 @@ extern void enable_kernel_spe(void); extern void giveup_spe(struct task_struct *); extern void load_up_spe(struct task_struct *); extern int fix_alignment(struct pt_regs *); -extern void cvt_fd(float *from, double *to, struct thread_struct *thread); -extern void cvt_df(double *from, float *to, struct thread_struct *thread); +extern void cvt_fd(float *from, double *to); +extern void cvt_df(double *from, float *to); #ifndef CONFIG_SMP extern void discard_lazy_cpu_state(void); @@ -542,10 +542,6 @@ extern void reloc_got2(unsigned long); #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -extern void account_system_vtime(struct task_struct *); -#endif - extern struct dentry *powerpc_debugfs_root; #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index dc779dfcf25..fe6f7c2c9c6 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -34,7 +34,6 @@ extern void to_tm(int tim, struct rtc_time * tm); extern void GregorianDay(struct rtc_time *tm); extern void generic_calibrate_decr(void); -extern void snapshot_timebase(void); extern void set_dec_cpu6(unsigned int val); @@ -212,12 +211,8 @@ struct cpu_usage { DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); #if defined(CONFIG_VIRT_CPU_ACCOUNTING) -extern void calculate_steal_time(void); -extern void snapshot_timebases(void); #define account_process_vtime(tsk) account_process_tick(tsk, 0) #else -#define calculate_steal_time() do { } while (0) -#define snapshot_timebases() do { } while (0) #define account_process_vtime(tsk) do { } while (0) #endif diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 597e6f9d094..6151937657f 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -348,10 +348,29 @@ #define __NR_fanotify_init 323 #define __NR_fanotify_mark 324 #define __NR_prlimit64 325 +#define __NR_socket 326 +#define __NR_bind 327 +#define __NR_connect 328 +#define __NR_listen 329 +#define __NR_accept 330 +#define __NR_getsockname 331 +#define __NR_getpeername 332 +#define __NR_socketpair 333 +#define __NR_send 334 +#define __NR_sendto 335 +#define __NR_recv 336 +#define __NR_recvfrom 337 +#define __NR_shutdown 338 +#define __NR_setsockopt 339 +#define __NR_getsockopt 340 +#define __NR_sendmsg 341 +#define __NR_recvmsg 342 +#define __NR_recvmmsg 343 +#define __NR_accept4 344 #ifdef __KERNEL__ -#define __NR_syscalls 326 +#define __NR_syscalls 345 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1dda7012914..36c30f31ec9 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -55,7 +55,9 @@ obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_IBMEBUS) += ibmebus.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +ifeq ($(CONFIG_PPC32),y) obj-$(CONFIG_E500) += idle_e500.o +endif obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o obj-$(CONFIG_TAU) += tau_6xx.o obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o @@ -67,7 +69,7 @@ endif obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_44x) += cpu_setup_44x.o -obj-$(CONFIG_FSL_BOOKE) += cpu_setup_fsl_booke.o dbell.o +obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o dbell.o obj-$(CONFIG_PPC_BOOK3E_64) += dbell.o extra-y := head_$(CONFIG_WORD_SIZE).o @@ -127,6 +129,8 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) obj-y += ppc_save_regs.o endif +obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o + # Disable GCOV in odd or sensitive code GCOV_PROFILE_prom_init.o := n GCOV_PROFILE_ftrace.o := n diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index b876e989220..8184ee97e48 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -889,7 +889,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); - cvt_df(&data.dd, (float *)&data.v[4], ¤t->thread); + cvt_df(&data.dd, (float *)&data.v[4]); preempt_enable(); #else return 0; @@ -933,7 +933,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); - cvt_fd((float *)&data.v[4], &data.dd, ¤t->thread); + cvt_fd((float *)&data.v[4], &data.dd); preempt_enable(); #else return 0; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1c0607ddccc..bd0df2e6aa8 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -48,11 +48,11 @@ #ifdef CONFIG_PPC_ISERIES #include <asm/iseries/alpaca.h> #endif -#ifdef CONFIG_KVM +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_GUEST) #include <linux/kvm_host.h> -#ifndef CONFIG_BOOKE -#include <asm/kvm_book3s.h> #endif +#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S) +#include <asm/kvm_book3s.h> #endif #ifdef CONFIG_PPC32 @@ -61,7 +61,7 @@ #endif #endif -#if defined(CONFIG_FSL_BOOKE) +#if defined(CONFIG_PPC_FSL_BOOK3E) #include "../mm/mmu_decl.h" #endif @@ -181,17 +181,19 @@ int main(void) offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid)); DEFINE(SLBSHADOW_STACKESID, offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid)); + DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); - DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); + DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx)); + DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); #endif /* CONFIG_PPC_STD_MMU_64 */ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); - DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr)); - DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr)); + DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); + DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); @@ -394,12 +396,13 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); - DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); + DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); + DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); /* book3s */ #ifdef CONFIG_PPC_BOOK3S @@ -464,11 +467,27 @@ int main(void) DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); #endif /* CONFIG_PPC_BOOK3S */ #endif + +#ifdef CONFIG_KVM_GUEST + DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared, + scratch1)); + DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared, + scratch2)); + DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared, + scratch3)); + DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared, + int_pending)); + DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); + DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared, + critical)); + DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr)); +#endif + #ifdef CONFIG_44x DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_FSL_BOOK3E DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0)); DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1)); diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S index 7d606f89a83..e32b4a9a2c2 100644 --- a/arch/powerpc/kernel/cpu_setup_44x.S +++ b/arch/powerpc/kernel/cpu_setup_44x.S @@ -35,6 +35,7 @@ _GLOBAL(__setup_cpu_440grx) _GLOBAL(__setup_cpu_460ex) _GLOBAL(__setup_cpu_460gt) _GLOBAL(__setup_cpu_460sx) +_GLOBAL(__setup_cpu_apm821xx) mflr r4 bl __init_fpu_44x bl __fixup_440A_mcheck diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 0adb50ad803..894e64fa481 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -51,6 +51,7 @@ _GLOBAL(__e500_dcache_setup) isync blr +#ifdef CONFIG_PPC32 _GLOBAL(__setup_cpu_e200) /* enable dedicated debug exception handling resources (Debug APU) */ mfspr r3,SPRN_HID0 @@ -72,3 +73,17 @@ _GLOBAL(__setup_cpu_e500mc) bl __setup_e500mc_ivors mtlr r4 blr +#endif +/* Right now, restore and setup are the same thing */ +_GLOBAL(__restore_cpu_e5500) +_GLOBAL(__setup_cpu_e5500) + mflr r4 + bl __e500_icache_setup + bl __e500_dcache_setup +#ifdef CONFIG_PPC_BOOK3E_64 + bl .__setup_base_ivors +#else + bl __setup_e500mc_ivors +#endif + mtlr r4 + blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 1f9123f412e..96a908f1cd8 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -48,6 +48,7 @@ extern void __setup_cpu_440x5(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec); +extern void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec); extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec); @@ -66,6 +67,10 @@ extern void __restore_cpu_ppc970(void); extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power7(void); #endif /* CONFIG_PPC64 */ +#if defined(CONFIG_E500) +extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_e5500(void); +#endif /* CONFIG_E500 */ /* This table only contains "desktop" CPUs, it need to be filled with embedded * ones as well... @@ -1805,6 +1810,20 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, + { /* 464 in APM821xx */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x12C41C80, + .cpu_name = "APM821XX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_apm821xx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, { /* 476 core */ .pvr_mask = 0xffff0000, .pvr_value = 0x11a50000, @@ -1891,7 +1910,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .platform = "ppc5554", } #endif /* CONFIG_E200 */ +#endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 +#ifdef CONFIG_PPC32 { /* e500 */ .pvr_mask = 0xffff0000, .pvr_value = 0x80200000, @@ -1946,6 +1967,26 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500mc, .platform = "ppce500mc", }, +#endif /* CONFIG_PPC32 */ + { /* e5500 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80240000, + .cpu_name = "e5500", + .cpu_features = CPU_FTRS_E500MC, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | + MMU_FTR_USE_TLBILX, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e500mc", + .oprofile_type = PPC_OPROFILE_FSL_EMB, + .cpu_setup = __setup_cpu_e5500, + .cpu_restore = __restore_cpu_e5500, + .machine_check = machine_check_e500mc, + .platform = "ppce5500", + }, +#ifdef CONFIG_PPC32 { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -1960,8 +2001,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500, .platform = "powerpc", } -#endif /* CONFIG_E500 */ #endif /* CONFIG_PPC32 */ +#endif /* CONFIG_E500 */ #ifdef CONFIG_PPC_BOOK3E_64 { /* This is a default entry to get going, to be replaced by diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 4457382f866..832c8c4db25 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -414,18 +414,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crash_kexec_wait_realmode(crashing_cpu); #endif - for_each_irq(i) { - struct irq_desc *desc = irq_to_desc(i); - - if (!desc || !desc->chip || !desc->chip->eoi) - continue; - - if (desc->status & IRQ_INPROGRESS) - desc->chip->eoi(i); - - if (!(desc->status & IRQ_DISABLED)) - desc->chip->shutdown(i); - } + machine_kexec_mask_interrupts(); /* * Call registered shutdown routines savely. Swap out diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 37771a51811..6e54a0fd31a 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -74,16 +74,17 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask) { struct iommu_table *tbl = get_iommu_table_base(dev); - if (!tbl || tbl->it_offset > mask) { - printk(KERN_INFO - "Warning: IOMMU offset too big for device mask\n"); - if (tbl) - printk(KERN_INFO - "mask: 0x%08llx, table offset: 0x%08lx\n", - mask, tbl->it_offset); - else - printk(KERN_INFO "mask: 0x%08llx, table unavailable\n", - mask); + if (!tbl) { + dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx" + ", table unavailable\n", mask); + return 0; + } + + if ((tbl->it_offset + tbl->it_size) > (mask >> IOMMU_PAGE_SHIFT)) { + dev_info(dev, "Warning: IOMMU window too big for device mask\n"); + dev_info(dev, "mask: 0x%08llx, table end: 0x%08lx\n", + mask, (tbl->it_offset + tbl->it_size) << + IOMMU_PAGE_SHIFT); return 0; } else return 1; diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 84d6367ec00..cf02cad62d9 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -12,6 +12,7 @@ #include <linux/memblock.h> #include <asm/bug.h> #include <asm/abs_addr.h> +#include <asm/machdep.h> /* * Generic direct DMA implementation @@ -89,7 +90,7 @@ static int dma_direct_dma_supported(struct device *dev, u64 mask) /* Could be improved so platforms can set the limit in case * they have limited DMA windows */ - return mask >= (memblock_end_of_DRAM() - 1); + return mask >= get_dma_offset(dev) + (memblock_end_of_DRAM() - 1); #else return 1; #endif @@ -154,6 +155,23 @@ EXPORT_SYMBOL(dma_direct_ops); #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) +int dma_set_mask(struct device *dev, u64 dma_mask) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + if (ppc_md.dma_set_mask) + return ppc_md.dma_set_mask(dev, dma_mask); + if (unlikely(dma_ops == NULL)) + return -EIO; + if (dma_ops->set_dma_mask != NULL) + return dma_ops->set_dma_mask(dev, dma_mask); + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + *dev->dma_mask = dma_mask; + return 0; +} +EXPORT_SYMBOL(dma_set_mask); + static int __init dma_init(void) { dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 42e9d908914..d82878c4daa 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -97,6 +97,24 @@ system_call_common: addi r9,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r9) /* "regshere" marker */ +#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR) +BEGIN_FW_FTR_SECTION + beq 33f + /* if from user, see if there are any DTL entries to process */ + ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */ + ld r11,PACA_DTL_RIDX(r13) /* get log read index */ + ld r10,LPPACA_DTLIDX(r10) /* get log write index */ + cmpd cr1,r11,r10 + beq+ cr1,33f + bl .accumulate_stolen_time + REST_GPR(0,r1) + REST_4GPRS(3,r1) + REST_2GPRS(7,r1) + addi r9,r1,STACK_FRAME_OVERHEAD +33: +END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) +#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ + #ifdef CONFIG_TRACE_IRQFLAGS bl .trace_hardirqs_on REST_GPR(0,r1) @@ -202,7 +220,9 @@ syscall_exit: bge- syscall_error syscall_error_cont: ld r7,_NIP(r1) +BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) andi. r6,r8,MSR_PR ld r4,_LINK(r1) /* @@ -419,6 +439,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync #endif /* CONFIG_SMP */ + /* + * If we optimise away the clear of the reservation in system + * calls because we know the CPU tracks the address of the + * reservation, then we need to clear it here to cover the + * case that the kernel context switch path has no larx + * instructions. + */ +BEGIN_FTR_SECTION + ldarx r6,0,r1 +END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS) + addi r6,r4,-THREAD /* Convert THREAD to 'current' */ std r6,PACACURRENT(r13) /* Set new 'current' */ @@ -576,7 +607,16 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) andi. r0,r3,MSR_RI beq- unrecov_restore + /* + * Clear the reservation. If we know the CPU tracks the address of + * the reservation then we can potentially save some cycles and use + * a larx. On POWER6 and POWER7 this is significantly faster. + */ +BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r4,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) /* * Clear RI before restoring r13. If we are returning to diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f53029a0155..9f8b01d6466 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -299,6 +299,12 @@ slb_miss_user_pseries: b . /* prevent spec. execution */ #endif /* __DISABLED__ */ +/* KVM's trampoline code needs to be close to the interrupt handlers */ + +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#include "../kvm/book3s_rmhandlers.S" +#endif + .align 7 .globl __end_interrupts __end_interrupts: @@ -818,12 +824,12 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) /* * hash_page couldn't handle it, set soft interrupt enable back - * to what it was before the trap. Note that .raw_local_irq_restore + * to what it was before the trap. Note that .arch_local_irq_restore * handles any interrupts pending at this point. */ ld r3,SOFTE(r1) TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f) - bl .raw_local_irq_restore + bl .arch_local_irq_restore b 11f /* We have a data breakpoint exception - handle it */ diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index fc8f5b14019..e86c040ae58 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -163,24 +163,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* * These are used in the alignment trap handler when emulating * single-precision loads and stores. - * We restore and save the fpscr so the task gets the same result - * and exceptions as if the cpu had performed the load or store. */ _GLOBAL(cvt_fd) - lfd 0,THREAD_FPSCR(r5) /* load up fpscr value */ - MTFSF_L(0) lfs 0,0(r3) stfd 0,0(r4) - mffs 0 - stfd 0,THREAD_FPSCR(r5) /* save new fpscr value */ blr _GLOBAL(cvt_df) - lfd 0,THREAD_FPSCR(r5) /* load up fpscr value */ - MTFSF_L(0) lfd 0,0(r3) stfs 0,0(r4) - mffs 0 - stfd 0,THREAD_FPSCR(r5) /* save new fpscr value */ blr diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a90625f9b48..8278e8bad5a 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -923,11 +923,7 @@ initial_mmu: mtspr SPRN_PID,r0 sync - /* Configure and load two entries into TLB slots 62 and 63. - * In case we are pinning TLBs, these are reserved in by the - * other TLB functions. If not reserving, then it doesn't - * matter where they are loaded. - */ + /* Configure and load one entry into TLB slots 63 */ clrrwi r4,r4,10 /* Mask off the real page number */ ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index c571cd3c145..f0dd577e4a5 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -166,12 +166,6 @@ exception_marker: #include "exceptions-64s.S" #endif -/* KVM trampoline code needs to be close to the interrupt handlers */ - -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER -#include "../kvm/book3s_rmhandlers.S" -#endif - _GLOBAL(generic_secondary_thread_init) mr r24,r3 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 4faeba24785..529b817f473 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -152,8 +152,11 @@ _ENTRY(__early_start) /* Check to see if we're the second processor, and jump * to the secondary_start code if so */ - mfspr r24,SPRN_PIR - cmpwi r24,0 + lis r24, boot_cpuid@h + ori r24, r24, boot_cpuid@l + lwz r24, 0(r24) + cmpwi r24, -1 + mfspr r24,SPRN_PIR bne __secondary_start #endif @@ -175,6 +178,9 @@ _ENTRY(__early_start) li r0,0 stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + rlwinm r22,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + stw r24, TI_CPU(r22) + bl early_init #ifdef CONFIG_RELOCATABLE diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 9b626cfffce..f62efdfd176 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -162,13 +162,10 @@ static int ibmebus_create_device(struct device_node *dn) dev->dev.bus = &ibmebus_bus_type; dev->dev.archdata.dma_ops = &ibmebus_dma_ops; - ret = of_device_register(dev); - if (ret) { - of_device_free(dev); - return ret; - } - - return 0; + ret = of_device_add(dev); + if (ret) + platform_device_put(dev); + return ret; } static int ibmebus_create_devices(const struct of_device_id *matches) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4a65386995d..ce557f6f00f 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -116,7 +116,7 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -notrace void raw_local_irq_restore(unsigned long en) +notrace void arch_local_irq_restore(unsigned long en) { /* * get_paca()->soft_enabled = en; @@ -192,7 +192,7 @@ notrace void raw_local_irq_restore(unsigned long en) __hard_irq_enable(); } -EXPORT_SYMBOL(raw_local_irq_restore); +EXPORT_SYMBOL(arch_local_irq_restore); #endif /* CONFIG_PPC64 */ static int show_other_interrupts(struct seq_file *p, int prec) @@ -587,8 +587,10 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, * this will be fixed once slab is made available early * instead of the current cruft */ - if (mem_init_done) + if (mem_init_done) { + of_node_put(host->of_node); kfree(host); + } return NULL; } irq_map[0].host = host; @@ -1143,7 +1145,7 @@ static int virq_debug_show(struct seq_file *m, void *private) unsigned long flags; struct irq_desc *desc; const char *p; - char none[] = "none"; + static const char none[] = "none"; int i; seq_printf(m, "%-5s %-7s %-15s %s\n", "virq", "hwirq", diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c new file mode 100644 index 00000000000..428d0e538ae --- /dev/null +++ b/arch/powerpc/kernel/kvm.c @@ -0,0 +1,596 @@ +/* + * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. + * + * Authors: + * Alexander Graf <agraf@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/kvm_host.h> +#include <linux/init.h> +#include <linux/kvm_para.h> +#include <linux/slab.h> +#include <linux/of.h> + +#include <asm/reg.h> +#include <asm/sections.h> +#include <asm/cacheflush.h> +#include <asm/disassemble.h> + +#define KVM_MAGIC_PAGE (-4096L) +#define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) + +#define KVM_INST_LWZ 0x80000000 +#define KVM_INST_STW 0x90000000 +#define KVM_INST_LD 0xe8000000 +#define KVM_INST_STD 0xf8000000 +#define KVM_INST_NOP 0x60000000 +#define KVM_INST_B 0x48000000 +#define KVM_INST_B_MASK 0x03ffffff +#define KVM_INST_B_MAX 0x01ffffff + +#define KVM_MASK_RT 0x03e00000 +#define KVM_RT_30 0x03c00000 +#define KVM_MASK_RB 0x0000f800 +#define KVM_INST_MFMSR 0x7c0000a6 +#define KVM_INST_MFSPR_SPRG0 0x7c1042a6 +#define KVM_INST_MFSPR_SPRG1 0x7c1142a6 +#define KVM_INST_MFSPR_SPRG2 0x7c1242a6 +#define KVM_INST_MFSPR_SPRG3 0x7c1342a6 +#define KVM_INST_MFSPR_SRR0 0x7c1a02a6 +#define KVM_INST_MFSPR_SRR1 0x7c1b02a6 +#define KVM_INST_MFSPR_DAR 0x7c1302a6 +#define KVM_INST_MFSPR_DSISR 0x7c1202a6 + +#define KVM_INST_MTSPR_SPRG0 0x7c1043a6 +#define KVM_INST_MTSPR_SPRG1 0x7c1143a6 +#define KVM_INST_MTSPR_SPRG2 0x7c1243a6 +#define KVM_INST_MTSPR_SPRG3 0x7c1343a6 +#define KVM_INST_MTSPR_SRR0 0x7c1a03a6 +#define KVM_INST_MTSPR_SRR1 0x7c1b03a6 +#define KVM_INST_MTSPR_DAR 0x7c1303a6 +#define KVM_INST_MTSPR_DSISR 0x7c1203a6 + +#define KVM_INST_TLBSYNC 0x7c00046c +#define KVM_INST_MTMSRD_L0 0x7c000164 +#define KVM_INST_MTMSRD_L1 0x7c010164 +#define KVM_INST_MTMSR 0x7c000124 + +#define KVM_INST_WRTEEI_0 0x7c000146 +#define KVM_INST_WRTEEI_1 0x7c008146 + +#define KVM_INST_MTSRIN 0x7c0001e4 + +static bool kvm_patching_worked = true; +static char kvm_tmp[1024 * 1024]; +static int kvm_tmp_index; + +static inline void kvm_patch_ins(u32 *inst, u32 new_inst) +{ + *inst = new_inst; + flush_icache_range((ulong)inst, (ulong)inst + 4); +} + +static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000fffc)); +#endif +} + +static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_LWZ | rt | ((addr + 4) & 0x0000fffc)); +#endif +} + +static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt) +{ + kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff)); +} + +static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_STW | rt | ((addr + 4) & 0x0000fffc)); +#endif +} + +static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt) +{ + kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc)); +} + +static void kvm_patch_ins_nop(u32 *inst) +{ + kvm_patch_ins(inst, KVM_INST_NOP); +} + +static void kvm_patch_ins_b(u32 *inst, int addr) +{ +#ifdef CONFIG_RELOCATABLE + /* On relocatable kernels interrupts handlers and our code + can be in different regions, so we don't patch them */ + + extern u32 __end_interrupts; + if ((ulong)inst < (ulong)&__end_interrupts) + return; +#endif + + kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK)); +} + +static u32 *kvm_alloc(int len) +{ + u32 *p; + + if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) { + printk(KERN_ERR "KVM: No more space (%d + %d)\n", + kvm_tmp_index, len); + kvm_patching_worked = false; + return NULL; + } + + p = (void*)&kvm_tmp[kvm_tmp_index]; + kvm_tmp_index += len; + + return p; +} + +extern u32 kvm_emulate_mtmsrd_branch_offs; +extern u32 kvm_emulate_mtmsrd_reg_offs; +extern u32 kvm_emulate_mtmsrd_orig_ins_offs; +extern u32 kvm_emulate_mtmsrd_len; +extern u32 kvm_emulate_mtmsrd[]; + +static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtmsrd_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsrd_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtmsrd, kvm_emulate_mtmsrd_len * 4); + p[kvm_emulate_mtmsrd_branch_offs] |= distance_end & KVM_INST_B_MASK; + switch (get_rt(rt)) { + case 30: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs], + magic_var(scratch2), KVM_RT_30); + break; + case 31: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs], + magic_var(scratch1), KVM_RT_30); + break; + default: + p[kvm_emulate_mtmsrd_reg_offs] |= rt; + break; + } + + p[kvm_emulate_mtmsrd_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsrd_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +extern u32 kvm_emulate_mtmsr_branch_offs; +extern u32 kvm_emulate_mtmsr_reg1_offs; +extern u32 kvm_emulate_mtmsr_reg2_offs; +extern u32 kvm_emulate_mtmsr_orig_ins_offs; +extern u32 kvm_emulate_mtmsr_len; +extern u32 kvm_emulate_mtmsr[]; + +static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtmsr_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsr_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtmsr, kvm_emulate_mtmsr_len * 4); + p[kvm_emulate_mtmsr_branch_offs] |= distance_end & KVM_INST_B_MASK; + + /* Make clobbered registers work too */ + switch (get_rt(rt)) { + case 30: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs], + magic_var(scratch2), KVM_RT_30); + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs], + magic_var(scratch2), KVM_RT_30); + break; + case 31: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs], + magic_var(scratch1), KVM_RT_30); + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs], + magic_var(scratch1), KVM_RT_30); + break; + default: + p[kvm_emulate_mtmsr_reg1_offs] |= rt; + p[kvm_emulate_mtmsr_reg2_offs] |= rt; + break; + } + + p[kvm_emulate_mtmsr_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsr_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +#ifdef CONFIG_BOOKE + +extern u32 kvm_emulate_wrteei_branch_offs; +extern u32 kvm_emulate_wrteei_ee_offs; +extern u32 kvm_emulate_wrteei_len; +extern u32 kvm_emulate_wrteei[]; + +static void kvm_patch_ins_wrteei(u32 *inst) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_wrteei_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_wrteei, kvm_emulate_wrteei_len * 4); + p[kvm_emulate_wrteei_branch_offs] |= distance_end & KVM_INST_B_MASK; + p[kvm_emulate_wrteei_ee_offs] |= (*inst & MSR_EE); + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +#endif + +#ifdef CONFIG_PPC_BOOK3S_32 + +extern u32 kvm_emulate_mtsrin_branch_offs; +extern u32 kvm_emulate_mtsrin_reg1_offs; +extern u32 kvm_emulate_mtsrin_reg2_offs; +extern u32 kvm_emulate_mtsrin_orig_ins_offs; +extern u32 kvm_emulate_mtsrin_len; +extern u32 kvm_emulate_mtsrin[]; + +static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtsrin_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtsrin_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtsrin, kvm_emulate_mtsrin_len * 4); + p[kvm_emulate_mtsrin_branch_offs] |= distance_end & KVM_INST_B_MASK; + p[kvm_emulate_mtsrin_reg1_offs] |= (rb << 10); + p[kvm_emulate_mtsrin_reg2_offs] |= rt; + p[kvm_emulate_mtsrin_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtsrin_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +#endif + +static void kvm_map_magic_page(void *data) +{ + u32 *features = data; + + ulong in[8]; + ulong out[8]; + + in[0] = KVM_MAGIC_PAGE; + in[1] = KVM_MAGIC_PAGE; + + kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE); + + *features = out[0]; +} + +static void kvm_check_ins(u32 *inst, u32 features) +{ + u32 _inst = *inst; + u32 inst_no_rt = _inst & ~KVM_MASK_RT; + u32 inst_rt = _inst & KVM_MASK_RT; + + switch (inst_no_rt) { + /* Loads */ + case KVM_INST_MFMSR: + kvm_patch_ins_ld(inst, magic_var(msr), inst_rt); + break; + case KVM_INST_MFSPR_SPRG0: + kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt); + break; + case KVM_INST_MFSPR_SPRG1: + kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt); + break; + case KVM_INST_MFSPR_SPRG2: + kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt); + break; + case KVM_INST_MFSPR_SPRG3: + kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt); + break; + case KVM_INST_MFSPR_SRR0: + kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt); + break; + case KVM_INST_MFSPR_SRR1: + kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt); + break; + case KVM_INST_MFSPR_DAR: + kvm_patch_ins_ld(inst, magic_var(dar), inst_rt); + break; + case KVM_INST_MFSPR_DSISR: + kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt); + break; + + /* Stores */ + case KVM_INST_MTSPR_SPRG0: + kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt); + break; + case KVM_INST_MTSPR_SPRG1: + kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt); + break; + case KVM_INST_MTSPR_SPRG2: + kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt); + break; + case KVM_INST_MTSPR_SPRG3: + kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt); + break; + case KVM_INST_MTSPR_SRR0: + kvm_patch_ins_std(inst, magic_var(srr0), inst_rt); + break; + case KVM_INST_MTSPR_SRR1: + kvm_patch_ins_std(inst, magic_var(srr1), inst_rt); + break; + case KVM_INST_MTSPR_DAR: + kvm_patch_ins_std(inst, magic_var(dar), inst_rt); + break; + case KVM_INST_MTSPR_DSISR: + kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt); + break; + + /* Nops */ + case KVM_INST_TLBSYNC: + kvm_patch_ins_nop(inst); + break; + + /* Rewrites */ + case KVM_INST_MTMSRD_L1: + kvm_patch_ins_mtmsrd(inst, inst_rt); + break; + case KVM_INST_MTMSR: + case KVM_INST_MTMSRD_L0: + kvm_patch_ins_mtmsr(inst, inst_rt); + break; + } + + switch (inst_no_rt & ~KVM_MASK_RB) { +#ifdef CONFIG_PPC_BOOK3S_32 + case KVM_INST_MTSRIN: + if (features & KVM_MAGIC_FEAT_SR) { + u32 inst_rb = _inst & KVM_MASK_RB; + kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb); + } + break; + break; +#endif + } + + switch (_inst) { +#ifdef CONFIG_BOOKE + case KVM_INST_WRTEEI_0: + case KVM_INST_WRTEEI_1: + kvm_patch_ins_wrteei(inst); + break; +#endif + } +} + +static void kvm_use_magic_page(void) +{ + u32 *p; + u32 *start, *end; + u32 tmp; + u32 features; + + /* Tell the host to map the magic page to -4096 on all CPUs */ + on_each_cpu(kvm_map_magic_page, &features, 1); + + /* Quick self-test to see if the mapping works */ + if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) { + kvm_patching_worked = false; + return; + } + + /* Now loop through all code and find instructions */ + start = (void*)_stext; + end = (void*)_etext; + + for (p = start; p < end; p++) + kvm_check_ins(p, features); + + printk(KERN_INFO "KVM: Live patching for a fast VM %s\n", + kvm_patching_worked ? "worked" : "failed"); +} + +unsigned long kvm_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr) +{ + unsigned long register r0 asm("r0"); + unsigned long register r3 asm("r3") = in[0]; + unsigned long register r4 asm("r4") = in[1]; + unsigned long register r5 asm("r5") = in[2]; + unsigned long register r6 asm("r6") = in[3]; + unsigned long register r7 asm("r7") = in[4]; + unsigned long register r8 asm("r8") = in[5]; + unsigned long register r9 asm("r9") = in[6]; + unsigned long register r10 asm("r10") = in[7]; + unsigned long register r11 asm("r11") = nr; + unsigned long register r12 asm("r12"); + + asm volatile("bl kvm_hypercall_start" + : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), + "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11), + "=r"(r12) + : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8), + "r"(r9), "r"(r10), "r"(r11) + : "memory", "cc", "xer", "ctr", "lr"); + + out[0] = r4; + out[1] = r5; + out[2] = r6; + out[3] = r7; + out[4] = r8; + out[5] = r9; + out[6] = r10; + out[7] = r11; + + return r3; +} +EXPORT_SYMBOL_GPL(kvm_hypercall); + +static int kvm_para_setup(void) +{ + extern u32 kvm_hypercall_start; + struct device_node *hyper_node; + u32 *insts; + int len, i; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return -1; + + insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len); + if (len % 4) + return -1; + if (len > (4 * 4)) + return -1; + + for (i = 0; i < (len / 4); i++) + kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]); + + return 0; +} + +static __init void kvm_free_tmp(void) +{ + unsigned long start, end; + + start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK; + end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK; + + /* Free the tmp space we don't need */ + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + init_page_count(virt_to_page(start)); + free_page(start); + totalram_pages++; + } +} + +static int __init kvm_guest_init(void) +{ + if (!kvm_para_available()) + goto free_tmp; + + if (kvm_para_setup()) + goto free_tmp; + + if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) + kvm_use_magic_page(); + +#ifdef CONFIG_PPC_BOOK3S_64 + /* Enable napping */ + powersave_nap = 1; +#endif + +free_tmp: + kvm_free_tmp(); + + return 0; +} + +postcore_initcall(kvm_guest_init); diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S new file mode 100644 index 00000000000..f2b1b2523e6 --- /dev/null +++ b/arch/powerpc/kernel/kvm_emul.S @@ -0,0 +1,302 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2010 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#include <asm/ppc_asm.h> +#include <asm/kvm_asm.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/asm-offsets.h> + +/* Hypercall entry point. Will be patched with device tree instructions. */ + +.global kvm_hypercall_start +kvm_hypercall_start: + li r3, -1 + nop + nop + nop + blr + +#define KVM_MAGIC_PAGE (-4096) + +#ifdef CONFIG_64BIT +#define LL64(reg, offs, reg2) ld reg, (offs)(reg2) +#define STL64(reg, offs, reg2) std reg, (offs)(reg2) +#else +#define LL64(reg, offs, reg2) lwz reg, (offs + 4)(reg2) +#define STL64(reg, offs, reg2) stw reg, (offs + 4)(reg2) +#endif + +#define SCRATCH_SAVE \ + /* Enable critical section. We are critical if \ + shared->critical == r1 */ \ + STL64(r1, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); \ + \ + /* Save state */ \ + PPC_STL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \ + PPC_STL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \ + mfcr r31; \ + stw r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0); + +#define SCRATCH_RESTORE \ + /* Restore state */ \ + PPC_LL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \ + lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0); \ + mtcr r30; \ + PPC_LL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \ + \ + /* Disable critical section. We are critical if \ + shared->critical == r1 and r2 is always != r1 */ \ + STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); + +.global kvm_emulate_mtmsrd +kvm_emulate_mtmsrd: + + SCRATCH_SAVE + + /* Put MSR & ~(MSR_EE|MSR_RI) in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + lis r30, (~(MSR_EE | MSR_RI))@h + ori r30, r30, (~(MSR_EE | MSR_RI))@l + and r31, r31, r30 + + /* OR the register's (MSR_EE|MSR_RI) on MSR */ +kvm_emulate_mtmsrd_reg: + ori r30, r0, 0 + andi. r30, r30, (MSR_EE|MSR_RI) + or r31, r31, r30 + + /* Put MSR back into magic page */ + STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Check if we have to fetch an interrupt */ + lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) + cmpwi r31, 0 + beq+ no_check + + /* Check if we may trigger an interrupt */ + andi. r30, r30, MSR_EE + beq no_check + + SCRATCH_RESTORE + + /* Nag hypervisor */ +kvm_emulate_mtmsrd_orig_ins: + tlbsync + + b kvm_emulate_mtmsrd_branch + +no_check: + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtmsrd_branch: + b . +kvm_emulate_mtmsrd_end: + +.global kvm_emulate_mtmsrd_branch_offs +kvm_emulate_mtmsrd_branch_offs: + .long (kvm_emulate_mtmsrd_branch - kvm_emulate_mtmsrd) / 4 + +.global kvm_emulate_mtmsrd_reg_offs +kvm_emulate_mtmsrd_reg_offs: + .long (kvm_emulate_mtmsrd_reg - kvm_emulate_mtmsrd) / 4 + +.global kvm_emulate_mtmsrd_orig_ins_offs +kvm_emulate_mtmsrd_orig_ins_offs: + .long (kvm_emulate_mtmsrd_orig_ins - kvm_emulate_mtmsrd) / 4 + +.global kvm_emulate_mtmsrd_len +kvm_emulate_mtmsrd_len: + .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4 + + +#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI) +#define MSR_CRITICAL_BITS ~MSR_SAFE_BITS + +.global kvm_emulate_mtmsr +kvm_emulate_mtmsr: + + SCRATCH_SAVE + + /* Fetch old MSR in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Find the changed bits between old and new MSR */ +kvm_emulate_mtmsr_reg1: + ori r30, r0, 0 + xor r31, r30, r31 + + /* Check if we need to really do mtmsr */ + LOAD_REG_IMMEDIATE(r30, MSR_CRITICAL_BITS) + and. r31, r31, r30 + + /* No critical bits changed? Maybe we can stay in the guest. */ + beq maybe_stay_in_guest + +do_mtmsr: + + SCRATCH_RESTORE + + /* Just fire off the mtmsr if it's critical */ +kvm_emulate_mtmsr_orig_ins: + mtmsr r0 + + b kvm_emulate_mtmsr_branch + +maybe_stay_in_guest: + + /* Get the target register in r30 */ +kvm_emulate_mtmsr_reg2: + ori r30, r0, 0 + + /* Check if we have to fetch an interrupt */ + lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) + cmpwi r31, 0 + beq+ no_mtmsr + + /* Check if we may trigger an interrupt */ + andi. r31, r30, MSR_EE + beq no_mtmsr + + b do_mtmsr + +no_mtmsr: + + /* Put MSR into magic page because we don't call mtmsr */ + STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtmsr_branch: + b . +kvm_emulate_mtmsr_end: + +.global kvm_emulate_mtmsr_branch_offs +kvm_emulate_mtmsr_branch_offs: + .long (kvm_emulate_mtmsr_branch - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_reg1_offs +kvm_emulate_mtmsr_reg1_offs: + .long (kvm_emulate_mtmsr_reg1 - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_reg2_offs +kvm_emulate_mtmsr_reg2_offs: + .long (kvm_emulate_mtmsr_reg2 - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_orig_ins_offs +kvm_emulate_mtmsr_orig_ins_offs: + .long (kvm_emulate_mtmsr_orig_ins - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_len +kvm_emulate_mtmsr_len: + .long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4 + + + +.global kvm_emulate_wrteei +kvm_emulate_wrteei: + + SCRATCH_SAVE + + /* Fetch old MSR in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Remove MSR_EE from old MSR */ + li r30, 0 + ori r30, r30, MSR_EE + andc r31, r31, r30 + + /* OR new MSR_EE onto the old MSR */ +kvm_emulate_wrteei_ee: + ori r31, r31, 0 + + /* Write new MSR value back */ + STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_wrteei_branch: + b . +kvm_emulate_wrteei_end: + +.global kvm_emulate_wrteei_branch_offs +kvm_emulate_wrteei_branch_offs: + .long (kvm_emulate_wrteei_branch - kvm_emulate_wrteei) / 4 + +.global kvm_emulate_wrteei_ee_offs +kvm_emulate_wrteei_ee_offs: + .long (kvm_emulate_wrteei_ee - kvm_emulate_wrteei) / 4 + +.global kvm_emulate_wrteei_len +kvm_emulate_wrteei_len: + .long (kvm_emulate_wrteei_end - kvm_emulate_wrteei) / 4 + + +.global kvm_emulate_mtsrin +kvm_emulate_mtsrin: + + SCRATCH_SAVE + + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + andi. r31, r31, MSR_DR | MSR_IR + beq kvm_emulate_mtsrin_reg1 + + SCRATCH_RESTORE + +kvm_emulate_mtsrin_orig_ins: + nop + b kvm_emulate_mtsrin_branch + +kvm_emulate_mtsrin_reg1: + /* rX >> 26 */ + rlwinm r30,r0,6,26,29 + +kvm_emulate_mtsrin_reg2: + stw r0, (KVM_MAGIC_PAGE + KVM_MAGIC_SR)(r30) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtsrin_branch: + b . +kvm_emulate_mtsrin_end: + +.global kvm_emulate_mtsrin_branch_offs +kvm_emulate_mtsrin_branch_offs: + .long (kvm_emulate_mtsrin_branch - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_reg1_offs +kvm_emulate_mtsrin_reg1_offs: + .long (kvm_emulate_mtsrin_reg1 - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_reg2_offs +kvm_emulate_mtsrin_reg2_offs: + .long (kvm_emulate_mtsrin_reg2 - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_orig_ins_offs +kvm_emulate_mtsrin_orig_ins_offs: + .long (kvm_emulate_mtsrin_orig_ins - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_len +kvm_emulate_mtsrin_len: + .long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4 diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index c1fd0f9658f..c834757bebc 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -52,14 +52,14 @@ static int __init add_legacy_port(struct device_node *np, int want_index, phys_addr_t taddr, unsigned long irq, upf_t flags, int irq_check_parent) { - const u32 *clk, *spd; + const __be32 *clk, *spd; u32 clock = BASE_BAUD * 16; int index; /* get clock freq. if present */ clk = of_get_property(np, "clock-frequency", NULL); if (clk && *clk) - clock = *clk; + clock = be32_to_cpup(clk); /* get default speed if present */ spd = of_get_property(np, "current-speed", NULL); @@ -109,7 +109,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index, legacy_serial_infos[index].taddr = taddr; legacy_serial_infos[index].np = of_node_get(np); legacy_serial_infos[index].clock = clock; - legacy_serial_infos[index].speed = spd ? *spd : 0; + legacy_serial_infos[index].speed = spd ? be32_to_cpup(spd) : 0; legacy_serial_infos[index].irq_check_parent = irq_check_parent; printk(KERN_DEBUG "Found legacy serial port %d for %s\n", @@ -168,7 +168,7 @@ static int __init add_legacy_soc_port(struct device_node *np, static int __init add_legacy_isa_port(struct device_node *np, struct device_node *isa_brg) { - const u32 *reg; + const __be32 *reg; const char *typep; int index = -1; u64 taddr; @@ -181,7 +181,7 @@ static int __init add_legacy_isa_port(struct device_node *np, return -1; /* Verify it's an IO port, we don't support anything else */ - if (!(reg[0] & 0x00000001)) + if (!(be32_to_cpu(reg[0]) & 0x00000001)) return -1; /* Now look for an "ibm,aix-loc" property that gives us ordering @@ -202,7 +202,7 @@ static int __init add_legacy_isa_port(struct device_node *np, taddr = 0; /* Add port, irq will be dealt with later */ - return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr, + return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), taddr, NO_IRQ, UPF_BOOT_AUTOCONF, 0); } @@ -251,9 +251,9 @@ static int __init add_legacy_pci_port(struct device_node *np, * we get to their "reg" property */ if (np != pci_dev) { - const u32 *reg = of_get_property(np, "reg", NULL); - if (reg && (*reg < 4)) - index = lindex = *reg; + const __be32 *reg = of_get_property(np, "reg", NULL); + if (reg && (be32_to_cpup(reg) < 4)) + index = lindex = be32_to_cpup(reg); } /* Local index means it's the Nth port in the PCI chip. Unfortunately @@ -507,7 +507,7 @@ static int __init check_legacy_serial_console(void) struct device_node *prom_stdout = NULL; int i, speed = 0, offset = 0; const char *name; - const u32 *spd; + const __be32 *spd; DBG(" -> check_legacy_serial_console()\n"); @@ -547,7 +547,7 @@ static int __init check_legacy_serial_console(void) } spd = of_get_property(prom_stdout, "current-speed", NULL); if (spd) - speed = *spd; + speed = be32_to_cpup(spd); if (strcmp(name, "serial") != 0) goto not_found; diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 50362b6ef6e..16468362ad5 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -56,7 +56,7 @@ static unsigned long get_purr(void) for_each_possible_cpu(cpu) { if (firmware_has_feature(FW_FEATURE_ISERIES)) - sum_purr += lppaca[cpu].emulated_time_base; + sum_purr += lppaca_of(cpu).emulated_time_base; else { struct cpu_usage *cu; @@ -263,7 +263,7 @@ static void parse_ppp_data(struct seq_file *m) ppp_data.active_system_procs); /* pool related entries are apropriate for shared configs */ - if (lppaca[0].shared_proc) { + if (lppaca_of(0).shared_proc) { unsigned long pool_idle_time, pool_procs; seq_printf(m, "pool=%d\n", ppp_data.pool_num); @@ -460,8 +460,8 @@ static void pseries_cmo_data(struct seq_file *m) return; for_each_possible_cpu(cpu) { - cmo_faults += lppaca[cpu].cmo_faults; - cmo_fault_time += lppaca[cpu].cmo_fault_time; + cmo_faults += lppaca_of(cpu).cmo_faults; + cmo_fault_time += lppaca_of(cpu).cmo_fault_time; } seq_printf(m, "cmo_faults=%lu\n", cmo_faults); @@ -479,8 +479,8 @@ static void splpar_dispatch_data(struct seq_file *m) unsigned long dispatch_dispersions = 0; for_each_possible_cpu(cpu) { - dispatches += lppaca[cpu].yield_count; - dispatch_dispersions += lppaca[cpu].dispersion_count; + dispatches += lppaca_of(cpu).yield_count; + dispatch_dispersions += lppaca_of(cpu).dispersion_count; } seq_printf(m, "dispatches=%lu\n", dispatches); @@ -545,7 +545,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "partition_potential_processors=%d\n", partition_potential_processors); - seq_printf(m, "shared_processor_mode=%d\n", lppaca[0].shared_proc); + seq_printf(m, "shared_processor_mode=%d\n", lppaca_of(0).shared_proc); seq_printf(m, "slb_size=%d\n", mmu_slb_size); @@ -780,6 +780,7 @@ static const struct file_operations lparcfg_fops = { .write = lparcfg_write, .open = lparcfg_open, .release = single_release, + .llseek = seq_lseek, }; static int __init lparcfg_init(void) diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index dd6c141f166..df7e20c191c 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -14,10 +14,34 @@ #include <linux/threads.h> #include <linux/memblock.h> #include <linux/of.h> +#include <linux/irq.h> + #include <asm/machdep.h> #include <asm/prom.h> #include <asm/sections.h> +void machine_kexec_mask_interrupts(void) { + unsigned int i; + + for_each_irq(i) { + struct irq_desc *desc = irq_to_desc(i); + + if (!desc || !desc->chip) + continue; + + if (desc->chip->eoi && + desc->status & IRQ_INPROGRESS) + desc->chip->eoi(i); + + if (desc->chip->mask) + desc->chip->mask(i); + + if (desc->chip->disable && + !(desc->status & IRQ_DISABLED)) + desc->chip->disable(i); + } +} + void machine_crash_shutdown(struct pt_regs *regs) { if (ppc_md.machine_crash_shutdown) diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c index ae63a964b85..e63f2e7d2ef 100644 --- a/arch/powerpc/kernel/machine_kexec_32.c +++ b/arch/powerpc/kernel/machine_kexec_32.c @@ -39,6 +39,10 @@ void default_machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + /* mask each interrupt so we are in a more sane state for the + * kexec kernel */ + machine_kexec_mask_interrupts(); + page_list = image->head; /* we need both effective and real address here */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 477c663e014..49cee9df225 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -63,11 +63,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { const Elf_Shdr *sect; - int err; - - err = module_bug_finalize(hdr, sechdrs, me); - if (err) - return err; /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); @@ -101,5 +96,4 @@ int module_finalize(const Elf_Ehdr *hdr, void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index d0a26f1770f..ebf9846f3c3 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -27,6 +27,20 @@ extern unsigned long __toc_start; #ifdef CONFIG_PPC_BOOK3S /* + * We only have to have statically allocated lppaca structs on + * legacy iSeries, which supports at most 64 cpus. + */ +#ifdef CONFIG_PPC_ISERIES +#if NR_CPUS < 64 +#define NR_LPPACAS NR_CPUS +#else +#define NR_LPPACAS 64 +#endif +#else /* not iSeries */ +#define NR_LPPACAS 1 +#endif + +/* * The structure which the hypervisor knows about - this structure * should not cross a page boundary. The vpa_init/register_vpa call * is now known to fail if the lppaca structure crosses a page @@ -36,7 +50,7 @@ extern unsigned long __toc_start; * will suffice to ensure that it doesn't cross a page boundary. */ struct lppaca lppaca[] = { - [0 ... (NR_CPUS-1)] = { + [0 ... (NR_LPPACAS-1)] = { .desc = 0xd397d781, /* "LpPa" */ .size = sizeof(struct lppaca), .dyn_proc_status = 2, @@ -49,6 +63,54 @@ struct lppaca lppaca[] = { }, }; +static struct lppaca *extra_lppacas; +static long __initdata lppaca_size; + +static void allocate_lppacas(int nr_cpus, unsigned long limit) +{ + if (nr_cpus <= NR_LPPACAS) + return; + + lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) * + (nr_cpus - NR_LPPACAS)); + extra_lppacas = __va(memblock_alloc_base(lppaca_size, + PAGE_SIZE, limit)); +} + +static struct lppaca *new_lppaca(int cpu) +{ + struct lppaca *lp; + + if (cpu < NR_LPPACAS) + return &lppaca[cpu]; + + lp = extra_lppacas + (cpu - NR_LPPACAS); + *lp = lppaca[0]; + + return lp; +} + +static void free_lppacas(void) +{ + long new_size = 0, nr; + + if (!lppaca_size) + return; + nr = num_possible_cpus() - NR_LPPACAS; + if (nr > 0) + new_size = PAGE_ALIGN(nr * sizeof(struct lppaca)); + if (new_size >= lppaca_size) + return; + + memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size); + lppaca_size = new_size; +} + +#else + +static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { } +static inline void free_lppacas(void) { } + #endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_STD_MMU_64 @@ -88,7 +150,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; #ifdef CONFIG_PPC_BOOK3S - new_paca->lppaca_ptr = &lppaca[cpu]; + new_paca->lppaca_ptr = new_lppaca(cpu); #else new_paca->kernel_pgd = swapper_pg_dir; #endif @@ -127,7 +189,7 @@ void __init allocate_pacas(void) * the first segment. On iSeries they must be within the area mapped * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. */ - limit = min(0x10000000ULL, memblock.rmo_size); + limit = min(0x10000000ULL, ppc64_rma_size); if (firmware_has_feature(FW_FEATURE_ISERIES)) limit = min(limit, HvPagesToMap * HVPAGESIZE); @@ -144,6 +206,8 @@ void __init allocate_pacas(void) printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", paca_size, nr_cpus, paca); + allocate_lppacas(nr_cpus, limit); + /* Can't use for_each_*_cpu, as they aren't functional yet */ for (cpu = 0; cpu < nr_cpus; cpu++) initialise_paca(&paca[cpu], cpu); @@ -164,4 +228,6 @@ void __init free_unused_pacas(void) paca_size - new_size); paca_size = new_size; + + free_lppacas(); } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 9021c4ad4bb..10a44e68ef1 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1090,8 +1090,6 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) bus->number, bus->self ? pci_name(bus->self) : "PHB"); list_for_each_entry(dev, &bus->devices, bus_list) { - struct dev_archdata *sd = &dev->dev.archdata; - /* Cardbus can call us to add new devices to a bus, so ignore * those who are already fully discovered */ @@ -1107,7 +1105,7 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) set_dev_node(&dev->dev, pcibus_to_node(dev->bus)); /* Hook up default DMA ops */ - sd->dma_ops = pci_dma_ops; + set_dma_ops(&dev->dev, pci_dma_ops); set_dma_offset(&dev->dev, PCI_DRAM_OFFSET); /* Additional platform DMA/iommu setup */ diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index 95ad9dad298..d05ae4204bb 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -23,18 +23,6 @@ #include "ppc32.h" #endif -/* - * Store another value in a callchain_entry. - */ -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - unsigned int nr = entry->nr; - - if (nr < PERF_MAX_STACK_DEPTH) { - entry->ip[nr] = ip; - entry->nr = nr + 1; - } -} /* * Is sp valid as the address of the next kernel stack frame after prev_sp? @@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; } -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->nip); + perf_callchain_store(entry, regs->nip); if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) return; @@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; level = 0; - callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); } else { if (level == 0) @@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, ++level; } - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); if (!valid_next_sp(next_sp, sp)) return; sp = next_sp; @@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp) puc == (unsigned long) &sf->uc; } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); for (;;) { fp = (unsigned long __user *) sp; @@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs, read_user_stack_64(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } @@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) return __get_user_inatomic(*ret, ptr); } -static inline void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static inline void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { } @@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp, return mctx->mc_gregs; } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned int sp, next_sp; unsigned int next_ip; @@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); while (entry->nr < PERF_MAX_STACK_DEPTH) { fp = (unsigned int __user *) (unsigned long) sp; @@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs, read_user_stack_32(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } } -/* - * Since we can't get PMU interrupts inside a PMU interrupt handler, - * we don't need separate irq and nmi entries here. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - if (current_is_64bit()) - perf_callchain_user_64(regs, entry); - else - perf_callchain_user_32(regs, entry); - } - - return entry; + if (current_is_64bit()) + perf_callchain_user_64(entry, regs); + else + perf_callchain_user_32(entry, regs); } diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index d301a30445e..3129c855933 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + if (!event->hw.idx) return; /* @@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +568,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void power_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -672,6 +675,8 @@ void hw_perf_enable(void) } local64_set(&event->hw.prev_count, val); event->hw.idx = idx; + if (event->hw.state & PERF_HES_STOPPED) + val = 0; write_pmc(idx, val); perf_event_update_userpage(event); } @@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count, * re-enable the PMU in order to get hw_perf_enable to do the * actual work of reconfiguring the PMU. */ -static int power_pmu_enable(struct perf_event *event) +static int power_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -735,7 +740,7 @@ static int power_pmu_enable(struct perf_event *event) int ret = -EAGAIN; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); /* * Add the event to the list (if there is room) @@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event) cpuhw->events[n0] = event->hw.config; cpuhw->flags[n0] = event->hw.event_base; + if (!(ef_flags & PERF_EF_START)) + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed @@ -769,7 +777,7 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -777,14 +785,14 @@ nocheck: /* * Remove a event from the PMU. */ -static void power_pmu_disable(struct perf_event *event) +static void power_pmu_del(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; long i; unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); @@ -821,34 +829,60 @@ static void power_pmu_disable(struct perf_event *event) cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } /* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. + * POWER-PMU does not support disabling individual counters, hence + * program their cycle counter to their max value and ignore the interrupts. */ -static void power_pmu_unthrottle(struct perf_event *event) + +static void power_pmu_start(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + s64 left; + + if (!event->hw.idx || !event->hw.sample_period) + return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void power_pmu_stop(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; if (!event->hw.idx || !event->hw.sample_period) return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); + power_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -857,10 +891,11 @@ static void power_pmu_unthrottle(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -void power_pmu_start_txn(const struct pmu *pmu) +void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -870,11 +905,12 @@ void power_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -void power_pmu_cancel_txn(const struct pmu *pmu) +void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); } /* @@ -882,7 +918,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -int power_pmu_commit_txn(const struct pmu *pmu) +int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; @@ -901,19 +937,10 @@ int power_pmu_commit_txn(const struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); return 0; } -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, - .start_txn = power_pmu_start_txn, - .cancel_txn = power_pmu_cancel_txn, - .commit_txn = power_pmu_commit_txn, -}; - /* * Return 1 if we might be able to put event on a limited PMC, * or 0 if not. @@ -1014,7 +1041,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int power_pmu_event_init(struct perf_event *event) { u64 ev; unsigned long flags; @@ -1026,25 +1053,27 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) struct cpu_hw_events *cpuhw; if (!ppmu) - return ERR_PTR(-ENXIO); + return -ENOENT; + switch (event->attr.type) { case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: ev = event->attr.config; break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } + event->hw.config_base = ev; event->hw.idx = 0; @@ -1063,7 +1092,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) * XXX we should check if the task is an idle task. */ flags = 0; - if (event->ctx->task) + if (event->attach_state & PERF_ATTACH_TASK) flags |= PPMU_ONLY_COUNT_RUN; /* @@ -1081,7 +1110,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) */ ev = normal_pmc_alternative(ev, flags); if (!ev) - return ERR_PTR(-EINVAL); + return -EINVAL; } } @@ -1095,19 +1124,19 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, ctrs, events, cflags); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } events[n] = ev; ctrs[n] = event; cflags[n] = flags; if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); + return -EINVAL; cpuhw = &get_cpu_var(cpu_hw_events); err = power_check_constraints(cpuhw, events, cflags, n + 1); put_cpu_var(cpu_hw_events); if (err) - return ERR_PTR(-EINVAL); + return -EINVAL; event->hw.config = events[n]; event->hw.event_base = cflags[n]; @@ -1132,11 +1161,23 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &power_pmu; + return err; } +struct pmu power_pmu = { + .pmu_enable = power_pmu_enable, + .pmu_disable = power_pmu_disable, + .event_init = power_pmu_event_init, + .add = power_pmu_add, + .del = power_pmu_del, + .start = power_pmu_start, + .stop = power_pmu_stop, + .read = power_pmu_read, + .start_txn = power_pmu_start_txn, + .cancel_txn = power_pmu_cancel_txn, + .commit_txn = power_pmu_commit_txn, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -1149,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -1171,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -1183,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + power_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } /* @@ -1342,6 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_pmu_register(&power_pmu); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 1ba45471ae4..7ecca59ddf7 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + /* * Performance monitor interrupts come even when interrupts * are soft-disabled, as long as interrupts are hard-enabled. @@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void fsl_emb_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +219,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void fsl_emb_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -/* perf must be disabled, context locked on entry */ -static int fsl_emb_pmu_enable(struct perf_event *event) +/* context locked on entry */ +static int fsl_emb_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int ret = -EAGAIN; @@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; + perf_pmu_disable(event->pmu); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event) val = 0x80000000L - left; } local64_set(&event->hw.prev_count, val); + + if (!(flags & PERF_EF_START)) { + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + val = 0; + } + write_pmc(i, val); perf_event_update_userpage(event); @@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); + perf_pmu_enable(event->pmu); return ret; } -/* perf must be disabled, context locked on entry */ -static void fsl_emb_pmu_disable(struct perf_event *event) +/* context locked on entry */ +static void fsl_emb_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; + perf_pmu_disable(event->pmu); if (i < 0) goto out; @@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: + perf_pmu_enable(event->pmu); put_cpu_var(cpu_hw_events); } -/* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. - * - * Context is locked on entry, but perf is not disabled. - */ -static void fsl_emb_pmu_unthrottle(struct perf_event *event) +static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; + s64 left; if (event->hw.idx < 0 || !event->hw.sample_period) return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + local_irq_save(flags); - perf_disable(); - fsl_emb_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } -static struct pmu fsl_emb_pmu = { - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, - .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, -}; +static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + fsl_emb_pmu_read(event); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} /* * Release the PMU if this is the last perf_event. @@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int fsl_emb_pmu_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; @@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: @@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } event->hw.config = ppmu->xlate_event(ev); if (!(event->hw.config & FSL_EMB_EVENT_VALID)) - return ERR_PTR(-EINVAL); + return -EINVAL; /* * If this is in a group, check if it can go on with all the @@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, events); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { @@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) } if (num_restricted >= ppmu->n_restricted) - return ERR_PTR(-EINVAL); + return -EINVAL; } event->hw.idx = -1; @@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) if (event->attr.exclude_kernel) event->hw.config_base |= PMLCA_FCS; if (event->attr.exclude_idle) - return ERR_PTR(-ENOTSUPP); + return -ENOTSUPP; event->hw.last_period = event->hw.sample_period; local64_set(&event->hw.period_left, event->hw.last_period); @@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &fsl_emb_pmu; + return err; } +static struct pmu fsl_emb_pmu = { + .pmu_enable = fsl_emb_pmu_enable, + .pmu_disable = fsl_emb_pmu_disable, + .event_init = fsl_emb_pmu_event_init, + .add = fsl_emb_pmu_add, + .del = fsl_emb_pmu_del, + .start = fsl_emb_pmu_start, + .stop = fsl_emb_pmu_stop, + .read = fsl_emb_pmu_read, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_sample_data_init(&data, 0); data.period = event->hw.last_period; - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + fsl_emb_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } static void perf_event_interrupt(struct pt_regs *regs) @@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); + perf_pmu_register(&fsl_emb_pmu); + return 0; } diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 8eff48e20db..3fee685de4d 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -169,9 +169,11 @@ static int p970_marked_instr_event(u64 event) switch (unit) { case PM_VPU: mask = 0x4c; /* byte 0 bits 2,3,6 */ + break; case PM_LSU0: /* byte 2 bits 0,2,3,4,6; all of byte 1 */ mask = 0x085dff00; + break; case PM_LSU1L: mask = 0x50 << 24; /* byte 3 bits 4,6 */ break; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b1c648a36b0..84906d3fc86 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -517,7 +517,6 @@ struct task_struct *__switch_to(struct task_struct *prev, account_system_vtime(current); account_process_vtime(current); - calculate_steal_time(); /* * We can't take a PMU exception inside _switch() since there is a @@ -1298,14 +1297,3 @@ unsigned long randomize_et_dyn(unsigned long base) return ret; } - -#ifdef CONFIG_SMP -int arch_sd_sibling_asym_packing(void) -{ - if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { - printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); - return SD_ASYM_PACKING; - } - return 0; -} -#endif diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fed9bf6187d..9e3132db718 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -66,6 +66,7 @@ int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; +u64 ppc64_rma_size; #endif static int __init early_parse_mem(char *p) @@ -98,7 +99,7 @@ static void __init move_device_tree(void) if ((memory_limit && (start + size) > memory_limit) || overlaps_crashkernel(start, size)) { - p = __va(memblock_alloc_base(size, PAGE_SIZE, memblock.rmo_size)); + p = __va(memblock_alloc(size, PAGE_SIZE)); memcpy(p, initial_boot_params, size); initial_boot_params = (struct boot_param_header *)p; DBG("Moved device tree to 0x%p\n", p); @@ -363,10 +364,15 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -void __init early_init_dt_scan_chosen_arch(unsigned long node) +int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, + int depth, void *data) { unsigned long *lprop; + /* Use common scan routine to determine if this is the chosen node */ + if (early_init_dt_scan_chosen(node, uname, depth, data) == 0) + return 0; + #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) @@ -398,6 +404,9 @@ void __init early_init_dt_scan_chosen_arch(unsigned long node) if (lprop) crashk_res.end = crashk_res.start + *lprop - 1; #endif + + /* break now */ + return 1; } #ifdef CONFIG_PPC_PSERIES @@ -492,7 +501,7 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node, void __init early_init_dt_add_memory_arch(u64 base, u64 size) { -#if defined(CONFIG_PPC64) +#ifdef CONFIG_PPC64 if (iommu_is_off) { if (base >= 0x80000000ul) return; @@ -501,9 +510,13 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) } #endif - memblock_add(base, size); - + /* First MEMBLOCK added, do some special initializations */ + if (memstart_addr == ~(phys_addr_t)0) + setup_initial_memory_limit(base, size); memstart_addr = min((u64)memstart_addr, base); + + /* Add the chunk to the MEMBLOCK list */ + memblock_add(base, size); } u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) @@ -655,7 +668,6 @@ static void __init phyp_dump_reserve_mem(void) static inline void __init phyp_dump_reserve_mem(void) {} #endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ - void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -679,10 +691,11 @@ void __init early_init_devtree(void *params) * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... */ - of_scan_flat_dt(early_init_dt_scan_chosen, NULL); + of_scan_flat_dt(early_init_dt_scan_chosen_ppc, NULL); /* Scan memory nodes and rebuild MEMBLOCKs */ memblock_init(); + of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 11f3cd9c832..286d9783d93 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1681,7 +1681,7 @@ long do_syscall_trace_enter(struct pt_regs *regs) if (unlikely(current->audit_context)) { #ifdef CONFIG_PPC64 - if (!test_thread_flag(TIF_32BIT)) + if (!is_32bit_task()) audit_syscall_entry(AUDIT_ARCH_PPC64, regs->gpr[0], regs->gpr[3], regs->gpr[4], diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 41048de3c6c..8fe8bc61c10 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -805,7 +805,7 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } -static int rtas_ibm_suspend_me(struct rtas_args *args) +int rtas_ibm_suspend_me(struct rtas_args *args) { long state; long rc; @@ -855,7 +855,7 @@ static int rtas_ibm_suspend_me(struct rtas_args *args) return atomic_read(&data.error); } #else /* CONFIG_PPC_PSERIES */ -static int rtas_ibm_suspend_me(struct rtas_args *args) +int rtas_ibm_suspend_me(struct rtas_args *args) { return -ENOSYS; } @@ -969,7 +969,7 @@ void __init rtas_initialize(void) */ #ifdef CONFIG_PPC64 if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { - rtas_region = min(memblock.rmo_size, RTAS_INSTANTIATE_MAX); + rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } #endif diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 67a84d8f118..2b442e6c21e 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -716,6 +716,7 @@ static const struct file_operations rtas_flash_operations = { .write = rtas_flash_write, .open = rtas_excl_open, .release = rtas_flash_release, + .llseek = default_llseek, }; static const struct file_operations manage_flash_operations = { @@ -724,6 +725,7 @@ static const struct file_operations manage_flash_operations = { .write = manage_flash_write, .open = rtas_excl_open, .release = rtas_excl_release, + .llseek = default_llseek, }; static const struct file_operations validate_flash_operations = { @@ -732,6 +734,7 @@ static const struct file_operations validate_flash_operations = { .write = validate_flash_write, .open = rtas_excl_open, .release = validate_flash_release, + .llseek = default_llseek, }; static int __init rtas_flash_init(void) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 638883e23e3..0438f819fe6 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -354,6 +354,7 @@ static const struct file_operations proc_rtas_log_operations = { .poll = rtas_log_poll, .open = rtas_log_open, .release = rtas_log_release, + .llseek = noop_llseek, }; static int enable_surveillance(int timeout) diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 93666f9cabf..1d2fbc90530 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -46,7 +46,7 @@ extern void bootx_init(unsigned long r4, unsigned long phys); -int boot_cpuid; +int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); int boot_cpuid_phys; @@ -246,7 +246,7 @@ static void __init irqstack_early_init(void) unsigned int i; /* interrupt stacks must be in lowmem, we get that for free on ppc32 - * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */ + * as the memblock is limited to lowmem by default */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e72690ec9b8..2a178b0ebcd 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -486,7 +486,7 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. */ - limit = min(slb0_limit(), memblock.rmo_size); + limit = min(slb0_limit(), ppc64_rma_size); for_each_possible_cpu(i) { unsigned long sp; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 0008bc58e82..68034bbf2e4 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -508,9 +508,6 @@ int __devinit start_secondary(void *unused) if (smp_ops->take_timebase) smp_ops->take_timebase(); - if (system_state > SYSTEM_BOOTING) - snapshot_timebase(); - secondary_cpu_time_init(); ipi_call_lock(); @@ -575,11 +572,18 @@ void __init smp_cpus_done(unsigned int max_cpus) free_cpumask_var(old_mask); - snapshot_timebases(); - dump_numa_cpu_topology(); } +int arch_sd_sibling_asym_packing(void) +{ + if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { + printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); + return SD_ASYM_PACKING; + } + return 0; +} + #ifdef CONFIG_HOTPLUG_CPU int __cpu_disable(void) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 8533b3b83f5..010406958d9 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,7 +53,7 @@ #include <linux/posix-timers.h> #include <linux/irq.h> #include <linux/delay.h> -#include <linux/perf_event.h> +#include <linux/irq_work.h> #include <asm/trace.h> #include <asm/io.h> @@ -161,10 +161,9 @@ extern struct timezone sys_tz; static long timezone_offset; unsigned long ppc_proc_freq; -EXPORT_SYMBOL(ppc_proc_freq); +EXPORT_SYMBOL_GPL(ppc_proc_freq); unsigned long ppc_tb_freq; - -static DEFINE_PER_CPU(u64, last_jiffy); +EXPORT_SYMBOL_GPL(ppc_tb_freq); #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* @@ -185,6 +184,8 @@ DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); cputime_t cputime_one_jiffy; +void (*dtl_consumer)(struct dtl_entry *, u64); + static void calc_cputime_factors(void) { struct div_result res; @@ -200,62 +201,153 @@ static void calc_cputime_factors(void) } /* - * Read the PURR on systems that have it, otherwise the timebase. + * Read the SPURR on systems that have it, otherwise the PURR, + * or if that doesn't exist return the timebase value passed in. */ -static u64 read_purr(void) +static u64 read_spurr(u64 tb) { + if (cpu_has_feature(CPU_FTR_SPURR)) + return mfspr(SPRN_SPURR); if (cpu_has_feature(CPU_FTR_PURR)) return mfspr(SPRN_PURR); - return mftb(); + return tb; } +#ifdef CONFIG_PPC_SPLPAR + /* - * Read the SPURR on systems that have it, otherwise the purr + * Scan the dispatch trace log and count up the stolen time. + * Should be called with interrupts disabled. */ -static u64 read_spurr(u64 purr) +static u64 scan_dispatch_log(u64 stop_tb) { - /* - * cpus without PURR won't have a SPURR - * We already know the former when we use this, so tell gcc - */ - if (cpu_has_feature(CPU_FTR_PURR) && cpu_has_feature(CPU_FTR_SPURR)) - return mfspr(SPRN_SPURR); - return purr; + u64 i = local_paca->dtl_ridx; + struct dtl_entry *dtl = local_paca->dtl_curr; + struct dtl_entry *dtl_end = local_paca->dispatch_log_end; + struct lppaca *vpa = local_paca->lppaca_ptr; + u64 tb_delta; + u64 stolen = 0; + u64 dtb; + + if (i == vpa->dtl_idx) + return 0; + while (i < vpa->dtl_idx) { + if (dtl_consumer) + dtl_consumer(dtl, i); + dtb = dtl->timebase; + tb_delta = dtl->enqueue_to_dispatch_time + + dtl->ready_to_enqueue_time; + barrier(); + if (i + N_DISPATCH_LOG < vpa->dtl_idx) { + /* buffer has overflowed */ + i = vpa->dtl_idx - N_DISPATCH_LOG; + dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); + continue; + } + if (dtb > stop_tb) + break; + stolen += tb_delta; + ++i; + ++dtl; + if (dtl == dtl_end) + dtl = local_paca->dispatch_log; + } + local_paca->dtl_ridx = i; + local_paca->dtl_curr = dtl; + return stolen; } /* + * Accumulate stolen time by scanning the dispatch trace log. + * Called on entry from user mode. + */ +void accumulate_stolen_time(void) +{ + u64 sst, ust; + + sst = scan_dispatch_log(get_paca()->starttime_user); + ust = scan_dispatch_log(get_paca()->starttime); + get_paca()->system_time -= sst; + get_paca()->user_time -= ust; + get_paca()->stolen_time += ust + sst; +} + +static inline u64 calculate_stolen_time(u64 stop_tb) +{ + u64 stolen = 0; + + if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) { + stolen = scan_dispatch_log(stop_tb); + get_paca()->system_time -= stolen; + } + + stolen += get_paca()->stolen_time; + get_paca()->stolen_time = 0; + return stolen; +} + +#else /* CONFIG_PPC_SPLPAR */ +static inline u64 calculate_stolen_time(u64 stop_tb) +{ + return 0; +} + +#endif /* CONFIG_PPC_SPLPAR */ + +/* * Account time for a transition between system, hard irq * or soft irq state. */ void account_system_vtime(struct task_struct *tsk) { - u64 now, nowscaled, delta, deltascaled, sys_time; + u64 now, nowscaled, delta, deltascaled; unsigned long flags; + u64 stolen, udelta, sys_scaled, user_scaled; local_irq_save(flags); - now = read_purr(); + now = mftb(); nowscaled = read_spurr(now); - delta = now - get_paca()->startpurr; + get_paca()->system_time += now - get_paca()->starttime; + get_paca()->starttime = now; deltascaled = nowscaled - get_paca()->startspurr; - get_paca()->startpurr = now; get_paca()->startspurr = nowscaled; - if (!in_interrupt()) { - /* deltascaled includes both user and system time. - * Hence scale it based on the purr ratio to estimate - * the system time */ - sys_time = get_paca()->system_time; - if (get_paca()->user_time) - deltascaled = deltascaled * sys_time / - (sys_time + get_paca()->user_time); - delta += sys_time; - get_paca()->system_time = 0; + + stolen = calculate_stolen_time(now); + + delta = get_paca()->system_time; + get_paca()->system_time = 0; + udelta = get_paca()->user_time - get_paca()->utime_sspurr; + get_paca()->utime_sspurr = get_paca()->user_time; + + /* + * Because we don't read the SPURR on every kernel entry/exit, + * deltascaled includes both user and system SPURR ticks. + * Apportion these ticks to system SPURR ticks and user + * SPURR ticks in the same ratio as the system time (delta) + * and user time (udelta) values obtained from the timebase + * over the same interval. The system ticks get accounted here; + * the user ticks get saved up in paca->user_time_scaled to be + * used by account_process_tick. + */ + sys_scaled = delta; + user_scaled = udelta; + if (deltascaled != delta + udelta) { + if (udelta) { + sys_scaled = deltascaled * delta / (delta + udelta); + user_scaled = deltascaled - sys_scaled; + } else { + sys_scaled = deltascaled; + } + } + get_paca()->user_time_scaled += user_scaled; + + if (in_irq() || idle_task(smp_processor_id()) != tsk) { + account_system_time(tsk, 0, delta, sys_scaled); + if (stolen) + account_steal_time(stolen); + } else { + account_idle_time(delta + stolen); } - if (in_irq() || idle_task(smp_processor_id()) != tsk) - account_system_time(tsk, 0, delta, deltascaled); - else - account_idle_time(delta); - __get_cpu_var(cputime_last_delta) = delta; - __get_cpu_var(cputime_scaled_last_delta) = deltascaled; local_irq_restore(flags); } EXPORT_SYMBOL_GPL(account_system_vtime); @@ -265,125 +357,26 @@ EXPORT_SYMBOL_GPL(account_system_vtime); * by the exception entry and exit code to the generic process * user and system time records. * Must be called with interrupts disabled. + * Assumes that account_system_vtime() has been called recently + * (i.e. since the last entry from usermode) so that + * get_paca()->user_time_scaled is up to date. */ void account_process_tick(struct task_struct *tsk, int user_tick) { cputime_t utime, utimescaled; utime = get_paca()->user_time; + utimescaled = get_paca()->user_time_scaled; get_paca()->user_time = 0; - utimescaled = cputime_to_scaled(utime); + get_paca()->user_time_scaled = 0; + get_paca()->utime_sspurr = 0; account_user_time(tsk, utime, utimescaled); } -/* - * Stuff for accounting stolen time. - */ -struct cpu_purr_data { - int initialized; /* thread is running */ - u64 tb; /* last TB value read */ - u64 purr; /* last PURR value read */ - u64 spurr; /* last SPURR value read */ -}; - -/* - * Each entry in the cpu_purr_data array is manipulated only by its - * "owner" cpu -- usually in the timer interrupt but also occasionally - * in process context for cpu online. As long as cpus do not touch - * each others' cpu_purr_data, disabling local interrupts is - * sufficient to serialize accesses. - */ -static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data); - -static void snapshot_tb_and_purr(void *data) -{ - unsigned long flags; - struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data); - - local_irq_save(flags); - p->tb = get_tb_or_rtc(); - p->purr = mfspr(SPRN_PURR); - wmb(); - p->initialized = 1; - local_irq_restore(flags); -} - -/* - * Called during boot when all cpus have come up. - */ -void snapshot_timebases(void) -{ - if (!cpu_has_feature(CPU_FTR_PURR)) - return; - on_each_cpu(snapshot_tb_and_purr, NULL, 1); -} - -/* - * Must be called with interrupts disabled. - */ -void calculate_steal_time(void) -{ - u64 tb, purr; - s64 stolen; - struct cpu_purr_data *pme; - - pme = &__get_cpu_var(cpu_purr_data); - if (!pme->initialized) - return; /* !CPU_FTR_PURR or early in early boot */ - tb = mftb(); - purr = mfspr(SPRN_PURR); - stolen = (tb - pme->tb) - (purr - pme->purr); - if (stolen > 0) { - if (idle_task(smp_processor_id()) != current) - account_steal_time(stolen); - else - account_idle_time(stolen); - } - pme->tb = tb; - pme->purr = purr; -} - -#ifdef CONFIG_PPC_SPLPAR -/* - * Must be called before the cpu is added to the online map when - * a cpu is being brought up at runtime. - */ -static void snapshot_purr(void) -{ - struct cpu_purr_data *pme; - unsigned long flags; - - if (!cpu_has_feature(CPU_FTR_PURR)) - return; - local_irq_save(flags); - pme = &__get_cpu_var(cpu_purr_data); - pme->tb = mftb(); - pme->purr = mfspr(SPRN_PURR); - pme->initialized = 1; - local_irq_restore(flags); -} - -#endif /* CONFIG_PPC_SPLPAR */ - #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ #define calc_cputime_factors() -#define calculate_steal_time() do { } while (0) #endif -#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)) -#define snapshot_purr() do { } while (0) -#endif - -/* - * Called when a cpu comes up after the system has finished booting, - * i.e. as a result of a hotplug cpu action. - */ -void snapshot_timebase(void) -{ - __get_cpu_var(last_jiffy) = get_tb_or_rtc(); - snapshot_purr(); -} - void __delay(unsigned long loops) { unsigned long start; @@ -493,60 +486,60 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#ifdef CONFIG_PERF_EVENTS +#ifdef CONFIG_IRQ_WORK /* * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... */ #ifdef CONFIG_PPC64 -static inline unsigned long test_perf_event_pending(void) +static inline unsigned long test_irq_work_pending(void) { unsigned long x; asm volatile("lbz %0,%1(13)" : "=r" (x) - : "i" (offsetof(struct paca_struct, perf_event_pending))); + : "i" (offsetof(struct paca_struct, irq_work_pending))); return x; } -static inline void set_perf_event_pending_flag(void) +static inline void set_irq_work_pending_flag(void) { asm volatile("stb %0,%1(13)" : : "r" (1), - "i" (offsetof(struct paca_struct, perf_event_pending))); + "i" (offsetof(struct paca_struct, irq_work_pending))); } -static inline void clear_perf_event_pending(void) +static inline void clear_irq_work_pending(void) { asm volatile("stb %0,%1(13)" : : "r" (0), - "i" (offsetof(struct paca_struct, perf_event_pending))); + "i" (offsetof(struct paca_struct, irq_work_pending))); } #else /* 32-bit */ -DEFINE_PER_CPU(u8, perf_event_pending); +DEFINE_PER_CPU(u8, irq_work_pending); -#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 -#define test_perf_event_pending() __get_cpu_var(perf_event_pending) -#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 +#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 +#define test_irq_work_pending() __get_cpu_var(irq_work_pending) +#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 #endif /* 32 vs 64 bit */ -void set_perf_event_pending(void) +void set_irq_work_pending(void) { preempt_disable(); - set_perf_event_pending_flag(); + set_irq_work_pending_flag(); set_dec(1); preempt_enable(); } -#else /* CONFIG_PERF_EVENTS */ +#else /* CONFIG_IRQ_WORK */ -#define test_perf_event_pending() 0 -#define clear_perf_event_pending() +#define test_irq_work_pending() 0 +#define clear_irq_work_pending() -#endif /* CONFIG_PERF_EVENTS */ +#endif /* CONFIG_IRQ_WORK */ /* * For iSeries shared processors, we have to let the hypervisor @@ -585,11 +578,9 @@ void timer_interrupt(struct pt_regs * regs) old_regs = set_irq_regs(regs); irq_enter(); - calculate_steal_time(); - - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); } #ifdef CONFIG_PPC_ISERIES diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index a45a63c3a0c..1b2cdc8eec9 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -538,6 +538,11 @@ int machine_check_e500(struct pt_regs *regs) return 0; } + +int machine_check_generic(struct pt_regs *regs) +{ + return 0; +} #elif defined(CONFIG_E200) int machine_check_e200(struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 13002fe206e..fd8728729ab 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -159,7 +159,7 @@ static void dump_vdso_pages(struct vm_area_struct * vma) { int i; - if (!vma || test_thread_flag(TIF_32BIT)) { + if (!vma || is_32bit_task()) { printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase); for (i=0; i<vdso32_pages; i++) { struct page *pg = virt_to_page(vdso32_kbase + @@ -170,7 +170,7 @@ static void dump_vdso_pages(struct vm_area_struct * vma) dump_one_vdso_page(pg, upg); } } - if (!vma || !test_thread_flag(TIF_32BIT)) { + if (!vma || !is_32bit_task()) { printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase); for (i=0; i<vdso64_pages; i++) { struct page *pg = virt_to_page(vdso64_kbase + @@ -200,7 +200,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return 0; #ifdef CONFIG_PPC64 - if (test_thread_flag(TIF_32BIT)) { + if (is_32bit_task()) { vdso_pagelist = vdso32_pagelist; vdso_pages = vdso32_pages; vdso_base = VDSO32_MBASE; diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 51ead52141b..9a7946c4173 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -14,10 +14,10 @@ obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) GCOV_PROFILE := n -EXTRA_CFLAGS := -shared -fno-common -fno-builtin -EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ +ccflags-y := -shared -fno-common -fno-builtin +ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) -EXTRA_AFLAGS := -D__VDSO32__ -s +asflags-y := -D__VDSO32__ -s obj-y += vdso32_wrapper.o extra-y += vdso32.lds diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 79da65d44a2..8c500d8622e 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -9,10 +9,10 @@ obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) GCOV_PROFILE := n -EXTRA_CFLAGS := -shared -fno-common -fno-builtin -EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ +ccflags-y := -shared -fno-common -fno-builtin +ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) -EXTRA_AFLAGS := -D__VDSO64__ -s +asflags-y := -D__VDSO64__ -s obj-y += vdso64_wrapper.o extra-y += vdso64.lds diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index fa3469ddaef..d692989a431 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1184,7 +1184,12 @@ EXPORT_SYMBOL(vio_unregister_driver); /* vio_dev refcount hit 0 */ static void __devinit vio_dev_release(struct device *dev) { - /* XXX should free TCE table */ + struct iommu_table *tbl = get_iommu_table_base(dev); + + /* iSeries uses a common table for all vio devices */ + if (!firmware_has_feature(FW_FEATURE_ISERIES) && tbl) + iommu_free_table(tbl, dev->of_node ? + dev->of_node->full_name : dev_name(dev)); of_node_put(dev->of_node); kfree(to_vio_dev(dev)); } @@ -1254,8 +1259,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) if (device_register(&viodev->dev)) { printk(KERN_ERR "%s: failed to register device %s\n", __func__, dev_name(&viodev->dev)); - /* XXX free TCE table */ - kfree(viodev); + put_device(&viodev->dev); return NULL; } diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 73c0a3f64ed..74d0e742114 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -43,7 +43,7 @@ int kvmppc_core_check_processor_compat(void) { int r; - if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) + if (strncmp(cur_cpu_spec->platform, "ppc440", 6) == 0) r = 0; else r = -ENOTSUPP; @@ -72,6 +72,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) /* Since the guest can directly access the timebase, it must know the * real timebase frequency. Accordingly, it must see the state of * CCR1[TCS]. */ + /* XXX CCR1 doesn't exist on all 440 SoCs. */ vcpu->arch.ccr1 = mfspr(SPRN_CCR1); for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) @@ -123,8 +124,14 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_vcpu; + vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!vcpu->arch.shared) + goto uninit_vcpu; + return vcpu; +uninit_vcpu: + kvm_vcpu_uninit(vcpu); free_vcpu: kmem_cache_free(kvm_vcpu_cache, vcpu_44x); out: @@ -135,6 +142,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + free_page((unsigned long)vcpu->arch.shared); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu_44x); } diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 9b9b5cdea84..5f3cff83e08 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -47,6 +47,7 @@ #ifdef DEBUG void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) { + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); struct kvmppc_44x_tlbe *tlbe; int i; @@ -221,14 +222,14 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { - unsigned int as = !!(vcpu->arch.msr & MSR_IS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { - unsigned int as = !!(vcpu->arch.msr & MSR_DS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } @@ -354,7 +355,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, - vcpu->arch.msr & MSR_PR); + vcpu->arch.shared->msr & MSR_PR); stlbe.tid = !(asid & 0xff); /* Keep track of the reference so we can properly release it later. */ @@ -423,7 +424,7 @@ static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, /* Does it match current guest AS? */ /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) return 0; gpa = get_tlb_raddr(tlbe); diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index d45c818a384..4d6863823f6 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -4,7 +4,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm +ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a3cef30d1d4..e316847c08c 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -17,6 +17,7 @@ #include <linux/kvm_host.h> #include <linux/err.h> #include <linux/slab.h> +#include "trace.h" #include <asm/reg.h> #include <asm/cputable.h> @@ -35,7 +36,6 @@ #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU /* #define EXIT_DEBUG */ -/* #define EXIT_DEBUG_SIMPLE */ /* #define DEBUG_EXT */ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, @@ -105,65 +105,71 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_VSX); } -#if defined(EXIT_DEBUG) -static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu) -{ - u64 jd = mftb() - vcpu->arch.dec_jiffies; - return vcpu->arch.dec - jd; -} -#endif - static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) { - vcpu->arch.shadow_msr = vcpu->arch.msr; + ulong smsr = vcpu->arch.shared->msr; + /* Guest MSR values */ - vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | - MSR_BE | MSR_DE; + smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE; /* Process MSR values */ - vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | - MSR_EE; + smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; /* External providers the guest reserved */ - vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext); + smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext); /* 64-bit Process MSR values */ #ifdef CONFIG_PPC_BOOK3S_64 - vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV; + smsr |= MSR_ISF | MSR_HV; #endif + vcpu->arch.shadow_msr = smsr; } void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) { - ulong old_msr = vcpu->arch.msr; + ulong old_msr = vcpu->arch.shared->msr; #ifdef EXIT_DEBUG printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); #endif msr &= to_book3s(vcpu)->msr_mask; - vcpu->arch.msr = msr; + vcpu->arch.shared->msr = msr; kvmppc_recalc_shadow_msr(vcpu); - if (msr & (MSR_WE|MSR_POW)) { + if (msr & MSR_POW) { if (!vcpu->arch.pending_exceptions) { kvm_vcpu_block(vcpu); vcpu->stat.halt_wakeup++; + + /* Unset POW bit after we woke up */ + msr &= ~MSR_POW; + vcpu->arch.shared->msr = msr; } } - if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) != + if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) != (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { kvmppc_mmu_flush_segments(vcpu); kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); + + /* Preload magic page segment when in kernel mode */ + if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) { + struct kvm_vcpu_arch *a = &vcpu->arch; + + if (msr & MSR_DR) + kvmppc_mmu_map_segment(vcpu, a->magic_page_ea); + else + kvmppc_mmu_map_segment(vcpu, a->magic_page_pa); + } } /* Preload FPU if it's enabled */ - if (vcpu->arch.msr & MSR_FP) + if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); } void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { - vcpu->arch.srr0 = kvmppc_get_pc(vcpu); - vcpu->arch.srr1 = vcpu->arch.msr | flags; + vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu); + vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags; kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec); vcpu->arch.mmu.reset_msr(vcpu); } @@ -180,6 +186,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec) case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break; case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break; case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break; + case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break; case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break; case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break; case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break; @@ -199,6 +206,9 @@ static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, { clear_bit(kvmppc_book3s_vec2irqprio(vec), &vcpu->arch.pending_exceptions); + + if (!vcpu->arch.pending_exceptions) + vcpu->arch.shared->int_pending = 0; } void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) @@ -237,13 +247,19 @@ void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); + unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL; + + if (irq->irq == KVM_INTERRUPT_SET_LEVEL) + vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL; + + kvmppc_book3s_queue_irqprio(vcpu, vec); } void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); + kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); } int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) @@ -251,14 +267,29 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) int deliver = 1; int vec = 0; ulong flags = 0ULL; + ulong crit_raw = vcpu->arch.shared->critical; + ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); + bool crit; + + /* Truncate crit indicators in 32 bit mode */ + if (!(vcpu->arch.shared->msr & MSR_SF)) { + crit_raw &= 0xffffffff; + crit_r1 &= 0xffffffff; + } + + /* Critical section when crit == r1 */ + crit = (crit_raw == crit_r1); + /* ... and we're in supervisor mode */ + crit = crit && !(vcpu->arch.shared->msr & MSR_PR); switch (priority) { case BOOK3S_IRQPRIO_DECREMENTER: - deliver = vcpu->arch.msr & MSR_EE; + deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit; vec = BOOK3S_INTERRUPT_DECREMENTER; break; case BOOK3S_IRQPRIO_EXTERNAL: - deliver = vcpu->arch.msr & MSR_EE; + case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: + deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit; vec = BOOK3S_INTERRUPT_EXTERNAL; break; case BOOK3S_IRQPRIO_SYSTEM_RESET: @@ -320,9 +351,27 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) return deliver; } +/* + * This function determines if an irqprio should be cleared once issued. + */ +static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) +{ + switch (priority) { + case BOOK3S_IRQPRIO_DECREMENTER: + /* DEC interrupts get cleared by mtdec */ + return false; + case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: + /* External interrupts get cleared by userspace */ + return false; + } + + return true; +} + void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned long old_pending = vcpu->arch.pending_exceptions; unsigned int priority; #ifdef EXIT_DEBUG @@ -332,8 +381,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) priority = __ffs(*pending); while (priority < BOOK3S_IRQPRIO_MAX) { if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && - (priority != BOOK3S_IRQPRIO_DECREMENTER)) { - /* DEC interrupts get cleared by mtdec */ + clear_irqprio(vcpu, priority)) { clear_bit(priority, &vcpu->arch.pending_exceptions); break; } @@ -342,6 +390,12 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) BITS_PER_BYTE * sizeof(*pending), priority + 1); } + + /* Tell the guest about our interrupt status */ + if (*pending) + vcpu->arch.shared->int_pending = 1; + else if (old_pending) + vcpu->arch.shared->int_pending = 0; } void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) @@ -398,6 +452,25 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) } } +pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + ulong mp_pa = vcpu->arch.magic_page_pa; + + /* Magic page override */ + if (unlikely(mp_pa) && + unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) == + ((mp_pa & PAGE_MASK) & KVM_PAM))) { + ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; + pfn_t pfn; + + pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT; + get_page(pfn_to_page(pfn)); + return pfn; + } + + return gfn_to_pfn(vcpu->kvm, gfn); +} + /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to * emulate 32 bytes dcbz length. @@ -415,8 +488,10 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) int i; hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (is_error_page(hpage)) + if (is_error_page(hpage)) { + kvm_release_page_clean(hpage); return; + } hpage_offset = pte->raddr & ~PAGE_MASK; hpage_offset &= ~0xFFFULL; @@ -437,14 +512,14 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, struct kvmppc_pte *pte) { - int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR)); + int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR)); int r; if (relocated) { r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data); } else { pte->eaddr = eaddr; - pte->raddr = eaddr & 0xffffffff; + pte->raddr = eaddr & KVM_PAM; pte->vpage = VSID_REAL | eaddr >> 12; pte->may_read = true; pte->may_write = true; @@ -533,6 +608,13 @@ mmio: static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) { + ulong mp_pa = vcpu->arch.magic_page_pa; + + if (unlikely(mp_pa) && + unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { + return 1; + } + return kvm_is_visible_gfn(vcpu->kvm, gfn); } @@ -545,8 +627,8 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, int page_found = 0; struct kvmppc_pte pte; bool is_mmio = false; - bool dr = (vcpu->arch.msr & MSR_DR) ? true : false; - bool ir = (vcpu->arch.msr & MSR_IR) ? true : false; + bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false; + bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false; u64 vsid; relocated = data ? dr : ir; @@ -558,12 +640,12 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.may_execute = true; pte.may_read = true; pte.may_write = true; - pte.raddr = eaddr & 0xffffffff; + pte.raddr = eaddr & KVM_PAM; pte.eaddr = eaddr; pte.vpage = eaddr >> 12; } - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { case 0: pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); break; @@ -571,7 +653,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, case MSR_IR: vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); - if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR) + if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR) pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12)); else pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); @@ -594,20 +676,23 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (page_found == -ENOENT) { /* Page not found in guest PTE entries */ - vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); - to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; - vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; + vcpu->arch.shared->msr |= + (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); kvmppc_book3s_queue_irqprio(vcpu, vec); } else if (page_found == -EPERM) { /* Storage protection */ - vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); - to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; - to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; - vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dsisr = + to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; + vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; + vcpu->arch.shared->msr |= + (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); kvmppc_book3s_queue_irqprio(vcpu, vec); } else if (page_found == -EINVAL) { /* Page not found in guest SLB */ - vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); } else if (!is_mmio && kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { @@ -695,9 +780,11 @@ static int kvmppc_read_inst(struct kvm_vcpu *vcpu) ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); if (ret == -ENOENT) { - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1); - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0); - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); + ulong msr = vcpu->arch.shared->msr; + + msr = kvmppc_set_field(msr, 33, 33, 1); + msr = kvmppc_set_field(msr, 34, 36, 0); + vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); return EMULATE_AGAIN; } @@ -736,7 +823,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) return RESUME_GUEST; - if (!(vcpu->arch.msr & msr)) { + if (!(vcpu->arch.shared->msr & msr)) { kvmppc_book3s_queue_irqprio(vcpu, exit_nr); return RESUME_GUEST; } @@ -796,16 +883,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; -#ifdef EXIT_DEBUG - printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n", - exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), - kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1); -#elif defined (EXIT_DEBUG_SIMPLE) - if ((exit_nr != 0x900) && (exit_nr != 0x500)) - printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", - exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), - vcpu->arch.msr); -#endif + + trace_kvm_book3s_exit(exit_nr, vcpu); kvm_resched(vcpu); switch (exit_nr) { case BOOK3S_INTERRUPT_INST_STORAGE: @@ -836,9 +915,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); r = RESUME_GUEST; } else { - vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000; + vcpu->arch.shared->msr |= + to_svcpu(vcpu)->shadow_srr1 & 0x58000000; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); r = RESUME_GUEST; } break; @@ -861,17 +940,16 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) { r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); } else { - vcpu->arch.dear = dar; - to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; + vcpu->arch.shared->dar = dar; + vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL); r = RESUME_GUEST; } break; } case BOOK3S_INTERRUPT_DATA_SEGMENT: if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { - vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_SEGMENT); } @@ -904,7 +982,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, program_interrupt: flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull; - if (vcpu->arch.msr & MSR_PR) { + if (vcpu->arch.shared->msr & MSR_PR) { #ifdef EXIT_DEBUG printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); #endif @@ -941,10 +1019,10 @@ program_interrupt: break; } case BOOK3S_INTERRUPT_SYSCALL: - // XXX make user settable if (vcpu->arch.osi_enabled && (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { + /* MOL hypercalls */ u64 *gprs = run->osi.gprs; int i; @@ -953,8 +1031,13 @@ program_interrupt: gprs[i] = kvmppc_get_gpr(vcpu, i); vcpu->arch.osi_needed = 1; r = RESUME_HOST_NV; - + } else if (!(vcpu->arch.shared->msr & MSR_PR) && + (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { + /* KVM PV hypercalls */ + kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); + r = RESUME_GUEST; } else { + /* Guest syscalls */ vcpu->stat.syscall_exits++; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); r = RESUME_GUEST; @@ -989,9 +1072,9 @@ program_interrupt: } case BOOK3S_INTERRUPT_ALIGNMENT: if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { - to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu, + vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu, kvmppc_get_last_inst(vcpu)); - vcpu->arch.dear = kvmppc_alignment_dar(vcpu, + vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu, kvmppc_get_last_inst(vcpu)); kvmppc_book3s_queue_irqprio(vcpu, exit_nr); } @@ -1031,9 +1114,7 @@ program_interrupt: } } -#ifdef EXIT_DEBUG - printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r); -#endif + trace_kvm_book3s_reenter(r, vcpu); return r; } @@ -1052,14 +1133,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->ctr = kvmppc_get_ctr(vcpu); regs->lr = kvmppc_get_lr(vcpu); regs->xer = kvmppc_get_xer(vcpu); - regs->msr = vcpu->arch.msr; - regs->srr0 = vcpu->arch.srr0; - regs->srr1 = vcpu->arch.srr1; + regs->msr = vcpu->arch.shared->msr; + regs->srr0 = vcpu->arch.shared->srr0; + regs->srr1 = vcpu->arch.shared->srr1; regs->pid = vcpu->arch.pid; - regs->sprg0 = vcpu->arch.sprg0; - regs->sprg1 = vcpu->arch.sprg1; - regs->sprg2 = vcpu->arch.sprg2; - regs->sprg3 = vcpu->arch.sprg3; + regs->sprg0 = vcpu->arch.shared->sprg0; + regs->sprg1 = vcpu->arch.shared->sprg1; + regs->sprg2 = vcpu->arch.shared->sprg2; + regs->sprg3 = vcpu->arch.shared->sprg3; regs->sprg5 = vcpu->arch.sprg4; regs->sprg6 = vcpu->arch.sprg5; regs->sprg7 = vcpu->arch.sprg6; @@ -1080,12 +1161,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvmppc_set_lr(vcpu, regs->lr); kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); - vcpu->arch.srr0 = regs->srr0; - vcpu->arch.srr1 = regs->srr1; - vcpu->arch.sprg0 = regs->sprg0; - vcpu->arch.sprg1 = regs->sprg1; - vcpu->arch.sprg2 = regs->sprg2; - vcpu->arch.sprg3 = regs->sprg3; + vcpu->arch.shared->srr0 = regs->srr0; + vcpu->arch.shared->srr1 = regs->srr1; + vcpu->arch.shared->sprg0 = regs->sprg0; + vcpu->arch.shared->sprg1 = regs->sprg1; + vcpu->arch.shared->sprg2 = regs->sprg2; + vcpu->arch.shared->sprg3 = regs->sprg3; vcpu->arch.sprg5 = regs->sprg4; vcpu->arch.sprg6 = regs->sprg5; vcpu->arch.sprg7 = regs->sprg6; @@ -1111,10 +1192,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv; } } else { - for (i = 0; i < 16; i++) { - sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw; - sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw; - } + for (i = 0; i < 16; i++) + sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i]; + for (i = 0; i < 8; i++) { sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw; sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; @@ -1225,6 +1305,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) struct kvmppc_vcpu_book3s *vcpu_book3s; struct kvm_vcpu *vcpu; int err = -ENOMEM; + unsigned long p; vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s)); if (!vcpu_book3s) @@ -1242,6 +1323,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_shadow_vcpu; + p = __get_free_page(GFP_KERNEL|__GFP_ZERO); + /* the real shared page fills the last 4k of our page */ + vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096); + if (!p) + goto uninit_vcpu; + vcpu->arch.host_retip = kvm_return_point; vcpu->arch.host_msr = mfmsr(); #ifdef CONFIG_PPC_BOOK3S_64 @@ -1268,10 +1355,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) err = kvmppc_mmu_init(vcpu); if (err < 0) - goto free_shadow_vcpu; + goto uninit_vcpu; return vcpu; +uninit_vcpu: + kvm_vcpu_uninit(vcpu); free_shadow_vcpu: kfree(vcpu_book3s->shadow_vcpu); free_vcpu: @@ -1284,6 +1373,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); kvm_vcpu_uninit(vcpu); kfree(vcpu_book3s->shadow_vcpu); vfree(vcpu_book3s); @@ -1346,7 +1436,7 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) local_irq_enable(); /* Preload FPU if it's enabled */ - if (vcpu->arch.msr & MSR_FP) + if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); ret = __kvmppc_vcpu_entry(kvm_run, vcpu); diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 3292d76101d..c8cefdd15fd 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -58,14 +58,39 @@ static inline bool check_debug_ip(struct kvm_vcpu *vcpu) #endif } +static inline u32 sr_vsid(u32 sr_raw) +{ + return sr_raw & 0x0fffffff; +} + +static inline bool sr_valid(u32 sr_raw) +{ + return (sr_raw & 0x80000000) ? false : true; +} + +static inline bool sr_ks(u32 sr_raw) +{ + return (sr_raw & 0x40000000) ? true: false; +} + +static inline bool sr_kp(u32 sr_raw) +{ + return (sr_raw & 0x20000000) ? true: false; +} + +static inline bool sr_nx(u32 sr_raw) +{ + return (sr_raw & 0x10000000) ? true: false; +} + static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); -static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr) +static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr) { - return &vcpu_book3s->sr[(eaddr >> 28) & 0xf]; + return vcpu->arch.shared->sr[(eaddr >> 28) & 0xf]; } static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, @@ -87,7 +112,7 @@ static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) } static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s, - struct kvmppc_sr *sre, gva_t eaddr, + u32 sre, gva_t eaddr, bool primary) { u32 page, hash, pteg, htabmask; @@ -96,7 +121,7 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3 page = (eaddr & 0x0FFFFFFF) >> 12; htabmask = ((vcpu_book3s->sdr1 & 0x1FF) << 16) | 0xFFC0; - hash = ((sre->vsid ^ page) << 6); + hash = ((sr_vsid(sre) ^ page) << 6); if (!primary) hash = ~hash; hash &= htabmask; @@ -104,8 +129,8 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3 pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash; dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n", - vcpu_book3s->vcpu.arch.pc, eaddr, vcpu_book3s->sdr1, pteg, - sre->vsid); + kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg, + sr_vsid(sre)); r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); if (kvm_is_error_hva(r)) @@ -113,10 +138,9 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3 return r | (pteg & ~PAGE_MASK); } -static u32 kvmppc_mmu_book3s_32_get_ptem(struct kvmppc_sr *sre, gva_t eaddr, - bool primary) +static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary) { - return ((eaddr & 0x0fffffff) >> 22) | (sre->vsid << 7) | + return ((eaddr & 0x0fffffff) >> 22) | (sr_vsid(sre) << 7) | (primary ? 0 : 0x40) | 0x80000000; } @@ -133,7 +157,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, else bat = &vcpu_book3s->ibat[i]; - if (vcpu->arch.msr & MSR_PR) { + if (vcpu->arch.shared->msr & MSR_PR) { if (!bat->vp) continue; } else { @@ -180,17 +204,17 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, bool primary) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); - struct kvmppc_sr *sre; + u32 sre; hva_t ptegp; u32 pteg[16]; u32 ptem = 0; int i; int found = 0; - sre = find_sr(vcpu_book3s, eaddr); + sre = find_sr(vcpu, eaddr); dprintk_pte("SR 0x%lx: vsid=0x%x, raw=0x%x\n", eaddr >> 28, - sre->vsid, sre->raw); + sr_vsid(sre), sre); pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); @@ -214,8 +238,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF); pp = pteg[i+1] & 3; - if ((sre->Kp && (vcpu->arch.msr & MSR_PR)) || - (sre->Ks && !(vcpu->arch.msr & MSR_PR))) + if ((sr_kp(sre) && (vcpu->arch.shared->msr & MSR_PR)) || + (sr_ks(sre) && !(vcpu->arch.shared->msr & MSR_PR))) pp |= 4; pte->may_write = false; @@ -269,7 +293,7 @@ no_page_found: dprintk_pte("KVM MMU: No PTE found (sdr1=0x%llx ptegp=0x%lx)\n", to_book3s(vcpu)->sdr1, ptegp); for (i=0; i<16; i+=2) { - dprintk_pte(" %02d: 0x%x - 0x%x (0x%llx)\n", + dprintk_pte(" %02d: 0x%x - 0x%x (0x%x)\n", i, pteg[i], pteg[i+1], ptem); } } @@ -281,8 +305,24 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data) { int r; + ulong mp_ea = vcpu->arch.magic_page_ea; pte->eaddr = eaddr; + + /* Magic page override */ + if (unlikely(mp_ea) && + unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) && + !(vcpu->arch.shared->msr & MSR_PR)) { + pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); + pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff); + pte->raddr &= KVM_PAM; + pte->may_execute = true; + pte->may_read = true; + pte->may_write = true; + + return 0; + } + r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data); if (r < 0) r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true); @@ -295,30 +335,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum) { - return to_book3s(vcpu)->sr[srnum].raw; + return vcpu->arch.shared->sr[srnum]; } static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, ulong value) { - struct kvmppc_sr *sre; - - sre = &to_book3s(vcpu)->sr[srnum]; - - /* Flush any left-over shadows from the previous SR */ - - /* XXX Not necessary? */ - /* kvmppc_mmu_pte_flush(vcpu, ((u64)sre->vsid) << 28, 0xf0000000ULL); */ - - /* And then put in the new SR */ - sre->raw = value; - sre->vsid = (value & 0x0fffffff); - sre->valid = (value & 0x80000000) ? false : true; - sre->Ks = (value & 0x40000000) ? true : false; - sre->Kp = (value & 0x20000000) ? true : false; - sre->nx = (value & 0x10000000) ? true : false; - - /* Map the new segment */ + vcpu->arch.shared->sr[srnum] = value; kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT); } @@ -331,19 +354,19 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid) { ulong ea = esid << SID_SHIFT; - struct kvmppc_sr *sr; + u32 sr; u64 gvsid = esid; - if (vcpu->arch.msr & (MSR_DR|MSR_IR)) { - sr = find_sr(to_book3s(vcpu), ea); - if (sr->valid) - gvsid = sr->vsid; + if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { + sr = find_sr(vcpu, ea); + if (sr_valid(sr)) + gvsid = sr_vsid(sr); } /* In case we only have one of MSR_IR or MSR_DR set, let's put that in the real-mode context (and hope RM doesn't access high memory) */ - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { case 0: *vsid = VSID_REAL | esid; break; @@ -354,8 +377,8 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, *vsid = VSID_REAL_DR | gvsid; break; case MSR_DR|MSR_IR: - if (sr->valid) - *vsid = sr->vsid; + if (sr_valid(sr)) + *vsid = sr_vsid(sr); else *vsid = VSID_BAT | gvsid; break; @@ -363,7 +386,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, BUG(); } - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) *vsid |= VSID_PR; return 0; diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 0b51ef872c1..9fecbfbce77 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -19,7 +19,6 @@ */ #include <linux/kvm_host.h> -#include <linux/hash.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -77,7 +76,14 @@ void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) * a hash, so we don't waste cycles on looping */ static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) { - return hash_64(gvsid, SID_MAP_BITS); + return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); } @@ -86,7 +92,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) struct kvmppc_sid_map *map; u16 sid_map_mask; - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) gvsid |= VSID_PR; sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); @@ -147,8 +153,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) struct hpte_cache *pte; /* Get host physical address for gpa */ - hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpaddr)) { + hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); + if (is_error_pfn(hpaddr)) { printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); return -EINVAL; @@ -253,7 +259,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) u16 sid_map_mask; static int backwards_map = 0; - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) gvsid |= VSID_PR; /* We might get collisions that trap in preceding order, so let's @@ -269,18 +275,15 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) backwards_map = !backwards_map; /* Uh-oh ... out of mappings. Let's flush! */ - if (vcpu_book3s->vsid_next >= vcpu_book3s->vsid_max) { - vcpu_book3s->vsid_next = vcpu_book3s->vsid_first; + if (vcpu_book3s->vsid_next >= VSID_POOL_SIZE) { + vcpu_book3s->vsid_next = 0; memset(vcpu_book3s->sid_map, 0, sizeof(struct kvmppc_sid_map) * SID_MAP_NUM); kvmppc_mmu_pte_flush(vcpu, 0, 0); kvmppc_mmu_flush_segments(vcpu); } - map->host_vsid = vcpu_book3s->vsid_next; - - /* Would have to be 111 to be completely aligned with the rest of - Linux, but that is just way too little space! */ - vcpu_book3s->vsid_next+=1; + map->host_vsid = vcpu_book3s->vsid_pool[vcpu_book3s->vsid_next]; + vcpu_book3s->vsid_next++; map->guest_vsid = gvsid; map->valid = true; @@ -327,40 +330,38 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { + int i; + kvmppc_mmu_hpte_destroy(vcpu); preempt_disable(); - __destroy_context(to_book3s(vcpu)->context_id); + for (i = 0; i < SID_CONTEXTS; i++) + __destroy_context(to_book3s(vcpu)->context_id[i]); preempt_enable(); } /* From mm/mmu_context_hash32.c */ -#define CTX_TO_VSID(ctx) (((ctx) * (897 * 16)) & 0xffffff) +#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff) int kvmppc_mmu_init(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); int err; ulong sdr1; + int i; + int j; - err = __init_new_context(); - if (err < 0) - return -1; - vcpu3s->context_id = err; - - vcpu3s->vsid_max = CTX_TO_VSID(vcpu3s->context_id + 1) - 1; - vcpu3s->vsid_first = CTX_TO_VSID(vcpu3s->context_id); - -#if 0 /* XXX still doesn't guarantee uniqueness */ - /* We could collide with the Linux vsid space because the vsid - * wraps around at 24 bits. We're safe if we do our own space - * though, so let's always set the highest bit. */ + for (i = 0; i < SID_CONTEXTS; i++) { + err = __init_new_context(); + if (err < 0) + goto init_fail; + vcpu3s->context_id[i] = err; - vcpu3s->vsid_max |= 0x00800000; - vcpu3s->vsid_first |= 0x00800000; -#endif - BUG_ON(vcpu3s->vsid_max < vcpu3s->vsid_first); + /* Remember context id for this combination */ + for (j = 0; j < 16; j++) + vcpu3s->vsid_pool[(i * 16) + j] = CTX_TO_VSID(err, j); + } - vcpu3s->vsid_next = vcpu3s->vsid_first; + vcpu3s->vsid_next = 0; /* Remember where the HTAB is */ asm ( "mfsdr1 %0" : "=r"(sdr1) ); @@ -370,4 +371,14 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) kvmppc_mmu_hpte_init(vcpu); return 0; + +init_fail: + for (j = 0; j < i; j++) { + if (!vcpu3s->context_id[j]) + continue; + + __destroy_context(to_book3s(vcpu)->context_id[j]); + } + + return -1; } diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 4025ea26b3c..d7889ef3211 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -163,6 +163,22 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, bool found = false; bool perm_err = false; int second = 0; + ulong mp_ea = vcpu->arch.magic_page_ea; + + /* Magic page override */ + if (unlikely(mp_ea) && + unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) && + !(vcpu->arch.shared->msr & MSR_PR)) { + gpte->eaddr = eaddr; + gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); + gpte->raddr = vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff); + gpte->raddr &= KVM_PAM; + gpte->may_execute = true; + gpte->may_read = true; + gpte->may_write = true; + + return 0; + } slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, eaddr); if (!slbe) @@ -180,9 +196,9 @@ do_second: goto no_page_found; } - if ((vcpu->arch.msr & MSR_PR) && slbe->Kp) + if ((vcpu->arch.shared->msr & MSR_PR) && slbe->Kp) key = 4; - else if (!(vcpu->arch.msr & MSR_PR) && slbe->Ks) + else if (!(vcpu->arch.shared->msr & MSR_PR) && slbe->Ks) key = 4; for (i=0; i<16; i+=2) { @@ -381,7 +397,7 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) for (i = 1; i < vcpu_book3s->slb_nr; i++) vcpu_book3s->slb[i].valid = false; - if (vcpu->arch.msr & MSR_IR) { + if (vcpu->arch.shared->msr & MSR_IR) { kvmppc_mmu_flush_segments(vcpu); kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); } @@ -445,14 +461,15 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, ulong ea = esid << SID_SHIFT; struct kvmppc_slb *slb; u64 gvsid = esid; + ulong mp_ea = vcpu->arch.magic_page_ea; - if (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea); if (slb) gvsid = slb->vsid; } - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { case 0: *vsid = VSID_REAL | esid; break; @@ -464,7 +481,7 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, break; case MSR_DR|MSR_IR: if (!slb) - return -ENOENT; + goto no_slb; *vsid = gvsid; break; @@ -473,10 +490,21 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, break; } - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) *vsid |= VSID_PR; return 0; + +no_slb: + /* Catch magic page case */ + if (unlikely(mp_ea) && + unlikely(esid == (mp_ea >> SID_SHIFT)) && + !(vcpu->arch.shared->msr & MSR_PR)) { + *vsid = VSID_REAL | esid; + return 0; + } + + return -EINVAL; } static bool kvmppc_mmu_book3s_64_is_dcbz32(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 384179a5002..fa2f08434ba 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -20,7 +20,6 @@ */ #include <linux/kvm_host.h> -#include <linux/hash.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -28,24 +27,9 @@ #include <asm/machdep.h> #include <asm/mmu_context.h> #include <asm/hw_irq.h> +#include "trace.h" #define PTE_SIZE 12 -#define VSID_ALL 0 - -/* #define DEBUG_MMU */ -/* #define DEBUG_SLB */ - -#ifdef DEBUG_MMU -#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) -#else -#define dprintk_mmu(a, ...) do { } while(0) -#endif - -#ifdef DEBUG_SLB -#define dprintk_slb(a, ...) printk(KERN_INFO a, __VA_ARGS__) -#else -#define dprintk_slb(a, ...) do { } while(0) -#endif void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { @@ -58,34 +42,39 @@ void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) * a hash, so we don't waste cycles on looping */ static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) { - return hash_64(gvsid, SID_MAP_BITS); + return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); } + static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) { struct kvmppc_sid_map *map; u16 sid_map_mask; - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) gvsid |= VSID_PR; sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); map = &to_book3s(vcpu)->sid_map[sid_map_mask]; - if (map->guest_vsid == gvsid) { - dprintk_slb("SLB: Searching: 0x%llx -> 0x%llx\n", - gvsid, map->host_vsid); + if (map->valid && (map->guest_vsid == gvsid)) { + trace_kvm_book3s_slb_found(gvsid, map->host_vsid); return map; } map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask]; - if (map->guest_vsid == gvsid) { - dprintk_slb("SLB: Searching 0x%llx -> 0x%llx\n", - gvsid, map->host_vsid); + if (map->valid && (map->guest_vsid == gvsid)) { + trace_kvm_book3s_slb_found(gvsid, map->host_vsid); return map; } - dprintk_slb("SLB: Searching %d/%d: 0x%llx -> not found\n", - sid_map_mask, SID_MAP_MASK - sid_map_mask, gvsid); + trace_kvm_book3s_slb_fail(sid_map_mask, gvsid); return NULL; } @@ -101,18 +90,13 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) struct kvmppc_sid_map *map; /* Get host physical address for gpa */ - hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpaddr)) { + hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); + if (is_error_pfn(hpaddr)) { printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); return -EINVAL; } hpaddr <<= PAGE_SHIFT; -#if PAGE_SHIFT == 12 -#elif PAGE_SHIFT == 16 - hpaddr |= orig_pte->raddr & 0xf000; -#else -#error Unknown page size -#endif + hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); /* and write the mapping ea -> hpa into the pt */ vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); @@ -161,10 +145,7 @@ map_again: } else { struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu); - dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n", - ((rflags & HPTE_R_PP) == 3) ? '-' : 'w', - (rflags & HPTE_R_N) ? '-' : 'x', - orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr); + trace_kvm_book3s_64_mmu_map(rflags, hpteg, va, hpaddr, orig_pte); /* The ppc_md code may give us a secondary entry even though we asked for a primary. Fix up. */ @@ -191,7 +172,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) u16 sid_map_mask; static int backwards_map = 0; - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) gvsid |= VSID_PR; /* We might get collisions that trap in preceding order, so let's @@ -219,8 +200,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) map->guest_vsid = gvsid; map->valid = true; - dprintk_slb("SLB: New mapping at %d: 0x%llx -> 0x%llx\n", - sid_map_mask, gvsid, map->host_vsid); + trace_kvm_book3s_slb_map(sid_map_mask, gvsid, map->host_vsid); return map; } @@ -292,7 +272,7 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) to_svcpu(vcpu)->slb[slb_index].esid = slb_esid; to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid; - dprintk_slb("slbmte %#llx, %#llx\n", slb_vsid, slb_esid); + trace_kvm_book3s_slbmte(slb_vsid, slb_esid); return 0; } @@ -306,7 +286,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { kvmppc_mmu_hpte_destroy(vcpu); - __destroy_context(to_book3s(vcpu)->context_id); + __destroy_context(to_book3s(vcpu)->context_id[0]); } int kvmppc_mmu_init(struct kvm_vcpu *vcpu) @@ -317,10 +297,10 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) err = __init_new_context(); if (err < 0) return -1; - vcpu3s->context_id = err; + vcpu3s->context_id[0] = err; - vcpu3s->vsid_max = ((vcpu3s->context_id + 1) << USER_ESID_BITS) - 1; - vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS; + vcpu3s->vsid_max = ((vcpu3s->context_id[0] + 1) << USER_ESID_BITS) - 1; + vcpu3s->vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS; vcpu3s->vsid_next = vcpu3s->vsid_first; kvmppc_mmu_hpte_init(vcpu); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index c85f906038c..46684655708 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -73,8 +73,8 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (get_xop(inst)) { case OP_19_XOP_RFID: case OP_19_XOP_RFI: - kvmppc_set_pc(vcpu, vcpu->arch.srr0); - kvmppc_set_msr(vcpu, vcpu->arch.srr1); + kvmppc_set_pc(vcpu, vcpu->arch.shared->srr0); + kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); *advance = 0; break; @@ -86,14 +86,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case 31: switch (get_xop(inst)) { case OP_31_XOP_MFMSR: - kvmppc_set_gpr(vcpu, get_rt(inst), vcpu->arch.msr); + kvmppc_set_gpr(vcpu, get_rt(inst), + vcpu->arch.shared->msr); break; case OP_31_XOP_MTMSRD: { ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst)); if (inst & 0x10000) { - vcpu->arch.msr &= ~(MSR_RI | MSR_EE); - vcpu->arch.msr |= rs & (MSR_RI | MSR_EE); + vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE); + vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE); } else kvmppc_set_msr(vcpu, rs); break; @@ -204,14 +205,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, ra = kvmppc_get_gpr(vcpu, get_ra(inst)); addr = (ra + rb) & ~31ULL; - if (!(vcpu->arch.msr & MSR_SF)) + if (!(vcpu->arch.shared->msr & MSR_SF)) addr &= 0xffffffff; vaddr = addr; r = kvmppc_st(vcpu, &addr, 32, zeros, true); if ((r == -ENOENT) || (r == -EPERM)) { *advance = 0; - vcpu->arch.dear = vaddr; + vcpu->arch.shared->dar = vaddr; to_svcpu(vcpu)->fault_dar = vaddr; dsisr = DSISR_ISSTORE; @@ -220,7 +221,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, else if (r == -EPERM) dsisr |= DSISR_PROTFAULT; - to_book3s(vcpu)->dsisr = dsisr; + vcpu->arch.shared->dsisr = dsisr; to_svcpu(vcpu)->fault_dsisr = dsisr; kvmppc_book3s_queue_irqprio(vcpu, @@ -263,7 +264,7 @@ void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, } } -static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn) +static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); struct kvmppc_bat *bat; @@ -285,35 +286,7 @@ static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn) BUG(); } - if (sprn % 2) - return bat->raw >> 32; - else - return bat->raw; -} - -static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) -{ - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); - struct kvmppc_bat *bat; - - switch (sprn) { - case SPRN_IBAT0U ... SPRN_IBAT3L: - bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; - break; - case SPRN_IBAT4U ... SPRN_IBAT7L: - bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; - break; - case SPRN_DBAT0U ... SPRN_DBAT3L: - bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; - break; - case SPRN_DBAT4U ... SPRN_DBAT7L: - bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; - break; - default: - BUG(); - } - - kvmppc_set_bat(vcpu, bat, !(sprn % 2), val); + return bat; } int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) @@ -326,10 +299,10 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) to_book3s(vcpu)->sdr1 = spr_val; break; case SPRN_DSISR: - to_book3s(vcpu)->dsisr = spr_val; + vcpu->arch.shared->dsisr = spr_val; break; case SPRN_DAR: - vcpu->arch.dear = spr_val; + vcpu->arch.shared->dar = spr_val; break; case SPRN_HIOR: to_book3s(vcpu)->hior = spr_val; @@ -338,12 +311,16 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_IBAT4U ... SPRN_IBAT7L: case SPRN_DBAT0U ... SPRN_DBAT3L: case SPRN_DBAT4U ... SPRN_DBAT7L: - kvmppc_write_bat(vcpu, sprn, (u32)spr_val); + { + struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn); + + kvmppc_set_bat(vcpu, bat, !(sprn % 2), (u32)spr_val); /* BAT writes happen so rarely that we're ok to flush * everything here */ kvmppc_mmu_pte_flush(vcpu, 0, 0); kvmppc_mmu_flush_segments(vcpu); break; + } case SPRN_HID0: to_book3s(vcpu)->hid[0] = spr_val; break; @@ -433,16 +410,24 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_IBAT4U ... SPRN_IBAT7L: case SPRN_DBAT0U ... SPRN_DBAT3L: case SPRN_DBAT4U ... SPRN_DBAT7L: - kvmppc_set_gpr(vcpu, rt, kvmppc_read_bat(vcpu, sprn)); + { + struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn); + + if (sprn % 2) + kvmppc_set_gpr(vcpu, rt, bat->raw >> 32); + else + kvmppc_set_gpr(vcpu, rt, bat->raw); + break; + } case SPRN_SDR1: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); break; case SPRN_DSISR: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->dsisr); + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr); break; case SPRN_DAR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; case SPRN_HIOR: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior); diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 4868d4a7ebc..79751d8dd13 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -21,6 +21,7 @@ #include <linux/kvm_host.h> #include <linux/hash.h> #include <linux/slab.h> +#include "trace.h" #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -30,14 +31,6 @@ #define PTE_SIZE 12 -/* #define DEBUG_MMU */ - -#ifdef DEBUG_MMU -#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) -#else -#define dprintk_mmu(a, ...) do { } while(0) -#endif - static struct kmem_cache *hpte_cache; static inline u64 kvmppc_mmu_hash_pte(u64 eaddr) @@ -45,6 +38,12 @@ static inline u64 kvmppc_mmu_hash_pte(u64 eaddr) return hash_64(eaddr >> PTE_SIZE, HPTEG_HASH_BITS_PTE); } +static inline u64 kvmppc_mmu_hash_pte_long(u64 eaddr) +{ + return hash_64((eaddr & 0x0ffff000) >> PTE_SIZE, + HPTEG_HASH_BITS_PTE_LONG); +} + static inline u64 kvmppc_mmu_hash_vpte(u64 vpage) { return hash_64(vpage & 0xfffffffffULL, HPTEG_HASH_BITS_VPTE); @@ -60,77 +59,128 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { u64 index; + trace_kvm_book3s_mmu_map(pte); + + spin_lock(&vcpu->arch.mmu_lock); + /* Add to ePTE list */ index = kvmppc_mmu_hash_pte(pte->pte.eaddr); - hlist_add_head(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]); + hlist_add_head_rcu(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]); + + /* Add to ePTE_long list */ + index = kvmppc_mmu_hash_pte_long(pte->pte.eaddr); + hlist_add_head_rcu(&pte->list_pte_long, + &vcpu->arch.hpte_hash_pte_long[index]); /* Add to vPTE list */ index = kvmppc_mmu_hash_vpte(pte->pte.vpage); - hlist_add_head(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]); + hlist_add_head_rcu(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]); /* Add to vPTE_long list */ index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage); - hlist_add_head(&pte->list_vpte_long, - &vcpu->arch.hpte_hash_vpte_long[index]); + hlist_add_head_rcu(&pte->list_vpte_long, + &vcpu->arch.hpte_hash_vpte_long[index]); + + spin_unlock(&vcpu->arch.mmu_lock); +} + +static void free_pte_rcu(struct rcu_head *head) +{ + struct hpte_cache *pte = container_of(head, struct hpte_cache, rcu_head); + kmem_cache_free(hpte_cache, pte); } static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { - dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n", - pte->pte.eaddr, pte->pte.vpage, pte->host_va); + trace_kvm_book3s_mmu_invalidate(pte); /* Different for 32 and 64 bit */ kvmppc_mmu_invalidate_pte(vcpu, pte); + spin_lock(&vcpu->arch.mmu_lock); + + /* pte already invalidated in between? */ + if (hlist_unhashed(&pte->list_pte)) { + spin_unlock(&vcpu->arch.mmu_lock); + return; + } + + hlist_del_init_rcu(&pte->list_pte); + hlist_del_init_rcu(&pte->list_pte_long); + hlist_del_init_rcu(&pte->list_vpte); + hlist_del_init_rcu(&pte->list_vpte_long); + if (pte->pte.may_write) kvm_release_pfn_dirty(pte->pfn); else kvm_release_pfn_clean(pte->pfn); - hlist_del(&pte->list_pte); - hlist_del(&pte->list_vpte); - hlist_del(&pte->list_vpte_long); + spin_unlock(&vcpu->arch.mmu_lock); vcpu->arch.hpte_cache_count--; - kmem_cache_free(hpte_cache, pte); + call_rcu(&pte->rcu_head, free_pte_rcu); } static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) { struct hpte_cache *pte; - struct hlist_node *node, *tmp; + struct hlist_node *node; int i; + rcu_read_lock(); + for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; - hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) invalidate_pte(vcpu, pte); } + + rcu_read_unlock(); } static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) { struct hlist_head *list; - struct hlist_node *node, *tmp; + struct hlist_node *node; struct hpte_cache *pte; /* Find the list of entries in the map */ list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)]; + rcu_read_lock(); + /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_safe(pte, node, tmp, list, list_pte) + hlist_for_each_entry_rcu(pte, node, list, list_pte) if ((pte->pte.eaddr & ~0xfffUL) == guest_ea) invalidate_pte(vcpu, pte); + + rcu_read_unlock(); } -void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) +static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea) { - u64 i; + struct hlist_head *list; + struct hlist_node *node; + struct hpte_cache *pte; - dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n", - vcpu->arch.hpte_cache_count, guest_ea, ea_mask); + /* Find the list of entries in the map */ + list = &vcpu->arch.hpte_hash_pte_long[ + kvmppc_mmu_hash_pte_long(guest_ea)]; + rcu_read_lock(); + + /* Check the list for matching entries and invalidate */ + hlist_for_each_entry_rcu(pte, node, list, list_pte_long) + if ((pte->pte.eaddr & 0x0ffff000UL) == guest_ea) + invalidate_pte(vcpu, pte); + + rcu_read_unlock(); +} + +void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) +{ + trace_kvm_book3s_mmu_flush("", vcpu, guest_ea, ea_mask); guest_ea &= ea_mask; switch (ea_mask) { @@ -138,9 +188,7 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) kvmppc_mmu_pte_flush_page(vcpu, guest_ea); break; case 0x0ffff000: - /* 32-bit flush w/o segment, go through all possible segments */ - for (i = 0; i < 0x100000000ULL; i += 0x10000000ULL) - kvmppc_mmu_pte_flush(vcpu, guest_ea | i, ~0xfffUL); + kvmppc_mmu_pte_flush_long(vcpu, guest_ea); break; case 0: /* Doing a complete flush -> start from scratch */ @@ -156,39 +204,46 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) { struct hlist_head *list; - struct hlist_node *node, *tmp; + struct hlist_node *node; struct hpte_cache *pte; u64 vp_mask = 0xfffffffffULL; list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)]; + rcu_read_lock(); + /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte) + hlist_for_each_entry_rcu(pte, node, list, list_vpte) if ((pte->pte.vpage & vp_mask) == guest_vp) invalidate_pte(vcpu, pte); + + rcu_read_unlock(); } /* Flush with mask 0xffffff000 */ static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) { struct hlist_head *list; - struct hlist_node *node, *tmp; + struct hlist_node *node; struct hpte_cache *pte; u64 vp_mask = 0xffffff000ULL; list = &vcpu->arch.hpte_hash_vpte_long[ kvmppc_mmu_hash_vpte_long(guest_vp)]; + rcu_read_lock(); + /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) if ((pte->pte.vpage & vp_mask) == guest_vp) invalidate_pte(vcpu, pte); + + rcu_read_unlock(); } void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) { - dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", - vcpu->arch.hpte_cache_count, guest_vp, vp_mask); + trace_kvm_book3s_mmu_flush("v", vcpu, guest_vp, vp_mask); guest_vp &= vp_mask; switch(vp_mask) { @@ -206,21 +261,24 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) { - struct hlist_node *node, *tmp; + struct hlist_node *node; struct hpte_cache *pte; int i; - dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx - 0x%lx\n", - vcpu->arch.hpte_cache_count, pa_start, pa_end); + trace_kvm_book3s_mmu_flush("p", vcpu, pa_start, pa_end); + + rcu_read_lock(); for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; - hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) if ((pte->pte.raddr >= pa_start) && (pte->pte.raddr < pa_end)) invalidate_pte(vcpu, pte); } + + rcu_read_unlock(); } struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) @@ -254,11 +312,15 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu) /* init hpte lookup hashes */ kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte, ARRAY_SIZE(vcpu->arch.hpte_hash_pte)); + kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte_long, + ARRAY_SIZE(vcpu->arch.hpte_hash_pte_long)); kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte, ARRAY_SIZE(vcpu->arch.hpte_hash_vpte)); kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long, ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long)); + spin_lock_init(&vcpu->arch.mmu_lock); + return 0; } diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index 474f2e24050..7b0ee96c1be 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -159,20 +159,21 @@ static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) { - kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt], &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt]); } static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) { u64 dsisr; + struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared; - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 36, 0); - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); - vcpu->arch.dear = eaddr; + shared->msr = kvmppc_set_field(shared->msr, 33, 36, 0); + shared->msr = kvmppc_set_field(shared->msr, 42, 47, 0); + shared->dar = eaddr; /* Page Fault */ dsisr = kvmppc_set_field(0, 33, 33, 1); if (is_store) - to_book3s(vcpu)->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); + shared->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); } @@ -204,7 +205,7 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, /* put in registers */ switch (ls_type) { case FPU_LS_SINGLE: - kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs], &vcpu->arch.fpscr); + kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs]); vcpu->arch.qpr[rs] = *((u32*)tmp); break; case FPU_LS_DOUBLE: @@ -230,7 +231,7 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (ls_type) { case FPU_LS_SINGLE: - kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp, &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp); val = *((u32*)tmp); len = sizeof(u32); break; @@ -296,7 +297,7 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = EMULATE_DONE; /* put in registers */ - kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs], &vcpu->arch.fpscr); + kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs]); vcpu->arch.qpr[rs] = tmp[1]; dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], @@ -314,7 +315,7 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u32 tmp[2]; int len = w ? sizeof(u32) : sizeof(u64); - kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0], &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0]); tmp[1] = vcpu->arch.qpr[rs]; r = kvmppc_st(vcpu, &addr, len, tmp, true); @@ -516,9 +517,9 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); - kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); - kvm_cvt_df(&fpr[reg_in3], &ps0_in3, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in1], &ps0_in1); + kvm_cvt_df(&fpr[reg_in2], &ps0_in2); + kvm_cvt_df(&fpr[reg_in3], &ps0_in3); if (scalar & SCALAR_LOW) ps0_in2 = qpr[reg_in2]; @@ -529,7 +530,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, ps0_in1, ps0_in2, ps0_in3, ps0_out); if (!(scalar & SCALAR_NO_PS0)) - kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); + kvm_cvt_fd(&ps0_out, &fpr[reg_out]); /* PS1 */ ps1_in1 = qpr[reg_in1]; @@ -566,12 +567,12 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in1], &ps0_in1); if (scalar & SCALAR_LOW) ps0_in2 = qpr[reg_in2]; else - kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in2], &ps0_in2); func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2); @@ -579,7 +580,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", ps0_in1, ps0_in2, ps0_out); - kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); + kvm_cvt_fd(&ps0_out, &fpr[reg_out]); } /* PS1 */ @@ -615,13 +616,13 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in], &ps0_in, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in], &ps0_in); func(&vcpu->arch.fpscr, &ps0_out, &ps0_in); dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", ps0_in, ps0_out); - kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); + kvm_cvt_fd(&ps0_out, &fpr[reg_out]); /* PS1 */ ps1_in = qpr[reg_in]; @@ -658,7 +659,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) if (!kvmppc_inst_is_paired_single(vcpu, inst)) return EMULATE_FAIL; - if (!(vcpu->arch.msr & MSR_FP)) { + if (!(vcpu->arch.shared->msr & MSR_FP)) { kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL); return EMULATE_AGAIN; } @@ -671,7 +672,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) #ifdef DEBUG for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { u32 f; - kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[i], &f); dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); } @@ -796,8 +797,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ kvm_cvt_df(&vcpu->arch.fpr[ax_rb], - &vcpu->arch.qpr[ax_rd], - &vcpu->arch.fpscr); + &vcpu->arch.qpr[ax_rd]); break; case OP_4X_PS_MERGE01: WARN_ON(rcomp); @@ -808,19 +808,16 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) WARN_ON(rcomp); /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], - &vcpu->arch.fpr[ax_rd], - &vcpu->arch.fpscr); + &vcpu->arch.fpr[ax_rd]); /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ kvm_cvt_df(&vcpu->arch.fpr[ax_rb], - &vcpu->arch.qpr[ax_rd], - &vcpu->arch.fpscr); + &vcpu->arch.qpr[ax_rd]); break; case OP_4X_PS_MERGE11: WARN_ON(rcomp); /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], - &vcpu->arch.fpr[ax_rd], - &vcpu->arch.fpscr); + &vcpu->arch.fpr[ax_rd]); vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; break; } @@ -1255,7 +1252,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) #ifdef DEBUG for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { u32 f; - kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[i], &f); dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); } #endif diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 506d5c316c9..2b9c9088d00 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -202,8 +202,25 @@ _GLOBAL(kvmppc_rmcall) #if defined(CONFIG_PPC_BOOK3S_32) #define STACK_LR INT_FRAME_SIZE+4 + +/* load_up_xxx have to run with MSR_DR=0 on Book3S_32 */ +#define MSR_EXT_START \ + PPC_STL r20, _NIP(r1); \ + mfmsr r20; \ + LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \ + andc r3,r20,r3; /* Disable DR,EE */ \ + mtmsr r3; \ + sync + +#define MSR_EXT_END \ + mtmsr r20; /* Enable DR,EE */ \ + sync; \ + PPC_LL r20, _NIP(r1) + #elif defined(CONFIG_PPC_BOOK3S_64) #define STACK_LR _LINK +#define MSR_EXT_START +#define MSR_EXT_END #endif /* @@ -215,19 +232,12 @@ _GLOBAL(kvmppc_load_up_ ## what); \ PPC_STLU r1, -INT_FRAME_SIZE(r1); \ mflr r3; \ PPC_STL r3, STACK_LR(r1); \ - PPC_STL r20, _NIP(r1); \ - mfmsr r20; \ - LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \ - andc r3,r20,r3; /* Disable DR,EE */ \ - mtmsr r3; \ - sync; \ + MSR_EXT_START; \ \ bl FUNC(load_up_ ## what); \ \ - mtmsr r20; /* Enable DR,EE */ \ - sync; \ + MSR_EXT_END; \ PPC_LL r3, STACK_LR(r1); \ - PPC_LL r20, _NIP(r1); \ mtlr r3; \ addi r1, r1, INT_FRAME_SIZE; \ blr @@ -242,10 +252,10 @@ define_load_up(vsx) .global kvmppc_trampoline_lowmem kvmppc_trampoline_lowmem: - .long kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START + PPC_LONG kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START .global kvmppc_trampoline_enter kvmppc_trampoline_enter: - .long kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START + PPC_LONG kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START #include "book3s_segment.S" diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 8d4e35f5372..77575d08c81 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -62,9 +62,10 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) { int i; - printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr); + printk("pc: %08lx msr: %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr); printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); - printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1); + printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0, + vcpu->arch.shared->srr1); printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); @@ -130,13 +131,19 @@ void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); + unsigned int prio = BOOKE_IRQPRIO_EXTERNAL; + + if (irq->irq == KVM_INTERRUPT_SET_LEVEL) + prio = BOOKE_IRQPRIO_EXTERNAL_LEVEL; + + kvmppc_booke_queue_irqprio(vcpu, prio); } void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); + clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); } /* Deliver the interrupt of the corresponding priority, if possible. */ @@ -146,6 +153,26 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, int allowed = 0; ulong uninitialized_var(msr_mask); bool update_esr = false, update_dear = false; + ulong crit_raw = vcpu->arch.shared->critical; + ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); + bool crit; + bool keep_irq = false; + + /* Truncate crit indicators in 32 bit mode */ + if (!(vcpu->arch.shared->msr & MSR_SF)) { + crit_raw &= 0xffffffff; + crit_r1 &= 0xffffffff; + } + + /* Critical section when crit == r1 */ + crit = (crit_raw == crit_r1); + /* ... and we're in supervisor mode */ + crit = crit && !(vcpu->arch.shared->msr & MSR_PR); + + if (priority == BOOKE_IRQPRIO_EXTERNAL_LEVEL) { + priority = BOOKE_IRQPRIO_EXTERNAL; + keep_irq = true; + } switch (priority) { case BOOKE_IRQPRIO_DTLB_MISS: @@ -169,36 +196,38 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, break; case BOOKE_IRQPRIO_CRITICAL: case BOOKE_IRQPRIO_WATCHDOG: - allowed = vcpu->arch.msr & MSR_CE; + allowed = vcpu->arch.shared->msr & MSR_CE; msr_mask = MSR_ME; break; case BOOKE_IRQPRIO_MACHINE_CHECK: - allowed = vcpu->arch.msr & MSR_ME; + allowed = vcpu->arch.shared->msr & MSR_ME; msr_mask = 0; break; case BOOKE_IRQPRIO_EXTERNAL: case BOOKE_IRQPRIO_DECREMENTER: case BOOKE_IRQPRIO_FIT: - allowed = vcpu->arch.msr & MSR_EE; + allowed = vcpu->arch.shared->msr & MSR_EE; + allowed = allowed && !crit; msr_mask = MSR_CE|MSR_ME|MSR_DE; break; case BOOKE_IRQPRIO_DEBUG: - allowed = vcpu->arch.msr & MSR_DE; + allowed = vcpu->arch.shared->msr & MSR_DE; msr_mask = MSR_ME; break; } if (allowed) { - vcpu->arch.srr0 = vcpu->arch.pc; - vcpu->arch.srr1 = vcpu->arch.msr; + vcpu->arch.shared->srr0 = vcpu->arch.pc; + vcpu->arch.shared->srr1 = vcpu->arch.shared->msr; vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; if (update_esr == true) vcpu->arch.esr = vcpu->arch.queued_esr; if (update_dear == true) - vcpu->arch.dear = vcpu->arch.queued_dear; - kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); + vcpu->arch.shared->dar = vcpu->arch.queued_dear; + kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); - clear_bit(priority, &vcpu->arch.pending_exceptions); + if (!keep_irq) + clear_bit(priority, &vcpu->arch.pending_exceptions); } return allowed; @@ -208,6 +237,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned long old_pending = vcpu->arch.pending_exceptions; unsigned int priority; priority = __ffs(*pending); @@ -219,6 +249,12 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) BITS_PER_BYTE * sizeof(*pending), priority + 1); } + + /* Tell the guest about our interrupt status */ + if (*pending) + vcpu->arch.shared->int_pending = 1; + else if (old_pending) + vcpu->arch.shared->int_pending = 0; } /** @@ -265,7 +301,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_PROGRAM: - if (vcpu->arch.msr & MSR_PR) { + if (vcpu->arch.shared->msr & MSR_PR) { /* Program traps generated by user-level software must be handled * by the guest kernel. */ kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr); @@ -337,7 +373,15 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_SYSCALL: - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); + if (!(vcpu->arch.shared->msr & MSR_PR) && + (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { + /* KVM PV hypercalls */ + kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); + r = RESUME_GUEST; + } else { + /* Guest syscalls */ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); + } kvmppc_account_exit(vcpu, SYSCALL_EXITS); r = RESUME_GUEST; break; @@ -466,15 +510,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { + int i; + vcpu->arch.pc = 0; - vcpu->arch.msr = 0; + vcpu->arch.shared->msr = 0; kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ vcpu->arch.shadow_pid = 1; - /* Eye-catching number so we know if the guest takes an interrupt - * before it's programmed its own IVPR. */ + /* Eye-catching numbers so we know if the guest takes an interrupt + * before it's programmed its own IVPR/IVORs. */ vcpu->arch.ivpr = 0x55550000; + for (i = 0; i < BOOKE_IRQPRIO_MAX; i++) + vcpu->arch.ivor[i] = 0x7700 | i * 4; kvmppc_init_timing_stats(vcpu); @@ -490,14 +538,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->ctr = vcpu->arch.ctr; regs->lr = vcpu->arch.lr; regs->xer = kvmppc_get_xer(vcpu); - regs->msr = vcpu->arch.msr; - regs->srr0 = vcpu->arch.srr0; - regs->srr1 = vcpu->arch.srr1; + regs->msr = vcpu->arch.shared->msr; + regs->srr0 = vcpu->arch.shared->srr0; + regs->srr1 = vcpu->arch.shared->srr1; regs->pid = vcpu->arch.pid; - regs->sprg0 = vcpu->arch.sprg0; - regs->sprg1 = vcpu->arch.sprg1; - regs->sprg2 = vcpu->arch.sprg2; - regs->sprg3 = vcpu->arch.sprg3; + regs->sprg0 = vcpu->arch.shared->sprg0; + regs->sprg1 = vcpu->arch.shared->sprg1; + regs->sprg2 = vcpu->arch.shared->sprg2; + regs->sprg3 = vcpu->arch.shared->sprg3; regs->sprg5 = vcpu->arch.sprg4; regs->sprg6 = vcpu->arch.sprg5; regs->sprg7 = vcpu->arch.sprg6; @@ -518,12 +566,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.lr = regs->lr; kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); - vcpu->arch.srr0 = regs->srr0; - vcpu->arch.srr1 = regs->srr1; - vcpu->arch.sprg0 = regs->sprg0; - vcpu->arch.sprg1 = regs->sprg1; - vcpu->arch.sprg2 = regs->sprg2; - vcpu->arch.sprg3 = regs->sprg3; + vcpu->arch.shared->srr0 = regs->srr0; + vcpu->arch.shared->srr1 = regs->srr1; + vcpu->arch.shared->sprg0 = regs->sprg0; + vcpu->arch.shared->sprg1 = regs->sprg1; + vcpu->arch.shared->sprg2 = regs->sprg2; + vcpu->arch.shared->sprg3 = regs->sprg3; vcpu->arch.sprg5 = regs->sprg4; vcpu->arch.sprg6 = regs->sprg5; vcpu->arch.sprg7 = regs->sprg6; diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index d59bcca1f9d..492bb703035 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -46,7 +46,9 @@ #define BOOKE_IRQPRIO_FIT 17 #define BOOKE_IRQPRIO_DECREMENTER 18 #define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19 -#define BOOKE_IRQPRIO_MAX 19 +/* Internal pseudo-irqprio for level triggered externals */ +#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20 +#define BOOKE_IRQPRIO_MAX 20 extern unsigned long kvmppc_booke_handlers; @@ -54,12 +56,12 @@ extern unsigned long kvmppc_booke_handlers; * changing. */ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) { - if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) + if ((new_msr & MSR_PR) != (vcpu->arch.shared->msr & MSR_PR)) kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); - vcpu->arch.msr = new_msr; + vcpu->arch.shared->msr = new_msr; - if (vcpu->arch.msr & MSR_WE) { + if (vcpu->arch.shared->msr & MSR_WE) { kvm_vcpu_block(vcpu); kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); }; diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index cbc790ee192..1260f5f24c0 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -31,8 +31,8 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) { - vcpu->arch.pc = vcpu->arch.srr0; - kvmppc_set_msr(vcpu, vcpu->arch.srr1); + vcpu->arch.pc = vcpu->arch.shared->srr0; + kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); } int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -62,7 +62,7 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case OP_31_XOP_MFMSR: rt = get_rt(inst); - kvmppc_set_gpr(vcpu, rt, vcpu->arch.msr); + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr); kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); break; @@ -74,13 +74,13 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case OP_31_XOP_WRTEE: rs = get_rs(inst); - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE) | (kvmppc_get_gpr(vcpu, rs) & MSR_EE); kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); break; case OP_31_XOP_WRTEEI: - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE) | (inst & MSR_EE); kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); break; @@ -105,7 +105,7 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) switch (sprn) { case SPRN_DEAR: - vcpu->arch.dear = spr_val; break; + vcpu->arch.shared->dar = spr_val; break; case SPRN_ESR: vcpu->arch.esr = spr_val; break; case SPRN_DBCR0: @@ -200,7 +200,7 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_IVPR: kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break; case SPRN_DEAR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; case SPRN_ESR: kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break; case SPRN_DBCR0: diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 380a78cf484..049846911ce 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -415,7 +415,8 @@ lightweight_exit: lwz r8, VCPU_GPR(r8)(r4) lwz r3, VCPU_PC(r4) mtsrr0 r3 - lwz r3, VCPU_MSR(r4) + lwz r3, VCPU_SHARED(r4) + lwz r3, VCPU_SHARED_MSR(r3) oris r3, r3, KVMPPC_MSR_MASK@h ori r3, r3, KVMPPC_MSR_MASK@l mtsrr1 r3 diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index e8a00b0c444..71750f2dd5d 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -117,8 +117,14 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto uninit_vcpu; + vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!vcpu->arch.shared) + goto uninit_tlb; + return vcpu; +uninit_tlb: + kvmppc_e500_tlb_uninit(vcpu_e500); uninit_vcpu: kvm_vcpu_uninit(vcpu); free_vcpu: @@ -131,6 +137,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + free_page((unsigned long)vcpu->arch.shared); kvmppc_e500_tlb_uninit(vcpu_e500); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu_e500); diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 21011e12cae..d6d6d47a75a 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -226,8 +226,7 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); stlbe->mas1 = 0; - trace_kvm_stlb_inval(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, - stlbe->mas3, stlbe->mas7); + trace_kvm_stlb_inval(index_of(tlbsel, esel)); } static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, @@ -298,7 +297,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* Get reference to new page. */ new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn); if (is_error_page(new_page)) { - printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); + printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", + (long)gfn); kvm_release_page_clean(new_page); return; } @@ -314,10 +314,10 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; stlbe->mas2 = (gvaddr & MAS2_EPN) | e500_shadow_mas2_attrib(gtlbe->mas2, - vcpu_e500->vcpu.arch.msr & MSR_PR); + vcpu_e500->vcpu.arch.shared->msr & MSR_PR); stlbe->mas3 = (hpaddr & MAS3_RPN) | e500_shadow_mas3_attrib(gtlbe->mas3, - vcpu_e500->vcpu.arch.msr & MSR_PR); + vcpu_e500->vcpu.arch.shared->msr & MSR_PR); stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN; trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, @@ -576,28 +576,28 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { - unsigned int as = !!(vcpu->arch.msr & MSR_IS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); } int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { - unsigned int as = !!(vcpu->arch.msr & MSR_DS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); } void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) { - unsigned int as = !!(vcpu->arch.msr & MSR_IS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as); } void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) { - unsigned int as = !!(vcpu->arch.msr & MSR_DS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as); } diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h index d28e3010a5e..458946b4775 100644 --- a/arch/powerpc/kvm/e500_tlb.h +++ b/arch/powerpc/kvm/e500_tlb.h @@ -171,7 +171,7 @@ static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, /* Does it match current guest AS? */ /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) return 0; gpa = get_tlb_raddr(tlbe); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 4568ec386c2..c64fd2909bb 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -145,7 +145,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); - pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); + pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); switch (get_op(inst)) { case OP_TRAP: @@ -242,9 +242,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) switch (sprn) { case SPRN_SRR0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr0); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0); + break; case SPRN_SRR1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr1); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1); + break; case SPRN_PVR: kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break; case SPRN_PIR: @@ -261,13 +263,17 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, rt, get_tb()); break; case SPRN_SPRG0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg0); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0); + break; case SPRN_SPRG1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg1); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1); + break; case SPRN_SPRG2: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg2); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2); + break; case SPRN_SPRG3: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg3); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3); + break; /* Note: SPRG4-7 are user-readable, so we don't get * a trap. */ @@ -275,7 +281,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { u64 jd = get_tb() - vcpu->arch.dec_jiffies; kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd); - pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n", + pr_debug("mfDEC: %x - %llx = %lx\n", vcpu->arch.dec, jd, kvmppc_get_gpr(vcpu, rt)); break; @@ -320,9 +326,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) rs = get_rs(inst); switch (sprn) { case SPRN_SRR0: - vcpu->arch.srr0 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs); + break; case SPRN_SRR1: - vcpu->arch.srr1 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs); + break; /* XXX We need to context-switch the timebase for * watchdog and FIT. */ @@ -337,13 +345,17 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case SPRN_SPRG0: - vcpu->arch.sprg0 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs); + break; case SPRN_SPRG1: - vcpu->arch.sprg1 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs); + break; case SPRN_SPRG2: - vcpu->arch.sprg2 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs); + break; case SPRN_SPRG3: - vcpu->arch.sprg3 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs); + break; default: emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S index cb34bbe1611..bf68d597549 100644 --- a/arch/powerpc/kvm/fpu.S +++ b/arch/powerpc/kvm/fpu.S @@ -273,19 +273,11 @@ FPD_THREE_IN(fnmsub) FPD_THREE_IN(fnmadd) _GLOBAL(kvm_cvt_fd) - lfd 0,0(r5) /* load up fpscr value */ - MTFSF_L(0) lfs 0,0(r3) stfd 0,0(r4) - mffs 0 - stfd 0,0(r5) /* save new fpscr value */ blr _GLOBAL(kvm_cvt_df) - lfd 0,0(r5) /* load up fpscr value */ - MTFSF_L(0) lfd 0,0(r3) stfs 0,0(r4) - mffs 0 - stfd 0,0(r5) /* save new fpscr value */ blr diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 72a4ad86ee9..2f87a1627f6 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -38,9 +38,56 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions); + return !(v->arch.shared->msr & MSR_WE) || + !!(v->arch.pending_exceptions); } +int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) +{ + int nr = kvmppc_get_gpr(vcpu, 11); + int r; + unsigned long __maybe_unused param1 = kvmppc_get_gpr(vcpu, 3); + unsigned long __maybe_unused param2 = kvmppc_get_gpr(vcpu, 4); + unsigned long __maybe_unused param3 = kvmppc_get_gpr(vcpu, 5); + unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6); + unsigned long r2 = 0; + + if (!(vcpu->arch.shared->msr & MSR_SF)) { + /* 32 bit mode */ + param1 &= 0xffffffff; + param2 &= 0xffffffff; + param3 &= 0xffffffff; + param4 &= 0xffffffff; + } + + switch (nr) { + case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE: + { + vcpu->arch.magic_page_pa = param1; + vcpu->arch.magic_page_ea = param2; + + r2 = KVM_MAGIC_FEAT_SR; + + r = HC_EV_SUCCESS; + break; + } + case HC_VENDOR_KVM | KVM_HC_FEATURES: + r = HC_EV_SUCCESS; +#if defined(CONFIG_PPC_BOOK3S) /* XXX Missing magic page on BookE */ + r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); +#endif + + /* Second return value is in r4 */ + break; + default: + r = HC_EV_UNIMPLEMENTED; + break; + } + + kvmppc_set_gpr(vcpu, 4, r2); + + return r; +} int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -145,8 +192,10 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_SEGSTATE: case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_UNSET_IRQ: + case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: case KVM_CAP_PPC_OSI: + case KVM_CAP_PPC_GET_PVINFO: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -534,16 +583,53 @@ out: return r; } +static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) +{ + u32 inst_lis = 0x3c000000; + u32 inst_ori = 0x60000000; + u32 inst_nop = 0x60000000; + u32 inst_sc = 0x44000002; + u32 inst_imm_mask = 0xffff; + + /* + * The hypercall to get into KVM from within guest context is as + * follows: + * + * lis r0, r0, KVM_SC_MAGIC_R0@h + * ori r0, KVM_SC_MAGIC_R0@l + * sc + * nop + */ + pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask); + pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask); + pvinfo->hcall[2] = inst_sc; + pvinfo->hcall[3] = inst_nop; + + return 0; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { + void __user *argp = (void __user *)arg; long r; switch (ioctl) { + case KVM_PPC_GET_PVINFO: { + struct kvm_ppc_pvinfo pvinfo; + r = kvm_vm_ioctl_get_pvinfo(&pvinfo); + if (copy_to_user(argp, &pvinfo, sizeof(pvinfo))) { + r = -EFAULT; + goto out; + } + + break; + } default: r = -ENOTTY; } +out: return r; } diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index a8e84001805..3aca1b042b8 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -98,6 +98,245 @@ TRACE_EVENT(kvm_gtlb_write, __entry->word1, __entry->word2) ); + +/************************************************************************* + * Book3S trace points * + *************************************************************************/ + +#ifdef CONFIG_PPC_BOOK3S + +TRACE_EVENT(kvm_book3s_exit, + TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), + TP_ARGS(exit_nr, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, exit_nr ) + __field( unsigned long, pc ) + __field( unsigned long, msr ) + __field( unsigned long, dar ) + __field( unsigned long, srr1 ) + ), + + TP_fast_assign( + __entry->exit_nr = exit_nr; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->dar = kvmppc_get_fault_dar(vcpu); + __entry->msr = vcpu->arch.shared->msr; + __entry->srr1 = to_svcpu(vcpu)->shadow_srr1; + ), + + TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx", + __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar, + __entry->srr1) +); + +TRACE_EVENT(kvm_book3s_reenter, + TP_PROTO(int r, struct kvm_vcpu *vcpu), + TP_ARGS(r, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, r ) + __field( unsigned long, pc ) + ), + + TP_fast_assign( + __entry->r = r; + __entry->pc = kvmppc_get_pc(vcpu); + ), + + TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc) +); + +#ifdef CONFIG_PPC_BOOK3S_64 + +TRACE_EVENT(kvm_book3s_64_mmu_map, + TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr, + struct kvmppc_pte *orig_pte), + TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte), + + TP_STRUCT__entry( + __field( unsigned char, flag_w ) + __field( unsigned char, flag_x ) + __field( unsigned long, eaddr ) + __field( unsigned long, hpteg ) + __field( unsigned long, va ) + __field( unsigned long long, vpage ) + __field( unsigned long, hpaddr ) + ), + + TP_fast_assign( + __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w'; + __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x'; + __entry->eaddr = orig_pte->eaddr; + __entry->hpteg = hpteg; + __entry->va = va; + __entry->vpage = orig_pte->vpage; + __entry->hpaddr = hpaddr; + ), + + TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx", + __entry->flag_w, __entry->flag_x, __entry->eaddr, + __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr) +); + +#endif /* CONFIG_PPC_BOOK3S_64 */ + +TRACE_EVENT(kvm_book3s_mmu_map, + TP_PROTO(struct hpte_cache *pte), + TP_ARGS(pte), + + TP_STRUCT__entry( + __field( u64, host_va ) + __field( u64, pfn ) + __field( ulong, eaddr ) + __field( u64, vpage ) + __field( ulong, raddr ) + __field( int, flags ) + ), + + TP_fast_assign( + __entry->host_va = pte->host_va; + __entry->pfn = pte->pfn; + __entry->eaddr = pte->pte.eaddr; + __entry->vpage = pte->pte.vpage; + __entry->raddr = pte->pte.raddr; + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | + (pte->pte.may_write ? 0x2 : 0) | + (pte->pte.may_execute ? 0x1 : 0); + ), + + TP_printk("Map: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", + __entry->host_va, __entry->pfn, __entry->eaddr, + __entry->vpage, __entry->raddr, __entry->flags) +); + +TRACE_EVENT(kvm_book3s_mmu_invalidate, + TP_PROTO(struct hpte_cache *pte), + TP_ARGS(pte), + + TP_STRUCT__entry( + __field( u64, host_va ) + __field( u64, pfn ) + __field( ulong, eaddr ) + __field( u64, vpage ) + __field( ulong, raddr ) + __field( int, flags ) + ), + + TP_fast_assign( + __entry->host_va = pte->host_va; + __entry->pfn = pte->pfn; + __entry->eaddr = pte->pte.eaddr; + __entry->vpage = pte->pte.vpage; + __entry->raddr = pte->pte.raddr; + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | + (pte->pte.may_write ? 0x2 : 0) | + (pte->pte.may_execute ? 0x1 : 0); + ), + + TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", + __entry->host_va, __entry->pfn, __entry->eaddr, + __entry->vpage, __entry->raddr, __entry->flags) +); + +TRACE_EVENT(kvm_book3s_mmu_flush, + TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1, + unsigned long long p2), + TP_ARGS(type, vcpu, p1, p2), + + TP_STRUCT__entry( + __field( int, count ) + __field( unsigned long long, p1 ) + __field( unsigned long long, p2 ) + __field( const char *, type ) + ), + + TP_fast_assign( + __entry->count = vcpu->arch.hpte_cache_count; + __entry->p1 = p1; + __entry->p2 = p2; + __entry->type = type; + ), + + TP_printk("Flush %d %sPTEs: %llx - %llx", + __entry->count, __entry->type, __entry->p1, __entry->p2) +); + +TRACE_EVENT(kvm_book3s_slb_found, + TP_PROTO(unsigned long long gvsid, unsigned long long hvsid), + TP_ARGS(gvsid, hvsid), + + TP_STRUCT__entry( + __field( unsigned long long, gvsid ) + __field( unsigned long long, hvsid ) + ), + + TP_fast_assign( + __entry->gvsid = gvsid; + __entry->hvsid = hvsid; + ), + + TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid) +); + +TRACE_EVENT(kvm_book3s_slb_fail, + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid), + TP_ARGS(sid_map_mask, gvsid), + + TP_STRUCT__entry( + __field( unsigned short, sid_map_mask ) + __field( unsigned long long, gvsid ) + ), + + TP_fast_assign( + __entry->sid_map_mask = sid_map_mask; + __entry->gvsid = gvsid; + ), + + TP_printk("%x/%x: %llx", __entry->sid_map_mask, + SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid) +); + +TRACE_EVENT(kvm_book3s_slb_map, + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid, + unsigned long long hvsid), + TP_ARGS(sid_map_mask, gvsid, hvsid), + + TP_STRUCT__entry( + __field( unsigned short, sid_map_mask ) + __field( unsigned long long, guest_vsid ) + __field( unsigned long long, host_vsid ) + ), + + TP_fast_assign( + __entry->sid_map_mask = sid_map_mask; + __entry->guest_vsid = gvsid; + __entry->host_vsid = hvsid; + ), + + TP_printk("%x: %llx -> %llx", __entry->sid_map_mask, + __entry->guest_vsid, __entry->host_vsid) +); + +TRACE_EVENT(kvm_book3s_slbmte, + TP_PROTO(u64 slb_vsid, u64 slb_esid), + TP_ARGS(slb_vsid, slb_esid), + + TP_STRUCT__entry( + __field( u64, slb_vsid ) + __field( u64, slb_esid ) + ), + + TP_fast_assign( + __entry->slb_vsid = slb_vsid; + __entry->slb_esid = slb_esid; + ), + + TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid) +); + +#endif /* CONFIG_PPC_BOOK3S */ + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 5bb89c82807..889f2bc106d 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -4,9 +4,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -ifeq ($(CONFIG_PPC64),y) -EXTRA_CFLAGS += -mno-minimal-toc -endif +ccflags-$(CONFIG_PPC64) := -mno-minimal-toc CFLAGS_REMOVE_code-patching.o = -pg CFLAGS_REMOVE_feature-fixups.o = -pg @@ -17,7 +15,8 @@ obj-$(CONFIG_PPC32) += div64.o copy_32.o obj-$(CONFIG_HAS_IOMEM) += devres.o obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ - memcpy_64.o usercopy_64.o mem_64.o string.o + memcpy_64.o usercopy_64.o mem_64.o string.o \ + checksum_wrappers_64.o obj-$(CONFIG_XMON) += sstep.o ldstfp.o obj-$(CONFIG_KPROBES) += sstep.o ldstfp.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += sstep.o ldstfp.o diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index ef96c6c58ef..18245af38ae 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -65,165 +65,393 @@ _GLOBAL(csum_tcpudp_magic) srwi r3,r3,16 blr +#define STACKFRAMESIZE 256 +#define STK_REG(i) (112 + ((i)-14)*8) + /* * Computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit). * - * This code assumes at least halfword alignment, though the length - * can be any number of bytes. The sum is accumulated in r5. - * * csum_partial(r3=buff, r4=len, r5=sum) */ _GLOBAL(csum_partial) - subi r3,r3,8 /* we'll offset by 8 for the loads */ - srdi. r6,r4,3 /* divide by 8 for doubleword count */ - addic r5,r5,0 /* clear carry */ - beq 3f /* if we're doing < 8 bytes */ - andi. r0,r3,2 /* aligned on a word boundary already? */ - beq+ 1f - lhz r6,8(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 - subi r4,r4,2 - addc r5,r5,r6 - srdi. r6,r4,3 /* recompute number of doublewords */ - beq 3f /* any left? */ -1: mtctr r6 -2: ldu r6,8(r3) /* main sum loop */ - adde r5,r5,r6 - bdnz 2b - andi. r4,r4,7 /* compute bytes left to sum after doublewords */ -3: cmpwi 0,r4,4 /* is at least a full word left? */ - blt 4f - lwz r6,8(r3) /* sum this word */ + addic r0,r5,0 /* clear carry */ + + srdi. r6,r4,3 /* less than 8 bytes? */ + beq .Lcsum_tail_word + + /* + * If only halfword aligned, align to a double word. Since odd + * aligned addresses should be rare and they would require more + * work to calculate the correct checksum, we ignore that case + * and take the potential slowdown of unaligned loads. + */ + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + beq .Lcsum_aligned + + li r7,4 + sub r6,r7,r6 + mtctr r6 + +1: + lhz r6,0(r3) /* align to doubleword */ + subi r4,r4,2 + addi r3,r3,2 + adde r0,r0,r6 + bdnz 1b + +.Lcsum_aligned: + /* + * We unroll the loop such that each iteration is 64 bytes with an + * entry and exit limb of 64 bytes, meaning a minimum size of + * 128 bytes. + */ + srdi. r6,r4,7 + beq .Lcsum_tail_doublewords /* len < 128 */ + + srdi r6,r4,6 + subi r6,r6,1 + mtctr r6 + + stdu r1,-STACKFRAMESIZE(r1) + std r14,STK_REG(r14)(r1) + std r15,STK_REG(r15)(r1) + std r16,STK_REG(r16)(r1) + + ld r6,0(r3) + ld r9,8(r3) + + ld r10,16(r3) + ld r11,24(r3) + + /* + * On POWER6 and POWER7 back to back addes take 2 cycles because of + * the XER dependency. This means the fastest this loop can go is + * 16 cycles per iteration. The scheduling of the loop below has + * been shown to hit this on both POWER6 and POWER7. + */ + .align 5 +2: + adde r0,r0,r6 + ld r12,32(r3) + ld r14,40(r3) + + adde r0,r0,r9 + ld r15,48(r3) + ld r16,56(r3) + addi r3,r3,64 + + adde r0,r0,r10 + + adde r0,r0,r11 + + adde r0,r0,r12 + + adde r0,r0,r14 + + adde r0,r0,r15 + ld r6,0(r3) + ld r9,8(r3) + + adde r0,r0,r16 + ld r10,16(r3) + ld r11,24(r3) + bdnz 2b + + + adde r0,r0,r6 + ld r12,32(r3) + ld r14,40(r3) + + adde r0,r0,r9 + ld r15,48(r3) + ld r16,56(r3) + addi r3,r3,64 + + adde r0,r0,r10 + adde r0,r0,r11 + adde r0,r0,r12 + adde r0,r0,r14 + adde r0,r0,r15 + adde r0,r0,r16 + + ld r14,STK_REG(r14)(r1) + ld r15,STK_REG(r15)(r1) + ld r16,STK_REG(r16)(r1) + addi r1,r1,STACKFRAMESIZE + + andi. r4,r4,63 + +.Lcsum_tail_doublewords: /* Up to 127 bytes to go */ + srdi. r6,r4,3 + beq .Lcsum_tail_word + + mtctr r6 +3: + ld r6,0(r3) + addi r3,r3,8 + adde r0,r0,r6 + bdnz 3b + + andi. r4,r4,7 + +.Lcsum_tail_word: /* Up to 7 bytes to go */ + srdi. r6,r4,2 + beq .Lcsum_tail_halfword + + lwz r6,0(r3) addi r3,r3,4 + adde r0,r0,r6 subi r4,r4,4 - adde r5,r5,r6 -4: cmpwi 0,r4,2 /* is at least a halfword left? */ - blt+ 5f - lhz r6,8(r3) /* sum this halfword */ - addi r3,r3,2 - subi r4,r4,2 - adde r5,r5,r6 -5: cmpwi 0,r4,1 /* is at least a byte left? */ - bne+ 6f - lbz r6,8(r3) /* sum this byte */ - slwi r6,r6,8 /* this byte is assumed to be the upper byte of a halfword */ - adde r5,r5,r6 -6: addze r5,r5 /* add in final carry */ - rldicl r4,r5,32,0 /* fold two 32-bit halves together */ - add r3,r4,r5 - srdi r3,r3,32 - blr + +.Lcsum_tail_halfword: /* Up to 3 bytes to go */ + srdi. r6,r4,1 + beq .Lcsum_tail_byte + + lhz r6,0(r3) + addi r3,r3,2 + adde r0,r0,r6 + subi r4,r4,2 + +.Lcsum_tail_byte: /* Up to 1 byte to go */ + andi. r6,r4,1 + beq .Lcsum_finish + + lbz r6,0(r3) + sldi r9,r6,8 /* Pad the byte out to 16 bits */ + adde r0,r0,r9 + +.Lcsum_finish: + addze r0,r0 /* add in final carry */ + rldicl r4,r0,32,0 /* fold two 32 bit halves together */ + add r3,r4,r0 + srdi r3,r3,32 + blr + + + .macro source +100: + .section __ex_table,"a" + .align 3 + .llong 100b,.Lsrc_error + .previous + .endm + + .macro dest +200: + .section __ex_table,"a" + .align 3 + .llong 200b,.Ldest_error + .previous + .endm /* * Computes the checksum of a memory block at src, length len, * and adds in "sum" (32-bit), while copying the block to dst. * If an access exception occurs on src or dst, it stores -EFAULT - * to *src_err or *dst_err respectively, and (for an error on - * src) zeroes the rest of dst. - * - * This code needs to be reworked to take advantage of 64 bit sum+copy. - * However, due to tokenring halfword alignment problems this will be very - * tricky. For now we'll leave it until we instrument it somehow. + * to *src_err or *dst_err respectively. The caller must take any action + * required in this case (zeroing memory, recalculating partial checksum etc). * * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err) */ _GLOBAL(csum_partial_copy_generic) - addic r0,r6,0 - subi r3,r3,4 - subi r4,r4,4 - srwi. r6,r5,2 - beq 3f /* if we're doing < 4 bytes */ - andi. r9,r4,2 /* Align dst to longword boundary */ - beq+ 1f -81: lhz r6,4(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 + addic r0,r6,0 /* clear carry */ + + srdi. r6,r5,3 /* less than 8 bytes? */ + beq .Lcopy_tail_word + + /* + * If only halfword aligned, align to a double word. Since odd + * aligned addresses should be rare and they would require more + * work to calculate the correct checksum, we ignore that case + * and take the potential slowdown of unaligned loads. + * + * If the source and destination are relatively unaligned we only + * align the source. This keeps things simple. + */ + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + beq .Lcopy_aligned + + li r7,4 + sub r6,r7,r6 + mtctr r6 + +1: +source; lhz r6,0(r3) /* align to doubleword */ subi r5,r5,2 -91: sth r6,4(r4) - addi r4,r4,2 - addc r0,r0,r6 - srwi. r6,r5,2 /* # words to do */ - beq 3f -1: mtctr r6 -82: lwzu r6,4(r3) /* the bdnz has zero overhead, so it should */ -92: stwu r6,4(r4) /* be unnecessary to unroll this loop */ - adde r0,r0,r6 - bdnz 82b - andi. r5,r5,3 -3: cmpwi 0,r5,2 - blt+ 4f -83: lhz r6,4(r3) addi r3,r3,2 - subi r5,r5,2 -93: sth r6,4(r4) + adde r0,r0,r6 +dest; sth r6,0(r4) addi r4,r4,2 + bdnz 1b + +.Lcopy_aligned: + /* + * We unroll the loop such that each iteration is 64 bytes with an + * entry and exit limb of 64 bytes, meaning a minimum size of + * 128 bytes. + */ + srdi. r6,r5,7 + beq .Lcopy_tail_doublewords /* len < 128 */ + + srdi r6,r5,6 + subi r6,r6,1 + mtctr r6 + + stdu r1,-STACKFRAMESIZE(r1) + std r14,STK_REG(r14)(r1) + std r15,STK_REG(r15)(r1) + std r16,STK_REG(r16)(r1) + +source; ld r6,0(r3) +source; ld r9,8(r3) + +source; ld r10,16(r3) +source; ld r11,24(r3) + + /* + * On POWER6 and POWER7 back to back addes take 2 cycles because of + * the XER dependency. This means the fastest this loop can go is + * 16 cycles per iteration. The scheduling of the loop below has + * been shown to hit this on both POWER6 and POWER7. + */ + .align 5 +2: adde r0,r0,r6 -4: cmpwi 0,r5,1 - bne+ 5f -84: lbz r6,4(r3) -94: stb r6,4(r4) - slwi r6,r6,8 /* Upper byte of word */ +source; ld r12,32(r3) +source; ld r14,40(r3) + + adde r0,r0,r9 +source; ld r15,48(r3) +source; ld r16,56(r3) + addi r3,r3,64 + + adde r0,r0,r10 +dest; std r6,0(r4) +dest; std r9,8(r4) + + adde r0,r0,r11 +dest; std r10,16(r4) +dest; std r11,24(r4) + + adde r0,r0,r12 +dest; std r12,32(r4) +dest; std r14,40(r4) + + adde r0,r0,r14 +dest; std r15,48(r4) +dest; std r16,56(r4) + addi r4,r4,64 + + adde r0,r0,r15 +source; ld r6,0(r3) +source; ld r9,8(r3) + + adde r0,r0,r16 +source; ld r10,16(r3) +source; ld r11,24(r3) + bdnz 2b + + adde r0,r0,r6 -5: addze r3,r0 /* add in final carry (unlikely with 64-bit regs) */ - rldicl r4,r3,32,0 /* fold 64 bit value */ - add r3,r4,r3 - srdi r3,r3,32 - blr +source; ld r12,32(r3) +source; ld r14,40(r3) -/* These shouldn't go in the fixup section, since that would - cause the ex_table addresses to get out of order. */ + adde r0,r0,r9 +source; ld r15,48(r3) +source; ld r16,56(r3) + addi r3,r3,64 + + adde r0,r0,r10 +dest; std r6,0(r4) +dest; std r9,8(r4) + + adde r0,r0,r11 +dest; std r10,16(r4) +dest; std r11,24(r4) + + adde r0,r0,r12 +dest; std r12,32(r4) +dest; std r14,40(r4) + + adde r0,r0,r14 +dest; std r15,48(r4) +dest; std r16,56(r4) + addi r4,r4,64 + + adde r0,r0,r15 + adde r0,r0,r16 + + ld r14,STK_REG(r14)(r1) + ld r15,STK_REG(r15)(r1) + ld r16,STK_REG(r16)(r1) + addi r1,r1,STACKFRAMESIZE + + andi. r5,r5,63 + +.Lcopy_tail_doublewords: /* Up to 127 bytes to go */ + srdi. r6,r5,3 + beq .Lcopy_tail_word - .globl src_error_1 -src_error_1: - li r6,0 - subi r5,r5,2 -95: sth r6,4(r4) - addi r4,r4,2 - srwi. r6,r5,2 - beq 3f mtctr r6 - .globl src_error_2 -src_error_2: - li r6,0 -96: stwu r6,4(r4) - bdnz 96b -3: andi. r5,r5,3 - beq src_error - .globl src_error_3 -src_error_3: - li r6,0 - mtctr r5 - addi r4,r4,3 -97: stbu r6,1(r4) - bdnz 97b - .globl src_error -src_error: +3: +source; ld r6,0(r3) + addi r3,r3,8 + adde r0,r0,r6 +dest; std r6,0(r4) + addi r4,r4,8 + bdnz 3b + + andi. r5,r5,7 + +.Lcopy_tail_word: /* Up to 7 bytes to go */ + srdi. r6,r5,2 + beq .Lcopy_tail_halfword + +source; lwz r6,0(r3) + addi r3,r3,4 + adde r0,r0,r6 +dest; stw r6,0(r4) + addi r4,r4,4 + subi r5,r5,4 + +.Lcopy_tail_halfword: /* Up to 3 bytes to go */ + srdi. r6,r5,1 + beq .Lcopy_tail_byte + +source; lhz r6,0(r3) + addi r3,r3,2 + adde r0,r0,r6 +dest; sth r6,0(r4) + addi r4,r4,2 + subi r5,r5,2 + +.Lcopy_tail_byte: /* Up to 1 byte to go */ + andi. r6,r5,1 + beq .Lcopy_finish + +source; lbz r6,0(r3) + sldi r9,r6,8 /* Pad the byte out to 16 bits */ + adde r0,r0,r9 +dest; stb r6,0(r4) + +.Lcopy_finish: + addze r0,r0 /* add in final carry */ + rldicl r4,r0,32,0 /* fold two 32 bit halves together */ + add r3,r4,r0 + srdi r3,r3,32 + blr + +.Lsrc_error: cmpdi 0,r7,0 - beq 1f + beqlr li r6,-EFAULT stw r6,0(r7) -1: addze r3,r0 blr - .globl dst_error -dst_error: +.Ldest_error: cmpdi 0,r8,0 - beq 1f + beqlr li r6,-EFAULT stw r6,0(r8) -1: addze r3,r0 blr - -.section __ex_table,"a" - .align 3 - .llong 81b,src_error_1 - .llong 91b,dst_error - .llong 82b,src_error_2 - .llong 92b,dst_error - .llong 83b,src_error_3 - .llong 93b,dst_error - .llong 84b,src_error_3 - .llong 94b,dst_error - .llong 95b,dst_error - .llong 96b,dst_error - .llong 97b,dst_error diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c new file mode 100644 index 00000000000..769b817fbb3 --- /dev/null +++ b/arch/powerpc/lib/checksum_wrappers_64.c @@ -0,0 +1,102 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2010 + * + * Author: Anton Blanchard <anton@au.ibm.com> + */ +#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/checksum.h> +#include <asm/uaccess.h> + +__wsum csum_and_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr) +{ + unsigned int csum; + + might_sleep(); + + *err_ptr = 0; + + if (!len) { + csum = 0; + goto out; + } + + if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) { + *err_ptr = -EFAULT; + csum = (__force unsigned int)sum; + goto out; + } + + csum = csum_partial_copy_generic((void __force *)src, dst, + len, sum, err_ptr, NULL); + + if (unlikely(*err_ptr)) { + int missing = __copy_from_user(dst, src, len); + + if (missing) { + memset(dst + len - missing, 0, missing); + *err_ptr = -EFAULT; + } else { + *err_ptr = 0; + } + + csum = csum_partial(dst, len, sum); + } + +out: + return (__force __wsum)csum; +} +EXPORT_SYMBOL(csum_and_copy_from_user); + +__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, + __wsum sum, int *err_ptr) +{ + unsigned int csum; + + might_sleep(); + + *err_ptr = 0; + + if (!len) { + csum = 0; + goto out; + } + + if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) { + *err_ptr = -EFAULT; + csum = -1; /* invalid checksum */ + goto out; + } + + csum = csum_partial_copy_generic(src, (void __force *)dst, + len, sum, NULL, err_ptr); + + if (unlikely(*err_ptr)) { + csum = csum_partial(src, len, sum); + + if (copy_to_user(dst, src, len)) { + *err_ptr = -EFAULT; + csum = -1; /* invalid checksum */ + } + } + +out: + return (__force __wsum)csum; +} +EXPORT_SYMBOL(csum_and_copy_to_user); diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 74a7f4130b4..55f19f9fd70 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -62,7 +62,7 @@ .text .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "copy32.S",N_SO,0,0,0f + .stabs "copy_32.S",N_SO,0,0,0f 0: CACHELINE_BYTES = L1_CACHE_BYTES diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S index f6448636baf..6a85380520b 100644 --- a/arch/powerpc/lib/ldstfp.S +++ b/arch/powerpc/lib/ldstfp.S @@ -17,6 +17,8 @@ #include <asm/asm-offsets.h> #include <linux/errno.h> +#ifdef CONFIG_PPC_FPU + #define STKFRM (PPC_MIN_STKFRM + 16) .macro extab instr,handler @@ -81,7 +83,7 @@ _GLOBAL(do_lfs) mfmsr r6 ori r7,r6,MSR_FP cmpwi cr7,r3,0 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stfd fr0,STKFRM-16(r1) @@ -93,7 +95,7 @@ _GLOBAL(do_lfs) lfd fr0,STKFRM-16(r1) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -108,7 +110,7 @@ _GLOBAL(do_lfd) mfmsr r6 ori r7,r6,MSR_FP cmpwi cr7,r3,0 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stfd fr0,STKFRM-16(r1) @@ -120,7 +122,7 @@ _GLOBAL(do_lfd) lfd fr0,STKFRM-16(r1) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -135,7 +137,7 @@ _GLOBAL(do_stfs) mfmsr r6 ori r7,r6,MSR_FP cmpwi cr7,r3,0 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stfd fr0,STKFRM-16(r1) @@ -147,7 +149,7 @@ _GLOBAL(do_stfs) lfd fr0,STKFRM-16(r1) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -162,7 +164,7 @@ _GLOBAL(do_stfd) mfmsr r6 ori r7,r6,MSR_FP cmpwi cr7,r3,0 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stfd fr0,STKFRM-16(r1) @@ -174,7 +176,7 @@ _GLOBAL(do_stfd) lfd fr0,STKFRM-16(r1) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -229,7 +231,7 @@ _GLOBAL(do_lvx) oris r7,r6,MSR_VEC@h cmpwi cr7,r3,0 li r8,STKFRM-16 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stvx vr0,r1,r8 @@ -241,7 +243,7 @@ _GLOBAL(do_lvx) lvx vr0,r1,r8 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -257,7 +259,7 @@ _GLOBAL(do_stvx) oris r7,r6,MSR_VEC@h cmpwi cr7,r3,0 li r8,STKFRM-16 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stvx vr0,r1,r8 @@ -269,7 +271,7 @@ _GLOBAL(do_stvx) lvx vr0,r1,r8 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -325,7 +327,7 @@ _GLOBAL(do_lxvd2x) oris r7,r6,MSR_VSX@h cmpwi cr7,r3,0 li r8,STKFRM-16 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f STXVD2X(0,r1,r8) @@ -337,7 +339,7 @@ _GLOBAL(do_lxvd2x) LXVD2X(0,r1,r8) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -353,7 +355,7 @@ _GLOBAL(do_stxvd2x) oris r7,r6,MSR_VSX@h cmpwi cr7,r3,0 li r8,STKFRM-16 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f STXVD2X(0,r1,r8) @@ -365,7 +367,7 @@ _GLOBAL(do_stxvd2x) LXVD2X(0,r1,r8) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -373,3 +375,5 @@ _GLOBAL(do_stxvd2x) extab 2b,3b #endif /* CONFIG_VSX */ + +#endif /* CONFIG_PPC_FPU */ diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 58e14fba11b..9b8182e8216 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -34,7 +34,7 @@ void __spin_yield(arch_spinlock_t *lock) return; holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = lppaca[holder_cpu].yield_count; + yield_count = lppaca_of(holder_cpu).yield_count; if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); @@ -65,7 +65,7 @@ void __rw_yield(arch_rwlock_t *rw) return; /* no write lock at present */ holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = lppaca[holder_cpu].yield_count; + yield_count = lppaca_of(holder_cpu).yield_count; if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index e0a9858d537..ae5189ab004 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -30,6 +30,7 @@ extern char system_call_common[]; #define XER_OV 0x40000000U #define XER_CA 0x20000000U +#ifdef CONFIG_PPC_FPU /* * Functions in ldstfp.S */ @@ -41,6 +42,7 @@ extern int do_lvx(int rn, unsigned long ea); extern int do_stvx(int rn, unsigned long ea); extern int do_lxvd2x(int rn, unsigned long ea); extern int do_stxvd2x(int rn, unsigned long ea); +#endif /* * Determine whether a conditional branch instruction would branch. @@ -290,6 +292,7 @@ static int __kprobes write_mem(unsigned long val, unsigned long ea, int nb, return write_mem_unaligned(val, ea, nb, regs); } +#ifdef CONFIG_PPC_FPU /* * Check the address and alignment, and call func to do the actual * load or store. @@ -351,6 +354,7 @@ static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long), } return err; } +#endif #ifdef CONFIG_ALTIVEC /* For Altivec/VMX, no need to worry about alignment */ @@ -1393,6 +1397,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) regs->gpr[rd] = byterev_4(val); goto ldst_done; +#ifdef CONFIG_PPC_CPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) @@ -1424,6 +1429,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) ea = xform_ea(instr, regs, u); err = do_fp_store(rd, do_stfd, ea, 8, regs); goto ldst_done; +#endif #ifdef __powerpc64__ case 660: /* stdbrx */ @@ -1534,6 +1540,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) } while (++rd < 32); goto instr_done; +#ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ if (!(regs->msr & MSR_FP)) @@ -1565,6 +1572,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) ea = dform_ea(instr, regs); err = do_fp_store(rd, do_stfd, ea, 8, regs); goto ldst_done; +#endif #ifdef __powerpc64__ case 58: /* ld[u], lwa */ diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile index 0c16ab947f1..7d1dba0d57f 100644 --- a/arch/powerpc/math-emu/Makefile +++ b/arch/powerpc/math-emu/Makefile @@ -15,4 +15,4 @@ obj-$(CONFIG_SPE) += math_efp.o CFLAGS_fabs.o = -fno-builtin-fabs CFLAGS_math.o = -fno-builtin-fabs -EXTRA_CFLAGS = -I. -Iinclude/math-emu -w +ccflags-y = -I. -Iinclude/math-emu -w diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 1dc2fa5ce1b..5810967511d 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -35,6 +35,7 @@ #include <linux/init.h> #include <linux/delay.h> #include <linux/highmem.h> +#include <linux/memblock.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -47,6 +48,7 @@ #include <asm/bootx.h> #include <asm/machdep.h> #include <asm/setup.h> + #include "mmu_decl.h" extern int __map_without_ltlbs; @@ -139,8 +141,19 @@ unsigned long __init mmu_mapin_ram(unsigned long top) * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. */ - - __initial_memory_limit_addr = memstart_addr + mapped; + memblock_set_current_limit(mapped); return mapped; } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 40x can only access 16MB at the moment (see head_40x.S) */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); +} diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index d8c6efb32bc..024acab588f 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -24,6 +24,8 @@ */ #include <linux/init.h> +#include <linux/memblock.h> + #include <asm/mmu.h> #include <asm/system.h> #include <asm/page.h> @@ -213,6 +215,18 @@ unsigned long __init mmu_mapin_ram(unsigned long top) return total_lowmem; } +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 44x has a 256M TLB entry pinned at boot */ + memblock_set_current_limit(min_t(u64, first_memblock_size, PPC_PIN_SIZE)); +} + #ifdef CONFIG_SMP void __cpuinit mmu_init_secondary(int cpu) { diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index ce68708bbad..bdca46e0838 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -4,9 +4,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -ifeq ($(CONFIG_PPC64),y) -EXTRA_CFLAGS += -mno-minimal-toc -endif +ccflags-$(CONFIG_PPC64) := -mno-minimal-toc obj-y := fault.o mem.o pgtable.o gup.o \ init_$(CONFIG_WORD_SIZE).o \ @@ -25,7 +23,7 @@ obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ mmu_context_hash$(CONFIG_WORD_SIZE).o obj-$(CONFIG_40x) += 40x_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o -obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o +obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o ifeq ($(CONFIG_HUGETLB_PAGE),y) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 1bd712c33ce..54f4fb994e9 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -30,6 +30,7 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/perf_event.h> +#include <linux/magic.h> #include <asm/firmware.h> #include <asm/page.h> @@ -385,6 +386,7 @@ do_sigbus: void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) { const struct exception_table_entry *entry; + unsigned long *stackend; /* Are we prepared to handle this fault? */ if ((entry = search_exception_tables(regs->nip)) != NULL) { @@ -413,5 +415,9 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n", regs->nip); + stackend = end_of_stack(current); + if (current != &init_task && *stackend != STACK_END_MAGIC) + printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); + die("Kernel access of bad area", regs, sig); } diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 4b66a1ece6d..f7802c8bba0 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -40,6 +40,7 @@ #include <linux/init.h> #include <linux/delay.h> #include <linux/highmem.h> +#include <linux/memblock.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -56,11 +57,6 @@ unsigned int tlbcam_index; - -#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) -#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" -#endif - #define NUM_TLBCAMS (64) struct tlbcam TLBCAM[NUM_TLBCAMS]; @@ -137,7 +133,8 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, if (mmu_has_feature(MMU_FTR_BIG_PHYS)) TLBCAM[index].MAS7 = (u64)phys >> 32; - if (flags & _PAGE_USER) { + /* Below is unlikely -- only for large user pages or similar */ + if (pte_user(flags)) { TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); } @@ -184,6 +181,12 @@ unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx) return amount_mapped; } +#ifdef CONFIG_PPC32 + +#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) +#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" +#endif + unsigned long __init mmu_mapin_ram(unsigned long top) { return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1; @@ -213,5 +216,15 @@ void __init adjust_total_lowmem(void) pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20, (unsigned int)((total_lowmem - __max_low_memory) >> 20)); - __initial_memory_limit_addr = memstart_addr + __max_low_memory; + memblock_set_current_limit(memstart_addr + __max_low_memory); } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + phys_addr_t limit = first_memblock_base + first_memblock_size; + + /* 64M mapped initially according to head_fsl_booke.S */ + memblock_set_current_limit(min_t(u64, limit, 0x04000000)); +} +#endif diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 09dffe6efa4..83f534d862d 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -588,7 +588,7 @@ static void __init htab_initialize(void) unsigned long pteg_count; unsigned long prot; unsigned long base = 0, size = 0, limit; - int i; + struct memblock_region *reg; DBG(" -> htab_initialize()\n"); @@ -625,7 +625,7 @@ static void __init htab_initialize(void) if (machine_is(cell)) limit = 0x80000000; else - limit = 0; + limit = MEMBLOCK_ALLOC_ANYWHERE; table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit); @@ -649,7 +649,7 @@ static void __init htab_initialize(void) #ifdef CONFIG_DEBUG_PAGEALLOC linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count, - 1, memblock.rmo_size)); + 1, ppc64_rma_size)); memset(linear_map_hash_slots, 0, linear_map_hash_count); #endif /* CONFIG_DEBUG_PAGEALLOC */ @@ -659,9 +659,9 @@ static void __init htab_initialize(void) */ /* create bolted the linear mapping in the hash table */ - for (i=0; i < memblock.memory.cnt; i++) { - base = (unsigned long)__va(memblock.memory.region[i].base); - size = memblock.memory.region[i].size; + for_each_memblock(memory, reg) { + base = (unsigned long)__va(reg->base); + size = reg->size; DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", base, size, prot); @@ -696,7 +696,8 @@ static void __init htab_initialize(void) #endif /* CONFIG_U3_DART */ BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), prot, mmu_linear_psize, mmu_kernel_ssize)); - } + } + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); /* * If we have a memory_limit and we've allocated TCEs then we need to @@ -1247,3 +1248,23 @@ void kernel_map_pages(struct page *page, int numpages, int enable) local_irq_restore(flags); } #endif /* CONFIG_DEBUG_PAGEALLOC */ + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* On LPAR systems, the first entry is our RMA region, + * non-LPAR 64-bit hash MMU systems don't have a limitation + * on real mode access, but using the first entry works well + * enough. We also clamp it to 1G to avoid some funky things + * such as RTAS bugs etc... + */ + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + + /* Finally limit subsequent allocations */ + memblock_set_current_limit(ppc64_rma_size); +} diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 6a6975dc265..742da43b4ab 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -92,12 +92,6 @@ int __allow_ioremap_reserved; unsigned long __max_low_memory = MAX_LOW_MEM; /* - * address of the limit of what is accessible with initial MMU setup - - * 256MB usually, but only 16MB on 601. - */ -phys_addr_t __initial_memory_limit_addr = (phys_addr_t)0x10000000; - -/* * Check for command-line options that affect what MMU_init will do. */ void MMU_setup(void) @@ -126,13 +120,6 @@ void __init MMU_init(void) if (ppc_md.progress) ppc_md.progress("MMU:enter", 0x111); - /* 601 can only access 16MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - __initial_memory_limit_addr = 0x01000000; - /* 8xx can only access 8MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) - __initial_memory_limit_addr = 0x00800000; - /* parse args from command line */ MMU_setup(); @@ -190,20 +177,18 @@ void __init MMU_init(void) #ifdef CONFIG_BOOTX_TEXT btext_unmap(); #endif + + /* Shortly after that, the entire linear mapping will be available */ + memblock_set_current_limit(lowmem_end_addr); } /* This is only called until mem_init is done. */ void __init *early_get_page(void) { - void *p; - - if (init_bootmem_done) { - p = alloc_bootmem_pages(PAGE_SIZE); - } else { - p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, - __initial_memory_limit_addr)); - } - return p; + if (init_bootmem_done) + return alloc_bootmem_pages(PAGE_SIZE); + else + return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); } /* Free up now-unused memory */ @@ -252,3 +237,17 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif + +#ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */ +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 8xx can only access 8MB at the moment */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); +} +#endif /* CONFIG_8xx */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index ace85fa74b2..6374b2196a1 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -330,3 +330,4 @@ int __meminit vmemmap_populate(struct page *start_page, return 0; } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ + diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1a84a8d0000..a6649965090 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -82,18 +82,11 @@ int page_is_ram(unsigned long pfn) return pfn < max_pfn; #else unsigned long paddr = (pfn << PAGE_SHIFT); - int i; - for (i=0; i < memblock.memory.cnt; i++) { - unsigned long base; + struct memblock_region *reg; - base = memblock.memory.region[i].base; - - if ((paddr >= base) && - (paddr < (base + memblock.memory.region[i].size))) { + for_each_memblock(memory, reg) + if (paddr >= reg->base && paddr < (reg->base + reg->size)) return 1; - } - } - return 0; #endif } @@ -149,23 +142,19 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) { - struct memblock_property res; - unsigned long pfn, len; - u64 end; + struct memblock_region *reg; + unsigned long end_pfn = start_pfn + nr_pages; + unsigned long tstart, tend; int ret = -1; - res.base = (u64) start_pfn << PAGE_SHIFT; - res.size = (u64) nr_pages << PAGE_SHIFT; - - end = res.base + res.size - 1; - while ((res.base < end) && (memblock_find(&res) >= 0)) { - pfn = (unsigned long)(res.base >> PAGE_SHIFT); - len = (unsigned long)(res.size >> PAGE_SHIFT); - ret = (*func)(pfn, len, arg); + for_each_memblock(memory, reg) { + tstart = max(start_pfn, memblock_region_memory_base_pfn(reg)); + tend = min(end_pfn, memblock_region_memory_end_pfn(reg)); + if (tstart >= tend) + continue; + ret = (*func)(tstart, tend - tstart, arg); if (ret) break; - res.base += (res.size + 1); - res.size = (end - res.base + 1); } return ret; } @@ -179,9 +168,9 @@ EXPORT_SYMBOL_GPL(walk_system_ram_range); #ifndef CONFIG_NEED_MULTIPLE_NODES void __init do_init_bootmem(void) { - unsigned long i; unsigned long start, bootmap_pages; unsigned long total_pages; + struct memblock_region *reg; int boot_mapsize; max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; @@ -204,10 +193,10 @@ void __init do_init_bootmem(void) boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn); /* Add active regions with valid PFNs */ - for (i = 0; i < memblock.memory.cnt; i++) { + for_each_memblock(memory, reg) { unsigned long start_pfn, end_pfn; - start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; - end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); + start_pfn = memblock_region_memory_base_pfn(reg); + end_pfn = memblock_region_memory_end_pfn(reg); add_active_range(0, start_pfn, end_pfn); } @@ -218,29 +207,21 @@ void __init do_init_bootmem(void) free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT); /* reserve the sections we're already using */ - for (i = 0; i < memblock.reserved.cnt; i++) { - unsigned long addr = memblock.reserved.region[i].base + - memblock_size_bytes(&memblock.reserved, i) - 1; - if (addr < lowmem_end_addr) - reserve_bootmem(memblock.reserved.region[i].base, - memblock_size_bytes(&memblock.reserved, i), - BOOTMEM_DEFAULT); - else if (memblock.reserved.region[i].base < lowmem_end_addr) { - unsigned long adjusted_size = lowmem_end_addr - - memblock.reserved.region[i].base; - reserve_bootmem(memblock.reserved.region[i].base, - adjusted_size, BOOTMEM_DEFAULT); + for_each_memblock(reserved, reg) { + unsigned long top = reg->base + reg->size - 1; + if (top < lowmem_end_addr) + reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); + else if (reg->base < lowmem_end_addr) { + unsigned long trunc_size = lowmem_end_addr - reg->base; + reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT); } } #else free_bootmem_with_active_regions(0, max_pfn); /* reserve the sections we're already using */ - for (i = 0; i < memblock.reserved.cnt; i++) - reserve_bootmem(memblock.reserved.region[i].base, - memblock_size_bytes(&memblock.reserved, i), - BOOTMEM_DEFAULT); - + for_each_memblock(reserved, reg) + reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); #endif /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); @@ -251,22 +232,15 @@ void __init do_init_bootmem(void) /* mark pages that don't exist as nosave */ static int __init mark_nonram_nosave(void) { - unsigned long memblock_next_region_start_pfn, - memblock_region_max_pfn; - int i; - - for (i = 0; i < memblock.memory.cnt - 1; i++) { - memblock_region_max_pfn = - (memblock.memory.region[i].base >> PAGE_SHIFT) + - (memblock.memory.region[i].size >> PAGE_SHIFT); - memblock_next_region_start_pfn = - memblock.memory.region[i+1].base >> PAGE_SHIFT; - - if (memblock_region_max_pfn < memblock_next_region_start_pfn) - register_nosave_region(memblock_region_max_pfn, - memblock_next_region_start_pfn); + struct memblock_region *reg, *prev = NULL; + + for_each_memblock(memory, reg) { + if (prev && + memblock_region_memory_end_pfn(prev) < memblock_region_memory_base_pfn(reg)) + register_nosave_region(memblock_region_memory_end_pfn(prev), + memblock_region_memory_base_pfn(reg)); + prev = reg; } - return 0; } @@ -327,7 +301,7 @@ void __init mem_init(void) swiotlb_init(1); #endif - num_physpages = memblock.memory.size >> PAGE_SHIFT; + num_physpages = memblock_phys_mem_size() >> PAGE_SHIFT; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); #ifdef CONFIG_NEED_MULTIPLE_NODES diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index ddfd7ad4e1d..5ce99848d91 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -334,7 +334,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, /* We don't touch CPU 0 map, it's allocated at aboot and kept * around forever */ - if (cpu == 0) + if (cpu == boot_cpuid) return NOTIFY_OK; switch (action) { @@ -420,9 +420,11 @@ void __init mmu_context_init(void) */ context_map = alloc_bootmem(CTX_MAP_SIZE); context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1)); +#ifndef CONFIG_SMP stale_map[0] = alloc_bootmem(CTX_MAP_SIZE); +#else + stale_map[boot_cpuid] = alloc_bootmem(CTX_MAP_SIZE); -#ifdef CONFIG_SMP register_cpu_notifier(&mmu_context_cpu_nb); #endif diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 63b84a0d3b1..dd0a2589591 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -140,10 +140,13 @@ extern void wii_memory_fixups(void); extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(unsigned long top); -#elif defined(CONFIG_FSL_BOOKE) +#elif defined(CONFIG_PPC_FSL_BOOK3E) +extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx); +#ifdef CONFIG_PPC32 extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(unsigned long top); extern void adjust_total_lowmem(void); +#endif extern void loadcam_entry(unsigned int index); struct tlbcam { diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 002878ccf90..74505b24537 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -802,16 +802,17 @@ static void __init setup_nonnuma(void) unsigned long top_of_ram = memblock_end_of_DRAM(); unsigned long total_ram = memblock_phys_mem_size(); unsigned long start_pfn, end_pfn; - unsigned int i, nid = 0; + unsigned int nid = 0; + struct memblock_region *reg; printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", top_of_ram, total_ram); printk(KERN_DEBUG "Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); - for (i = 0; i < memblock.memory.cnt; ++i) { - start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; - end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); + for_each_memblock(memory, reg) { + start_pfn = memblock_region_memory_base_pfn(reg); + end_pfn = memblock_region_memory_end_pfn(reg); fake_numa_create_new_node(end_pfn, &nid); add_active_range(nid, start_pfn, end_pfn); @@ -947,11 +948,11 @@ static struct notifier_block __cpuinitdata ppc64_numa_nb = { static void mark_reserved_regions_for_nid(int nid) { struct pglist_data *node = NODE_DATA(nid); - int i; + struct memblock_region *reg; - for (i = 0; i < memblock.reserved.cnt; i++) { - unsigned long physbase = memblock.reserved.region[i].base; - unsigned long size = memblock.reserved.region[i].size; + for_each_memblock(reserved, reg) { + unsigned long physbase = reg->base; + unsigned long size = reg->size; unsigned long start_pfn = physbase >> PAGE_SHIFT; unsigned long end_pfn = PFN_UP(physbase + size); struct node_active_region node_ar; diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index f8a01829d64..11571e11883 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -223,8 +223,7 @@ void __init MMU_init_hw(void) * Find some memory for the hash table. */ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = __va(memblock_alloc_base(Hash_size, Hash_size, - __initial_memory_limit_addr)); + Hash = __va(memblock_alloc(Hash_size, Hash_size)); cacheable_memzero(Hash, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; @@ -272,3 +271,18 @@ void __init MMU_init_hw(void) if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000)); + else /* Anything else has 256M mapped */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000)); +} diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index fe391e94252..36c0c449a89 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -349,11 +349,47 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) static void setup_page_sizes(void) { - unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); - unsigned int tlb0ps = mfspr(SPRN_TLB0PS); - unsigned int eptcfg = mfspr(SPRN_EPTCFG); + unsigned int tlb0cfg; + unsigned int tlb0ps; + unsigned int eptcfg; int i, psize; +#ifdef CONFIG_PPC_FSL_BOOK3E + unsigned int mmucfg = mfspr(SPRN_MMUCFG); + + if (((mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) && + (mmu_has_feature(MMU_FTR_TYPE_FSL_E))) { + unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG); + unsigned int min_pg, max_pg; + + min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT; + max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + struct mmu_psize_def *def; + unsigned int shift; + + def = &mmu_psize_defs[psize]; + shift = def->shift; + + if (shift == 0) + continue; + + /* adjust to be in terms of 4^shift Kb */ + shift = (shift - 10) >> 1; + + if ((shift >= min_pg) && (shift <= max_pg)) + def->flags |= MMU_PAGE_SIZE_DIRECT; + } + + goto no_indirect; + } +#endif + + tlb0cfg = mfspr(SPRN_TLB0CFG); + tlb0ps = mfspr(SPRN_TLB0PS); + eptcfg = mfspr(SPRN_EPTCFG); + /* Look for supported direct sizes */ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { struct mmu_psize_def *def = &mmu_psize_defs[psize]; @@ -505,10 +541,26 @@ static void __early_init_mmu(int boot_cpu) */ linear_map_top = memblock_end_of_DRAM(); +#ifdef CONFIG_PPC_FSL_BOOK3E + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + unsigned int num_cams; + + /* use a quarter of the TLBCAM for bolted linear map */ + num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; + linear_map_top = map_mem_in_cams(linear_map_top, num_cams); + + /* limit memory so we dont have linear faults */ + memblock_enforce_memory_limit(linear_map_top); + memblock_analyze(); + } +#endif + /* A sync won't hurt us after mucking around with * the MMU configuration */ mb(); + + memblock_set_current_limit(linear_map_top); } void __init early_init_mmu(void) @@ -521,4 +573,18 @@ void __cpuinit early_init_mmu_secondary(void) __early_init_mmu(0); } +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* On Embedded 64-bit, we adjust the RMA size to match + * the bolted TLB entry. We know for now that only 1G + * entries are supported though that may eventually + * change. We crop it to the size of the first MEMBLOCK to + * avoid going over total available memory just in case... + */ + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + + /* Finally limit subsequent allocations */ + memblock_set_current_limit(ppc64_memblock_base + ppc64_rma_size); +} #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index b9d9fed8f36..af405eefe48 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -367,7 +367,7 @@ _GLOBAL(set_context) #error Unsupported processor type ! #endif -#if defined(CONFIG_FSL_BOOKE) +#if defined(CONFIG_PPC_FSL_BOOK3E) /* * extern void loadcam_entry(unsigned int index) * diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile index e219ca43962..73456c4cec2 100644 --- a/arch/powerpc/oprofile/Makefile +++ b/arch/powerpc/oprofile/Makefile @@ -1,8 +1,6 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -ifeq ($(CONFIG_PPC64),y) -EXTRA_CFLAGS += -mno-minimal-toc -endif +ccflags-$(CONFIG_PPC64) := -mno-minimal-toc obj-$(CONFIG_OPROFILE) += oprofile.o diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index b4278cfd1f8..f75301f2c85 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -105,7 +105,7 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) } } else { #ifdef CONFIG_PPC64 - if (!test_thread_flag(TIF_32BIT)) { + if (!is_32bit_task()) { while (depth--) { sp = user_getsp64(sp, first_frame); if (!sp) diff --git a/arch/powerpc/oprofile/op_model_fsl_emb.c b/arch/powerpc/oprofile/op_model_fsl_emb.c index 62312abffa2..d4e6507277b 100644 --- a/arch/powerpc/oprofile/op_model_fsl_emb.c +++ b/arch/powerpc/oprofile/op_model_fsl_emb.c @@ -2,7 +2,7 @@ * Freescale Embedded oprofile support, based on ppc64 oprofile support * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM * - * Copyright (c) 2004 Freescale Semiconductor, Inc + * Copyright (c) 2004, 2010 Freescale Semiconductor, Inc * * Author: Andy Fleming * Maintainer: Kumar Gala <galak@kernel.crashing.org> @@ -321,9 +321,6 @@ static void fsl_emb_handle_interrupt(struct pt_regs *regs, int val; int i; - /* set the PMM bit (see comment below) */ - mtmsr(mfmsr() | MSR_PMM); - pc = regs->nip; is_kernel = is_kernel_addr(pc); @@ -340,9 +337,13 @@ static void fsl_emb_handle_interrupt(struct pt_regs *regs, } /* The freeze bit was set by the interrupt. */ - /* Clear the freeze bit, and reenable the interrupt. - * The counters won't actually start until the rfi clears - * the PMM bit */ + /* Clear the freeze bit, and reenable the interrupt. The + * counters won't actually start until the rfi clears the PMM + * bit. The PMM bit should not be set until after the interrupt + * is cleared to avoid it getting lost in some hypervisor + * environments. + */ + mtmsr(mfmsr() | MSR_PMM); pmc_start_ctrs(1); } diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 69d668c072a..0f979c5c756 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -17,6 +17,16 @@ config BAMBOO help This option enables support for the IBM PPC440EP evaluation board. +config BLUESTONE + bool "Bluestone" + depends on 44x + default n + select PPC44x_SIMPLE + select APM821xx + select IBM_NEW_EMAC_RGMII + help + This option enables support for the APM APM821xx Evaluation board. + config EBONY bool "Ebony" depends on 44x @@ -293,6 +303,12 @@ config 460SX select IBM_NEW_EMAC_ZMII select IBM_NEW_EMAC_TAH +config APM821xx + bool + select PPC_FPU + select IBM_NEW_EMAC_EMAC4 + select IBM_NEW_EMAC_TAH + # 44x errata/workaround config symbols, selected by the CPU models above config IBM440EP_ERR42 bool diff --git a/arch/powerpc/platforms/44x/ppc44x_simple.c b/arch/powerpc/platforms/44x/ppc44x_simple.c index 5f7a29d7f59..7ddcba3b939 100644 --- a/arch/powerpc/platforms/44x/ppc44x_simple.c +++ b/arch/powerpc/platforms/44x/ppc44x_simple.c @@ -52,6 +52,7 @@ machine_device_initcall(ppc44x_simple, ppc44x_device_probe); static char *board[] __initdata = { "amcc,arches", "amcc,bamboo", + "amcc,bluestone", "amcc,canyonlands", "amcc,glacier", "ibm,ebony", diff --git a/arch/powerpc/platforms/512x/clock.c b/arch/powerpc/platforms/512x/clock.c index 5b243bd3eb3..3dc2a8d262b 100644 --- a/arch/powerpc/platforms/512x/clock.c +++ b/arch/powerpc/platforms/512x/clock.c @@ -57,7 +57,7 @@ static struct clk *mpc5121_clk_get(struct device *dev, const char *id) int id_match = 0; if (dev == NULL || id == NULL) - return NULL; + return clk; mutex_lock(&clocks_mutex); list_for_each_entry(p, &clocks, node) { diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 45c0cb9b67e..18c10482019 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -99,7 +99,7 @@ static void __init efika_pcisetup(void) if (bus_range == NULL || len < 2 * sizeof(int)) { printk(KERN_WARNING EFIKA_PLATFORM_NAME ": Can't get bus-range for %s\n", pcictrl->full_name); - return; + goto out_put; } if (bus_range[1] == bus_range[0]) @@ -111,12 +111,12 @@ static void __init efika_pcisetup(void) printk(" controlled by %s\n", pcictrl->full_name); printk("\n"); - hose = pcibios_alloc_controller(of_node_get(pcictrl)); + hose = pcibios_alloc_controller(pcictrl); if (!hose) { printk(KERN_WARNING EFIKA_PLATFORM_NAME ": Can't allocate PCI controller structure for %s\n", pcictrl->full_name); - return; + goto out_put; } hose->first_busno = bus_range[0]; @@ -124,6 +124,9 @@ static void __init efika_pcisetup(void) hose->ops = &rtas_pci_ops; pci_process_bridge_OF_ranges(hose, pcictrl, 0); + return; +out_put: + of_node_put(pcictrl); } #else diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index 6e905314ad5..41f3a7eda1d 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -325,12 +325,16 @@ int mpc5200_psc_ac97_gpio_reset(int psc_number) clrbits32(&simple_gpio->simple_dvo, sync | out); clrbits8(&wkup_gpio->wkup_dvo, reset); - /* wait at lease 1 us */ - udelay(2); + /* wait for 1 us */ + udelay(1); /* Deassert reset */ setbits8(&wkup_gpio->wkup_dvo, reset); + /* wait at least 200ns */ + /* 7 ~= (200ns * timebase) / ns2sec */ + __delay(7); + /* Restore pin-muxing */ out_be32(&simple_gpio->port_config, mux); diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig index 021763a32c2..73f4135f3a1 100644 --- a/arch/powerpc/platforms/83xx/Kconfig +++ b/arch/powerpc/platforms/83xx/Kconfig @@ -10,12 +10,12 @@ menuconfig PPC_83xx if PPC_83xx config MPC830x_RDB - bool "Freescale MPC830x RDB" + bool "Freescale MPC830x RDB and derivatives" select DEFAULT_UIMAGE select PPC_MPC831x select FSL_GTM help - This option enables support for the MPC8308 RDB board. + This option enables support for the MPC8308 RDB and MPC8308 P1M boards. config MPC831x_RDB bool "Freescale MPC831x RDB" diff --git a/arch/powerpc/platforms/83xx/mpc830x_rdb.c b/arch/powerpc/platforms/83xx/mpc830x_rdb.c index ac102ee9abe..846831d495b 100644 --- a/arch/powerpc/platforms/83xx/mpc830x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc830x_rdb.c @@ -65,7 +65,8 @@ static int __init mpc830x_rdb_probe(void) unsigned long root = of_get_flat_dt_root(); return of_flat_dt_is_compatible(root, "MPC8308RDB") || - of_flat_dt_is_compatible(root, "fsl,mpc8308rdb"); + of_flat_dt_is_compatible(root, "fsl,mpc8308rdb") || + of_flat_dt_is_compatible(root, "denx,mpc8308_p1m"); } static struct of_device_id __initdata of_bus_ids[] = { diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index bea1f5905ad..b6976e1726e 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -11,6 +11,8 @@ menuconfig FSL_SOC_BOOKE if FSL_SOC_BOOKE +if PPC32 + config MPC8540_ADS bool "Freescale MPC8540 ADS" select DEFAULT_UIMAGE @@ -153,10 +155,20 @@ config SBC8560 help This option enables support for the Wind River SBC8560 board +config P3041_DS + bool "Freescale P3041 DS" + select DEFAULT_UIMAGE + select PPC_E500MC + select PHYS_64BIT + select SWIOTLB + select MPC8xxx_GPIO + select HAS_RAPIDIO + help + This option enables support for the P3041 DS board + config P4080_DS bool "Freescale P4080 DS" select DEFAULT_UIMAGE - select PPC_FSL_BOOK3E select PPC_E500MC select PHYS_64BIT select SWIOTLB @@ -165,6 +177,20 @@ config P4080_DS help This option enables support for the P4080 DS board +endif # PPC32 + +config P5020_DS + bool "Freescale P5020 DS" + select DEFAULT_UIMAGE + select E500 + select PPC_E500MC + select PHYS_64BIT + select SWIOTLB + select MPC8xxx_GPIO + select HAS_RAPIDIO + help + This option enables support for the P5020 DS board + endif # FSL_SOC_BOOKE config TQM85xx diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index a2ec3f8f4d0..dd70db77d63 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -11,7 +11,9 @@ obj-$(CONFIG_MPC85xx_DS) += mpc85xx_ds.o obj-$(CONFIG_MPC85xx_MDS) += mpc85xx_mds.o obj-$(CONFIG_MPC85xx_RDB) += mpc85xx_rdb.o obj-$(CONFIG_P1022_DS) += p1022_ds.o +obj-$(CONFIG_P3041_DS) += p3041_ds.o corenet_ds.o obj-$(CONFIG_P4080_DS) += p4080_ds.o corenet_ds.o +obj-$(CONFIG_P5020_DS) += p5020_ds.o corenet_ds.o obj-$(CONFIG_STX_GP3) += stx_gp3.o obj-$(CONFIG_TQM85xx) += tqm85xx.o obj-$(CONFIG_SBC8560) += sbc8560.o diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c index 34e00902ce8..2b390d19a1d 100644 --- a/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -112,6 +112,8 @@ static struct of_device_id __initdata p1022_ds_ids[] = { { .compatible = "soc", }, { .compatible = "simple-bus", }, { .compatible = "gianfar", }, + /* So that the DMA channel nodes can be probed individually: */ + { .compatible = "fsl,eloplus-dma", }, {}, }; diff --git a/arch/powerpc/platforms/85xx/p3041_ds.c b/arch/powerpc/platforms/85xx/p3041_ds.c new file mode 100644 index 00000000000..0ed52e18298 --- /dev/null +++ b/arch/powerpc/platforms/85xx/p3041_ds.c @@ -0,0 +1,64 @@ +/* + * P3041 DS Setup + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2009-2010 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/phy.h> + +#include <asm/system.h> +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <mm/mmu_decl.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/mpic.h> + +#include <linux/of_platform.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "corenet_ds.h" + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init p3041_ds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,P3041DS"); +} + +define_machine(p3041_ds) { + .name = "P3041 DS", + .probe = p3041_ds_probe, + .setup_arch = corenet_ds_setup_arch, + .init_IRQ = corenet_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif + .get_irq = mpic_get_coreint_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +machine_device_initcall(p3041_ds, corenet_ds_publish_devices); + +#ifdef CONFIG_SWIOTLB +machine_arch_initcall(p3041_ds, swiotlb_setup_bus_notifier); +#endif diff --git a/arch/powerpc/platforms/85xx/p5020_ds.c b/arch/powerpc/platforms/85xx/p5020_ds.c new file mode 100644 index 00000000000..7467b712ee0 --- /dev/null +++ b/arch/powerpc/platforms/85xx/p5020_ds.c @@ -0,0 +1,69 @@ +/* + * P5020 DS Setup + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2009-2010 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/phy.h> + +#include <asm/system.h> +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <mm/mmu_decl.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/mpic.h> + +#include <linux/of_platform.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "corenet_ds.h" + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init p5020_ds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,P5020DS"); +} + +define_machine(p5020_ds) { + .name = "P5020 DS", + .probe = p5020_ds_probe, + .setup_arch = corenet_ds_setup_arch, + .init_IRQ = corenet_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif +/* coreint doesn't play nice with lazy EE, use legacy mpic for now */ +#ifdef CONFIG_PPC64 + .get_irq = mpic_get_irq, +#else + .get_irq = mpic_get_coreint_irq, +#endif + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +machine_device_initcall(p5020_ds, corenet_ds_publish_devices); + +#ifdef CONFIG_SWIOTLB +machine_arch_initcall(p5020_ds, swiotlb_setup_bus_notifier); +#endif diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index a6b106557be..5c91a992f02 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -16,6 +16,7 @@ #include <linux/delay.h> #include <linux/of.h> #include <linux/kexec.h> +#include <linux/highmem.h> #include <asm/machdep.h> #include <asm/pgtable.h> @@ -79,6 +80,7 @@ smp_85xx_kick_cpu(int nr) local_irq_save(flags); out_be32(bptr_vaddr + BOOT_ENTRY_PIR, nr); +#ifdef CONFIG_PPC32 out_be32(bptr_vaddr + BOOT_ENTRY_ADDR_LOWER, __pa(__early_start)); if (!ioremappable) @@ -88,6 +90,12 @@ smp_85xx_kick_cpu(int nr) /* Wait a bit for the CPU to ack. */ while ((__secondary_hold_acknowledge != nr) && (++n < 1000)) mdelay(1); +#else + out_be64((u64 *)(bptr_vaddr + BOOT_ENTRY_ADDR_UPPER), + __pa((u64)*((unsigned long long *) generic_secondary_smp_init))); + + smp_generic_kick_cpu(nr); +#endif local_irq_restore(flags); @@ -114,19 +122,15 @@ struct smp_ops_t smp_85xx_ops = { }; #ifdef CONFIG_KEXEC -static int kexec_down_cpus = 0; +atomic_t kexec_down_cpus = ATOMIC_INIT(0); void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) { - mpic_teardown_this_cpu(1); - - /* When crashing, this gets called on all CPU's we only - * take down the non-boot cpus */ - if (smp_processor_id() != boot_cpuid) - { - local_irq_disable(); - kexec_down_cpus++; + local_irq_disable(); + if (secondary) { + atomic_inc(&kexec_down_cpus); + /* loop forever */ while (1); } } @@ -137,16 +141,65 @@ static void mpc85xx_smp_kexec_down(void *arg) ppc_md.kexec_cpu_down(0,1); } -static void mpc85xx_smp_machine_kexec(struct kimage *image) +static void map_and_flush(unsigned long paddr) { - int timeout = 2000; + struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); + unsigned long kaddr = (unsigned long)kmap(page); + + flush_dcache_range(kaddr, kaddr + PAGE_SIZE); + kunmap(page); +} + +/** + * Before we reset the other cores, we need to flush relevant cache + * out to memory so we don't get anything corrupted, some of these flushes + * are performed out of an overabundance of caution as interrupts are not + * disabled yet and we can switch cores + */ +static void mpc85xx_smp_flush_dcache_kexec(struct kimage *image) +{ + kimage_entry_t *ptr, entry; + unsigned long paddr; int i; - set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid)); + if (image->type == KEXEC_TYPE_DEFAULT) { + /* normal kexec images are stored in temporary pages */ + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); + ptr = (entry & IND_INDIRECTION) ? + phys_to_virt(entry & PAGE_MASK) : ptr + 1) { + if (!(entry & IND_DESTINATION)) { + map_and_flush(entry); + } + } + /* flush out last IND_DONE page */ + map_and_flush(entry); + } else { + /* crash type kexec images are copied to the crash region */ + for (i = 0; i < image->nr_segments; i++) { + struct kexec_segment *seg = &image->segment[i]; + for (paddr = seg->mem; paddr < seg->mem + seg->memsz; + paddr += PAGE_SIZE) { + map_and_flush(paddr); + } + } + } + + /* also flush the kimage struct to be passed in as well */ + flush_dcache_range((unsigned long)image, + (unsigned long)image + sizeof(*image)); +} + +static void mpc85xx_smp_machine_kexec(struct kimage *image) +{ + int timeout = INT_MAX; + int i, num_cpus = num_present_cpus(); + + mpc85xx_smp_flush_dcache_kexec(image); - smp_call_function(mpc85xx_smp_kexec_down, NULL, 0); + if (image->type == KEXEC_TYPE_DEFAULT) + smp_call_function(mpc85xx_smp_kexec_down, NULL, 0); - while ( (kexec_down_cpus != (num_online_cpus() - 1)) && + while ( (atomic_read(&kexec_down_cpus) != (num_cpus - 1)) && ( timeout > 0 ) ) { timeout--; @@ -155,7 +208,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image) if ( !timeout ) printk(KERN_ERR "Unable to bring down secondary cpu(s)"); - for (i = 0; i < num_present_cpus(); i++) + for (i = 0; i < num_cpus; i++) { if ( i == smp_processor_id() ) continue; mpic_reset_core(i); diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 81c9208025f..956154f32cf 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -21,6 +21,16 @@ source "arch/powerpc/platforms/44x/Kconfig" source "arch/powerpc/platforms/40x/Kconfig" source "arch/powerpc/platforms/amigaone/Kconfig" +config KVM_GUEST + bool "KVM Guest support" + default y + ---help--- + This option enables various optimizations for running under the KVM + hypervisor. Overhead for the kernel when not running inside KVM should + be minimal. + + In case of doubt, say Y + config PPC_NATIVE bool depends on 6xx || PPC64 diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index d361f8119b1..111138c55f9 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -125,6 +125,7 @@ config 8xx config E500 select FSL_EMB_PERFMON + select PPC_FSL_BOOK3E bool config PPC_E500MC @@ -166,9 +167,14 @@ config BOOKE config FSL_BOOKE bool - depends on E200 || E500 + depends on (E200 || E500) && PPC32 default y +# this is for common code between PPC32 & PPC64 FSL BOOKE +config PPC_FSL_BOOK3E + bool + select FSL_EMB_PERFMON + default y if FSL_BOOKE config PTE_64BIT bool diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 97085530aa6..e3e379c6caa 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -310,9 +310,9 @@ static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) } static struct irq_chip msic_irq_chip = { - .mask = mask_msi_irq, - .unmask = unmask_msi_irq, - .shutdown = unmask_msi_irq, + .irq_mask = mask_msi_irq, + .irq_unmask = unmask_msi_irq, + .irq_shutdown = mask_msi_irq, .name = "AXON-MSI", }; diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index 1d3c4effea1..5ec1e47a0d7 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -173,8 +173,10 @@ static int __init cbe_ptcal_enable(void) return -ENODEV; size = of_get_property(np, "ibm,cbe-ptcal-size", NULL); - if (!size) + if (!size) { + of_node_put(np); return -ENODEV; + } pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size); order = get_order(*size); diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index 5876e888e41..3f2e557344a 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -258,8 +258,10 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) return NO_IRQ; imap += intsize + 1; tmp = of_get_property(iic, "#interrupt-cells", NULL); - if (tmp == NULL) + if (tmp == NULL) { + of_node_put(iic); return NO_IRQ; + } intsize = *tmp; /* Assume unit is last entry of interrupt specifier */ unit = imap[intsize - 1]; diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 1a40da92154..02f7b113a31 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -154,6 +154,7 @@ static const struct file_operations __fops = { \ .release = spufs_attr_release, \ .read = spufs_attr_read, \ .write = spufs_attr_write, \ + .llseek = generic_file_llseek, \ }; @@ -521,6 +522,7 @@ static const struct file_operations spufs_cntl_fops = { .release = spufs_cntl_release, .read = simple_attr_read, .write = simple_attr_write, + .llseek = generic_file_llseek, .mmap = spufs_cntl_mmap, }; @@ -714,6 +716,7 @@ static ssize_t spufs_mbox_read(struct file *file, char __user *buf, static const struct file_operations spufs_mbox_fops = { .open = spufs_pipe_open, .read = spufs_mbox_read, + .llseek = no_llseek, }; static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, @@ -743,6 +746,7 @@ static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, static const struct file_operations spufs_mbox_stat_fops = { .open = spufs_pipe_open, .read = spufs_mbox_stat_read, + .llseek = no_llseek, }; /* low-level ibox access function */ @@ -863,6 +867,7 @@ static const struct file_operations spufs_ibox_fops = { .read = spufs_ibox_read, .poll = spufs_ibox_poll, .fasync = spufs_ibox_fasync, + .llseek = no_llseek, }; static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, @@ -890,6 +895,7 @@ static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, static const struct file_operations spufs_ibox_stat_fops = { .open = spufs_pipe_open, .read = spufs_ibox_stat_read, + .llseek = no_llseek, }; /* low-level mailbox write */ @@ -1011,6 +1017,7 @@ static const struct file_operations spufs_wbox_fops = { .write = spufs_wbox_write, .poll = spufs_wbox_poll, .fasync = spufs_wbox_fasync, + .llseek = no_llseek, }; static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, @@ -1038,6 +1045,7 @@ static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, static const struct file_operations spufs_wbox_stat_fops = { .open = spufs_pipe_open, .read = spufs_wbox_stat_read, + .llseek = no_llseek, }; static int spufs_signal1_open(struct inode *inode, struct file *file) @@ -1166,6 +1174,7 @@ static const struct file_operations spufs_signal1_fops = { .read = spufs_signal1_read, .write = spufs_signal1_write, .mmap = spufs_signal1_mmap, + .llseek = no_llseek, }; static const struct file_operations spufs_signal1_nosched_fops = { @@ -1173,6 +1182,7 @@ static const struct file_operations spufs_signal1_nosched_fops = { .release = spufs_signal1_release, .write = spufs_signal1_write, .mmap = spufs_signal1_mmap, + .llseek = no_llseek, }; static int spufs_signal2_open(struct inode *inode, struct file *file) @@ -1305,6 +1315,7 @@ static const struct file_operations spufs_signal2_fops = { .read = spufs_signal2_read, .write = spufs_signal2_write, .mmap = spufs_signal2_mmap, + .llseek = no_llseek, }; static const struct file_operations spufs_signal2_nosched_fops = { @@ -1312,6 +1323,7 @@ static const struct file_operations spufs_signal2_nosched_fops = { .release = spufs_signal2_release, .write = spufs_signal2_write, .mmap = spufs_signal2_mmap, + .llseek = no_llseek, }; /* @@ -1451,6 +1463,7 @@ static const struct file_operations spufs_mss_fops = { .open = spufs_mss_open, .release = spufs_mss_release, .mmap = spufs_mss_mmap, + .llseek = no_llseek, }; static int @@ -1508,6 +1521,7 @@ static const struct file_operations spufs_psmap_fops = { .open = spufs_psmap_open, .release = spufs_psmap_release, .mmap = spufs_psmap_mmap, + .llseek = no_llseek, }; @@ -1871,6 +1885,7 @@ static const struct file_operations spufs_mfc_fops = { .fsync = spufs_mfc_fsync, .fasync = spufs_mfc_fasync, .mmap = spufs_mfc_mmap, + .llseek = no_llseek, }; static int spufs_npc_set(void *data, u64 val) @@ -2246,6 +2261,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, static const struct file_operations spufs_dma_info_fops = { .open = spufs_info_open, .read = spufs_dma_info_read, + .llseek = no_llseek, }; static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx, @@ -2299,6 +2315,7 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, static const struct file_operations spufs_proxydma_info_fops = { .open = spufs_info_open, .read = spufs_proxydma_info_read, + .llseek = no_llseek, }; static int spufs_show_tid(struct seq_file *s, void *private) @@ -2585,6 +2602,7 @@ static const struct file_operations spufs_switch_log_fops = { .read = spufs_switch_log_read, .poll = spufs_switch_log_poll, .release = spufs_switch_log_release, + .llseek = no_llseek, }; /** diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c index ba3588f2d8e..d3ceff04ffc 100644 --- a/arch/powerpc/platforms/chrp/nvram.c +++ b/arch/powerpc/platforms/chrp/nvram.c @@ -74,8 +74,10 @@ void __init chrp_nvram_init(void) return; nbytes_p = of_get_property(nvram, "#bytes", &proplen); - if (nbytes_p == NULL || proplen != sizeof(unsigned int)) + if (nbytes_p == NULL || proplen != sizeof(unsigned int)) { + of_node_put(nvram); return; + } nvram_size = *nbytes_p; diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 5cdcc7c8d97..649473a729b 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -65,7 +65,7 @@ static int __init page_aligned(unsigned long x) void __init wii_memory_fixups(void) { - struct memblock_property *p = memblock.memory.region; + struct memblock_region *p = memblock.memory.regions; /* * This is part of a workaround to allow the use of two diff --git a/arch/powerpc/platforms/iseries/Makefile b/arch/powerpc/platforms/iseries/Makefile index ce014928d46..a7602b11ed9 100644 --- a/arch/powerpc/platforms/iseries/Makefile +++ b/arch/powerpc/platforms/iseries/Makefile @@ -1,4 +1,4 @@ -EXTRA_CFLAGS += -mno-minimal-toc +ccflags-y := -mno-minimal-toc obj-y += exception.o obj-y += hvlog.o hvlpconfig.o lpardata.o setup.o dt.o mf.o lpevents.o \ diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c index 7f45a51fe79..fdb7384c0c4 100644 --- a/arch/powerpc/platforms/iseries/dt.c +++ b/arch/powerpc/platforms/iseries/dt.c @@ -243,7 +243,7 @@ static void __init dt_cpus(struct iseries_flat_dt *dt) pft_size[1] = __ilog2(HvCallHpt_getHptPages() * HW_PAGE_SIZE); for (i = 0; i < NR_CPUS; i++) { - if (lppaca[i].dyn_proc_status >= 2) + if (lppaca_of(i).dyn_proc_status >= 2) continue; snprintf(p, 32 - (p - buf), "@%d", i); @@ -251,7 +251,7 @@ static void __init dt_cpus(struct iseries_flat_dt *dt) dt_prop_str(dt, "device_type", device_type_cpu); - index = lppaca[i].dyn_hv_phys_proc_index; + index = lppaca_of(i).dyn_hv_phys_proc_index; d = &xIoHriProcessorVpd[index]; dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024); diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c index 33e5fc7334f..42d0a886de0 100644 --- a/arch/powerpc/platforms/iseries/mf.c +++ b/arch/powerpc/platforms/iseries/mf.c @@ -1249,6 +1249,7 @@ out: static const struct file_operations proc_vmlinux_operations = { .write = proc_mf_change_vmlinux, + .llseek = default_llseek, }; static int __init mf_proc_init(void) diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c index 6590850045a..6c6029914db 100644 --- a/arch/powerpc/platforms/iseries/smp.c +++ b/arch/powerpc/platforms/iseries/smp.c @@ -91,7 +91,7 @@ static void smp_iSeries_kick_cpu(int nr) BUG_ON((nr < 0) || (nr >= NR_CPUS)); /* Verify that our partition has a processor nr */ - if (lppaca[nr].dyn_proc_status >= 2) + if (lppaca_of(nr).dyn_proc_status >= 2) return; /* The processor is currently spinning, waiting diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 3fff8d979b4..fe34c3d9bb7 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -358,6 +358,7 @@ static int __init maple_cpc925_edac_setup(void) model = (const unsigned char *)of_get_property(np, "model", NULL); if (!model) { printk(KERN_ERR "%s: Unabel to get model info\n", __func__); + of_node_put(np); return -ENODEV; } diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c index cec63594265..b0c3777528a 100644 --- a/arch/powerpc/platforms/powermac/pfunc_core.c +++ b/arch/powerpc/platforms/powermac/pfunc_core.c @@ -837,8 +837,10 @@ struct pmf_function *__pmf_find_function(struct device_node *target, return NULL; find_it: dev = pmf_find_device(actor); - if (dev == NULL) - return NULL; + if (dev == NULL) { + result = NULL; + goto out; + } list_for_each_entry(func, &dev->functions, link) { if (name && strcmp(name, func->name)) @@ -850,8 +852,9 @@ struct pmf_function *__pmf_find_function(struct device_node *target, result = func; break; } - of_node_put(actor); pmf_put_device(dev); +out: + of_node_put(actor); return result; } diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 046ace9c438..59eb8bdaa79 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -1,14 +1,9 @@ -ifeq ($(CONFIG_PPC64),y) -EXTRA_CFLAGS += -mno-minimal-toc -endif - -ifeq ($(CONFIG_PPC_PSERIES_DEBUG),y) -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_PPC64) := -mno-minimal-toc +ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG obj-y := lpar.o hvCall.o nvram.o reconfig.o \ setup.o iommu.o event_sources.o ras.o \ - firmware.o power.o dlpar.o + firmware.o power.o dlpar.o mobility.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_SCANLOG) += scanlog.o @@ -23,7 +18,7 @@ obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_HVCS) += hvcserver.o obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o -obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o +obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DTL) += dtl.o diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 72d8054fa73..b74a9230edc 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -33,7 +33,7 @@ struct cc_workarea { u32 prop_offset; }; -static void dlpar_free_cc_property(struct property *prop) +void dlpar_free_cc_property(struct property *prop) { kfree(prop->name); kfree(prop->value); @@ -55,13 +55,12 @@ static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa) prop->length = ccwa->prop_length; value = (char *)ccwa + ccwa->prop_offset; - prop->value = kzalloc(prop->length, GFP_KERNEL); + prop->value = kmemdup(value, prop->length, GFP_KERNEL); if (!prop->value) { dlpar_free_cc_property(prop); return NULL; } - memcpy(prop->value, value, prop->length); return prop; } @@ -102,7 +101,7 @@ static void dlpar_free_one_cc_node(struct device_node *dn) kfree(dn); } -static void dlpar_free_cc_nodes(struct device_node *dn) +void dlpar_free_cc_nodes(struct device_node *dn) { if (dn->child) dlpar_free_cc_nodes(dn->child); diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index a00addb5594..c371bc06434 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -23,37 +23,22 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/debugfs.h> +#include <linux/spinlock.h> #include <asm/smp.h> #include <asm/system.h> #include <asm/uaccess.h> #include <asm/firmware.h> +#include <asm/lppaca.h> #include "plpar_wrappers.h" -/* - * Layout of entries in the hypervisor's DTL buffer. Although we don't - * actually access the internals of an entry (we only need to know the size), - * we might as well define it here for reference. - */ -struct dtl_entry { - u8 dispatch_reason; - u8 preempt_reason; - u16 processor_id; - u32 enqueue_to_dispatch_time; - u32 ready_to_enqueue_time; - u32 waiting_to_ready_time; - u64 timebase; - u64 fault_addr; - u64 srr0; - u64 srr1; -}; - struct dtl { struct dtl_entry *buf; struct dentry *file; int cpu; int buf_entries; u64 last_idx; + spinlock_t lock; }; static DEFINE_PER_CPU(struct dtl, cpu_dtl); @@ -72,25 +57,97 @@ static u8 dtl_event_mask = 0x7; static int dtl_buf_entries = (16 * 85); -static int dtl_enable(struct dtl *dtl) +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +struct dtl_ring { + u64 write_index; + struct dtl_entry *write_ptr; + struct dtl_entry *buf; + struct dtl_entry *buf_end; + u8 saved_dtl_mask; +}; + +static DEFINE_PER_CPU(struct dtl_ring, dtl_rings); + +static atomic_t dtl_count; + +/* + * The cpu accounting code controls the DTL ring buffer, and we get + * given entries as they are processed. + */ +static void consume_dtle(struct dtl_entry *dtle, u64 index) { - unsigned long addr; - int ret, hwcpu; + struct dtl_ring *dtlr = &__get_cpu_var(dtl_rings); + struct dtl_entry *wp = dtlr->write_ptr; + struct lppaca *vpa = local_paca->lppaca_ptr; - /* only allow one reader */ - if (dtl->buf) - return -EBUSY; + if (!wp) + return; - /* we need to store the original allocation size for use during read */ - dtl->buf_entries = dtl_buf_entries; + *wp = *dtle; + barrier(); - dtl->buf = kmalloc_node(dtl->buf_entries * sizeof(struct dtl_entry), - GFP_KERNEL, cpu_to_node(dtl->cpu)); - if (!dtl->buf) { - printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", - __func__, dtl->cpu); - return -ENOMEM; - } + /* check for hypervisor ring buffer overflow, ignore this entry if so */ + if (index + N_DISPATCH_LOG < vpa->dtl_idx) + return; + + ++wp; + if (wp == dtlr->buf_end) + wp = dtlr->buf; + dtlr->write_ptr = wp; + + /* incrementing write_index makes the new entry visible */ + smp_wmb(); + ++dtlr->write_index; +} + +static int dtl_start(struct dtl *dtl) +{ + struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu); + + dtlr->buf = dtl->buf; + dtlr->buf_end = dtl->buf + dtl->buf_entries; + dtlr->write_index = 0; + + /* setting write_ptr enables logging into our buffer */ + smp_wmb(); + dtlr->write_ptr = dtl->buf; + + /* enable event logging */ + dtlr->saved_dtl_mask = lppaca_of(dtl->cpu).dtl_enable_mask; + lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask; + + dtl_consumer = consume_dtle; + atomic_inc(&dtl_count); + return 0; +} + +static void dtl_stop(struct dtl *dtl) +{ + struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu); + + dtlr->write_ptr = NULL; + smp_wmb(); + + dtlr->buf = NULL; + + /* restore dtl_enable_mask */ + lppaca_of(dtl->cpu).dtl_enable_mask = dtlr->saved_dtl_mask; + + if (atomic_dec_and_test(&dtl_count)) + dtl_consumer = NULL; +} + +static u64 dtl_current_index(struct dtl *dtl) +{ + return per_cpu(dtl_rings, dtl->cpu).write_index; +} + +#else /* CONFIG_VIRT_CPU_ACCOUNTING */ + +static int dtl_start(struct dtl *dtl) +{ + unsigned long addr; + int ret, hwcpu; /* Register our dtl buffer with the hypervisor. The HV expects the * buffer size to be passed in the second word of the buffer */ @@ -102,34 +159,82 @@ static int dtl_enable(struct dtl *dtl) if (ret) { printk(KERN_WARNING "%s: DTL registration for cpu %d (hw %d) " "failed with %d\n", __func__, dtl->cpu, hwcpu, ret); - kfree(dtl->buf); return -EIO; } /* set our initial buffer indices */ - dtl->last_idx = lppaca[dtl->cpu].dtl_idx = 0; + lppaca_of(dtl->cpu).dtl_idx = 0; /* ensure that our updates to the lppaca fields have occurred before * we actually enable the logging */ smp_wmb(); /* enable event logging */ - lppaca[dtl->cpu].dtl_enable_mask = dtl_event_mask; + lppaca_of(dtl->cpu).dtl_enable_mask = dtl_event_mask; return 0; } -static void dtl_disable(struct dtl *dtl) +static void dtl_stop(struct dtl *dtl) { int hwcpu = get_hard_smp_processor_id(dtl->cpu); - lppaca[dtl->cpu].dtl_enable_mask = 0x0; + lppaca_of(dtl->cpu).dtl_enable_mask = 0x0; unregister_dtl(hwcpu, __pa(dtl->buf)); +} + +static u64 dtl_current_index(struct dtl *dtl) +{ + return lppaca_of(dtl->cpu).dtl_idx; +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + +static int dtl_enable(struct dtl *dtl) +{ + long int n_entries; + long int rc; + struct dtl_entry *buf = NULL; + /* only allow one reader */ + if (dtl->buf) + return -EBUSY; + + n_entries = dtl_buf_entries; + buf = kmalloc_node(n_entries * sizeof(struct dtl_entry), + GFP_KERNEL, cpu_to_node(dtl->cpu)); + if (!buf) { + printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", + __func__, dtl->cpu); + return -ENOMEM; + } + + spin_lock(&dtl->lock); + rc = -EBUSY; + if (!dtl->buf) { + /* store the original allocation size for use during read */ + dtl->buf_entries = n_entries; + dtl->buf = buf; + dtl->last_idx = 0; + rc = dtl_start(dtl); + if (rc) + dtl->buf = NULL; + } + spin_unlock(&dtl->lock); + + if (rc) + kfree(buf); + return rc; +} + +static void dtl_disable(struct dtl *dtl) +{ + spin_lock(&dtl->lock); + dtl_stop(dtl); kfree(dtl->buf); dtl->buf = NULL; dtl->buf_entries = 0; + spin_unlock(&dtl->lock); } /* file interface */ @@ -157,8 +262,9 @@ static int dtl_file_release(struct inode *inode, struct file *filp) static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len, loff_t *pos) { - int rc, cur_idx, last_idx, n_read, n_req, read_size; + long int rc, n_read, n_req, read_size; struct dtl *dtl; + u64 cur_idx, last_idx, i; if ((len % sizeof(struct dtl_entry)) != 0) return -EINVAL; @@ -171,41 +277,48 @@ static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len, /* actual number of entries read */ n_read = 0; - cur_idx = lppaca[dtl->cpu].dtl_idx; + spin_lock(&dtl->lock); + + cur_idx = dtl_current_index(dtl); last_idx = dtl->last_idx; - if (cur_idx - last_idx > dtl->buf_entries) { - pr_debug("%s: hv buffer overflow for cpu %d, samples lost\n", - __func__, dtl->cpu); - } + if (last_idx + dtl->buf_entries <= cur_idx) + last_idx = cur_idx - dtl->buf_entries + 1; + + if (last_idx + n_req > cur_idx) + n_req = cur_idx - last_idx; + + if (n_req > 0) + dtl->last_idx = last_idx + n_req; + + spin_unlock(&dtl->lock); + + if (n_req <= 0) + return 0; - cur_idx %= dtl->buf_entries; - last_idx %= dtl->buf_entries; + i = last_idx % dtl->buf_entries; /* read the tail of the buffer if we've wrapped */ - if (last_idx > cur_idx) { - read_size = min(n_req, dtl->buf_entries - last_idx); + if (i + n_req > dtl->buf_entries) { + read_size = dtl->buf_entries - i; - rc = copy_to_user(buf, &dtl->buf[last_idx], + rc = copy_to_user(buf, &dtl->buf[i], read_size * sizeof(struct dtl_entry)); if (rc) return -EFAULT; - last_idx = 0; + i = 0; n_req -= read_size; n_read += read_size; buf += read_size * sizeof(struct dtl_entry); } /* .. and now the head */ - read_size = min(n_req, cur_idx - last_idx); - rc = copy_to_user(buf, &dtl->buf[last_idx], - read_size * sizeof(struct dtl_entry)); + rc = copy_to_user(buf, &dtl->buf[i], n_req * sizeof(struct dtl_entry)); if (rc) return -EFAULT; - n_read += read_size; - dtl->last_idx += n_read; + n_read += n_req; return n_read * sizeof(struct dtl_entry); } @@ -263,6 +376,7 @@ static int dtl_init(void) /* set up the per-cpu log structures */ for_each_possible_cpu(i) { struct dtl *dtl = &per_cpu(cpu_dtl, i); + spin_lock_init(&dtl->lock); dtl->cpu = i; rc = dtl_setup_file(dtl); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index cf79b46d8f8..f129040d974 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -248,11 +248,13 @@ void vpa_init(int cpu) int hwcpu = get_hard_smp_processor_id(cpu); unsigned long addr; long ret; + struct paca_struct *pp; + struct dtl_entry *dtl; if (cpu_has_feature(CPU_FTR_ALTIVEC)) - lppaca[cpu].vmxregs_in_use = 1; + lppaca_of(cpu).vmxregs_in_use = 1; - addr = __pa(&lppaca[cpu]); + addr = __pa(&lppaca_of(cpu)); ret = register_vpa(hwcpu, addr); if (ret) { @@ -274,6 +276,25 @@ void vpa_init(int cpu) "registration for cpu %d (hw %d) of area %lx " "returns %ld\n", cpu, hwcpu, addr, ret); } + + /* + * Register dispatch trace log, if one has been allocated. + */ + pp = &paca[cpu]; + dtl = pp->dispatch_log; + if (dtl) { + pp->dtl_ridx = 0; + pp->dtl_curr = dtl; + lppaca_of(cpu).dtl_idx = 0; + + /* hypervisor reads buffer length from this field */ + dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; + ret = register_dtl(hwcpu, __pa(dtl)); + if (ret) + pr_warn("DTL registration failed for cpu %d (%ld)\n", + cpu, ret); + lppaca_of(cpu).dtl_enable_mask = 2; + } } static long pSeries_lpar_hpte_insert(unsigned long hpte_group, diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c new file mode 100644 index 00000000000..3e7f651e50a --- /dev/null +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -0,0 +1,362 @@ +/* + * Support for Partition Mobility/Migration + * + * Copyright (C) 2010 Nathan Fontenot + * Copyright (C) 2010 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/kobject.h> +#include <linux/smp.h> +#include <linux/completion.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <linux/slab.h> + +#include <asm/rtas.h> +#include "pseries.h" + +static struct kobject *mobility_kobj; + +struct update_props_workarea { + u32 phandle; + u32 state; + u64 reserved; + u32 nprops; +}; + +#define NODE_ACTION_MASK 0xff000000 +#define NODE_COUNT_MASK 0x00ffffff + +#define DELETE_DT_NODE 0x01000000 +#define UPDATE_DT_NODE 0x02000000 +#define ADD_DT_NODE 0x03000000 + +static int mobility_rtas_call(int token, char *buf) +{ + int rc; + + spin_lock(&rtas_data_buf_lock); + + memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE); + rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, 1); + memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE); + + spin_unlock(&rtas_data_buf_lock); + return rc; +} + +static int delete_dt_node(u32 phandle) +{ + struct device_node *dn; + + dn = of_find_node_by_phandle(phandle); + if (!dn) + return -ENOENT; + + dlpar_detach_node(dn); + return 0; +} + +static int update_dt_property(struct device_node *dn, struct property **prop, + const char *name, u32 vd, char *value) +{ + struct property *new_prop = *prop; + struct property *old_prop; + int more = 0; + + /* A negative 'vd' value indicates that only part of the new property + * value is contained in the buffer and we need to call + * ibm,update-properties again to get the rest of the value. + * + * A negative value is also the two's compliment of the actual value. + */ + if (vd & 0x80000000) { + vd = ~vd + 1; + more = 1; + } + + if (new_prop) { + /* partial property fixup */ + char *new_data = kzalloc(new_prop->length + vd, GFP_KERNEL); + if (!new_data) + return -ENOMEM; + + memcpy(new_data, new_prop->value, new_prop->length); + memcpy(new_data + new_prop->length, value, vd); + + kfree(new_prop->value); + new_prop->value = new_data; + new_prop->length += vd; + } else { + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return -ENOMEM; + + new_prop->name = kstrdup(name, GFP_KERNEL); + if (!new_prop->name) { + kfree(new_prop); + return -ENOMEM; + } + + new_prop->length = vd; + new_prop->value = kzalloc(new_prop->length, GFP_KERNEL); + if (!new_prop->value) { + kfree(new_prop->name); + kfree(new_prop); + return -ENOMEM; + } + + memcpy(new_prop->value, value, vd); + *prop = new_prop; + } + + if (!more) { + old_prop = of_find_property(dn, new_prop->name, NULL); + if (old_prop) + prom_update_property(dn, new_prop, old_prop); + else + prom_add_property(dn, new_prop); + + new_prop = NULL; + } + + return 0; +} + +static int update_dt_node(u32 phandle) +{ + struct update_props_workarea *upwa; + struct device_node *dn; + struct property *prop = NULL; + int i, rc; + char *prop_data; + char *rtas_buf; + int update_properties_token; + + update_properties_token = rtas_token("ibm,update-properties"); + if (update_properties_token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!rtas_buf) + return -ENOMEM; + + dn = of_find_node_by_phandle(phandle); + if (!dn) { + kfree(rtas_buf); + return -ENOENT; + } + + upwa = (struct update_props_workarea *)&rtas_buf[0]; + upwa->phandle = phandle; + + do { + rc = mobility_rtas_call(update_properties_token, rtas_buf); + if (rc < 0) + break; + + prop_data = rtas_buf + sizeof(*upwa); + + for (i = 0; i < upwa->nprops; i++) { + char *prop_name; + u32 vd; + + prop_name = prop_data + 1; + prop_data += strlen(prop_name) + 1; + vd = *prop_data++; + + switch (vd) { + case 0x00000000: + /* name only property, nothing to do */ + break; + + case 0x80000000: + prop = of_find_property(dn, prop_name, NULL); + prom_remove_property(dn, prop); + prop = NULL; + break; + + default: + rc = update_dt_property(dn, &prop, prop_name, + vd, prop_data); + if (rc) { + printk(KERN_ERR "Could not update %s" + " property\n", prop_name); + } + + prop_data += vd; + } + } + } while (rc == 1); + + of_node_put(dn); + kfree(rtas_buf); + return 0; +} + +static int add_dt_node(u32 parent_phandle, u32 drc_index) +{ + struct device_node *dn; + struct device_node *parent_dn; + int rc; + + dn = dlpar_configure_connector(drc_index); + if (!dn) + return -ENOENT; + + parent_dn = of_find_node_by_phandle(parent_phandle); + if (!parent_dn) { + dlpar_free_cc_nodes(dn); + return -ENOENT; + } + + dn->parent = parent_dn; + rc = dlpar_attach_node(dn); + if (rc) + dlpar_free_cc_nodes(dn); + + of_node_put(parent_dn); + return rc; +} + +static int pseries_devicetree_update(void) +{ + char *rtas_buf; + u32 *data; + int update_nodes_token; + int rc; + + update_nodes_token = rtas_token("ibm,update-nodes"); + if (update_nodes_token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!rtas_buf) + return -ENOMEM; + + do { + rc = mobility_rtas_call(update_nodes_token, rtas_buf); + if (rc && rc != 1) + break; + + data = (u32 *)rtas_buf + 4; + while (*data & NODE_ACTION_MASK) { + int i; + u32 action = *data & NODE_ACTION_MASK; + int node_count = *data & NODE_COUNT_MASK; + + data++; + + for (i = 0; i < node_count; i++) { + u32 phandle = *data++; + u32 drc_index; + + switch (action) { + case DELETE_DT_NODE: + delete_dt_node(phandle); + break; + case UPDATE_DT_NODE: + update_dt_node(phandle); + break; + case ADD_DT_NODE: + drc_index = *data++; + add_dt_node(phandle, drc_index); + break; + } + } + } + } while (rc == 1); + + kfree(rtas_buf); + return rc; +} + +void post_mobility_fixup(void) +{ + int rc; + int activate_fw_token; + + rc = pseries_devicetree_update(); + if (rc) { + printk(KERN_ERR "Initial post-mobility device tree update " + "failed: %d\n", rc); + return; + } + + activate_fw_token = rtas_token("ibm,activate-firmware"); + if (activate_fw_token == RTAS_UNKNOWN_SERVICE) { + printk(KERN_ERR "Could not make post-mobility " + "activate-fw call.\n"); + return; + } + + rc = rtas_call(activate_fw_token, 0, 1, NULL); + if (!rc) { + rc = pseries_devicetree_update(); + if (rc) + printk(KERN_ERR "Secondary post-mobility device tree " + "update failed: %d\n", rc); + } else { + printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc); + return; + } + + return; +} + +static ssize_t migrate_store(struct class *class, struct class_attribute *attr, + const char *buf, size_t count) +{ + struct rtas_args args; + u64 streamid; + int rc; + + rc = strict_strtoull(buf, 0, &streamid); + if (rc) + return rc; + + memset(&args, 0, sizeof(args)); + args.token = rtas_token("ibm,suspend-me"); + args.nargs = 2; + args.nret = 1; + + args.args[0] = streamid >> 32 ; + args.args[1] = streamid & 0xffffffff; + args.rets = &args.args[args.nargs]; + + do { + args.rets[0] = 0; + rc = rtas_ibm_suspend_me(&args); + if (!rc && args.rets[0] == RTAS_NOT_SUSPENDABLE) + ssleep(1); + } while (!rc && args.rets[0] == RTAS_NOT_SUSPENDABLE); + + if (rc) + return rc; + else if (args.rets[0]) + return args.rets[0]; + + post_mobility_fixup(); + return count; +} + +static CLASS_ATTR(migration, S_IWUSR, NULL, migrate_store); + +static int __init mobility_sysfs_init(void) +{ + int rc; + + mobility_kobj = kobject_create_and_add("mobility", kernel_kobj); + if (!mobility_kobj) + return -ENOMEM; + + rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr); + + return rc; +} +device_initcall(mobility_sysfs_init); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 40c93cad91d..e9f6d2859c3 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -17,6 +17,8 @@ struct device_node; extern void request_event_sources_irqs(struct device_node *np, irq_handler_t handler, const char *name); +#include <linux/of.h> + extern void __init fw_feature_init(const char *hypertas, unsigned long len); struct pt_regs; @@ -47,4 +49,11 @@ extern unsigned long rtas_poweron_auto; extern void find_udbg_vterm(void); +/* Dynamic logical Partitioning/Mobility */ +extern void dlpar_free_cc_nodes(struct device_node *); +extern void dlpar_free_cc_property(struct property *); +extern struct device_node *dlpar_configure_connector(u32); +extern int dlpar_attach_node(struct device_node *); +extern int dlpar_detach_node(struct device_node *); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 57ddbb43b33..1de2cbb9230 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -539,7 +539,8 @@ out: } static const struct file_operations ofdt_fops = { - .write = ofdt_write + .write = ofdt_write, + .llseek = noop_llseek, }; /* create /proc/powerpc/ofdt write-only by root */ diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index 80e9e7652a4..554457294a2 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -170,6 +170,7 @@ const struct file_operations scanlog_fops = { .write = scanlog_write, .open = scanlog_open, .release = scanlog_release, + .llseek = noop_llseek, }; static int __init scanlog_init(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index a6d19e3a505..d345bfd56bb 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -273,6 +273,58 @@ static struct notifier_block pci_dn_reconfig_nb = { .notifier_call = pci_dn_reconfig_notifier, }; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +/* + * Allocate space for the dispatch trace log for all possible cpus + * and register the buffers with the hypervisor. This is used for + * computing time stolen by the hypervisor. + */ +static int alloc_dispatch_logs(void) +{ + int cpu, ret; + struct paca_struct *pp; + struct dtl_entry *dtl; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return 0; + + for_each_possible_cpu(cpu) { + pp = &paca[cpu]; + dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL, + cpu_to_node(cpu)); + if (!dtl) { + pr_warn("Failed to allocate dispatch trace log for cpu %d\n", + cpu); + pr_warn("Stolen time statistics will be unreliable\n"); + break; + } + + pp->dtl_ridx = 0; + pp->dispatch_log = dtl; + pp->dispatch_log_end = dtl + N_DISPATCH_LOG; + pp->dtl_curr = dtl; + } + + /* Register the DTL for the current (boot) cpu */ + dtl = get_paca()->dispatch_log; + get_paca()->dtl_ridx = 0; + get_paca()->dtl_curr = dtl; + get_paca()->lppaca_ptr->dtl_idx = 0; + + /* hypervisor reads buffer length from this field */ + dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; + ret = register_dtl(hard_smp_processor_id(), __pa(dtl)); + if (ret) + pr_warn("DTL registration failed for boot cpu %d (%d)\n", + smp_processor_id(), ret); + get_paca()->lppaca_ptr->dtl_enable_mask = 2; + + return 0; +} + +early_initcall(alloc_dispatch_logs); +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + static void __init pSeries_setup_arch(void) { /* Discover PIC type and setup ppc_md accordingly */ diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 93834b0d827..7b96e5a270c 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -178,7 +178,7 @@ static int get_irq_server(unsigned int virq, const struct cpumask *cpumask, if (!distribute_irqs) return default_server; - if (!cpumask_equal(cpumask, cpu_all_mask)) { + if (!cpumask_subset(cpu_possible_mask, cpumask)) { int server = cpumask_first_and(cpu_online_mask, cpumask); if (server < nr_cpu_ids) @@ -243,7 +243,7 @@ static unsigned int xics_startup(unsigned int virq) * at that level, so we do it here by hand. */ if (irq_to_desc(virq)->msi_desc) - unmask_msi_irq(virq); + unmask_msi_irq(irq_get_irq_data(virq)); /* unmask it */ xics_unmask_irq(virq); diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 5642924fb9f..0bef9dacb64 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -1,8 +1,6 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -ifeq ($(CONFIG_PPC64),y) -EXTRA_CFLAGS += -mno-minimal-toc -endif +ccflags-$(CONFIG_PPC64) := -mno-minimal-toc mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o mpic_pasemi_msi.o obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) @@ -20,6 +18,7 @@ obj-$(CONFIG_FSL_PMC) += fsl_pmc.o obj-$(CONFIG_FSL_LBC) += fsl_lbc.o obj-$(CONFIG_FSL_GTM) += fsl_gtm.o obj-$(CONFIG_MPC8xxx_GPIO) += mpc8xxx_gpio.o +obj-$(CONFIG_FSL_85XX_CACHE_SRAM) += fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o obj-$(CONFIG_SIMPLE_GPIO) += simple_gpio.o obj-$(CONFIG_RAPIDIO) += fsl_rio.o obj-$(CONFIG_TSI108_BRIDGE) += tsi108_pci.o tsi108_dev.o diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 559db2b846a..17cf15ec38b 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -70,6 +70,8 @@ static int iommu_table_dart_inited; static int dart_dirty; static int dart_is_u4; +#define DART_U4_BYPASS_BASE 0x8000000000ull + #define DBG(...) static inline void dart_tlb_invalidate_all(void) @@ -292,12 +294,20 @@ static void iommu_table_dart_setup(void) set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map); } -static void pci_dma_dev_setup_dart(struct pci_dev *dev) +static void dma_dev_setup_dart(struct device *dev) { /* We only have one iommu table on the mac for now, which makes * things simple. Setup all PCI devices to point to this table */ - set_iommu_table_base(&dev->dev, &iommu_table_dart); + if (get_dma_ops(dev) == &dma_direct_ops) + set_dma_offset(dev, DART_U4_BYPASS_BASE); + else + set_iommu_table_base(dev, &iommu_table_dart); +} + +static void pci_dma_dev_setup_dart(struct pci_dev *dev) +{ + dma_dev_setup_dart(&dev->dev); } static void pci_dma_bus_setup_dart(struct pci_bus *bus) @@ -315,6 +325,45 @@ static void pci_dma_bus_setup_dart(struct pci_bus *bus) PCI_DN(dn)->iommu_table = &iommu_table_dart; } +static bool dart_device_on_pcie(struct device *dev) +{ + struct device_node *np = of_node_get(dev->of_node); + + while(np) { + if (of_device_is_compatible(np, "U4-pcie") || + of_device_is_compatible(np, "u4-pcie")) { + of_node_put(np); + return true; + } + np = of_get_next_parent(np); + } + return false; +} + +static int dart_dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + + /* U4 supports a DART bypass, we use it for 64-bit capable + * devices to improve performances. However, that only works + * for devices connected to U4 own PCIe interface, not bridged + * through hypertransport. We need the device to support at + * least 40 bits of addresses. + */ + if (dart_device_on_pcie(dev) && dma_mask >= DMA_BIT_MASK(40)) { + dev_info(dev, "Using 64-bit DMA iommu bypass\n"); + set_dma_ops(dev, &dma_direct_ops); + } else { + dev_info(dev, "Using 32-bit DMA via iommu\n"); + set_dma_ops(dev, &dma_iommu_ops); + } + dma_dev_setup_dart(dev); + + *dev->dma_mask = dma_mask; + return 0; +} + void __init iommu_init_early_dart(void) { struct device_node *dn; @@ -328,20 +377,25 @@ void __init iommu_init_early_dart(void) dart_is_u4 = 1; } + /* Initialize the DART HW */ + if (dart_init(dn) != 0) + goto bail; + /* Setup low level TCE operations for the core IOMMU code */ ppc_md.tce_build = dart_build; ppc_md.tce_free = dart_free; ppc_md.tce_flush = dart_flush; - /* Initialize the DART HW */ - if (dart_init(dn) == 0) { - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_dart; - ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_dart; + /* Setup bypass if supported */ + if (dart_is_u4) + ppc_md.dma_set_mask = dart_dma_set_mask; - /* Setup pci_dma ops */ - set_pci_dma_ops(&dma_iommu_ops); - return; - } + ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_dart; + ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_dart; + + /* Setup pci_dma ops */ + set_pci_dma_ops(&dma_iommu_ops); + return; bail: /* If init failed, use direct iommu and null setup functions */ diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_ctlr.h b/arch/powerpc/sysdev/fsl_85xx_cache_ctlr.h new file mode 100644 index 00000000000..60c9c0bd5ba --- /dev/null +++ b/arch/powerpc/sysdev/fsl_85xx_cache_ctlr.h @@ -0,0 +1,101 @@ +/* + * Copyright 2009-2010 Freescale Semiconductor, Inc + * + * QorIQ based Cache Controller Memory Mapped Registers + * + * Author: Vivek Mahajan <vivek.mahajan@freescale.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __FSL_85XX_CACHE_CTLR_H__ +#define __FSL_85XX_CACHE_CTLR_H__ + +#define L2CR_L2FI 0x40000000 /* L2 flash invalidate */ +#define L2CR_L2IO 0x00200000 /* L2 instruction only */ +#define L2CR_SRAM_ZERO 0x00000000 /* L2SRAM zero size */ +#define L2CR_SRAM_FULL 0x00010000 /* L2SRAM full size */ +#define L2CR_SRAM_HALF 0x00020000 /* L2SRAM half size */ +#define L2CR_SRAM_TWO_HALFS 0x00030000 /* L2SRAM two half sizes */ +#define L2CR_SRAM_QUART 0x00040000 /* L2SRAM one quarter size */ +#define L2CR_SRAM_TWO_QUARTS 0x00050000 /* L2SRAM two quarter size */ +#define L2CR_SRAM_EIGHTH 0x00060000 /* L2SRAM one eighth size */ +#define L2CR_SRAM_TWO_EIGHTH 0x00070000 /* L2SRAM two eighth size */ + +#define L2SRAM_OPTIMAL_SZ_SHIFT 0x00000003 /* Optimum size for L2SRAM */ + +#define L2SRAM_BAR_MSK_LO18 0xFFFFC000 /* Lower 18 bits */ +#define L2SRAM_BARE_MSK_HI4 0x0000000F /* Upper 4 bits */ + +enum cache_sram_lock_ways { + LOCK_WAYS_ZERO, + LOCK_WAYS_EIGHTH, + LOCK_WAYS_TWO_EIGHTH, + LOCK_WAYS_HALF = 4, + LOCK_WAYS_FULL = 8, +}; + +struct mpc85xx_l2ctlr { + u32 ctl; /* 0x000 - L2 control */ + u8 res1[0xC]; + u32 ewar0; /* 0x010 - External write address 0 */ + u32 ewarea0; /* 0x014 - External write address extended 0 */ + u32 ewcr0; /* 0x018 - External write ctrl */ + u8 res2[4]; + u32 ewar1; /* 0x020 - External write address 1 */ + u32 ewarea1; /* 0x024 - External write address extended 1 */ + u32 ewcr1; /* 0x028 - External write ctrl 1 */ + u8 res3[4]; + u32 ewar2; /* 0x030 - External write address 2 */ + u32 ewarea2; /* 0x034 - External write address extended 2 */ + u32 ewcr2; /* 0x038 - External write ctrl 2 */ + u8 res4[4]; + u32 ewar3; /* 0x040 - External write address 3 */ + u32 ewarea3; /* 0x044 - External write address extended 3 */ + u32 ewcr3; /* 0x048 - External write ctrl 3 */ + u8 res5[0xB4]; + u32 srbar0; /* 0x100 - SRAM base address 0 */ + u32 srbarea0; /* 0x104 - SRAM base addr reg ext address 0 */ + u32 srbar1; /* 0x108 - SRAM base address 1 */ + u32 srbarea1; /* 0x10C - SRAM base addr reg ext address 1 */ + u8 res6[0xCF0]; + u32 errinjhi; /* 0xE00 - Error injection mask high */ + u32 errinjlo; /* 0xE04 - Error injection mask low */ + u32 errinjctl; /* 0xE08 - Error injection tag/ecc control */ + u8 res7[0x14]; + u32 captdatahi; /* 0xE20 - Error data high capture */ + u32 captdatalo; /* 0xE24 - Error data low capture */ + u32 captecc; /* 0xE28 - Error syndrome */ + u8 res8[0x14]; + u32 errdet; /* 0xE40 - Error detect */ + u32 errdis; /* 0xE44 - Error disable */ + u32 errinten; /* 0xE48 - Error interrupt enable */ + u32 errattr; /* 0xE4c - Error attribute capture */ + u32 erradrrl; /* 0xE50 - Error address capture low */ + u32 erradrrh; /* 0xE54 - Error address capture high */ + u32 errctl; /* 0xE58 - Error control */ + u8 res9[0x1A4]; +}; + +struct sram_parameters { + unsigned int sram_size; + uint64_t sram_offset; +}; + +extern int instantiate_cache_sram(struct platform_device *dev, + struct sram_parameters sram_params); +extern void remove_cache_sram(struct platform_device *dev); + +#endif /* __FSL_85XX_CACHE_CTLR_H__ */ diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c new file mode 100644 index 00000000000..54fb1922fe3 --- /dev/null +++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c @@ -0,0 +1,159 @@ +/* + * Copyright 2009-2010 Freescale Semiconductor, Inc. + * + * Simple memory allocator abstraction for QorIQ (P1/P2) based Cache-SRAM + * + * Author: Vivek Mahajan <vivek.mahajan@freescale.com> + * + * This file is derived from the original work done + * by Sylvain Munaut for the Bestcomm SRAM allocator. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/of_platform.h> +#include <asm/pgtable.h> +#include <asm/fsl_85xx_cache_sram.h> + +#include "fsl_85xx_cache_ctlr.h" + +struct mpc85xx_cache_sram *cache_sram; + +void *mpc85xx_cache_sram_alloc(unsigned int size, + phys_addr_t *phys, unsigned int align) +{ + unsigned long offset; + unsigned long flags; + + if (unlikely(cache_sram == NULL)) + return NULL; + + if (!size || (size > cache_sram->size) || (align > cache_sram->size)) { + pr_err("%s(): size(=%x) or align(=%x) zero or too big\n", + __func__, size, align); + return NULL; + } + + if ((align & (align - 1)) || align <= 1) { + pr_err("%s(): align(=%x) must be power of two and >1\n", + __func__, align); + return NULL; + } + + spin_lock_irqsave(&cache_sram->lock, flags); + offset = rh_alloc_align(cache_sram->rh, size, align, NULL); + spin_unlock_irqrestore(&cache_sram->lock, flags); + + if (IS_ERR_VALUE(offset)) + return NULL; + + *phys = cache_sram->base_phys + offset; + + return (unsigned char *)cache_sram->base_virt + offset; +} +EXPORT_SYMBOL(mpc85xx_cache_sram_alloc); + +void mpc85xx_cache_sram_free(void *ptr) +{ + unsigned long flags; + BUG_ON(!ptr); + + spin_lock_irqsave(&cache_sram->lock, flags); + rh_free(cache_sram->rh, ptr - cache_sram->base_virt); + spin_unlock_irqrestore(&cache_sram->lock, flags); +} +EXPORT_SYMBOL(mpc85xx_cache_sram_free); + +int __init instantiate_cache_sram(struct platform_device *dev, + struct sram_parameters sram_params) +{ + int ret = 0; + + if (cache_sram) { + dev_err(&dev->dev, "Already initialized cache-sram\n"); + return -EBUSY; + } + + cache_sram = kzalloc(sizeof(struct mpc85xx_cache_sram), GFP_KERNEL); + if (!cache_sram) { + dev_err(&dev->dev, "Out of memory for cache_sram structure\n"); + return -ENOMEM; + } + + cache_sram->base_phys = sram_params.sram_offset; + cache_sram->size = sram_params.sram_size; + + if (!request_mem_region(cache_sram->base_phys, cache_sram->size, + "fsl_85xx_cache_sram")) { + dev_err(&dev->dev, "%s: request memory failed\n", + dev->dev.of_node->full_name); + ret = -ENXIO; + goto out_free; + } + + cache_sram->base_virt = ioremap_flags(cache_sram->base_phys, + cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL); + if (!cache_sram->base_virt) { + dev_err(&dev->dev, "%s: ioremap_flags failed\n", + dev->dev.of_node->full_name); + ret = -ENOMEM; + goto out_release; + } + + cache_sram->rh = rh_create(sizeof(unsigned int)); + if (IS_ERR(cache_sram->rh)) { + dev_err(&dev->dev, "%s: Unable to create remote heap\n", + dev->dev.of_node->full_name); + ret = PTR_ERR(cache_sram->rh); + goto out_unmap; + } + + rh_attach_region(cache_sram->rh, 0, cache_sram->size); + spin_lock_init(&cache_sram->lock); + + dev_info(&dev->dev, "[base:0x%llx, size:0x%x] configured and loaded\n", + (unsigned long long)cache_sram->base_phys, cache_sram->size); + + return 0; + +out_unmap: + iounmap(cache_sram->base_virt); + +out_release: + release_mem_region(cache_sram->base_phys, cache_sram->size); + +out_free: + kfree(cache_sram); + return ret; +} + +void remove_cache_sram(struct platform_device *dev) +{ + BUG_ON(!cache_sram); + + rh_detach_region(cache_sram->rh, 0, cache_sram->size); + rh_destroy(cache_sram->rh); + + iounmap(cache_sram->base_virt); + release_mem_region(cache_sram->base_phys, cache_sram->size); + + kfree(cache_sram); + cache_sram = NULL; + + dev_info(&dev->dev, "MPC85xx Cache-SRAM driver unloaded\n"); +} diff --git a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c new file mode 100644 index 00000000000..cc8d6556d79 --- /dev/null +++ b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c @@ -0,0 +1,231 @@ +/* + * Copyright 2009-2010 Freescale Semiconductor, Inc. + * + * QorIQ (P1/P2) L2 controller init for Cache-SRAM instantiation + * + * Author: Vivek Mahajan <vivek.mahajan@freescale.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/kernel.h> +#include <linux/of_platform.h> +#include <asm/io.h> + +#include "fsl_85xx_cache_ctlr.h" + +static char *sram_size; +static char *sram_offset; +struct mpc85xx_l2ctlr __iomem *l2ctlr; + +static long get_cache_sram_size(void) +{ + unsigned long val; + + if (!sram_size || (strict_strtoul(sram_size, 0, &val) < 0)) + return -EINVAL; + + return val; +} + +static long get_cache_sram_offset(void) +{ + unsigned long val; + + if (!sram_offset || (strict_strtoul(sram_offset, 0, &val) < 0)) + return -EINVAL; + + return val; +} + +static int __init get_size_from_cmdline(char *str) +{ + if (!str) + return 0; + + sram_size = str; + return 1; +} + +static int __init get_offset_from_cmdline(char *str) +{ + if (!str) + return 0; + + sram_offset = str; + return 1; +} + +__setup("cache-sram-size=", get_size_from_cmdline); +__setup("cache-sram-offset=", get_offset_from_cmdline); + +static int __devinit mpc85xx_l2ctlr_of_probe(struct platform_device *dev, + const struct of_device_id *match) +{ + long rval; + unsigned int rem; + unsigned char ways; + const unsigned int *prop; + unsigned int l2cache_size; + struct sram_parameters sram_params; + + if (!dev->dev.of_node) { + dev_err(&dev->dev, "Device's OF-node is NULL\n"); + return -EINVAL; + } + + prop = of_get_property(dev->dev.of_node, "cache-size", NULL); + if (!prop) { + dev_err(&dev->dev, "Missing L2 cache-size\n"); + return -EINVAL; + } + l2cache_size = *prop; + + sram_params.sram_size = get_cache_sram_size(); + if (sram_params.sram_size <= 0) { + dev_err(&dev->dev, + "Entire L2 as cache, Aborting Cache-SRAM stuff\n"); + return -EINVAL; + } + + sram_params.sram_offset = get_cache_sram_offset(); + if (sram_params.sram_offset <= 0) { + dev_err(&dev->dev, + "Entire L2 as cache, provide a valid sram offset\n"); + return -EINVAL; + } + + + rem = l2cache_size % sram_params.sram_size; + ways = LOCK_WAYS_FULL * sram_params.sram_size / l2cache_size; + if (rem || (ways & (ways - 1))) { + dev_err(&dev->dev, "Illegal cache-sram-size in command line\n"); + return -EINVAL; + } + + l2ctlr = of_iomap(dev->dev.of_node, 0); + if (!l2ctlr) { + dev_err(&dev->dev, "Can't map L2 controller\n"); + return -EINVAL; + } + + /* + * Write bits[0-17] to srbar0 + */ + out_be32(&l2ctlr->srbar0, + sram_params.sram_offset & L2SRAM_BAR_MSK_LO18); + + /* + * Write bits[18-21] to srbare0 + */ +#ifdef CONFIG_PHYS_64BIT + out_be32(&l2ctlr->srbarea0, + (sram_params.sram_offset >> 32) & L2SRAM_BARE_MSK_HI4); +#endif + + clrsetbits_be32(&l2ctlr->ctl, L2CR_L2E, L2CR_L2FI); + + switch (ways) { + case LOCK_WAYS_EIGHTH: + setbits32(&l2ctlr->ctl, + L2CR_L2E | L2CR_L2FI | L2CR_SRAM_EIGHTH); + break; + + case LOCK_WAYS_TWO_EIGHTH: + setbits32(&l2ctlr->ctl, + L2CR_L2E | L2CR_L2FI | L2CR_SRAM_QUART); + break; + + case LOCK_WAYS_HALF: + setbits32(&l2ctlr->ctl, + L2CR_L2E | L2CR_L2FI | L2CR_SRAM_HALF); + break; + + case LOCK_WAYS_FULL: + default: + setbits32(&l2ctlr->ctl, + L2CR_L2E | L2CR_L2FI | L2CR_SRAM_FULL); + break; + } + eieio(); + + rval = instantiate_cache_sram(dev, sram_params); + if (rval < 0) { + dev_err(&dev->dev, "Can't instantiate Cache-SRAM\n"); + iounmap(l2ctlr); + return -EINVAL; + } + + return 0; +} + +static int __devexit mpc85xx_l2ctlr_of_remove(struct platform_device *dev) +{ + BUG_ON(!l2ctlr); + + iounmap(l2ctlr); + remove_cache_sram(dev); + dev_info(&dev->dev, "MPC85xx L2 controller unloaded\n"); + + return 0; +} + +static struct of_device_id mpc85xx_l2ctlr_of_match[] = { + { + .compatible = "fsl,p2020-l2-cache-controller", + }, + { + .compatible = "fsl,p2010-l2-cache-controller", + }, + { + .compatible = "fsl,p1020-l2-cache-controller", + }, + { + .compatible = "fsl,p1011-l2-cache-controller", + }, + { + .compatible = "fsl,p1013-l2-cache-controller", + }, + { + .compatible = "fsl,p1022-l2-cache-controller", + }, + {}, +}; + +static struct of_platform_driver mpc85xx_l2ctlr_of_platform_driver = { + .driver = { + .name = "fsl-l2ctlr", + .owner = THIS_MODULE, + .of_match_table = mpc85xx_l2ctlr_of_match, + }, + .probe = mpc85xx_l2ctlr_of_probe, + .remove = __devexit_p(mpc85xx_l2ctlr_of_remove), +}; + +static __init int mpc85xx_l2ctlr_of_init(void) +{ + return of_register_platform_driver(&mpc85xx_l2ctlr_of_platform_driver); +} + +static void __exit mpc85xx_l2ctlr_of_exit(void) +{ + of_unregister_platform_driver(&mpc85xx_l2ctlr_of_platform_driver); +} + +subsys_initcall(mpc85xx_l2ctlr_of_init); +module_exit(mpc85xx_l2ctlr_of_exit); + +MODULE_DESCRIPTION("Freescale MPC85xx L2 controller init"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 87991d3abba..108d76fa8f1 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -24,6 +24,7 @@ #include <asm/ppc-pci.h> #include <asm/mpic.h> #include "fsl_msi.h" +#include "fsl_pci.h" LIST_HEAD(msi_head); @@ -51,8 +52,8 @@ static void fsl_msi_end_irq(unsigned int virq) } static struct irq_chip fsl_msi_chip = { - .mask = mask_msi_irq, - .unmask = unmask_msi_irq, + .irq_mask = mask_msi_irq, + .irq_unmask = unmask_msi_irq, .ack = fsl_msi_end_irq, .name = "FSL-MSI", }; @@ -125,13 +126,11 @@ static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq, { struct fsl_msi *msi_data = fsl_msi_data; struct pci_controller *hose = pci_bus_to_host(pdev->bus); - u32 base = 0; + u64 base = fsl_pci_immrbar_base(hose); - pci_bus_read_config_dword(hose->bus, - PCI_DEVFN(0, 0), PCI_BASE_ADDRESS_0, &base); + msg->address_lo = msi_data->msi_addr_lo + lower_32_bits(base); + msg->address_hi = msi_data->msi_addr_hi + upper_32_bits(base); - msg->address_lo = msi_data->msi_addr_lo + base; - msg->address_hi = msi_data->msi_addr_hi; msg->data = hwirq; pr_debug("%s: allocated srs: %d, ibs: %d\n", diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 4ae93322525..818f7c6c8fa 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1,7 +1,7 @@ /* * MPC83xx/85xx/86xx PCI/PCIE support routing. * - * Copyright 2007-2009 Freescale Semiconductor, Inc. + * Copyright 2007-2010 Freescale Semiconductor, Inc. * Copyright 2008-2009 MontaVista Software, Inc. * * Initial author: Xianghua Xiao <x.xiao@freescale.com> @@ -34,7 +34,7 @@ #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> -static int fsl_pcie_bus_fixup; +static int fsl_pcie_bus_fixup, is_mpc83xx_pci; static void __init quirk_fsl_pcie_header(struct pci_dev *dev) { @@ -407,10 +407,18 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2010E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2010, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2020E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2020, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2040E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2040, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P3041E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P3041, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4040E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4040, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4080E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4080, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P5010E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P5010, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P5020E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P5020, quirk_fsl_pcie_header); #endif /* CONFIG_FSL_SOC_BOOKE || CONFIG_PPC_86xx */ #if defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_MPC512x) @@ -430,6 +438,13 @@ struct mpc83xx_pcie_priv { u32 dev_base; }; +struct pex_inbound_window { + u32 ar; + u32 tar; + u32 barl; + u32 barh; +}; + /* * With the convention of u-boot, the PCIE outbound window 0 serves * as configuration transactions outbound. @@ -437,6 +452,8 @@ struct mpc83xx_pcie_priv { #define PEX_OUTWIN0_BAR 0xCA4 #define PEX_OUTWIN0_TAL 0xCA8 #define PEX_OUTWIN0_TAH 0xCAC +#define PEX_RC_INWIN_BASE 0xE60 +#define PEX_RCIWARn_EN 0x1 static int mpc83xx_pcie_exclude_device(struct pci_bus *bus, unsigned int devfn) { @@ -604,6 +621,8 @@ int __init mpc83xx_add_bridge(struct device_node *dev) const int *bus_range; int primary; + is_mpc83xx_pci = 1; + if (!of_device_is_available(dev)) { pr_warning("%s: disabled by the firmware.\n", dev->full_name); @@ -683,3 +702,40 @@ err0: return ret; } #endif /* CONFIG_PPC_83xx */ + +u64 fsl_pci_immrbar_base(struct pci_controller *hose) +{ +#ifdef CONFIG_PPC_83xx + if (is_mpc83xx_pci) { + struct mpc83xx_pcie_priv *pcie = hose->dn->data; + struct pex_inbound_window *in; + int i; + + /* Walk the Root Complex Inbound windows to match IMMR base */ + in = pcie->cfg_type0 + PEX_RC_INWIN_BASE; + for (i = 0; i < 4; i++) { + /* not enabled, skip */ + if (!in_le32(&in[i].ar) & PEX_RCIWARn_EN) + continue; + + if (get_immrbase() == in_le32(&in[i].tar)) + return (u64)in_le32(&in[i].barh) << 32 | + in_le32(&in[i].barl); + } + + printk(KERN_WARNING "could not find PCI BAR matching IMMR\n"); + } +#endif + +#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx) + if (!is_mpc83xx_pci) { + u32 base; + + pci_bus_read_config_dword(hose->bus, + PCI_DEVFN(0, 0), PCI_BASE_ADDRESS_0, &base); + return base; + } +#endif + + return 0; +} diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index a9d8bbebed8..8ad72a11f77 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -88,6 +88,7 @@ struct ccsr_pci { extern int fsl_add_bridge(struct device_node *dev, int is_primary); extern void fsl_pcibios_fixup_bus(struct pci_bus *bus); extern int mpc83xx_add_bridge(struct device_node *dev); +u64 fsl_pci_immrbar_base(struct pci_controller *hose); #endif /* __POWERPC_FSL_PCI_H */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 3017532319c..412763672d2 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -117,44 +117,59 @@ struct rio_atmu_regs { }; struct rio_msg_regs { - u32 omr; - u32 osr; + u32 omr; /* 0xD_3000 - Outbound message 0 mode register */ + u32 osr; /* 0xD_3004 - Outbound message 0 status register */ u32 pad1; - u32 odqdpar; + u32 odqdpar; /* 0xD_300C - Outbound message 0 descriptor queue + dequeue pointer address register */ u32 pad2; - u32 osar; - u32 odpr; - u32 odatr; - u32 odcr; + u32 osar; /* 0xD_3014 - Outbound message 0 source address + register */ + u32 odpr; /* 0xD_3018 - Outbound message 0 destination port + register */ + u32 odatr; /* 0xD_301C - Outbound message 0 destination attributes + Register*/ + u32 odcr; /* 0xD_3020 - Outbound message 0 double-word count + register */ u32 pad3; - u32 odqepar; + u32 odqepar; /* 0xD_3028 - Outbound message 0 descriptor queue + enqueue pointer address register */ u32 pad4[13]; - u32 imr; - u32 isr; + u32 imr; /* 0xD_3060 - Inbound message 0 mode register */ + u32 isr; /* 0xD_3064 - Inbound message 0 status register */ u32 pad5; - u32 ifqdpar; + u32 ifqdpar; /* 0xD_306C - Inbound message 0 frame queue dequeue + pointer address register*/ u32 pad6; - u32 ifqepar; + u32 ifqepar; /* 0xD_3074 - Inbound message 0 frame queue enqueue + pointer address register */ u32 pad7[226]; - u32 odmr; - u32 odsr; + u32 odmr; /* 0xD_3400 - Outbound doorbell mode register */ + u32 odsr; /* 0xD_3404 - Outbound doorbell status register */ u32 res0[4]; - u32 oddpr; - u32 oddatr; + u32 oddpr; /* 0xD_3418 - Outbound doorbell destination port + register */ + u32 oddatr; /* 0xD_341c - Outbound doorbell destination attributes + register */ u32 res1[3]; - u32 odretcr; + u32 odretcr; /* 0xD_342C - Outbound doorbell retry error threshold + configuration register */ u32 res2[12]; - u32 dmr; - u32 dsr; + u32 dmr; /* 0xD_3460 - Inbound doorbell mode register */ + u32 dsr; /* 0xD_3464 - Inbound doorbell status register */ u32 pad8; - u32 dqdpar; + u32 dqdpar; /* 0xD_346C - Inbound doorbell queue dequeue Pointer + address register */ u32 pad9; - u32 dqepar; + u32 dqepar; /* 0xD_3474 - Inbound doorbell Queue enqueue pointer + address register */ u32 pad10[26]; - u32 pwmr; - u32 pwsr; - u32 epwqbar; - u32 pwqbar; + u32 pwmr; /* 0xD_34E0 - Inbound port-write mode register */ + u32 pwsr; /* 0xD_34E4 - Inbound port-write status register */ + u32 epwqbar; /* 0xD_34E8 - Extended Port-Write Queue Base Address + register */ + u32 pwqbar; /* 0xD_34EC - Inbound port-write queue base address + register */ }; struct rio_tx_desc { diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index b91f7acdda6..19e5015e039 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -209,186 +209,29 @@ static int __init of_add_fixed_phys(void) arch_initcall(of_add_fixed_phys); #endif /* CONFIG_FIXED_PHY */ -static enum fsl_usb2_phy_modes determine_usb_phy(const char *phy_type) -{ - if (!phy_type) - return FSL_USB2_PHY_NONE; - if (!strcasecmp(phy_type, "ulpi")) - return FSL_USB2_PHY_ULPI; - if (!strcasecmp(phy_type, "utmi")) - return FSL_USB2_PHY_UTMI; - if (!strcasecmp(phy_type, "utmi_wide")) - return FSL_USB2_PHY_UTMI_WIDE; - if (!strcasecmp(phy_type, "serial")) - return FSL_USB2_PHY_SERIAL; - - return FSL_USB2_PHY_NONE; -} +#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx) +static __be32 __iomem *rstcr; -static int __init fsl_usb_of_init(void) +static int __init setup_rstcr(void) { struct device_node *np; - unsigned int i = 0; - struct platform_device *usb_dev_mph = NULL, *usb_dev_dr_host = NULL, - *usb_dev_dr_client = NULL; - int ret; - - for_each_compatible_node(np, NULL, "fsl-usb2-mph") { - struct resource r[2]; - struct fsl_usb2_platform_data usb_data; - const unsigned char *prop = NULL; - - memset(&r, 0, sizeof(r)); - memset(&usb_data, 0, sizeof(usb_data)); - - ret = of_address_to_resource(np, 0, &r[0]); - if (ret) - goto err; - - of_irq_to_resource(np, 0, &r[1]); - - usb_dev_mph = - platform_device_register_simple("fsl-ehci", i, r, 2); - if (IS_ERR(usb_dev_mph)) { - ret = PTR_ERR(usb_dev_mph); - goto err; - } - - usb_dev_mph->dev.coherent_dma_mask = 0xffffffffUL; - usb_dev_mph->dev.dma_mask = &usb_dev_mph->dev.coherent_dma_mask; - - usb_data.operating_mode = FSL_USB2_MPH_HOST; - - prop = of_get_property(np, "port0", NULL); - if (prop) - usb_data.port_enables |= FSL_USB2_PORT0_ENABLED; - - prop = of_get_property(np, "port1", NULL); - if (prop) - usb_data.port_enables |= FSL_USB2_PORT1_ENABLED; - - prop = of_get_property(np, "phy_type", NULL); - usb_data.phy_mode = determine_usb_phy(prop); - - ret = - platform_device_add_data(usb_dev_mph, &usb_data, - sizeof(struct - fsl_usb2_platform_data)); - if (ret) - goto unreg_mph; - i++; - } - - for_each_compatible_node(np, NULL, "fsl-usb2-dr") { - struct resource r[2]; - struct fsl_usb2_platform_data usb_data; - const unsigned char *prop = NULL; - if (!of_device_is_available(np)) - continue; - - memset(&r, 0, sizeof(r)); - memset(&usb_data, 0, sizeof(usb_data)); - - ret = of_address_to_resource(np, 0, &r[0]); - if (ret) - goto unreg_mph; - - of_irq_to_resource(np, 0, &r[1]); - - prop = of_get_property(np, "dr_mode", NULL); - - if (!prop || !strcmp(prop, "host")) { - usb_data.operating_mode = FSL_USB2_DR_HOST; - usb_dev_dr_host = platform_device_register_simple( - "fsl-ehci", i, r, 2); - if (IS_ERR(usb_dev_dr_host)) { - ret = PTR_ERR(usb_dev_dr_host); - goto err; - } - } else if (prop && !strcmp(prop, "peripheral")) { - usb_data.operating_mode = FSL_USB2_DR_DEVICE; - usb_dev_dr_client = platform_device_register_simple( - "fsl-usb2-udc", i, r, 2); - if (IS_ERR(usb_dev_dr_client)) { - ret = PTR_ERR(usb_dev_dr_client); - goto err; - } - } else if (prop && !strcmp(prop, "otg")) { - usb_data.operating_mode = FSL_USB2_DR_OTG; - usb_dev_dr_host = platform_device_register_simple( - "fsl-ehci", i, r, 2); - if (IS_ERR(usb_dev_dr_host)) { - ret = PTR_ERR(usb_dev_dr_host); - goto err; - } - usb_dev_dr_client = platform_device_register_simple( - "fsl-usb2-udc", i, r, 2); - if (IS_ERR(usb_dev_dr_client)) { - ret = PTR_ERR(usb_dev_dr_client); - goto err; - } - } else { - ret = -EINVAL; - goto err; + for_each_node_by_name(np, "global-utilities") { + if ((of_get_property(np, "fsl,has-rstcr", NULL))) { + rstcr = of_iomap(np, 0) + 0xb0; + if (!rstcr) + printk (KERN_ERR "Error: reset control " + "register not mapped!\n"); + break; } - - prop = of_get_property(np, "phy_type", NULL); - usb_data.phy_mode = determine_usb_phy(prop); - - if (usb_dev_dr_host) { - usb_dev_dr_host->dev.coherent_dma_mask = 0xffffffffUL; - usb_dev_dr_host->dev.dma_mask = &usb_dev_dr_host-> - dev.coherent_dma_mask; - if ((ret = platform_device_add_data(usb_dev_dr_host, - &usb_data, sizeof(struct - fsl_usb2_platform_data)))) - goto unreg_dr; - } - if (usb_dev_dr_client) { - usb_dev_dr_client->dev.coherent_dma_mask = 0xffffffffUL; - usb_dev_dr_client->dev.dma_mask = &usb_dev_dr_client-> - dev.coherent_dma_mask; - if ((ret = platform_device_add_data(usb_dev_dr_client, - &usb_data, sizeof(struct - fsl_usb2_platform_data)))) - goto unreg_dr; - } - i++; } - return 0; - -unreg_dr: - if (usb_dev_dr_host) - platform_device_unregister(usb_dev_dr_host); - if (usb_dev_dr_client) - platform_device_unregister(usb_dev_dr_client); -unreg_mph: - if (usb_dev_mph) - platform_device_unregister(usb_dev_mph); -err: - return ret; -} - -arch_initcall(fsl_usb_of_init); - -#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx) -static __be32 __iomem *rstcr; -static int __init setup_rstcr(void) -{ - struct device_node *np; - np = of_find_node_by_name(NULL, "global-utilities"); - if ((np && of_get_property(np, "fsl,has-rstcr", NULL))) { - rstcr = of_iomap(np, 0) + 0xb0; - if (!rstcr) - printk (KERN_EMERG "Error: reset control register " - "not mapped!\n"); - } else if (ppc_md.restart == fsl_rstcr_restart) + if (!rstcr && ppc_md.restart == fsl_rstcr_restart) printk(KERN_ERR "No RSTCR register, warm reboot won't work\n"); if (np) of_node_put(np); + return 0; } diff --git a/arch/powerpc/sysdev/mpc8xxx_gpio.c b/arch/powerpc/sysdev/mpc8xxx_gpio.c index 2b69084d0f0..c0ea05e87f1 100644 --- a/arch/powerpc/sysdev/mpc8xxx_gpio.c +++ b/arch/powerpc/sysdev/mpc8xxx_gpio.c @@ -330,6 +330,9 @@ static int __init mpc8xxx_add_gpiochips(void) for_each_compatible_node(np, NULL, "fsl,mpc8610-gpio") mpc8xxx_add_controller(np); + for_each_compatible_node(np, NULL, "fsl,qoriq-gpio") + mpc8xxx_add_controller(np); + return 0; } arch_initcall(mpc8xxx_add_gpiochips); diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c index 3b6a9a43718..320ad5a9a25 100644 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c @@ -39,24 +39,24 @@ static struct mpic *msi_mpic; -static void mpic_pasemi_msi_mask_irq(unsigned int irq) +static void mpic_pasemi_msi_mask_irq(struct irq_data *data) { - pr_debug("mpic_pasemi_msi_mask_irq %d\n", irq); - mask_msi_irq(irq); - mpic_mask_irq(irq); + pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq); + mask_msi_irq(data); + mpic_mask_irq(data->irq); } -static void mpic_pasemi_msi_unmask_irq(unsigned int irq) +static void mpic_pasemi_msi_unmask_irq(struct irq_data *data) { - pr_debug("mpic_pasemi_msi_unmask_irq %d\n", irq); - mpic_unmask_irq(irq); - unmask_msi_irq(irq); + pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq); + mpic_unmask_irq(data->irq); + unmask_msi_irq(data); } static struct irq_chip mpic_pasemi_msi_chip = { - .shutdown = mpic_pasemi_msi_mask_irq, - .mask = mpic_pasemi_msi_mask_irq, - .unmask = mpic_pasemi_msi_unmask_irq, + .irq_shutdown = mpic_pasemi_msi_mask_irq, + .irq_mask = mpic_pasemi_msi_mask_irq, + .irq_unmask = mpic_pasemi_msi_unmask_irq, .eoi = mpic_end_irq, .set_type = mpic_set_irq_type, .set_affinity = mpic_set_affinity, diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index bcbfe79c704..a2b028b4a20 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -23,22 +23,22 @@ /* A bit ugly, can we get this from the pci_dev somehow? */ static struct mpic *msi_mpic; -static void mpic_u3msi_mask_irq(unsigned int irq) +static void mpic_u3msi_mask_irq(struct irq_data *data) { - mask_msi_irq(irq); - mpic_mask_irq(irq); + mask_msi_irq(data); + mpic_mask_irq(data->irq); } -static void mpic_u3msi_unmask_irq(unsigned int irq) +static void mpic_u3msi_unmask_irq(struct irq_data *data) { - mpic_unmask_irq(irq); - unmask_msi_irq(irq); + mpic_unmask_irq(data->irq); + unmask_msi_irq(data); } static struct irq_chip mpic_u3msi_chip = { - .shutdown = mpic_u3msi_mask_irq, - .mask = mpic_u3msi_mask_irq, - .unmask = mpic_u3msi_unmask_irq, + .irq_shutdown = mpic_u3msi_mask_irq, + .irq_mask = mpic_u3msi_mask_irq, + .irq_unmask = mpic_u3msi_unmask_irq, .eoi = mpic_end_irq, .set_type = mpic_set_irq_type, .set_affinity = mpic_set_affinity, diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c index 24a0bb955b1..4260f368db5 100644 --- a/arch/powerpc/sysdev/pmi.c +++ b/arch/powerpc/sysdev/pmi.c @@ -114,7 +114,7 @@ static void pmi_notify_handlers(struct work_struct *work) spin_lock(&data->handler_spinlock); list_for_each_entry(handler, &data->handler, node) { - pr_debug(KERN_INFO "pmi: notifying handler %p\n", handler); + pr_debug("pmi: notifying handler %p\n", handler); if (handler->type == data->msg.type) handler->handle_pmi_message(data->msg); } diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index faa81b6a661..c168c54e3c4 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -4,9 +4,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror GCOV_PROFILE := n -ifdef CONFIG_PPC64 -EXTRA_CFLAGS += -mno-minimal-toc -endif +ccflags-$(CONFIG_PPC64) := -mno-minimal-toc obj-y += xmon.o start.o nonstdio.o |