332 files changed, 16203 insertions, 19134 deletions
diff --git a/Documentation/devicetree/bindings/input/cap1106.txt b/Documentation/devicetree/bindings/input/cap11xx.txt index 4b463904cba..7d0a3009771 100644 --- a/Documentation/devicetree/bindings/input/cap1106.txt +++ b/Documentation/devicetree/bindings/input/cap11xx.txt @@ -1,14 +1,16 @@ -Device tree bindings for Microchip CAP1106, 6 channel capacitive touch sensor +Device tree bindings for Microchip CAP11xx based capacitive touch sensors -The node for this driver must be a child of a I2C controller node, as the +The node for this device must be a child of an I2C controller node, as the device communicates via I2C only. Required properties: - compatible: Must be "microchip,cap1106" + compatible: Must contain one of: + "microchip,cap1106" + "microchip,cap1126" + "microchip,cap1188" reg: The I2C slave address of the device. - Only 0x28 is valid. interrupts: Property describing the interrupt line the device's ALERT#/CM_IRQ# pin is connected to. @@ -26,6 +28,10 @@ Optional properties: Valid values are 1, 2, 4, and 8. By default, a gain of 1 is set. + microchip,irq-active-high: By default the interrupt pin is active low + open drain. This property allows using the active + high push-pull output. + linux,keycodes: Specifies an array of numeric keycode values to be used for the channels. If this property is omitted, KEY_A, KEY_B, etc are used as @@ -43,11 +49,11 @@ i2c_controller { autorepeat; microchip,sensor-gain = <2>; - linux,keycodes = <103 /* KEY_UP */ - 106 /* KEY_RIGHT */ - 108 /* KEY_DOWN */ - 105 /* KEY_LEFT */ - 109 /* KEY_PAGEDOWN */ - 104>; /* KEY_PAGEUP */ + linux,keycodes = <103>, /* KEY_UP */ + <106>, /* KEY_RIGHT */ + <108>, /* KEY_DOWN */ + <105>, /* KEY_LEFT */ + <109>, /* KEY_PAGEDOWN */ + <104>; /* KEY_PAGEUP */ }; } diff --git a/Documentation/devicetree/bindings/input/elan_i2c.txt b/Documentation/devicetree/bindings/input/elan_i2c.txt new file mode 100644 index 00000000000..ee3242c4ba6 --- /dev/null +++ b/Documentation/devicetree/bindings/input/elan_i2c.txt @@ -0,0 +1,34 @@ +Elantech I2C Touchpad + +Required properties: +- compatible: must be "elan,ekth3000". +- reg: I2C address of the chip. +- interrupt-parent: a phandle for the interrupt controller (see interrupt + binding[0]). +- interrupts: interrupt to which the chip is connected (see interrupt + binding[0]). + +Optional properties: +- wakeup-source: touchpad can be used as a wakeup source. +- pinctrl-names: should be "default" (see pinctrl binding [1]). +- pinctrl-0: a phandle pointing to the pin settings for the device (see + pinctrl binding [1]). +- vcc-supply: a phandle for the regulator supplying 3.3V power. + +[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt +[1]: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt + +Example: + &i2c1 { + /* ... */ + + touchpad@15 { + compatible = "elan,ekth3000"; + reg = <0x15>; + interrupt-parent = <&gpio4>; + interrupts = <0x0 IRQ_TYPE_EDGE_FALLING>; + wakeup-source; + }; + + /* ... */ + }; diff --git a/Documentation/devicetree/bindings/input/elants_i2c.txt b/Documentation/devicetree/bindings/input/elants_i2c.txt new file mode 100644 index 00000000000..a765232e644 --- /dev/null +++ b/Documentation/devicetree/bindings/input/elants_i2c.txt @@ -0,0 +1,33 @@ +Elantech I2C Touchscreen + +Required properties: +- compatible: must be "elan,ekth3500". +- reg: I2C address of the chip. +- interrupt-parent: a phandle for the interrupt controller (see interrupt + binding[0]).
+- interrupts: interrupt to which the chip is connected (see interrupt + binding[0]). + +Optional properties: +- wakeup-source: touchscreen can be used as a wakeup source. +- pinctrl-names: should be "default" (see pinctrl binding [1]). +- pinctrl-0: a phandle pointing to the pin settings for the device (see + pinctrl binding [1]). + +[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt +[1]: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt + +Example: + &i2c1 { + /* ... */ + + touchscreen@10 { + compatible = "elan,ekth3500"; + reg = <0x10>; + interrupt-parent = <&gpio4>; + interrupts = <0x0 IRQ_TYPE_EDGE_FALLING>; + wakeup-source; + }; + + /* ... */ + }; diff --git a/Documentation/devicetree/bindings/input/gpio-keys.txt b/Documentation/devicetree/bindings/input/gpio-keys.txt index 5c2c02140a6..a4a38fcf2ed 100644 --- a/Documentation/devicetree/bindings/input/gpio-keys.txt +++ b/Documentation/devicetree/bindings/input/gpio-keys.txt @@ -10,10 +10,13 @@ Optional properties: Each button (key) is represented as a sub-node of "gpio-keys": Subnode properties: - - gpios: OF device-tree gpio specification. - label: Descriptive name of the key. - linux,code: Keycode to emit. +Required mutually exclusive subnode-properties: + - gpios: OF device-tree gpio specification. + - interrupts: the interrupt line for that input + Optional subnode-properties: - linux,input-type: Specify event type this button/key generates. If not specified defaults to <1> == EV_KEY. @@ -33,4 +36,9 @@ Example nodes: linux,code = <103>; gpios = <&gpio1 0 1>; }; + button@22 { + label = "GPIO Key DOWN"; + linux,code = <108>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; + }; ... diff --git a/Documentation/devicetree/bindings/mtd/atmel-nand.txt b/Documentation/devicetree/bindings/mtd/atmel-nand.txt index 6edc3b616e9..1fe6dde9849 100644 --- a/Documentation/devicetree/bindings/mtd/atmel-nand.txt +++ b/Documentation/devicetree/bindings/mtd/atmel-nand.txt @@ -5,7 +5,9 @@ Required properties: - reg : should specify localbus address and size used for the chip, and hardware ECC controller if available. If the hardware ECC is PMECC, it should contain address and size for - PMECC, PMECC Error Location controller and ROM which has lookup tables. + PMECC and PMECC Error Location controller. + The PMECC lookup table address and size in ROM are optional. If not + specified, the driver will build it at runtime. - atmel,nand-addr-offset : offset for the address latch. - atmel,nand-cmd-offset : offset for the command latch. - #address-cells, #size-cells : Must be present if the device has sub-nodes @@ -27,7 +29,7 @@ Optional properties: are: 512, 1024. - atmel,pmecc-lookup-table-offset : includes two offsets of lookup table in ROM for different sector size. First one is for sector size 512, the next is for - sector size 1024. + sector size 1024. If not specified, the driver will build the table at runtime. - nand-bus-width : 8 or 16 bus width if not present 8 - nand-on-flash-bbt: boolean to enable on flash bbt option if not present false - Nand Flash Controller(NFC) is a slave driver under Atmel nand flash diff --git a/Documentation/devicetree/bindings/mtd/diskonchip.txt b/Documentation/devicetree/bindings/mtd/diskonchip.txt new file mode 100644 index 00000000000..3e13bfdbea5 --- /dev/null +++ b/Documentation/devicetree/bindings/mtd/diskonchip.txt @@ -0,0 +1,15 @@ +M-Systems and Sandisk DiskOnChip devices + +M-System DiskOnChip G3 +====================== +The Sandisk (formerly M-Systems) docg3 is a NAND device of 64MB to 256MB.
+ +Required properties: + - compatible: should be "m-systems,diskonchip-g3" + - reg: register base and size + +Example: + docg3: flash@0 { + compatible = "m-systems,diskonchip-g3"; + reg = <0x0 0x2000>; + }; diff --git a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt index 36ef07d3c90..af8915b41cc 100644 --- a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt +++ b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt @@ -11,8 +11,8 @@ Required properties: are made in native endianness. - #address-cells, #size-cells : Must be present if the device has sub-nodes representing partitions. -- gpios : specifies the gpio pins to control the NAND device. nwp is an - optional gpio and may be set to 0 if not present. +- gpios : Specifies the GPIO pins to control the NAND device. The order of + GPIO references is: RDY, nCE, ALE, CLE, and an optional nWP. Optional properties: - bank-width : Width (in bytes) of the device. If not present, the width @@ -35,11 +35,11 @@ gpio-nand@1,0 { reg = <1 0x0000 0x2>; #address-cells = <1>; #size-cells = <1>; - gpios = <&banka 1 0 /* rdy */ - &banka 2 0 /* nce */ - &banka 3 0 /* ale */ - &banka 4 0 /* cle */ - 0 /* nwp */>; + gpios = <&banka 1 0>, /* RDY */ + <&banka 2 0>, /* nCE */ + <&banka 3 0>, /* ALE */ + <&banka 4 0>, /* CLE */ + <0>; /* nWP */ partition@0 { ... diff --git a/Documentation/devicetree/bindings/mtd/sunxi-nand.txt b/Documentation/devicetree/bindings/mtd/sunxi-nand.txt new file mode 100644 index 00000000000..0273adb8638 --- /dev/null +++ b/Documentation/devicetree/bindings/mtd/sunxi-nand.txt @@ -0,0 +1,45 @@ +Allwinner NAND Flash Controller (NFC) + +Required properties: +- compatible : "allwinner,sun4i-a10-nand". +- reg : shall contain registers location and length for data and reg. +- interrupts : shall define the nand controller interrupt. +- #address-cells: shall be set to 1. Encode the nand CS. +- #size-cells : shall be set to 0. +- clocks : shall reference nand controller clocks. +- clock-names : nand controller internal clock names. Shall contain : + * "ahb" : AHB gating clock + * "mod" : nand controller clock + +Optional child nodes: +Child nodes represent the available nand chips. + +Optional properties: +- allwinner,rb : shall contain the native Ready/Busy ids. + or +- rb-gpios : shall contain the gpios used as R/B pins. +- nand-ecc-mode : one of the supported ECC modes ("hw", "hw_syndrome", "soft", + "soft_bch" or "none") + +see Documentation/devicetree/bindings/mtd/nand.txt for generic bindings. + + +Examples: +nfc: nand@01c03000 { + compatible = "allwinner,sun4i-a10-nand"; + reg = <0x01c03000 0x1000>; + interrupts = <0 37 1>; + clocks = <&ahb_gates 13>, <&nand_clk>; + clock-names = "ahb", "mod"; + #address-cells = <1>; + #size-cells = <0>; + pinctrl-names = "default"; + pinctrl-0 = <&nand_pins_a &nand_cs0_pins_a &nand_rb0_pins_a>; + status = "okay"; + + nand@0 { + reg = <0>; + allwinner,rb = <0>; + nand-ecc-mode = "soft_bch"; + }; +}; diff --git a/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt b/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt new file mode 100644 index 00000000000..cfda0d57d30 --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt @@ -0,0 +1,29 @@ +Device-Tree bindings for Atmel's HLCDC (High-end LCD Controller) PWM driver + +The Atmel HLCDC PWM is a subdevice of the HLCDC MFD device. +See ../mfd/atmel-hlcdc.txt for more details.
+ +Required properties: + - compatible: value should be one of the following: + "atmel,hlcdc-pwm" + - pinctrl-names: the pin control state names. Should contain "default". + - pinctrl-0: should contain the pinctrl states described by pinctrl + default. + - #pwm-cells: should be set to 3. This PWM chip uses the default 3-cell + binding defined in pwm.txt in this directory. + +Example: + + hlcdc: hlcdc@f0030000 { + compatible = "atmel,sama5d3-hlcdc"; + reg = <0xf0030000 0x2000>; + clocks = <&lcdc_clk>, <&lcdck>, <&clk32k>; + clock-names = "periph_clk", "sys_clk", "slow_clk"; + + hlcdc_pwm: hlcdc-pwm { + compatible = "atmel,hlcdc-pwm"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_lcd_pwm>; + #pwm-cells = <3>; + }; + }; diff --git a/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt b/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt new file mode 100644 index 00000000000..fb6fb31bc4c --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt @@ -0,0 +1,30 @@ +BCM2835 PWM controller (Raspberry Pi controller) + +Required properties: +- compatible: should be "brcm,bcm2835-pwm" +- reg: physical base address and length of the controller's registers +- clocks: This clock defines the base clock frequency of the PWM hardware + system; the period and the duty_cycle of the PWM signal are multiples of + the base period. +- #pwm-cells: Should be 2. See pwm.txt in this directory for a description of + the cells format. + +Examples: + +pwm@2020c000 { + compatible = "brcm,bcm2835-pwm"; + reg = <0x2020c000 0x28>; + clocks = <&clk_pwm>; + #pwm-cells = <2>; +}; + +clocks { + .... + clk_pwm: pwm { + compatible = "fixed-clock"; + reg = <3>; + #clock-cells = <0>; + clock-frequency = <9200000>; + }; + .... +}; diff --git a/Documentation/devicetree/bindings/thermal/armada-thermal.txt b/Documentation/devicetree/bindings/thermal/armada-thermal.txt index 4cf024929a3..4698e0edc20 100644 --- a/Documentation/devicetree/bindings/thermal/armada-thermal.txt +++ b/Documentation/devicetree/bindings/thermal/armada-thermal.txt @@ -5,17 +5,9 @@ Required properties: - compatible: Should be set to one of the following: marvell,armada370-thermal marvell,armada375-thermal - marvell,armada375-z1-thermal marvell,armada380-thermal marvell,armadaxp-thermal - Note: As the name suggests, "marvell,armada375-z1-thermal" - applies for the SoC Z1 stepping only. On such stepping - some quirks need to be done and the register offset differs - from the one in the A0 stepping. - The operating system may auto-detect the SoC stepping and - update the compatible and register offsets at runtime. - - reg: Device's register space. Two entries are expected, see the examples below. The first one is required for the sensor register; diff --git a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt new file mode 100644 index 00000000000..ef802de4957 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt @@ -0,0 +1,68 @@ +* Temperature Sensor ADC (TSADC) on rockchip SoCs + +Required properties: +- compatible : "rockchip,rk3288-tsadc" +- reg : physical base address of the controller and length of memory mapped + region. +- interrupts : The interrupt number to the cpu. The interrupt specifier format + depends on the interrupt controller. +- clocks : Must contain an entry for each entry in clock-names. +- clock-names : Shall be "tsadc" for the converter-clock, and "apb_pclk" for + the peripheral clock.
+- resets : Must contain an entry for each entry in reset-names. + See ../reset/reset.txt for details. +- reset-names : Must include the name "tsadc-apb". +- #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description. +- rockchip,hw-tshut-temp : The hardware-controlled shutdown temperature value. +- rockchip,hw-tshut-mode : The hardware-controlled shutdown mode 0:CRU 1:GPIO. +- rockchip,hw-tshut-polarity : The hardware-controlled active polarity 0:LOW + 1:HIGH. + +Example: +tsadc: tsadc@ff280000 { + compatible = "rockchip,rk3288-tsadc"; + reg = <0xff280000 0x100>; + interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>; + clock-names = "tsadc", "apb_pclk"; + resets = <&cru SRST_TSADC>; + reset-names = "tsadc-apb"; + pinctrl-names = "default"; + pinctrl-0 = <&otp_out>; + #thermal-sensor-cells = <1>; + rockchip,hw-tshut-temp = <95000>; + rockchip,hw-tshut-mode = <0>; + rockchip,hw-tshut-polarity = <0>; +}; + +Example: referring to thermal sensors: +thermal-zones { + cpu_thermal: cpu_thermal { + polling-delay-passive = <1000>; /* milliseconds */ + polling-delay = <5000>; /* milliseconds */ + + /* sensor ID */ + thermal-sensors = <&tsadc 1>; + + trips { + cpu_alert0: cpu_alert { + temperature = <70000>; /* millicelsius */ + hysteresis = <2000>; /* millicelsius */ + type = "passive"; + }; + cpu_crit: cpu_crit { + temperature = <90000>; /* millicelsius */ + hysteresis = <2000>; /* millicelsius */ + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&cpu_alert0>; + cooling-device = + <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + }; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt b/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt new file mode 100644 index 00000000000..ecf3ed76cd4 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt @@ -0,0 +1,53 @@ +Tegra124 SOCTHERM thermal management system + +The SOCTHERM IP block contains thermal sensors, support for polled +or interrupt-based thermal monitoring, CPU and GPU throttling based +on temperature trip points, and handling of external overcurrent +notifications. It is also used to manage emergency shutdown in an +overheating situation. + +Required properties : +- compatible : "nvidia,tegra124-soctherm". +- reg : Should contain 1 entry: + - SOCTHERM register set +- interrupts : Defines the interrupt used by SOCTHERM +- clocks : Must contain an entry for each entry in clock-names. + See ../clocks/clock-bindings.txt for details. +- clock-names : Must include the following entries: + - tsensor + - soctherm +- resets : Must contain an entry for each entry in reset-names. + See ../reset/reset.txt for details. +- reset-names : Must include the following entries: + - soctherm +- #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description + of this property. See <dt-bindings/thermal/tegra124-soctherm.h> for a + list of valid values when referring to thermal sensors.
+ + +Example : + + soctherm@0,700e2000 { + compatible = "nvidia,tegra124-soctherm"; + reg = <0x0 0x700e2000 0x0 0x1000>; + interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&tegra_car TEGRA124_CLK_TSENSOR>, + <&tegra_car TEGRA124_CLK_SOC_THERM>; + clock-names = "tsensor", "soctherm"; + resets = <&tegra_car 78>; + reset-names = "soctherm"; + + #thermal-sensor-cells = <1>; + }; + +Example: referring to thermal sensors : + + thermal-zones { + cpu { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>; + }; + }; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 423d47418e7..b1df0ad1306 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -47,6 +47,7 @@ dlink D-Link Corporation dmo Data Modul AG ebv EBV Elektronik edt Emerging Display Technologies +elan Elan Microelectronic Corp. emmicro EM Microelectronic energymicro Silicon Laboratories (formerly Energy Micro AS) epcos EPCOS AG diff --git a/Documentation/ia64/kvm.txt b/Documentation/ia64/kvm.txt deleted file mode 100644 index ffb5c80bec3..00000000000 --- a/Documentation/ia64/kvm.txt +++ /dev/null @@ -1,83 +0,0 @@ -Currently, kvm module is in EXPERIMENTAL stage on IA64. This means that -interfaces are not stable enough to use. So, please don't run critical -applications in virtual machine. -We will try our best to improve it in future versions! - - Guide: How to boot up guests on kvm/ia64 - -This guide is to describe how to enable kvm support for IA-64 systems. - -1. Get the kvm source from git.kernel.org. - Userspace source: - git clone git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git - Kernel Source: - git clone git://git.kernel.org/pub/scm/linux/kernel/git/xiantao/kvm-ia64.git - -2. Compile the source code. - 2.1 Compile userspace code: - (1)cd ./kvm-userspace - (2)./configure - (3)cd kernel - (4)make sync LINUX= $kernel_dir (kernel_dir is the directory of kernel source.) - (5)cd .. - (6)make qemu - (7)cd qemu; make install - - 2.2 Compile kernel source code: - (1) cd ./$kernel_dir - (2) Make menuconfig - (3) Enter into virtualization option, and choose kvm. - (4) make - (5) Once (4) done, make modules_install - (6) Make initrd, and use new kernel to reboot up host machine. - (7) Once (6) done, cd $kernel_dir/arch/ia64/kvm - (8) insmod kvm.ko; insmod kvm-intel.ko - -Note: For step 2, please make sure that host page size == TARGET_PAGE_SIZE of qemu, otherwise, may fail. - -3. Get Guest Firmware named as Flash.fd, and put it under right place: - (1) If you have the guest firmware (binary) released by Intel Corp for Xen, use it directly. - - (2) If you have no firmware at hand, Please download its source from - hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg - you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries. - - (3) Rename the firmware you owned to Flash.fd, and copy it to /usr/local/share/qemu - -4. Boot up Linux or Windows guests: - 4.1 Create or install a image for guest boot. If you have xen experience, it should be easy. - - 4.2 Boot up guests use the following command. - /usr/local/bin/qemu-system-ia64 -smp xx -m 512 -hda $your_image - (xx is the number of virtual processors for the guest, now the maximum value is 4) - -5. Known possible issue on some platforms with old Firmware. 
- -In the event of strange host crash issues, try to solve it through either of the following ways: - -(1): Upgrade your Firmware to the latest one. - -(2): Applying the below patch to kernel source. -diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S -index 0b53344..f02b0f7 100644 ---- a/arch/ia64/kernel/pal.S -+++ b/arch/ia64/kernel/pal.S -@@ -84,7 +84,8 @@ GLOBAL_ENTRY(ia64_pal_call_static) - mov ar.pfs = loc1 - mov rp = loc0 - ;; -- srlz.d // serialize restoration of psr.l -+ srlz.i // serialize restoration of psr.l -+ ;; - br.ret.sptk.many b0 - END(ia64_pal_call_static) - -6. Bug report: - If you found any issues when use kvm/ia64, Please post the bug info to kvm-ia64-devel mailing list. - https://lists.sourceforge.net/lists/listinfo/kvm-ia64-devel/ - -Thanks for your interest! Let's work together, and make kvm/ia64 stronger and stronger! - - - Xiantao Zhang <xiantao.zhang@intel.com> - 2008.3.10 diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 7610eaa4d49..0007fef4ed8 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -68,9 +68,12 @@ description: Capability: which KVM extension provides this ioctl. Can be 'basic', which means that is will be provided by any kernel that supports - API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which + API version 12 (see section 4.1), a KVM_CAP_xyz constant, which means availability needs to be checked with KVM_CHECK_EXTENSION - (see section 4.4). + (see section 4.4), or 'none' which means that while not all kernels + support this ioctl, there's no capability bit to check its + availability: for kernels that don't support the ioctl, + the ioctl returns -ENOTTY. Architectures: which instruction set architectures provide this ioctl. x86 includes both i386 and x86_64. @@ -604,7 +607,7 @@ struct kvm_fpu { 4.24 KVM_CREATE_IRQCHIP Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390) -Architectures: x86, ia64, ARM, arm64, s390 +Architectures: x86, ARM, arm64, s390 Type: vm ioctl Parameters: none Returns: 0 on success, -1 on error @@ -612,7 +615,7 @@ Returns: 0 on success, -1 on error Creates an interrupt controller model in the kernel. On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 -only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is +only go to the IOAPIC. On ARM/arm64, a GIC is created. On s390, a dummy irq routing table is created. Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled @@ -622,7 +625,7 @@ before KVM_CREATE_IRQCHIP can be used. 4.25 KVM_IRQ_LINE Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64, arm, arm64 +Architectures: x86, arm, arm64 Type: vm ioctl Parameters: struct kvm_irq_level Returns: 0 on success, -1 on error @@ -676,7 +679,7 @@ struct kvm_irq_level { 4.26 KVM_GET_IRQCHIP Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 +Architectures: x86 Type: vm ioctl Parameters: struct kvm_irqchip (in/out) Returns: 0 on success, -1 on error @@ -698,7 +701,7 @@ struct kvm_irqchip { 4.27 KVM_SET_IRQCHIP Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 +Architectures: x86 Type: vm ioctl Parameters: struct kvm_irqchip (in) Returns: 0 on success, -1 on error @@ -991,7 +994,7 @@ for vm-wide capabilities. 
4.38 KVM_GET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, ia64, s390 +Architectures: x86, s390 Type: vcpu ioctl Parameters: struct kvm_mp_state (out) Returns: 0 on success; -1 on error @@ -1005,16 +1008,15 @@ uniprocessor guests). Possible values are: - - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86, ia64] + - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86] - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) - which has not yet received an INIT signal [x86, - ia64] + which has not yet received an INIT signal [x86] - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is - now ready for a SIPI [x86, ia64] + now ready for a SIPI [x86] - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and - is waiting for an interrupt [x86, ia64] + is waiting for an interrupt [x86] - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector - accessible via KVM_GET_VCPU_EVENTS) [x86, ia64] + accessible via KVM_GET_VCPU_EVENTS) [x86] - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390] - KVM_MP_STATE_CHECK_STOP: the vcpu is in a special error state [s390] - KVM_MP_STATE_OPERATING: the vcpu is operating (running or halted) @@ -1022,7 +1024,7 @@ Possible values are: - KVM_MP_STATE_LOAD: the vcpu is in a special load/startup state [s390] -On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an +On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on these architectures. @@ -1030,7 +1032,7 @@ these architectures. 4.39 KVM_SET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, ia64, s390 +Architectures: x86, s390 Type: vcpu ioctl Parameters: struct kvm_mp_state (in) Returns: 0 on success; -1 on error @@ -1038,7 +1040,7 @@ Returns: 0 on success; -1 on error Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for arguments. -On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an +On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on these architectures. @@ -1065,7 +1067,7 @@ documentation when it pops into existence). 4.41 KVM_SET_BOOT_CPU_ID Capability: KVM_CAP_SET_BOOT_CPU_ID -Architectures: x86, ia64 +Architectures: x86 Type: vm ioctl Parameters: unsigned long vcpu_id Returns: 0 on success, -1 on error @@ -1257,8 +1259,8 @@ The flags bitmap is defined as: 4.48 KVM_ASSIGN_PCI_DEVICE -Capability: KVM_CAP_DEVICE_ASSIGNMENT -Architectures: x86 ia64 +Capability: none +Architectures: x86 Type: vm ioctl Parameters: struct kvm_assigned_pci_dev (in) Returns: 0 on success, -1 on error @@ -1298,25 +1300,36 @@ Only PCI header type 0 devices with PCI BAR resources are supported by device assignment. The user requesting this ioctl must have read/write access to the PCI sysfs resource files associated with the device. +Errors: + ENOTTY: kernel does not support this ioctl + + Other error conditions may be defined by individual device types or + have their standard meanings. + 4.49 KVM_DEASSIGN_PCI_DEVICE -Capability: KVM_CAP_DEVICE_DEASSIGNMENT -Architectures: x86 ia64 +Capability: none +Architectures: x86 Type: vm ioctl Parameters: struct kvm_assigned_pci_dev (in) Returns: 0 on success, -1 on error Ends PCI device assignment, releasing all associated resources. -See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. 
Only assigned_dev_id is +See KVM_ASSIGN_PCI_DEVICE for the data structure. Only assigned_dev_id is used in kvm_assigned_pci_dev to identify the device. +Errors: + ENOTTY: kernel does not support this ioctl + + Other error conditions may be defined by individual device types or + have their standard meanings. 4.50 KVM_ASSIGN_DEV_IRQ Capability: KVM_CAP_ASSIGN_DEV_IRQ -Architectures: x86 ia64 +Architectures: x86 Type: vm ioctl Parameters: struct kvm_assigned_irq (in) Returns: 0 on success, -1 on error @@ -1346,11 +1359,17 @@ The following flags are defined: It is not valid to specify multiple types per host or guest IRQ. However, the IRQ type of host and guest can differ or can even be null. +Errors: + ENOTTY: kernel does not support this ioctl + + Other error conditions may be defined by individual device types or + have their standard meanings. + 4.51 KVM_DEASSIGN_DEV_IRQ Capability: KVM_CAP_ASSIGN_DEV_IRQ -Architectures: x86 ia64 +Architectures: x86 Type: vm ioctl Parameters: struct kvm_assigned_irq (in) Returns: 0 on success, -1 on error @@ -1365,7 +1384,7 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. 4.52 KVM_SET_GSI_ROUTING Capability: KVM_CAP_IRQ_ROUTING -Architectures: x86 ia64 s390 +Architectures: x86 s390 Type: vm ioctl Parameters: struct kvm_irq_routing (in) Returns: 0 on success, -1 on error @@ -1423,8 +1442,8 @@ struct kvm_irq_routing_s390_adapter { 4.53 KVM_ASSIGN_SET_MSIX_NR -Capability: KVM_CAP_DEVICE_MSIX -Architectures: x86 ia64 +Capability: none +Architectures: x86 Type: vm ioctl Parameters: struct kvm_assigned_msix_nr (in) Returns: 0 on success, -1 on error @@ -1445,8 +1464,8 @@ struct kvm_assigned_msix_nr { 4.54 KVM_ASSIGN_SET_MSIX_ENTRY -Capability: KVM_CAP_DEVICE_MSIX -Architectures: x86 ia64 +Capability: none +Architectures: x86 Type: vm ioctl Parameters: struct kvm_assigned_msix_entry (in) Returns: 0 on success, -1 on error @@ -1461,6 +1480,12 @@ struct kvm_assigned_msix_entry { __u16 padding[3]; }; +Errors: + ENOTTY: kernel does not support this ioctl + + Other error conditions may be defined by individual device types or + have their standard meanings. + 4.55 KVM_SET_TSC_KHZ @@ -2453,9 +2478,15 @@ return ENOEXEC for that vcpu. Note that because some registers reflect machine topology, all vcpus should be created before this ioctl is invoked. +Userspace can call this function multiple times for a given vcpu, including +after the vcpu has been run. This will reset the vcpu to its initial +state. All calls to this function after the initial call must use the same +target and same set of feature flags, otherwise EINVAL will be returned. + Possible features: - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state. - Depends on KVM_CAP_ARM_PSCI. + Depends on KVM_CAP_ARM_PSCI. If not set, the CPU will be powered on + and execute guest code when KVM_RUN is called. - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode. Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU. @@ -2951,6 +2982,15 @@ HVC instruction based PSCI call from the vcpu. The 'type' field describes the system-level event type. The 'flags' field describes architecture specific flags for the system-level event. +Valid values for 'type' are: + KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the + VM. Userspace is not obliged to honour this, and if it does honour + it, it does not need to destroy the VM synchronously (ie it may call + KVM_RUN again before shutdown finally occurs).
+ KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM. + As with SHUTDOWN, userspace can choose to ignore the request, or + to schedule the reset to occur in the future and may call KVM_RUN again. + /* Fix the size of the union. */ char padding[256]; }; diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 0d16f96c0ea..d426fc87fe9 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -12,14 +12,14 @@ specific. 1. GROUP: KVM_S390_VM_MEM_CTRL Architectures: s390 -1.1. ATTRIBUTE: KVM_S390_VM_MEM_CTRL +1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA Parameters: none -Returns: -EBUSY if already a vcpus is defined, otherwise 0 +Returns: -EBUSY if a vcpu is already defined, otherwise 0 -Enables CMMA for the virtual machine +Enables Collaborative Memory Management Assist (CMMA) for the virtual machine. -1.2. ATTRIBUTE: KVM_S390_VM_CLR_CMMA -Parameteres: none +1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA +Parameters: none Returns: 0 Clear the CMMA status for all guest pages, so any pages the guest marked diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt index 6d470ae7b07..2a71c8f29f6 100644 --- a/Documentation/virtual/kvm/msr.txt +++ b/Documentation/virtual/kvm/msr.txt @@ -168,7 +168,7 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02 64 byte memory area which must be in guest RAM and must be zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1 when asynchronous page faults are enabled on the vcpu 0 when - disabled. Bit 2 is 1 if asynchronous page faults can be injected + disabled. Bit 1 is 1 if asynchronous page faults can be injected when vcpu is in cpl == 0. First 4 byte of 64 byte memory location will be written to by diff --git a/MAINTAINERS b/MAINTAINERS index 4aac6c8dce7..7605833aabc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4255,6 +4255,12 @@ L: linux-media@vger.kernel.org S: Maintained F: drivers/media/usb/go7007/ +GOODIX TOUCHSCREEN +M: Bastien Nocera <hadess@hadess.net> +L: linux-input@vger.kernel.org +S: Maintained +F: drivers/input/touchscreen/goodix.c + GPIO SUBSYSTEM M: Linus Walleij <linus.walleij@linaro.org> M: Alexandre Courbot <gnurou@gmail.com> @@ -5489,15 +5495,6 @@ S: Supported F: arch/powerpc/include/asm/kvm* F: arch/powerpc/kvm/ -KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64) -M: Xiantao Zhang <xiantao.zhang@intel.com> -L: kvm-ia64@vger.kernel.org -W: http://kvm.qumranet.com -S: Supported -F: Documentation/ia64/kvm.txt -F: arch/ia64/include/asm/kvm* -F: arch/ia64/kvm/ - KERNEL VIRTUAL MACHINE for s390 (KVM/s390) M: Christian Borntraeger <borntraeger@de.ibm.com> M: Cornelia Huck <cornelia.huck@de.ibm.com> @@ -9510,6 +9507,7 @@ Q: https://patchwork.kernel.org/project/linux-pm/list/ S: Supported F: drivers/thermal/ F: include/linux/thermal.h +F: include/uapi/linux/thermal.h F: include/linux/cpu_cooling.h F: Documentation/devicetree/bindings/thermal/ diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts index 51b373ff106..4eb540be368 100644 --- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts +++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts @@ -1942,4 +1942,48 @@ <&tegra_car TEGRA124_CLK_EXTERN1>; clock-names = "pll_a", "pll_a_out0", "mclk"; }; + + thermal-zones { + cpu { + trips { + trip@0 { + temperature = <101000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + /* There are currently no cooling maps because there are no cooling devices */ + }; + }; + + mem { + trips { + trip@0 { 
+ temperature = <101000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + /* There are currently no cooling maps because there are no cooling devices */ + }; + }; + + gpu { + trips { + trip@0 { + temperature = <101000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + /* There are currently no cooling maps because there are no cooling devices */ + }; + }; + }; }; diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 3ad2e3cf299..4be06c6ea0c 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -4,6 +4,7 @@ #include <dt-bindings/pinctrl/pinctrl-tegra.h> #include <dt-bindings/pinctrl/pinctrl-tegra-xusb.h> #include <dt-bindings/interrupt-controller/arm-gic.h> +#include <dt-bindings/thermal/tegra124-soctherm.h> #include "skeleton.dtsi" @@ -657,6 +658,18 @@ status = "disabled"; }; + soctherm: thermal-sensor@0,700e2000 { + compatible = "nvidia,tegra124-soctherm"; + reg = <0x0 0x700e2000 0x0 0x1000>; + interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&tegra_car TEGRA124_CLK_TSENSOR>, + <&tegra_car TEGRA124_CLK_SOC_THERM>; + clock-names = "tsensor", "soctherm"; + resets = <&tegra_car 78>; + reset-names = "soctherm"; + #thermal-sensor-cells = <1>; + }; + ahub@0,70300000 { compatible = "nvidia,tegra124-ahub"; reg = <0x0 0x70300000 0x0 0x200>, @@ -898,6 +911,40 @@ }; }; + thermal-zones { + cpu { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>; + }; + + mem { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_MEM>; + }; + + gpu { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_GPU>; + }; + + pllx { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_PLLX>; + }; + }; + timer { compatible = "arm,armv7-timer"; interrupts = <GIC_PPI 13 diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index b9db269c6e6..66ce17655bb 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr = HCR_GUEST_MASK; +} + static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 53036e21756..254e0650e48 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -150,8 +150,6 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index acb0d571271..63e0ecc0490 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -52,6 +52,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_boot_hyp_pgd(void); void free_hyp_pgds(void); +void stage2_unmap_vm(struct kvm *kvm); int 
kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, @@ -161,9 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) } static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, - unsigned long size) + unsigned long size, + bool ipa_uncached) { - if (!vcpu_has_cache_enabled(vcpu)) + if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) kvm_flush_dcache_to_poc((void *)hva, size); /* diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9e193c8a959..2d6d9100106 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) int err; struct kvm_vcpu *vcpu; + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) { + err = -EBUSY; + goto out; + } + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu) { err = -ENOMEM; @@ -263,6 +268,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); /* Set up the timer */ kvm_timer_vcpu_init(vcpu); @@ -419,6 +425,7 @@ static void update_vttbr(struct kvm *kvm) static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; int ret; if (likely(vcpu->arch.has_run_once)) @@ -427,15 +434,23 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) vcpu->arch.has_run_once = true; /* - * Initialize the VGIC before running a vcpu the first time on - * this VM. + * Map the VGIC hardware resources before running a vcpu the first + * time on this VM. */ - if (unlikely(!vgic_initialized(vcpu->kvm))) { - ret = kvm_vgic_init(vcpu->kvm); + if (unlikely(!vgic_ready(kvm))) { + ret = kvm_vgic_map_resources(kvm); if (ret) return ret; } + /* + * Enable the arch timers only if we have an in-kernel VGIC + * and it has been properly initialized, since we cannot handle + * interrupts from the virtual timer with a userspace gic. + */ + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) + kvm_timer_enable(kvm); + return 0; } @@ -649,6 +664,48 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, return -EINVAL; } +static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i; + int phys_target = kvm_target_cpu(); + + if (init->target != phys_target) + return -EINVAL; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same target. + */ + if (vcpu->arch.target != -1 && vcpu->arch.target != init->target) + return -EINVAL; + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ + for (i = 0; i < sizeof(init->features) * 8; i++) { + bool set = (init->features[i / 32] & (1 << (i % 32))); + + if (set && i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same feature set. + */ + if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES && + test_bit(i, vcpu->arch.features) != set) + return -EINVAL; + + if (set) + set_bit(i, vcpu->arch.features); + } + + vcpu->arch.target = phys_target; + + /* Now we know what it is, we can reset it. 
*/ + return kvm_reset_vcpu(vcpu); +} + + static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { @@ -659,10 +716,21 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, return ret; /* + * Ensure a rebooted VM will fault in RAM pages and detect if the + * guest MMU is turned off and flush the caches as needed. + */ + if (vcpu->arch.has_run_once) + stage2_unmap_vm(vcpu->kvm); + + vcpu_reset_hcr(vcpu); + + /* * Handle the "start in power-off" case by marking the VCPU as paused. */ - if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) + if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) vcpu->arch.pause = true; + else + vcpu->arch.pause = false; return 0; } diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index cc0b78769bd..384bab67c46 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr = HCR_GUEST_MASK; return 0; } @@ -274,31 +273,6 @@ int __attribute_const__ kvm_target_cpu(void) } } -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init) -{ - unsigned int i; - - /* We can only cope with guest==host and only on A15/A7 (for now). */ - if (init->target != kvm_target_cpu()) - return -EINVAL; - - vcpu->arch.target = init->target; - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - - /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ - for (i = 0; i < sizeof(init->features) * 8; i++) { - if (test_bit(i, (void *)init->features)) { - if (i >= KVM_VCPU_MAX_FEATURES) - return -ENOENT; - set_bit(i, vcpu->arch.features); - } - } - - /* Now we know what it is, we can reset it. */ - return kvm_reset_vcpu(vcpu); -} - int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { int target = kvm_target_cpu(); diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 4cb5a93182e..5d3bfc0eb3f 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -187,15 +187,18 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, } rt = vcpu->arch.mmio_decode.rt; - data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len); - trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE : - KVM_TRACE_MMIO_READ_UNSATISFIED, - mmio.len, fault_ipa, - (mmio.is_write) ? data : 0); + if (mmio.is_write) { + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), + mmio.len); - if (mmio.is_write) + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len, + fault_ipa, data); mmio_write_buf(mmio.data, mmio.len, data); + } else { + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len, + fault_ipa, 0); + } if (vgic_handle_mmio(vcpu, run, &mmio)) return 1; diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 8664ff17cbb..1dc9778a00a 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -612,6 +612,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) unmap_range(kvm, kvm->arch.pgd, start, size); } +static void stage2_unmap_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + hva_t hva = memslot->userspace_addr; + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t size = PAGE_SIZE * memslot->npages; + hva_t reg_end = hva + size; + + /* + * A memory region could potentially cover multiple VMAs, and any holes + * between them, so iterate over all of them to find out if we should + * unmap any of them. 
+ * + * +--------------------------------------------+ + * +---------------+----------------+ +----------------+ + * | : VMA 1 | VMA 2 | | VMA 3 : | + * +---------------+----------------+ +----------------+ + * | memory region | + * +--------------------------------------------+ + */ + do { + struct vm_area_struct *vma = find_vma(current->mm, hva); + hva_t vm_start, vm_end; + + if (!vma || vma->vm_start >= reg_end) + break; + + /* + * Take the intersection of this VMA with the memory region + */ + vm_start = max(hva, vma->vm_start); + vm_end = min(reg_end, vma->vm_end); + + if (!(vma->vm_flags & VM_PFNMAP)) { + gpa_t gpa = addr + (vm_start - memslot->userspace_addr); + unmap_stage2_range(kvm, gpa, vm_end - vm_start); + } + hva = vm_end; + } while (hva < reg_end); +} + +/** + * stage2_unmap_vm - Unmap Stage-2 RAM mappings + * @kvm: The struct kvm pointer + * + * Go through the memregions and unmap any regular RAM + * backing memory already mapped to the VM. + */ +void stage2_unmap_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_unmap_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); +} + /** * kvm_free_stage2_pgd - free all stage-2 tables * @kvm: The KVM struct pointer for the VM. @@ -853,6 +918,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct vm_area_struct *vma; pfn_t pfn; pgprot_t mem_type = PAGE_S2; + bool fault_ipa_uncached; write_fault = kvm_is_write_fault(vcpu); if (fault_status == FSC_PERM && !write_fault) { @@ -919,6 +985,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (!hugetlb && !force_pte) hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); + fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT; + if (hugetlb) { pmd_t new_pmd = pfn_pmd(pfn, mem_type); new_pmd = pmd_mkhuge(new_pmd); @@ -926,7 +994,8 @@ kvm_set_s2pmd_writable(&new_pmd); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); + coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE, + fault_ipa_uncached); ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = pfn_pte(pfn, mem_type); @@ -934,7 +1003,8 @@ kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); + coherent_cache_guest_page(vcpu, hva, PAGE_SIZE, + fault_ipa_uncached); ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); } @@ -1294,11 +1364,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, hva = vm_end; } while (hva < reg_end); - if (ret) { - spin_lock(&kvm->mmu_lock); + spin_lock(&kvm->mmu_lock); + if (ret) unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); - spin_unlock(&kvm->mmu_lock); - } + else + stage2_flush_memslot(kvm, memslot); + spin_unlock(&kvm->mmu_lock); return ret; } @@ -1310,6 +1381,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { + /* + * Readonly memslots are not incoherent with the caches by definition, + * but in practice, they are used mostly to emulate
ROMs or NOR flashes + * that the guest may consider devices and hence map as uncached. + * To prevent incoherency issues in these cases, tag all readonly + * regions as incoherent. + */ + if (slot->flags & KVM_MEM_READONLY) + slot->flags |= KVM_MEMSLOT_INCOHERENT; return 0; } diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 09cf37737ee..58cb3248d27 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -15,6 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/preempt.h> #include <linux/kvm_host.h> #include <linux/wait.h> @@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) { + int i; + struct kvm_vcpu *tmp; + + /* + * The KVM ABI specifies that a system event exit may call KVM_RUN + * again and may perform shutdown/reboot at a later time than when the + * actual request is made. Since we are implementing PSCI and a + * caller of PSCI reboot and shutdown expects that the system shuts + * down or reboots immediately, let's make sure that VCPUs are not run + * after this call is handled and before the VCPUs have been + * re-initialized. + */ + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + tmp->arch.pause = true; + kvm_vcpu_kick(tmp); + } + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); vcpu->run->system_event.type = type; vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5674a55b551..8127e45e263 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -38,6 +38,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; +} + static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2012c4ba8d6..0b7dfdb931d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -165,8 +165,6 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); @@ -200,6 +198,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); u64 kvm_call_hyp(void *hypfn, ...); +void force_vm_exit(const cpumask_t *mask); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 0caf7a59f6a..14a74f13627 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -83,6 +83,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_boot_hyp_pgd(void); void free_hyp_pgds(void); +void stage2_unmap_vm(struct kvm *kvm); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, @@ -243,9 +244,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu
*vcpu) } static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, - unsigned long size) + unsigned long size, + bool ipa_uncached) { - if (!vcpu_has_cache_enabled(vcpu)) + if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) kvm_flush_dcache_to_poc((void *)hva, size); if (!icache_is_aliasing()) { /* PIPT */ diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 76794692c20..9535bd555d1 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; return 0; } @@ -297,31 +296,6 @@ int __attribute_const__ kvm_target_cpu(void) return -EINVAL; } -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init) -{ - unsigned int i; - int phys_target = kvm_target_cpu(); - - if (init->target != phys_target) - return -EINVAL; - - vcpu->arch.target = phys_target; - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - - /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ - for (i = 0; i < sizeof(init->features) * 8; i++) { - if (init->features[i / 32] & (1 << (i % 32))) { - if (i >= KVM_VCPU_MAX_FEATURES) - return -ENOENT; - set_bit(i, vcpu->arch.features); - } - } - - /* Now we know what it is, we can reset it. */ - return kvm_reset_vcpu(vcpu); -} - int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { int target = kvm_target_cpu(); diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 536d13b0bea..371b55bc5a6 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -20,7 +20,6 @@ config IA64 select HAVE_DYNAMIC_FTRACE if (!ITANIUM) select HAVE_FUNCTION_TRACER select HAVE_DMA_ATTRS - select HAVE_KVM select TTY select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG @@ -640,8 +639,6 @@ source "security/Kconfig" source "crypto/Kconfig" -source "arch/ia64/kvm/Kconfig" - source "lib/Kconfig" config IOMMU_HELPER diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 5441b14994f..970d0bd9962 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -53,7 +53,6 @@ core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/ -core-$(CONFIG_KVM) += arch/ia64/kvm/ drivers-$(CONFIG_PCI) += arch/ia64/pci/ drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h deleted file mode 100644 index 4729752b725..00000000000 --- a/arch/ia64/include/asm/kvm_host.h +++ /dev/null @@ -1,609 +0,0 @@ -/* - * kvm_host.h: used for kvm module, and hold ia64-specific sections. - * - * Copyright (C) 2007, Intel Corporation. - * - * Xiantao Zhang <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - -#ifndef __ASM_KVM_HOST_H -#define __ASM_KVM_HOST_H - -#define KVM_USER_MEM_SLOTS 32 - -#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 -#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS - -/* define exit reasons from vmm to kvm*/ -#define EXIT_REASON_VM_PANIC 0 -#define EXIT_REASON_MMIO_INSTRUCTION 1 -#define EXIT_REASON_PAL_CALL 2 -#define EXIT_REASON_SAL_CALL 3 -#define EXIT_REASON_SWITCH_RR6 4 -#define EXIT_REASON_VM_DESTROY 5 -#define EXIT_REASON_EXTERNAL_INTERRUPT 6 -#define EXIT_REASON_IPI 7 -#define EXIT_REASON_PTC_G 8 -#define EXIT_REASON_DEBUG 20 - -/*Define vmm address space and vm data space.*/ -#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20) -#define KVM_VMM_SHIFT 24 -#define KVM_VMM_BASE 0xD000000000000000 -#define VMM_SIZE (__IA64_UL_CONST(8)<<20) - -/* - * Define vm_buffer, used by PAL Services, base address. - * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M - */ -#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE) -#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20) - -/* - * kvm guest's data area looks as follow: - * - * +----------------------+ ------- KVM_VM_DATA_SIZE - * | vcpu[n]'s data | | ___________________KVM_STK_OFFSET - * | | | / | - * | .......... | | /vcpu's struct&stack | - * | .......... | | /---------------------|---- 0 - * | vcpu[5]'s data | | / vpd | - * | vcpu[4]'s data | |/-----------------------| - * | vcpu[3]'s data | / vtlb | - * | vcpu[2]'s data | /|------------------------| - * | vcpu[1]'s data |/ | vhpt | - * | vcpu[0]'s data |____________________________| - * +----------------------+ | - * | memory dirty log | | - * +----------------------+ | - * | vm's data struct | | - * +----------------------+ | - * | | | - * | | | - * | | | - * | | | - * | | | - * | | | - * | | | - * | vm's p2m table | | - * | | | - * | | | - * | | | | - * vm's data->| | | | - * +----------------------+ ------- 0 - * To support large memory, needs to increase the size of p2m. - * To support more vcpus, needs to ensure it has enough space to - * hold vcpus' data. 
- */ - -#define KVM_VM_DATA_SHIFT 26 -#define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT) -#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SIZE) - -#define KVM_P2M_BASE KVM_VM_DATA_BASE -#define KVM_P2M_SIZE (__IA64_UL_CONST(24) << 20) - -#define VHPT_SHIFT 16 -#define VHPT_SIZE (__IA64_UL_CONST(1) << VHPT_SHIFT) -#define VHPT_NUM_ENTRIES (__IA64_UL_CONST(1) << (VHPT_SHIFT-5)) - -#define VTLB_SHIFT 16 -#define VTLB_SIZE (__IA64_UL_CONST(1) << VTLB_SHIFT) -#define VTLB_NUM_ENTRIES (1UL << (VHPT_SHIFT-5)) - -#define VPD_SHIFT 16 -#define VPD_SIZE (__IA64_UL_CONST(1) << VPD_SHIFT) - -#define VCPU_STRUCT_SHIFT 16 -#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT) - -/* - * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h - */ -#define KVM_STK_SHIFT 16 -#define KVM_STK_OFFSET (__IA64_UL_CONST(1)<< KVM_STK_SHIFT) - -#define KVM_VM_STRUCT_SHIFT 19 -#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT) - -#define KVM_MEM_DIRY_LOG_SHIFT 19 -#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT) - -#ifndef __ASSEMBLY__ - -/*Define the max vcpus and memory for Guests.*/ -#define KVM_MAX_VCPUS (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\ - KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data) -#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT) - -#define VMM_LOG_LEN 256 - -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/kvm.h> -#include <linux/kvm_para.h> -#include <linux/kvm_types.h> - -#include <asm/pal.h> -#include <asm/sal.h> -#include <asm/page.h> - -struct kvm_vcpu_data { - char vcpu_vhpt[VHPT_SIZE]; - char vcpu_vtlb[VTLB_SIZE]; - char vcpu_vpd[VPD_SIZE]; - char vcpu_struct[VCPU_STRUCT_SIZE]; -}; - -struct kvm_vm_data { - char kvm_p2m[KVM_P2M_SIZE]; - char kvm_vm_struct[KVM_VM_STRUCT_SIZE]; - char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE]; - struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS]; -}; - -#define VCPU_BASE(n) (KVM_VM_DATA_BASE + \ - offsetof(struct kvm_vm_data, vcpu_data[n])) -#define KVM_VM_BASE (KVM_VM_DATA_BASE + \ - offsetof(struct kvm_vm_data, kvm_vm_struct)) -#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \ - offsetof(struct kvm_vm_data, kvm_mem_dirty_log) - -#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt)) -#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb)) -#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd)) -#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \ - offsetof(struct kvm_vcpu_data, vcpu_struct)) - -/*IO section definitions*/ -#define IOREQ_READ 1 -#define IOREQ_WRITE 0 - -#define STATE_IOREQ_NONE 0 -#define STATE_IOREQ_READY 1 -#define STATE_IOREQ_INPROCESS 2 -#define STATE_IORESP_READY 3 - -/*Guest Physical address layout.*/ -#define GPFN_MEM (0UL << 60) /* Guest pfn is normal mem */ -#define GPFN_FRAME_BUFFER (1UL << 60) /* VGA framebuffer */ -#define GPFN_LOW_MMIO (2UL << 60) /* Low MMIO range */ -#define GPFN_PIB (3UL << 60) /* PIB base */ -#define GPFN_IOSAPIC (4UL << 60) /* IOSAPIC base */ -#define GPFN_LEGACY_IO (5UL << 60) /* Legacy I/O base */ -#define GPFN_GFW (6UL << 60) /* Guest Firmware */ -#define GPFN_PHYS_MMIO (7UL << 60) /* Directed MMIO Range */ - -#define GPFN_IO_MASK (7UL << 60) /* Guest pfn is I/O type */ -#define GPFN_INV_MASK (1UL << 63) /* Guest pfn is invalid */ -#define INVALID_MFN (~0UL) -#define MEM_G (1UL << 30) -#define MEM_M (1UL << 20) -#define MMIO_START (3 * MEM_G) -#define MMIO_SIZE (512 * MEM_M) -#define 
VGA_IO_START 0xA0000UL -#define VGA_IO_SIZE 0x20000 -#define LEGACY_IO_START (MMIO_START + MMIO_SIZE) -#define LEGACY_IO_SIZE (64 * MEM_M) -#define IO_SAPIC_START 0xfec00000UL -#define IO_SAPIC_SIZE 0x100000 -#define PIB_START 0xfee00000UL -#define PIB_SIZE 0x200000 -#define GFW_START (4 * MEM_G - 16 * MEM_M) -#define GFW_SIZE (16 * MEM_M) - -/*Deliver mode, defined for ioapic.c*/ -#define dest_Fixed IOSAPIC_FIXED -#define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY - -#define NMI_VECTOR 2 -#define ExtINT_VECTOR 0 -#define NULL_VECTOR (-1) -#define IA64_SPURIOUS_INT_VECTOR 0x0f - -#define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24) - -/* - *Delivery mode - */ -#define SAPIC_DELIV_SHIFT 8 -#define SAPIC_FIXED 0x0 -#define SAPIC_LOWEST_PRIORITY 0x1 -#define SAPIC_PMI 0x2 -#define SAPIC_NMI 0x4 -#define SAPIC_INIT 0x5 -#define SAPIC_EXTINT 0x7 - -/* - * vcpu->requests bit members for arch - */ -#define KVM_REQ_PTC_G 32 -#define KVM_REQ_RESUME 33 - -struct kvm_mmio_req { - uint64_t addr; /* physical address */ - uint64_t size; /* size in bytes */ - uint64_t data; /* data (or paddr of data) */ - uint8_t state:4; - uint8_t dir:1; /* 1=read, 0=write */ -}; - -/*Pal data struct */ -struct kvm_pal_call{ - /*In area*/ - uint64_t gr28; - uint64_t gr29; - uint64_t gr30; - uint64_t gr31; - /*Out area*/ - struct ia64_pal_retval ret; -}; - -/* Sal data structure */ -struct kvm_sal_call{ - /*In area*/ - uint64_t in0; - uint64_t in1; - uint64_t in2; - uint64_t in3; - uint64_t in4; - uint64_t in5; - uint64_t in6; - uint64_t in7; - struct sal_ret_values ret; -}; - -/*Guest change rr6*/ -struct kvm_switch_rr6 { - uint64_t old_rr; - uint64_t new_rr; -}; - -union ia64_ipi_a{ - unsigned long val; - struct { - unsigned long rv : 3; - unsigned long ir : 1; - unsigned long eid : 8; - unsigned long id : 8; - unsigned long ib_base : 44; - }; -}; - -union ia64_ipi_d { - unsigned long val; - struct { - unsigned long vector : 8; - unsigned long dm : 3; - unsigned long ig : 53; - }; -}; - -/*ipi check exit data*/ -struct kvm_ipi_data{ - union ia64_ipi_a addr; - union ia64_ipi_d data; -}; - -/*global purge data*/ -struct kvm_ptc_g { - unsigned long vaddr; - unsigned long rr; - unsigned long ps; - struct kvm_vcpu *vcpu; -}; - -/*Exit control data */ -struct exit_ctl_data{ - uint32_t exit_reason; - uint32_t vm_status; - union { - struct kvm_mmio_req ioreq; - struct kvm_pal_call pal_data; - struct kvm_sal_call sal_data; - struct kvm_switch_rr6 rr_data; - struct kvm_ipi_data ipi_data; - struct kvm_ptc_g ptc_g_data; - } u; -}; - -union pte_flags { - unsigned long val; - struct { - unsigned long p : 1; /*0 */ - unsigned long : 1; /* 1 */ - unsigned long ma : 3; /* 2-4 */ - unsigned long a : 1; /* 5 */ - unsigned long d : 1; /* 6 */ - unsigned long pl : 2; /* 7-8 */ - unsigned long ar : 3; /* 9-11 */ - unsigned long ppn : 38; /* 12-49 */ - unsigned long : 2; /* 50-51 */ - unsigned long ed : 1; /* 52 */ - }; -}; - -union ia64_pta { - unsigned long val; - struct { - unsigned long ve : 1; - unsigned long reserved0 : 1; - unsigned long size : 6; - unsigned long vf : 1; - unsigned long reserved1 : 6; - unsigned long base : 49; - }; -}; - -struct thash_cb { - /* THASH base information */ - struct thash_data *hash; /* hash table pointer */ - union ia64_pta pta; - int num; -}; - -struct kvm_vcpu_stat { - u32 halt_wakeup; -}; - -struct kvm_vcpu_arch { - int launched; - int last_exit; - int last_run_cpu; - int vmm_tr_slot; - int vm_tr_slot; - int sn_rtc_tr_slot; - -#define KVM_MP_STATE_RUNNABLE 0 -#define KVM_MP_STATE_UNINITIALIZED 1 -#define 
KVM_MP_STATE_INIT_RECEIVED 2 -#define KVM_MP_STATE_HALTED 3 - int mp_state; - -#define MAX_PTC_G_NUM 3 - int ptc_g_count; - struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM]; - - /*halt timer to wake up sleepy vcpus*/ - struct hrtimer hlt_timer; - long ht_active; - - struct kvm_lapic *apic; /* kernel irqchip context */ - struct vpd *vpd; - - /* Exit data for vmm_transition*/ - struct exit_ctl_data exit_data; - - cpumask_t cache_coherent_map; - - unsigned long vmm_rr; - unsigned long host_rr6; - unsigned long psbits[8]; - unsigned long cr_iipa; - unsigned long cr_isr; - unsigned long vsa_base; - unsigned long dirty_log_lock_pa; - unsigned long __gp; - /* TR and TC. */ - struct thash_data itrs[NITRS]; - struct thash_data dtrs[NDTRS]; - /* Bit is set if there is a tr/tc for the region. */ - unsigned char itr_regions; - unsigned char dtr_regions; - unsigned char tc_regions; - /* purge all */ - unsigned long ptce_base; - unsigned long ptce_count[2]; - unsigned long ptce_stride[2]; - /* itc/itm */ - unsigned long last_itc; - long itc_offset; - unsigned long itc_check; - unsigned long timer_check; - unsigned int timer_pending; - unsigned int timer_fired; - - unsigned long vrr[8]; - unsigned long ibr[8]; - unsigned long dbr[8]; - unsigned long insvc[4]; /* Interrupt in service. */ - unsigned long xtp; - - unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */ - unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */ - unsigned long metaphysical_saved_rr0; /* from kvm_arch */ - unsigned long metaphysical_saved_rr4; /* from kvm_arch */ - unsigned long fp_psr; /*used for lazy float register */ - unsigned long saved_gp; - /*for phycial emulation */ - int mode_flags; - struct thash_cb vtlb; - struct thash_cb vhpt; - char irq_check; - char irq_new_pending; - - unsigned long opcode; - unsigned long cause; - char log_buf[VMM_LOG_LEN]; - union context host; - union context guest; - - char mmio_data[8]; -}; - -struct kvm_vm_stat { - u64 remote_tlb_flush; -}; - -struct kvm_sal_data { - unsigned long boot_ip; - unsigned long boot_gp; -}; - -struct kvm_arch_memory_slot { -}; - -struct kvm_arch { - spinlock_t dirty_log_lock; - - unsigned long vm_base; - unsigned long metaphysical_rr0; - unsigned long metaphysical_rr4; - unsigned long vmm_init_rr; - - int is_sn2; - - struct kvm_ioapic *vioapic; - struct kvm_vm_stat stat; - struct kvm_sal_data rdv_sal_data; - - struct list_head assigned_dev_head; - struct iommu_domain *iommu_domain; - bool iommu_noncoherent; - - unsigned long irq_sources_bitmap; - unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; -}; - -union cpuid3_t { - u64 value; - struct { - u64 number : 8; - u64 revision : 8; - u64 model : 8; - u64 family : 8; - u64 archrev : 8; - u64 rv : 24; - }; -}; - -struct kvm_pt_regs { - /* The following registers are saved by SAVE_MIN: */ - unsigned long b6; /* scratch */ - unsigned long b7; /* scratch */ - - unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */ - unsigned long ar_ssd; /* reserved for future use (scratch) */ - - unsigned long r8; /* scratch (return value register 0) */ - unsigned long r9; /* scratch (return value register 1) */ - unsigned long r10; /* scratch (return value register 2) */ - unsigned long r11; /* scratch (return value register 3) */ - - unsigned long cr_ipsr; /* interrupted task's psr */ - unsigned long cr_iip; /* interrupted task's instruction pointer */ - unsigned long cr_ifs; /* interrupted task's function state */ - - unsigned long ar_unat; /* interrupted task's NaT register (preserved) */ - unsigned long ar_pfs; /* 
prev function state */ - unsigned long ar_rsc; /* RSE configuration */ - /* The following two are valid only if cr_ipsr.cpl > 0: */ - unsigned long ar_rnat; /* RSE NaT */ - unsigned long ar_bspstore; /* RSE bspstore */ - - unsigned long pr; /* 64 predicate registers (1 bit each) */ - unsigned long b0; /* return pointer (bp) */ - unsigned long loadrs; /* size of dirty partition << 16 */ - - unsigned long r1; /* the gp pointer */ - unsigned long r12; /* interrupted task's memory stack pointer */ - unsigned long r13; /* thread pointer */ - - unsigned long ar_fpsr; /* floating point status (preserved) */ - unsigned long r15; /* scratch */ - - /* The remaining registers are NOT saved for system calls. */ - unsigned long r14; /* scratch */ - unsigned long r2; /* scratch */ - unsigned long r3; /* scratch */ - unsigned long r16; /* scratch */ - unsigned long r17; /* scratch */ - unsigned long r18; /* scratch */ - unsigned long r19; /* scratch */ - unsigned long r20; /* scratch */ - unsigned long r21; /* scratch */ - unsigned long r22; /* scratch */ - unsigned long r23; /* scratch */ - unsigned long r24; /* scratch */ - unsigned long r25; /* scratch */ - unsigned long r26; /* scratch */ - unsigned long r27; /* scratch */ - unsigned long r28; /* scratch */ - unsigned long r29; /* scratch */ - unsigned long r30; /* scratch */ - unsigned long r31; /* scratch */ - unsigned long ar_ccv; /* compare/exchange value (scratch) */ - - /* - * Floating point registers that the kernel considers scratch: - */ - struct ia64_fpreg f6; /* scratch */ - struct ia64_fpreg f7; /* scratch */ - struct ia64_fpreg f8; /* scratch */ - struct ia64_fpreg f9; /* scratch */ - struct ia64_fpreg f10; /* scratch */ - struct ia64_fpreg f11; /* scratch */ - - unsigned long r4; /* preserved */ - unsigned long r5; /* preserved */ - unsigned long r6; /* preserved */ - unsigned long r7; /* preserved */ - unsigned long eml_unat; /* used for emulating instruction */ - unsigned long pad0; /* alignment pad */ -}; - -static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v) -{ - return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1; -} - -typedef int kvm_vmm_entry(void); -typedef void kvm_tramp_entry(union context *host, union context *guest); - -struct kvm_vmm_info{ - struct module *module; - kvm_vmm_entry *vmm_entry; - kvm_tramp_entry *tramp_entry; - unsigned long vmm_ivt; - unsigned long patch_mov_ar; - unsigned long patch_mov_ar_sn2; -}; - -int kvm_highest_pending_irq(struct kvm_vcpu *vcpu); -int kvm_emulate_halt(struct kvm_vcpu *vcpu); -int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); -void kvm_sal_emul(struct kvm_vcpu *vcpu); - -#define __KVM_HAVE_ARCH_VM_ALLOC 1 -struct kvm *kvm_arch_alloc_vm(void); -void kvm_arch_free_vm(struct kvm *kvm); - -static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_free_memslot(struct kvm *kvm, - struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} -static inline void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) {} -static inline void kvm_arch_hardware_unsetup(void) {} - -#endif /* __ASSEMBLY__*/ - -#endif diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h deleted file mode 100644 index 
42b233bedeb..00000000000 --- a/arch/ia64/include/asm/pvclock-abi.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * same structure to x86's - * Hopefully asm-x86/pvclock-abi.h would be moved to somewhere more generic. - * For now, define same duplicated definitions. - */ - -#ifndef _ASM_IA64__PVCLOCK_ABI_H -#define _ASM_IA64__PVCLOCK_ABI_H -#ifndef __ASSEMBLY__ - -/* - * These structs MUST NOT be changed. - * They are the ABI between hypervisor and guest OS. - * KVM is using this. - * - * pvclock_vcpu_time_info holds the system time and the tsc timestamp - * of the last update. So the guest can use the tsc delta to get a - * more precise system time. There is one per virtual cpu. - * - * pvclock_wall_clock references the point in time when the system - * time was zero (usually boot time), thus the guest calculates the - * current wall clock by adding the system time. - * - * Protocol for the "version" fields is: hypervisor raises it (making - * it uneven) before it starts updating the fields and raises it again - * (making it even) when it is done. Thus the guest can make sure the - * time values it got are consistent by checking the version before - * and after reading them. - */ - -struct pvclock_vcpu_time_info { - u32 version; - u32 pad0; - u64 tsc_timestamp; - u64 system_time; - u32 tsc_to_system_mul; - s8 tsc_shift; - u8 pad[3]; -} __attribute__((__packed__)); /* 32 bytes */ - -struct pvclock_wall_clock { - u32 version; - u32 sec; - u32 nsec; -} __attribute__((__packed__)); - -#endif /* __ASSEMBLY__ */ -#endif /* _ASM_IA64__PVCLOCK_ABI_H */ diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h deleted file mode 100644 index 99503c28440..00000000000 --- a/arch/ia64/include/uapi/asm/kvm.h +++ /dev/null @@ -1,268 +0,0 @@ -#ifndef __ASM_IA64_KVM_H -#define __ASM_IA64_KVM_H - -/* - * kvm structure definitions for ia64 - * - * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include <linux/types.h> -#include <linux/ioctl.h> - -/* Select x86 specific features in <linux/kvm.h> */ -#define __KVM_HAVE_IOAPIC -#define __KVM_HAVE_IRQ_LINE - -/* Architectural interrupt line count. 
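The version-field protocol spelled out in the pvclock comment above is a lock-free seqcount: the hypervisor makes version odd before touching the record and even again when it is done, so a reader retries whenever it saw an odd value or the version changed between its two reads. A guest-side read following that description might look like the sketch below (illustrative only, not part of this patch; the struct layout is the one removed above, and a real reader would also place compiler barriers between the loads):

    #include <stdint.h>

    struct pvclock_vcpu_time_info {
            uint32_t version;
            uint32_t pad0;
            uint64_t tsc_timestamp;
            uint64_t system_time;
            uint32_t tsc_to_system_mul;
            int8_t   tsc_shift;
            uint8_t  pad[3];
    } __attribute__((__packed__));

    static uint64_t pvclock_read_system_time(
                    const volatile struct pvclock_vcpu_time_info *ti)
    {
            uint32_t version;
            uint64_t system_time;

            do {
                    version = ti->version;        /* odd: update in flight */
                    system_time = ti->system_time;
            } while ((version & 1) || version != ti->version);

            return system_time;
    }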
*/ -#define KVM_NR_INTERRUPTS 256 - -#define KVM_IOAPIC_NUM_PINS 48 - -struct kvm_ioapic_state { - __u64 base_address; - __u32 ioregsel; - __u32 id; - __u32 irr; - __u32 pad; - union { - __u64 bits; - struct { - __u8 vector; - __u8 delivery_mode:3; - __u8 dest_mode:1; - __u8 delivery_status:1; - __u8 polarity:1; - __u8 remote_irr:1; - __u8 trig_mode:1; - __u8 mask:1; - __u8 reserve:7; - __u8 reserved[4]; - __u8 dest_id; - } fields; - } redirtbl[KVM_IOAPIC_NUM_PINS]; -}; - -#define KVM_IRQCHIP_PIC_MASTER 0 -#define KVM_IRQCHIP_PIC_SLAVE 1 -#define KVM_IRQCHIP_IOAPIC 2 -#define KVM_NR_IRQCHIPS 3 - -#define KVM_CONTEXT_SIZE 8*1024 - -struct kvm_fpreg { - union { - unsigned long bits[2]; - long double __dummy; /* force 16-byte alignment */ - } u; -}; - -union context { - /* 8K size */ - char dummy[KVM_CONTEXT_SIZE]; - struct { - unsigned long psr; - unsigned long pr; - unsigned long caller_unat; - unsigned long pad; - unsigned long gr[32]; - unsigned long ar[128]; - unsigned long br[8]; - unsigned long cr[128]; - unsigned long rr[8]; - unsigned long ibr[8]; - unsigned long dbr[8]; - unsigned long pkr[8]; - struct kvm_fpreg fr[128]; - }; -}; - -struct thash_data { - union { - struct { - unsigned long p : 1; /* 0 */ - unsigned long rv1 : 1; /* 1 */ - unsigned long ma : 3; /* 2-4 */ - unsigned long a : 1; /* 5 */ - unsigned long d : 1; /* 6 */ - unsigned long pl : 2; /* 7-8 */ - unsigned long ar : 3; /* 9-11 */ - unsigned long ppn : 38; /* 12-49 */ - unsigned long rv2 : 2; /* 50-51 */ - unsigned long ed : 1; /* 52 */ - unsigned long ig1 : 11; /* 53-63 */ - }; - struct { - unsigned long __rv1 : 53; /* 0-52 */ - unsigned long contiguous : 1; /*53 */ - unsigned long tc : 1; /* 54 TR or TC */ - unsigned long cl : 1; - /* 55 I side or D side cache line */ - unsigned long len : 4; /* 56-59 */ - unsigned long io : 1; /* 60 entry is for io or not */ - unsigned long nomap : 1; - /* 61 entry cann't be inserted into machine TLB.*/ - unsigned long checked : 1; - /* 62 for VTLB/VHPT sanity check */ - unsigned long invalid : 1; - /* 63 invalid entry */ - }; - unsigned long page_flags; - }; /* same for VHPT and TLB */ - - union { - struct { - unsigned long rv3 : 2; - unsigned long ps : 6; - unsigned long key : 24; - unsigned long rv4 : 32; - }; - unsigned long itir; - }; - union { - struct { - unsigned long ig2 : 12; - unsigned long vpn : 49; - unsigned long vrn : 3; - }; - unsigned long ifa; - unsigned long vadr; - struct { - unsigned long tag : 63; - unsigned long ti : 1; - }; - unsigned long etag; - }; - union { - struct thash_data *next; - unsigned long rid; - unsigned long gpaddr; - }; -}; - -#define NITRS 8 -#define NDTRS 8 - -struct saved_vpd { - unsigned long vhpi; - unsigned long vgr[16]; - unsigned long vbgr[16]; - unsigned long vnat; - unsigned long vbnat; - unsigned long vcpuid[5]; - unsigned long vpsr; - unsigned long vpr; - union { - unsigned long vcr[128]; - struct { - unsigned long dcr; - unsigned long itm; - unsigned long iva; - unsigned long rsv1[5]; - unsigned long pta; - unsigned long rsv2[7]; - unsigned long ipsr; - unsigned long isr; - unsigned long rsv3; - unsigned long iip; - unsigned long ifa; - unsigned long itir; - unsigned long iipa; - unsigned long ifs; - unsigned long iim; - unsigned long iha; - unsigned long rsv4[38]; - unsigned long lid; - unsigned long ivr; - unsigned long tpr; - unsigned long eoi; - unsigned long irr[4]; - unsigned long itv; - unsigned long pmv; - unsigned long cmcv; - unsigned long rsv5[5]; - unsigned long lrr0; - unsigned long lrr1; - unsigned long rsv6[46]; 
- }; - }; -}; - -struct kvm_regs { - struct saved_vpd vpd; - /*Arch-regs*/ - int mp_state; - unsigned long vmm_rr; - /* TR and TC. */ - struct thash_data itrs[NITRS]; - struct thash_data dtrs[NDTRS]; - /* Bit is set if there is a tr/tc for the region. */ - unsigned char itr_regions; - unsigned char dtr_regions; - unsigned char tc_regions; - - char irq_check; - unsigned long saved_itc; - unsigned long itc_check; - unsigned long timer_check; - unsigned long timer_pending; - unsigned long last_itc; - - unsigned long vrr[8]; - unsigned long ibr[8]; - unsigned long dbr[8]; - unsigned long insvc[4]; /* Interrupt in service. */ - unsigned long xtp; - - unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */ - unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */ - unsigned long metaphysical_saved_rr0; /* from kvm_arch */ - unsigned long metaphysical_saved_rr4; /* from kvm_arch */ - unsigned long fp_psr; /*used for lazy float register */ - unsigned long saved_gp; - /*for phycial emulation */ - - union context saved_guest; - - unsigned long reserved[64]; /* for future use */ -}; - -struct kvm_sregs { -}; - -struct kvm_fpu { -}; - -#define KVM_IA64_VCPU_STACK_SHIFT 16 -#define KVM_IA64_VCPU_STACK_SIZE (1UL << KVM_IA64_VCPU_STACK_SHIFT) - -struct kvm_ia64_vcpu_stack { - unsigned char stack[KVM_IA64_VCPU_STACK_SIZE]; -}; - -struct kvm_debug_exit_arch { -}; - -/* for KVM_SET_GUEST_DEBUG */ -struct kvm_guest_debug_arch { -}; - -/* definition of registers in kvm_run */ -struct kvm_sync_regs { -}; - -#endif diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig deleted file mode 100644 index 3d50ea955c4..00000000000 --- a/arch/ia64/kvm/Kconfig +++ /dev/null @@ -1,66 +0,0 @@ -# -# KVM configuration -# - -source "virt/kvm/Kconfig" - -menuconfig VIRTUALIZATION - bool "Virtualization" - depends on HAVE_KVM || IA64 - default y - ---help--- - Say Y here to get to see options for using your Linux host to run other - operating systems inside virtual machines (guests). - This option alone does not add any kernel code. - - If you say N, all options in this submenu will be skipped and disabled. - -if VIRTUALIZATION - -config KVM - tristate "Kernel-based Virtual Machine (KVM) support" - depends on BROKEN - depends on HAVE_KVM && MODULES - depends on BROKEN - select PREEMPT_NOTIFIERS - select ANON_INODES - select HAVE_KVM_IRQCHIP - select HAVE_KVM_IRQFD - select HAVE_KVM_IRQ_ROUTING - select KVM_APIC_ARCHITECTURE - select KVM_MMIO - ---help--- - Support hosting fully virtualized guest machines using hardware - virtualization extensions. You will need a fairly recent - processor equipped with virtualization extensions. You will also - need to select one or more of the processor modules below. - - This module provides access to the hardware capabilities through - a character device node named /dev/kvm. - - To compile this as a module, choose M here: the module - will be called kvm. - - If unsure, say N. - -config KVM_INTEL - tristate "KVM for Intel Itanium 2 processors support" - depends on KVM && m - ---help--- - Provides support for KVM on Itanium 2 processors equipped with the VT - extensions. - -config KVM_DEVICE_ASSIGNMENT - bool "KVM legacy PCI device assignment support" - depends on KVM && PCI && IOMMU_API - default y - ---help--- - Provide support for legacy PCI device assignment through KVM. The - kernel now also supports a full featured userspace device driver - framework through VFIO, which supersedes much of this support. - - If unsure, say Y. 
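For orientation: the struct kvm_regs removed above, with its embedded saved_vpd, TR/TC arrays and saved guest context, was simply the ia64 payload of the architecture-neutral KVM_GET_REGS/KVM_SET_REGS vcpu ioctls, so userspace saved and restored all of this state with the same two calls used on every other architecture. A minimal (hypothetical) round trip:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Fetch the vcpu's register block, let the caller edit it, then
     * write it back. vcpu_fd is a descriptor from KVM_CREATE_VCPU. */
    static int kvm_regs_roundtrip(int vcpu_fd, struct kvm_regs *regs)
    {
            if (ioctl(vcpu_fd, KVM_GET_REGS, regs) < 0)
                    return -1;
            /* ... inspect or patch *regs here ... */
            return ioctl(vcpu_fd, KVM_SET_REGS, regs);
    }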
- -source drivers/vhost/Kconfig - -endif # VIRTUALIZATION diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile deleted file mode 100644 index 18e45ec49bb..00000000000 --- a/arch/ia64/kvm/Makefile +++ /dev/null @@ -1,67 +0,0 @@ -#This Make file is to generate asm-offsets.h and build source. -# - -#Generate asm-offsets.h for vmm module build -offsets-file := asm-offsets.h - -always := $(offsets-file) -targets := $(offsets-file) -targets += arch/ia64/kvm/asm-offsets.s - -# Default sed regexp - multiline due to syntax constraints -define sed-y - "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}" -endef - -quiet_cmd_offsets = GEN $@ -define cmd_offsets - (set -e; \ - echo "#ifndef __ASM_KVM_OFFSETS_H__"; \ - echo "#define __ASM_KVM_OFFSETS_H__"; \ - echo "/*"; \ - echo " * DO NOT MODIFY."; \ - echo " *"; \ - echo " * This file was generated by Makefile"; \ - echo " *"; \ - echo " */"; \ - echo ""; \ - sed -ne $(sed-y) $<; \ - echo ""; \ - echo "#endif" ) > $@ -endef - -# We use internal rules to avoid the "is up to date" message from make -arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c \ - $(wildcard $(srctree)/arch/ia64/include/asm/*.h)\ - $(wildcard $(srctree)/include/linux/*.h) - $(call if_changed_dep,cc_s_c) - -$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s - $(call cmd,offsets) - -FORCE : $(obj)/$(offsets-file) - -# -# Makefile for Kernel-based Virtual Machine module -# - -ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ -asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ -KVM := ../../../virt/kvm - -common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \ - $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o - -ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y) -common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o -endif - -kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o -obj-$(CONFIG_KVM) += kvm.o - -CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 -kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ - vtlb.o process.o kvm_lib.o -#Add link memcpy and memset to avoid possible structure assignment error -kvm-intel-objs += memcpy.o memset.o -obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c deleted file mode 100644 index 9324c875caf..00000000000 --- a/arch/ia64/kvm/asm-offsets.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * asm-offsets.c Generate definitions needed by assembly language modules. - * This code generates raw asm output which is post-processed - * to extract and format the required data. - * - * Anthony Xu <anthony.xu@intel.com> - * Xiantao Zhang <xiantao.zhang@intel.com> - * Copyright (c) 2007 Intel Corporation KVM support. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - -#include <linux/kvm_host.h> -#include <linux/kbuild.h> - -#include "vcpu.h" - -void foo(void) -{ - DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu)); - DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs)); - - BLANK(); - - DEFINE(VMM_VCPU_META_RR0_OFFSET, - offsetof(struct kvm_vcpu, arch.metaphysical_rr0)); - DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, - offsetof(struct kvm_vcpu, - arch.metaphysical_saved_rr0)); - DEFINE(VMM_VCPU_VRR0_OFFSET, - offsetof(struct kvm_vcpu, arch.vrr[0])); - DEFINE(VMM_VPD_IRR0_OFFSET, - offsetof(struct vpd, irr[0])); - DEFINE(VMM_VCPU_ITC_CHECK_OFFSET, - offsetof(struct kvm_vcpu, arch.itc_check)); - DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET, - offsetof(struct kvm_vcpu, arch.irq_check)); - DEFINE(VMM_VPD_VHPI_OFFSET, - offsetof(struct vpd, vhpi)); - DEFINE(VMM_VCPU_VSA_BASE_OFFSET, - offsetof(struct kvm_vcpu, arch.vsa_base)); - DEFINE(VMM_VCPU_VPD_OFFSET, - offsetof(struct kvm_vcpu, arch.vpd)); - DEFINE(VMM_VCPU_IRQ_CHECK, - offsetof(struct kvm_vcpu, arch.irq_check)); - DEFINE(VMM_VCPU_TIMER_PENDING, - offsetof(struct kvm_vcpu, arch.timer_pending)); - DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, - offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0)); - DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, - offsetof(struct kvm_vcpu, arch.mode_flags)); - DEFINE(VMM_VCPU_ITC_OFS_OFFSET, - offsetof(struct kvm_vcpu, arch.itc_offset)); - DEFINE(VMM_VCPU_LAST_ITC_OFFSET, - offsetof(struct kvm_vcpu, arch.last_itc)); - DEFINE(VMM_VCPU_SAVED_GP_OFFSET, - offsetof(struct kvm_vcpu, arch.saved_gp)); - - BLANK(); - - DEFINE(VMM_PT_REGS_B6_OFFSET, - offsetof(struct kvm_pt_regs, b6)); - DEFINE(VMM_PT_REGS_B7_OFFSET, - offsetof(struct kvm_pt_regs, b7)); - DEFINE(VMM_PT_REGS_AR_CSD_OFFSET, - offsetof(struct kvm_pt_regs, ar_csd)); - DEFINE(VMM_PT_REGS_AR_SSD_OFFSET, - offsetof(struct kvm_pt_regs, ar_ssd)); - DEFINE(VMM_PT_REGS_R8_OFFSET, - offsetof(struct kvm_pt_regs, r8)); - DEFINE(VMM_PT_REGS_R9_OFFSET, - offsetof(struct kvm_pt_regs, r9)); - DEFINE(VMM_PT_REGS_R10_OFFSET, - offsetof(struct kvm_pt_regs, r10)); - DEFINE(VMM_PT_REGS_R11_OFFSET, - offsetof(struct kvm_pt_regs, r11)); - DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET, - offsetof(struct kvm_pt_regs, cr_ipsr)); - DEFINE(VMM_PT_REGS_CR_IIP_OFFSET, - offsetof(struct kvm_pt_regs, cr_iip)); - DEFINE(VMM_PT_REGS_CR_IFS_OFFSET, - offsetof(struct kvm_pt_regs, cr_ifs)); - DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET, - offsetof(struct kvm_pt_regs, ar_unat)); - DEFINE(VMM_PT_REGS_AR_PFS_OFFSET, - offsetof(struct kvm_pt_regs, ar_pfs)); - DEFINE(VMM_PT_REGS_AR_RSC_OFFSET, - offsetof(struct kvm_pt_regs, ar_rsc)); - DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET, - offsetof(struct kvm_pt_regs, ar_rnat)); - - DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET, - offsetof(struct kvm_pt_regs, ar_bspstore)); - DEFINE(VMM_PT_REGS_PR_OFFSET, - offsetof(struct kvm_pt_regs, pr)); - DEFINE(VMM_PT_REGS_B0_OFFSET, - offsetof(struct kvm_pt_regs, b0)); - DEFINE(VMM_PT_REGS_LOADRS_OFFSET, - offsetof(struct kvm_pt_regs, loadrs)); - DEFINE(VMM_PT_REGS_R1_OFFSET, - offsetof(struct kvm_pt_regs, r1)); - DEFINE(VMM_PT_REGS_R12_OFFSET, - offsetof(struct kvm_pt_regs, r12)); - DEFINE(VMM_PT_REGS_R13_OFFSET, - offsetof(struct kvm_pt_regs, r13)); - DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET, - offsetof(struct kvm_pt_regs, ar_fpsr)); - DEFINE(VMM_PT_REGS_R15_OFFSET, - offsetof(struct kvm_pt_regs, r15)); - DEFINE(VMM_PT_REGS_R14_OFFSET, - offsetof(struct kvm_pt_regs, r14)); - DEFINE(VMM_PT_REGS_R2_OFFSET, - offsetof(struct kvm_pt_regs, r2)); - DEFINE(VMM_PT_REGS_R3_OFFSET, - offsetof(struct kvm_pt_regs, r3)); - DEFINE(VMM_PT_REGS_R16_OFFSET, 
- offsetof(struct kvm_pt_regs, r16)); - DEFINE(VMM_PT_REGS_R17_OFFSET, - offsetof(struct kvm_pt_regs, r17)); - DEFINE(VMM_PT_REGS_R18_OFFSET, - offsetof(struct kvm_pt_regs, r18)); - DEFINE(VMM_PT_REGS_R19_OFFSET, - offsetof(struct kvm_pt_regs, r19)); - DEFINE(VMM_PT_REGS_R20_OFFSET, - offsetof(struct kvm_pt_regs, r20)); - DEFINE(VMM_PT_REGS_R21_OFFSET, - offsetof(struct kvm_pt_regs, r21)); - DEFINE(VMM_PT_REGS_R22_OFFSET, - offsetof(struct kvm_pt_regs, r22)); - DEFINE(VMM_PT_REGS_R23_OFFSET, - offsetof(struct kvm_pt_regs, r23)); - DEFINE(VMM_PT_REGS_R24_OFFSET, - offsetof(struct kvm_pt_regs, r24)); - DEFINE(VMM_PT_REGS_R25_OFFSET, - offsetof(struct kvm_pt_regs, r25)); - DEFINE(VMM_PT_REGS_R26_OFFSET, - offsetof(struct kvm_pt_regs, r26)); - DEFINE(VMM_PT_REGS_R27_OFFSET, - offsetof(struct kvm_pt_regs, r27)); - DEFINE(VMM_PT_REGS_R28_OFFSET, - offsetof(struct kvm_pt_regs, r28)); - DEFINE(VMM_PT_REGS_R29_OFFSET, - offsetof(struct kvm_pt_regs, r29)); - DEFINE(VMM_PT_REGS_R30_OFFSET, - offsetof(struct kvm_pt_regs, r30)); - DEFINE(VMM_PT_REGS_R31_OFFSET, - offsetof(struct kvm_pt_regs, r31)); - DEFINE(VMM_PT_REGS_AR_CCV_OFFSET, - offsetof(struct kvm_pt_regs, ar_ccv)); - DEFINE(VMM_PT_REGS_F6_OFFSET, - offsetof(struct kvm_pt_regs, f6)); - DEFINE(VMM_PT_REGS_F7_OFFSET, - offsetof(struct kvm_pt_regs, f7)); - DEFINE(VMM_PT_REGS_F8_OFFSET, - offsetof(struct kvm_pt_regs, f8)); - DEFINE(VMM_PT_REGS_F9_OFFSET, - offsetof(struct kvm_pt_regs, f9)); - DEFINE(VMM_PT_REGS_F10_OFFSET, - offsetof(struct kvm_pt_regs, f10)); - DEFINE(VMM_PT_REGS_F11_OFFSET, - offsetof(struct kvm_pt_regs, f11)); - DEFINE(VMM_PT_REGS_R4_OFFSET, - offsetof(struct kvm_pt_regs, r4)); - DEFINE(VMM_PT_REGS_R5_OFFSET, - offsetof(struct kvm_pt_regs, r5)); - DEFINE(VMM_PT_REGS_R6_OFFSET, - offsetof(struct kvm_pt_regs, r6)); - DEFINE(VMM_PT_REGS_R7_OFFSET, - offsetof(struct kvm_pt_regs, r7)); - DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET, - offsetof(struct kvm_pt_regs, eml_unat)); - DEFINE(VMM_VCPU_IIPA_OFFSET, - offsetof(struct kvm_vcpu, arch.cr_iipa)); - DEFINE(VMM_VCPU_OPCODE_OFFSET, - offsetof(struct kvm_vcpu, arch.opcode)); - DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause)); - DEFINE(VMM_VCPU_ISR_OFFSET, - offsetof(struct kvm_vcpu, arch.cr_isr)); - DEFINE(VMM_PT_REGS_R16_SLOT, - (((offsetof(struct kvm_pt_regs, r16) - - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f)); - DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, - offsetof(struct kvm_vcpu, arch.mode_flags)); - DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp)); - BLANK(); - - DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd)); - DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs)); - DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET, - offsetof(struct kvm_vcpu, arch.insvc[0])); - DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta)); - DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr)); - - DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4])); - DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5])); - DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12])); - DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13])); - DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0])); - DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1])); - DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0])); - DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1])); - DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2])); - DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0])); - DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16])); - 
DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18])); - DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19])); - DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21])); - DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24])); - DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27])); - DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28])); - DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29])); - DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30])); - DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36])); - DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40])); - DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64])); - DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65])); - DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0])); - DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2])); - DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8])); - DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0])); - DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0])); - DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2])); - DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3])); - DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32])); - DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33])); - DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0])); - DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr)); - BLANK(); -} diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h deleted file mode 100644 index c0785a72827..00000000000 --- a/arch/ia64/kvm/irq.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * irq.h: In-kernel interrupt controller related definitions - * Copyright (c) 2008, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Authors: - * Xiantao Zhang <xiantao.zhang@intel.com> - * - */ - -#ifndef __IRQ_H -#define __IRQ_H - -#include "lapic.h" - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 1; -} - -#endif diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c deleted file mode 100644 index dbe46f43884..00000000000 --- a/arch/ia64/kvm/kvm-ia64.c +++ /dev/null @@ -1,1942 +0,0 @@ -/* - * kvm_ia64.c: Basic KVM support On Itanium series processors - * - * - * Copyright (C) 2007, Intel Corporation. - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/percpu.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include <linux/smp.h> -#include <linux/kvm_host.h> -#include <linux/kvm.h> -#include <linux/bitops.h> -#include <linux/hrtimer.h> -#include <linux/uaccess.h> -#include <linux/iommu.h> -#include <linux/intel-iommu.h> -#include <linux/pci.h> - -#include <asm/pgtable.h> -#include <asm/gcc_intrin.h> -#include <asm/pal.h> -#include <asm/cacheflush.h> -#include <asm/div64.h> -#include <asm/tlb.h> -#include <asm/elf.h> -#include <asm/sn/addrs.h> -#include <asm/sn/clksupport.h> -#include <asm/sn/shub_mmr.h> - -#include "misc.h" -#include "vti.h" -#include "iodev.h" -#include "ioapic.h" -#include "lapic.h" -#include "irq.h" - -static unsigned long kvm_vmm_base; -static unsigned long kvm_vsa_base; -static unsigned long kvm_vm_buffer; -static unsigned long kvm_vm_buffer_size; -unsigned long kvm_vmm_gp; - -static long vp_env_info; - -static struct kvm_vmm_info *kvm_vmm_info; - -static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu); - -struct kvm_stats_debugfs_item debugfs_entries[] = { - { NULL } -}; - -static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu) -{ -#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) - if (vcpu->kvm->arch.is_sn2) - return rtc_time(); - else -#endif - return ia64_getreg(_IA64_REG_AR_ITC); -} - -static void kvm_flush_icache(unsigned long start, unsigned long len) -{ - int l; - - for (l = 0; l < (len + 32); l += 32) - ia64_fc((void *)(start + l)); - - ia64_sync_i(); - ia64_srlz_i(); -} - -static void kvm_flush_tlb_all(void) -{ - unsigned long i, j, count0, count1, stride0, stride1, addr; - long flags; - - addr = local_cpu_data->ptce_base; - count0 = local_cpu_data->ptce_count[0]; - count1 = local_cpu_data->ptce_count[1]; - stride0 = local_cpu_data->ptce_stride[0]; - stride1 = local_cpu_data->ptce_stride[1]; - - local_irq_save(flags); - for (i = 0; i < count0; ++i) { - for (j = 0; j < count1; ++j) { - ia64_ptce(addr); - addr += stride1; - } - addr += stride0; - } - local_irq_restore(flags); - ia64_srlz_i(); /* srlz.i implies srlz.d */ -} - -long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva, - (u64)opt_handler); - - return iprv.status; -} - -static DEFINE_SPINLOCK(vp_lock); - -int kvm_arch_hardware_enable(void) -{ - long status; - long tmp_base; - unsigned long pte; - unsigned long saved_psr; - int slot; - - pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); - local_irq_save(saved_psr); - slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); - local_irq_restore(saved_psr); - if (slot < 0) - return -EINVAL; - - spin_lock(&vp_lock); - status = ia64_pal_vp_init_env(kvm_vsa_base ? 
- VP_INIT_ENV : VP_INIT_ENV_INITALIZE, - __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); - if (status != 0) { - spin_unlock(&vp_lock); - printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); - return -EINVAL; - } - - if (!kvm_vsa_base) { - kvm_vsa_base = tmp_base; - printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base); - } - spin_unlock(&vp_lock); - ia64_ptr_entry(0x3, slot); - - return 0; -} - -void kvm_arch_hardware_disable(void) -{ - - long status; - int slot; - unsigned long pte; - unsigned long saved_psr; - unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA); - - pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), - PAGE_KERNEL)); - - local_irq_save(saved_psr); - slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); - local_irq_restore(saved_psr); - if (slot < 0) - return; - - status = ia64_pal_vp_exit_env(host_iva); - if (status) - printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n", - status); - ia64_ptr_entry(0x3, slot); -} - -void kvm_arch_check_processor_compat(void *rtn) -{ - *(int *)rtn = 0; -} - -int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) -{ - - int r; - - switch (ext) { - case KVM_CAP_IRQCHIP: - case KVM_CAP_MP_STATE: - case KVM_CAP_IRQ_INJECT_STATUS: - case KVM_CAP_IOAPIC_POLARITY_IGNORED: - r = 1; - break; - case KVM_CAP_COALESCED_MMIO: - r = KVM_COALESCED_MMIO_PAGE_OFFSET; - break; -#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT - case KVM_CAP_IOMMU: - r = iommu_present(&pci_bus_type); - break; -#endif - default: - r = 0; - } - return r; - -} - -static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = 1; - return 0; -} - -static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct kvm_mmio_req *p; - struct kvm_io_device *mmio_dev; - int r; - - p = kvm_get_vcpu_ioreq(vcpu); - - if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) - goto mmio; - vcpu->mmio_needed = 1; - vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr; - vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size; - vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; - - if (vcpu->mmio_is_write) - memcpy(vcpu->arch.mmio_data, &p->data, p->size); - memcpy(kvm_run->mmio.data, &p->data, p->size); - kvm_run->exit_reason = KVM_EXIT_MMIO; - return 0; -mmio: - if (p->dir) - r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr, - p->size, &p->data); - else - r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr, - p->size, &p->data); - if (r) - printk(KERN_ERR"kvm: No iodevice found! 
addr:%lx\n", p->addr); - p->state = STATE_IORESP_READY; - - return 1; -} - -static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - - if (p->exit_reason == EXIT_REASON_PAL_CALL) - return kvm_pal_emul(vcpu, kvm_run); - else { - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = 2; - return 0; - } -} - -static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - - if (p->exit_reason == EXIT_REASON_SAL_CALL) { - kvm_sal_emul(vcpu); - return 1; - } else { - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = 3; - return 0; - } - -} - -static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector) -{ - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (!test_and_set_bit(vector, &vpd->irr[0])) { - vcpu->arch.irq_new_pending = 1; - kvm_vcpu_kick(vcpu); - return 1; - } - return 0; -} - -/* - * offset: address offset to IPI space. - * value: deliver value. - */ -static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, - uint64_t vector) -{ - switch (dm) { - case SAPIC_FIXED: - break; - case SAPIC_NMI: - vector = 2; - break; - case SAPIC_EXTINT: - vector = 0; - break; - case SAPIC_INIT: - case SAPIC_PMI: - default: - printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); - return; - } - __apic_accept_irq(vcpu, vector); -} - -static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, - unsigned long eid) -{ - union ia64_lid lid; - int i; - struct kvm_vcpu *vcpu; - - kvm_for_each_vcpu(i, vcpu, kvm) { - lid.val = VCPU_LID(vcpu); - if (lid.id == id && lid.eid == eid) - return vcpu; - } - - return NULL; -} - -static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct exit_ctl_data *p = kvm_get_exit_data(vcpu); - struct kvm_vcpu *target_vcpu; - struct kvm_pt_regs *regs; - union ia64_ipi_a addr = p->u.ipi_data.addr; - union ia64_ipi_d data = p->u.ipi_data.data; - - target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); - if (!target_vcpu) - return handle_vm_error(vcpu, kvm_run); - - if (!target_vcpu->arch.launched) { - regs = vcpu_regs(target_vcpu); - - regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; - regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; - - target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - if (waitqueue_active(&target_vcpu->wq)) - wake_up_interruptible(&target_vcpu->wq); - } else { - vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); - if (target_vcpu != vcpu) - kvm_vcpu_kick(target_vcpu); - } - - return 1; -} - -struct call_data { - struct kvm_ptc_g ptc_g_data; - struct kvm_vcpu *vcpu; -}; - -static void vcpu_global_purge(void *info) -{ - struct call_data *p = (struct call_data *)info; - struct kvm_vcpu *vcpu = p->vcpu; - - if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) - return; - - set_bit(KVM_REQ_PTC_G, &vcpu->requests); - if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) { - vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] = - p->ptc_g_data; - } else { - clear_bit(KVM_REQ_PTC_G, &vcpu->requests); - vcpu->arch.ptc_g_count = 0; - set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); - } -} - -static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct exit_ctl_data *p = kvm_get_exit_data(vcpu); - struct kvm *kvm = vcpu->kvm; - struct call_data call_data; - int i; - struct kvm_vcpu *vcpui; - - call_data.ptc_g_data = p->u.ptc_g_data; - - kvm_for_each_vcpu(i, vcpui, kvm) { - if 
(vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED || - vcpu == vcpui) - continue; - - if (waitqueue_active(&vcpui->wq)) - wake_up_interruptible(&vcpui->wq); - - if (vcpui->cpu != -1) { - call_data.vcpu = vcpui; - smp_call_function_single(vcpui->cpu, - vcpu_global_purge, &call_data, 1); - } else - printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); - - } - return 1; -} - -static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - return 1; -} - -static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu) -{ - unsigned long pte, rtc_phys_addr, map_addr; - int slot; - - map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT); - rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC; - pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC)); - slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT); - vcpu->arch.sn_rtc_tr_slot = slot; - if (slot < 0) { - printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n"); - slot = 0; - } - return slot; -} - -int kvm_emulate_halt(struct kvm_vcpu *vcpu) -{ - - ktime_t kt; - long itc_diff; - unsigned long vcpu_now_itc; - unsigned long expires; - struct hrtimer *p_ht = &vcpu->arch.hlt_timer; - unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec; - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (irqchip_in_kernel(vcpu->kvm)) { - - vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset; - - if (time_after(vcpu_now_itc, vpd->itm)) { - vcpu->arch.timer_check = 1; - return 1; - } - itc_diff = vpd->itm - vcpu_now_itc; - if (itc_diff < 0) - itc_diff = -itc_diff; - - expires = div64_u64(itc_diff, cyc_per_usec); - kt = ktime_set(0, 1000 * expires); - - vcpu->arch.ht_active = 1; - hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); - - vcpu->arch.mp_state = KVM_MP_STATE_HALTED; - kvm_vcpu_block(vcpu); - hrtimer_cancel(p_ht); - vcpu->arch.ht_active = 0; - - if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) || - kvm_cpu_has_pending_timer(vcpu)) - if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - - if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) - return -EINTR; - return 1; - } else { - printk(KERN_ERR"kvm: Unsupported userspace halt!"); - return 0; - } -} - -static int handle_vm_shutdown(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; - return 0; -} - -static int handle_external_interrupt(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - return 1; -} - -static int handle_vcpu_debug(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - printk("VMM: %s", vcpu->arch.log_buf); - return 1; -} - -static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) = { - [EXIT_REASON_VM_PANIC] = handle_vm_error, - [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio, - [EXIT_REASON_PAL_CALL] = handle_pal_call, - [EXIT_REASON_SAL_CALL] = handle_sal_call, - [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6, - [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown, - [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, - [EXIT_REASON_IPI] = handle_ipi, - [EXIT_REASON_PTC_G] = handle_global_purge, - [EXIT_REASON_DEBUG] = handle_vcpu_debug, - -}; - -static const int kvm_vti_max_exit_handlers = - sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers); - -static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p_exit_data; - - p_exit_data = kvm_get_exit_data(vcpu); - return p_exit_data->exit_reason; -} - -/* - * The guest has exited. See if we can fix it or if we need userspace - * assistance. 
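kvm_vti_exit_handlers[] above, together with kvm_handle_exit() just below, is the standard KVM exit-dispatch idiom: the VMM records an exit reason, the host indexes a handler table with it, and the handler's return value decides whether the vcpu loop re-enters the guest (positive) or surfaces the exit to userspace (zero, after filling in kvm_run). Reduced to its skeleton (an illustrative sketch with hypothetical names, not this patch's code):

    #define MAX_EXIT_REASONS 32            /* hypothetical bound */

    typedef int (*exit_handler_t)(void *vcpu, void *run);

    static exit_handler_t handlers[MAX_EXIT_REASONS];

    static int dispatch_exit(unsigned int reason, void *vcpu, void *run)
    {
            if (reason < MAX_EXIT_REASONS && handlers[reason])
                    return handlers[reason](vcpu, run); /* >0: resume guest */
            return 0;                                   /* 0: to userspace */
    }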
- */ -static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) -{ - u32 exit_reason = kvm_get_exit_reason(vcpu); - vcpu->arch.last_exit = exit_reason; - - if (exit_reason < kvm_vti_max_exit_handlers - && kvm_vti_exit_handlers[exit_reason]) - return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run); - else { - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = exit_reason; - } - return 0; -} - -static inline void vti_set_rr6(unsigned long rr6) -{ - ia64_set_rr(RR6, rr6); - ia64_srlz_i(); -} - -static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu) -{ - unsigned long pte; - struct kvm *kvm = vcpu->kvm; - int r; - - /*Insert a pair of tr to map vmm*/ - pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); - r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); - if (r < 0) - goto out; - vcpu->arch.vmm_tr_slot = r; - /*Insert a pairt of tr to map data of vm*/ - pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL)); - r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE, - pte, KVM_VM_DATA_SHIFT); - if (r < 0) - goto out; - vcpu->arch.vm_tr_slot = r; - -#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) - if (kvm->arch.is_sn2) { - r = kvm_sn2_setup_mappings(vcpu); - if (r < 0) - goto out; - } -#endif - - r = 0; -out: - return r; -} - -static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = vcpu->kvm; - ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); - ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); -#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) - if (kvm->arch.is_sn2) - ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot); -#endif -} - -static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) -{ - unsigned long psr; - int r; - int cpu = smp_processor_id(); - - if (vcpu->arch.last_run_cpu != cpu || - per_cpu(last_vcpu, cpu) != vcpu) { - per_cpu(last_vcpu, cpu) = vcpu; - vcpu->arch.last_run_cpu = cpu; - kvm_flush_tlb_all(); - } - - vcpu->arch.host_rr6 = ia64_get_rr(RR6); - vti_set_rr6(vcpu->arch.vmm_rr); - local_irq_save(psr); - r = kvm_insert_vmm_mapping(vcpu); - local_irq_restore(psr); - return r; -} - -static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) -{ - kvm_purge_vmm_mapping(vcpu); - vti_set_rr6(vcpu->arch.host_rr6); -} - -static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - union context *host_ctx, *guest_ctx; - int r, idx; - - idx = srcu_read_lock(&vcpu->kvm->srcu); - -again: - if (signal_pending(current)) { - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - goto out; - } - - preempt_disable(); - local_irq_disable(); - - /*Get host and guest context with guest address space.*/ - host_ctx = kvm_get_host_context(vcpu); - guest_ctx = kvm_get_guest_context(vcpu); - - clear_bit(KVM_REQ_KICK, &vcpu->requests); - - r = kvm_vcpu_pre_transition(vcpu); - if (r < 0) - goto vcpu_run_fail; - - srcu_read_unlock(&vcpu->kvm->srcu, idx); - vcpu->mode = IN_GUEST_MODE; - kvm_guest_enter(); - - /* - * Transition to the guest - */ - kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); - - kvm_vcpu_post_transition(vcpu); - - vcpu->arch.launched = 1; - set_bit(KVM_REQ_KICK, &vcpu->requests); - local_irq_enable(); - - /* - * We must have an instruction between local_irq_enable() and - * kvm_guest_exit(), so the timer interrupt isn't delayed by - * the interrupt shadow. The stat.exits increment will do nicely. 
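One detail of kvm_vcpu_pre_transition() above deserves a note: the last_run_cpu / per-cpu last_vcpu comparison is a two-sided staleness check, flushing host TLB state both when this vcpu has migrated to a new cpu and when this cpu most recently ran a different vcpu, since either case can leave translations for the wrong guest behind. In isolation the check is (a sketch with simplified bookkeeping in place of the kernel's per-cpu variables):

    #define NR_CPUS 64                     /* stand-in */

    struct vcpu { int last_run_cpu; /* ... */ };

    static struct vcpu *last_vcpu[NR_CPUS]; /* last vcpu run on each cpu */

    static int need_tlb_flush(struct vcpu *v, int cpu)
    {
            if (v->last_run_cpu != cpu || last_vcpu[cpu] != v) {
                    last_vcpu[cpu] = v;
                    v->last_run_cpu = cpu;
                    return 1;              /* flush before entering guest */
            }
            return 0;
    }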
- * But we need to prevent reordering, hence this barrier(): - */ - barrier(); - kvm_guest_exit(); - vcpu->mode = OUTSIDE_GUEST_MODE; - preempt_enable(); - - idx = srcu_read_lock(&vcpu->kvm->srcu); - - r = kvm_handle_exit(kvm_run, vcpu); - - if (r > 0) { - if (!need_resched()) - goto again; - } - -out: - srcu_read_unlock(&vcpu->kvm->srcu, idx); - if (r > 0) { - cond_resched(); - idx = srcu_read_lock(&vcpu->kvm->srcu); - goto again; - } - - return r; - -vcpu_run_fail: - local_irq_enable(); - preempt_enable(); - kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; - goto out; -} - -static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) -{ - struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); - - if (!vcpu->mmio_is_write) - memcpy(&p->data, vcpu->arch.mmio_data, 8); - p->state = STATE_IORESP_READY; -} - -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - int r; - sigset_t sigsaved; - - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - - if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { - kvm_vcpu_block(vcpu); - clear_bit(KVM_REQ_UNHALT, &vcpu->requests); - r = -EAGAIN; - goto out; - } - - if (vcpu->mmio_needed) { - memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8); - kvm_set_mmio_data(vcpu); - vcpu->mmio_read_completed = 1; - vcpu->mmio_needed = 0; - } - r = __vcpu_run(vcpu, kvm_run); -out: - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - - return r; -} - -struct kvm *kvm_arch_alloc_vm(void) -{ - - struct kvm *kvm; - uint64_t vm_base; - - BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE); - - vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); - - if (!vm_base) - return NULL; - - memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); - kvm = (struct kvm *)(vm_base + - offsetof(struct kvm_vm_data, kvm_vm_struct)); - kvm->arch.vm_base = vm_base; - printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base); - - return kvm; -} - -struct kvm_ia64_io_range { - unsigned long start; - unsigned long size; - unsigned long type; -}; - -static const struct kvm_ia64_io_range io_ranges[] = { - {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER}, - {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO}, - {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO}, - {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC}, - {PIB_START, PIB_SIZE, GPFN_PIB}, -}; - -static void kvm_build_io_pmt(struct kvm *kvm) -{ - unsigned long i, j; - - /* Mark I/O ranges */ - for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range)); - i++) { - for (j = io_ranges[i].start; - j < io_ranges[i].start + io_ranges[i].size; - j += PAGE_SIZE) - kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT, - io_ranges[i].type, 0); - } - -} - -/*Use unused rids to virtualize guest rid.*/ -#define GUEST_PHYSICAL_RR0 0x1739 -#define GUEST_PHYSICAL_RR4 0x2739 -#define VMM_INIT_RR 0x1660 - -int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) -{ - BUG_ON(!kvm); - - if (type) - return -EINVAL; - - kvm->arch.is_sn2 = ia64_platform_is("sn2"); - - kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; - kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; - kvm->arch.vmm_init_rr = VMM_INIT_RR; - - /* - *Fill P2M entries for MMIO/IO ranges - */ - kvm_build_io_pmt(kvm); - - INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); - - /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ - set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); - - return 0; -} - -static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, - struct kvm_irqchip *chip) -{ - int r; - - r = 0; - switch (chip->chip_id) 
{ - case KVM_IRQCHIP_IOAPIC: - r = kvm_get_ioapic(kvm, &chip->chip.ioapic); - break; - default: - r = -EINVAL; - break; - } - return r; -} - -static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) -{ - int r; - - r = 0; - switch (chip->chip_id) { - case KVM_IRQCHIP_IOAPIC: - r = kvm_set_ioapic(kvm, &chip->chip.ioapic); - break; - default: - r = -EINVAL; - break; - } - return r; -} - -#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x - -int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -{ - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - int i; - - for (i = 0; i < 16; i++) { - vpd->vgr[i] = regs->vpd.vgr[i]; - vpd->vbgr[i] = regs->vpd.vbgr[i]; - } - for (i = 0; i < 128; i++) - vpd->vcr[i] = regs->vpd.vcr[i]; - vpd->vhpi = regs->vpd.vhpi; - vpd->vnat = regs->vpd.vnat; - vpd->vbnat = regs->vpd.vbnat; - vpd->vpsr = regs->vpd.vpsr; - - vpd->vpr = regs->vpd.vpr; - - memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context)); - - RESTORE_REGS(mp_state); - RESTORE_REGS(vmm_rr); - memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS); - memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS); - RESTORE_REGS(itr_regions); - RESTORE_REGS(dtr_regions); - RESTORE_REGS(tc_regions); - RESTORE_REGS(irq_check); - RESTORE_REGS(itc_check); - RESTORE_REGS(timer_check); - RESTORE_REGS(timer_pending); - RESTORE_REGS(last_itc); - for (i = 0; i < 8; i++) { - vcpu->arch.vrr[i] = regs->vrr[i]; - vcpu->arch.ibr[i] = regs->ibr[i]; - vcpu->arch.dbr[i] = regs->dbr[i]; - } - for (i = 0; i < 4; i++) - vcpu->arch.insvc[i] = regs->insvc[i]; - RESTORE_REGS(xtp); - RESTORE_REGS(metaphysical_rr0); - RESTORE_REGS(metaphysical_rr4); - RESTORE_REGS(metaphysical_saved_rr0); - RESTORE_REGS(metaphysical_saved_rr4); - RESTORE_REGS(fp_psr); - RESTORE_REGS(saved_gp); - - vcpu->arch.irq_new_pending = 1; - vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu); - set_bit(KVM_REQ_RESUME, &vcpu->requests); - - return 0; -} - -int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, - bool line_status) -{ - if (!irqchip_in_kernel(kvm)) - return -ENXIO; - - irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event->irq, irq_event->level, - line_status); - return 0; -} - -long kvm_arch_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) -{ - struct kvm *kvm = filp->private_data; - void __user *argp = (void __user *)arg; - int r = -ENOTTY; - - switch (ioctl) { - case KVM_CREATE_IRQCHIP: - r = -EFAULT; - r = kvm_ioapic_init(kvm); - if (r) - goto out; - r = kvm_setup_default_irq_routing(kvm); - if (r) { - mutex_lock(&kvm->slots_lock); - kvm_ioapic_destroy(kvm); - mutex_unlock(&kvm->slots_lock); - goto out; - } - break; - case KVM_GET_IRQCHIP: { - /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ - struct kvm_irqchip chip; - - r = -EFAULT; - if (copy_from_user(&chip, argp, sizeof chip)) - goto out; - r = -ENXIO; - if (!irqchip_in_kernel(kvm)) - goto out; - r = kvm_vm_ioctl_get_irqchip(kvm, &chip); - if (r) - goto out; - r = -EFAULT; - if (copy_to_user(argp, &chip, sizeof chip)) - goto out; - r = 0; - break; - } - case KVM_SET_IRQCHIP: { - /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ - struct kvm_irqchip chip; - - r = -EFAULT; - if (copy_from_user(&chip, argp, sizeof chip)) - goto out; - r = -ENXIO; - if (!irqchip_in_kernel(kvm)) - goto out; - r = kvm_vm_ioctl_set_irqchip(kvm, &chip); - if (r) - goto out; - r = 0; - break; - } - default: - ; - } -out: - return r; -} - -int 
kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return -EINVAL; -} - -int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return -EINVAL; - -} -int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr) -{ - - return -EINVAL; -} - -static int kvm_alloc_vmm_area(void) -{ - if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) { - kvm_vmm_base = __get_free_pages(GFP_KERNEL, - get_order(KVM_VMM_SIZE)); - if (!kvm_vmm_base) - return -ENOMEM; - - memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); - kvm_vm_buffer = kvm_vmm_base + VMM_SIZE; - - printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n", - kvm_vmm_base, kvm_vm_buffer); - } - - return 0; -} - -static void kvm_free_vmm_area(void) -{ - if (kvm_vmm_base) { - /*Zero this area before free to avoid bits leak!!*/ - memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); - free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE)); - kvm_vmm_base = 0; - kvm_vm_buffer = 0; - kvm_vsa_base = 0; - } -} - -static int vti_init_vpd(struct kvm_vcpu *vcpu) -{ - int i; - union cpuid3_t cpuid3; - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (IS_ERR(vpd)) - return PTR_ERR(vpd); - - /* CPUID init */ - for (i = 0; i < 5; i++) - vpd->vcpuid[i] = ia64_get_cpuid(i); - - /* Limit the CPUID number to 5 */ - cpuid3.value = vpd->vcpuid[3]; - cpuid3.number = 4; /* 5 - 1 */ - vpd->vcpuid[3] = cpuid3.value; - - /*Set vac and vdc fields*/ - vpd->vac.a_from_int_cr = 1; - vpd->vac.a_to_int_cr = 1; - vpd->vac.a_from_psr = 1; - vpd->vac.a_from_cpuid = 1; - vpd->vac.a_cover = 1; - vpd->vac.a_bsw = 1; - vpd->vac.a_int = 1; - vpd->vdc.d_vmsw = 1; - - /*Set virtual buffer*/ - vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE; - - return 0; -} - -static int vti_create_vp(struct kvm_vcpu *vcpu) -{ - long ret; - struct vpd *vpd = vcpu->arch.vpd; - unsigned long vmm_ivt; - - vmm_ivt = kvm_vmm_info->vmm_ivt; - - printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt); - - ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0); - - if (ret) { - printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n"); - return -EINVAL; - } - return 0; -} - -static void init_ptce_info(struct kvm_vcpu *vcpu) -{ - ia64_ptce_info_t ptce = {0}; - - ia64_get_ptce(&ptce); - vcpu->arch.ptce_base = ptce.base; - vcpu->arch.ptce_count[0] = ptce.count[0]; - vcpu->arch.ptce_count[1] = ptce.count[1]; - vcpu->arch.ptce_stride[0] = ptce.stride[0]; - vcpu->arch.ptce_stride[1] = ptce.stride[1]; -} - -static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu) -{ - struct hrtimer *p_ht = &vcpu->arch.hlt_timer; - - if (hrtimer_cancel(p_ht)) - hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS); -} - -static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data) -{ - struct kvm_vcpu *vcpu; - wait_queue_head_t *q; - - vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer); - q = &vcpu->wq; - - if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED) - goto out; - - if (waitqueue_active(q)) - wake_up_interruptible(q); - -out: - vcpu->arch.timer_fired = 1; - vcpu->arch.timer_check = 1; - return HRTIMER_NORESTART; -} - -#define PALE_RESET_ENTRY 0x80000000ffffffb0UL - -bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) -{ - return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); -} - -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu *v; - int r; - int i; - long itc_offset; - struct kvm *kvm = vcpu->kvm; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - union context *p_ctx = &vcpu->arch.guest; - struct 
kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu); - - /*Init vcpu context for first run.*/ - if (IS_ERR(vmm_vcpu)) - return PTR_ERR(vmm_vcpu); - - if (kvm_vcpu_is_bsp(vcpu)) { - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - - /*Set entry address for first run.*/ - regs->cr_iip = PALE_RESET_ENTRY; - - /*Initialize itc offset for vcpus*/ - itc_offset = 0UL - kvm_get_itc(vcpu); - for (i = 0; i < KVM_MAX_VCPUS; i++) { - v = (struct kvm_vcpu *)((char *)vcpu + - sizeof(struct kvm_vcpu_data) * i); - v->arch.itc_offset = itc_offset; - v->arch.last_itc = 0; - } - } else - vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; - - r = -ENOMEM; - vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); - if (!vcpu->arch.apic) - goto out; - vcpu->arch.apic->vcpu = vcpu; - - p_ctx->gr[1] = 0; - p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET); - p_ctx->gr[13] = (unsigned long)vmm_vcpu; - p_ctx->psr = 0x1008522000UL; - p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ - p_ctx->caller_unat = 0; - p_ctx->pr = 0x0; - p_ctx->ar[36] = 0x0; /*unat*/ - p_ctx->ar[19] = 0x0; /*rnat*/ - p_ctx->ar[18] = (unsigned long)vmm_vcpu + - ((sizeof(struct kvm_vcpu)+15) & ~15); - p_ctx->ar[64] = 0x0; /*pfs*/ - p_ctx->cr[0] = 0x7e04UL; - p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt; - p_ctx->cr[8] = 0x3c; - - /*Initialize region register*/ - p_ctx->rr[0] = 0x30; - p_ctx->rr[1] = 0x30; - p_ctx->rr[2] = 0x30; - p_ctx->rr[3] = 0x30; - p_ctx->rr[4] = 0x30; - p_ctx->rr[5] = 0x30; - p_ctx->rr[7] = 0x30; - - /*Initialize branch register 0*/ - p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry; - - vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr; - vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0; - vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4; - - hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - vcpu->arch.hlt_timer.function = hlt_timer_fn; - - vcpu->arch.last_run_cpu = -1; - vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id); - vcpu->arch.vsa_base = kvm_vsa_base; - vcpu->arch.__gp = kvm_vmm_gp; - vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); - vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id); - vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id); - init_ptce_info(vcpu); - - r = 0; -out: - return r; -} - -static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) -{ - unsigned long psr; - int r; - - local_irq_save(psr); - r = kvm_insert_vmm_mapping(vcpu); - local_irq_restore(psr); - if (r) - goto fail; - r = kvm_vcpu_init(vcpu, vcpu->kvm, id); - if (r) - goto fail; - - r = vti_init_vpd(vcpu); - if (r) { - printk(KERN_DEBUG"kvm: vpd init error!!\n"); - goto uninit; - } - - r = vti_create_vp(vcpu); - if (r) - goto uninit; - - kvm_purge_vmm_mapping(vcpu); - - return 0; -uninit: - kvm_vcpu_uninit(vcpu); -fail: - return r; -} - -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, - unsigned int id) -{ - struct kvm_vcpu *vcpu; - unsigned long vm_base = kvm->arch.vm_base; - int r; - int cpu; - - BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2); - - r = -EINVAL; - if (id >= KVM_MAX_VCPUS) { - printk(KERN_ERR"kvm: Can't configure vcpus > %ld", - KVM_MAX_VCPUS); - goto fail; - } - - r = -ENOMEM; - if (!vm_base) { - printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); - goto fail; - } - vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data, - vcpu_data[id].vcpu_struct)); - vcpu->kvm = kvm; - - cpu = get_cpu(); - r = vti_vcpu_setup(vcpu, id); - put_cpu(); - - if (r) { - printk(KERN_DEBUG"kvm: vcpu_setup 
error!!\n"); - goto fail; - } - - return vcpu; -fail: - return ERR_PTR(r); -} - -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return 0; -} - -int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) -{ - return 0; -} - -int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) -{ - return -EINVAL; -} - -int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) -{ - return -EINVAL; -} - -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg) -{ - return -EINVAL; -} - -void kvm_arch_free_vm(struct kvm *kvm) -{ - unsigned long vm_base = kvm->arch.vm_base; - - if (vm_base) { - memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); - free_pages(vm_base, get_order(KVM_VM_DATA_SIZE)); - } - -} - -static void kvm_release_vm_pages(struct kvm *kvm) -{ - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - int j; - - slots = kvm_memslots(kvm); - kvm_for_each_memslot(memslot, slots) { - for (j = 0; j < memslot->npages; j++) { - if (memslot->rmap[j]) - put_page((struct page *)memslot->rmap[j]); - } - } -} - -void kvm_arch_destroy_vm(struct kvm *kvm) -{ - kvm_iommu_unmap_guest(kvm); - kvm_free_all_assigned_devices(kvm); - kfree(kvm->arch.vioapic); - kvm_release_vm_pages(kvm); -} - -void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -{ - if (cpu != vcpu->cpu) { - vcpu->cpu = cpu; - if (vcpu->arch.ht_active) - kvm_migrate_hlt_timer(vcpu); - } -} - -#define SAVE_REGS(_x) regs->_x = vcpu->arch._x - -int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -{ - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - int i; - - vcpu_load(vcpu); - - for (i = 0; i < 16; i++) { - regs->vpd.vgr[i] = vpd->vgr[i]; - regs->vpd.vbgr[i] = vpd->vbgr[i]; - } - for (i = 0; i < 128; i++) - regs->vpd.vcr[i] = vpd->vcr[i]; - regs->vpd.vhpi = vpd->vhpi; - regs->vpd.vnat = vpd->vnat; - regs->vpd.vbnat = vpd->vbnat; - regs->vpd.vpsr = vpd->vpsr; - regs->vpd.vpr = vpd->vpr; - - memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context)); - - SAVE_REGS(mp_state); - SAVE_REGS(vmm_rr); - memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); - memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS); - SAVE_REGS(itr_regions); - SAVE_REGS(dtr_regions); - SAVE_REGS(tc_regions); - SAVE_REGS(irq_check); - SAVE_REGS(itc_check); - SAVE_REGS(timer_check); - SAVE_REGS(timer_pending); - SAVE_REGS(last_itc); - for (i = 0; i < 8; i++) { - regs->vrr[i] = vcpu->arch.vrr[i]; - regs->ibr[i] = vcpu->arch.ibr[i]; - regs->dbr[i] = vcpu->arch.dbr[i]; - } - for (i = 0; i < 4; i++) - regs->insvc[i] = vcpu->arch.insvc[i]; - regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu); - SAVE_REGS(xtp); - SAVE_REGS(metaphysical_rr0); - SAVE_REGS(metaphysical_rr4); - SAVE_REGS(metaphysical_saved_rr0); - SAVE_REGS(metaphysical_saved_rr4); - SAVE_REGS(fp_psr); - SAVE_REGS(saved_gp); - - vcpu_put(vcpu); - return 0; -} - -int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu, - struct kvm_ia64_vcpu_stack *stack) -{ - memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack)); - return 0; -} - -int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu, - struct kvm_ia64_vcpu_stack *stack) -{ - memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu), - sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu)); - - vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data; - return 0; -} - -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - - hrtimer_cancel(&vcpu->arch.hlt_timer); - 
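
SAVE_REGS above and RESTORE_REGS earlier in this file are mirror-image token-pasting macros: because the vcpu->arch and struct kvm_regs fields share names, one macro per direction covers every register and the two copy lists stay mechanically in step. A toy sketch of the idiom; the two structures and field names here are invented:

    #include <assert.h>

    struct arch_state { unsigned long mp_state, vmm_rr, xtp; };
    struct regs_image { unsigned long mp_state, vmm_rr, xtp; };

    /* One macro per direction; the shared field name is the only argument. */
    #define SAVE_REGS(_x)    (regs->_x = arch->_x)
    #define RESTORE_REGS(_x) (arch->_x = regs->_x)

    static void save(struct regs_image *regs, const struct arch_state *arch)
    {
        SAVE_REGS(mp_state);
        SAVE_REGS(vmm_rr);
        SAVE_REGS(xtp);
    }

    static void restore(struct arch_state *arch, const struct regs_image *regs)
    {
        RESTORE_REGS(mp_state);
        RESTORE_REGS(vmm_rr);
        RESTORE_REGS(xtp);
    }

    int main(void)
    {
        struct arch_state a = { 1, 2, 3 }, b = { 0, 0, 0 };
        struct regs_image r;

        save(&r, &a);       /* arch -> user-visible register image */
        restore(&b, &r);    /* register image -> another arch state */
        assert(b.mp_state == 1 && b.vmm_rr == 2 && b.xtp == 3);
        return 0;
    }
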
kfree(vcpu->arch.apic); -} - -long kvm_arch_vcpu_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) -{ - struct kvm_vcpu *vcpu = filp->private_data; - void __user *argp = (void __user *)arg; - struct kvm_ia64_vcpu_stack *stack = NULL; - long r; - - switch (ioctl) { - case KVM_IA64_VCPU_GET_STACK: { - struct kvm_ia64_vcpu_stack __user *user_stack; - void __user *first_p = argp; - - r = -EFAULT; - if (copy_from_user(&user_stack, first_p, sizeof(void *))) - goto out; - - if (!access_ok(VERIFY_WRITE, user_stack, - sizeof(struct kvm_ia64_vcpu_stack))) { - printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: " - "Illegal user destination address for stack\n"); - goto out; - } - stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL); - if (!stack) { - r = -ENOMEM; - goto out; - } - - r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack); - if (r) - goto out; - - if (copy_to_user(user_stack, stack, - sizeof(struct kvm_ia64_vcpu_stack))) { - r = -EFAULT; - goto out; - } - - break; - } - case KVM_IA64_VCPU_SET_STACK: { - struct kvm_ia64_vcpu_stack __user *user_stack; - void __user *first_p = argp; - - r = -EFAULT; - if (copy_from_user(&user_stack, first_p, sizeof(void *))) - goto out; - - if (!access_ok(VERIFY_READ, user_stack, - sizeof(struct kvm_ia64_vcpu_stack))) { - printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: " - "Illegal user address for stack\n"); - goto out; - } - stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL); - if (!stack) { - r = -ENOMEM; - goto out; - } - if (copy_from_user(stack, user_stack, - sizeof(struct kvm_ia64_vcpu_stack))) - goto out; - - r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack); - break; - } - - default: - r = -EINVAL; - } - -out: - kfree(stack); - return r; -} - -int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) -{ - return VM_FAULT_SIGBUS; -} - -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} - -int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, - enum kvm_mr_change change) -{ - unsigned long i; - unsigned long pfn; - int npages = memslot->npages; - unsigned long base_gfn = memslot->base_gfn; - - if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT)) - return -ENOMEM; - - for (i = 0; i < npages; i++) { - pfn = gfn_to_pfn(kvm, base_gfn + i); - if (!kvm_is_reserved_pfn(pfn)) { - kvm_set_pmt_entry(kvm, base_gfn + i, - pfn << PAGE_SHIFT, - _PAGE_AR_RWX | _PAGE_MA_WB); - memslot->rmap[i] = (unsigned long)pfn_to_page(pfn); - } else { - kvm_set_pmt_entry(kvm, base_gfn + i, - GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT), - _PAGE_MA_UC); - memslot->rmap[i] = 0; - } - } - - return 0; -} - -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ - kvm_flush_remote_tlbs(kvm); -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ - kvm_arch_flush_shadow_all(); -} - -long kvm_arch_dev_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) -{ - return -EINVAL; -} - -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - kvm_vcpu_uninit(vcpu); -} - -static int vti_cpu_has_kvm_support(void) -{ - long avail = 1, status = 1, control = 1; - long ret; - - ret = ia64_pal_proc_get_features(&avail, &status, &control, 0); - if (ret) - goto out; - - if (!(avail & PAL_PROC_VM_BIT)) - goto out; - - printk(KERN_DEBUG"kvm: Hardware Supports VT\n"); - - ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info); - if (ret) - goto out; - printk(KERN_DEBUG"kvm: VM Buffer 
Size:0x%lx\n", kvm_vm_buffer_size); - - if (!(vp_env_info & VP_OPCODE)) { - printk(KERN_WARNING"kvm: No opcode ability on hardware, " - "vm_env_info:0x%lx\n", vp_env_info); - } - - return 1; -out: - return 0; -} - - -/* - * On SN2, the ITC isn't stable, so copy in fast path code to use the - * SN2 RTC, replacing the ITC based default version. - */ -static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info, - struct module *module) -{ - unsigned long new_ar, new_ar_sn2; - unsigned long module_base; - - if (!ia64_platform_is("sn2")) - return; - - module_base = (unsigned long)module->module_core; - - new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base; - new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base; - - printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC " - "as source\n"); - - /* - * Copy the SN2 version of mov_ar into place. They are both - * the same size, so 6 bundles is sufficient (6 * 0x10). - */ - memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60); -} - -static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, - struct module *module) -{ - unsigned long module_base; - unsigned long vmm_size; - - unsigned long vmm_offset, func_offset, fdesc_offset; - struct fdesc *p_fdesc; - - BUG_ON(!module); - - if (!kvm_vmm_base) { - printk("kvm: kvm area hasn't been initialized yet!!\n"); - return -EFAULT; - } - - /*Calculate new position of relocated vmm module.*/ - module_base = (unsigned long)module->module_core; - vmm_size = module->core_size; - if (unlikely(vmm_size > KVM_VMM_SIZE)) - return -EFAULT; - - memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); - kvm_patch_vmm(vmm_info, module); - kvm_flush_icache(kvm_vmm_base, vmm_size); - - /*Recalculate kvm_vmm_info based on new VMM*/ - vmm_offset = vmm_info->vmm_ivt - module_base; - kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset; - printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n", - kvm_vmm_info->vmm_ivt); - - fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base; - kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE + - fdesc_offset); - func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base; - p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); - p_fdesc->ip = KVM_VMM_BASE + func_offset; - p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base); - - printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n", - KVM_VMM_BASE+func_offset); - - fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base; - kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE + - fdesc_offset); - func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base; - p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); - p_fdesc->ip = KVM_VMM_BASE + func_offset; - p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base); - - kvm_vmm_gp = p_fdesc->gp; - - printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n", - kvm_vmm_info->vmm_entry); - printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n", - KVM_VMM_BASE + func_offset); - - return 0; -} - -int kvm_arch_init(void *opaque) -{ - int r; - struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque; - - if (!vti_cpu_has_kvm_support()) { - printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n"); - r = -EOPNOTSUPP; - goto out; - } - - if (kvm_vmm_info) { - printk(KERN_ERR "kvm: Already loaded VMM module!\n"); - r = -EEXIST; - goto out; - } - - r = -ENOMEM; - kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL); - if (!kvm_vmm_info) - goto out; - - if (kvm_alloc_vmm_area()) - 
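
A note on kvm_relocate_vmm() above: after the module image is memcpy()ed to the reserved area, every ia64 function descriptor has to be re-based by hand, computing new = new_base + (old - old_base) for both the entry point and the global pointer. A standalone sketch of that fix-up; the addresses and the descriptor layout here are illustrative:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* ia64 function descriptor: entry address plus global pointer. */
    struct fdesc { uint64_t ip, gp; };

    static uint64_t rebase(uint64_t addr, uint64_t old_base, uint64_t new_base)
    {
        return new_base + (addr - old_base);
    }

    int main(void)
    {
        const uint64_t old_base = 0xa000000000000000ULL;  /* made-up module base */
        const uint64_t new_base = 0xc000000000000000ULL;  /* made-up VMM area */
        struct fdesc f = { old_base + 0x1230, old_base + 0x100000 };

        /* Rewrite both words of the descriptor, as the relocation does. */
        f.ip = rebase(f.ip, old_base, new_base);
        f.gp = rebase(f.gp, old_base, new_base);
        printf("ip=0x%" PRIx64 " gp=0x%" PRIx64 "\n", f.ip, f.gp);
        return 0;
    }
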
goto out_free0; - - r = kvm_relocate_vmm(vmm_info, vmm_info->module); - if (r) - goto out_free1; - - return 0; - -out_free1: - kvm_free_vmm_area(); -out_free0: - kfree(kvm_vmm_info); -out: - return r; -} - -void kvm_arch_exit(void) -{ - kvm_free_vmm_area(); - kfree(kvm_vmm_info); - kvm_vmm_info = NULL; -} - -static void kvm_ia64_sync_dirty_log(struct kvm *kvm, - struct kvm_memory_slot *memslot) -{ - int i; - long base; - unsigned long n; - unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + - offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); - - n = kvm_dirty_bitmap_bytes(memslot); - base = memslot->base_gfn / BITS_PER_LONG; - - spin_lock(&kvm->arch.dirty_log_lock); - for (i = 0; i < n/sizeof(long); ++i) { - memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; - dirty_bitmap[base + i] = 0; - } - spin_unlock(&kvm->arch.dirty_log_lock); -} - -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, - struct kvm_dirty_log *log) -{ - int r; - unsigned long n; - struct kvm_memory_slot *memslot; - int is_dirty = 0; - - mutex_lock(&kvm->slots_lock); - - r = -EINVAL; - if (log->slot >= KVM_USER_MEM_SLOTS) - goto out; - - memslot = id_to_memslot(kvm->memslots, log->slot); - r = -ENOENT; - if (!memslot->dirty_bitmap) - goto out; - - kvm_ia64_sync_dirty_log(kvm, memslot); - r = kvm_get_dirty_log(kvm, log, &is_dirty); - if (r) - goto out; - - /* If nothing is dirty, don't bother messing with page tables. */ - if (is_dirty) { - kvm_flush_remote_tlbs(kvm); - n = kvm_dirty_bitmap_bytes(memslot); - memset(memslot->dirty_bitmap, 0, n); - } - r = 0; -out: - mutex_unlock(&kvm->slots_lock); - return r; -} - -int kvm_arch_hardware_setup(void) -{ - return 0; -} - -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) -{ - return __apic_accept_irq(vcpu, irq->vector); -} - -int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) -{ - return apic->vcpu->vcpu_id == dest; -} - -int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) -{ - return 0; -} - -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) -{ - return vcpu1->arch.xtp - vcpu2->arch.xtp; -} - -int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode) -{ - struct kvm_lapic *target = vcpu->arch.apic; - return (dest_mode == 0) ? 
- kvm_apic_match_physical_addr(target, dest) : - kvm_apic_match_logical_addr(target, dest); -} - -static int find_highest_bits(int *dat) -{ - u32 bits, bitnum; - int i; - - /* loop for all 256 bits */ - for (i = 7; i >= 0 ; i--) { - bits = dat[i]; - if (bits) { - bitnum = fls(bits); - return i * 32 + bitnum - 1; - } - } - - return -1; -} - -int kvm_highest_pending_irq(struct kvm_vcpu *vcpu) -{ - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (vpd->irr[0] & (1UL << NMI_VECTOR)) - return NMI_VECTOR; - if (vpd->irr[0] & (1UL << ExtINT_VECTOR)) - return ExtINT_VECTOR; - - return find_highest_bits((int *)&vpd->irr[0]); -} - -int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.timer_fired; -} - -int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) || - (kvm_highest_pending_irq(vcpu) != -1); -} - -int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) -{ - return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)); -} - -int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) -{ - mp_state->mp_state = vcpu->arch.mp_state; - return 0; -} - -static int vcpu_reset(struct kvm_vcpu *vcpu) -{ - int r; - long psr; - local_irq_save(psr); - r = kvm_insert_vmm_mapping(vcpu); - local_irq_restore(psr); - if (r) - goto fail; - - vcpu->arch.launched = 0; - kvm_arch_vcpu_uninit(vcpu); - r = kvm_arch_vcpu_init(vcpu); - if (r) - goto fail; - - kvm_purge_vmm_mapping(vcpu); - r = 0; -fail: - return r; -} - -int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) -{ - int r = 0; - - vcpu->arch.mp_state = mp_state->mp_state; - if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) - r = vcpu_reset(vcpu); - return r; -} diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c deleted file mode 100644 index cb548ee9fca..00000000000 --- a/arch/ia64/kvm/kvm_fw.c +++ /dev/null @@ -1,674 +0,0 @@ -/* - * PAL/SAL call delegation - * - * Copyright (c) 2004 Li Susie <susie.li@intel.com> - * Copyright (c) 2005 Yu Ke <ke.yu@intel.com> - * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - */ - -#include <linux/kvm_host.h> -#include <linux/smp.h> -#include <asm/sn/addrs.h> -#include <asm/sn/clksupport.h> -#include <asm/sn/shub_mmr.h> - -#include "vti.h" -#include "misc.h" - -#include <asm/pal.h> -#include <asm/sal.h> -#include <asm/tlb.h> - -/* - * Handy macros to make sure that the PAL return values start out - * as something meaningful. 
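
Stepping back to find_highest_bits() just above: it treats the 256-bit IRR as eight 32-bit words and scans from the top word down, using fls() to locate the most significant set bit. A standalone version with a portable fls() substitute (the kernel's fls() has the same 1-based, 0-for-empty contract):

    #include <assert.h>
    #include <stdint.h>

    /* Portable stand-in for the kernel's fls(): 1-based index of the
     * highest set bit, or 0 when no bit is set. */
    static int fls32(uint32_t x)
    {
        int n = 0;

        while (x) {
            n++;
            x >>= 1;
        }
        return n;
    }

    /* Highest pending vector in a 256-bit bitmap, or -1 when none is set. */
    static int find_highest_bits(const uint32_t *dat)
    {
        int i;

        for (i = 7; i >= 0; i--)
            if (dat[i])
                return i * 32 + fls32(dat[i]) - 1;
        return -1;
    }

    int main(void)
    {
        uint32_t irr[8] = { 0 };

        assert(find_highest_bits(irr) == -1);
        irr[2] = 1u << 5;              /* vector 2*32 + 5 = 69 pending */
        assert(find_highest_bits(irr) == 69);
        irr[7] = 0x80000000u;          /* vector 255 outranks it */
        assert(find_highest_bits(irr) == 255);
        return 0;
    }
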
- */ -#define INIT_PAL_STATUS_UNIMPLEMENTED(x) \ - { \ - x.status = PAL_STATUS_UNIMPLEMENTED; \ - x.v0 = 0; \ - x.v1 = 0; \ - x.v2 = 0; \ - } - -#define INIT_PAL_STATUS_SUCCESS(x) \ - { \ - x.status = PAL_STATUS_SUCCESS; \ - x.v0 = 0; \ - x.v1 = 0; \ - x.v2 = 0; \ - } - -static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu, - u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) { - struct exit_ctl_data *p; - - if (vcpu) { - p = &vcpu->arch.exit_data; - if (p->exit_reason == EXIT_REASON_PAL_CALL) { - *gr28 = p->u.pal_data.gr28; - *gr29 = p->u.pal_data.gr29; - *gr30 = p->u.pal_data.gr30; - *gr31 = p->u.pal_data.gr31; - return ; - } - } - printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n"); -} - -static void set_pal_result(struct kvm_vcpu *vcpu, - struct ia64_pal_retval result) { - - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - if (p->exit_reason == EXIT_REASON_PAL_CALL) { - p->u.pal_data.ret = result; - return ; - } - INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret); -} - -static void set_sal_result(struct kvm_vcpu *vcpu, - struct sal_ret_values result) { - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - if (p->exit_reason == EXIT_REASON_SAL_CALL) { - p->u.sal_data.ret = result; - return ; - } - printk(KERN_WARNING"Failed to set sal result!!\n"); -} - -struct cache_flush_args { - u64 cache_type; - u64 operation; - u64 progress; - long status; -}; - -cpumask_t cpu_cache_coherent_map; - -static void remote_pal_cache_flush(void *data) -{ - struct cache_flush_args *args = data; - long status; - u64 progress = args->progress; - - status = ia64_pal_cache_flush(args->cache_type, args->operation, - &progress, NULL); - if (status != 0) - args->status = status; -} - -static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu) -{ - u64 gr28, gr29, gr30, gr31; - struct ia64_pal_retval result = {0, 0, 0, 0}; - struct cache_flush_args args = {0, 0, 0, 0}; - long psr; - - gr28 = gr29 = gr30 = gr31 = 0; - kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31); - - if (gr31 != 0) - printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu); - - /* Always call Host Pal in int=1 */ - gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS; - args.cache_type = gr29; - args.operation = gr30; - smp_call_function(remote_pal_cache_flush, - (void *)&args, 1); - if (args.status != 0) - printk(KERN_ERR"pal_cache_flush error!," - "status:0x%lx\n", args.status); - /* - * Call Host PAL cache flush - * Clear psr.ic when call PAL_CACHE_FLUSH - */ - local_irq_save(psr); - result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1, - &result.v0); - local_irq_restore(psr); - if (result.status != 0) - printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld" - "in1:%lx,in2:%lx\n", - vcpu, result.status, gr29, gr30); - -#if 0 - if (gr29 == PAL_CACHE_TYPE_COHERENT) { - cpus_setall(vcpu->arch.cache_coherent_map); - cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map); - cpus_setall(cpu_cache_coherent_map); - cpu_clear(vcpu->cpu, cpu_cache_coherent_map); - } -#endif - return result; -} - -struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result; - - PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0); - return result; -} - -static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result; - - PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0); - - /* - * PAL_FREQ_BASE may not be implemented in some platforms, - * call SAL instead. 
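
The fallback spelled out in this comment, and implemented just below, is a recurring firmware-interface shape: try the processor-level (PAL) query first, and only when it yields nothing usable ask the platform layer (SAL). A sketch of that shape, with invented stubs standing in for the two firmware calls:

    #include <stdio.h>

    struct retval { long status; unsigned long v0, v1, v2; };

    /* Stub: a PAL query that is unimplemented on this imaginary platform. */
    static struct retval pal_freq_base(void)
    {
        return (struct retval){ 0, 0, 0, 0 };
    }

    /* Stub: the SAL platform query used as the fallback. */
    static long sal_freq_base(unsigned long *ticks, unsigned long *drift)
    {
        *ticks = 200000000UL;          /* pretend 200 MHz base frequency */
        *drift = 0;
        return 0;
    }

    int main(void)
    {
        struct retval r = pal_freq_base();

        if (r.v0 == 0) {               /* PAL gave no answer: ask SAL */
            r.status = sal_freq_base(&r.v0, &r.v1);
            r.v2 = 0;
        }
        printf("status=%ld freq=%lu\n", r.status, r.v0);
        return 0;
    }
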
- */ - if (result.v0 == 0) { - result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, - &result.v0, - &result.v1); - result.v2 = 0; - } - - return result; -} - -/* - * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2 - * RTC is used instead. This function patches the ratios from SAL - * to match the RTC before providing them to the guest. - */ -static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result) -{ - struct pal_freq_ratio *ratio; - unsigned long sal_freq, sal_drift, factor; - - result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, - &sal_freq, &sal_drift); - ratio = (struct pal_freq_ratio *)&result->v2; - factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) / - sn_rtc_cycles_per_second; - - ratio->num = 3; - ratio->den = factor; -} - -static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result; - - PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0); - - if (vcpu->kvm->arch.is_sn2) - sn2_patch_itc_freq_ratios(&result); - - return result; -} - -static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result; - - INIT_PAL_STATUS_UNIMPLEMENTED(result); - return result; -} - -static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result; - - INIT_PAL_STATUS_SUCCESS(result); - return result; -} - -static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result = {0, 0, 0, 0}; - long in0, in1, in2, in3; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - result.status = ia64_pal_proc_get_features(&result.v0, &result.v1, - &result.v2, in2); - - return result; -} - -static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result = {0, 0, 0, 0}; - long in0, in1, in2, in3; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - result.status = ia64_pal_register_info(in1, &result.v1, &result.v2); - - return result; -} - -static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu) -{ - - pal_cache_config_info_t ci; - long status; - unsigned long in0, in1, in2, in3, r9, r10; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - status = ia64_pal_cache_config_info(in1, in2, &ci); - r9 = ci.pcci_info_1.pcci1_data; - r10 = ci.pcci_info_2.pcci2_data; - return ((struct ia64_pal_retval){status, r9, r10, 0}); -} - -#define GUEST_IMPL_VA_MSB 59 -#define GUEST_RID_BITS 18 - -static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu) -{ - - pal_vm_info_1_u_t vminfo1; - pal_vm_info_2_u_t vminfo2; - struct ia64_pal_retval result; - - PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0); - if (!result.status) { - vminfo1.pvi1_val = result.v0; - vminfo1.pal_vm_info_1_s.max_itr_entry = 8; - vminfo1.pal_vm_info_1_s.max_dtr_entry = 8; - result.v0 = vminfo1.pvi1_val; - vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB; - vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS; - result.v1 = vminfo2.pvi2_val; - } - - return result; -} - -static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result; - unsigned long in0, in1, in2, in3; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - - result.status = ia64_pal_vm_info(in1, in2, - (pal_tc_info_u_t *)&result.v1, &result.v2); - - return result; -} - -static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu) -{ - u64 index = 0; - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - if (p->exit_reason == EXIT_REASON_PAL_CALL) - index = 
p->u.pal_data.gr28; - - return index; -} - -static void prepare_for_halt(struct kvm_vcpu *vcpu) -{ - vcpu->arch.timer_pending = 1; - vcpu->arch.timer_fired = 0; -} - -static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu) -{ - long status; - unsigned long in0, in1, in2, in3, r9; - unsigned long pm_buffer[16]; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - status = ia64_pal_perf_mon_info(pm_buffer, - (pal_perf_mon_info_u_t *) &r9); - if (status != 0) { - printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status); - } else { - if (in1) - memcpy((void *)in1, pm_buffer, sizeof(pm_buffer)); - else { - status = PAL_STATUS_EINVAL; - printk(KERN_WARNING"Invalid parameters " - "for PAL call:0x%lx!\n", in0); - } - } - return (struct ia64_pal_retval){status, r9, 0, 0}; -} - -static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu) -{ - unsigned long in0, in1, in2, in3; - long status; - unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32) - | (1UL << 61) | (1UL << 60); - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - if (in1) { - memcpy((void *)in1, &res, sizeof(res)); - status = 0; - } else{ - status = PAL_STATUS_EINVAL; - printk(KERN_WARNING"Invalid parameters " - "for PAL call:0x%lx!\n", in0); - } - - return (struct ia64_pal_retval){status, 0, 0, 0}; -} - -static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu) -{ - unsigned long r9; - long status; - - status = ia64_pal_mem_attrib(&r9); - - return (struct ia64_pal_retval){status, r9, 0, 0}; -} - -static void remote_pal_prefetch_visibility(void *v) -{ - s64 trans_type = (s64)v; - ia64_pal_prefetch_visibility(trans_type); -} - -static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result = {0, 0, 0, 0}; - unsigned long in0, in1, in2, in3; - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - result.status = ia64_pal_prefetch_visibility(in1); - if (result.status == 0) { - /* Must be performed on all remote processors - in the coherence domain. 
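
Both pal_cache_flush() earlier and this prefetch-visibility path use the same broadcast shape: bundle the inputs (and a shared status word) into one object, hand it to smp_call_function() as a void * payload, and let every remote CPU run a short helper. A loose user-space analogy with threads (not the kernel primitive) to show the payload pattern; the names and values are made up:

    #include <pthread.h>
    #include <stdio.h>

    /* Argument block shared with every "remote" handler, like
     * struct cache_flush_args in the hunk above. */
    struct flush_args {
        unsigned long cache_type;
        unsigned long operation;
        long status;                   /* a failing handler records it */
    };

    static void *remote_flush(void *data)
    {
        struct flush_args *args = data;
        long rc = 0;                   /* pretend the per-CPU op succeeded */

        if (rc != 0)
            args->status = rc;         /* mirrors remote_pal_cache_flush() */
        return NULL;
    }

    int main(void)
    {
        struct flush_args args = { 1, 0, 0 };
        pthread_t t[4];
        int i;

        for (i = 0; i < 4; i++)        /* one worker per "remote CPU" */
            pthread_create(&t[i], NULL, remote_flush, &args);
        for (i = 0; i < 4; i++)
            pthread_join(&t[i], NULL);

        printf("status=%ld\n", args.status);
        return 0;
    }
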
*/ - smp_call_function(remote_pal_prefetch_visibility, - (void *)in1, 1); - /* Unnecessary on remote processor for other vcpus!*/ - result.status = 1; - } - return result; -} - -static void remote_pal_mc_drain(void *v) -{ - ia64_pal_mc_drain(); -} - -static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result = {0, 0, 0, 0}; - unsigned long in0, in1, in2, in3; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - - if (in1 == 0 && in2) { - char brand_info[128]; - result.status = ia64_pal_get_brand_info(brand_info); - if (result.status == PAL_STATUS_SUCCESS) - memcpy((void *)in2, brand_info, 128); - } else { - result.status = PAL_STATUS_REQUIRES_MEMORY; - printk(KERN_WARNING"Invalid parameters for " - "PAL call:0x%lx!\n", in0); - } - - return result; -} - -int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - - u64 gr28; - struct ia64_pal_retval result; - int ret = 1; - - gr28 = kvm_get_pal_call_index(vcpu); - switch (gr28) { - case PAL_CACHE_FLUSH: - result = pal_cache_flush(vcpu); - break; - case PAL_MEM_ATTRIB: - result = pal_mem_attrib(vcpu); - break; - case PAL_CACHE_SUMMARY: - result = pal_cache_summary(vcpu); - break; - case PAL_PERF_MON_INFO: - result = pal_perf_mon_info(vcpu); - break; - case PAL_HALT_INFO: - result = pal_halt_info(vcpu); - break; - case PAL_HALT_LIGHT: - { - INIT_PAL_STATUS_SUCCESS(result); - prepare_for_halt(vcpu); - if (kvm_highest_pending_irq(vcpu) == -1) - ret = kvm_emulate_halt(vcpu); - } - break; - - case PAL_PREFETCH_VISIBILITY: - result = pal_prefetch_visibility(vcpu); - break; - case PAL_MC_DRAIN: - result.status = ia64_pal_mc_drain(); - /* FIXME: All vcpus likely call PAL_MC_DRAIN. - That causes the congestion. */ - smp_call_function(remote_pal_mc_drain, NULL, 1); - break; - - case PAL_FREQ_RATIOS: - result = pal_freq_ratios(vcpu); - break; - - case PAL_FREQ_BASE: - result = pal_freq_base(vcpu); - break; - - case PAL_LOGICAL_TO_PHYSICAL : - result = pal_logical_to_physica(vcpu); - break; - - case PAL_VM_SUMMARY : - result = pal_vm_summary(vcpu); - break; - - case PAL_VM_INFO : - result = pal_vm_info(vcpu); - break; - case PAL_PLATFORM_ADDR : - result = pal_platform_addr(vcpu); - break; - case PAL_CACHE_INFO: - result = pal_cache_info(vcpu); - break; - case PAL_PTCE_INFO: - INIT_PAL_STATUS_SUCCESS(result); - result.v1 = (1L << 32) | 1L; - break; - case PAL_REGISTER_INFO: - result = pal_register_info(vcpu); - break; - case PAL_VM_PAGE_SIZE: - result.status = ia64_pal_vm_page_size(&result.v0, - &result.v1); - break; - case PAL_RSE_INFO: - result.status = ia64_pal_rse_info(&result.v0, - (pal_hints_u_t *)&result.v1); - break; - case PAL_PROC_GET_FEATURES: - result = pal_proc_get_features(vcpu); - break; - case PAL_DEBUG_INFO: - result.status = ia64_pal_debug_info(&result.v0, - &result.v1); - break; - case PAL_VERSION: - result.status = ia64_pal_version( - (pal_version_u_t *)&result.v0, - (pal_version_u_t *)&result.v1); - break; - case PAL_FIXED_ADDR: - result.status = PAL_STATUS_SUCCESS; - result.v0 = vcpu->vcpu_id; - break; - case PAL_BRAND_INFO: - result = pal_get_brand_info(vcpu); - break; - case PAL_GET_PSTATE: - case PAL_CACHE_SHARED_INFO: - INIT_PAL_STATUS_UNIMPLEMENTED(result); - break; - default: - INIT_PAL_STATUS_UNIMPLEMENTED(result); - printk(KERN_WARNING"kvm: Unsupported pal call," - " index:0x%lx\n", gr28); - } - set_pal_result(vcpu, result); - return ret; -} - -static struct sal_ret_values sal_emulator(struct kvm *kvm, - long index, unsigned long in1, - unsigned long in2, unsigned 
long in3, - unsigned long in4, unsigned long in5, - unsigned long in6, unsigned long in7) -{ - unsigned long r9 = 0; - unsigned long r10 = 0; - long r11 = 0; - long status; - - status = 0; - switch (index) { - case SAL_FREQ_BASE: - status = ia64_sal_freq_base(in1, &r9, &r10); - break; - case SAL_PCI_CONFIG_READ: - printk(KERN_WARNING"kvm: Not allowed to call here!" - " SAL_PCI_CONFIG_READ\n"); - break; - case SAL_PCI_CONFIG_WRITE: - printk(KERN_WARNING"kvm: Not allowed to call here!" - " SAL_PCI_CONFIG_WRITE\n"); - break; - case SAL_SET_VECTORS: - if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) { - if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) { - status = -2; - } else { - kvm->arch.rdv_sal_data.boot_ip = in2; - kvm->arch.rdv_sal_data.boot_gp = in3; - } - printk("Rendezvous called! iip:%lx\n\n", in2); - } else - printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu." - "ignored...\n", in1); - break; - case SAL_GET_STATE_INFO: - /* No more info. */ - status = -5; - r9 = 0; - break; - case SAL_GET_STATE_INFO_SIZE: - /* Return a dummy size. */ - status = 0; - r9 = 128; - break; - case SAL_CLEAR_STATE_INFO: - /* Noop. */ - break; - case SAL_MC_RENDEZ: - printk(KERN_WARNING - "kvm: called SAL_MC_RENDEZ. ignored...\n"); - break; - case SAL_MC_SET_PARAMS: - printk(KERN_WARNING - "kvm: called SAL_MC_SET_PARAMS.ignored!\n"); - break; - case SAL_CACHE_FLUSH: - if (1) { - /*Flush using SAL. - This method is faster but has a side - effect on other vcpu running on - this cpu. */ - status = ia64_sal_cache_flush(in1); - } else { - /*Maybe need to implement the method - without side effect!*/ - status = 0; - } - break; - case SAL_CACHE_INIT: - printk(KERN_WARNING - "kvm: called SAL_CACHE_INIT. ignored...\n"); - break; - case SAL_UPDATE_PAL: - printk(KERN_WARNING - "kvm: CALLED SAL_UPDATE_PAL. ignored...\n"); - break; - default: - printk(KERN_WARNING"kvm: called SAL_CALL with unknown index." - " index:%ld\n", index); - status = -1; - break; - } - return ((struct sal_ret_values) {status, r9, r10, r11}); -} - -static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1, - u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){ - - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - - if (p->exit_reason == EXIT_REASON_SAL_CALL) { - *in0 = p->u.sal_data.in0; - *in1 = p->u.sal_data.in1; - *in2 = p->u.sal_data.in2; - *in3 = p->u.sal_data.in3; - *in4 = p->u.sal_data.in4; - *in5 = p->u.sal_data.in5; - *in6 = p->u.sal_data.in6; - *in7 = p->u.sal_data.in7; - return ; - } - *in0 = 0; -} - -void kvm_sal_emul(struct kvm_vcpu *vcpu) -{ - - struct sal_ret_values result; - u64 index, in1, in2, in3, in4, in5, in6, in7; - - kvm_get_sal_call_data(vcpu, &index, &in1, &in2, - &in3, &in4, &in5, &in6, &in7); - result = sal_emulator(vcpu->kvm, index, in1, in2, in3, - in4, in5, in6, in7); - set_sal_result(vcpu, result); -} diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c deleted file mode 100644 index f1268b8e6f9..00000000000 --- a/arch/ia64/kvm/kvm_lib.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * kvm_lib.c: Compile some libraries for kvm-intel module. - * - * Just include kernel's library, and disable symbols export. - * Copyright (C) 2008, Intel Corporation. - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - */ -#undef CONFIG_MODULES -#include <linux/module.h> -#undef CONFIG_KALLSYMS -#undef EXPORT_SYMBOL -#undef EXPORT_SYMBOL_GPL -#define EXPORT_SYMBOL(sym) -#define EXPORT_SYMBOL_GPL(sym) -#include "../../../lib/vsprintf.c" -#include "../../../lib/ctype.c" diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h deleted file mode 100644 index b2bcaa2787a..00000000000 --- a/arch/ia64/kvm/kvm_minstate.h +++ /dev/null @@ -1,266 +0,0 @@ -/* - * kvm_minstate.h: min save macros - * Copyright (c) 2007, Intel Corporation. - * - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - - -#include <asm/asmmacro.h> -#include <asm/types.h> -#include <asm/kregs.h> -#include <asm/kvm_host.h> - -#include "asm-offsets.h" - -#define KVM_MINSTATE_START_SAVE_MIN \ - mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\ - ;; \ - mov.m r28 = ar.rnat; \ - addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \ - ;; \ - lfetch.fault.excl.nt1 [r22]; \ - addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1; \ - mov r23 = ar.bspstore; /* save ar.bspstore */ \ - ;; \ - mov ar.bspstore = r22; /* switch to kernel RBS */\ - ;; \ - mov r18 = ar.bsp; \ - mov ar.rsc = 0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ - - - -#define KVM_MINSTATE_END_SAVE_MIN \ - bsw.1; /* switch back to bank 1 (must be last in insn group) */\ - ;; - - -#define PAL_VSA_SYNC_READ \ - /* begin to call pal vps sync_read */ \ -{.mii; \ - add r25 = VMM_VPD_BASE_OFFSET, r21; \ - nop 0x0; \ - mov r24=ip; \ - ;; \ -} \ -{.mmb \ - add r24=0x20, r24; \ - ld8 r25 = [r25]; /* read vpd base */ \ - br.cond.sptk kvm_vps_sync_read; /*call the service*/ \ - ;; \ -}; \ - - -#define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21 - -/* - * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves - * the minimum state necessary that allows us to turn psr.ic back - * on. - * - * Assumed state upon entry: - * psr.ic: off - * r31: contains saved predicates (pr) - * - * Upon exit, the state is as follows: - * psr.ic: off - * r2 = points to &pt_regs.r16 - * r8 = contents of ar.ccv - * r9 = contents of ar.csd - * r10 = contents of ar.ssd - * r11 = FPSR_DEFAULT - * r12 = kernel sp (kernel virtual address) - * r13 = points to current task_struct (kernel virtual address) - * p15 = TRUE if psr.i is set in cr.ipsr - * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: - * preserved - * - * Note that psr.ic is NOT turned on by this macro. This is so that - * we can pass interruption state as arguments to a handler. 
- */ - - -#define PT(f) (VMM_PT_REGS_##f##_OFFSET) - -#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ - KVM_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ - mov r27 = ar.rsc; /* M */ \ - mov r20 = r1; /* A */ \ - mov r25 = ar.unat; /* M */ \ - mov r29 = cr.ipsr; /* M */ \ - mov r26 = ar.pfs; /* I */ \ - mov r18 = cr.isr; \ - COVER; /* B;; (or nothing) */ \ - ;; \ - tbit.z p0,p15 = r29,IA64_PSR_I_BIT; \ - mov r1 = r16; \ -/* mov r21=r16; */ \ - /* switch from user to kernel RBS: */ \ - ;; \ - invala; /* M */ \ - SAVE_IFS; \ - ;; \ - KVM_MINSTATE_START_SAVE_MIN \ - adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */ \ - adds r16 = PT(CR_IPSR),r1; \ - ;; \ - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ - st8 [r16] = r29; /* save cr.ipsr */ \ - ;; \ - lfetch.fault.excl.nt1 [r17]; \ - tbit.nz p15,p0 = r29,IA64_PSR_I_BIT; \ - mov r29 = b0 \ - ;; \ - adds r16 = PT(R8),r1; /* initialize first base pointer */\ - adds r17 = PT(R9),r1; /* initialize second base pointer */\ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r8,16; \ -.mem.offset 8,0; st8.spill [r17] = r9,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r10,24; \ -.mem.offset 8,0; st8.spill [r17] = r11,24; \ - ;; \ - mov r9 = cr.iip; /* M */ \ - mov r10 = ar.fpsr; /* M */ \ - ;; \ - st8 [r16] = r9,16; /* save cr.iip */ \ - st8 [r17] = r30,16; /* save cr.ifs */ \ - sub r18 = r18,r22; /* r18=RSE.ndirty*8 */ \ - ;; \ - st8 [r16] = r25,16; /* save ar.unat */ \ - st8 [r17] = r26,16; /* save ar.pfs */ \ - shl r18 = r18,16; /* calu ar.rsc used for "loadrs" */\ - ;; \ - st8 [r16] = r27,16; /* save ar.rsc */ \ - st8 [r17] = r28,16; /* save ar.rnat */ \ - ;; /* avoid RAW on r16 & r17 */ \ - st8 [r16] = r23,16; /* save ar.bspstore */ \ - st8 [r17] = r31,16; /* save predicates */ \ - ;; \ - st8 [r16] = r29,16; /* save b0 */ \ - st8 [r17] = r18,16; /* save ar.rsc value for "loadrs" */\ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */ \ -.mem.offset 8,0; st8.spill [r17] = r12,16; \ - adds r12 = -16,r1; /* switch to kernel memory stack */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r13,16; \ -.mem.offset 8,0; st8.spill [r17] = r10,16; /* save ar.fpsr */\ - mov r13 = r21; /* establish `current' */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r15,16; \ -.mem.offset 8,0; st8.spill [r17] = r14,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r2,16; \ -.mem.offset 8,0; st8.spill [r17] = r3,16; \ - adds r2 = VMM_PT_REGS_R16_OFFSET,r1; \ - ;; \ - adds r16 = VMM_VCPU_IIPA_OFFSET,r13; \ - adds r17 = VMM_VCPU_ISR_OFFSET,r13; \ - mov r26 = cr.iipa; \ - mov r27 = cr.isr; \ - ;; \ - st8 [r16] = r26; \ - st8 [r17] = r27; \ - ;; \ - EXTRA; \ - mov r8 = ar.ccv; \ - mov r9 = ar.csd; \ - mov r10 = ar.ssd; \ - movl r11 = FPSR_DEFAULT; /* L-unit */ \ - adds r17 = VMM_VCPU_GP_OFFSET,r13; \ - ;; \ - ld8 r1 = [r17];/* establish kernel global pointer */ \ - ;; \ - PAL_VSA_SYNC_READ \ - KVM_MINSTATE_END_SAVE_MIN - -/* - * SAVE_REST saves the remainder of pt_regs (with psr.ic on). - * - * Assumed state upon entry: - * psr.ic: on - * r2: points to &pt_regs.f6 - * r3: points to &pt_regs.f7 - * r8: contents of ar.ccv - * r9: contents of ar.csd - * r10: contents of ar.ssd - * r11: FPSR_DEFAULT - * - * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. 
- */ -#define KVM_SAVE_REST \ -.mem.offset 0,0; st8.spill [r2] = r16,16; \ -.mem.offset 8,0; st8.spill [r3] = r17,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r18,16; \ -.mem.offset 8,0; st8.spill [r3] = r19,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r20,16; \ -.mem.offset 8,0; st8.spill [r3] = r21,16; \ - mov r18=b6; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r22,16; \ -.mem.offset 8,0; st8.spill [r3] = r23,16; \ - mov r19 = b7; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r24,16; \ -.mem.offset 8,0; st8.spill [r3] = r25,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r26,16; \ -.mem.offset 8,0; st8.spill [r3] = r27,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r28,16; \ -.mem.offset 8,0; st8.spill [r3] = r29,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r30,16; \ -.mem.offset 8,0; st8.spill [r3] = r31,32; \ - ;; \ - mov ar.fpsr = r11; \ - st8 [r2] = r8,8; \ - adds r24 = PT(B6)-PT(F7),r3; \ - adds r25 = PT(B7)-PT(F7),r3; \ - ;; \ - st8 [r24] = r18,16; /* b6 */ \ - st8 [r25] = r19,16; /* b7 */ \ - adds r2 = PT(R4)-PT(F6),r2; \ - adds r3 = PT(R5)-PT(F7),r3; \ - ;; \ - st8 [r24] = r9; /* ar.csd */ \ - st8 [r25] = r10; /* ar.ssd */ \ - ;; \ - mov r18 = ar.unat; \ - adds r19 = PT(EML_UNAT)-PT(R4),r2; \ - ;; \ - st8 [r19] = r18; /* eml_unat */ \ - - -#define KVM_SAVE_EXTRA \ -.mem.offset 0,0; st8.spill [r2] = r4,16; \ -.mem.offset 8,0; st8.spill [r3] = r5,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r6,16; \ -.mem.offset 8,0; st8.spill [r3] = r7; \ - ;; \ - mov r26 = ar.unat; \ - ;; \ - st8 [r2] = r26;/* eml_unat */ \ - -#define KVM_SAVE_MIN_WITH_COVER KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,) -#define KVM_SAVE_MIN_WITH_COVER_R19 KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19) -#define KVM_SAVE_MIN KVM_DO_SAVE_MIN( , mov r30 = r0, ) diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h deleted file mode 100644 index c5f92a926a9..00000000000 --- a/arch/ia64/kvm/lapic.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef __KVM_IA64_LAPIC_H -#define __KVM_IA64_LAPIC_H - -#include <linux/kvm_host.h> - -/* - * vlsapic - */ -struct kvm_lapic{ - struct kvm_vcpu *vcpu; - uint64_t insvc[4]; - uint64_t vhpi; - uint8_t xtp; - uint8_t pal_init_pending; - uint8_t pad[2]; -}; - -int kvm_create_lapic(struct kvm_vcpu *vcpu); -void kvm_free_lapic(struct kvm_vcpu *vcpu); - -int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); -int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); -int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode); -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); -#define kvm_apic_present(x) (true) -#define kvm_lapic_enabled(x) (true) - -#endif diff --git a/arch/ia64/kvm/memcpy.S b/arch/ia64/kvm/memcpy.S deleted file mode 100644 index c04cdbe9f80..00000000000 --- a/arch/ia64/kvm/memcpy.S +++ /dev/null @@ -1 +0,0 @@ -#include "../lib/memcpy.S" diff --git a/arch/ia64/kvm/memset.S b/arch/ia64/kvm/memset.S deleted file mode 100644 index 83c3066d844..00000000000 --- a/arch/ia64/kvm/memset.S +++ /dev/null @@ -1 +0,0 @@ -#include "../lib/memset.S" diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h deleted file mode 100644 index dd979e00b57..00000000000 --- a/arch/ia64/kvm/misc.h +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef __KVM_IA64_MISC_H -#define __KVM_IA64_MISC_H - -#include <linux/kvm_host.h> -/* - * misc.h - * Copyright (C) 2007, Intel Corporation. 
- * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -/* - *Return p2m base address at host side! - */ -static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm) -{ - return (uint64_t *)(kvm->arch.vm_base + - offsetof(struct kvm_vm_data, kvm_p2m)); -} - -static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn, - u64 paddr, u64 mem_flags) -{ - uint64_t *pmt_base = kvm_host_get_pmt(kvm); - unsigned long pte; - - pte = PAGE_ALIGN(paddr) | mem_flags; - pmt_base[gfn] = pte; -} - -/*Function for translating host address to guest address*/ - -static inline void *to_guest(struct kvm *kvm, void *addr) -{ - return (void *)((unsigned long)(addr) - kvm->arch.vm_base + - KVM_VM_DATA_BASE); -} - -/*Function for translating guest address to host address*/ - -static inline void *to_host(struct kvm *kvm, void *addr) -{ - return (void *)((unsigned long)addr - KVM_VM_DATA_BASE - + kvm->arch.vm_base); -} - -/* Get host context of the vcpu */ -static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu) -{ - union context *ctx = &vcpu->arch.host; - return to_guest(vcpu->kvm, ctx); -} - -/* Get guest context of the vcpu */ -static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu) -{ - union context *ctx = &vcpu->arch.guest; - return to_guest(vcpu->kvm, ctx); -} - -/* kvm get exit data from gvmm! */ -static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu) -{ - return &vcpu->arch.exit_data; -} - -/*kvm get vcpu ioreq for kvm module!*/ -static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p_ctl_data; - - if (vcpu) { - p_ctl_data = kvm_get_exit_data(vcpu); - if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION) - return &p_ctl_data->u.ioreq; - } - - return NULL; -} - -#endif diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c deleted file mode 100644 index f1e17d3d6cd..00000000000 --- a/arch/ia64/kvm/mmio.c +++ /dev/null @@ -1,336 +0,0 @@ -/* - * mmio.c: MMIO emulation components. - * Copyright (c) 2004, Intel Corporation. - * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) - * Kun Tian (Kevin Tian) (Kevin.tian@intel.com) - * - * Copyright (c) 2007 Intel Corporation KVM support. - * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
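
to_guest() and to_host() in the misc.h hunk above are a pair of affine re-basings between the host mapping of the per-VM data area (at vm_base) and its fixed in-VMM mapping (at KVM_VM_DATA_BASE); by construction each undoes the other. A sketch that checks the round trip; both base addresses here are made up:

    #include <assert.h>
    #include <stdint.h>

    #define KVM_VM_DATA_BASE 0x6000000000000000ULL   /* illustrative constant */

    static uint64_t to_guest(uint64_t vm_base, uint64_t host_addr)
    {
        return host_addr - vm_base + KVM_VM_DATA_BASE;
    }

    static uint64_t to_host(uint64_t vm_base, uint64_t guest_addr)
    {
        return guest_addr - KVM_VM_DATA_BASE + vm_base;
    }

    int main(void)
    {
        const uint64_t vm_base = 0x0000000812340000ULL;  /* made-up allocation */
        const uint64_t host = vm_base + 0x1000;

        /* The two translations are exact inverses. */
        assert(to_host(vm_base, to_guest(vm_base, host)) == host);
        return 0;
    }
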
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include <linux/kvm_host.h> - -#include "vcpu.h" - -static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val) -{ - VLSAPIC_XTP(v) = val; -} - -/* - * LSAPIC OFFSET - */ -#define PIB_LOW_HALF(ofst) !(ofst & (1 << 20)) -#define PIB_OFST_INTA 0x1E0000 -#define PIB_OFST_XTP 0x1E0008 - -/* - * execute write IPI op. - */ -static void vlsapic_write_ipi(struct kvm_vcpu *vcpu, - uint64_t addr, uint64_t data) -{ - struct exit_ctl_data *p = &current_vcpu->arch.exit_data; - unsigned long psr; - - local_irq_save(psr); - - p->exit_reason = EXIT_REASON_IPI; - p->u.ipi_data.addr.val = addr; - p->u.ipi_data.data.val = data; - vmm_transition(current_vcpu); - - local_irq_restore(psr); - -} - -void lsapic_write(struct kvm_vcpu *v, unsigned long addr, - unsigned long length, unsigned long val) -{ - addr &= (PIB_SIZE - 1); - - switch (addr) { - case PIB_OFST_INTA: - panic_vm(v, "Undefined write on PIB INTA\n"); - break; - case PIB_OFST_XTP: - if (length == 1) { - vlsapic_write_xtp(v, val); - } else { - panic_vm(v, "Undefined write on PIB XTP\n"); - } - break; - default: - if (PIB_LOW_HALF(addr)) { - /*Lower half */ - if (length != 8) - panic_vm(v, "Can't LHF write with size %ld!\n", - length); - else - vlsapic_write_ipi(v, addr, val); - } else { /*Upper half */ - panic_vm(v, "IPI-UHF write %lx\n", addr); - } - break; - } -} - -unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr, - unsigned long length) -{ - uint64_t result = 0; - - addr &= (PIB_SIZE - 1); - - switch (addr) { - case PIB_OFST_INTA: - if (length == 1) /* 1 byte load */ - ; /* There is no i8259, there is no INTA access*/ - else - panic_vm(v, "Undefined read on PIB INTA\n"); - - break; - case PIB_OFST_XTP: - if (length == 1) { - result = VLSAPIC_XTP(v); - } else { - panic_vm(v, "Undefined read on PIB XTP\n"); - } - break; - default: - panic_vm(v, "Undefined addr access for lsapic!\n"); - break; - } - return result; -} - -static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest, - u16 s, int ma, int dir) -{ - unsigned long iot; - struct exit_ctl_data *p = &vcpu->arch.exit_data; - unsigned long psr; - - iot = __gpfn_is_io(src_pa >> PAGE_SHIFT); - - local_irq_save(psr); - - /*Intercept the access for PIB range*/ - if (iot == GPFN_PIB) { - if (!dir) - lsapic_write(vcpu, src_pa, s, *dest); - else - *dest = lsapic_read(vcpu, src_pa, s); - goto out; - } - p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION; - p->u.ioreq.addr = src_pa; - p->u.ioreq.size = s; - p->u.ioreq.dir = dir; - if (dir == IOREQ_WRITE) - p->u.ioreq.data = *dest; - p->u.ioreq.state = STATE_IOREQ_READY; - vmm_transition(vcpu); - - if (p->u.ioreq.state == STATE_IORESP_READY) { - if (dir == IOREQ_READ) - /* it's necessary to ensure zero extending */ - *dest = p->u.ioreq.data & (~0UL >> (64-(s*8))); - } else - panic_vm(vcpu, "Unhandled mmio access returned!\n"); -out: - local_irq_restore(psr); - return ; -} - -/* - dir 1: read 0:write - inst_type 0:integer 1:floating point - */ -#define SL_INTEGER 0 /* store/load integer*/ -#define SL_FLOATING 1 /* store/load floating*/ - -void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) -{ - struct kvm_pt_regs *regs; - IA64_BUNDLE bundle; - int slot, dir = 0; - int inst_type = -1; - u16 size = 0; - u64 data, slot1a, slot1b, temp, update_reg; - s32 imm; - INST64 inst; - - regs = vcpu_regs(vcpu); - - if 
(fetch_code(vcpu, regs->cr_iip, &bundle)) { - /* if fetch code fail, return and try again */ - return; - } - slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; - if (!slot) - inst.inst = bundle.slot0; - else if (slot == 1) { - slot1a = bundle.slot1a; - slot1b = bundle.slot1b; - inst.inst = slot1a + (slot1b << 18); - } else if (slot == 2) - inst.inst = bundle.slot2; - - /* Integer Load/Store */ - if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) { - inst_type = SL_INTEGER; - size = (inst.M1.x6 & 0x3); - if ((inst.M1.x6 >> 2) > 0xb) { - /*write*/ - dir = IOREQ_WRITE; - data = vcpu_get_gr(vcpu, inst.M4.r2); - } else if ((inst.M1.x6 >> 2) < 0xb) { - /*read*/ - dir = IOREQ_READ; - } - } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) { - /* Integer Load + Reg update */ - inst_type = SL_INTEGER; - dir = IOREQ_READ; - size = (inst.M2.x6 & 0x3); - temp = vcpu_get_gr(vcpu, inst.M2.r3); - update_reg = vcpu_get_gr(vcpu, inst.M2.r2); - temp += update_reg; - vcpu_set_gr(vcpu, inst.M2.r3, temp, 0); - } else if (inst.M3.major == 5) { - /*Integer Load/Store + Imm update*/ - inst_type = SL_INTEGER; - size = (inst.M3.x6&0x3); - if ((inst.M5.x6 >> 2) > 0xb) { - /*write*/ - dir = IOREQ_WRITE; - data = vcpu_get_gr(vcpu, inst.M5.r2); - temp = vcpu_get_gr(vcpu, inst.M5.r3); - imm = (inst.M5.s << 31) | (inst.M5.i << 30) | - (inst.M5.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M5.r3, temp, 0); - - } else if ((inst.M3.x6 >> 2) < 0xb) { - /*read*/ - dir = IOREQ_READ; - temp = vcpu_get_gr(vcpu, inst.M3.r3); - imm = (inst.M3.s << 31) | (inst.M3.i << 30) | - (inst.M3.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M3.r3, temp, 0); - - } - } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B - && inst.M9.m == 0 && inst.M9.x == 0) { - /* Floating-point spill*/ - struct ia64_fpreg v; - - inst_type = SL_FLOATING; - dir = IOREQ_WRITE; - vcpu_get_fpreg(vcpu, inst.M9.f2, &v); - /* Write high word. FIXME: this is a kludge! */ - v.u.bits[1] &= 0x3ffff; - mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8, - ma, IOREQ_WRITE); - data = v.u.bits[0]; - size = 3; - } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) { - /* Floating-point spill + Imm update */ - struct ia64_fpreg v; - - inst_type = SL_FLOATING; - dir = IOREQ_WRITE; - vcpu_get_fpreg(vcpu, inst.M10.f2, &v); - temp = vcpu_get_gr(vcpu, inst.M10.r3); - imm = (inst.M10.s << 31) | (inst.M10.i << 30) | - (inst.M10.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); - - /* Write high word.FIXME: this is a kludge! */ - v.u.bits[1] &= 0x3ffff; - mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], - 8, ma, IOREQ_WRITE); - data = v.u.bits[0]; - size = 3; - } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) { - /* Floating-point stf8 + Imm update */ - struct ia64_fpreg v; - inst_type = SL_FLOATING; - dir = IOREQ_WRITE; - size = 3; - vcpu_get_fpreg(vcpu, inst.M10.f2, &v); - data = v.u.bits[0]; /* Significand. 
*/ - temp = vcpu_get_gr(vcpu, inst.M10.r3); - imm = (inst.M10.s << 31) | (inst.M10.i << 30) | - (inst.M10.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); - } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c - && inst.M15.x6 <= 0x2f) { - temp = vcpu_get_gr(vcpu, inst.M15.r3); - imm = (inst.M15.s << 31) | (inst.M15.i << 30) | - (inst.M15.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M15.r3, temp, 0); - - vcpu_increment_iip(vcpu); - return; - } else if (inst.M12.major == 6 && inst.M12.m == 1 - && inst.M12.x == 1 && inst.M12.x6 == 1) { - /* Floating-point Load Pair + Imm ldfp8 M12*/ - struct ia64_fpreg v; - - inst_type = SL_FLOATING; - dir = IOREQ_READ; - size = 8; /*ldfd*/ - mmio_access(vcpu, padr, &data, size, ma, dir); - v.u.bits[0] = data; - v.u.bits[1] = 0x1003E; - vcpu_set_fpreg(vcpu, inst.M12.f1, &v); - padr += 8; - mmio_access(vcpu, padr, &data, size, ma, dir); - v.u.bits[0] = data; - v.u.bits[1] = 0x1003E; - vcpu_set_fpreg(vcpu, inst.M12.f2, &v); - padr += 8; - vcpu_set_gr(vcpu, inst.M12.r3, padr, 0); - vcpu_increment_iip(vcpu); - return; - } else { - inst_type = -1; - panic_vm(vcpu, "Unsupported MMIO access instruction! " - "Bunld[0]=0x%lx, Bundle[1]=0x%lx\n", - bundle.i64[0], bundle.i64[1]); - } - - size = 1 << size; - if (dir == IOREQ_WRITE) { - mmio_access(vcpu, padr, &data, size, ma, dir); - } else { - mmio_access(vcpu, padr, &data, size, ma, dir); - if (inst_type == SL_INTEGER) - vcpu_set_gr(vcpu, inst.M1.r1, data, 0); - else - panic_vm(vcpu, "Unsupported instruction type!\n"); - - } - vcpu_increment_iip(vcpu); -} diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S deleted file mode 100644 index f793be3efff..00000000000 --- a/arch/ia64/kvm/optvfault.S +++ /dev/null @@ -1,1090 +0,0 @@ -/* - * arch/ia64/kvm/optvfault.S - * optimize virtualization fault handler - * - * Copyright (C) 2006 Intel Co - * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> - * Copyright (C) 2008 Intel Co - * Add the support for Tukwila processors. 
- * Xiantao Zhang <xiantao.zhang@intel.com> - */ - -#include <asm/asmmacro.h> -#include <asm/processor.h> -#include <asm/kvm_host.h> - -#include "vti.h" -#include "asm-offsets.h" - -#define ACCE_MOV_FROM_AR -#define ACCE_MOV_FROM_RR -#define ACCE_MOV_TO_RR -#define ACCE_RSM -#define ACCE_SSM -#define ACCE_MOV_TO_PSR -#define ACCE_THASH - -#define VMX_VPS_SYNC_READ \ - add r16=VMM_VPD_BASE_OFFSET,r21; \ - mov r17 = b0; \ - mov r18 = r24; \ - mov r19 = r25; \ - mov r20 = r31; \ - ;; \ -{.mii; \ - ld8 r16 = [r16]; \ - nop 0x0; \ - mov r24 = ip; \ - ;; \ -}; \ -{.mmb; \ - add r24=0x20, r24; \ - mov r25 =r16; \ - br.sptk.many kvm_vps_sync_read; \ -}; \ - mov b0 = r17; \ - mov r24 = r18; \ - mov r25 = r19; \ - mov r31 = r20 - -ENTRY(kvm_vps_entry) - adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21 - ;; - ld8 r29 = [r29] - ;; - add r29 = r29, r30 - ;; - mov b0 = r29 - br.sptk.many b0 -END(kvm_vps_entry) - -/* - * Inputs: - * r24 : return address - * r25 : vpd - * r29 : scratch - * - */ -GLOBAL_ENTRY(kvm_vps_sync_read) - movl r30 = PAL_VPS_SYNC_READ - ;; - br.sptk.many kvm_vps_entry -END(kvm_vps_sync_read) - -/* - * Inputs: - * r24 : return address - * r25 : vpd - * r29 : scratch - * - */ -GLOBAL_ENTRY(kvm_vps_sync_write) - movl r30 = PAL_VPS_SYNC_WRITE - ;; - br.sptk.many kvm_vps_entry -END(kvm_vps_sync_write) - -/* - * Inputs: - * r23 : pr - * r24 : guest b0 - * r25 : vpd - * - */ -GLOBAL_ENTRY(kvm_vps_resume_normal) - movl r30 = PAL_VPS_RESUME_NORMAL - ;; - mov pr=r23,-2 - br.sptk.many kvm_vps_entry -END(kvm_vps_resume_normal) - -/* - * Inputs: - * r23 : pr - * r24 : guest b0 - * r25 : vpd - * r17 : isr - */ -GLOBAL_ENTRY(kvm_vps_resume_handler) - movl r30 = PAL_VPS_RESUME_HANDLER - ;; - ld8 r26=[r25] - shr r17=r17,IA64_ISR_IR_BIT - ;; - dep r26=r17,r26,63,1 // bit 63 of r26 indicate whether enable CFLE - mov pr=r23,-2 - br.sptk.many kvm_vps_entry -END(kvm_vps_resume_handler) - -//mov r1=ar3 -GLOBAL_ENTRY(kvm_asm_mov_from_ar) -#ifndef ACCE_MOV_FROM_AR - br.many kvm_virtualization_fault_back -#endif - add r18=VMM_VCPU_ITC_OFS_OFFSET, r21 - add r16=VMM_VCPU_LAST_ITC_OFFSET,r21 - extr.u r17=r25,6,7 - ;; - ld8 r18=[r18] - mov r19=ar.itc - mov r24=b0 - ;; - add r19=r19,r18 - addl r20=@gprel(asm_mov_to_reg),gp - ;; - st8 [r16] = r19 - adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20 - shladd r17=r17,4,r20 - ;; - mov b0=r17 - br.sptk.few b0 - ;; -END(kvm_asm_mov_from_ar) - -/* - * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC - * clock as it's source for emulating the ITC. This version will be - * copied on top of the original version if the host is determined to - * be an SN2. 
- */ -GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2) - add r18=VMM_VCPU_ITC_OFS_OFFSET, r21 - movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT)) - - add r16=VMM_VCPU_LAST_ITC_OFFSET,r21 - extr.u r17=r25,6,7 - mov r24=b0 - ;; - ld8 r18=[r18] - ld8 r19=[r19] - addl r20=@gprel(asm_mov_to_reg),gp - ;; - add r19=r19,r18 - shladd r17=r17,4,r20 - ;; - adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20 - st8 [r16] = r19 - mov b0=r17 - br.sptk.few b0 - ;; -END(kvm_asm_mov_from_ar_sn2) - - - -// mov r1=rr[r3] -GLOBAL_ENTRY(kvm_asm_mov_from_rr) -#ifndef ACCE_MOV_FROM_RR - br.many kvm_virtualization_fault_back -#endif - extr.u r16=r25,20,7 - extr.u r17=r25,6,7 - addl r20=@gprel(asm_mov_from_reg),gp - ;; - adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20 - shladd r16=r16,4,r20 - mov r24=b0 - ;; - add r27=VMM_VCPU_VRR0_OFFSET,r21 - mov b0=r16 - br.many b0 - ;; -kvm_asm_mov_from_rr_back_1: - adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 - adds r22=asm_mov_to_reg-asm_mov_from_reg,r20 - shr.u r26=r19,61 - ;; - shladd r17=r17,4,r22 - shladd r27=r26,3,r27 - ;; - ld8 r19=[r27] - mov b0=r17 - br.many b0 -END(kvm_asm_mov_from_rr) - - -// mov rr[r3]=r2 -GLOBAL_ENTRY(kvm_asm_mov_to_rr) -#ifndef ACCE_MOV_TO_RR - br.many kvm_virtualization_fault_back -#endif - extr.u r16=r25,20,7 - extr.u r17=r25,13,7 - addl r20=@gprel(asm_mov_from_reg),gp - ;; - adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20 - shladd r16=r16,4,r20 - mov r22=b0 - ;; - add r27=VMM_VCPU_VRR0_OFFSET,r21 - mov b0=r16 - br.many b0 - ;; -kvm_asm_mov_to_rr_back_1: - adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20 - shr.u r23=r19,61 - shladd r17=r17,4,r20 - ;; - //if rr6, go back - cmp.eq p6,p0=6,r23 - mov b0=r22 - (p6) br.cond.dpnt.many kvm_virtualization_fault_back - ;; - mov r28=r19 - mov b0=r17 - br.many b0 -kvm_asm_mov_to_rr_back_2: - adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 - shladd r27=r23,3,r27 - ;; // vrr.rid<<4 |0xe - st8 [r27]=r19 - mov b0=r30 - ;; - extr.u r16=r19,8,26 - extr.u r18 =r19,2,6 - mov r17 =0xe - ;; - shladd r16 = r16, 4, r17 - extr.u r19 =r19,0,8 - ;; - shl r16 = r16,8 - ;; - add r19 = r19, r16 - ;; //set ve 1 - dep r19=-1,r19,0,1 - cmp.lt p6,p0=14,r18 - ;; - (p6) mov r18=14 - ;; - (p6) dep r19=r18,r19,2,6 - ;; - cmp.eq p6,p0=0,r23 - ;; - cmp.eq.or p6,p0=4,r23 - ;; - adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21 - (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 - ;; - ld4 r16=[r16] - cmp.eq p7,p0=r0,r0 - (p6) shladd r17=r23,1,r17 - ;; - (p6) st8 [r17]=r19 - (p6) tbit.nz p6,p7=r16,0 - ;; - (p7) mov rr[r28]=r19 - mov r24=r22 - br.many b0 -END(kvm_asm_mov_to_rr) - - -//rsm -GLOBAL_ENTRY(kvm_asm_rsm) -#ifndef ACCE_RSM - br.many kvm_virtualization_fault_back -#endif - VMX_VPS_SYNC_READ - ;; - extr.u r26=r25,6,21 - extr.u r27=r25,31,2 - ;; - extr.u r28=r25,36,1 - dep r26=r27,r26,21,2 - ;; - add r17=VPD_VPSR_START_OFFSET,r16 - add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 - //r26 is imm24 - dep r26=r28,r26,23,1 - ;; - ld8 r18=[r17] - movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI - ld4 r23=[r22] - sub r27=-1,r26 - mov r24=b0 - ;; - mov r20=cr.ipsr - or r28=r27,r28 - and r19=r18,r27 - ;; - st8 [r17]=r19 - and r20=r20,r28 - /* Comment it out due to short of fp lazy alorgithm support - adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 - ;; - ld8 r27=[r27] - ;; - tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT - ;; - (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 - */ - ;; - mov cr.ipsr=r20 - tbit.nz p6,p0=r23,0 - ;; - tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT - (p6) br.dptk kvm_resume_to_guest_with_sync - ;; - add r26=VMM_VCPU_META_RR0_OFFSET,r21 - add 
r27=VMM_VCPU_META_RR0_OFFSET+8,r21 - dep r23=-1,r23,0,1 - ;; - ld8 r26=[r26] - ld8 r27=[r27] - st4 [r22]=r23 - dep.z r28=4,61,3 - ;; - mov rr[r0]=r26 - ;; - mov rr[r28]=r27 - ;; - srlz.d - br.many kvm_resume_to_guest_with_sync -END(kvm_asm_rsm) - - -//ssm -GLOBAL_ENTRY(kvm_asm_ssm) -#ifndef ACCE_SSM - br.many kvm_virtualization_fault_back -#endif - VMX_VPS_SYNC_READ - ;; - extr.u r26=r25,6,21 - extr.u r27=r25,31,2 - ;; - extr.u r28=r25,36,1 - dep r26=r27,r26,21,2 - ;; //r26 is imm24 - add r27=VPD_VPSR_START_OFFSET,r16 - dep r26=r28,r26,23,1 - ;; //r19 vpsr - ld8 r29=[r27] - mov r24=b0 - ;; - add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 - mov r20=cr.ipsr - or r19=r29,r26 - ;; - ld4 r23=[r22] - st8 [r27]=r19 - or r20=r20,r26 - ;; - mov cr.ipsr=r20 - movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT - ;; - and r19=r28,r19 - tbit.z p6,p0=r23,0 - ;; - cmp.ne.or p6,p0=r28,r19 - (p6) br.dptk kvm_asm_ssm_1 - ;; - add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 - add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 - dep r23=0,r23,0,1 - ;; - ld8 r26=[r26] - ld8 r27=[r27] - st4 [r22]=r23 - dep.z r28=4,61,3 - ;; - mov rr[r0]=r26 - ;; - mov rr[r28]=r27 - ;; - srlz.d - ;; -kvm_asm_ssm_1: - tbit.nz p6,p0=r29,IA64_PSR_I_BIT - ;; - tbit.z.or p6,p0=r19,IA64_PSR_I_BIT - (p6) br.dptk kvm_resume_to_guest_with_sync - ;; - add r29=VPD_VTPR_START_OFFSET,r16 - add r30=VPD_VHPI_START_OFFSET,r16 - ;; - ld8 r29=[r29] - ld8 r30=[r30] - ;; - extr.u r17=r29,4,4 - extr.u r18=r29,16,1 - ;; - dep r17=r18,r17,4,1 - ;; - cmp.gt p6,p0=r30,r17 - (p6) br.dpnt.few kvm_asm_dispatch_vexirq - br.many kvm_resume_to_guest_with_sync -END(kvm_asm_ssm) - - -//mov psr.l=r2 -GLOBAL_ENTRY(kvm_asm_mov_to_psr) -#ifndef ACCE_MOV_TO_PSR - br.many kvm_virtualization_fault_back -#endif - VMX_VPS_SYNC_READ - ;; - extr.u r26=r25,13,7 //r2 - addl r20=@gprel(asm_mov_from_reg),gp - ;; - adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20 - shladd r26=r26,4,r20 - mov r24=b0 - ;; - add r27=VPD_VPSR_START_OFFSET,r16 - mov b0=r26 - br.many b0 - ;; -kvm_asm_mov_to_psr_back: - ld8 r17=[r27] - add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 - dep r19=0,r19,32,32 - ;; - ld4 r23=[r22] - dep r18=0,r17,0,32 - ;; - add r30=r18,r19 - movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT - ;; - st8 [r27]=r30 - and r27=r28,r30 - and r29=r28,r17 - ;; - cmp.eq p5,p0=r29,r27 - cmp.eq p6,p7=r28,r27 - (p5) br.many kvm_asm_mov_to_psr_1 - ;; - //virtual to physical - (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21 - (p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 - (p7) dep r23=-1,r23,0,1 - ;; - //physical to virtual - (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 - (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 - (p6) dep r23=0,r23,0,1 - ;; - ld8 r26=[r26] - ld8 r27=[r27] - st4 [r22]=r23 - dep.z r28=4,61,3 - ;; - mov rr[r0]=r26 - ;; - mov rr[r28]=r27 - ;; - srlz.d - ;; -kvm_asm_mov_to_psr_1: - mov r20=cr.ipsr - movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT - ;; - or r19=r19,r28 - dep r20=0,r20,0,32 - ;; - add r20=r19,r20 - mov b0=r24 - ;; - /* Comment it out due to short of fp lazy algorithm support - adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 - ;; - ld8 r27=[r27] - ;; - tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT - ;; - (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 - ;; - */ - mov cr.ipsr=r20 - cmp.ne p6,p0=r0,r0 - ;; - tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT - tbit.z.or p6,p0=r30,IA64_PSR_I_BIT - (p6) br.dpnt.few kvm_resume_to_guest_with_sync - ;; - add r29=VPD_VTPR_START_OFFSET,r16 - add r30=VPD_VHPI_START_OFFSET,r16 - ;; - ld8 r29=[r29] - ld8 r30=[r30] - ;; - extr.u r17=r29,4,4 - extr.u r18=r29,16,1 - ;; - dep 
r17=r18,r17,4,1 - ;; - cmp.gt p6,p0=r30,r17 - (p6) br.dpnt.few kvm_asm_dispatch_vexirq - br.many kvm_resume_to_guest_with_sync -END(kvm_asm_mov_to_psr) - - -ENTRY(kvm_asm_dispatch_vexirq) -//increment iip - mov r17 = b0 - mov r18 = r31 -{.mii - add r25=VMM_VPD_BASE_OFFSET,r21 - nop 0x0 - mov r24 = ip - ;; -} -{.mmb - add r24 = 0x20, r24 - ld8 r25 = [r25] - br.sptk.many kvm_vps_sync_write -} - mov b0 =r17 - mov r16=cr.ipsr - mov r31 = r18 - mov r19 = 37 - ;; - extr.u r17=r16,IA64_PSR_RI_BIT,2 - tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 - ;; - (p6) mov r18=cr.iip - (p6) mov r17=r0 - (p7) add r17=1,r17 - ;; - (p6) add r18=0x10,r18 - dep r16=r17,r16,IA64_PSR_RI_BIT,2 - ;; - (p6) mov cr.iip=r18 - mov cr.ipsr=r16 - mov r30 =1 - br.many kvm_dispatch_vexirq -END(kvm_asm_dispatch_vexirq) - -// thash -// TODO: add support when pta.vf = 1 -GLOBAL_ENTRY(kvm_asm_thash) -#ifndef ACCE_THASH - br.many kvm_virtualization_fault_back -#endif - extr.u r17=r25,20,7 // get r3 from opcode in r25 - extr.u r18=r25,6,7 // get r1 from opcode in r25 - addl r20=@gprel(asm_mov_from_reg),gp - ;; - adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20 - shladd r17=r17,4,r20 // get addr of MOVE_FROM_REG(r17) - adds r16=VMM_VPD_BASE_OFFSET,r21 // get vcpu.arch.priveregs - ;; - mov r24=b0 - ;; - ld8 r16=[r16] // get VPD addr - mov b0=r17 - br.many b0 // r19 return value - ;; -kvm_asm_thash_back1: - shr.u r23=r19,61 // get RR number - adds r28=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr - adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta - ;; - shladd r27=r23,3,r28 // get vcpu->arch.vrr[r23]'s addr - ld8 r17=[r16] // get PTA - mov r26=1 - ;; - extr.u r29=r17,2,6 // get pta.size - ld8 r28=[r27] // get vcpu->arch.vrr[r23]'s value - ;; - mov b0=r24 - //Fallback to C if pta.vf is set - tbit.nz p6,p0=r17, 8 - ;; - (p6) mov r24=EVENT_THASH - (p6) br.cond.dpnt.many kvm_virtualization_fault_back - extr.u r28=r28,2,6 // get rr.ps - shl r22=r26,r29 // 1UL << pta.size - ;; - shr.u r23=r19,r28 // vaddr >> rr.ps - adds r26=3,r29 // pta.size + 3 - shl r27=r17,3 // pta << 3 - ;; - shl r23=r23,3 // (vaddr >> rr.ps) << 3 - shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3) - movl r16=7<<61 - ;; - adds r22=-1,r22 // (1UL << pta.size) - 1 - shl r27=r27,r29 // ((pta<<3)>>(pta.size+3))<<pta.size - and r19=r19,r16 // vaddr & VRN_MASK - ;; - and r22=r22,r23 // vhpt_offset - or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size) - adds r26=asm_mov_to_reg-asm_mov_from_reg,r20 - ;; - or r19=r19,r22 // calc pval - shladd r17=r18,4,r26 - adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 - ;; - mov b0=r17 - br.many b0 -END(kvm_asm_thash) - -#define MOV_TO_REG0 \ -{; \ - nop.b 0x0; \ - nop.b 0x0; \ - nop.b 0x0; \ - ;; \ -}; - - -#define MOV_TO_REG(n) \ -{; \ - mov r##n##=r19; \ - mov b0=r30; \ - br.sptk.many b0; \ - ;; \ -}; - - -#define MOV_FROM_REG(n) \ -{; \ - mov r19=r##n##; \ - mov b0=r30; \ - br.sptk.many b0; \ - ;; \ -}; - - -#define MOV_TO_BANK0_REG(n) \ -ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \ -{; \ - mov r26=r2; \ - mov r2=r19; \ - bsw.1; \ - ;; \ -}; \ -{; \ - mov r##n##=r2; \ - nop.b 0x0; \ - bsw.0; \ - ;; \ -}; \ -{; \ - mov r2=r26; \ - mov b0=r30; \ - br.sptk.many b0; \ - ;; \ -}; \ -END(asm_mov_to_bank0_reg##n##) - - -#define MOV_FROM_BANK0_REG(n) \ -ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##); \ -{; \ - mov r26=r2; \ - nop.b 0x0; \ - bsw.1; \ - ;; \ -}; \ -{; \ - mov r2=r##n##; \ - nop.b 0x0; \ - bsw.0; \ - ;; \ -}; \ -{; \ - mov r19=r2; \ - mov r2=r26; \ - mov b0=r30; \ -}; \ -{; \ - nop.b 0x0; \ - nop.b 0x0; \ - 
br.sptk.many b0; \ - ;; \ -}; \ -END(asm_mov_from_bank0_reg##n##) - - -#define JMP_TO_MOV_TO_BANK0_REG(n) \ -{; \ - nop.b 0x0; \ - nop.b 0x0; \ - br.sptk.many asm_mov_to_bank0_reg##n##; \ - ;; \ -} - - -#define JMP_TO_MOV_FROM_BANK0_REG(n) \ -{; \ - nop.b 0x0; \ - nop.b 0x0; \ - br.sptk.many asm_mov_from_bank0_reg##n##; \ - ;; \ -} - - -MOV_FROM_BANK0_REG(16) -MOV_FROM_BANK0_REG(17) -MOV_FROM_BANK0_REG(18) -MOV_FROM_BANK0_REG(19) -MOV_FROM_BANK0_REG(20) -MOV_FROM_BANK0_REG(21) -MOV_FROM_BANK0_REG(22) -MOV_FROM_BANK0_REG(23) -MOV_FROM_BANK0_REG(24) -MOV_FROM_BANK0_REG(25) -MOV_FROM_BANK0_REG(26) -MOV_FROM_BANK0_REG(27) -MOV_FROM_BANK0_REG(28) -MOV_FROM_BANK0_REG(29) -MOV_FROM_BANK0_REG(30) -MOV_FROM_BANK0_REG(31) - - -// mov from reg table -ENTRY(asm_mov_from_reg) - MOV_FROM_REG(0) - MOV_FROM_REG(1) - MOV_FROM_REG(2) - MOV_FROM_REG(3) - MOV_FROM_REG(4) - MOV_FROM_REG(5) - MOV_FROM_REG(6) - MOV_FROM_REG(7) - MOV_FROM_REG(8) - MOV_FROM_REG(9) - MOV_FROM_REG(10) - MOV_FROM_REG(11) - MOV_FROM_REG(12) - MOV_FROM_REG(13) - MOV_FROM_REG(14) - MOV_FROM_REG(15) - JMP_TO_MOV_FROM_BANK0_REG(16) - JMP_TO_MOV_FROM_BANK0_REG(17) - JMP_TO_MOV_FROM_BANK0_REG(18) - JMP_TO_MOV_FROM_BANK0_REG(19) - JMP_TO_MOV_FROM_BANK0_REG(20) - JMP_TO_MOV_FROM_BANK0_REG(21) - JMP_TO_MOV_FROM_BANK0_REG(22) - JMP_TO_MOV_FROM_BANK0_REG(23) - JMP_TO_MOV_FROM_BANK0_REG(24) - JMP_TO_MOV_FROM_BANK0_REG(25) - JMP_TO_MOV_FROM_BANK0_REG(26) - JMP_TO_MOV_FROM_BANK0_REG(27) - JMP_TO_MOV_FROM_BANK0_REG(28) - JMP_TO_MOV_FROM_BANK0_REG(29) - JMP_TO_MOV_FROM_BANK0_REG(30) - JMP_TO_MOV_FROM_BANK0_REG(31) - MOV_FROM_REG(32) - MOV_FROM_REG(33) - MOV_FROM_REG(34) - MOV_FROM_REG(35) - MOV_FROM_REG(36) - MOV_FROM_REG(37) - MOV_FROM_REG(38) - MOV_FROM_REG(39) - MOV_FROM_REG(40) - MOV_FROM_REG(41) - MOV_FROM_REG(42) - MOV_FROM_REG(43) - MOV_FROM_REG(44) - MOV_FROM_REG(45) - MOV_FROM_REG(46) - MOV_FROM_REG(47) - MOV_FROM_REG(48) - MOV_FROM_REG(49) - MOV_FROM_REG(50) - MOV_FROM_REG(51) - MOV_FROM_REG(52) - MOV_FROM_REG(53) - MOV_FROM_REG(54) - MOV_FROM_REG(55) - MOV_FROM_REG(56) - MOV_FROM_REG(57) - MOV_FROM_REG(58) - MOV_FROM_REG(59) - MOV_FROM_REG(60) - MOV_FROM_REG(61) - MOV_FROM_REG(62) - MOV_FROM_REG(63) - MOV_FROM_REG(64) - MOV_FROM_REG(65) - MOV_FROM_REG(66) - MOV_FROM_REG(67) - MOV_FROM_REG(68) - MOV_FROM_REG(69) - MOV_FROM_REG(70) - MOV_FROM_REG(71) - MOV_FROM_REG(72) - MOV_FROM_REG(73) - MOV_FROM_REG(74) - MOV_FROM_REG(75) - MOV_FROM_REG(76) - MOV_FROM_REG(77) - MOV_FROM_REG(78) - MOV_FROM_REG(79) - MOV_FROM_REG(80) - MOV_FROM_REG(81) - MOV_FROM_REG(82) - MOV_FROM_REG(83) - MOV_FROM_REG(84) - MOV_FROM_REG(85) - MOV_FROM_REG(86) - MOV_FROM_REG(87) - MOV_FROM_REG(88) - MOV_FROM_REG(89) - MOV_FROM_REG(90) - MOV_FROM_REG(91) - MOV_FROM_REG(92) - MOV_FROM_REG(93) - MOV_FROM_REG(94) - MOV_FROM_REG(95) - MOV_FROM_REG(96) - MOV_FROM_REG(97) - MOV_FROM_REG(98) - MOV_FROM_REG(99) - MOV_FROM_REG(100) - MOV_FROM_REG(101) - MOV_FROM_REG(102) - MOV_FROM_REG(103) - MOV_FROM_REG(104) - MOV_FROM_REG(105) - MOV_FROM_REG(106) - MOV_FROM_REG(107) - MOV_FROM_REG(108) - MOV_FROM_REG(109) - MOV_FROM_REG(110) - MOV_FROM_REG(111) - MOV_FROM_REG(112) - MOV_FROM_REG(113) - MOV_FROM_REG(114) - MOV_FROM_REG(115) - MOV_FROM_REG(116) - MOV_FROM_REG(117) - MOV_FROM_REG(118) - MOV_FROM_REG(119) - MOV_FROM_REG(120) - MOV_FROM_REG(121) - MOV_FROM_REG(122) - MOV_FROM_REG(123) - MOV_FROM_REG(124) - MOV_FROM_REG(125) - MOV_FROM_REG(126) - MOV_FROM_REG(127) -END(asm_mov_from_reg) - - -/* must be in bank 0 - * parameter: - * r31: pr - * r24: b0 - */ 
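
The resume entries that follow are the tail of every fast path above: control returns to the guest through one of two PAL virtualization services, chosen by the guest's interruption-collection bit (vpsr.ic), and when the handler variant is taken, cr.isr.ir is folded into bit 63 of the first VPD word so PAL knows whether the register frame is incomplete. A minimal C sketch of that decision, assuming hypothetical wrappers pal_vps_resume_normal()/pal_vps_resume_handler() in place of the hand-scheduled branch-register setup, with bit positions per the IA-64 SDM:

	/* Sketch only; mirrors the tail of kvm_resume_to_guest below. */
	#define PSR_IC_BIT	13	/* psr.ic: interruption collection */
	#define ISR_IR_BIT	38	/* isr.ir: incomplete register frame */

	/* hypothetical wrappers around PAL_VPS_RESUME_{NORMAL,HANDLER} */
	void pal_vps_resume_normal(unsigned long *vpd);
	void pal_vps_resume_handler(unsigned long *vpd);

	static void resume_to_guest_sketch(unsigned long vpsr, unsigned long isr,
					   unsigned long *vpd)
	{
		if (vpsr & (1UL << PSR_IC_BIT)) {
			/* guest is collecting interruptions: plain resume */
			pal_vps_resume_normal(vpd);
		} else {
			/* fold isr.ir into bit 63 of vpd[0], as the dep
			 * instruction in kvm_resume_to_guest does */
			unsigned long ir = (isr >> ISR_IR_BIT) & 1;

			vpd[0] = (vpd[0] & ~(1UL << 63)) | (ir << 63);
			pal_vps_resume_handler(vpd);
		}
	}
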
-ENTRY(kvm_resume_to_guest_with_sync) - adds r19=VMM_VPD_BASE_OFFSET,r21 - mov r16 = r31 - mov r17 = r24 - ;; -{.mii - ld8 r25 =[r19] - nop 0x0 - mov r24 = ip - ;; -} -{.mmb - add r24 =0x20, r24 - nop 0x0 - br.sptk.many kvm_vps_sync_write -} - - mov r31 = r16 - mov r24 =r17 - ;; - br.sptk.many kvm_resume_to_guest -END(kvm_resume_to_guest_with_sync) - -ENTRY(kvm_resume_to_guest) - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - ld8 r1 =[r16] - adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21 - ;; - mov r16=cr.ipsr - ;; - ld8 r20 = [r20] - adds r19=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r25=[r19] - extr.u r17=r16,IA64_PSR_RI_BIT,2 - tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 - ;; - (p6) mov r18=cr.iip - (p6) mov r17=r0 - ;; - (p6) add r18=0x10,r18 - (p7) add r17=1,r17 - ;; - (p6) mov cr.iip=r18 - dep r16=r17,r16,IA64_PSR_RI_BIT,2 - ;; - mov cr.ipsr=r16 - adds r19= VPD_VPSR_START_OFFSET,r25 - add r28=PAL_VPS_RESUME_NORMAL,r20 - add r29=PAL_VPS_RESUME_HANDLER,r20 - ;; - ld8 r19=[r19] - mov b0=r29 - mov r27=cr.isr - ;; - tbit.z p6,p7 = r19,IA64_PSR_IC_BIT // p7=vpsr.ic - shr r27=r27,IA64_ISR_IR_BIT - ;; - (p6) ld8 r26=[r25] - (p7) mov b0=r28 - ;; - (p6) dep r26=r27,r26,63,1 - mov pr=r31,-2 - br.sptk.many b0 // call pal service - ;; -END(kvm_resume_to_guest) - - -MOV_TO_BANK0_REG(16) -MOV_TO_BANK0_REG(17) -MOV_TO_BANK0_REG(18) -MOV_TO_BANK0_REG(19) -MOV_TO_BANK0_REG(20) -MOV_TO_BANK0_REG(21) -MOV_TO_BANK0_REG(22) -MOV_TO_BANK0_REG(23) -MOV_TO_BANK0_REG(24) -MOV_TO_BANK0_REG(25) -MOV_TO_BANK0_REG(26) -MOV_TO_BANK0_REG(27) -MOV_TO_BANK0_REG(28) -MOV_TO_BANK0_REG(29) -MOV_TO_BANK0_REG(30) -MOV_TO_BANK0_REG(31) - - -// mov to reg table -ENTRY(asm_mov_to_reg) - MOV_TO_REG0 - MOV_TO_REG(1) - MOV_TO_REG(2) - MOV_TO_REG(3) - MOV_TO_REG(4) - MOV_TO_REG(5) - MOV_TO_REG(6) - MOV_TO_REG(7) - MOV_TO_REG(8) - MOV_TO_REG(9) - MOV_TO_REG(10) - MOV_TO_REG(11) - MOV_TO_REG(12) - MOV_TO_REG(13) - MOV_TO_REG(14) - MOV_TO_REG(15) - JMP_TO_MOV_TO_BANK0_REG(16) - JMP_TO_MOV_TO_BANK0_REG(17) - JMP_TO_MOV_TO_BANK0_REG(18) - JMP_TO_MOV_TO_BANK0_REG(19) - JMP_TO_MOV_TO_BANK0_REG(20) - JMP_TO_MOV_TO_BANK0_REG(21) - JMP_TO_MOV_TO_BANK0_REG(22) - JMP_TO_MOV_TO_BANK0_REG(23) - JMP_TO_MOV_TO_BANK0_REG(24) - JMP_TO_MOV_TO_BANK0_REG(25) - JMP_TO_MOV_TO_BANK0_REG(26) - JMP_TO_MOV_TO_BANK0_REG(27) - JMP_TO_MOV_TO_BANK0_REG(28) - JMP_TO_MOV_TO_BANK0_REG(29) - JMP_TO_MOV_TO_BANK0_REG(30) - JMP_TO_MOV_TO_BANK0_REG(31) - MOV_TO_REG(32) - MOV_TO_REG(33) - MOV_TO_REG(34) - MOV_TO_REG(35) - MOV_TO_REG(36) - MOV_TO_REG(37) - MOV_TO_REG(38) - MOV_TO_REG(39) - MOV_TO_REG(40) - MOV_TO_REG(41) - MOV_TO_REG(42) - MOV_TO_REG(43) - MOV_TO_REG(44) - MOV_TO_REG(45) - MOV_TO_REG(46) - MOV_TO_REG(47) - MOV_TO_REG(48) - MOV_TO_REG(49) - MOV_TO_REG(50) - MOV_TO_REG(51) - MOV_TO_REG(52) - MOV_TO_REG(53) - MOV_TO_REG(54) - MOV_TO_REG(55) - MOV_TO_REG(56) - MOV_TO_REG(57) - MOV_TO_REG(58) - MOV_TO_REG(59) - MOV_TO_REG(60) - MOV_TO_REG(61) - MOV_TO_REG(62) - MOV_TO_REG(63) - MOV_TO_REG(64) - MOV_TO_REG(65) - MOV_TO_REG(66) - MOV_TO_REG(67) - MOV_TO_REG(68) - MOV_TO_REG(69) - MOV_TO_REG(70) - MOV_TO_REG(71) - MOV_TO_REG(72) - MOV_TO_REG(73) - MOV_TO_REG(74) - MOV_TO_REG(75) - MOV_TO_REG(76) - MOV_TO_REG(77) - MOV_TO_REG(78) - MOV_TO_REG(79) - MOV_TO_REG(80) - MOV_TO_REG(81) - MOV_TO_REG(82) - MOV_TO_REG(83) - MOV_TO_REG(84) - MOV_TO_REG(85) - MOV_TO_REG(86) - MOV_TO_REG(87) - MOV_TO_REG(88) - MOV_TO_REG(89) - MOV_TO_REG(90) - MOV_TO_REG(91) - MOV_TO_REG(92) - MOV_TO_REG(93) - MOV_TO_REG(94) - MOV_TO_REG(95) - MOV_TO_REG(96) - MOV_TO_REG(97) - MOV_TO_REG(98) - MOV_TO_REG(99) - 
MOV_TO_REG(100) - MOV_TO_REG(101) - MOV_TO_REG(102) - MOV_TO_REG(103) - MOV_TO_REG(104) - MOV_TO_REG(105) - MOV_TO_REG(106) - MOV_TO_REG(107) - MOV_TO_REG(108) - MOV_TO_REG(109) - MOV_TO_REG(110) - MOV_TO_REG(111) - MOV_TO_REG(112) - MOV_TO_REG(113) - MOV_TO_REG(114) - MOV_TO_REG(115) - MOV_TO_REG(116) - MOV_TO_REG(117) - MOV_TO_REG(118) - MOV_TO_REG(119) - MOV_TO_REG(120) - MOV_TO_REG(121) - MOV_TO_REG(122) - MOV_TO_REG(123) - MOV_TO_REG(124) - MOV_TO_REG(125) - MOV_TO_REG(126) - MOV_TO_REG(127) -END(asm_mov_to_reg) diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c deleted file mode 100644 index b0398740b48..00000000000 --- a/arch/ia64/kvm/process.c +++ /dev/null @@ -1,1024 +0,0 @@ -/* - * process.c: handle interruption inject for guests. - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Shaofan Li (Susue Li) <susie.li@intel.com> - * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com> - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Xiantao Zhang (xiantao.zhang@intel.com) - */ -#include "vcpu.h" - -#include <asm/pal.h> -#include <asm/sal.h> -#include <asm/fpswa.h> -#include <asm/kregs.h> -#include <asm/tlb.h> - -fpswa_interface_t *vmm_fpswa_interface; - -#define IA64_VHPT_TRANS_VECTOR 0x0000 -#define IA64_INST_TLB_VECTOR 0x0400 -#define IA64_DATA_TLB_VECTOR 0x0800 -#define IA64_ALT_INST_TLB_VECTOR 0x0c00 -#define IA64_ALT_DATA_TLB_VECTOR 0x1000 -#define IA64_DATA_NESTED_TLB_VECTOR 0x1400 -#define IA64_INST_KEY_MISS_VECTOR 0x1800 -#define IA64_DATA_KEY_MISS_VECTOR 0x1c00 -#define IA64_DIRTY_BIT_VECTOR 0x2000 -#define IA64_INST_ACCESS_BIT_VECTOR 0x2400 -#define IA64_DATA_ACCESS_BIT_VECTOR 0x2800 -#define IA64_BREAK_VECTOR 0x2c00 -#define IA64_EXTINT_VECTOR 0x3000 -#define IA64_PAGE_NOT_PRESENT_VECTOR 0x5000 -#define IA64_KEY_PERMISSION_VECTOR 0x5100 -#define IA64_INST_ACCESS_RIGHTS_VECTOR 0x5200 -#define IA64_DATA_ACCESS_RIGHTS_VECTOR 0x5300 -#define IA64_GENEX_VECTOR 0x5400 -#define IA64_DISABLED_FPREG_VECTOR 0x5500 -#define IA64_NAT_CONSUMPTION_VECTOR 0x5600 -#define IA64_SPECULATION_VECTOR 0x5700 /* UNUSED */ -#define IA64_DEBUG_VECTOR 0x5900 -#define IA64_UNALIGNED_REF_VECTOR 0x5a00 -#define IA64_UNSUPPORTED_DATA_REF_VECTOR 0x5b00 -#define IA64_FP_FAULT_VECTOR 0x5c00 -#define IA64_FP_TRAP_VECTOR 0x5d00 -#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 0x5e00 -#define IA64_TAKEN_BRANCH_TRAP_VECTOR 0x5f00 -#define IA64_SINGLE_STEP_TRAP_VECTOR 0x6000 - -/* SDM vol2 5.5 - IVA based interruption handling */ -#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\ - IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT | \ - IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT) - -#define DOMN_PAL_REQUEST 0x110000 -#define DOMN_SAL_REQUEST 0x110001 - -static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800, - 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, - 0x4000, 0x4400, 0x4800, 
0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400, - 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00, - 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600, - 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00, - 0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, - 0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00 -}; - -static void collect_interruption(struct kvm_vcpu *vcpu) -{ - u64 ipsr; - u64 vdcr; - u64 vifs; - unsigned long vpsr; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - vpsr = vcpu_get_psr(vcpu); - vcpu_bsw0(vcpu); - if (vpsr & IA64_PSR_IC) { - - /* Sync mpsr id/da/dd/ss/ed bits to vipsr - * since after guest do rfi, we still want these bits on in - * mpsr - */ - - ipsr = regs->cr_ipsr; - vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA - | IA64_PSR_DD | IA64_PSR_SS - | IA64_PSR_ED)); - vcpu_set_ipsr(vcpu, vpsr); - - /* Currently, for trap, we do not advance IIP to next - * instruction. That's because we assume caller already - * set up IIP correctly - */ - - vcpu_set_iip(vcpu , regs->cr_iip); - - /* set vifs.v to zero */ - vifs = VCPU(vcpu, ifs); - vifs &= ~IA64_IFS_V; - vcpu_set_ifs(vcpu, vifs); - - vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa)); - } - - vdcr = VCPU(vcpu, dcr); - - /* Set guest psr - * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged - * be: set to the value of dcr.be - * pp: set to the value of dcr.pp - */ - vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION; - vpsr |= (vdcr & IA64_DCR_BE); - - /* VDCR pp bit position is different from VPSR pp bit */ - if (vdcr & IA64_DCR_PP) { - vpsr |= IA64_PSR_PP; - } else { - vpsr &= ~IA64_PSR_PP; - } - - vcpu_set_psr(vcpu, vpsr); - -} - -void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec) -{ - u64 viva; - struct kvm_pt_regs *regs; - union ia64_isr pt_isr; - - regs = vcpu_regs(vcpu); - - /* clear cr.isr.ir (incomplete register frame)*/ - pt_isr.val = VMX(vcpu, cr_isr); - pt_isr.ir = 0; - VMX(vcpu, cr_isr) = pt_isr.val; - - collect_interruption(vcpu); - - viva = vcpu_get_iva(vcpu); - regs->cr_iip = viva + vec; -} - -static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa) -{ - union ia64_rr rr, rr1; - - rr.val = vcpu_get_rr(vcpu, ifa); - rr1.val = 0; - rr1.ps = rr.ps; - rr1.rid = rr.rid; - return (rr1.val); -} - -/* - * Set vIFA & vITIR & vIHA, when vPSR.ic =1 - * Parameter: - * set_ifa: if true, set vIFA - * set_itir: if true, set vITIR - * set_iha: if true, set vIHA - */ -void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr, - int set_ifa, int set_itir, int set_iha) -{ - long vpsr; - u64 value; - - vpsr = VCPU(vcpu, vpsr); - /* Vol2, Table 8-1 */ - if (vpsr & IA64_PSR_IC) { - if (set_ifa) - vcpu_set_ifa(vcpu, vadr); - if (set_itir) { - value = vcpu_get_itir_on_fault(vcpu, vadr); - vcpu_set_itir(vcpu, value); - } - - if (set_iha) { - value = vcpu_thash(vcpu, vadr); - vcpu_set_iha(vcpu, value); - } - } -} - -/* - * Data TLB Fault - * @ Data TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR, IHA */ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); - inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR); -} - -/* - * Instruction TLB Fault - * @ Instruction TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR, IHA */ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); - inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR); -} - -/* - * Data Nested TLB Fault - * @ Data Nested TLB Vector - * Refer 
to SDM Vol2 Table 5-6 & 8-1 - */ -void nested_dtlb(struct kvm_vcpu *vcpu) -{ - inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR); -} - -/* - * Alternate Data TLB Fault - * @ Alternate Data TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr) -{ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR); -} - -/* - * Data TLB Fault - * @ Data TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr) -{ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR); -} - -/* Deal with: - * VHPT Translation Vector - */ -static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR, IHA*/ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); - inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR); -} - -/* - * VHPT Instruction Fault - * @ VHPT Translation vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - _vhpt_fault(vcpu, vadr); -} - -/* - * VHPT Data Fault - * @ VHPT Translation vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - _vhpt_fault(vcpu, vadr); -} - -/* - * Deal with: - * General Exception vector - */ -void _general_exception(struct kvm_vcpu *vcpu) -{ - inject_guest_interruption(vcpu, IA64_GENEX_VECTOR); -} - -/* - * Illegal Operation Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void illegal_op(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Illegal Dependency Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void illegal_dep(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Reserved Register/Field Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void rsv_reg_field(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} -/* - * Privileged Operation Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ - -void privilege_op(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Unimplement Data Address Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void unimpl_daddr(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Privileged Register Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void privilege_reg(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* Deal with - * Nat consumption vector - * Parameter: - * vaddr: Optional, if t == REGISTER - */ -static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr, - enum tlb_miss_type t) -{ - /* If vPSR.ic && t == DATA/INST, IFA */ - if (t == DATA || t == INSTRUCTION) { - /* IFA */ - set_ifa_itir_iha(vcpu, vadr, 1, 0, 0); - } - - inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR); -} - -/* - * Instruction Nat Page Consumption Fault - * @ Nat Consumption Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) -{ - _nat_consumption_fault(vcpu, vadr, INSTRUCTION); -} - -/* - * Register Nat Consumption Fault - * @ Nat Consumption Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void rnat_consumption(struct kvm_vcpu *vcpu) -{ - _nat_consumption_fault(vcpu, 0, REGISTER); -} - -/* - * Data Nat Page Consumption Fault - * @ Nat Consumption Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void 
dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) -{ - _nat_consumption_fault(vcpu, vadr, DATA); -} - -/* Deal with - * Page not present vector - */ -static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR */ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); -} - -void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) -{ - __page_not_present(vcpu, vadr); -} - -void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) -{ - __page_not_present(vcpu, vadr); -} - -/* Deal with - * Data access rights vector - */ -void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR */ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR); -} - -fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr, - unsigned long *fpsr, unsigned long *isr, unsigned long *pr, - unsigned long *ifs, struct kvm_pt_regs *regs) -{ - fp_state_t fp_state; - fpswa_ret_t ret; - struct kvm_vcpu *vcpu = current_vcpu; - - uint64_t old_rr7 = ia64_get_rr(7UL<<61); - - if (!vmm_fpswa_interface) - return (fpswa_ret_t) {-1, 0, 0, 0}; - - memset(&fp_state, 0, sizeof(fp_state_t)); - - /* - * compute fp_state. only FP registers f6 - f11 are used by the - * vmm, so set those bits in the mask and set the low volatile - * pointer to point to these registers. - */ - fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */ - - fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6; - - /* - * unsigned long (*EFI_FPSWA) ( - * unsigned long trap_type, - * void *Bundle, - * unsigned long *pipsr, - * unsigned long *pfsr, - * unsigned long *pisr, - * unsigned long *ppreds, - * unsigned long *pifs, - * void *fp_state); - */ - /*Call host fpswa interface directly to virtualize - *guest fpswa request! - */ - ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]); - ia64_srlz_d(); - - ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle, - ipsr, fpsr, isr, pr, ifs, &fp_state); - ia64_set_rr(7UL << 61, old_rr7); - ia64_srlz_d(); - return ret; -} - -/* - * Handle floating-point assist faults and traps for domain. - */ -unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs, - unsigned long isr) -{ - struct kvm_vcpu *v = current_vcpu; - IA64_BUNDLE bundle; - unsigned long fault_ip; - fpswa_ret_t ret; - - fault_ip = regs->cr_iip; - /* - * When the FP trap occurs, the trapping instruction is completed. - * If ipsr.ri == 0, there is the trapping instruction in previous - * bundle.
- */ - if (!fp_fault && (ia64_psr(regs)->ri == 0)) - fault_ip -= 16; - - if (fetch_code(v, fault_ip, &bundle)) - return -EAGAIN; - - if (!bundle.i64[0] && !bundle.i64[1]) - return -EACCES; - - ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr, - &isr, &regs->pr, &regs->cr_ifs, regs); - return ret.status; -} - -void reflect_interruption(u64 ifa, u64 isr, u64 iim, - u64 vec, struct kvm_pt_regs *regs) -{ - u64 vector; - int status ; - struct kvm_vcpu *vcpu = current_vcpu; - u64 vpsr = VCPU(vcpu, vpsr); - - vector = vec2off[vec]; - - if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) { - panic_vm(vcpu, "Interruption with vector :0x%lx occurs " - "with psr.ic = 0\n", vector); - return; - } - - switch (vec) { - case 32: /*IA64_FP_FAULT_VECTOR*/ - status = vmm_handle_fpu_swa(1, regs, isr); - if (!status) { - vcpu_increment_iip(vcpu); - return; - } else if (-EAGAIN == status) - return; - break; - case 33: /*IA64_FP_TRAP_VECTOR*/ - status = vmm_handle_fpu_swa(0, regs, isr); - if (!status) - return ; - break; - } - - VCPU(vcpu, isr) = isr; - VCPU(vcpu, iipa) = regs->cr_iip; - if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) - VCPU(vcpu, iim) = iim; - else - set_ifa_itir_iha(vcpu, ifa, 1, 1, 1); - - inject_guest_interruption(vcpu, vector); -} - -static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu, - unsigned long arg) -{ - struct thash_data *data; - unsigned long gpa, poff; - - if (!is_physical_mode(vcpu)) { - /* Depends on caller to provide the DTR or DTC mapping.*/ - data = vtlb_lookup(vcpu, arg, D_TLB); - if (data) - gpa = data->page_flags & _PAGE_PPN_MASK; - else { - data = vhpt_lookup(arg); - if (!data) - return 0; - gpa = data->gpaddr & _PAGE_PPN_MASK; - } - - poff = arg & (PSIZE(data->ps) - 1); - arg = PAGEALIGN(gpa, data->ps) | poff; - } - arg = kvm_gpa_to_mpa(arg << 1 >> 1); - - return (unsigned long)__va(arg); -} - -static void set_pal_call_data(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - unsigned long gr28 = vcpu_get_gr(vcpu, 28); - unsigned long gr29 = vcpu_get_gr(vcpu, 29); - unsigned long gr30 = vcpu_get_gr(vcpu, 30); - - /*FIXME:For static and stacked convention, firmware - * has put the parameters in gr28-gr31 before - * break to vmm !!*/ - - switch (gr28) { - case PAL_PERF_MON_INFO: - case PAL_HALT_INFO: - p->u.pal_data.gr29 = kvm_trans_pal_call_args(vcpu, gr29); - p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); - break; - case PAL_BRAND_INFO: - p->u.pal_data.gr29 = gr29; - p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30); - break; - default: - p->u.pal_data.gr29 = gr29; - p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); - } - p->u.pal_data.gr28 = gr28; - p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31); - - p->exit_reason = EXIT_REASON_PAL_CALL; -} - -static void get_pal_call_result(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - - if (p->exit_reason == EXIT_REASON_PAL_CALL) { - vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0); - vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0); - vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0); - vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0); - } else - panic_vm(vcpu, "Mis-set for exit reason!\n"); -} - -static void set_sal_call_data(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - - p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32); - p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33); - p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34); - p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35); - p->u.sal_data.in4 = vcpu_get_gr(vcpu,
36); - p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37); - p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38); - p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39); - p->exit_reason = EXIT_REASON_SAL_CALL; -} - -static void get_sal_call_result(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - - if (p->exit_reason == EXIT_REASON_SAL_CALL) { - vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0); - vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0); - vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0); - vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0); - } else - panic_vm(vcpu, "Mis-set for exit reason!\n"); -} - -void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, - unsigned long isr, unsigned long iim) -{ - struct kvm_vcpu *v = current_vcpu; - long psr; - - if (ia64_psr(regs)->cpl == 0) { - /* Allow hypercalls only when cpl = 0. */ - if (iim == DOMN_PAL_REQUEST) { - local_irq_save(psr); - set_pal_call_data(v); - vmm_transition(v); - get_pal_call_result(v); - vcpu_increment_iip(v); - local_irq_restore(psr); - return; - } else if (iim == DOMN_SAL_REQUEST) { - local_irq_save(psr); - set_sal_call_data(v); - vmm_transition(v); - get_sal_call_result(v); - vcpu_increment_iip(v); - local_irq_restore(psr); - return; - } - } - reflect_interruption(ifa, isr, iim, 11, regs); -} - -void check_pending_irq(struct kvm_vcpu *vcpu) -{ - int mask, h_pending, h_inservice; - u64 isr; - unsigned long vpsr; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - h_pending = highest_pending_irq(vcpu); - if (h_pending == NULL_VECTOR) { - update_vhpi(vcpu, NULL_VECTOR); - return; - } - h_inservice = highest_inservice_irq(vcpu); - - vpsr = VCPU(vcpu, vpsr); - mask = irq_masked(vcpu, h_pending, h_inservice); - if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) { - isr = vpsr & IA64_PSR_RI; - update_vhpi(vcpu, h_pending); - reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ - } else if (mask == IRQ_MASKED_BY_INSVC) { - if (VCPU(vcpu, vhpi)) - update_vhpi(vcpu, NULL_VECTOR); - } else { - /* masked by vpsr.i or vtpr.*/ - update_vhpi(vcpu, h_pending); - } -} - -static void generate_exirq(struct kvm_vcpu *vcpu) -{ - unsigned vpsr; - uint64_t isr; - - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - vpsr = VCPU(vcpu, vpsr); - isr = vpsr & IA64_PSR_RI; - if (!(vpsr & IA64_PSR_IC)) - panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n"); - reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ -} - -void vhpi_detection(struct kvm_vcpu *vcpu) -{ - uint64_t threshold, vhpi; - union ia64_tpr vtpr; - struct ia64_psr vpsr; - - vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - vtpr.val = VCPU(vcpu, tpr); - - threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic; - vhpi = VCPU(vcpu, vhpi); - if (vhpi > threshold) { - /* interrupt actived*/ - generate_exirq(vcpu); - } -} - -void leave_hypervisor_tail(void) -{ - struct kvm_vcpu *v = current_vcpu; - - if (VMX(v, timer_check)) { - VMX(v, timer_check) = 0; - if (VMX(v, itc_check)) { - if (vcpu_get_itc(v) > VCPU(v, itm)) { - if (!(VCPU(v, itv) & (1 << 16))) { - vcpu_pend_interrupt(v, VCPU(v, itv) - & 0xff); - VMX(v, itc_check) = 0; - } else { - v->arch.timer_pending = 1; - } - VMX(v, last_itc) = VCPU(v, itm) + 1; - } - } - } - - rmb(); - if (v->arch.irq_new_pending) { - v->arch.irq_new_pending = 0; - VMX(v, irq_check) = 0; - check_pending_irq(v); - return; - } - if (VMX(v, irq_check)) { - VMX(v, irq_check) = 0; - vhpi_detection(v); - } -} - -static inline void handle_lds(struct kvm_pt_regs *regs) -{ - regs->cr_ipsr |= IA64_PSR_ED; -} - -void physical_tlb_miss(struct 
kvm_vcpu *vcpu, unsigned long vadr, int type) -{ - unsigned long pte; - union ia64_rr rr; - - rr.val = ia64_get_rr(vadr); - pte = vadr & _PAGE_PPN_MASK; - pte = pte | PHY_PAGE_WB; - thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type); - return; -} - -void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs) -{ - unsigned long vpsr; - int type; - - u64 vhpt_adr, gppa, pteval, rr, itir; - union ia64_isr misr; - union ia64_pta vpta; - struct thash_data *data; - struct kvm_vcpu *v = current_vcpu; - - vpsr = VCPU(v, vpsr); - misr.val = VMX(v, cr_isr); - - type = vec; - - if (is_physical_mode(v) && (!(vadr << 1 >> 62))) { - if (vec == 2) { - if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) { - emulate_io_inst(v, ((vadr << 1) >> 1), 4); - return; - } - } - physical_tlb_miss(v, vadr, type); - return; - } - data = vtlb_lookup(v, vadr, type); - if (data != 0) { - if (type == D_TLB) { - gppa = (vadr & ((1UL << data->ps) - 1)) - + (data->ppn >> (data->ps - 12) << data->ps); - if (__gpfn_is_io(gppa >> PAGE_SHIFT)) { - if (data->pl >= ((regs->cr_ipsr >> - IA64_PSR_CPL0_BIT) & 3)) - emulate_io_inst(v, gppa, data->ma); - else { - vcpu_set_isr(v, misr.val); - data_access_rights(v, vadr); - } - return ; - } - } - thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type); - - } else if (type == D_TLB) { - if (misr.sp) { - handle_lds(regs); - return; - } - - rr = vcpu_get_rr(v, vadr); - itir = rr & (RR_RID_MASK | RR_PS_MASK); - - if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) { - if (vpsr & IA64_PSR_IC) { - vcpu_set_isr(v, misr.val); - alt_dtlb(v, vadr); - } else { - nested_dtlb(v); - } - return ; - } - - vpta.val = vcpu_get_pta(v); - /* avoid recursively walking (short format) VHPT */ - - vhpt_adr = vcpu_thash(v, vadr); - if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { - /* VHPT successfully read. */ - if (!(pteval & _PAGE_P)) { - if (vpsr & IA64_PSR_IC) { - vcpu_set_isr(v, misr.val); - dtlb_fault(v, vadr); - } else { - nested_dtlb(v); - } - } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) { - thash_purge_and_insert(v, pteval, itir, - vadr, D_TLB); - } else if (vpsr & IA64_PSR_IC) { - vcpu_set_isr(v, misr.val); - dtlb_fault(v, vadr); - } else { - nested_dtlb(v); - } - } else { - /* Can't read VHPT. */ - if (vpsr & IA64_PSR_IC) { - vcpu_set_isr(v, misr.val); - dvhpt_fault(v, vadr); - } else { - nested_dtlb(v); - } - } - } else if (type == I_TLB) { - if (!(vpsr & IA64_PSR_IC)) - misr.ni = 1; - if (!vhpt_enabled(v, vadr, INST_REF)) { - vcpu_set_isr(v, misr.val); - alt_itlb(v, vadr); - return; - } - - vpta.val = vcpu_get_pta(v); - - vhpt_adr = vcpu_thash(v, vadr); - if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { - /* VHPT successfully read. 
*/ - if (pteval & _PAGE_P) { - if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) { - vcpu_set_isr(v, misr.val); - itlb_fault(v, vadr); - return ; - } - rr = vcpu_get_rr(v, vadr); - itir = rr & (RR_RID_MASK | RR_PS_MASK); - thash_purge_and_insert(v, pteval, itir, - vadr, I_TLB); - } else { - vcpu_set_isr(v, misr.val); - inst_page_not_present(v, vadr); - } - } else { - vcpu_set_isr(v, misr.val); - ivhpt_fault(v, vadr); - } - } -} - -void kvm_vexirq(struct kvm_vcpu *vcpu) -{ - u64 vpsr, isr; - struct kvm_pt_regs *regs; - - regs = vcpu_regs(vcpu); - vpsr = VCPU(vcpu, vpsr); - isr = vpsr & IA64_PSR_RI; - reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/ -} - -void kvm_ia64_handle_irq(struct kvm_vcpu *v) -{ - struct exit_ctl_data *p = &v->arch.exit_data; - long psr; - - local_irq_save(psr); - p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; - vmm_transition(v); - local_irq_restore(psr); - - VMX(v, timer_check) = 1; - -} - -static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos) -{ - u64 oldrid, moldrid, oldpsbits, vaddr; - struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos]; - vaddr = p->vaddr; - - oldrid = VMX(v, vrr[0]); - VMX(v, vrr[0]) = p->rr; - oldpsbits = VMX(v, psbits[0]); - VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]); - moldrid = ia64_get_rr(0x0); - ia64_set_rr(0x0, vrrtomrr(p->rr)); - ia64_srlz_d(); - - vaddr = PAGEALIGN(vaddr, p->ps); - thash_purge_entries_remote(v, vaddr, p->ps); - - VMX(v, vrr[0]) = oldrid; - VMX(v, psbits[0]) = oldpsbits; - ia64_set_rr(0x0, moldrid); - ia64_dv_serialize_data(); -} - -static void vcpu_do_resume(struct kvm_vcpu *vcpu) -{ - /*Re-init VHPT and VTLB once from resume*/ - vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES; - thash_init(&vcpu->arch.vhpt, VHPT_SHIFT); - vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES; - thash_init(&vcpu->arch.vtlb, VTLB_SHIFT); - - ia64_set_pta(vcpu->arch.vhpt.pta.val); -} - -static void vmm_sanity_check(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - - if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) { - panic_vm(vcpu, "Failed to do vmm sanity check," - "it maybe caused by crashed vmm!!\n\n"); - } -} - -static void kvm_do_resume_op(struct kvm_vcpu *vcpu) -{ - vmm_sanity_check(vcpu); /*Guarantee vcpu running on healthy vmm!*/ - - if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) { - vcpu_do_resume(vcpu); - return; - } - - if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) { - thash_purge_all(vcpu); - return; - } - - if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) { - while (vcpu->arch.ptc_g_count > 0) - ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count); - } -} - -void vmm_transition(struct kvm_vcpu *vcpu) -{ - ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd, - 1, 0, 0, 0, 0, 0); - vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host); - ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd, - 1, 0, 0, 0, 0, 0); - kvm_do_resume_op(vcpu); -} - -void vmm_panic_handler(u64 vec) -{ - struct kvm_vcpu *vcpu = current_vcpu; - vmm_sanity = 0; - panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n", - vec2off[vec]); -} diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S deleted file mode 100644 index 30897d44d61..00000000000 --- a/arch/ia64/kvm/trampoline.S +++ /dev/null @@ -1,1038 +0,0 @@ -/* Save all processor states - * - * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com> - * Copyright (c) 2007 Anthony Xu <anthony.xu@intel.com> - */ - -#include <asm/asmmacro.h> -#include "asm-offsets.h" - - -#define CTX(name) 
VMM_CTX_##name##_OFFSET - - /* - * r32: context_t base address - */ -#define SAVE_BRANCH_REGS \ - add r2 = CTX(B0),r32; \ - add r3 = CTX(B1),r32; \ - mov r16 = b0; \ - mov r17 = b1; \ - ;; \ - st8 [r2]=r16,16; \ - st8 [r3]=r17,16; \ - ;; \ - mov r16 = b2; \ - mov r17 = b3; \ - ;; \ - st8 [r2]=r16,16; \ - st8 [r3]=r17,16; \ - ;; \ - mov r16 = b4; \ - mov r17 = b5; \ - ;; \ - st8 [r2]=r16; \ - st8 [r3]=r17; \ - ;; - - /* - * r33: context_t base address - */ -#define RESTORE_BRANCH_REGS \ - add r2 = CTX(B0),r33; \ - add r3 = CTX(B1),r33; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov b0 = r16; \ - mov b1 = r17; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov b2 = r16; \ - mov b3 = r17; \ - ;; \ - ld8 r16=[r2]; \ - ld8 r17=[r3]; \ - ;; \ - mov b4=r16; \ - mov b5=r17; \ - ;; - - - /* - * r32: context_t base address - * bsw == 1 - * Save all bank1 general registers, r4 ~ r7 - */ -#define SAVE_GENERAL_REGS \ - add r2=CTX(R4),r32; \ - add r3=CTX(R5),r32; \ - ;; \ -.mem.offset 0,0; \ - st8.spill [r2]=r4,16; \ -.mem.offset 8,0; \ - st8.spill [r3]=r5,16; \ - ;; \ -.mem.offset 0,0; \ - st8.spill [r2]=r6,48; \ -.mem.offset 8,0; \ - st8.spill [r3]=r7,48; \ - ;; \ -.mem.offset 0,0; \ - st8.spill [r2]=r12; \ -.mem.offset 8,0; \ - st8.spill [r3]=r13; \ - ;; - - /* - * r33: context_t base address - * bsw == 1 - */ -#define RESTORE_GENERAL_REGS \ - add r2=CTX(R4),r33; \ - add r3=CTX(R5),r33; \ - ;; \ - ld8.fill r4=[r2],16; \ - ld8.fill r5=[r3],16; \ - ;; \ - ld8.fill r6=[r2],48; \ - ld8.fill r7=[r3],48; \ - ;; \ - ld8.fill r12=[r2]; \ - ld8.fill r13 =[r3]; \ - ;; - - - - - /* - * r32: context_t base address - */ -#define SAVE_KERNEL_REGS \ - add r2 = CTX(KR0),r32; \ - add r3 = CTX(KR1),r32; \ - mov r16 = ar.k0; \ - mov r17 = ar.k1; \ - ;; \ - st8 [r2] = r16,16; \ - st8 [r3] = r17,16; \ - ;; \ - mov r16 = ar.k2; \ - mov r17 = ar.k3; \ - ;; \ - st8 [r2] = r16,16; \ - st8 [r3] = r17,16; \ - ;; \ - mov r16 = ar.k4; \ - mov r17 = ar.k5; \ - ;; \ - st8 [r2] = r16,16; \ - st8 [r3] = r17,16; \ - ;; \ - mov r16 = ar.k6; \ - mov r17 = ar.k7; \ - ;; \ - st8 [r2] = r16; \ - st8 [r3] = r17; \ - ;; - - - - /* - * r33: context_t base address - */ -#define RESTORE_KERNEL_REGS \ - add r2 = CTX(KR0),r33; \ - add r3 = CTX(KR1),r33; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov ar.k0=r16; \ - mov ar.k1=r17; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov ar.k2=r16; \ - mov ar.k3=r17; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov ar.k4=r16; \ - mov ar.k5=r17; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov ar.k6=r16; \ - mov ar.k7=r17; \ - ;; - - - - /* - * r32: context_t base address - */ -#define SAVE_APP_REGS \ - add r2 = CTX(BSPSTORE),r32; \ - mov r16 = ar.bspstore; \ - ;; \ - st8 [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\ - mov r16 = ar.rnat; \ - ;; \ - st8 [r2] = r16,CTX(FCR)-CTX(RNAT); \ - mov r16 = ar.fcr; \ - ;; \ - st8 [r2] = r16,CTX(EFLAG)-CTX(FCR); \ - mov r16 = ar.eflag; \ - ;; \ - st8 [r2] = r16,CTX(CFLG)-CTX(EFLAG); \ - mov r16 = ar.cflg; \ - ;; \ - st8 [r2] = r16,CTX(FSR)-CTX(CFLG); \ - mov r16 = ar.fsr; \ - ;; \ - st8 [r2] = r16,CTX(FIR)-CTX(FSR); \ - mov r16 = ar.fir; \ - ;; \ - st8 [r2] = r16,CTX(FDR)-CTX(FIR); \ - mov r16 = ar.fdr; \ - ;; \ - st8 [r2] = r16,CTX(UNAT)-CTX(FDR); \ - mov r16 = ar.unat; \ - ;; \ - st8 [r2] = r16,CTX(FPSR)-CTX(UNAT); \ - mov r16 = ar.fpsr; \ - ;; \ - st8 [r2] = r16,CTX(PFS)-CTX(FPSR); \ - mov r16 = ar.pfs; \ - ;; \ - st8 [r2] = r16,CTX(LC)-CTX(PFS); \ - mov r16 = ar.lc; \ - ;; \ - st8 
[r2] = r16; \ - ;; - - /* - * r33: context_t base address - */ -#define RESTORE_APP_REGS \ - add r2=CTX(BSPSTORE),r33; \ - ;; \ - ld8 r16=[r2],CTX(RNAT)-CTX(BSPSTORE); \ - ;; \ - mov ar.bspstore=r16; \ - ld8 r16=[r2],CTX(FCR)-CTX(RNAT); \ - ;; \ - mov ar.rnat=r16; \ - ld8 r16=[r2],CTX(EFLAG)-CTX(FCR); \ - ;; \ - mov ar.fcr=r16; \ - ld8 r16=[r2],CTX(CFLG)-CTX(EFLAG); \ - ;; \ - mov ar.eflag=r16; \ - ld8 r16=[r2],CTX(FSR)-CTX(CFLG); \ - ;; \ - mov ar.cflg=r16; \ - ld8 r16=[r2],CTX(FIR)-CTX(FSR); \ - ;; \ - mov ar.fsr=r16; \ - ld8 r16=[r2],CTX(FDR)-CTX(FIR); \ - ;; \ - mov ar.fir=r16; \ - ld8 r16=[r2],CTX(UNAT)-CTX(FDR); \ - ;; \ - mov ar.fdr=r16; \ - ld8 r16=[r2],CTX(FPSR)-CTX(UNAT); \ - ;; \ - mov ar.unat=r16; \ - ld8 r16=[r2],CTX(PFS)-CTX(FPSR); \ - ;; \ - mov ar.fpsr=r16; \ - ld8 r16=[r2],CTX(LC)-CTX(PFS); \ - ;; \ - mov ar.pfs=r16; \ - ld8 r16=[r2]; \ - ;; \ - mov ar.lc=r16; \ - ;; - - /* - * r32: context_t base address - */ -#define SAVE_CTL_REGS \ - add r2 = CTX(DCR),r32; \ - mov r16 = cr.dcr; \ - ;; \ - st8 [r2] = r16,CTX(IVA)-CTX(DCR); \ - ;; \ - mov r16 = cr.iva; \ - ;; \ - st8 [r2] = r16,CTX(PTA)-CTX(IVA); \ - ;; \ - mov r16 = cr.pta; \ - ;; \ - st8 [r2] = r16 ; \ - ;; - - /* - * r33: context_t base address - */ -#define RESTORE_CTL_REGS \ - add r2 = CTX(DCR),r33; \ - ;; \ - ld8 r16 = [r2],CTX(IVA)-CTX(DCR); \ - ;; \ - mov cr.dcr = r16; \ - dv_serialize_data; \ - ;; \ - ld8 r16 = [r2],CTX(PTA)-CTX(IVA); \ - ;; \ - mov cr.iva = r16; \ - dv_serialize_data; \ - ;; \ - ld8 r16 = [r2]; \ - ;; \ - mov cr.pta = r16; \ - dv_serialize_data; \ - ;; - - - /* - * r32: context_t base address - */ -#define SAVE_REGION_REGS \ - add r2=CTX(RR0),r32; \ - mov r16=rr[r0]; \ - dep.z r18=1,61,3; \ - ;; \ - st8 [r2]=r16,8; \ - mov r17=rr[r18]; \ - dep.z r18=2,61,3; \ - ;; \ - st8 [r2]=r17,8; \ - mov r16=rr[r18]; \ - dep.z r18=3,61,3; \ - ;; \ - st8 [r2]=r16,8; \ - mov r17=rr[r18]; \ - dep.z r18=4,61,3; \ - ;; \ - st8 [r2]=r17,8; \ - mov r16=rr[r18]; \ - dep.z r18=5,61,3; \ - ;; \ - st8 [r2]=r16,8; \ - mov r17=rr[r18]; \ - dep.z r18=7,61,3; \ - ;; \ - st8 [r2]=r17,16; \ - mov r16=rr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - ;; - - /* - * r33:context_t base address - */ -#define RESTORE_REGION_REGS \ - add r2=CTX(RR0),r33;\ - mov r18=r0; \ - ;; \ - ld8 r20=[r2],8; \ - ;; /* rr0 */ \ - ld8 r21=[r2],8; \ - ;; /* rr1 */ \ - ld8 r22=[r2],8; \ - ;; /* rr2 */ \ - ld8 r23=[r2],8; \ - ;; /* rr3 */ \ - ld8 r24=[r2],8; \ - ;; /* rr4 */ \ - ld8 r25=[r2],16; \ - ;; /* rr5 */ \ - ld8 r27=[r2]; \ - ;; /* rr7 */ \ - mov rr[r18]=r20; \ - dep.z r18=1,61,3; \ - ;; /* rr1 */ \ - mov rr[r18]=r21; \ - dep.z r18=2,61,3; \ - ;; /* rr2 */ \ - mov rr[r18]=r22; \ - dep.z r18=3,61,3; \ - ;; /* rr3 */ \ - mov rr[r18]=r23; \ - dep.z r18=4,61,3; \ - ;; /* rr4 */ \ - mov rr[r18]=r24; \ - dep.z r18=5,61,3; \ - ;; /* rr5 */ \ - mov rr[r18]=r25; \ - dep.z r18=7,61,3; \ - ;; /* rr7 */ \ - mov rr[r18]=r27; \ - ;; \ - srlz.i; \ - ;; - - - - /* - * r32: context_t base address - * r36~r39:scratch registers - */ -#define SAVE_DEBUG_REGS \ - add r2=CTX(IBR0),r32; \ - add r3=CTX(DBR0),r32; \ - mov r16=ibr[r0]; \ - mov r17=dbr[r0]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=1,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=2,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=2,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=3,r0; \ - ;; \ - mov 
r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=4,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=5,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=6,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=7,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - ;; - - -/* - * r33: point to context_t structure - * ar.lc are corrupted. - */ -#define RESTORE_DEBUG_REGS \ - add r2=CTX(IBR0),r33; \ - add r3=CTX(DBR0),r33; \ - mov r16=7; \ - mov r17=r0; \ - ;; \ - mov ar.lc = r16; \ - ;; \ -1: \ - ld8 r18=[r2],8; \ - ld8 r19=[r3],8; \ - ;; \ - mov ibr[r17]=r18; \ - mov dbr[r17]=r19; \ - ;; \ - srlz.i; \ - ;; \ - add r17=1,r17; \ - br.cloop.sptk 1b; \ - ;; - - - /* - * r32: context_t base address - */ -#define SAVE_FPU_LOW \ - add r2=CTX(F2),r32; \ - add r3=CTX(F3),r32; \ - ;; \ - stf.spill.nta [r2]=f2,32; \ - stf.spill.nta [r3]=f3,32; \ - ;; \ - stf.spill.nta [r2]=f4,32; \ - stf.spill.nta [r3]=f5,32; \ - ;; \ - stf.spill.nta [r2]=f6,32; \ - stf.spill.nta [r3]=f7,32; \ - ;; \ - stf.spill.nta [r2]=f8,32; \ - stf.spill.nta [r3]=f9,32; \ - ;; \ - stf.spill.nta [r2]=f10,32; \ - stf.spill.nta [r3]=f11,32; \ - ;; \ - stf.spill.nta [r2]=f12,32; \ - stf.spill.nta [r3]=f13,32; \ - ;; \ - stf.spill.nta [r2]=f14,32; \ - stf.spill.nta [r3]=f15,32; \ - ;; \ - stf.spill.nta [r2]=f16,32; \ - stf.spill.nta [r3]=f17,32; \ - ;; \ - stf.spill.nta [r2]=f18,32; \ - stf.spill.nta [r3]=f19,32; \ - ;; \ - stf.spill.nta [r2]=f20,32; \ - stf.spill.nta [r3]=f21,32; \ - ;; \ - stf.spill.nta [r2]=f22,32; \ - stf.spill.nta [r3]=f23,32; \ - ;; \ - stf.spill.nta [r2]=f24,32; \ - stf.spill.nta [r3]=f25,32; \ - ;; \ - stf.spill.nta [r2]=f26,32; \ - stf.spill.nta [r3]=f27,32; \ - ;; \ - stf.spill.nta [r2]=f28,32; \ - stf.spill.nta [r3]=f29,32; \ - ;; \ - stf.spill.nta [r2]=f30; \ - stf.spill.nta [r3]=f31; \ - ;; - - /* - * r32: context_t base address - */ -#define SAVE_FPU_HIGH \ - add r2=CTX(F32),r32; \ - add r3=CTX(F33),r32; \ - ;; \ - stf.spill.nta [r2]=f32,32; \ - stf.spill.nta [r3]=f33,32; \ - ;; \ - stf.spill.nta [r2]=f34,32; \ - stf.spill.nta [r3]=f35,32; \ - ;; \ - stf.spill.nta [r2]=f36,32; \ - stf.spill.nta [r3]=f37,32; \ - ;; \ - stf.spill.nta [r2]=f38,32; \ - stf.spill.nta [r3]=f39,32; \ - ;; \ - stf.spill.nta [r2]=f40,32; \ - stf.spill.nta [r3]=f41,32; \ - ;; \ - stf.spill.nta [r2]=f42,32; \ - stf.spill.nta [r3]=f43,32; \ - ;; \ - stf.spill.nta [r2]=f44,32; \ - stf.spill.nta [r3]=f45,32; \ - ;; \ - stf.spill.nta [r2]=f46,32; \ - stf.spill.nta [r3]=f47,32; \ - ;; \ - stf.spill.nta [r2]=f48,32; \ - stf.spill.nta [r3]=f49,32; \ - ;; \ - stf.spill.nta [r2]=f50,32; \ - stf.spill.nta [r3]=f51,32; \ - ;; \ - stf.spill.nta [r2]=f52,32; \ - stf.spill.nta [r3]=f53,32; \ - ;; \ - stf.spill.nta [r2]=f54,32; \ - stf.spill.nta [r3]=f55,32; \ - ;; \ - stf.spill.nta [r2]=f56,32; \ - stf.spill.nta [r3]=f57,32; \ - ;; \ - stf.spill.nta [r2]=f58,32; \ - stf.spill.nta [r3]=f59,32; \ - ;; \ - stf.spill.nta [r2]=f60,32; \ - stf.spill.nta [r3]=f61,32; \ - ;; \ - stf.spill.nta [r2]=f62,32; \ - stf.spill.nta [r3]=f63,32; \ - ;; \ - stf.spill.nta [r2]=f64,32; \ - stf.spill.nta [r3]=f65,32; \ - ;; \ - stf.spill.nta [r2]=f66,32; \ - stf.spill.nta [r3]=f67,32; \ - ;; \ - stf.spill.nta [r2]=f68,32; \ - stf.spill.nta [r3]=f69,32; \ - ;; \ - stf.spill.nta 
[r2]=f70,32; \ - stf.spill.nta [r3]=f71,32; \ - ;; \ - stf.spill.nta [r2]=f72,32; \ - stf.spill.nta [r3]=f73,32; \ - ;; \ - stf.spill.nta [r2]=f74,32; \ - stf.spill.nta [r3]=f75,32; \ - ;; \ - stf.spill.nta [r2]=f76,32; \ - stf.spill.nta [r3]=f77,32; \ - ;; \ - stf.spill.nta [r2]=f78,32; \ - stf.spill.nta [r3]=f79,32; \ - ;; \ - stf.spill.nta [r2]=f80,32; \ - stf.spill.nta [r3]=f81,32; \ - ;; \ - stf.spill.nta [r2]=f82,32; \ - stf.spill.nta [r3]=f83,32; \ - ;; \ - stf.spill.nta [r2]=f84,32; \ - stf.spill.nta [r3]=f85,32; \ - ;; \ - stf.spill.nta [r2]=f86,32; \ - stf.spill.nta [r3]=f87,32; \ - ;; \ - stf.spill.nta [r2]=f88,32; \ - stf.spill.nta [r3]=f89,32; \ - ;; \ - stf.spill.nta [r2]=f90,32; \ - stf.spill.nta [r3]=f91,32; \ - ;; \ - stf.spill.nta [r2]=f92,32; \ - stf.spill.nta [r3]=f93,32; \ - ;; \ - stf.spill.nta [r2]=f94,32; \ - stf.spill.nta [r3]=f95,32; \ - ;; \ - stf.spill.nta [r2]=f96,32; \ - stf.spill.nta [r3]=f97,32; \ - ;; \ - stf.spill.nta [r2]=f98,32; \ - stf.spill.nta [r3]=f99,32; \ - ;; \ - stf.spill.nta [r2]=f100,32; \ - stf.spill.nta [r3]=f101,32; \ - ;; \ - stf.spill.nta [r2]=f102,32; \ - stf.spill.nta [r3]=f103,32; \ - ;; \ - stf.spill.nta [r2]=f104,32; \ - stf.spill.nta [r3]=f105,32; \ - ;; \ - stf.spill.nta [r2]=f106,32; \ - stf.spill.nta [r3]=f107,32; \ - ;; \ - stf.spill.nta [r2]=f108,32; \ - stf.spill.nta [r3]=f109,32; \ - ;; \ - stf.spill.nta [r2]=f110,32; \ - stf.spill.nta [r3]=f111,32; \ - ;; \ - stf.spill.nta [r2]=f112,32; \ - stf.spill.nta [r3]=f113,32; \ - ;; \ - stf.spill.nta [r2]=f114,32; \ - stf.spill.nta [r3]=f115,32; \ - ;; \ - stf.spill.nta [r2]=f116,32; \ - stf.spill.nta [r3]=f117,32; \ - ;; \ - stf.spill.nta [r2]=f118,32; \ - stf.spill.nta [r3]=f119,32; \ - ;; \ - stf.spill.nta [r2]=f120,32; \ - stf.spill.nta [r3]=f121,32; \ - ;; \ - stf.spill.nta [r2]=f122,32; \ - stf.spill.nta [r3]=f123,32; \ - ;; \ - stf.spill.nta [r2]=f124,32; \ - stf.spill.nta [r3]=f125,32; \ - ;; \ - stf.spill.nta [r2]=f126; \ - stf.spill.nta [r3]=f127; \ - ;; - - /* - * r33: point to context_t structure - */ -#define RESTORE_FPU_LOW \ - add r2 = CTX(F2), r33; \ - add r3 = CTX(F3), r33; \ - ;; \ - ldf.fill.nta f2 = [r2], 32; \ - ldf.fill.nta f3 = [r3], 32; \ - ;; \ - ldf.fill.nta f4 = [r2], 32; \ - ldf.fill.nta f5 = [r3], 32; \ - ;; \ - ldf.fill.nta f6 = [r2], 32; \ - ldf.fill.nta f7 = [r3], 32; \ - ;; \ - ldf.fill.nta f8 = [r2], 32; \ - ldf.fill.nta f9 = [r3], 32; \ - ;; \ - ldf.fill.nta f10 = [r2], 32; \ - ldf.fill.nta f11 = [r3], 32; \ - ;; \ - ldf.fill.nta f12 = [r2], 32; \ - ldf.fill.nta f13 = [r3], 32; \ - ;; \ - ldf.fill.nta f14 = [r2], 32; \ - ldf.fill.nta f15 = [r3], 32; \ - ;; \ - ldf.fill.nta f16 = [r2], 32; \ - ldf.fill.nta f17 = [r3], 32; \ - ;; \ - ldf.fill.nta f18 = [r2], 32; \ - ldf.fill.nta f19 = [r3], 32; \ - ;; \ - ldf.fill.nta f20 = [r2], 32; \ - ldf.fill.nta f21 = [r3], 32; \ - ;; \ - ldf.fill.nta f22 = [r2], 32; \ - ldf.fill.nta f23 = [r3], 32; \ - ;; \ - ldf.fill.nta f24 = [r2], 32; \ - ldf.fill.nta f25 = [r3], 32; \ - ;; \ - ldf.fill.nta f26 = [r2], 32; \ - ldf.fill.nta f27 = [r3], 32; \ - ;; \ - ldf.fill.nta f28 = [r2], 32; \ - ldf.fill.nta f29 = [r3], 32; \ - ;; \ - ldf.fill.nta f30 = [r2], 32; \ - ldf.fill.nta f31 = [r3], 32; \ - ;; - - - - /* - * r33: point to context_t structure - */ -#define RESTORE_FPU_HIGH \ - add r2 = CTX(F32), r33; \ - add r3 = CTX(F33), r33; \ - ;; \ - ldf.fill.nta f32 = [r2], 32; \ - ldf.fill.nta f33 = [r3], 32; \ - ;; \ - ldf.fill.nta f34 = [r2], 32; \ - ldf.fill.nta f35 = [r3], 32; \ - ;; \ - ldf.fill.nta f36 = [r2], 32; \ - 
ldf.fill.nta f37 = [r3], 32; \ - ;; \ - ldf.fill.nta f38 = [r2], 32; \ - ldf.fill.nta f39 = [r3], 32; \ - ;; \ - ldf.fill.nta f40 = [r2], 32; \ - ldf.fill.nta f41 = [r3], 32; \ - ;; \ - ldf.fill.nta f42 = [r2], 32; \ - ldf.fill.nta f43 = [r3], 32; \ - ;; \ - ldf.fill.nta f44 = [r2], 32; \ - ldf.fill.nta f45 = [r3], 32; \ - ;; \ - ldf.fill.nta f46 = [r2], 32; \ - ldf.fill.nta f47 = [r3], 32; \ - ;; \ - ldf.fill.nta f48 = [r2], 32; \ - ldf.fill.nta f49 = [r3], 32; \ - ;; \ - ldf.fill.nta f50 = [r2], 32; \ - ldf.fill.nta f51 = [r3], 32; \ - ;; \ - ldf.fill.nta f52 = [r2], 32; \ - ldf.fill.nta f53 = [r3], 32; \ - ;; \ - ldf.fill.nta f54 = [r2], 32; \ - ldf.fill.nta f55 = [r3], 32; \ - ;; \ - ldf.fill.nta f56 = [r2], 32; \ - ldf.fill.nta f57 = [r3], 32; \ - ;; \ - ldf.fill.nta f58 = [r2], 32; \ - ldf.fill.nta f59 = [r3], 32; \ - ;; \ - ldf.fill.nta f60 = [r2], 32; \ - ldf.fill.nta f61 = [r3], 32; \ - ;; \ - ldf.fill.nta f62 = [r2], 32; \ - ldf.fill.nta f63 = [r3], 32; \ - ;; \ - ldf.fill.nta f64 = [r2], 32; \ - ldf.fill.nta f65 = [r3], 32; \ - ;; \ - ldf.fill.nta f66 = [r2], 32; \ - ldf.fill.nta f67 = [r3], 32; \ - ;; \ - ldf.fill.nta f68 = [r2], 32; \ - ldf.fill.nta f69 = [r3], 32; \ - ;; \ - ldf.fill.nta f70 = [r2], 32; \ - ldf.fill.nta f71 = [r3], 32; \ - ;; \ - ldf.fill.nta f72 = [r2], 32; \ - ldf.fill.nta f73 = [r3], 32; \ - ;; \ - ldf.fill.nta f74 = [r2], 32; \ - ldf.fill.nta f75 = [r3], 32; \ - ;; \ - ldf.fill.nta f76 = [r2], 32; \ - ldf.fill.nta f77 = [r3], 32; \ - ;; \ - ldf.fill.nta f78 = [r2], 32; \ - ldf.fill.nta f79 = [r3], 32; \ - ;; \ - ldf.fill.nta f80 = [r2], 32; \ - ldf.fill.nta f81 = [r3], 32; \ - ;; \ - ldf.fill.nta f82 = [r2], 32; \ - ldf.fill.nta f83 = [r3], 32; \ - ;; \ - ldf.fill.nta f84 = [r2], 32; \ - ldf.fill.nta f85 = [r3], 32; \ - ;; \ - ldf.fill.nta f86 = [r2], 32; \ - ldf.fill.nta f87 = [r3], 32; \ - ;; \ - ldf.fill.nta f88 = [r2], 32; \ - ldf.fill.nta f89 = [r3], 32; \ - ;; \ - ldf.fill.nta f90 = [r2], 32; \ - ldf.fill.nta f91 = [r3], 32; \ - ;; \ - ldf.fill.nta f92 = [r2], 32; \ - ldf.fill.nta f93 = [r3], 32; \ - ;; \ - ldf.fill.nta f94 = [r2], 32; \ - ldf.fill.nta f95 = [r3], 32; \ - ;; \ - ldf.fill.nta f96 = [r2], 32; \ - ldf.fill.nta f97 = [r3], 32; \ - ;; \ - ldf.fill.nta f98 = [r2], 32; \ - ldf.fill.nta f99 = [r3], 32; \ - ;; \ - ldf.fill.nta f100 = [r2], 32; \ - ldf.fill.nta f101 = [r3], 32; \ - ;; \ - ldf.fill.nta f102 = [r2], 32; \ - ldf.fill.nta f103 = [r3], 32; \ - ;; \ - ldf.fill.nta f104 = [r2], 32; \ - ldf.fill.nta f105 = [r3], 32; \ - ;; \ - ldf.fill.nta f106 = [r2], 32; \ - ldf.fill.nta f107 = [r3], 32; \ - ;; \ - ldf.fill.nta f108 = [r2], 32; \ - ldf.fill.nta f109 = [r3], 32; \ - ;; \ - ldf.fill.nta f110 = [r2], 32; \ - ldf.fill.nta f111 = [r3], 32; \ - ;; \ - ldf.fill.nta f112 = [r2], 32; \ - ldf.fill.nta f113 = [r3], 32; \ - ;; \ - ldf.fill.nta f114 = [r2], 32; \ - ldf.fill.nta f115 = [r3], 32; \ - ;; \ - ldf.fill.nta f116 = [r2], 32; \ - ldf.fill.nta f117 = [r3], 32; \ - ;; \ - ldf.fill.nta f118 = [r2], 32; \ - ldf.fill.nta f119 = [r3], 32; \ - ;; \ - ldf.fill.nta f120 = [r2], 32; \ - ldf.fill.nta f121 = [r3], 32; \ - ;; \ - ldf.fill.nta f122 = [r2], 32; \ - ldf.fill.nta f123 = [r3], 32; \ - ;; \ - ldf.fill.nta f124 = [r2], 32; \ - ldf.fill.nta f125 = [r3], 32; \ - ;; \ - ldf.fill.nta f126 = [r2], 32; \ - ldf.fill.nta f127 = [r3], 32; \ - ;; - - /* - * r32: context_t base address - */ -#define SAVE_PTK_REGS \ - add r2=CTX(PKR0), r32; \ - mov r16=7; \ - ;; \ - mov ar.lc=r16; \ - mov r17=r0; \ - ;; \ -1: \ - mov r18=pkr[r17]; \ - ;; \ - srlz.i; 
\ - ;; \ - st8 [r2]=r18, 8; \ - ;; \ - add r17 =1,r17; \ - ;; \ - br.cloop.sptk 1b; \ - ;; - -/* - * r33: point to context_t structure - * ar.lc are corrupted. - */ -#define RESTORE_PTK_REGS \ - add r2=CTX(PKR0), r33; \ - mov r16=7; \ - ;; \ - mov ar.lc=r16; \ - mov r17=r0; \ - ;; \ -1: \ - ld8 r18=[r2], 8; \ - ;; \ - mov pkr[r17]=r18; \ - ;; \ - srlz.i; \ - ;; \ - add r17 =1,r17; \ - ;; \ - br.cloop.sptk 1b; \ - ;; - - -/* - * void vmm_trampoline( context_t * from, - * context_t * to) - * - * from: r32 - * to: r33 - * note: interrupt disabled before call this function. - */ -GLOBAL_ENTRY(vmm_trampoline) - mov r16 = psr - adds r2 = CTX(PSR), r32 - ;; - st8 [r2] = r16, 8 // psr - mov r17 = pr - ;; - st8 [r2] = r17, 8 // pr - mov r18 = ar.unat - ;; - st8 [r2] = r18 - mov r17 = ar.rsc - ;; - adds r2 = CTX(RSC),r32 - ;; - st8 [r2]= r17 - mov ar.rsc =0 - flushrs - ;; - SAVE_GENERAL_REGS - ;; - SAVE_KERNEL_REGS - ;; - SAVE_APP_REGS - ;; - SAVE_BRANCH_REGS - ;; - SAVE_CTL_REGS - ;; - SAVE_REGION_REGS - ;; - //SAVE_DEBUG_REGS - ;; - rsm psr.dfl - ;; - srlz.d - ;; - SAVE_FPU_LOW - ;; - rsm psr.dfh - ;; - srlz.d - ;; - SAVE_FPU_HIGH - ;; - SAVE_PTK_REGS - ;; - RESTORE_PTK_REGS - ;; - RESTORE_FPU_HIGH - ;; - RESTORE_FPU_LOW - ;; - //RESTORE_DEBUG_REGS - ;; - RESTORE_REGION_REGS - ;; - RESTORE_CTL_REGS - ;; - RESTORE_BRANCH_REGS - ;; - RESTORE_APP_REGS - ;; - RESTORE_KERNEL_REGS - ;; - RESTORE_GENERAL_REGS - ;; - adds r2=CTX(PSR), r33 - ;; - ld8 r16=[r2], 8 // psr - ;; - mov psr.l=r16 - ;; - srlz.d - ;; - ld8 r16=[r2], 8 // pr - ;; - mov pr =r16,-1 - ld8 r16=[r2] // unat - ;; - mov ar.unat=r16 - ;; - adds r2=CTX(RSC),r33 - ;; - ld8 r16 =[r2] - ;; - mov ar.rsc = r16 - ;; - br.ret.sptk.few b0 -END(vmm_trampoline) diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c deleted file mode 100644 index 958815c9787..00000000000 --- a/arch/ia64/kvm/vcpu.c +++ /dev/null @@ -1,2209 +0,0 @@ -/* - * kvm_vcpu.c: handling all virtual cpu related thing. - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Shaofan Li (Susue Li) <susie.li@intel.com> - * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Xiantao Zhang <xiantao.zhang@intel.com> - */ - -#include <linux/kvm_host.h> -#include <linux/types.h> - -#include <asm/processor.h> -#include <asm/ia64regs.h> -#include <asm/gcc_intrin.h> -#include <asm/kregs.h> -#include <asm/pgtable.h> -#include <asm/tlb.h> - -#include "asm-offsets.h" -#include "vcpu.h" - -/* - * Special notes: - * - Index by it/dt/rt sequence - * - Only existing mode transitions are allowed in this table - * - RSE is placed at lazy mode when emulating guest partial mode - * - If gva happens to be rr0 and rr4, only allowed case is identity - * mapping (gva=gpa), or panic! (How?) 
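The table that follows is indexed by the guest's (it, dt, rt) PSR bits packed into a 3-bit row/column number, so row 0 is full physical mode (0,0,0) and row 7 is full virtual mode (1,1,1). A minimal sketch of that packing, assuming the MODE_IND() helper used by mm_switch_action() further down folds the bits in this order:

    /* hypothetical stand-in for the MODE_IND() macro from vcpu.h */
    static inline int mode_ind(struct ia64_psr psr)
    {
            return (psr.it << 2) | (psr.dt << 1) | psr.rt;
    }

With that encoding, a (1,0,1) -> (1,1,1) transition reads mm_switch_table[5][7], which is SW_P2V, matching the "return from the low level TLB miss handlers" case described in the table's comments.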
- */ -int mm_switch_table[8][8] = { - /* 2004/09/12(Kevin): Allow switch to self */ - /* - * (it,dt,rt): (0,0,0) -> (1,1,1) - * This kind of transition usually occurs in the very early - * stage of Linux boot up procedure. Another case is in efi - * and pal calls. (see "arch/ia64/kernel/head.S") - * - * (it,dt,rt): (0,0,0) -> (0,1,1) - * This kind of transition is found when OSYa exits efi boot - * service. Due to gva = gpa in this case (Same region), - * data access can be satisfied though itlb entry for physical - * emulation is hit. - */ - {SW_SELF, 0, 0, SW_NOP, 0, 0, 0, SW_P2V}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - /* - * (it,dt,rt): (0,1,1) -> (1,1,1) - * This kind of transition is found in OSYa. - * - * (it,dt,rt): (0,1,1) -> (0,0,0) - * This kind of transition is found in OSYa - */ - {SW_NOP, 0, 0, SW_SELF, 0, 0, 0, SW_P2V}, - /* (1,0,0)->(1,1,1) */ - {0, 0, 0, 0, 0, 0, 0, SW_P2V}, - /* - * (it,dt,rt): (1,0,1) -> (1,1,1) - * This kind of transition usually occurs when Linux returns - * from the low level TLB miss handlers. - * (see "arch/ia64/kernel/ivt.S") - */ - {0, 0, 0, 0, 0, SW_SELF, 0, SW_P2V}, - {0, 0, 0, 0, 0, 0, 0, 0}, - /* - * (it,dt,rt): (1,1,1) -> (1,0,1) - * This kind of transition usually occurs in Linux low level - * TLB miss handler. (see "arch/ia64/kernel/ivt.S") - * - * (it,dt,rt): (1,1,1) -> (0,0,0) - * This kind of transition usually occurs in pal and efi calls, - * which requires running in physical mode. - * (see "arch/ia64/kernel/head.S") - * (1,1,1)->(1,0,0) - */ - - {SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF}, -}; - -void physical_mode_init(struct kvm_vcpu *vcpu) -{ - vcpu->arch.mode_flags = GUEST_IN_PHY; -} - -void switch_to_physical_rid(struct kvm_vcpu *vcpu) -{ - unsigned long psr; - - /* Save original virtual mode rr[0] and rr[4] */ - psr = ia64_clear_ic(); - ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0); - ia64_srlz_d(); - ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4); - ia64_srlz_d(); - - ia64_set_psr(psr); - return; -} - -void switch_to_virtual_rid(struct kvm_vcpu *vcpu) -{ - unsigned long psr; - - psr = ia64_clear_ic(); - ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0); - ia64_srlz_d(); - ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4); - ia64_srlz_d(); - ia64_set_psr(psr); - return; -} - -static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr) -{ - return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)]; -} - -void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, - struct ia64_psr new_psr) -{ - int act; - act = mm_switch_action(old_psr, new_psr); - switch (act) { - case SW_V2P: - /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n", - old_psr.val, new_psr.val);*/ - switch_to_physical_rid(vcpu); - /* - * Set rse to enforced lazy, to prevent active rse - *save/restor when guest physical mode. - */ - vcpu->arch.mode_flags |= GUEST_IN_PHY; - break; - case SW_P2V: - switch_to_virtual_rid(vcpu); - /* - * recover old mode which is saved when entering - * guest physical mode - */ - vcpu->arch.mode_flags &= ~GUEST_IN_PHY; - break; - case SW_SELF: - break; - case SW_NOP: - break; - default: - /* Sanity check */ - break; - } - return; -} - -/* - * In physical mode, insert tc/tr for region 0 and 4 uses - * RID[0] and RID[4] which is for physical mode emulation. - * However what those inserted tc/tr wants is rid for - * virtual mode. So original virtual rid needs to be restored - * before insert. 
- * - * Operations which required such switch include: - * - insertions (itc.*, itr.*) - * - purges (ptc.* and ptr.*) - * - tpa - * - tak - * - thash?, ttag? - * All above needs actual virtual rid for destination entry. - */ - -void check_mm_mode_switch(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, - struct ia64_psr new_psr) -{ - - if ((old_psr.dt != new_psr.dt) - || (old_psr.it != new_psr.it) - || (old_psr.rt != new_psr.rt)) - switch_mm_mode(vcpu, old_psr, new_psr); - - return; -} - - -/* - * In physical mode, insert tc/tr for region 0 and 4 uses - * RID[0] and RID[4] which is for physical mode emulation. - * However what those inserted tc/tr wants is rid for - * virtual mode. So original virtual rid needs to be restored - * before insert. - * - * Operations which required such switch include: - * - insertions (itc.*, itr.*) - * - purges (ptc.* and ptr.*) - * - tpa - * - tak - * - thash?, ttag? - * All above needs actual virtual rid for destination entry. - */ - -void prepare_if_physical_mode(struct kvm_vcpu *vcpu) -{ - if (is_physical_mode(vcpu)) { - vcpu->arch.mode_flags |= GUEST_PHY_EMUL; - switch_to_virtual_rid(vcpu); - } - return; -} - -/* Recover always follows prepare */ -void recover_if_physical_mode(struct kvm_vcpu *vcpu) -{ - if (is_physical_mode(vcpu)) - switch_to_physical_rid(vcpu); - vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL; - return; -} - -#define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x) - -static u16 gr_info[32] = { - 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ - RPT(r1), RPT(r2), RPT(r3), - RPT(r4), RPT(r5), RPT(r6), RPT(r7), - RPT(r8), RPT(r9), RPT(r10), RPT(r11), - RPT(r12), RPT(r13), RPT(r14), RPT(r15), - RPT(r16), RPT(r17), RPT(r18), RPT(r19), - RPT(r20), RPT(r21), RPT(r22), RPT(r23), - RPT(r24), RPT(r25), RPT(r26), RPT(r27), - RPT(r28), RPT(r29), RPT(r30), RPT(r31) -}; - -#define IA64_FIRST_STACKED_GR 32 -#define IA64_FIRST_ROTATING_FR 32 - -static inline unsigned long -rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg) -{ - reg += rrb; - if (reg >= sor) - reg -= sor; - return reg; -} - -/* - * Return the (rotated) index for floating point register - * be in the REGNUM (REGNUM must range from 32-127, - * result is in the range from 0-95. - */ -static inline unsigned long fph_index(struct kvm_pt_regs *regs, - long regnum) -{ - unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f; - return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); -} - -/* - * The inverse of the above: given bspstore and the number of - * registers, calculate ar.bsp. 
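Every 64th slot of the RSE backing store (slot number 63 within each 0x200-byte group) holds an RNAT collection word instead of a register, so advancing by num_regs registers must also step over any interleaved RNAT slots; that is what the 0x3f arithmetic in kvm_rse_skip_regs() below does. A hand-worked trace, assuming ia64_rse_slot_num() yields bits 3..8 of the address as in asm/rse.h:

    /*
     * Illustrative trace only: starting at slot 60 and skipping
     * 5 registers crosses one RNAT slot, so the pointer moves by
     * 6 slots rather than 5:
     *   delta = 60 + 5 = 65;  65 >= 0x3f  ->  i = 1, delta = 2
     *   return addr + 5 + 1;
     */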
- */ -static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr, - long num_regs) -{ - long delta = ia64_rse_slot_num(addr) + num_regs; - int i = 0; - - if (num_regs < 0) - delta -= 0x3e; - if (delta < 0) { - while (delta <= -0x3f) { - i--; - delta += 0x3f; - } - } else { - while (delta >= 0x3f) { - i++; - delta -= 0x3f; - } - } - - return addr + num_regs + i; -} - -static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, - unsigned long *val, int *nat) -{ - unsigned long *bsp, *addr, *rnat_addr, *bspstore; - unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; - unsigned long nat_mask; - unsigned long old_rsc, new_rsc; - long sof = (regs->cr_ifs) & 0x7f; - long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); - long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; - long ridx = r1 - 32; - - if (ridx < sor) - ridx = rotate_reg(sor, rrb_gr, ridx); - - old_rsc = ia64_getreg(_IA64_REG_AR_RSC); - new_rsc = old_rsc&(~(0x3)); - ia64_setreg(_IA64_REG_AR_RSC, new_rsc); - - bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); - bsp = kbs + (regs->loadrs >> 19); - - addr = kvm_rse_skip_regs(bsp, -sof + ridx); - nat_mask = 1UL << ia64_rse_slot_num(addr); - rnat_addr = ia64_rse_rnat_addr(addr); - - if (addr >= bspstore) { - ia64_flushrs(); - ia64_mf(); - bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); - } - *val = *addr; - if (nat) { - if (bspstore < rnat_addr) - *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT) - & nat_mask); - else - *nat = (int)!!((*rnat_addr) & nat_mask); - ia64_setreg(_IA64_REG_AR_RSC, old_rsc); - } -} - -void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, - unsigned long val, unsigned long nat) -{ - unsigned long *bsp, *bspstore, *addr, *rnat_addr; - unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; - unsigned long nat_mask; - unsigned long old_rsc, new_rsc, psr; - unsigned long rnat; - long sof = (regs->cr_ifs) & 0x7f; - long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); - long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; - long ridx = r1 - 32; - - if (ridx < sor) - ridx = rotate_reg(sor, rrb_gr, ridx); - - old_rsc = ia64_getreg(_IA64_REG_AR_RSC); - /* put RSC to lazy mode, and set loadrs 0 */ - new_rsc = old_rsc & (~0x3fff0003); - ia64_setreg(_IA64_REG_AR_RSC, new_rsc); - bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */ - - addr = kvm_rse_skip_regs(bsp, -sof + ridx); - nat_mask = 1UL << ia64_rse_slot_num(addr); - rnat_addr = ia64_rse_rnat_addr(addr); - - local_irq_save(psr); - bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); - if (addr >= bspstore) { - - ia64_flushrs(); - ia64_mf(); - *addr = val; - bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); - rnat = ia64_getreg(_IA64_REG_AR_RNAT); - if (bspstore < rnat_addr) - rnat = rnat & (~nat_mask); - else - *rnat_addr = (*rnat_addr)&(~nat_mask); - - ia64_mf(); - ia64_loadrs(); - ia64_setreg(_IA64_REG_AR_RNAT, rnat); - } else { - rnat = ia64_getreg(_IA64_REG_AR_RNAT); - *addr = val; - if (bspstore < rnat_addr) - rnat = rnat&(~nat_mask); - else - *rnat_addr = (*rnat_addr) & (~nat_mask); - - ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore); - ia64_setreg(_IA64_REG_AR_RNAT, rnat); - } - local_irq_restore(psr); - ia64_setreg(_IA64_REG_AR_RSC, old_rsc); -} - -void getreg(unsigned long regnum, unsigned long *val, - int *nat, struct kvm_pt_regs *regs) -{ - unsigned long addr, *unat; - if (regnum >= IA64_FIRST_STACKED_GR) { - get_rse_reg(regs, regnum, val, nat); - return; - } - - /* - * Now look at registers in [0-31] range and init correct UNAT - */ - addr = 
(unsigned long)regs; - unat = ®s->eml_unat; - - addr += gr_info[regnum]; - - *val = *(unsigned long *)addr; - /* - * do it only when requested - */ - if (nat) - *nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL; -} - -void setreg(unsigned long regnum, unsigned long val, - int nat, struct kvm_pt_regs *regs) -{ - unsigned long addr; - unsigned long bitmask; - unsigned long *unat; - - /* - * First takes care of stacked registers - */ - if (regnum >= IA64_FIRST_STACKED_GR) { - set_rse_reg(regs, regnum, val, nat); - return; - } - - /* - * Now look at registers in [0-31] range and init correct UNAT - */ - addr = (unsigned long)regs; - unat = ®s->eml_unat; - /* - * add offset from base of struct - * and do it ! - */ - addr += gr_info[regnum]; - - *(unsigned long *)addr = val; - - /* - * We need to clear the corresponding UNAT bit to fully emulate the load - * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4 - */ - bitmask = 1UL << ((addr >> 3) & 0x3f); - if (nat) - *unat |= bitmask; - else - *unat &= ~bitmask; - -} - -u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - unsigned long val; - - if (!reg) - return 0; - getreg(reg, &val, 0, regs); - return val; -} - -void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - long sof = (regs->cr_ifs) & 0x7f; - - if (!reg) - return; - if (reg >= sof + 32) - return; - setreg(reg, value, nat, regs); /* FIXME: handle NATs later*/ -} - -void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval, - struct kvm_pt_regs *regs) -{ - /* Take floating register rotation into consideration*/ - if (regnum >= IA64_FIRST_ROTATING_FR) - regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); -#define CASE_FIXED_FP(reg) \ - case (reg) : \ - ia64_stf_spill(fpval, reg); \ - break - - switch (regnum) { - CASE_FIXED_FP(0); - CASE_FIXED_FP(1); - CASE_FIXED_FP(2); - CASE_FIXED_FP(3); - CASE_FIXED_FP(4); - CASE_FIXED_FP(5); - - CASE_FIXED_FP(6); - CASE_FIXED_FP(7); - CASE_FIXED_FP(8); - CASE_FIXED_FP(9); - CASE_FIXED_FP(10); - CASE_FIXED_FP(11); - - CASE_FIXED_FP(12); - CASE_FIXED_FP(13); - CASE_FIXED_FP(14); - CASE_FIXED_FP(15); - CASE_FIXED_FP(16); - CASE_FIXED_FP(17); - CASE_FIXED_FP(18); - CASE_FIXED_FP(19); - CASE_FIXED_FP(20); - CASE_FIXED_FP(21); - CASE_FIXED_FP(22); - CASE_FIXED_FP(23); - CASE_FIXED_FP(24); - CASE_FIXED_FP(25); - CASE_FIXED_FP(26); - CASE_FIXED_FP(27); - CASE_FIXED_FP(28); - CASE_FIXED_FP(29); - CASE_FIXED_FP(30); - CASE_FIXED_FP(31); - CASE_FIXED_FP(32); - CASE_FIXED_FP(33); - CASE_FIXED_FP(34); - CASE_FIXED_FP(35); - CASE_FIXED_FP(36); - CASE_FIXED_FP(37); - CASE_FIXED_FP(38); - CASE_FIXED_FP(39); - CASE_FIXED_FP(40); - CASE_FIXED_FP(41); - CASE_FIXED_FP(42); - CASE_FIXED_FP(43); - CASE_FIXED_FP(44); - CASE_FIXED_FP(45); - CASE_FIXED_FP(46); - CASE_FIXED_FP(47); - CASE_FIXED_FP(48); - CASE_FIXED_FP(49); - CASE_FIXED_FP(50); - CASE_FIXED_FP(51); - CASE_FIXED_FP(52); - CASE_FIXED_FP(53); - CASE_FIXED_FP(54); - CASE_FIXED_FP(55); - CASE_FIXED_FP(56); - CASE_FIXED_FP(57); - CASE_FIXED_FP(58); - CASE_FIXED_FP(59); - CASE_FIXED_FP(60); - CASE_FIXED_FP(61); - CASE_FIXED_FP(62); - CASE_FIXED_FP(63); - CASE_FIXED_FP(64); - CASE_FIXED_FP(65); - CASE_FIXED_FP(66); - CASE_FIXED_FP(67); - CASE_FIXED_FP(68); - CASE_FIXED_FP(69); - CASE_FIXED_FP(70); - CASE_FIXED_FP(71); - CASE_FIXED_FP(72); - CASE_FIXED_FP(73); - CASE_FIXED_FP(74); - CASE_FIXED_FP(75); - CASE_FIXED_FP(76); - CASE_FIXED_FP(77); - CASE_FIXED_FP(78); - CASE_FIXED_FP(79); - 
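    /*
     * stf.spill encodes the fp register number in the instruction
     * itself, so ia64_stf_spill() cannot be driven by a runtime
     * index; hence one case per register, f0 through f127.
     */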
CASE_FIXED_FP(80); - CASE_FIXED_FP(81); - CASE_FIXED_FP(82); - CASE_FIXED_FP(83); - CASE_FIXED_FP(84); - CASE_FIXED_FP(85); - CASE_FIXED_FP(86); - CASE_FIXED_FP(87); - CASE_FIXED_FP(88); - CASE_FIXED_FP(89); - CASE_FIXED_FP(90); - CASE_FIXED_FP(91); - CASE_FIXED_FP(92); - CASE_FIXED_FP(93); - CASE_FIXED_FP(94); - CASE_FIXED_FP(95); - CASE_FIXED_FP(96); - CASE_FIXED_FP(97); - CASE_FIXED_FP(98); - CASE_FIXED_FP(99); - CASE_FIXED_FP(100); - CASE_FIXED_FP(101); - CASE_FIXED_FP(102); - CASE_FIXED_FP(103); - CASE_FIXED_FP(104); - CASE_FIXED_FP(105); - CASE_FIXED_FP(106); - CASE_FIXED_FP(107); - CASE_FIXED_FP(108); - CASE_FIXED_FP(109); - CASE_FIXED_FP(110); - CASE_FIXED_FP(111); - CASE_FIXED_FP(112); - CASE_FIXED_FP(113); - CASE_FIXED_FP(114); - CASE_FIXED_FP(115); - CASE_FIXED_FP(116); - CASE_FIXED_FP(117); - CASE_FIXED_FP(118); - CASE_FIXED_FP(119); - CASE_FIXED_FP(120); - CASE_FIXED_FP(121); - CASE_FIXED_FP(122); - CASE_FIXED_FP(123); - CASE_FIXED_FP(124); - CASE_FIXED_FP(125); - CASE_FIXED_FP(126); - CASE_FIXED_FP(127); - } -#undef CASE_FIXED_FP -} - -void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval, - struct kvm_pt_regs *regs) -{ - /* Take floating register rotation into consideration*/ - if (regnum >= IA64_FIRST_ROTATING_FR) - regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); - -#define CASE_FIXED_FP(reg) \ - case (reg) : \ - ia64_ldf_fill(reg, fpval); \ - break - - switch (regnum) { - CASE_FIXED_FP(2); - CASE_FIXED_FP(3); - CASE_FIXED_FP(4); - CASE_FIXED_FP(5); - - CASE_FIXED_FP(6); - CASE_FIXED_FP(7); - CASE_FIXED_FP(8); - CASE_FIXED_FP(9); - CASE_FIXED_FP(10); - CASE_FIXED_FP(11); - - CASE_FIXED_FP(12); - CASE_FIXED_FP(13); - CASE_FIXED_FP(14); - CASE_FIXED_FP(15); - CASE_FIXED_FP(16); - CASE_FIXED_FP(17); - CASE_FIXED_FP(18); - CASE_FIXED_FP(19); - CASE_FIXED_FP(20); - CASE_FIXED_FP(21); - CASE_FIXED_FP(22); - CASE_FIXED_FP(23); - CASE_FIXED_FP(24); - CASE_FIXED_FP(25); - CASE_FIXED_FP(26); - CASE_FIXED_FP(27); - CASE_FIXED_FP(28); - CASE_FIXED_FP(29); - CASE_FIXED_FP(30); - CASE_FIXED_FP(31); - CASE_FIXED_FP(32); - CASE_FIXED_FP(33); - CASE_FIXED_FP(34); - CASE_FIXED_FP(35); - CASE_FIXED_FP(36); - CASE_FIXED_FP(37); - CASE_FIXED_FP(38); - CASE_FIXED_FP(39); - CASE_FIXED_FP(40); - CASE_FIXED_FP(41); - CASE_FIXED_FP(42); - CASE_FIXED_FP(43); - CASE_FIXED_FP(44); - CASE_FIXED_FP(45); - CASE_FIXED_FP(46); - CASE_FIXED_FP(47); - CASE_FIXED_FP(48); - CASE_FIXED_FP(49); - CASE_FIXED_FP(50); - CASE_FIXED_FP(51); - CASE_FIXED_FP(52); - CASE_FIXED_FP(53); - CASE_FIXED_FP(54); - CASE_FIXED_FP(55); - CASE_FIXED_FP(56); - CASE_FIXED_FP(57); - CASE_FIXED_FP(58); - CASE_FIXED_FP(59); - CASE_FIXED_FP(60); - CASE_FIXED_FP(61); - CASE_FIXED_FP(62); - CASE_FIXED_FP(63); - CASE_FIXED_FP(64); - CASE_FIXED_FP(65); - CASE_FIXED_FP(66); - CASE_FIXED_FP(67); - CASE_FIXED_FP(68); - CASE_FIXED_FP(69); - CASE_FIXED_FP(70); - CASE_FIXED_FP(71); - CASE_FIXED_FP(72); - CASE_FIXED_FP(73); - CASE_FIXED_FP(74); - CASE_FIXED_FP(75); - CASE_FIXED_FP(76); - CASE_FIXED_FP(77); - CASE_FIXED_FP(78); - CASE_FIXED_FP(79); - CASE_FIXED_FP(80); - CASE_FIXED_FP(81); - CASE_FIXED_FP(82); - CASE_FIXED_FP(83); - CASE_FIXED_FP(84); - CASE_FIXED_FP(85); - CASE_FIXED_FP(86); - CASE_FIXED_FP(87); - CASE_FIXED_FP(88); - CASE_FIXED_FP(89); - CASE_FIXED_FP(90); - CASE_FIXED_FP(91); - CASE_FIXED_FP(92); - CASE_FIXED_FP(93); - CASE_FIXED_FP(94); - CASE_FIXED_FP(95); - CASE_FIXED_FP(96); - CASE_FIXED_FP(97); - CASE_FIXED_FP(98); - CASE_FIXED_FP(99); - CASE_FIXED_FP(100); - CASE_FIXED_FP(101); - CASE_FIXED_FP(102); - 
CASE_FIXED_FP(103); - CASE_FIXED_FP(104); - CASE_FIXED_FP(105); - CASE_FIXED_FP(106); - CASE_FIXED_FP(107); - CASE_FIXED_FP(108); - CASE_FIXED_FP(109); - CASE_FIXED_FP(110); - CASE_FIXED_FP(111); - CASE_FIXED_FP(112); - CASE_FIXED_FP(113); - CASE_FIXED_FP(114); - CASE_FIXED_FP(115); - CASE_FIXED_FP(116); - CASE_FIXED_FP(117); - CASE_FIXED_FP(118); - CASE_FIXED_FP(119); - CASE_FIXED_FP(120); - CASE_FIXED_FP(121); - CASE_FIXED_FP(122); - CASE_FIXED_FP(123); - CASE_FIXED_FP(124); - CASE_FIXED_FP(125); - CASE_FIXED_FP(126); - CASE_FIXED_FP(127); - } -} - -void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, - struct ia64_fpreg *val) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - getfpreg(reg, val, regs); /* FIXME: handle NATs later*/ -} - -void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, - struct ia64_fpreg *val) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - if (reg > 1) - setfpreg(reg, val, regs); /* FIXME: handle NATs later*/ -} - -/* - * The Altix RTC is mapped specially here for the vmm module - */ -#define SN_RTC_BASE (u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT)) -static long kvm_get_itc(struct kvm_vcpu *vcpu) -{ -#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) - struct kvm *kvm = (struct kvm *)KVM_VM_BASE; - - if (kvm->arch.is_sn2) - return (*SN_RTC_BASE); - else -#endif - return ia64_getreg(_IA64_REG_AR_ITC); -} - -/************************************************************************ - * lsapic timer - ***********************************************************************/ -u64 vcpu_get_itc(struct kvm_vcpu *vcpu) -{ - unsigned long guest_itc; - guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu); - - if (guest_itc >= VMX(vcpu, last_itc)) { - VMX(vcpu, last_itc) = guest_itc; - return guest_itc; - } else - return VMX(vcpu, last_itc); -} - -static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val); -static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) -{ - struct kvm_vcpu *v; - struct kvm *kvm; - int i; - long itc_offset = val - kvm_get_itc(vcpu); - unsigned long vitv = VCPU(vcpu, itv); - - kvm = (struct kvm *)KVM_VM_BASE; - - if (kvm_vcpu_is_bsp(vcpu)) { - for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) { - v = (struct kvm_vcpu *)((char *)vcpu + - sizeof(struct kvm_vcpu_data) * i); - VMX(v, itc_offset) = itc_offset; - VMX(v, last_itc) = 0; - } - } - VMX(vcpu, last_itc) = 0; - if (VCPU(vcpu, itm) <= val) { - VMX(vcpu, itc_check) = 0; - vcpu_unpend_interrupt(vcpu, vitv); - } else { - VMX(vcpu, itc_check) = 1; - vcpu_set_itm(vcpu, VCPU(vcpu, itm)); - } - -} - -static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, itm)); -} - -static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val) -{ - unsigned long vitv = VCPU(vcpu, itv); - VCPU(vcpu, itm) = val; - - if (val > vcpu_get_itc(vcpu)) { - VMX(vcpu, itc_check) = 1; - vcpu_unpend_interrupt(vcpu, vitv); - VMX(vcpu, timer_pending) = 0; - } else - VMX(vcpu, itc_check) = 0; -} - -#define ITV_VECTOR(itv) (itv&0xff) -#define ITV_IRQ_MASK(itv) (itv&(1<<16)) - -static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, itv) = val; - if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) { - vcpu_pend_interrupt(vcpu, ITV_VECTOR(val)); - vcpu->arch.timer_pending = 0; - } -} - -static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val) -{ - int vec; - - vec = highest_inservice_irq(vcpu); - if (vec == NULL_VECTOR) - return; - VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63)); - VCPU(vcpu, eoi) = 0; - vcpu->arch.irq_new_pending = 
1; - -} - -/* See Table 5-8 in SDM vol2 for the definition */ -int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice) -{ - union ia64_tpr vtpr; - - vtpr.val = VCPU(vcpu, tpr); - - if (h_inservice == NMI_VECTOR) - return IRQ_MASKED_BY_INSVC; - - if (h_pending == NMI_VECTOR) { - /* Non Maskable Interrupt */ - return IRQ_NO_MASKED; - } - - if (h_inservice == ExtINT_VECTOR) - return IRQ_MASKED_BY_INSVC; - - if (h_pending == ExtINT_VECTOR) { - if (vtpr.mmi) { - /* mask all external IRQ */ - return IRQ_MASKED_BY_VTPR; - } else - return IRQ_NO_MASKED; - } - - if (is_higher_irq(h_pending, h_inservice)) { - if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4))) - return IRQ_NO_MASKED; - else - return IRQ_MASKED_BY_VTPR; - } else { - return IRQ_MASKED_BY_INSVC; - } -} - -void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec) -{ - long spsr; - int ret; - - local_irq_save(spsr); - ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0])); - local_irq_restore(spsr); - - vcpu->arch.irq_new_pending = 1; -} - -void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec) -{ - long spsr; - int ret; - - local_irq_save(spsr); - ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0])); - local_irq_restore(spsr); - if (ret) { - vcpu->arch.irq_new_pending = 1; - wmb(); - } -} - -void update_vhpi(struct kvm_vcpu *vcpu, int vec) -{ - u64 vhpi; - - if (vec == NULL_VECTOR) - vhpi = 0; - else if (vec == NMI_VECTOR) - vhpi = 32; - else if (vec == ExtINT_VECTOR) - vhpi = 16; - else - vhpi = vec >> 4; - - VCPU(vcpu, vhpi) = vhpi; - if (VCPU(vcpu, vac).a_int) - ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT, - (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0); -} - -u64 vcpu_get_ivr(struct kvm_vcpu *vcpu) -{ - int vec, h_inservice, mask; - - vec = highest_pending_irq(vcpu); - h_inservice = highest_inservice_irq(vcpu); - mask = irq_masked(vcpu, vec, h_inservice); - if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) { - if (VCPU(vcpu, vhpi)) - update_vhpi(vcpu, NULL_VECTOR); - return IA64_SPURIOUS_INT_VECTOR; - } - if (mask == IRQ_MASKED_BY_VTPR) { - update_vhpi(vcpu, vec); - return IA64_SPURIOUS_INT_VECTOR; - } - VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63)); - vcpu_unpend_interrupt(vcpu, vec); - return (u64)vec; -} - -/************************************************************************** - Privileged operation emulation routines - **************************************************************************/ -u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr) -{ - union ia64_pta vpta; - union ia64_rr vrr; - u64 pval; - u64 vhpt_offset; - - vpta.val = vcpu_get_pta(vcpu); - vrr.val = vcpu_get_rr(vcpu, vadr); - vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1); - if (vpta.vf) { - pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val, - vpta.val, 0, 0, 0, 0); - } else { - pval = (vadr & VRN_MASK) | vhpt_offset | - (vpta.val << 3 >> (vpta.size + 3) << (vpta.size)); - } - return pval; -} - -u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr) -{ - union ia64_rr vrr; - union ia64_pta vpta; - u64 pval; - - vpta.val = vcpu_get_pta(vcpu); - vrr.val = vcpu_get_rr(vcpu, vadr); - if (vpta.vf) { - pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val, - 0, 0, 0, 0, 0); - } else - pval = 1; - - return pval; -} - -u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr) -{ - struct thash_data *data; - union ia64_pta vpta; - u64 key; - - vpta.val = vcpu_get_pta(vcpu); - if (vpta.vf == 0) { - key = 1; - return key; - } - data = vtlb_lookup(vcpu, vadr, D_TLB); - if (!data || !data->p) - key = 1; - else - key = data->key; - - return key; -} - -void 
kvm_thash(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long thash, vadr; - - vadr = vcpu_get_gr(vcpu, inst.M46.r3); - thash = vcpu_thash(vcpu, vadr); - vcpu_set_gr(vcpu, inst.M46.r1, thash, 0); -} - -void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long tag, vadr; - - vadr = vcpu_get_gr(vcpu, inst.M46.r3); - tag = vcpu_ttag(vcpu, vadr); - vcpu_set_gr(vcpu, inst.M46.r1, tag, 0); -} - -int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr) -{ - struct thash_data *data; - union ia64_isr visr, pt_isr; - struct kvm_pt_regs *regs; - struct ia64_psr vpsr; - - regs = vcpu_regs(vcpu); - pt_isr.val = VMX(vcpu, cr_isr); - visr.val = 0; - visr.ei = pt_isr.ei; - visr.ir = pt_isr.ir; - vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - visr.na = 1; - - data = vhpt_lookup(vadr); - if (data) { - if (data->p == 0) { - vcpu_set_isr(vcpu, visr.val); - data_page_not_present(vcpu, vadr); - return IA64_FAULT; - } else if (data->ma == VA_MATTR_NATPAGE) { - vcpu_set_isr(vcpu, visr.val); - dnat_page_consumption(vcpu, vadr); - return IA64_FAULT; - } else { - *padr = (data->gpaddr >> data->ps << data->ps) | - (vadr & (PSIZE(data->ps) - 1)); - return IA64_NO_FAULT; - } - } - - data = vtlb_lookup(vcpu, vadr, D_TLB); - if (data) { - if (data->p == 0) { - vcpu_set_isr(vcpu, visr.val); - data_page_not_present(vcpu, vadr); - return IA64_FAULT; - } else if (data->ma == VA_MATTR_NATPAGE) { - vcpu_set_isr(vcpu, visr.val); - dnat_page_consumption(vcpu, vadr); - return IA64_FAULT; - } else{ - *padr = ((data->ppn >> (data->ps - 12)) << data->ps) - | (vadr & (PSIZE(data->ps) - 1)); - return IA64_NO_FAULT; - } - } - if (!vhpt_enabled(vcpu, vadr, NA_REF)) { - if (vpsr.ic) { - vcpu_set_isr(vcpu, visr.val); - alt_dtlb(vcpu, vadr); - return IA64_FAULT; - } else { - nested_dtlb(vcpu); - return IA64_FAULT; - } - } else { - if (vpsr.ic) { - vcpu_set_isr(vcpu, visr.val); - dvhpt_fault(vcpu, vadr); - return IA64_FAULT; - } else{ - nested_dtlb(vcpu); - return IA64_FAULT; - } - } - - return IA64_NO_FAULT; -} - -int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r1, r3; - - r3 = vcpu_get_gr(vcpu, inst.M46.r3); - - if (vcpu_tpa(vcpu, r3, &r1)) - return IA64_FAULT; - - vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); - return(IA64_NO_FAULT); -} - -void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r1, r3; - - r3 = vcpu_get_gr(vcpu, inst.M46.r3); - r1 = vcpu_tak(vcpu, r3); - vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); -} - -/************************************ - * Insert/Purge translation register/cache - ************************************/ -void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) -{ - thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB); -} - -void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) -{ - thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB); -} - -void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) -{ - u64 ps, va, rid; - struct thash_data *p_itr; - - ps = itir_ps(itir); - va = PAGEALIGN(ifa, ps); - pte &= ~PAGE_FLAGS_RV_MASK; - rid = vcpu_get_rr(vcpu, ifa); - rid = rid & RR_RID_MASK; - p_itr = (struct thash_data *)&vcpu->arch.itrs[slot]; - vcpu_set_tr(p_itr, pte, itir, va, rid); - vcpu_quick_region_set(VMX(vcpu, itr_regions), va); -} - - -void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) -{ - u64 gpfn; - u64 ps, va, rid; - struct thash_data *p_dtr; - - ps = itir_ps(itir); - va = PAGEALIGN(ifa, ps); - pte &= ~PAGE_FLAGS_RV_MASK; - - if (ps != _PAGE_SIZE_16M) - thash_purge_entries(vcpu, va, 
ps); - gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT; - if (__gpfn_is_io(gpfn)) - pte |= VTLB_PTE_IO; - rid = vcpu_get_rr(vcpu, va); - rid = rid & RR_RID_MASK; - p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot]; - vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot], - pte, itir, va, rid); - vcpu_quick_region_set(VMX(vcpu, dtr_regions), va); -} - -void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) -{ - int index; - u64 va; - - va = PAGEALIGN(ifa, ps); - while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0) - vcpu->arch.dtrs[index].page_flags = 0; - - thash_purge_entries(vcpu, va, ps); -} - -void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) -{ - int index; - u64 va; - - va = PAGEALIGN(ifa, ps); - while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0) - vcpu->arch.itrs[index].page_flags = 0; - - thash_purge_entries(vcpu, va, ps); -} - -void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps) -{ - va = PAGEALIGN(va, ps); - thash_purge_entries(vcpu, va, ps); -} - -void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va) -{ - thash_purge_all(vcpu); -} - -void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - long psr; - local_irq_save(psr); - p->exit_reason = EXIT_REASON_PTC_G; - - p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va); - p->u.ptc_g_data.vaddr = va; - p->u.ptc_g_data.ps = ps; - vmm_transition(vcpu); - /* Do Local Purge Here*/ - vcpu_ptc_l(vcpu, va, ps); - local_irq_restore(psr); -} - - -void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps) -{ - vcpu_ptc_ga(vcpu, va, ps); -} - -void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - vcpu_ptc_e(vcpu, ifa); -} - -void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptc_g(vcpu, ifa, itir_ps(itir)); -} - -void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptc_ga(vcpu, ifa, itir_ps(itir)); -} - -void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptc_l(vcpu, ifa, itir_ps(itir)); -} - -void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptr_d(vcpu, ifa, itir_ps(itir)); -} - -void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptr_i(vcpu, ifa, itir_ps(itir)); -} - -void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long itir, ifa, pte, slot; - - slot = vcpu_get_gr(vcpu, inst.M45.r3); - pte = vcpu_get_gr(vcpu, inst.M45.r2); - itir = vcpu_get_itir(vcpu); - ifa = vcpu_get_ifa(vcpu); - vcpu_itr_d(vcpu, slot, pte, itir, ifa); -} - - - -void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long itir, ifa, pte, slot; - - slot = vcpu_get_gr(vcpu, inst.M45.r3); - pte = vcpu_get_gr(vcpu, inst.M45.r2); - itir = vcpu_get_itir(vcpu); - ifa = vcpu_get_ifa(vcpu); - vcpu_itr_i(vcpu, slot, pte, itir, ifa); -} - -void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long itir, ifa, pte; - - itir = vcpu_get_itir(vcpu); - ifa = vcpu_get_ifa(vcpu); - pte = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_itc_d(vcpu, pte, 
itir, ifa); -} - -void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long itir, ifa, pte; - - itir = vcpu_get_itir(vcpu); - ifa = vcpu_get_ifa(vcpu); - pte = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_itc_i(vcpu, pte, itir, ifa); -} - -/************************************* - * Moves to semi-privileged registers - *************************************/ - -void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long imm; - - if (inst.M30.s) - imm = -inst.M30.imm; - else - imm = inst.M30.imm; - - vcpu_set_itc(vcpu, imm); -} - -void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r2; - - r2 = vcpu_get_gr(vcpu, inst.M29.r2); - vcpu_set_itc(vcpu, r2); -} - -void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r1; - - r1 = vcpu_get_itc(vcpu); - vcpu_set_gr(vcpu, inst.M31.r1, r1, 0); -} - -/************************************************************************** - struct kvm_vcpu protection key register access routines - **************************************************************************/ - -unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg) -{ - return ((unsigned long)ia64_get_pkr(reg)); -} - -void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val) -{ - ia64_set_pkr(reg, val); -} - -/******************************** - * Moves to privileged registers - ********************************/ -unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg, - unsigned long val) -{ - union ia64_rr oldrr, newrr; - unsigned long rrval; - struct exit_ctl_data *p = &vcpu->arch.exit_data; - unsigned long psr; - - oldrr.val = vcpu_get_rr(vcpu, reg); - newrr.val = val; - vcpu->arch.vrr[reg >> VRN_SHIFT] = val; - - switch ((unsigned long)(reg >> VRN_SHIFT)) { - case VRN6: - vcpu->arch.vmm_rr = vrrtomrr(val); - local_irq_save(psr); - p->exit_reason = EXIT_REASON_SWITCH_RR6; - vmm_transition(vcpu); - local_irq_restore(psr); - break; - case VRN4: - rrval = vrrtomrr(val); - vcpu->arch.metaphysical_saved_rr4 = rrval; - if (!is_physical_mode(vcpu)) - ia64_set_rr(reg, rrval); - break; - case VRN0: - rrval = vrrtomrr(val); - vcpu->arch.metaphysical_saved_rr0 = rrval; - if (!is_physical_mode(vcpu)) - ia64_set_rr(reg, rrval); - break; - default: - ia64_set_rr(reg, vrrtomrr(val)); - break; - } - - return (IA64_NO_FAULT); -} - -void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r2; - - r3 = vcpu_get_gr(vcpu, inst.M42.r3); - r2 = vcpu_get_gr(vcpu, inst.M42.r2); - vcpu_set_rr(vcpu, r3, r2); -} - -void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst) -{ -} - -void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst) -{ -} - -void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r2; - - r3 = vcpu_get_gr(vcpu, inst.M42.r3); - r2 = vcpu_get_gr(vcpu, inst.M42.r2); - vcpu_set_pmc(vcpu, r3, r2); -} - -void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r2; - - r3 = vcpu_get_gr(vcpu, inst.M42.r3); - r2 = vcpu_get_gr(vcpu, inst.M42.r2); - vcpu_set_pmd(vcpu, r3, r2); -} - -void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst) -{ - u64 r3, r2; - - r3 = vcpu_get_gr(vcpu, inst.M42.r3); - r2 = vcpu_get_gr(vcpu, inst.M42.r2); - vcpu_set_pkr(vcpu, r3, r2); -} - -void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_rr(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned 
long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_pkr(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_dbr(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_ibr(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_pmc(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg) -{ - /* FIXME: This could get called as a result of a rsvd-reg fault */ - if (reg > (ia64_get_cpuid(3) & 0xff)) - return 0; - else - return ia64_get_cpuid(reg); -} - -void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_cpuid(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val) -{ - VCPU(vcpu, tpr) = val; - vcpu->arch.irq_check = 1; -} - -unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r2; - - r2 = vcpu_get_gr(vcpu, inst.M32.r2); - VCPU(vcpu, vcr[inst.M32.cr3]) = r2; - - switch (inst.M32.cr3) { - case 0: - vcpu_set_dcr(vcpu, r2); - break; - case 1: - vcpu_set_itm(vcpu, r2); - break; - case 66: - vcpu_set_tpr(vcpu, r2); - break; - case 67: - vcpu_set_eoi(vcpu, r2); - break; - default: - break; - } - - return 0; -} - -unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long tgt = inst.M33.r1; - unsigned long val; - - switch (inst.M33.cr3) { - case 65: - val = vcpu_get_ivr(vcpu); - vcpu_set_gr(vcpu, tgt, val, 0); - break; - - case 67: - vcpu_set_gr(vcpu, tgt, 0L, 0); - break; - default: - val = VCPU(vcpu, vcr[inst.M33.cr3]); - vcpu_set_gr(vcpu, tgt, val, 0); - break; - } - - return 0; -} - -void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val) -{ - - unsigned long mask; - struct kvm_pt_regs *regs; - struct ia64_psr old_psr, new_psr; - - old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - - regs = vcpu_regs(vcpu); - /* We only support guest as: - * vpsr.pk = 0 - * vpsr.is = 0 - * Otherwise panic - */ - if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) - panic_vm(vcpu, "Only support guests with vpsr.pk =0 " - "& vpsr.is=0\n"); - - /* - * For those IA64_PSR bits: id/da/dd/ss/ed/ia - * Since these bits will become 0, after success execution of each - * instruction, we will change set them to mIA64_PSR - */ - VCPU(vcpu, vpsr) = val - & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | - IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA)); - - if (!old_psr.i && (val & IA64_PSR_I)) { - /* vpsr.i 0->1 */ - vcpu->arch.irq_check = 1; - } - new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - - /* - * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr) - * , except for the following bits: - * ic/i/dt/si/rt/mc/it/bn/vm - */ - mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI + - IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN + - IA64_PSR_VM; - - regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask)); - - check_mm_mode_switch(vcpu, old_psr, new_psr); - - return ; -} - -unsigned long vcpu_cover(struct kvm_vcpu *vcpu) -{ - struct ia64_psr vpsr; - - struct kvm_pt_regs *regs = 
vcpu_regs(vcpu); - vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - - if (!vpsr.ic) - VCPU(vcpu, ifs) = regs->cr_ifs; - regs->cr_ifs = IA64_IFS_V; - return (IA64_NO_FAULT); -} - - - -/************************************************************************** - VCPU banked general register access routines - **************************************************************************/ -#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ - do { \ - __asm__ __volatile__ ( \ - ";;extr.u %0 = %3,%6,16;;\n" \ - "dep %1 = %0, %1, 0, 16;;\n" \ - "st8 [%4] = %1\n" \ - "extr.u %0 = %2, 16, 16;;\n" \ - "dep %3 = %0, %3, %6, 16;;\n" \ - "st8 [%5] = %3\n" \ - ::"r"(i), "r"(*b1unat), "r"(*b0unat), \ - "r"(*runat), "r"(b1unat), "r"(runat), \ - "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ - } while (0) - -void vcpu_bsw0(struct kvm_vcpu *vcpu) -{ - unsigned long i; - - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - unsigned long *r = ®s->r16; - unsigned long *b0 = &VCPU(vcpu, vbgr[0]); - unsigned long *b1 = &VCPU(vcpu, vgr[0]); - unsigned long *runat = ®s->eml_unat; - unsigned long *b0unat = &VCPU(vcpu, vbnat); - unsigned long *b1unat = &VCPU(vcpu, vnat); - - - if (VCPU(vcpu, vpsr) & IA64_PSR_BN) { - for (i = 0; i < 16; i++) { - *b1++ = *r; - *r++ = *b0++; - } - vcpu_bsw0_unat(i, b0unat, b1unat, runat, - VMM_PT_REGS_R16_SLOT); - VCPU(vcpu, vpsr) &= ~IA64_PSR_BN; - } -} - -#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ - do { \ - __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n" \ - "dep %1 = %0, %1, 16, 16;;\n" \ - "st8 [%4] = %1\n" \ - "extr.u %0 = %2, 0, 16;;\n" \ - "dep %3 = %0, %3, %6, 16;;\n" \ - "st8 [%5] = %3\n" \ - ::"r"(i), "r"(*b0unat), "r"(*b1unat), \ - "r"(*runat), "r"(b0unat), "r"(runat), \ - "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ - } while (0) - -void vcpu_bsw1(struct kvm_vcpu *vcpu) -{ - unsigned long i; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - unsigned long *r = ®s->r16; - unsigned long *b0 = &VCPU(vcpu, vbgr[0]); - unsigned long *b1 = &VCPU(vcpu, vgr[0]); - unsigned long *runat = ®s->eml_unat; - unsigned long *b0unat = &VCPU(vcpu, vbnat); - unsigned long *b1unat = &VCPU(vcpu, vnat); - - if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) { - for (i = 0; i < 16; i++) { - *b0++ = *r; - *r++ = *b1++; - } - vcpu_bsw1_unat(i, b0unat, b1unat, runat, - VMM_PT_REGS_R16_SLOT); - VCPU(vcpu, vpsr) |= IA64_PSR_BN; - } -} - -void vcpu_rfi(struct kvm_vcpu *vcpu) -{ - unsigned long ifs, psr; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - psr = VCPU(vcpu, ipsr); - if (psr & IA64_PSR_BN) - vcpu_bsw1(vcpu); - else - vcpu_bsw0(vcpu); - vcpu_set_psr(vcpu, psr); - ifs = VCPU(vcpu, ifs); - if (ifs >> 63) - regs->cr_ifs = ifs; - regs->cr_iip = VCPU(vcpu, iip); -} - -/* - VPSR can't keep track of below bits of guest PSR - This function gets guest PSR - */ - -unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu) -{ - unsigned long mask; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | - IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI; - return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask); -} - -void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long vpsr; - unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21) - | inst.M44.imm; - - vpsr = vcpu_get_psr(vcpu); - vpsr &= (~imm24); - vcpu_set_psr(vcpu, vpsr); -} - -void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long vpsr; - unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21) - | inst.M44.imm; - - vpsr = vcpu_get_psr(vcpu); - 
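    /* ssm sets the PSR bits named by imm24; kvm_rsm() above clears them */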
vpsr |= imm24; - vcpu_set_psr(vcpu, vpsr); -} - -/* Generate Mask - * Parameter: - * bit -- starting bit - * len -- how many bits - */ -#define MASK(bit,len) \ -({ \ - __u64 ret; \ - \ - __asm __volatile("dep %0=-1, r0, %1, %2"\ - : "=r" (ret): \ - "M" (bit), \ - "M" (len)); \ - ret; \ -}) - -void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val) -{ - val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32)); - vcpu_set_psr(vcpu, val); -} - -void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long val; - - val = vcpu_get_gr(vcpu, inst.M35.r2); - vcpu_set_psr_l(vcpu, val); -} - -void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long val; - - val = vcpu_get_psr(vcpu); - val = (val & MASK(0, 32)) | (val & MASK(35, 2)); - vcpu_set_gr(vcpu, inst.M33.r1, val, 0); -} - -void vcpu_increment_iip(struct kvm_vcpu *vcpu) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; - if (ipsr->ri == 2) { - ipsr->ri = 0; - regs->cr_iip += 16; - } else - ipsr->ri++; -} - -void vcpu_decrement_iip(struct kvm_vcpu *vcpu) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; - - if (ipsr->ri == 0) { - ipsr->ri = 2; - regs->cr_iip -= 16; - } else - ipsr->ri--; -} - -/** Emulate a privileged operation. - * - * - * @param vcpu virtual cpu - * @cause the reason cause virtualization fault - * @opcode the instruction code which cause virtualization fault - */ - -void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs) -{ - unsigned long status, cause, opcode ; - INST64 inst; - - status = IA64_NO_FAULT; - cause = VMX(vcpu, cause); - opcode = VMX(vcpu, opcode); - inst.inst = opcode; - /* - * Switch to actual virtual rid in rr0 and rr4, - * which is required by some tlb related instructions. 
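The dispatcher below always brackets handler dispatch with the rid-switch pair described here. A condensed sketch of its shape (dispatch elided; not a drop-in replacement for the real body):

    /* illustrative outline of kvm_emulate(), using the same names */
    prepare_if_physical_mode(vcpu);   /* borrow virtual RIDs if physical */
    switch (cause) {
    /* ... one kvm_*()/vcpu_*() handler per EVENT_* cause ... */
    }
    if (status == IA64_NO_FAULT && cause != EVENT_RFI)
            vcpu_increment_iip(vcpu); /* advance to the next bundle slot */
    recover_if_physical_mode(vcpu);   /* back to metaphysical RIDs */

EVENT_RFI is excluded from the iip increment because vcpu_rfi() has already loaded cr.iip itself.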
- */ - prepare_if_physical_mode(vcpu); - - switch (cause) { - case EVENT_RSM: - kvm_rsm(vcpu, inst); - break; - case EVENT_SSM: - kvm_ssm(vcpu, inst); - break; - case EVENT_MOV_TO_PSR: - kvm_mov_to_psr(vcpu, inst); - break; - case EVENT_MOV_FROM_PSR: - kvm_mov_from_psr(vcpu, inst); - break; - case EVENT_MOV_FROM_CR: - kvm_mov_from_cr(vcpu, inst); - break; - case EVENT_MOV_TO_CR: - kvm_mov_to_cr(vcpu, inst); - break; - case EVENT_BSW_0: - vcpu_bsw0(vcpu); - break; - case EVENT_BSW_1: - vcpu_bsw1(vcpu); - break; - case EVENT_COVER: - vcpu_cover(vcpu); - break; - case EVENT_RFI: - vcpu_rfi(vcpu); - break; - case EVENT_ITR_D: - kvm_itr_d(vcpu, inst); - break; - case EVENT_ITR_I: - kvm_itr_i(vcpu, inst); - break; - case EVENT_PTR_D: - kvm_ptr_d(vcpu, inst); - break; - case EVENT_PTR_I: - kvm_ptr_i(vcpu, inst); - break; - case EVENT_ITC_D: - kvm_itc_d(vcpu, inst); - break; - case EVENT_ITC_I: - kvm_itc_i(vcpu, inst); - break; - case EVENT_PTC_L: - kvm_ptc_l(vcpu, inst); - break; - case EVENT_PTC_G: - kvm_ptc_g(vcpu, inst); - break; - case EVENT_PTC_GA: - kvm_ptc_ga(vcpu, inst); - break; - case EVENT_PTC_E: - kvm_ptc_e(vcpu, inst); - break; - case EVENT_MOV_TO_RR: - kvm_mov_to_rr(vcpu, inst); - break; - case EVENT_MOV_FROM_RR: - kvm_mov_from_rr(vcpu, inst); - break; - case EVENT_THASH: - kvm_thash(vcpu, inst); - break; - case EVENT_TTAG: - kvm_ttag(vcpu, inst); - break; - case EVENT_TPA: - status = kvm_tpa(vcpu, inst); - break; - case EVENT_TAK: - kvm_tak(vcpu, inst); - break; - case EVENT_MOV_TO_AR_IMM: - kvm_mov_to_ar_imm(vcpu, inst); - break; - case EVENT_MOV_TO_AR: - kvm_mov_to_ar_reg(vcpu, inst); - break; - case EVENT_MOV_FROM_AR: - kvm_mov_from_ar_reg(vcpu, inst); - break; - case EVENT_MOV_TO_DBR: - kvm_mov_to_dbr(vcpu, inst); - break; - case EVENT_MOV_TO_IBR: - kvm_mov_to_ibr(vcpu, inst); - break; - case EVENT_MOV_TO_PMC: - kvm_mov_to_pmc(vcpu, inst); - break; - case EVENT_MOV_TO_PMD: - kvm_mov_to_pmd(vcpu, inst); - break; - case EVENT_MOV_TO_PKR: - kvm_mov_to_pkr(vcpu, inst); - break; - case EVENT_MOV_FROM_DBR: - kvm_mov_from_dbr(vcpu, inst); - break; - case EVENT_MOV_FROM_IBR: - kvm_mov_from_ibr(vcpu, inst); - break; - case EVENT_MOV_FROM_PMC: - kvm_mov_from_pmc(vcpu, inst); - break; - case EVENT_MOV_FROM_PKR: - kvm_mov_from_pkr(vcpu, inst); - break; - case EVENT_MOV_FROM_CPUID: - kvm_mov_from_cpuid(vcpu, inst); - break; - case EVENT_VMSW: - status = IA64_FAULT; - break; - default: - break; - }; - /*Assume all status is NO_FAULT ?*/ - if (status == IA64_NO_FAULT && cause != EVENT_RFI) - vcpu_increment_iip(vcpu); - - recover_if_physical_mode(vcpu); -} - -void init_vcpu(struct kvm_vcpu *vcpu) -{ - int i; - - vcpu->arch.mode_flags = GUEST_IN_PHY; - VMX(vcpu, vrr[0]) = 0x38; - VMX(vcpu, vrr[1]) = 0x38; - VMX(vcpu, vrr[2]) = 0x38; - VMX(vcpu, vrr[3]) = 0x38; - VMX(vcpu, vrr[4]) = 0x38; - VMX(vcpu, vrr[5]) = 0x38; - VMX(vcpu, vrr[6]) = 0x38; - VMX(vcpu, vrr[7]) = 0x38; - VCPU(vcpu, vpsr) = IA64_PSR_BN; - VCPU(vcpu, dcr) = 0; - /* pta.size must not be 0. The minimum is 15 (32k) */ - VCPU(vcpu, pta) = 15 << 2; - VCPU(vcpu, itv) = 0x10000; - VCPU(vcpu, itm) = 0; - VMX(vcpu, last_itc) = 0; - - VCPU(vcpu, lid) = VCPU_LID(vcpu); - VCPU(vcpu, ivr) = 0; - VCPU(vcpu, tpr) = 0x10000; - VCPU(vcpu, eoi) = 0; - VCPU(vcpu, irr[0]) = 0; - VCPU(vcpu, irr[1]) = 0; - VCPU(vcpu, irr[2]) = 0; - VCPU(vcpu, irr[3]) = 0; - VCPU(vcpu, pmv) = 0x10000; - VCPU(vcpu, cmcv) = 0x10000; - VCPU(vcpu, lrr0) = 0x10000; /* default reset value? */ - VCPU(vcpu, lrr1) = 0x10000; /* default reset value? 
*/ - update_vhpi(vcpu, NULL_VECTOR); - VLSAPIC_XTP(vcpu) = 0x80; /* disabled */ - - for (i = 0; i < 4; i++) - VLSAPIC_INSVC(vcpu, i) = 0; -} - -void kvm_init_all_rr(struct kvm_vcpu *vcpu) -{ - unsigned long psr; - - local_irq_save(psr); - - /* WARNING: not allow co-exist of both virtual mode and physical - * mode in same region - */ - - vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0])); - vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4])); - - if (is_physical_mode(vcpu)) { - if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) - panic_vm(vcpu, "Machine Status conflicts!\n"); - - ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); - ia64_dv_serialize_data(); - ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4); - ia64_dv_serialize_data(); - } else { - ia64_set_rr((VRN0 << VRN_SHIFT), - vcpu->arch.metaphysical_saved_rr0); - ia64_dv_serialize_data(); - ia64_set_rr((VRN4 << VRN_SHIFT), - vcpu->arch.metaphysical_saved_rr4); - ia64_dv_serialize_data(); - } - ia64_set_rr((VRN1 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN1]))); - ia64_dv_serialize_data(); - ia64_set_rr((VRN2 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN2]))); - ia64_dv_serialize_data(); - ia64_set_rr((VRN3 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN3]))); - ia64_dv_serialize_data(); - ia64_set_rr((VRN5 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN5]))); - ia64_dv_serialize_data(); - ia64_set_rr((VRN7 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN7]))); - ia64_dv_serialize_data(); - ia64_srlz_d(); - ia64_set_psr(psr); -} - -int vmm_entry(void) -{ - struct kvm_vcpu *v; - v = current_vcpu; - - ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd, - 0, 0, 0, 0, 0, 0); - kvm_init_vtlb(v); - kvm_init_vhpt(v); - init_vcpu(v); - kvm_init_all_rr(v); - vmm_reset_entry(); - - return 0; -} - -static void kvm_show_registers(struct kvm_pt_regs *regs) -{ - unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; - - struct kvm_vcpu *vcpu = current_vcpu; - if (vcpu != NULL) - printk("vcpu 0x%p vcpu %d\n", - vcpu, vcpu->vcpu_id); - - printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n", - regs->cr_ipsr, regs->cr_ifs, ip); - - printk("unat: %016lx pfs : %016lx rsc : %016lx\n", - regs->ar_unat, regs->ar_pfs, regs->ar_rsc); - printk("rnat: %016lx bspstore: %016lx pr : %016lx\n", - regs->ar_rnat, regs->ar_bspstore, regs->pr); - printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", - regs->loadrs, regs->ar_ccv, regs->ar_fpsr); - printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd); - printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, - regs->b6, regs->b7); - printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", - regs->f6.u.bits[1], regs->f6.u.bits[0], - regs->f7.u.bits[1], regs->f7.u.bits[0]); - printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", - regs->f8.u.bits[1], regs->f8.u.bits[0], - regs->f9.u.bits[1], regs->f9.u.bits[0]); - printk("f10 : %05lx%016lx f11 : %05lx%016lx\n", - regs->f10.u.bits[1], regs->f10.u.bits[0], - regs->f11.u.bits[1], regs->f11.u.bits[0]); - - printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, - regs->r2, regs->r3); - printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, - regs->r9, regs->r10); - printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, - regs->r12, regs->r13); - printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, - regs->r15, regs->r16); - printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, - regs->r18, regs->r19); - printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, - regs->r21, regs->r22); - printk("r23 : %016lx r24 
: %016lx r25 : %016lx\n", regs->r23, - regs->r24, regs->r25); - printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, - regs->r27, regs->r28); - printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, - regs->r30, regs->r31); - -} - -void panic_vm(struct kvm_vcpu *v, const char *fmt, ...) -{ - va_list args; - char buf[256]; - - struct kvm_pt_regs *regs = vcpu_regs(v); - struct exit_ctl_data *p = &v->arch.exit_data; - va_start(args, fmt); - vsnprintf(buf, sizeof(buf), fmt, args); - va_end(args); - printk(buf); - kvm_show_registers(regs); - p->exit_reason = EXIT_REASON_VM_PANIC; - vmm_transition(v); - /*Never to return*/ - while (1); -} diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h deleted file mode 100644 index 988911b4cc7..00000000000 --- a/arch/ia64/kvm/vcpu.h +++ /dev/null @@ -1,752 +0,0 @@ -/* - * vcpu.h: vcpu routines - * Copyright (c) 2005, Intel Corporation. - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) - * - * Copyright (c) 2007, Intel Corporation. - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - - -#ifndef __KVM_VCPU_H__ -#define __KVM_VCPU_H__ - -#include <asm/types.h> -#include <asm/fpu.h> -#include <asm/processor.h> - -#ifndef __ASSEMBLY__ -#include "vti.h" - -#include <linux/kvm_host.h> -#include <linux/spinlock.h> - -typedef unsigned long IA64_INST; - -typedef union U_IA64_BUNDLE { - unsigned long i64[2]; - struct { unsigned long template:5, slot0:41, slot1a:18, - slot1b:23, slot2:41; }; - /* NOTE: following doesn't work because bitfields can't cross natural - size boundaries - struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */ -} IA64_BUNDLE; - -typedef union U_INST64_A5 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5, - imm9d:9, s:1, major:4; }; -} INST64_A5; - -typedef union U_INST64_B4 { - IA64_INST inst; - struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6, - wh:2, d:1, un1:1, major:4; }; -} INST64_B4; - -typedef union U_INST64_B8 { - IA64_INST inst; - struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; }; -} INST64_B8; - -typedef union U_INST64_B9 { - IA64_INST inst; - struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; }; -} INST64_B9; - -typedef union U_INST64_I19 { - IA64_INST inst; - struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; }; -} INST64_I19; - -typedef union U_INST64_I26 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; -} INST64_I26; - -typedef union U_INST64_I27 { - IA64_INST inst; - struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; }; -} INST64_I27; - -typedef union U_INST64_I28 { /* not privileged (mov from AR) */ - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; -} INST64_I28; - -typedef union U_INST64_M28 { - IA64_INST inst; - struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M28; - -typedef union U_INST64_M29 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M29; - -typedef union U_INST64_M30 { - IA64_INST inst; - struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2, - x3:3, s:1, major:4; }; -} INST64_M30; - -typedef union U_INST64_M31 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M31; - -typedef union U_INST64_M32 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M32; - -typedef union U_INST64_M33 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M33; - -typedef union U_INST64_M35 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; - -} INST64_M35; - -typedef union U_INST64_M36 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; }; -} INST64_M36; - -typedef union U_INST64_M37 { - IA64_INST inst; - struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3, - i:1, major:4; }; -} INST64_M37; - -typedef union U_INST64_M41 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; -} INST64_M41; - -typedef union U_INST64_M42 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M42; - -typedef union U_INST64_M43 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M43; - -typedef union U_INST64_M44 { - IA64_INST inst; - struct { unsigned long qp:6, 
imm:21, x4:4, i2:2, x3:3, i:1, major:4; }; -} INST64_M44; - -typedef union U_INST64_M45 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M45; - -typedef union U_INST64_M46 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6, - x3:3, un1:1, major:4; }; -} INST64_M46; - -typedef union U_INST64_M47 { - IA64_INST inst; - struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; }; -} INST64_M47; - -typedef union U_INST64_M1{ - IA64_INST inst; - struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M1; - -typedef union U_INST64_M2{ - IA64_INST inst; - struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M2; - -typedef union U_INST64_M3{ - IA64_INST inst; - struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2, - x6:6, s:1, major:4; }; -} INST64_M3; - -typedef union U_INST64_M4 { - IA64_INST inst; - struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M4; - -typedef union U_INST64_M5 { - IA64_INST inst; - struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2, - x6:6, s:1, major:4; }; -} INST64_M5; - -typedef union U_INST64_M6 { - IA64_INST inst; - struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M6; - -typedef union U_INST64_M9 { - IA64_INST inst; - struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M9; - -typedef union U_INST64_M10 { - IA64_INST inst; - struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2, - x6:6, s:1, major:4; }; -} INST64_M10; - -typedef union U_INST64_M12 { - IA64_INST inst; - struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M12; - -typedef union U_INST64_M15 { - IA64_INST inst; - struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2, - x6:6, s:1, major:4; }; -} INST64_M15; - -typedef union U_INST64 { - IA64_INST inst; - struct { unsigned long :37, major:4; } generic; - INST64_A5 A5; /* used in build_hypercall_bundle only */ - INST64_B4 B4; /* used in build_hypercall_bundle only */ - INST64_B8 B8; /* rfi, bsw.[01] */ - INST64_B9 B9; /* break.b */ - INST64_I19 I19; /* used in build_hypercall_bundle only */ - INST64_I26 I26; /* mov register to ar (I unit) */ - INST64_I27 I27; /* mov immediate to ar (I unit) */ - INST64_I28 I28; /* mov from ar (I unit) */ - INST64_M1 M1; /* ld integer */ - INST64_M2 M2; - INST64_M3 M3; - INST64_M4 M4; /* st integer */ - INST64_M5 M5; - INST64_M6 M6; /* ldfd floating pointer */ - INST64_M9 M9; /* stfd floating pointer */ - INST64_M10 M10; /* stfd floating pointer */ - INST64_M12 M12; /* ldfd pair floating pointer */ - INST64_M15 M15; /* lfetch + imm update */ - INST64_M28 M28; /* purge translation cache entry */ - INST64_M29 M29; /* mov register to ar (M unit) */ - INST64_M30 M30; /* mov immediate to ar (M unit) */ - INST64_M31 M31; /* mov from ar (M unit) */ - INST64_M32 M32; /* mov reg to cr */ - INST64_M33 M33; /* mov from cr */ - INST64_M35 M35; /* mov to psr */ - INST64_M36 M36; /* mov from psr */ - INST64_M37 M37; /* break.m */ - INST64_M41 M41; /* translation cache insert */ - INST64_M42 M42; /* mov to indirect reg/translation reg insert*/ - INST64_M43 M43; /* mov from indirect reg */ - INST64_M44 M44; /* set/reset system mask */ - INST64_M45 M45; /* translation purge */ - INST64_M46 M46; /* translation access (tpa,tak) */ - INST64_M47 M47; /* purge translation 
entry */ -} INST64; - -#define MASK_41 ((unsigned long)0x1ffffffffff) - -/* Virtual address memory attributes encoding */ -#define VA_MATTR_WB 0x0 -#define VA_MATTR_UC 0x4 -#define VA_MATTR_UCE 0x5 -#define VA_MATTR_WC 0x6 -#define VA_MATTR_NATPAGE 0x7 - -#define PMASK(size) (~((size) - 1)) -#define PSIZE(size) (1UL<<(size)) -#define CLEARLSB(ppn, nbits) (((ppn) >> (nbits)) << (nbits)) -#define PAGEALIGN(va, ps) CLEARLSB(va, ps) -#define PAGE_FLAGS_RV_MASK (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53)) -#define _PAGE_MA_ST (0x1 << 2) /* is reserved for software use */ - -#define ARCH_PAGE_SHIFT 12 - -#define INVALID_TI_TAG (1UL << 63) - -#define VTLB_PTE_P_BIT 0 -#define VTLB_PTE_IO_BIT 60 -#define VTLB_PTE_IO (1UL<<VTLB_PTE_IO_BIT) -#define VTLB_PTE_P (1UL<<VTLB_PTE_P_BIT) - -#define vcpu_quick_region_check(_tr_regions,_ifa) \ - (_tr_regions & (1 << ((unsigned long)_ifa >> 61))) - -#define vcpu_quick_region_set(_tr_regions,_ifa) \ - do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0) - -static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir, - u64 va, u64 rid) -{ - trp->page_flags = pte; - trp->itir = itir; - trp->vadr = va; - trp->rid = rid; -} - -extern u64 kvm_get_mpt_entry(u64 gpfn); - -/* Return I/ */ -static inline u64 __gpfn_is_io(u64 gpfn) -{ - u64 pte; - pte = kvm_get_mpt_entry(gpfn); - if (!(pte & GPFN_INV_MASK)) { - pte = pte & GPFN_IO_MASK; - if (pte != GPFN_PHYS_MMIO) - return pte; - } - return 0; -} -#endif -#define IA64_NO_FAULT 0 -#define IA64_FAULT 1 - -#define VMM_RBS_OFFSET ((VMM_TASK_SIZE + 15) & ~15) - -#define SW_BAD 0 /* Bad mode transitition */ -#define SW_V2P 1 /* Physical emulatino is activated */ -#define SW_P2V 2 /* Exit physical mode emulation */ -#define SW_SELF 3 /* No mode transition */ -#define SW_NOP 4 /* Mode transition, but without action required */ - -#define GUEST_IN_PHY 0x1 -#define GUEST_PHY_EMUL 0x2 - -#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP)) - -#define VRN_SHIFT 61 -#define VRN_MASK 0xe000000000000000 -#define VRN0 0x0UL -#define VRN1 0x1UL -#define VRN2 0x2UL -#define VRN3 0x3UL -#define VRN4 0x4UL -#define VRN5 0x5UL -#define VRN6 0x6UL -#define VRN7 0x7UL - -#define IRQ_NO_MASKED 0 -#define IRQ_MASKED_BY_VTPR 1 -#define IRQ_MASKED_BY_INSVC 2 /* masked by inservice IRQ */ - -#define PTA_BASE_SHIFT 15 - -#define IA64_PSR_VM_BIT 46 -#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT) - -/* Interruption Function State */ -#define IA64_IFS_V_BIT 63 -#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT) - -#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX) -#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX) - -#ifndef __ASSEMBLY__ - -#include <asm/gcc_intrin.h> - -#define is_physical_mode(v) \ - ((v->arch.mode_flags) & GUEST_IN_PHY) - -#define is_virtual_mode(v) \ - (!is_physical_mode(v)) - -#define MODE_IND(psr) \ - (((psr).it << 2) + ((psr).dt << 1) + (psr).rt) - -#ifndef CONFIG_SMP -#define _vmm_raw_spin_lock(x) do {}while(0) -#define _vmm_raw_spin_unlock(x) do {}while(0) -#else -typedef struct { - volatile unsigned int lock; -} vmm_spinlock_t; -#define _vmm_raw_spin_lock(x) \ - do { \ - __u32 *ia64_spinlock_ptr = (__u32 *) (x); \ - __u64 ia64_spinlock_val; \ - ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ - if (unlikely(ia64_spinlock_val)) { \ - do { \ - while (*ia64_spinlock_ptr) \ - ia64_barrier(); \ - ia64_spinlock_val = \ - ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ - } while (ia64_spinlock_val); \ - } \ - } while (0) - -#define 
_vmm_raw_spin_unlock(x) \ - do { barrier(); \ - ((vmm_spinlock_t *)x)->lock = 0; } \ -while (0) -#endif - -void vmm_spin_lock(vmm_spinlock_t *lock); -void vmm_spin_unlock(vmm_spinlock_t *lock); -enum { - I_TLB = 1, - D_TLB = 2 -}; - -union kvm_va { - struct { - unsigned long off : 60; /* intra-region offset */ - unsigned long reg : 4; /* region number */ - } f; - unsigned long l; - void *p; -}; - -#define __kvm_pa(x) ({union kvm_va _v; _v.l = (long) (x); \ - _v.f.reg = 0; _v.l; }) -#define __kvm_va(x) ({union kvm_va _v; _v.l = (long) (x); \ - _v.f.reg = -1; _v.p; }) - -#define _REGION_ID(x) ({union ia64_rr _v; _v.val = (long)(x); \ - _v.rid; }) -#define _REGION_PAGE_SIZE(x) ({union ia64_rr _v; _v.val = (long)(x); \ - _v.ps; }) -#define _REGION_HW_WALKER(x) ({union ia64_rr _v; _v.val = (long)(x); \ - _v.ve; }) - -enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF }; -enum tlb_miss_type { INSTRUCTION, DATA, REGISTER }; - -#define VCPU(_v, _x) ((_v)->arch.vpd->_x) -#define VMX(_v, _x) ((_v)->arch._x) - -#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i]) -#define VLSAPIC_XTP(_v) VMX(_v, xtp) - -static inline unsigned long itir_ps(unsigned long itir) -{ - return ((itir >> 2) & 0x3f); -} - - -/************************************************************************** - VCPU control register access routines - **************************************************************************/ - -static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, itir)); -} - -static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, itir) = val; -} - -static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, ifa)); -} - -static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, ifa) = val; -} - -static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, iva)); -} - -static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, pta)); -} - -static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, lid)); -} - -static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, tpr)); -} - -static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu) -{ - return (0UL); /*reads of eoi always return 0 */ -} - -static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, irr[0])); -} - -static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, irr[1])); -} - -static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, irr[2])); -} - -static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, irr[3])); -} - -static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val) -{ - ia64_setreg(_IA64_REG_CR_DCR, val); -} - -static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, isr) = val; -} - -static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, lid) = val; -} - -static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, ipsr) = val; -} - -static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, iip) = val; -} - -static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, ifs) = val; -} - -static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, iipa) = val; -} - -static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, iha) = val; -} - - -static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, 
u64 reg) -{ - return vcpu->arch.vrr[reg>>61]; -} - -/************************************************************************** - VCPU debug breakpoint register access routines - **************************************************************************/ - -static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val) -{ - __ia64_set_dbr(reg, val); -} - -static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val) -{ - ia64_set_ibr(reg, val); -} - -static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg) -{ - return ((u64)__ia64_get_dbr(reg)); -} - -static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg) -{ - return ((u64)ia64_get_ibr(reg)); -} - -/************************************************************************** - VCPU performance monitor register access routines - **************************************************************************/ -static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val) -{ - /* NOTE: Writes to unimplemented PMC registers are discarded */ - ia64_set_pmc(reg, val); -} - -static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val) -{ - /* NOTE: Writes to unimplemented PMD registers are discarded */ - ia64_set_pmd(reg, val); -} - -static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg) -{ - /* NOTE: Reads from unimplemented PMC registers return zero */ - return ((u64)ia64_get_pmc(reg)); -} - -static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg) -{ - /* NOTE: Reads from unimplemented PMD registers return zero */ - return ((u64)ia64_get_pmd(reg)); -} - -static inline unsigned long vrrtomrr(unsigned long val) -{ - union ia64_rr rr; - rr.val = val; - rr.rid = (rr.rid << 4) | 0xe; - if (rr.ps > PAGE_SHIFT) - rr.ps = PAGE_SHIFT; - rr.ve = 1; - return rr.val; -} - - -static inline int highest_bits(int *dat) -{ - u32 bits, bitnum; - int i; - - /* loop for all 256 bits */ - for (i = 7; i >= 0 ; i--) { - bits = dat[i]; - if (bits) { - bitnum = fls(bits); - return i * 32 + bitnum - 1; - } - } - return NULL_VECTOR; -} - -/* - * The pending irq is higher than the inservice one. - * - */ -static inline int is_higher_irq(int pending, int inservice) -{ - return ((pending > inservice) - || ((pending != NULL_VECTOR) - && (inservice == NULL_VECTOR))); -} - -static inline int is_higher_class(int pending, int mic) -{ - return ((pending >> 4) > mic); -} - -/* - * Return 0-255 for pending irq. - * NULL_VECTOR: when no pending. 
- */ -static inline int highest_pending_irq(struct kvm_vcpu *vcpu) -{ - if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR)) - return NMI_VECTOR; - if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR)) - return ExtINT_VECTOR; - - return highest_bits((int *)&VCPU(vcpu, irr[0])); -} - -static inline int highest_inservice_irq(struct kvm_vcpu *vcpu) -{ - if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR)) - return NMI_VECTOR; - if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR)) - return ExtINT_VECTOR; - - return highest_bits((int *)&(VMX(vcpu, insvc[0]))); -} - -extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, - struct ia64_fpreg *val); -extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, - struct ia64_fpreg *val); -extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg); -extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, - u64 val, int nat); -extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu); -extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val); -extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr); -extern void vcpu_bsw0(struct kvm_vcpu *vcpu); -extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, - u64 itir, u64 va, int type); -extern struct thash_data *vhpt_lookup(u64 va); -extern u64 guest_vhpt_lookup(u64 iha, u64 *pte); -extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps); -extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps); -extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va); -extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, - u64 itir, u64 ifa, int type); -extern void thash_purge_all(struct kvm_vcpu *v); -extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v, - u64 va, int is_data); -extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, - u64 ps, int is_data); - -extern void vcpu_increment_iip(struct kvm_vcpu *v); -extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu); -extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec); -extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec); -extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr); -extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr); -extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr); -extern void nested_dtlb(struct kvm_vcpu *vcpu); -extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr); -extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref); - -extern void update_vhpi(struct kvm_vcpu *vcpu, int vec); -extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice); - -extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle); -extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma); -extern void vmm_transition(struct kvm_vcpu *vcpu); -extern void vmm_trampoline(union context *from, union context *to); -extern int vmm_entry(void); -extern u64 vcpu_get_itc(struct kvm_vcpu *vcpu); - -extern void vmm_reset_entry(void); -void kvm_init_vtlb(struct kvm_vcpu *v); -void kvm_init_vhpt(struct kvm_vcpu *v); -void thash_init(struct thash_cb *hcb, u64 sz); - -void panic_vm(struct kvm_vcpu *v, const char *fmt, ...); -u64 kvm_gpa_to_mpa(u64 gpa); -extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3, - u64 arg4, u64 arg5, u64 arg6, u64 arg7); - -extern long vmm_sanity; - -#endif -#endif /* __VCPU_H__ */ diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c deleted file mode 100644 index 176a12cd56d..00000000000 --- a/arch/ia64/kvm/vmm.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * vmm.c: 
vmm module interface with kvm module - * - * Copyright (c) 2007, Intel Corporation. - * - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - */ - - -#include <linux/kernel.h> -#include <linux/module.h> -#include <asm/fpswa.h> - -#include "vcpu.h" - -MODULE_AUTHOR("Intel"); -MODULE_LICENSE("GPL"); - -extern char kvm_ia64_ivt; -extern char kvm_asm_mov_from_ar; -extern char kvm_asm_mov_from_ar_sn2; -extern fpswa_interface_t *vmm_fpswa_interface; - -long vmm_sanity = 1; - -struct kvm_vmm_info vmm_info = { - .module = THIS_MODULE, - .vmm_entry = vmm_entry, - .tramp_entry = vmm_trampoline, - .vmm_ivt = (unsigned long)&kvm_ia64_ivt, - .patch_mov_ar = (unsigned long)&kvm_asm_mov_from_ar, - .patch_mov_ar_sn2 = (unsigned long)&kvm_asm_mov_from_ar_sn2, -}; - -static int __init kvm_vmm_init(void) -{ - - vmm_fpswa_interface = fpswa_interface; - - /*Register vmm data to kvm side*/ - return kvm_init(&vmm_info, 1024, 0, THIS_MODULE); -} - -static void __exit kvm_vmm_exit(void) -{ - kvm_exit(); - return ; -} - -void vmm_spin_lock(vmm_spinlock_t *lock) -{ - _vmm_raw_spin_lock(lock); -} - -void vmm_spin_unlock(vmm_spinlock_t *lock) -{ - _vmm_raw_spin_unlock(lock); -} - -static void vcpu_debug_exit(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - long psr; - - local_irq_save(psr); - p->exit_reason = EXIT_REASON_DEBUG; - vmm_transition(vcpu); - local_irq_restore(psr); -} - -asmlinkage int printk(const char *fmt, ...) -{ - struct kvm_vcpu *vcpu = current_vcpu; - va_list args; - int r; - - memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN); - va_start(args, fmt); - r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args); - va_end(args); - vcpu_debug_exit(vcpu); - return r; -} - -module_init(kvm_vmm_init) -module_exit(kvm_vmm_exit) diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S deleted file mode 100644 index 397e34a63e1..00000000000 --- a/arch/ia64/kvm/vmm_ivt.S +++ /dev/null @@ -1,1392 +0,0 @@ -/* - * arch/ia64/kvm/vmm_ivt.S - * - * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co - * Stephane Eranian <eranian@hpl.hp.com> - * David Mosberger <davidm@hpl.hp.com> - * Copyright (C) 2000, 2002-2003 Intel Co - * Asit Mallick <asit.k.mallick@intel.com> - * Suresh Siddha <suresh.b.siddha@intel.com> - * Kenneth Chen <kenneth.w.chen@intel.com> - * Fenghua Yu <fenghua.yu@intel.com> - * - * - * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling - * for SMP - * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB - * handler now uses virtual PT. - * - * 07/6/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) - * Supporting Intel virtualization architecture - * - */ - -/* - * This file defines the interruption vector table used by the CPU. - * It does not include one entry per possible cause of interruption. 
- * - * The first 20 entries of the table contain 64 bundles each while the - * remaining 48 entries contain only 16 bundles each. - * - * The 64 bundles are used to allow inlining the whole handler for - * critical - * interruptions like TLB misses. - * - * For each entry, the comment is as follows: - * - * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss - * (12,51) - * entry offset ----/ / / / - * / - * entry number ---------/ / / - * / - * size of the entry -------------/ / - * / - * vector name -------------------------------------/ - * / - * interruptions triggering this vector - * ----------------------/ - * - * The table is 32KB in size and must be aligned on 32KB - * boundary. - * (The CPU ignores the 15 lower bits of the address) - * - * Table is based upon EAS2.6 (Oct 1999) - */ - - -#include <asm/asmmacro.h> -#include <asm/cache.h> -#include <asm/pgtable.h> - -#include "asm-offsets.h" -#include "vcpu.h" -#include "kvm_minstate.h" -#include "vti.h" - -#if 0 -# define PSR_DEFAULT_BITS psr.ac -#else -# define PSR_DEFAULT_BITS 0 -#endif - -#define KVM_FAULT(n) \ - kvm_fault_##n:; \ - mov r19=n;; \ - br.sptk.many kvm_vmm_panic; \ - ;; \ - -#define KVM_REFLECT(n) \ - mov r31=pr; \ - mov r19=n; /* prepare to save predicates */ \ - mov r29=cr.ipsr; \ - ;; \ - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ -(p7) br.sptk.many kvm_dispatch_reflection; \ - br.sptk.many kvm_vmm_panic; \ - -GLOBAL_ENTRY(kvm_vmm_panic) - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,1,0 - mov out0=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr. - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - br.call.sptk.many b6=vmm_panic_handler; -END(kvm_vmm_panic) - - .section .text..ivt,"ax" - - .align 32768 // align on 32KB boundary - .global kvm_ia64_ivt -kvm_ia64_ivt: -/////////////////////////////////////////////////////////////// -// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) -ENTRY(kvm_vhpt_miss) - KVM_FAULT(0) -END(kvm_vhpt_miss) - - .org kvm_ia64_ivt+0x400 -//////////////////////////////////////////////////////////////// -// 0x0400 Entry 1 (size 64 bundles) ITLB (21) -ENTRY(kvm_itlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6) br.sptk kvm_alt_itlb_miss - mov r19 = 1 - br.sptk kvm_itlb_miss_dispatch - KVM_FAULT(1); -END(kvm_itlb_miss) - - .org kvm_ia64_ivt+0x0800 -////////////////////////////////////////////////////////////////// -// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) -ENTRY(kvm_dtlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6) br.sptk kvm_alt_dtlb_miss - br.sptk kvm_dtlb_miss_dispatch -END(kvm_dtlb_miss) - - .org kvm_ia64_ivt+0x0c00 -//////////////////////////////////////////////////////////////////// -// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) -ENTRY(kvm_alt_itlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - mov r24=cr.ipsr - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r17,r19 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.i r19 // insert the TLB entry - mov pr=r31,-1 - rfi -END(kvm_alt_itlb_miss) - - .org kvm_ia64_ivt+0x1000 -///////////////////////////////////////////////////////////////////// -// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) 
-ENTRY(kvm_alt_dtlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r24=cr.ipsr - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r19,r17 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.d r19 // insert the TLB entry - mov pr=r31,-1 - rfi -END(kvm_alt_dtlb_miss) - - .org kvm_ia64_ivt+0x1400 -////////////////////////////////////////////////////////////////////// -// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) -ENTRY(kvm_nested_dtlb_miss) - KVM_FAULT(5) -END(kvm_nested_dtlb_miss) - - .org kvm_ia64_ivt+0x1800 -///////////////////////////////////////////////////////////////////// -// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) -ENTRY(kvm_ikey_miss) - KVM_REFLECT(6) -END(kvm_ikey_miss) - - .org kvm_ia64_ivt+0x1c00 -///////////////////////////////////////////////////////////////////// -// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) -ENTRY(kvm_dkey_miss) - KVM_REFLECT(7) -END(kvm_dkey_miss) - - .org kvm_ia64_ivt+0x2000 -//////////////////////////////////////////////////////////////////// -// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) -ENTRY(kvm_dirty_bit) - KVM_REFLECT(8) -END(kvm_dirty_bit) - - .org kvm_ia64_ivt+0x2400 -//////////////////////////////////////////////////////////////////// -// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) -ENTRY(kvm_iaccess_bit) - KVM_REFLECT(9) -END(kvm_iaccess_bit) - - .org kvm_ia64_ivt+0x2800 -/////////////////////////////////////////////////////////////////// -// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) -ENTRY(kvm_daccess_bit) - KVM_REFLECT(10) -END(kvm_daccess_bit) - - .org kvm_ia64_ivt+0x2c00 -///////////////////////////////////////////////////////////////// -// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) -ENTRY(kvm_break_fault) - mov r31=pr - mov r19=11 - mov r29=cr.ipsr - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!) 
- mov out0=cr.ifa - mov out2=cr.isr // FIXME: pity to make this slow access twice - mov out3=cr.iim // FIXME: pity to make this slow access twice - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15)ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out1=16,sp - br.call.sptk.many b6=kvm_ia64_handle_break - ;; -END(kvm_break_fault) - - .org kvm_ia64_ivt+0x3000 -///////////////////////////////////////////////////////////////// -// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) -ENTRY(kvm_interrupt) - mov r31=pr // prepare to save predicates - mov r19=12 - mov r29=cr.ipsr - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT - tbit.z p0,p15=r29,IA64_PSR_I_BIT - ;; -(p7) br.sptk kvm_dispatch_interrupt - ;; - mov r27=ar.rsc /* M */ - mov r20=r1 /* A */ - mov r25=ar.unat /* M */ - mov r26=ar.pfs /* I */ - mov r28=cr.iip /* M */ - cover /* B (or nothing) */ - ;; - mov r1=sp - ;; - invala /* M */ - mov r30=cr.ifs - ;; - addl r1=-VMM_PT_REGS_SIZE,r1 - ;; - adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ - adds r16=PT(CR_IPSR),r1 - ;; - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES - st8 [r16]=r29 /* save cr.ipsr */ - ;; - lfetch.fault.excl.nt1 [r17] - mov r29=b0 - ;; - adds r16=PT(R8),r1 /* initialize first base pointer */ - adds r17=PT(R9),r1 /* initialize second base pointer */ - mov r18=r0 /* make sure r18 isn't NaT */ - ;; -.mem.offset 0,0; st8.spill [r16]=r8,16 -.mem.offset 8,0; st8.spill [r17]=r9,16 - ;; -.mem.offset 0,0; st8.spill [r16]=r10,24 -.mem.offset 8,0; st8.spill [r17]=r11,24 - ;; - st8 [r16]=r28,16 /* save cr.iip */ - st8 [r17]=r30,16 /* save cr.ifs */ - mov r8=ar.fpsr /* M */ - mov r9=ar.csd - mov r10=ar.ssd - movl r11=FPSR_DEFAULT /* L-unit */ - ;; - st8 [r16]=r25,16 /* save ar.unat */ - st8 [r17]=r26,16 /* save ar.pfs */ - shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ - ;; - st8 [r16]=r27,16 /* save ar.rsc */ - adds r17=16,r17 /* skip over ar_rnat field */ - ;; - st8 [r17]=r31,16 /* save predicates */ - adds r16=16,r16 /* skip over ar_bspstore field */ - ;; - st8 [r16]=r29,16 /* save b0 */ - st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ - ;; -.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ -.mem.offset 8,0; st8.spill [r17]=r12,16 - adds r12=-16,r1 - /* switch to kernel memory stack (with 16 bytes of scratch) */ - ;; -.mem.offset 0,0; st8.spill [r16]=r13,16 -.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ - ;; -.mem.offset 0,0; st8.spill [r16]=r15,16 -.mem.offset 8,0; st8.spill [r17]=r14,16 - dep r14=-1,r0,60,4 - ;; -.mem.offset 0,0; st8.spill [r16]=r2,16 -.mem.offset 8,0; st8.spill [r17]=r3,16 - adds r2=VMM_PT_REGS_R16_OFFSET,r1 - adds r14 = VMM_VCPU_GP_OFFSET,r13 - ;; - mov r8=ar.ccv - ld8 r14 = [r14] - ;; - mov r1=r14 /* establish kernel global pointer */ - ;; \ - bsw.1 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - mov out0=r13 - ;; - ssm psr.ic - ;; - srlz.i - ;; - //(p15) ssm psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic is back on - ;; -.mem.offset 0,0; st8.spill [r2]=r16,16 -.mem.offset 8,0; st8.spill [r3]=r17,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r18,16 -.mem.offset 8,0; st8.spill [r3]=r19,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r20,16 -.mem.offset 8,0; st8.spill [r3]=r21,16 - mov r18=b6 - ;; -.mem.offset 0,0; st8.spill [r2]=r22,16 -.mem.offset 8,0; st8.spill [r3]=r23,16 - mov 
r19=b7 - ;; -.mem.offset 0,0; st8.spill [r2]=r24,16 -.mem.offset 8,0; st8.spill [r3]=r25,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r26,16 -.mem.offset 8,0; st8.spill [r3]=r27,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r28,16 -.mem.offset 8,0; st8.spill [r3]=r29,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r30,16 -.mem.offset 8,0; st8.spill [r3]=r31,32 - ;; - mov ar.fpsr=r11 /* M-unit */ - st8 [r2]=r8,8 /* ar.ccv */ - adds r24=PT(B6)-PT(F7),r3 - ;; - stf.spill [r2]=f6,32 - stf.spill [r3]=f7,32 - ;; - stf.spill [r2]=f8,32 - stf.spill [r3]=f9,32 - ;; - stf.spill [r2]=f10 - stf.spill [r3]=f11 - adds r25=PT(B7)-PT(F11),r3 - ;; - st8 [r24]=r18,16 /* b6 */ - st8 [r25]=r19,16 /* b7 */ - ;; - st8 [r24]=r9 /* ar.csd */ - st8 [r25]=r10 /* ar.ssd */ - ;; - srlz.d // make sure we see the effect of cr.ivr - addl r14=@gprel(ia64_leave_nested),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_ia64_handle_irq - ;; -END(kvm_interrupt) - - .global kvm_dispatch_vexirq - .org kvm_ia64_ivt+0x3400 -////////////////////////////////////////////////////////////////////// -// 0x3400 Entry 13 (size 64 bundles) Reserved -ENTRY(kvm_virtual_exirq) - mov r31=pr - mov r19=13 - mov r30 =r0 - ;; -kvm_dispatch_vexirq: - cmp.eq p6,p0 = 1,r30 - ;; -(p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; -(p6) ld8 r1 = [r29] - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,1,0 - mov out0=r13 - - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - KVM_SAVE_REST - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_vexirq -END(kvm_virtual_exirq) - - .org kvm_ia64_ivt+0x3800 -///////////////////////////////////////////////////////////////////// -// 0x3800 Entry 14 (size 64 bundles) Reserved - KVM_FAULT(14) - // this code segment is from 2.6.16.13 - - .org kvm_ia64_ivt+0x3c00 -/////////////////////////////////////////////////////////////////////// -// 0x3c00 Entry 15 (size 64 bundles) Reserved - KVM_FAULT(15) - - .org kvm_ia64_ivt+0x4000 -/////////////////////////////////////////////////////////////////////// -// 0x4000 Entry 16 (size 64 bundles) Reserved - KVM_FAULT(16) - - .org kvm_ia64_ivt+0x4400 -////////////////////////////////////////////////////////////////////// -// 0x4400 Entry 17 (size 64 bundles) Reserved - KVM_FAULT(17) - - .org kvm_ia64_ivt+0x4800 -////////////////////////////////////////////////////////////////////// -// 0x4800 Entry 18 (size 64 bundles) Reserved - KVM_FAULT(18) - - .org kvm_ia64_ivt+0x4c00 -////////////////////////////////////////////////////////////////////// -// 0x4c00 Entry 19 (size 64 bundles) Reserved - KVM_FAULT(19) - - .org kvm_ia64_ivt+0x5000 -////////////////////////////////////////////////////////////////////// -// 0x5000 Entry 20 (size 16 bundles) Page Not Present -ENTRY(kvm_page_not_present) - KVM_REFLECT(20) -END(kvm_page_not_present) - - .org kvm_ia64_ivt+0x5100 -/////////////////////////////////////////////////////////////////////// -// 0x5100 Entry 21 (size 16 bundles) Key Permission vector -ENTRY(kvm_key_permission) - KVM_REFLECT(21) -END(kvm_key_permission) - - .org kvm_ia64_ivt+0x5200 -////////////////////////////////////////////////////////////////////// -// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) -ENTRY(kvm_iaccess_rights) - KVM_REFLECT(22) -END(kvm_iaccess_rights) - - .org kvm_ia64_ivt+0x5300 -////////////////////////////////////////////////////////////////////// -// 0x5300 Entry 23 (size 16 bundles) Data Access 
Rights (14,53) -ENTRY(kvm_daccess_rights) - KVM_REFLECT(23) -END(kvm_daccess_rights) - - .org kvm_ia64_ivt+0x5400 -///////////////////////////////////////////////////////////////////// -// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) -ENTRY(kvm_general_exception) - KVM_REFLECT(24) - KVM_FAULT(24) -END(kvm_general_exception) - - .org kvm_ia64_ivt+0x5500 -////////////////////////////////////////////////////////////////////// -// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) -ENTRY(kvm_disabled_fp_reg) - KVM_REFLECT(25) -END(kvm_disabled_fp_reg) - - .org kvm_ia64_ivt+0x5600 -//////////////////////////////////////////////////////////////////// -// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) -ENTRY(kvm_nat_consumption) - KVM_REFLECT(26) -END(kvm_nat_consumption) - - .org kvm_ia64_ivt+0x5700 -///////////////////////////////////////////////////////////////////// -// 0x5700 Entry 27 (size 16 bundles) Speculation (40) -ENTRY(kvm_speculation_vector) - KVM_REFLECT(27) -END(kvm_speculation_vector) - - .org kvm_ia64_ivt+0x5800 -///////////////////////////////////////////////////////////////////// -// 0x5800 Entry 28 (size 16 bundles) Reserved - KVM_FAULT(28) - - .org kvm_ia64_ivt+0x5900 -/////////////////////////////////////////////////////////////////// -// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) -ENTRY(kvm_debug_vector) - KVM_FAULT(29) -END(kvm_debug_vector) - - .org kvm_ia64_ivt+0x5a00 -/////////////////////////////////////////////////////////////// -// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) -ENTRY(kvm_unaligned_access) - KVM_REFLECT(30) -END(kvm_unaligned_access) - - .org kvm_ia64_ivt+0x5b00 -////////////////////////////////////////////////////////////////////// -// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) -ENTRY(kvm_unsupported_data_reference) - KVM_REFLECT(31) -END(kvm_unsupported_data_reference) - - .org kvm_ia64_ivt+0x5c00 -//////////////////////////////////////////////////////////////////// -// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) -ENTRY(kvm_floating_point_fault) - KVM_REFLECT(32) -END(kvm_floating_point_fault) - - .org kvm_ia64_ivt+0x5d00 -///////////////////////////////////////////////////////////////////// -// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) -ENTRY(kvm_floating_point_trap) - KVM_REFLECT(33) -END(kvm_floating_point_trap) - - .org kvm_ia64_ivt+0x5e00 -////////////////////////////////////////////////////////////////////// -// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) -ENTRY(kvm_lower_privilege_trap) - KVM_REFLECT(34) -END(kvm_lower_privilege_trap) - - .org kvm_ia64_ivt+0x5f00 -////////////////////////////////////////////////////////////////////// -// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) -ENTRY(kvm_taken_branch_trap) - KVM_REFLECT(35) -END(kvm_taken_branch_trap) - - .org kvm_ia64_ivt+0x6000 -//////////////////////////////////////////////////////////////////// -// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) -ENTRY(kvm_single_step_trap) - KVM_REFLECT(36) -END(kvm_single_step_trap) - .global kvm_virtualization_fault_back - .org kvm_ia64_ivt+0x6100 -///////////////////////////////////////////////////////////////////// -// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault -ENTRY(kvm_virtualization_fault) - mov r31=pr - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - st8 [r16] = r1 - adds r17 = VMM_VCPU_GP_OFFSET, r21 - ;; - ld8 r1 = [r17] - cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 
- cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 - cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 - cmp.eq p9,p0=EVENT_RSM,r24 - cmp.eq p10,p0=EVENT_SSM,r24 - cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 - cmp.eq p12,p0=EVENT_THASH,r24 -(p6) br.dptk.many kvm_asm_mov_from_ar -(p7) br.dptk.many kvm_asm_mov_from_rr -(p8) br.dptk.many kvm_asm_mov_to_rr -(p9) br.dptk.many kvm_asm_rsm -(p10) br.dptk.many kvm_asm_ssm -(p11) br.dptk.many kvm_asm_mov_to_psr -(p12) br.dptk.many kvm_asm_thash - ;; -kvm_virtualization_fault_back: - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - ld8 r1 = [r16] - ;; - mov r19=37 - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - cmp.ne p6,p0=EVENT_RFI, r24 -(p6) br.sptk kvm_dispatch_virtualization_fault - ;; - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] - ;; - adds r18=VMM_VPD_VIFS_OFFSET,r18 - ;; - ld8 r18=[r18] - ;; - tbit.z p6,p0=r18,63 -(p6) br.sptk kvm_dispatch_virtualization_fault - ;; -//if vifs.v=1 desert current register frame - alloc r18=ar.pfs,0,0,0,0 - br.sptk kvm_dispatch_virtualization_fault -END(kvm_virtualization_fault) - - .org kvm_ia64_ivt+0x6200 -////////////////////////////////////////////////////////////// -// 0x6200 Entry 38 (size 16 bundles) Reserved - KVM_FAULT(38) - - .org kvm_ia64_ivt+0x6300 -///////////////////////////////////////////////////////////////// -// 0x6300 Entry 39 (size 16 bundles) Reserved - KVM_FAULT(39) - - .org kvm_ia64_ivt+0x6400 -///////////////////////////////////////////////////////////////// -// 0x6400 Entry 40 (size 16 bundles) Reserved - KVM_FAULT(40) - - .org kvm_ia64_ivt+0x6500 -////////////////////////////////////////////////////////////////// -// 0x6500 Entry 41 (size 16 bundles) Reserved - KVM_FAULT(41) - - .org kvm_ia64_ivt+0x6600 -////////////////////////////////////////////////////////////////// -// 0x6600 Entry 42 (size 16 bundles) Reserved - KVM_FAULT(42) - - .org kvm_ia64_ivt+0x6700 -////////////////////////////////////////////////////////////////// -// 0x6700 Entry 43 (size 16 bundles) Reserved - KVM_FAULT(43) - - .org kvm_ia64_ivt+0x6800 -////////////////////////////////////////////////////////////////// -// 0x6800 Entry 44 (size 16 bundles) Reserved - KVM_FAULT(44) - - .org kvm_ia64_ivt+0x6900 -/////////////////////////////////////////////////////////////////// -// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception -//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) -ENTRY(kvm_ia32_exception) - KVM_FAULT(45) -END(kvm_ia32_exception) - - .org kvm_ia64_ivt+0x6a00 -//////////////////////////////////////////////////////////////////// -// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) -ENTRY(kvm_ia32_intercept) - KVM_FAULT(47) -END(kvm_ia32_intercept) - - .org kvm_ia64_ivt+0x6c00 -///////////////////////////////////////////////////////////////////// -// 0x6c00 Entry 48 (size 16 bundles) Reserved - KVM_FAULT(48) - - .org kvm_ia64_ivt+0x6d00 -////////////////////////////////////////////////////////////////////// -// 0x6d00 Entry 49 (size 16 bundles) Reserved - KVM_FAULT(49) - - .org kvm_ia64_ivt+0x6e00 -////////////////////////////////////////////////////////////////////// -// 0x6e00 Entry 50 (size 16 bundles) Reserved - KVM_FAULT(50) - - .org kvm_ia64_ivt+0x6f00 -///////////////////////////////////////////////////////////////////// -// 0x6f00 Entry 51 (size 16 bundles) Reserved - KVM_FAULT(52) - - .org kvm_ia64_ivt+0x7100 -//////////////////////////////////////////////////////////////////// -// 0x7100 Entry 53 (size 16 bundles) Reserved - 
KVM_FAULT(53) - - .org kvm_ia64_ivt+0x7200 -///////////////////////////////////////////////////////////////////// -// 0x7200 Entry 54 (size 16 bundles) Reserved - KVM_FAULT(54) - - .org kvm_ia64_ivt+0x7300 -//////////////////////////////////////////////////////////////////// -// 0x7300 Entry 55 (size 16 bundles) Reserved - KVM_FAULT(55) - - .org kvm_ia64_ivt+0x7400 -//////////////////////////////////////////////////////////////////// -// 0x7400 Entry 56 (size 16 bundles) Reserved - KVM_FAULT(56) - - .org kvm_ia64_ivt+0x7500 -///////////////////////////////////////////////////////////////////// -// 0x7500 Entry 57 (size 16 bundles) Reserved - KVM_FAULT(57) - - .org kvm_ia64_ivt+0x7600 -///////////////////////////////////////////////////////////////////// -// 0x7600 Entry 58 (size 16 bundles) Reserved - KVM_FAULT(58) - - .org kvm_ia64_ivt+0x7700 -//////////////////////////////////////////////////////////////////// -// 0x7700 Entry 59 (size 16 bundles) Reserved - KVM_FAULT(59) - - .org kvm_ia64_ivt+0x7800 -//////////////////////////////////////////////////////////////////// -// 0x7800 Entry 60 (size 16 bundles) Reserved - KVM_FAULT(60) - - .org kvm_ia64_ivt+0x7900 -///////////////////////////////////////////////////////////////////// -// 0x7900 Entry 61 (size 16 bundles) Reserved - KVM_FAULT(61) - - .org kvm_ia64_ivt+0x7a00 -///////////////////////////////////////////////////////////////////// -// 0x7a00 Entry 62 (size 16 bundles) Reserved - KVM_FAULT(62) - - .org kvm_ia64_ivt+0x7b00 -///////////////////////////////////////////////////////////////////// -// 0x7b00 Entry 63 (size 16 bundles) Reserved - KVM_FAULT(63) - - .org kvm_ia64_ivt+0x7c00 -//////////////////////////////////////////////////////////////////// -// 0x7c00 Entry 64 (size 16 bundles) Reserved - KVM_FAULT(64) - - .org kvm_ia64_ivt+0x7d00 -///////////////////////////////////////////////////////////////////// -// 0x7d00 Entry 65 (size 16 bundles) Reserved - KVM_FAULT(65) - - .org kvm_ia64_ivt+0x7e00 -///////////////////////////////////////////////////////////////////// -// 0x7e00 Entry 66 (size 16 bundles) Reserved - KVM_FAULT(66) - - .org kvm_ia64_ivt+0x7f00 -//////////////////////////////////////////////////////////////////// -// 0x7f00 Entry 67 (size 16 bundles) Reserved - KVM_FAULT(67) - - .org kvm_ia64_ivt+0x8000 -// There is no particular reason for this code to be here, other than that -// there happens to be space here that would go unused otherwise. If this -// fault ever gets "unreserved", simply moved the following code to a more -// suitable spot... 
- - -ENTRY(kvm_dtlb_miss_dispatch) - mov r19 = 2 - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault -END(kvm_dtlb_miss_dispatch) - -ENTRY(kvm_itlb_miss_dispatch) - - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault -END(kvm_itlb_miss_dispatch) - -ENTRY(kvm_dispatch_reflection) -/* - * Input: - * psr.ic: off - * r19: intr type (offset into ivt, see ia64_int.h) - * r31: contains saved predicates (pr) - */ - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - mov out0=cr.ifa - mov out1=cr.isr - mov out2=cr.iim - mov out3=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out4=16,r12 - br.call.sptk.many b6=reflect_interruption -END(kvm_dispatch_reflection) - -ENTRY(kvm_dispatch_virtualization_fault) - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!) 
- mov out0=r13 //vcpu - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out1=16,sp //regs - br.call.sptk.many b6=kvm_emulate -END(kvm_dispatch_virtualization_fault) - - -ENTRY(kvm_dispatch_interrupt) - KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - ssm psr.ic - ;; - srlz.i - ;; - (p15) ssm psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - mov out0=r13 // pass pointer to pt_regs as second arg - br.call.sptk.many b6=kvm_ia64_handle_irq -END(kvm_dispatch_interrupt) - -GLOBAL_ENTRY(ia64_leave_nested) - rsm psr.i - ;; - adds r21=PT(PR)+16,r12 - ;; - lfetch [r21],PT(CR_IPSR)-PT(PR) - adds r2=PT(B6)+16,r12 - adds r3=PT(R16)+16,r12 - ;; - lfetch [r21] - ld8 r28=[r2],8 // load b6 - adds r29=PT(R24)+16,r12 - - ld8.fill r16=[r3] - adds r3=PT(AR_CSD)-PT(R16),r3 - adds r30=PT(AR_CCV)+16,r12 - ;; - ld8.fill r24=[r29] - ld8 r15=[r30] // load ar.ccv - ;; - ld8 r29=[r2],16 // load b7 - ld8 r30=[r3],16 // load ar.csd - ;; - ld8 r31=[r2],16 // load ar.ssd - ld8.fill r8=[r3],16 - ;; - ld8.fill r9=[r2],16 - ld8.fill r10=[r3],PT(R17)-PT(R10) - ;; - ld8.fill r11=[r2],PT(R18)-PT(R11) - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - mov ar.csd=r30 - mov ar.ssd=r31 - ;; - rsm psr.i | psr.ic - // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - srlz.i - ;; - ld8.fill r22=[r2],24 - ld8.fill r23=[r3],24 - mov b6=r28 - ;; - ld8.fill r25=[r2],16 - ld8.fill r26=[r3],16 - mov b7=r29 - ;; - ld8.fill r27=[r2],16 - ld8.fill r28=[r3],16 - ;; - ld8.fill r29=[r2],16 - ld8.fill r30=[r3],24 - ;; - ld8.fill r31=[r2],PT(F9)-PT(R31) - adds r3=PT(F10)-PT(F6),r3 - ;; - ldf.fill f9=[r2],PT(F6)-PT(F9) - ldf.fill f10=[r3],PT(F8)-PT(F10) - ;; - ldf.fill f6=[r2],PT(F7)-PT(F6) - ;; - ldf.fill f7=[r2],PT(F11)-PT(F7) - ldf.fill f8=[r3],32 - ;; - srlz.i // ensure interruption collection is off - mov ar.ccv=r15 - ;; - bsw.0 // switch back to bank 0 (no stop bit required beforehand...) 
- ;; - ldf.fill f11=[r2] -// mov r18=r13 -// mov r21=r13 - adds r16=PT(CR_IPSR)+16,r12 - adds r17=PT(CR_IIP)+16,r12 - ;; - ld8 r29=[r16],16 // load cr.ipsr - ld8 r28=[r17],16 // load cr.iip - ;; - ld8 r30=[r16],16 // load cr.ifs - ld8 r25=[r17],16 // load ar.unat - ;; - ld8 r26=[r16],16 // load ar.pfs - ld8 r27=[r17],16 // load ar.rsc - cmp.eq p9,p0=r0,r0 - // set p9 to indicate that we should restore cr.ifs - ;; - ld8 r24=[r16],16 // load ar.rnat (may be garbage) - ld8 r23=[r17],16// load ar.bspstore (may be garbage) - ;; - ld8 r31=[r16],16 // load predicates - ld8 r22=[r17],16 // load b0 - ;; - ld8 r19=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 // load r1 - ;; - ld8.fill r12=[r16],16 - ld8.fill r13=[r17],16 - ;; - ld8 r20=[r16],16 // ar.fpsr - ld8.fill r15=[r17],16 - ;; - ld8.fill r14=[r16],16 - ld8.fill r2=[r17] - ;; - ld8.fill r3=[r16] - ;; - mov r16=ar.bsp // get existing backing store pointer - ;; - mov b0=r22 - mov ar.pfs=r26 - mov cr.ifs=r30 - mov cr.ipsr=r29 - mov ar.fpsr=r20 - mov cr.iip=r28 - ;; - mov ar.rsc=r27 - mov ar.unat=r25 - mov pr=r31,-1 - rfi -END(ia64_leave_nested) - -GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) -/* - * work.need_resched etc. mustn't get changed - *by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on: - */ - adds r2 = PT(R4)+16,r12 - adds r3 = PT(R5)+16,r12 - adds r8 = PT(EML_UNAT)+16,r12 - ;; - ld8 r8 = [r8] - ;; - mov ar.unat=r8 - ;; - ld8.fill r4=[r2],16 //load r4 - ld8.fill r5=[r3],16 //load r5 - ;; - ld8.fill r6=[r2] //load r6 - ld8.fill r7=[r3] //load r7 - ;; -END(ia64_leave_hypervisor_prepare) -//fall through -GLOBAL_ENTRY(ia64_leave_hypervisor) - rsm psr.i - ;; - br.call.sptk.many b0=leave_hypervisor_tail - ;; - adds r20=PT(PR)+16,r12 - adds r8=PT(EML_UNAT)+16,r12 - ;; - ld8 r8=[r8] - ;; - mov ar.unat=r8 - ;; - lfetch [r20],PT(CR_IPSR)-PT(PR) - adds r2 = PT(B6)+16,r12 - adds r3 = PT(B7)+16,r12 - ;; - lfetch [r20] - ;; - ld8 r24=[r2],16 /* B6 */ - ld8 r25=[r3],16 /* B7 */ - ;; - ld8 r26=[r2],16 /* ar_csd */ - ld8 r27=[r3],16 /* ar_ssd */ - mov b6 = r24 - ;; - ld8.fill r8=[r2],16 - ld8.fill r9=[r3],16 - mov b7 = r25 - ;; - mov ar.csd = r26 - mov ar.ssd = r27 - ;; - ld8.fill r10=[r2],PT(R15)-PT(R10) - ld8.fill r11=[r3],PT(R14)-PT(R11) - ;; - ld8.fill r15=[r2],PT(R16)-PT(R15) - ld8.fill r14=[r3],PT(R17)-PT(R14) - ;; - ld8.fill r16=[r2],16 - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - ;; - ld8.fill r22=[r2],16 - ld8.fill r23=[r3],16 - ;; - ld8.fill r24=[r2],16 - ld8.fill r25=[r3],16 - ;; - ld8.fill r26=[r2],16 - ld8.fill r27=[r3],16 - ;; - ld8.fill r28=[r2],16 - ld8.fill r29=[r3],16 - ;; - ld8.fill r30=[r2],PT(F6)-PT(R30) - ld8.fill r31=[r3],PT(F7)-PT(R31) - ;; - rsm psr.i | psr.ic - // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - srlz.i // ensure interruption collection is off - ;; - bsw.0 - ;; - adds r16 = PT(CR_IPSR)+16,r12 - adds r17 = PT(CR_IIP)+16,r12 - mov r21=r13 // get current - ;; - ld8 r31=[r16],16 // load cr.ipsr - ld8 r30=[r17],16 // load cr.iip - ;; - ld8 r29=[r16],16 // load cr.ifs - ld8 r28=[r17],16 // load ar.unat - ;; - ld8 r27=[r16],16 // load ar.pfs - ld8 r26=[r17],16 // load ar.rsc - ;; - ld8 r25=[r16],16 // load ar.rnat - ld8 r24=[r17],16 // load ar.bspstore - ;; - ld8 r23=[r16],16 // load predicates - ld8 r22=[r17],16 // load b0 - ;; - ld8 r20=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 //load r1 - ;; - ld8.fill 
r12=[r16],16 //load r12 - ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 - ;; - ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr - ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 - ;; - ld8.fill r3=[r16] //load r3 - ld8 r18=[r17] //load ar_ccv - ;; - mov ar.fpsr=r19 - mov ar.ccv=r18 - shr.u r18=r20,16 - ;; -kvm_rbs_switch: - mov r19=96 - -kvm_dont_preserve_current_frame: -/* - * To prevent leaking bits between the hypervisor and guest domain, - * we must clear the stacked registers in the "invalid" partition here. - * 5 registers/cycle on McKinley). - */ -# define pRecurse p6 -# define pReturn p7 -# define Nregs 14 - - alloc loc0=ar.pfs,2,Nregs-2,2,0 - shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) - sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize - ;; - mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" - shladd in0=loc1,3,r19 - mov in1=0 - ;; - TEXT_ALIGN(32) -kvm_rse_clear_invalid: - alloc loc0=ar.pfs,2,Nregs-2,2,0 - cmp.lt pRecurse,p0=Nregs*8,in0 - // if more than Nregs regs left to clear, (re)curse - add out0=-Nregs*8,in0 - add out1=1,in1 // increment recursion count - mov loc1=0 - mov loc2=0 - ;; - mov loc3=0 - mov loc4=0 - mov loc5=0 - mov loc6=0 - mov loc7=0 -(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid - ;; - mov loc8=0 - mov loc9=0 - cmp.ne pReturn,p0=r0,in1 - // if recursion count != 0, we need to do a br.ret - mov loc10=0 - mov loc11=0 -(pReturn) br.ret.dptk.many b0 - -# undef pRecurse -# undef pReturn - -// loadrs has already been shifted - alloc r16=ar.pfs,0,0,0,0 // drop current register frame - ;; - loadrs - ;; - mov ar.bspstore=r24 - ;; - mov ar.unat=r28 - mov ar.rnat=r25 - mov ar.rsc=r26 - ;; - mov cr.ipsr=r31 - mov cr.iip=r30 - mov cr.ifs=r29 - mov ar.pfs=r27 - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] //vpd - adds r17=VMM_VCPU_ISR_OFFSET,r21 - ;; - ld8 r17=[r17] - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] //vpsr - mov r25=r18 - adds r16= VMM_VCPU_GP_OFFSET,r21 - ;; - ld8 r16= [r16] // Put gp in r24 - movl r24=@gprel(ia64_vmm_entry) // calculate return address - ;; - add r24=r24,r16 - ;; - br.sptk.many kvm_vps_sync_write // call the service - ;; -END(ia64_leave_hypervisor) -// fall through -GLOBAL_ENTRY(ia64_vmm_entry) -/* - * must be at bank 0 - * parameter: - * r17:cr.isr - * r18:vpd - * r19:vpsr - * r22:b0 - * r23:predicate - */ - mov r24=r22 - mov r25=r18 - tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic -(p1) br.cond.sptk.few kvm_vps_resume_normal -(p2) br.cond.sptk.many kvm_vps_resume_handler - ;; -END(ia64_vmm_entry) - -/* - * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, - * u64 arg3, u64 arg4, u64 arg5, - * u64 arg6, u64 arg7); - * - * XXX: The currently defined services use only 4 args at the max. The - * rest are not consumed. 
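 * Editor's aside (not part of this patch): in0 is one of the PAL_VPS_*
 * offsets defined in vti.h; the entry point is the per-vcpu vsa base
 * (loaded via VMM_VCPU_VSA_BASE_OFFSET from r13) plus that offset. Only
 * in1..in3 are forwarded, in r25..r27. psr is sampled up front so that
 * psr.ic and psr.i can be restored afterwards exactly as they were
 * (predicates p7 and p6), the call itself runs with both masked, and the
 * service's result comes back in r31, from which it is moved to r8.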
- */ -GLOBAL_ENTRY(ia64_call_vsa) - .regstk 4,4,0,0 - -rpsave = loc0 -pfssave = loc1 -psrsave = loc2 -entry = loc3 -hostret = r24 - - alloc pfssave=ar.pfs,4,4,0,0 - mov rpsave=rp - adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 - ;; - ld8 entry=[entry] -1: mov hostret=ip - mov r25=in1 // copy arguments - mov r26=in2 - mov r27=in3 - mov psrsave=psr - ;; - tbit.nz p6,p0=psrsave,14 // IA64_PSR_I - tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC - ;; - add hostret=2f-1b,hostret // calculate return address - add entry=entry,in0 - ;; - rsm psr.i | psr.ic - ;; - srlz.i - mov b6=entry - br.cond.sptk b6 // call the service -2: -// Architectural sequence for enabling interrupts if necessary -(p7) ssm psr.ic - ;; -(p7) srlz.i - ;; -(p6) ssm psr.i - ;; - mov rp=rpsave - mov ar.pfs=pfssave - mov r8=r31 - ;; - srlz.d - br.ret.sptk rp - -END(ia64_call_vsa) - -#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) - -GLOBAL_ENTRY(vmm_reset_entry) - //set up ipsr, iip, vpd.vpsr, dcr - // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 - // For DCR: all bits 0 - bsw.0 - ;; - mov r21 =r13 - adds r14=-VMM_PT_REGS_SIZE, r12 - ;; - movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 - movl r10=0x8000000000000000 - adds r16=PT(CR_IIP), r14 - adds r20=PT(R1), r14 - ;; - rsm psr.ic | psr.i - ;; - srlz.i - ;; - mov ar.rsc = 0 - ;; - flushrs - ;; - mov ar.bspstore = 0 - // clear BSPSTORE - ;; - mov cr.ipsr=r6 - mov cr.ifs=r10 - ld8 r4 = [r16] // Set init iip for first run. - ld8 r1 = [r20] - ;; - mov cr.iip=r4 - adds r16=VMM_VPD_BASE_OFFSET,r13 - ;; - ld8 r18=[r16] - ;; - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] - mov r17=r0 - mov r22=r0 - mov r23=r0 - br.cond.sptk ia64_vmm_entry - br.ret.sptk b0 -END(vmm_reset_entry) diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h deleted file mode 100644 index b214b5b0432..00000000000 --- a/arch/ia64/kvm/vti.h +++ /dev/null @@ -1,290 +0,0 @@ -/* - * vti.h: prototype for generial vt related interface - * Copyright (c) 2004, Intel Corporation. - * - * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) - * Fred Yang (fred.yang@intel.com) - * Kun Tian (Kevin Tian) (kevin.tian@intel.com) - * - * Copyright (c) 2007, Intel Corporation. - * Zhang xiantao <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- */ -#ifndef _KVM_VT_I_H -#define _KVM_VT_I_H - -#ifndef __ASSEMBLY__ -#include <asm/page.h> - -#include <linux/kvm_host.h> - -/* define itr.i and itr.d in ia64_itr function */ -#define ITR 0x01 -#define DTR 0x02 -#define IaDTR 0x03 - -#define IA64_TR_VMM 6 /*itr6, dtr6 : maps vmm code, vmbuffer*/ -#define IA64_TR_VM_DATA 7 /*dtr7 : maps current vm data*/ - -#define RR6 (6UL<<61) -#define RR7 (7UL<<61) - - -/* config_options in pal_vp_init_env */ -#define VP_INITIALIZE 1UL -#define VP_FR_PMC 1UL<<1 -#define VP_OPCODE 1UL<<8 -#define VP_CAUSE 1UL<<9 -#define VP_FW_ACC 1UL<<63 - -/* init vp env with initializing vm_buffer */ -#define VP_INIT_ENV_INITALIZE (VP_INITIALIZE | VP_FR_PMC |\ - VP_OPCODE | VP_CAUSE | VP_FW_ACC) -/* init vp env without initializing vm_buffer */ -#define VP_INIT_ENV VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC - -#define PAL_VP_CREATE 265 -/* Stacked Virt. Initializes a new VPD for the operation of - * a new virtual processor in the virtual environment. - */ -#define PAL_VP_ENV_INFO 266 -/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/ -#define PAL_VP_EXIT_ENV 267 -/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/ -#define PAL_VP_INIT_ENV 268 -/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/ -#define PAL_VP_REGISTER 269 -/*Stacked Virt. Register a different host IVT for the virtual processor.*/ -#define PAL_VP_RESUME 270 -/* Renamed from PAL_VP_RESUME */ -#define PAL_VP_RESTORE 270 -/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/ -#define PAL_VP_SUSPEND 271 -/* Renamed from PAL_VP_SUSPEND */ -#define PAL_VP_SAVE 271 -/* Stacked Virt. Suspends operation for the specified virtual processor on - * the logical processor. - */ -#define PAL_VP_TERMINATE 272 -/* Stacked Virt. 
Terminates operation for the specified virtual processor.*/ - -union vac { - unsigned long value; - struct { - unsigned int a_int:1; - unsigned int a_from_int_cr:1; - unsigned int a_to_int_cr:1; - unsigned int a_from_psr:1; - unsigned int a_from_cpuid:1; - unsigned int a_cover:1; - unsigned int a_bsw:1; - long reserved:57; - }; -}; - -union vdc { - unsigned long value; - struct { - unsigned int d_vmsw:1; - unsigned int d_extint:1; - unsigned int d_ibr_dbr:1; - unsigned int d_pmc:1; - unsigned int d_to_pmd:1; - unsigned int d_itm:1; - long reserved:58; - }; -}; - -struct vpd { - union vac vac; - union vdc vdc; - unsigned long virt_env_vaddr; - unsigned long reserved1[29]; - unsigned long vhpi; - unsigned long reserved2[95]; - unsigned long vgr[16]; - unsigned long vbgr[16]; - unsigned long vnat; - unsigned long vbnat; - unsigned long vcpuid[5]; - unsigned long reserved3[11]; - unsigned long vpsr; - unsigned long vpr; - unsigned long reserved4[76]; - union { - unsigned long vcr[128]; - struct { - unsigned long dcr; - unsigned long itm; - unsigned long iva; - unsigned long rsv1[5]; - unsigned long pta; - unsigned long rsv2[7]; - unsigned long ipsr; - unsigned long isr; - unsigned long rsv3; - unsigned long iip; - unsigned long ifa; - unsigned long itir; - unsigned long iipa; - unsigned long ifs; - unsigned long iim; - unsigned long iha; - unsigned long rsv4[38]; - unsigned long lid; - unsigned long ivr; - unsigned long tpr; - unsigned long eoi; - unsigned long irr[4]; - unsigned long itv; - unsigned long pmv; - unsigned long cmcv; - unsigned long rsv5[5]; - unsigned long lrr0; - unsigned long lrr1; - unsigned long rsv6[46]; - }; - }; - unsigned long reserved5[128]; - unsigned long reserved6[3456]; - unsigned long vmm_avail[128]; - unsigned long reserved7[4096]; -}; - -#define PAL_PROC_VM_BIT (1UL << 40) -#define PAL_PROC_VMSW_BIT (1UL << 54) - -static inline s64 ia64_pal_vp_env_info(u64 *buffer_size, - u64 *vp_env_info) -{ - struct ia64_pal_retval iprv; - PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0); - *buffer_size = iprv.v0; - *vp_env_info = iprv.v1; - return iprv.status; -} - -static inline s64 ia64_pal_vp_exit_env(u64 iva) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0); - return iprv.status; -} - -static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr, - u64 vbase_addr, u64 *vsa_base) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr, - vbase_addr); - *vsa_base = iprv.v0; - - return iprv.status; -} - -static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0); - - return iprv.status; -} - -static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0); - - return iprv.status; -} - -#endif - -/*VPD field offset*/ -#define VPD_VAC_START_OFFSET 0 -#define VPD_VDC_START_OFFSET 8 -#define VPD_VHPI_START_OFFSET 256 -#define VPD_VGR_START_OFFSET 1024 -#define VPD_VBGR_START_OFFSET 1152 -#define VPD_VNAT_START_OFFSET 1280 -#define VPD_VBNAT_START_OFFSET 1288 -#define VPD_VCPUID_START_OFFSET 1296 -#define VPD_VPSR_START_OFFSET 1424 -#define VPD_VPR_START_OFFSET 1432 -#define VPD_VRSE_CFLE_START_OFFSET 1440 -#define VPD_VCR_START_OFFSET 2048 -#define VPD_VTPR_START_OFFSET 2576 -#define VPD_VRR_START_OFFSET 3072 -#define VPD_VMM_VAIL_START_OFFSET 31744 - -/*Virtualization 
faults*/ - -#define EVENT_MOV_TO_AR 1 -#define EVENT_MOV_TO_AR_IMM 2 -#define EVENT_MOV_FROM_AR 3 -#define EVENT_MOV_TO_CR 4 -#define EVENT_MOV_FROM_CR 5 -#define EVENT_MOV_TO_PSR 6 -#define EVENT_MOV_FROM_PSR 7 -#define EVENT_ITC_D 8 -#define EVENT_ITC_I 9 -#define EVENT_MOV_TO_RR 10 -#define EVENT_MOV_TO_DBR 11 -#define EVENT_MOV_TO_IBR 12 -#define EVENT_MOV_TO_PKR 13 -#define EVENT_MOV_TO_PMC 14 -#define EVENT_MOV_TO_PMD 15 -#define EVENT_ITR_D 16 -#define EVENT_ITR_I 17 -#define EVENT_MOV_FROM_RR 18 -#define EVENT_MOV_FROM_DBR 19 -#define EVENT_MOV_FROM_IBR 20 -#define EVENT_MOV_FROM_PKR 21 -#define EVENT_MOV_FROM_PMC 22 -#define EVENT_MOV_FROM_CPUID 23 -#define EVENT_SSM 24 -#define EVENT_RSM 25 -#define EVENT_PTC_L 26 -#define EVENT_PTC_G 27 -#define EVENT_PTC_GA 28 -#define EVENT_PTR_D 29 -#define EVENT_PTR_I 30 -#define EVENT_THASH 31 -#define EVENT_TTAG 32 -#define EVENT_TPA 33 -#define EVENT_TAK 34 -#define EVENT_PTC_E 35 -#define EVENT_COVER 36 -#define EVENT_RFI 37 -#define EVENT_BSW_0 38 -#define EVENT_BSW_1 39 -#define EVENT_VMSW 40 - -/**PAL virtual services offsets */ -#define PAL_VPS_RESUME_NORMAL 0x0000 -#define PAL_VPS_RESUME_HANDLER 0x0400 -#define PAL_VPS_SYNC_READ 0x0800 -#define PAL_VPS_SYNC_WRITE 0x0c00 -#define PAL_VPS_SET_PENDING_INTERRUPT 0x1000 -#define PAL_VPS_THASH 0x1400 -#define PAL_VPS_TTAG 0x1800 -#define PAL_VPS_RESTORE 0x1c00 -#define PAL_VPS_SAVE 0x2000 - -#endif/* _VT_I_H*/ diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c deleted file mode 100644 index a7869f8f49a..00000000000 --- a/arch/ia64/kvm/vtlb.c +++ /dev/null @@ -1,640 +0,0 @@ -/* - * vtlb.c: guest virtual tlb handling module. - * Copyright (c) 2004, Intel Corporation. - * Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com> - * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> - * - * Copyright (c) 2007, Intel Corporation. - * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> - * Xiantao Zhang <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include "vcpu.h" - -#include <linux/rwsem.h> - -#include <asm/tlb.h> - -/* - * Check to see if the address rid:va is translated by the TLB - */ - -static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va) -{ - return ((trp->p) && (trp->rid == rid) - && ((va-trp->vadr) < PSIZE(trp->ps))); -} - -/* - * Only for GUEST TR format. 
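 * Editor's aside (not part of this patch): the test below is the usual
 * closed-interval overlap check. With [sa1, ea1] the translation's range
 * and eva first decremented so that [sva, eva] is the query range, the
 * two overlap exactly when neither begins past the other's end, i.e.
 * !(sva > ea1 || sa1 > eva). For example, sa1 = 0x4000 with a 16KB page
 * gives ea1 = 0x7fff; a query of [0x6000, 0xa000) fails neither
 * comparison, so it overlaps.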
- */ -static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva) -{ - u64 sa1, ea1; - - if (!trp->p || trp->rid != rid) - return 0; - - sa1 = trp->vadr; - ea1 = sa1 + PSIZE(trp->ps) - 1; - eva -= 1; - if ((sva > ea1) || (sa1 > eva)) - return 0; - else - return 1; - -} - -void machine_tlb_purge(u64 va, u64 ps) -{ - ia64_ptcl(va, ps << 2); -} - -void local_flush_tlb_all(void) -{ - int i, j; - unsigned long flags, count0, count1; - unsigned long stride0, stride1, addr; - - addr = current_vcpu->arch.ptce_base; - count0 = current_vcpu->arch.ptce_count[0]; - count1 = current_vcpu->arch.ptce_count[1]; - stride0 = current_vcpu->arch.ptce_stride[0]; - stride1 = current_vcpu->arch.ptce_stride[1]; - - local_irq_save(flags); - for (i = 0; i < count0; ++i) { - for (j = 0; j < count1; ++j) { - ia64_ptce(addr); - addr += stride1; - } - addr += stride0; - } - local_irq_restore(flags); - ia64_srlz_i(); /* srlz.i implies srlz.d */ -} - -int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref) -{ - union ia64_rr vrr; - union ia64_pta vpta; - struct ia64_psr vpsr; - - vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - vrr.val = vcpu_get_rr(vcpu, vadr); - vpta.val = vcpu_get_pta(vcpu); - - if (vrr.ve & vpta.ve) { - switch (ref) { - case DATA_REF: - case NA_REF: - return vpsr.dt; - case INST_REF: - return vpsr.dt && vpsr.it && vpsr.ic; - case RSE_REF: - return vpsr.dt && vpsr.rt; - - } - } - return 0; -} - -struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag) -{ - u64 index, pfn, rid, pfn_bits; - - pfn_bits = vpta.size - 5 - 8; - pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr); - rid = _REGION_ID(vrr); - index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1)); - *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16); - - return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) + - (index << 5)); -} - -struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type) -{ - - struct thash_data *trp; - int i; - u64 rid; - - rid = vcpu_get_rr(vcpu, va); - rid = rid & RR_RID_MASK; - if (type == D_TLB) { - if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { - for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; - i < NDTRS; i++, trp++) { - if (__is_tr_translated(trp, rid, va)) - return trp; - } - } - } else { - if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { - for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; - i < NITRS; i++, trp++) { - if (__is_tr_translated(trp, rid, va)) - return trp; - } - } - } - - return NULL; -} - -static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte) -{ - union ia64_rr rr; - struct thash_data *head; - unsigned long ps, gpaddr; - - ps = itir_ps(itir); - rr.val = ia64_get_rr(ifa); - - gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) | - (ifa & ((1UL << ps) - 1)); - - head = (struct thash_data *)ia64_thash(ifa); - head->etag = INVALID_TI_TAG; - ia64_mf(); - head->page_flags = pte & ~PAGE_FLAGS_RV_MASK; - head->itir = rr.ps << 2; - head->etag = ia64_ttag(ifa); - head->gpaddr = gpaddr; -} - -void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) -{ - u64 i, dirty_pages = 1; - u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; - vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); - void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE; - - dirty_pages <<= ps <= PAGE_SHIFT ? 
0 : ps - PAGE_SHIFT; - - vmm_spin_lock(lock); - for (i = 0; i < dirty_pages; i++) { - /* avoid RMW */ - if (!test_bit(base_gfn + i, dirty_bitmap)) - set_bit(base_gfn + i , dirty_bitmap); - } - vmm_spin_unlock(lock); -} - -void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type) -{ - u64 phy_pte, psr; - union ia64_rr mrr; - - mrr.val = ia64_get_rr(va); - phy_pte = translate_phy_pte(&pte, itir, va); - - if (itir_ps(itir) >= mrr.ps) { - vhpt_insert(phy_pte, itir, va, pte); - } else { - phy_pte &= ~PAGE_FLAGS_RV_MASK; - psr = ia64_clear_ic(); - ia64_itc(type, va, phy_pte, itir_ps(itir)); - paravirt_dv_serialize_data(); - ia64_set_psr(psr); - } - - if (!(pte&VTLB_PTE_IO)) - mark_pages_dirty(v, pte, itir_ps(itir)); -} - -/* - * vhpt lookup - */ -struct thash_data *vhpt_lookup(u64 va) -{ - struct thash_data *head; - u64 tag; - - head = (struct thash_data *)ia64_thash(va); - tag = ia64_ttag(va); - if (head->etag == tag) - return head; - return NULL; -} - -u64 guest_vhpt_lookup(u64 iha, u64 *pte) -{ - u64 ret; - struct thash_data *data; - - data = __vtr_lookup(current_vcpu, iha, D_TLB); - if (data != NULL) - thash_vhpt_insert(current_vcpu, data->page_flags, - data->itir, iha, D_TLB); - - asm volatile ("rsm psr.ic|psr.i;;" - "srlz.d;;" - "ld8.s r9=[%1];;" - "tnat.nz p6,p7=r9;;" - "(p6) mov %0=1;" - "(p6) mov r9=r0;" - "(p7) extr.u r9=r9,0,53;;" - "(p7) mov %0=r0;" - "(p7) st8 [%2]=r9;;" - "ssm psr.ic;;" - "srlz.d;;" - "ssm psr.i;;" - "srlz.d;;" - : "=&r"(ret) : "r"(iha), "r"(pte) : "memory"); - - return ret; -} - -/* - * purge software guest tlb - */ - -static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps) -{ - struct thash_data *cur; - u64 start, curadr, size, psbits, tag, rr_ps, num; - union ia64_rr vrr; - struct thash_cb *hcb = &v->arch.vtlb; - - vrr.val = vcpu_get_rr(v, va); - psbits = VMX(v, psbits[(va >> 61)]); - start = va & ~((1UL << ps) - 1); - while (psbits) { - curadr = start; - rr_ps = __ffs(psbits); - psbits &= ~(1UL << rr_ps); - num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps)); - size = PSIZE(rr_ps); - vrr.ps = rr_ps; - while (num) { - cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag); - if (cur->etag == tag && cur->ps == rr_ps) - cur->etag = INVALID_TI_TAG; - curadr += size; - num--; - } - } -} - - -/* - * purge VHPT and machine TLB - */ -static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps) -{ - struct thash_data *cur; - u64 start, size, tag, num; - union ia64_rr rr; - - start = va & ~((1UL << ps) - 1); - rr.val = ia64_get_rr(va); - size = PSIZE(rr.ps); - num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps)); - while (num) { - cur = (struct thash_data *)ia64_thash(start); - tag = ia64_ttag(start); - if (cur->etag == tag) - cur->etag = INVALID_TI_TAG; - start += size; - num--; - } - machine_tlb_purge(va, ps); -} - -/* - * Insert an entry into hash TLB or VHPT. - * NOTES: - * 1: When inserting VHPT to thash, "va" is a must covered - * address by the inserted machine VHPT entry. - * 2: The format of entry is always in TLB. - * 3: The caller need to make sure the new entry will not overlap - * with any existed entry. 
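 * Editor's aside (not part of this patch): the bucket comes from
 * vsa_thash() above, which folds the region id and page number into an
 * index of (vpta.size - 5) bits: pfn_bits = vpta.size - 5 - 8 bits of
 * page number plus the low 8 bits of the rid. With 32-byte entries that
 * fills exactly 2^vpta.size bytes; e.g. vpta.size = 16 gives pfn_bits
 * = 3, 2048 buckets and a 64KB table. The rid/pfn bits not used in the
 * index go into the tag that is checked on lookup.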
- */ -void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va) -{ - struct thash_data *head; - union ia64_rr vrr; - u64 tag; - struct thash_cb *hcb = &v->arch.vtlb; - - vrr.val = vcpu_get_rr(v, va); - vrr.ps = itir_ps(itir); - VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps); - head = vsa_thash(hcb->pta, va, vrr.val, &tag); - head->page_flags = pte; - head->itir = itir; - head->etag = tag; -} - -int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type) -{ - struct thash_data *trp; - int i; - u64 end, rid; - - rid = vcpu_get_rr(vcpu, va); - rid = rid & RR_RID_MASK; - end = va + PSIZE(ps); - if (type == D_TLB) { - if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { - for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; - i < NDTRS; i++, trp++) { - if (__is_tr_overlap(trp, rid, va, end)) - return i; - } - } - } else { - if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { - for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; - i < NITRS; i++, trp++) { - if (__is_tr_overlap(trp, rid, va, end)) - return i; - } - } - } - return -1; -} - -/* - * Purge entries in VTLB and VHPT - */ -void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps) -{ - if (vcpu_quick_region_check(v->arch.tc_regions, va)) - vtlb_purge(v, va, ps); - vhpt_purge(v, va, ps); -} - -void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps) -{ - u64 old_va = va; - va = REGION_OFFSET(va); - if (vcpu_quick_region_check(v->arch.tc_regions, old_va)) - vtlb_purge(v, va, ps); - vhpt_purge(v, va, ps); -} - -u64 translate_phy_pte(u64 *pte, u64 itir, u64 va) -{ - u64 ps, ps_mask, paddr, maddr, io_mask; - union pte_flags phy_pte; - - ps = itir_ps(itir); - ps_mask = ~((1UL << ps) - 1); - phy_pte.val = *pte; - paddr = *pte; - paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask); - maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT); - io_mask = maddr & GPFN_IO_MASK; - if (io_mask && (io_mask != GPFN_PHYS_MMIO)) { - *pte |= VTLB_PTE_IO; - return -1; - } - maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) | - (paddr & ~PAGE_MASK); - phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT; - return phy_pte.val; -} - -/* - * Purge overlap TCs and then insert the new entry to emulate itc ops. - * Notes: Only TC entry can purge and insert. - */ -void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, - u64 ifa, int type) -{ - u64 ps; - u64 phy_pte, io_mask, index; - union ia64_rr vrr, mrr; - - ps = itir_ps(itir); - vrr.val = vcpu_get_rr(v, ifa); - mrr.val = ia64_get_rr(ifa); - - index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT; - io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK; - phy_pte = translate_phy_pte(&pte, itir, ifa); - - /* Ensure WB attribute if pte is related to a normal mem page, - * which is required by vga acceleration since qemu maps shared - * vram buffer with WB. 
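 * Editor's aside (not part of this patch): concretely, when the backing
 * page is ordinary RAM (not an I/O page and not mapped NaT), the
 * memory-attribute field is cleared in both the guest-visible pte and
 * the machine phy_pte; on ia64 _PAGE_MA_WB is the all-zero encoding, so
 * stripping _PAGE_MA_MASK forces write-back regardless of the attribute
 * the guest asked for.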
- */ - if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) && - io_mask != GPFN_PHYS_MMIO) { - pte &= ~_PAGE_MA_MASK; - phy_pte &= ~_PAGE_MA_MASK; - } - - vtlb_purge(v, ifa, ps); - vhpt_purge(v, ifa, ps); - - if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) { - vtlb_insert(v, pte, itir, ifa); - vcpu_quick_region_set(VMX(v, tc_regions), ifa); - } - if (pte & VTLB_PTE_IO) - return; - - if (ps >= mrr.ps) - vhpt_insert(phy_pte, itir, ifa, pte); - else { - u64 psr; - phy_pte &= ~PAGE_FLAGS_RV_MASK; - psr = ia64_clear_ic(); - ia64_itc(type, ifa, phy_pte, ps); - paravirt_dv_serialize_data(); - ia64_set_psr(psr); - } - if (!(pte&VTLB_PTE_IO)) - mark_pages_dirty(v, pte, ps); - -} - -/* - * Purge all TCs or VHPT entries including those in Hash table. - * - */ - -void thash_purge_all(struct kvm_vcpu *v) -{ - int i; - struct thash_data *head; - struct thash_cb *vtlb, *vhpt; - vtlb = &v->arch.vtlb; - vhpt = &v->arch.vhpt; - - for (i = 0; i < 8; i++) - VMX(v, psbits[i]) = 0; - - head = vtlb->hash; - for (i = 0; i < vtlb->num; i++) { - head->page_flags = 0; - head->etag = INVALID_TI_TAG; - head->itir = 0; - head->next = 0; - head++; - }; - - head = vhpt->hash; - for (i = 0; i < vhpt->num; i++) { - head->page_flags = 0; - head->etag = INVALID_TI_TAG; - head->itir = 0; - head->next = 0; - head++; - }; - - local_flush_tlb_all(); -} - -/* - * Lookup the hash table and its collision chain to find an entry - * covering this address rid:va or the entry. - * - * INPUT: - * in: TLB format for both VHPT & TLB. - */ -struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data) -{ - struct thash_data *cch; - u64 psbits, ps, tag; - union ia64_rr vrr; - - struct thash_cb *hcb = &v->arch.vtlb; - - cch = __vtr_lookup(v, va, is_data); - if (cch) - return cch; - - if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0) - return NULL; - - psbits = VMX(v, psbits[(va >> 61)]); - vrr.val = vcpu_get_rr(v, va); - while (psbits) { - ps = __ffs(psbits); - psbits &= ~(1UL << ps); - vrr.ps = ps; - cch = vsa_thash(hcb->pta, va, vrr.val, &tag); - if (cch->etag == tag && cch->ps == ps) - return cch; - } - - return NULL; -} - -/* - * Initialize internal control data before service. - */ -void thash_init(struct thash_cb *hcb, u64 sz) -{ - int i; - struct thash_data *head; - - hcb->pta.val = (unsigned long)hcb->hash; - hcb->pta.vf = 1; - hcb->pta.ve = 1; - hcb->pta.size = sz; - head = hcb->hash; - for (i = 0; i < hcb->num; i++) { - head->page_flags = 0; - head->itir = 0; - head->etag = INVALID_TI_TAG; - head->next = 0; - head++; - } -} - -u64 kvm_get_mpt_entry(u64 gpfn) -{ - u64 *base = (u64 *) KVM_P2M_BASE; - - if (gpfn >= (KVM_P2M_SIZE >> 3)) - panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn); - - return *(base + gpfn); -} - -u64 kvm_lookup_mpa(u64 gpfn) -{ - u64 maddr; - maddr = kvm_get_mpt_entry(gpfn); - return maddr&_PAGE_PPN_MASK; -} - -u64 kvm_gpa_to_mpa(u64 gpa) -{ - u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT); - return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK); -} - -/* - * Fetch guest bundle code. - * INPUT: - * gip: guest ip - * pbundle: used to return fetched bundle. 
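 * Editor's aside (not part of this patch): ppn in a thash entry counts
 * 4KB frames, so (ppn >> (ps - 12)) << ps rounds the frame number down
 * to the entry's page size and converts it to a byte address, to which
 * the in-page offset gip & (PSIZE(ps) - 1) is added; e.g. ppn = 0x12345
 * with ps = 16 (64KB pages) gives a base of 0x12340000. An IA-64 bundle
 * is 16 bytes, hence the two 64-bit loads into pbundle->i64[].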
- */ -int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle) -{ - u64 gpip = 0; /* guest physical IP*/ - u64 *vpa; - struct thash_data *tlb; - u64 maddr; - - if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) { - /* I-side physical mode */ - gpip = gip; - } else { - tlb = vtlb_lookup(vcpu, gip, I_TLB); - if (tlb) - gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) | - (gip & (PSIZE(tlb->ps) - 1)); - } - if (gpip) { - maddr = kvm_gpa_to_mpa(gpip); - } else { - tlb = vhpt_lookup(gip); - if (tlb == NULL) { - ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2); - return IA64_FAULT; - } - maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) - | (gip & (PSIZE(tlb->ps) - 1)); - } - vpa = (u64 *)__kvm_va(maddr); - - pbundle->i64[0] = *vpa++; - pbundle->i64[1] = *vpa; - - return IA64_NO_FAULT; -} - -void kvm_init_vhpt(struct kvm_vcpu *v) -{ - v->arch.vhpt.num = VHPT_NUM_ENTRIES; - thash_init(&v->arch.vhpt, VHPT_SHIFT); - ia64_set_pta(v->arch.vhpt.pta.val); - /*Enable VHPT here?*/ -} - -void kvm_init_vtlb(struct kvm_vcpu *v) -{ - v->arch.vtlb.num = VTLB_NUM_ENTRIES; - thash_init(&v->arch.vtlb, VTLB_SHIFT); -} diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 95cef0b5f83..df19d0c47be 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -565,6 +565,7 @@ void consistent_free(size_t size, void *vaddr); void consistent_sync(void *vaddr, size_t size, int direction); void consistent_sync_page(struct page *page, unsigned long offset, size_t size, int direction); +unsigned long consistent_virt_to_pfn(void *vaddr); void setup_memory(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index 4633c36c1b3..ed7ba8a1182 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -154,9 +154,36 @@ dma_direct_sync_sg_for_device(struct device *dev, __dma_sync(sg->dma_address, sg->length, direction); } +int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t handle, size_t size, + struct dma_attrs *attrs) +{ +#ifdef CONFIG_MMU + unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; + unsigned long pfn; + + if (off >= count || user_count > (count - off)) + return -ENXIO; + +#ifdef NOT_COHERENT_CACHE + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + pfn = consistent_virt_to_pfn(cpu_addr); +#else + pfn = virt_to_pfn(cpu_addr); +#endif + return remap_pfn_range(vma, vma->vm_start, pfn + off, + vma->vm_end - vma->vm_start, vma->vm_page_prot); +#else + return -ENXIO; +#endif +} + struct dma_map_ops dma_direct_ops = { .alloc = dma_direct_alloc_coherent, .free = dma_direct_free_coherent, + .mmap = dma_direct_mmap_coherent, .map_sg = dma_direct_map_sg, .dma_supported = dma_direct_dma_supported, .map_page = dma_direct_map_page, diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index e10ad930895..b06c3a7faf2 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -156,6 +156,25 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle) } EXPORT_SYMBOL(consistent_alloc); +#ifdef CONFIG_MMU +static pte_t *consistent_virt_to_pte(void *vaddr) +{ + unsigned long addr = (unsigned long)vaddr; + + return pte_offset_kernel(pmd_offset(pgd_offset_k(addr), addr), addr); +} + +unsigned long consistent_virt_to_pfn(void *vaddr) +{ + pte_t *ptep = 
consistent_virt_to_pte(vaddr); + + if (pte_none(*ptep) || !pte_present(*ptep)) + return 0; + + return pte_pfn(*ptep); +} +#endif + /* * free page(s) as defined by the above mapping. */ @@ -181,13 +200,9 @@ void consistent_free(size_t size, void *vaddr) } while (size -= PAGE_SIZE); #else do { - pte_t *ptep; + pte_t *ptep = consistent_virt_to_pte(vaddr); unsigned long pfn; - ptep = pte_offset_kernel(pmd_offset(pgd_offset_k( - (unsigned int)vaddr), - (unsigned int)vaddr), - (unsigned int)vaddr); if (!pte_none(*ptep) && pte_present(*ptep)) { pfn = pte_pfn(*ptep); pte_clear(&init_mm, (unsigned int)vaddr, ptep); diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 6acf0c2a0f9..942c7b1678e 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -170,8 +170,6 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr, unsigned long gpa, bool dirty); -extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, - long pte_index, unsigned long pteh, unsigned long ptel); extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, pgd_t *pgdir, bool realmode, unsigned long *idx_ret); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 0aa817933e6..2d81e202bdc 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -37,7 +37,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ -extern unsigned long kvm_rma_pages; #endif #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ @@ -148,7 +147,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, /* This covers 14..54 bits of va*/ rb = (v & ~0x7fUL) << 16; /* AVA field */ - rb |= v >> (62 - 8); /* B field */ + rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */ /* * AVA in v had cleared lower 23 bits. 
We need to derive * that from pteg index diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 047855619cc..7efd666a3fa 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -180,11 +180,6 @@ struct kvmppc_spapr_tce_table { struct page *pages[0]; }; -struct kvm_rma_info { - atomic_t use_count; - unsigned long base_pfn; -}; - /* XICS components, defined in book3s_xics.c */ struct kvmppc_xics; struct kvmppc_icp; @@ -214,16 +209,9 @@ struct revmap_entry { #define KVMPPC_RMAP_PRESENT 0x100000000ul #define KVMPPC_RMAP_INDEX 0xfffffffful -/* Low-order bits in memslot->arch.slot_phys[] */ -#define KVMPPC_PAGE_ORDER_MASK 0x1f -#define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */ -#define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */ -#define KVMPPC_GOT_PAGE 0x80 - struct kvm_arch_memory_slot { #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE unsigned long *rmap; - unsigned long *slot_phys; #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ }; @@ -242,14 +230,12 @@ struct kvm_arch { struct kvm_rma_info *rma; unsigned long vrma_slb_v; int rma_setup_done; - int using_mmu_notifiers; u32 hpt_order; atomic_t vcpus_running; u32 online_vcores; unsigned long hpt_npte; unsigned long hpt_mask; atomic_t hpte_mod_interest; - spinlock_t slot_phys_lock; cpumask_t need_tlb_flush; int hpt_cma_alloc; #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ @@ -297,6 +283,7 @@ struct kvmppc_vcore { struct list_head runnable_threads; spinlock_t lock; wait_queue_head_t wq; + spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ u64 stolen_tb; u64 preempt_tb; struct kvm_vcpu *runner; @@ -308,6 +295,7 @@ struct kvmppc_vcore { ulong dpdes; /* doorbell state (POWER8) */ void *mpp_buffer; /* Micro Partition Prefetch buffer */ bool mpp_buffer_is_valid; + ulong conferring_threads; }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) @@ -664,6 +652,8 @@ struct kvm_vcpu_arch { spinlock_t tbacct_lock; u64 busy_stolen; u64 busy_preempt; + + u32 emul_inst; #endif }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a6dcdb6d13c..46bf652c916 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -170,8 +170,6 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce); extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba); -extern struct kvm_rma_info *kvm_alloc_rma(void); -extern void kvm_release_rma(struct kvm_rma_info *ri); extern struct page *kvm_alloc_hpt(unsigned long nr_pages); extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); extern int kvmppc_core_init_vm(struct kvm *kvm); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c161ef3f28a..24d78e1871c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -489,7 +489,6 @@ int main(void) DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); - DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); @@ -499,6 +498,7 @@ int main(void) DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, 
arch.vpa.pinned_addr)); DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); + DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst)); #endif #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 602eb51d20b..f5769f19ae2 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -172,6 +172,7 @@ config KVM_XICS depends on KVM_BOOK3S_64 && !KVM_MPIC select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD + default y ---help--- Include support for the XICS (eXternal Interrupt Controller Specification) interrupt controller architecture used on diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index b32db4b9536..888bf466d8c 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -64,14 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) -{ -} - void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) { if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index cd0b0730e29..a2eb6d354a5 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -78,11 +78,6 @@ static inline bool sr_kp(u32 sr_raw) return (sr_raw & 0x20000000) ? true: false; } -static inline bool sr_nx(u32 sr_raw) -{ - return (sr_raw & 0x10000000) ? true: false; -} - static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data, bool iswrite); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d40770248b6..534acb3c6c3 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,8 +37,7 @@ #include <asm/ppc-opcode.h> #include <asm/cputable.h> -/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ -#define MAX_LPID_970 63 +#include "trace_hv.h" /* Power architecture requires HPT is at least 256kB */ #define PPC_MIN_HPT_ORDER 18 @@ -229,14 +228,9 @@ int kvmppc_mmu_hv_init(void) if (!cpu_has_feature(CPU_FTR_HVMODE)) return -EINVAL; - /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */ - if (cpu_has_feature(CPU_FTR_ARCH_206)) { - host_lpid = mfspr(SPRN_LPID); /* POWER7 */ - rsvd_lpid = LPID_RSVD; - } else { - host_lpid = 0; /* PPC970 */ - rsvd_lpid = MAX_LPID_970; - } + /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ + host_lpid = mfspr(SPRN_LPID); + rsvd_lpid = LPID_RSVD; kvmppc_init_lpid(rsvd_lpid + 1); @@ -259,130 +253,12 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, msr); } -/* - * This is called to get a reference to a guest page if there isn't - * one already in the memslot->arch.slot_phys[] array. 
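 * Editor's aside (not part of this patch): this function implemented the
 * legacy path in which every guest page was pinned up front with
 * get_user_pages_fast() (taking a whole huge page when slot alignment
 * allowed) and recorded as pfn-plus-flag-bits in slot_phys[]. The patch
 * deletes it together with arch.using_mmu_notifiers: HV KVM now always
 * faults pages in on demand and relies on the mmu notifier hooks
 * (kvm_unmap_hva_hv() and friends below), which is also why the H_ENTER
 * special case disappears from kvmppc_pseries_do_hcall() further down.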
- */ -static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, - struct kvm_memory_slot *memslot, - unsigned long psize) -{ - unsigned long start; - long np, err; - struct page *page, *hpage, *pages[1]; - unsigned long s, pgsize; - unsigned long *physp; - unsigned int is_io, got, pgorder; - struct vm_area_struct *vma; - unsigned long pfn, i, npages; - - physp = memslot->arch.slot_phys; - if (!physp) - return -EINVAL; - if (physp[gfn - memslot->base_gfn]) - return 0; - - is_io = 0; - got = 0; - page = NULL; - pgsize = psize; - err = -EINVAL; - start = gfn_to_hva_memslot(memslot, gfn); - - /* Instantiate and get the page we want access to */ - np = get_user_pages_fast(start, 1, 1, pages); - if (np != 1) { - /* Look up the vma for the page */ - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, start); - if (!vma || vma->vm_start > start || - start + psize > vma->vm_end || - !(vma->vm_flags & VM_PFNMAP)) - goto up_err; - is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); - pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); - /* check alignment of pfn vs. requested page size */ - if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1))) - goto up_err; - up_read(¤t->mm->mmap_sem); - - } else { - page = pages[0]; - got = KVMPPC_GOT_PAGE; - - /* See if this is a large page */ - s = PAGE_SIZE; - if (PageHuge(page)) { - hpage = compound_head(page); - s <<= compound_order(hpage); - /* Get the whole large page if slot alignment is ok */ - if (s > psize && slot_is_aligned(memslot, s) && - !(memslot->userspace_addr & (s - 1))) { - start &= ~(s - 1); - pgsize = s; - get_page(hpage); - put_page(page); - page = hpage; - } - } - if (s < psize) - goto out; - pfn = page_to_pfn(page); - } - - npages = pgsize >> PAGE_SHIFT; - pgorder = __ilog2(npages); - physp += (gfn - memslot->base_gfn) & ~(npages - 1); - spin_lock(&kvm->arch.slot_phys_lock); - for (i = 0; i < npages; ++i) { - if (!physp[i]) { - physp[i] = ((pfn + i) << PAGE_SHIFT) + - got + is_io + pgorder; - got = 0; - } - } - spin_unlock(&kvm->arch.slot_phys_lock); - err = 0; - - out: - if (got) - put_page(page); - return err; - - up_err: - up_read(¤t->mm->mmap_sem); - return err; -} - long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, unsigned long *pte_idx_ret) { - unsigned long psize, gpa, gfn; - struct kvm_memory_slot *memslot; long ret; - if (kvm->arch.using_mmu_notifiers) - goto do_insert; - - psize = hpte_page_size(pteh, ptel); - if (!psize) - return H_PARAMETER; - - pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); - - /* Find the memslot (if any) for this address */ - gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); - gfn = gpa >> PAGE_SHIFT; - memslot = gfn_to_memslot(kvm, gfn); - if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) { - if (!slot_is_aligned(memslot, psize)) - return H_PARAMETER; - if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0) - return H_PARAMETER; - } - - do_insert: /* Protect linux PTE lookup from page table destruction */ rcu_read_lock_sched(); /* this disables preemption too */ ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel, @@ -397,19 +273,6 @@ long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, } -/* - * We come here on a H_ENTER call from the guest when we are not - * using mmu notifiers and we don't have the requested page pinned - * already. 
- */ -long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, - long pte_index, unsigned long pteh, - unsigned long ptel) -{ - return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index, - pteh, ptel, &vcpu->arch.gpr[4]); -} - static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu, gva_t eaddr) { @@ -494,7 +357,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G)); /* Storage key permission check for POWER7 */ - if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) { + if (data && virtmode) { int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr); if (amrfield & 1) gpte->may_read = 0; @@ -622,14 +485,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, gfn = gpa >> PAGE_SHIFT; memslot = gfn_to_memslot(kvm, gfn); + trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr); + /* No memslot means it's an emulated MMIO region */ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, dsisr & DSISR_ISSTORE); - if (!kvm->arch.using_mmu_notifiers) - return -EFAULT; /* should never get here */ - /* * This should never happen, because of the slot_is_aligned() * check in kvmppc_do_h_enter(). @@ -641,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); + ret = -EFAULT; is_io = 0; pfn = 0; page = NULL; @@ -664,7 +527,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } up_read(¤t->mm->mmap_sem); if (!pfn) - return -EFAULT; + goto out_put; } else { page = pages[0]; pfn = page_to_pfn(page); @@ -694,14 +557,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } } - ret = -EFAULT; if (psize > pte_size) goto out_put; /* Check WIMG vs. the actual page we're accessing */ if (!hpte_cache_flags_ok(r, is_io)) { if (is_io) - return -EFAULT; + goto out_put; + /* * Allow guest to map emulated device memory as * uncacheable, but actually make it cacheable. 
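 * Editor's aside (not part of this patch): in the page-fault handler
 * below, ret is now preset to -EFAULT much earlier, so the error returns
 * can funnel through out_put instead of returning directly; that keeps
 * the page-reference handling in one place and ensures the new
 * trace_kvm_page_fault_exit() tracepoint fires on every exit path.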
@@ -765,6 +628,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, SetPageDirty(page); out_put: + trace_kvm_page_fault_exit(vcpu, hpte, ret); + if (page) { /* * We drop pages[0] here, not page because page might @@ -895,8 +760,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && hpte_rpn(ptel, psize) == gfn) { - if (kvm->arch.using_mmu_notifiers) - hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); /* Harvest R and C */ rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); @@ -914,15 +778,13 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) { - if (kvm->arch.using_mmu_notifiers) - kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); + kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); return 0; } int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) { - if (kvm->arch.using_mmu_notifiers) - kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); + kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); return 0; } @@ -1004,8 +866,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) { - if (!kvm->arch.using_mmu_notifiers) - return 0; return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); } @@ -1042,15 +902,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) { - if (!kvm->arch.using_mmu_notifiers) - return 0; return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); } void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) { - if (!kvm->arch.using_mmu_notifiers) - return; kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); } @@ -1117,8 +973,11 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) } /* Now check and modify the HPTE */ - if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) + if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) { + /* unlock and continue */ + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); continue; + } /* need to make it temporarily absent so C is stable */ hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); @@ -1206,35 +1065,17 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, struct page *page, *pages[1]; int npages; unsigned long hva, offset; - unsigned long pa; - unsigned long *physp; int srcu_idx; srcu_idx = srcu_read_lock(&kvm->srcu); memslot = gfn_to_memslot(kvm, gfn); if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) goto err; - if (!kvm->arch.using_mmu_notifiers) { - physp = memslot->arch.slot_phys; - if (!physp) - goto err; - physp += gfn - memslot->base_gfn; - pa = *physp; - if (!pa) { - if (kvmppc_get_guest_page(kvm, gfn, memslot, - PAGE_SIZE) < 0) - goto err; - pa = *physp; - } - page = pfn_to_page(pa >> PAGE_SHIFT); - get_page(page); - } else { - hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, 1, pages); - if (npages < 1) - goto err; - page = pages[0]; - } + hva = gfn_to_hva_memslot(memslot, gfn); + npages = get_user_pages_fast(hva, 1, 1, pages); + if (npages < 1) + goto err; + page = pages[0]; srcu_read_unlock(&kvm->srcu, srcu_idx); offset = gpa & (PAGE_SIZE - 1); @@ -1258,7 +1099,7 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, put_page(page); - if (!dirty || !kvm->arch.using_mmu_notifiers) 
+ if (!dirty) return; /* We need to mark this page dirty in the rmap chain */ @@ -1539,9 +1380,15 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); lbuf = (unsigned long __user *)buf; for (j = 0; j < hdr.n_valid; ++j) { + __be64 hpte_v; + __be64 hpte_r; + err = -EFAULT; - if (__get_user(v, lbuf) || __get_user(r, lbuf + 1)) + if (__get_user(hpte_v, lbuf) || + __get_user(hpte_r, lbuf + 1)) goto out; + v = be64_to_cpu(hpte_v); + r = be64_to_cpu(hpte_r); err = -EINVAL; if (!(v & HPTE_V_VALID)) goto out; @@ -1652,10 +1499,7 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) { struct kvmppc_mmu *mmu = &vcpu->arch.mmu; - if (cpu_has_feature(CPU_FTR_ARCH_206)) - vcpu->arch.slb_nr = 32; /* POWER7 */ - else - vcpu->arch.slb_nr = 64; + vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */ mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e63587d30b7..de4018a1bc4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -58,6 +58,9 @@ #include "book3s.h" +#define CREATE_TRACE_POINTS +#include "trace_hv.h" + /* #define EXIT_DEBUG */ /* #define EXIT_DEBUG_SIMPLE */ /* #define EXIT_DEBUG_INT */ @@ -135,11 +138,10 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) * stolen. * * Updates to busy_stolen are protected by arch.tbacct_lock; - * updates to vc->stolen_tb are protected by the arch.tbacct_lock - * of the vcpu that has taken responsibility for running the vcore - * (i.e. vc->runner). The stolen times are measured in units of - * timebase ticks. (Note that the != TB_NIL checks below are - * purely defensive; they should never fail.) + * updates to vc->stolen_tb are protected by the vcore->stoltb_lock + * lock. The stolen times are measured in units of timebase ticks. + * (Note that the != TB_NIL checks below are purely defensive; + * they should never fail.) */ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) @@ -147,12 +149,21 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; - spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); - if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE && - vc->preempt_tb != TB_NIL) { - vc->stolen_tb += mftb() - vc->preempt_tb; - vc->preempt_tb = TB_NIL; + /* + * We can test vc->runner without taking the vcore lock, + * because only this task ever sets vc->runner to this + * vcpu, and once it is set to this vcpu, only this task + * ever sets it to NULL. 
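 * Editor's aside (not part of this patch): this is the usual
 * single-writer argument: a field written only by this task may be read
 * by this task without locking. The timestamps themselves move under the
 * new vc->stoltb_lock rather than piggy-backing on the runner vcpu's
 * tbacct_lock, which is what lets vcore_stolen_time() below drop its
 * careful are-we-the-runner reasoning and simply take the lock.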
+ */ + if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { + spin_lock_irqsave(&vc->stoltb_lock, flags); + if (vc->preempt_tb != TB_NIL) { + vc->stolen_tb += mftb() - vc->preempt_tb; + vc->preempt_tb = TB_NIL; + } + spin_unlock_irqrestore(&vc->stoltb_lock, flags); } + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && vcpu->arch.busy_preempt != TB_NIL) { vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt; @@ -166,9 +177,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; - spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); - if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) + if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { + spin_lock_irqsave(&vc->stoltb_lock, flags); vc->preempt_tb = mftb(); + spin_unlock_irqrestore(&vc->stoltb_lock, flags); + } + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) vcpu->arch.busy_preempt = mftb(); spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); @@ -191,9 +205,6 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) struct kvmppc_vcore *vc = vcpu->arch.vcore; if (arch_compat) { - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - return -EINVAL; /* 970 has no compat mode support */ - switch (arch_compat) { case PVR_ARCH_205: /* @@ -505,25 +516,14 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) { u64 p; + unsigned long flags; - /* - * If we are the task running the vcore, then since we hold - * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb - * can't be updated, so we don't need the tbacct_lock. - * If the vcore is inactive, it can't become active (since we - * hold the vcore lock), so the vcpu load/put functions won't - * update stolen_tb/preempt_tb, and we don't need tbacct_lock. - */ + spin_lock_irqsave(&vc->stoltb_lock, flags); + p = vc->stolen_tb; if (vc->vcore_state != VCORE_INACTIVE && - vc->runner->arch.run_task != current) { - spin_lock_irq(&vc->runner->arch.tbacct_lock); - p = vc->stolen_tb; - if (vc->preempt_tb != TB_NIL) - p += now - vc->preempt_tb; - spin_unlock_irq(&vc->runner->arch.tbacct_lock); - } else { - p = vc->stolen_tb; - } + vc->preempt_tb != TB_NIL) + p += now - vc->preempt_tb; + spin_unlock_irqrestore(&vc->stoltb_lock, flags); return p; } @@ -607,10 +607,45 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, } } +static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) +{ + struct kvmppc_vcore *vcore = target->arch.vcore; + + /* + * We expect to have been called by the real mode handler + * (kvmppc_rm_h_confer()) which would have directly returned + * H_SUCCESS if the source vcore wasn't idle (e.g. if it may + * have useful work to do and should not confer) so we don't + * recheck that here. 
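 * Editor's aside (not part of this patch): if the target vcpu is
 * runnable inside an active vcore, the yield is redirected to the
 * vcore's runner task, since that is what actually occupies a host CPU.
 * The kvmppc_get_yield_count() helper below pairs with the H_CONFER
 * change further down: the guest passes the yield count it sampled in
 * gpr 5, and if the target's lppaca count no longer matches, the target
 * has run in the meantime and the confer is dropped as stale.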
+ */ + + spin_lock(&vcore->lock); + if (target->arch.state == KVMPPC_VCPU_RUNNABLE && + vcore->vcore_state != VCORE_INACTIVE) + target = vcore->runner; + spin_unlock(&vcore->lock); + + return kvm_vcpu_yield_to(target); +} + +static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu) +{ + int yield_count = 0; + struct lppaca *lppaca; + + spin_lock(&vcpu->arch.vpa_update_lock); + lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr; + if (lppaca) + yield_count = lppaca->yield_count; + spin_unlock(&vcpu->arch.vpa_update_lock); + return yield_count; +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { unsigned long req = kvmppc_get_gpr(vcpu, 3); unsigned long target, ret = H_SUCCESS; + int yield_count; struct kvm_vcpu *tvcpu; int idx, rc; @@ -619,14 +654,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) return RESUME_HOST; switch (req) { - case H_ENTER: - idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4), - kvmppc_get_gpr(vcpu, 5), - kvmppc_get_gpr(vcpu, 6), - kvmppc_get_gpr(vcpu, 7)); - srcu_read_unlock(&vcpu->kvm->srcu, idx); - break; case H_CEDE: break; case H_PROD: @@ -654,7 +681,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) ret = H_PARAMETER; break; } - kvm_vcpu_yield_to(tvcpu); + yield_count = kvmppc_get_gpr(vcpu, 5); + if (kvmppc_get_yield_count(tvcpu) != yield_count) + break; + kvm_arch_vcpu_yield_to(tvcpu); break; case H_REGISTER_VPA: ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4), @@ -769,6 +799,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; + /* HMI is hypervisor interrupt and host has handled it. Resume guest.*/ + case BOOK3S_INTERRUPT_HMI: case BOOK3S_INTERRUPT_PERFMON: r = RESUME_GUEST; break; @@ -837,6 +869,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, * Accordingly return to Guest or Host. */ case BOOK3S_INTERRUPT_H_EMUL_ASSIST: + if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED) + vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ? 
+ swab32(vcpu->arch.emul_inst) : + vcpu->arch.emul_inst; if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { r = kvmppc_emulate_debug_inst(run, vcpu); } else { @@ -1357,6 +1393,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); + spin_lock_init(&vcore->stoltb_lock); init_waitqueue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; @@ -1694,9 +1731,11 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vc->n_woken = 0; vc->nap_count = 0; vc->entry_exit_count = 0; + vc->preempt_tb = TB_NIL; vc->vcore_state = VCORE_STARTING; vc->in_guest = 0; vc->napping_threads = 0; + vc->conferring_threads = 0; /* * Updating any of the vpas requires calling kvmppc_pin_guest_page, @@ -1726,6 +1765,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { kvmppc_start_thread(vcpu); kvmppc_create_dtl_entry(vcpu, vc); + trace_kvm_guest_enter(vcpu); } /* Set this explicitly in case thread 0 doesn't have a vcpu */ @@ -1734,6 +1774,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vc->vcore_state = VCORE_RUNNING; preempt_disable(); + + trace_kvmppc_run_core(vc, 0); + spin_unlock(&vc->lock); kvm_guest_enter(); @@ -1779,6 +1822,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_core_pending_dec(vcpu)) kvmppc_core_dequeue_dec(vcpu); + trace_kvm_guest_exit(vcpu); + ret = RESUME_GUEST; if (vcpu->arch.trap) ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, @@ -1804,6 +1849,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) wake_up(&vcpu->arch.cpu_run); } } + + trace_kvmppc_run_core(vc, 1); } /* @@ -1826,15 +1873,37 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state) */ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { + struct kvm_vcpu *vcpu; + int do_sleep = 1; + DEFINE_WAIT(wait); prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + + /* + * Check one last time for pending exceptions and ceded state after + * we put ourselves on the wait queue + */ + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { + if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) { + do_sleep = 0; + break; + } + } + + if (!do_sleep) { + finish_wait(&vc->wq, &wait); + return; + } + vc->vcore_state = VCORE_SLEEPING; + trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); schedule(); finish_wait(&vc->wq, &wait); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; + trace_kvmppc_vcore_blocked(vc, 1); } static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) @@ -1843,6 +1912,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) struct kvmppc_vcore *vc; struct kvm_vcpu *v, *vn; + trace_kvmppc_run_vcpu_enter(vcpu); + kvm_run->exit_reason = 0; vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; @@ -1872,6 +1943,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) VCORE_EXIT_COUNT(vc) == 0) { kvmppc_create_dtl_entry(vcpu, vc); kvmppc_start_thread(vcpu); + trace_kvm_guest_enter(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { wake_up(&vc->wq); } @@ -1936,6 +2008,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) wake_up(&v->arch.cpu_run); } + trace_kvmppc_run_vcpu_exit(vcpu, kvm_run); spin_unlock(&vc->lock); return vcpu->arch.ret; } @@ -1962,7 +2035,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Order vcpus_running vs. 
rma_setup_done, see kvmppc_alloc_reset_hpt */ smp_mb(); - /* On the first time here, set up HTAB and VRMA or RMA */ + /* On the first time here, set up HTAB and VRMA */ if (!vcpu->kvm->arch.rma_setup_done) { r = kvmppc_hv_setup_htab_rma(vcpu); if (r) @@ -1981,7 +2054,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_PAPR_HCALL && !(vcpu->arch.shregs.msr & MSR_PR)) { + trace_kvm_hcall_enter(vcpu); r = kvmppc_pseries_do_hcall(vcpu); + trace_kvm_hcall_exit(vcpu, r); kvmppc_core_prepare_to_enter(vcpu); } else if (r == RESUME_PAGE_FAULT) { srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -1997,98 +2072,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } - -/* Work out RMLS (real mode limit selector) field value for a given RMA size. - Assumes POWER7 or PPC970. */ -static inline int lpcr_rmls(unsigned long rma_size) -{ - switch (rma_size) { - case 32ul << 20: /* 32 MB */ - if (cpu_has_feature(CPU_FTR_ARCH_206)) - return 8; /* only supported on POWER7 */ - return -1; - case 64ul << 20: /* 64 MB */ - return 3; - case 128ul << 20: /* 128 MB */ - return 7; - case 256ul << 20: /* 256 MB */ - return 4; - case 1ul << 30: /* 1 GB */ - return 2; - case 16ul << 30: /* 16 GB */ - return 1; - case 256ul << 30: /* 256 GB */ - return 0; - default: - return -1; - } -} - -static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *page; - struct kvm_rma_info *ri = vma->vm_file->private_data; - - if (vmf->pgoff >= kvm_rma_pages) - return VM_FAULT_SIGBUS; - - page = pfn_to_page(ri->base_pfn + vmf->pgoff); - get_page(page); - vmf->page = page; - return 0; -} - -static const struct vm_operations_struct kvm_rma_vm_ops = { - .fault = kvm_rma_fault, -}; - -static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) -{ - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; - vma->vm_ops = &kvm_rma_vm_ops; - return 0; -} - -static int kvm_rma_release(struct inode *inode, struct file *filp) -{ - struct kvm_rma_info *ri = filp->private_data; - - kvm_release_rma(ri); - return 0; -} - -static const struct file_operations kvm_rma_fops = { - .mmap = kvm_rma_mmap, - .release = kvm_rma_release, -}; - -static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, - struct kvm_allocate_rma *ret) -{ - long fd; - struct kvm_rma_info *ri; - /* - * Only do this on PPC970 in HV mode - */ - if (!cpu_has_feature(CPU_FTR_HVMODE) || - !cpu_has_feature(CPU_FTR_ARCH_201)) - return -EINVAL; - - if (!kvm_rma_pages) - return -EINVAL; - - ri = kvm_alloc_rma(); - if (!ri) - return -ENOMEM; - - fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC); - if (fd < 0) - kvm_release_rma(ri); - - ret->rma_size = kvm_rma_pages << PAGE_SHIFT; - return fd; -} - static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, int linux_psize) { @@ -2167,26 +2150,6 @@ out: return r; } -static void unpin_slot(struct kvm_memory_slot *memslot) -{ - unsigned long *physp; - unsigned long j, npages, pfn; - struct page *page; - - physp = memslot->arch.slot_phys; - npages = memslot->npages; - if (!physp) - return; - for (j = 0; j < npages; j++) { - if (!(physp[j] & KVMPPC_GOT_PAGE)) - continue; - pfn = physp[j] >> PAGE_SHIFT; - page = pfn_to_page(pfn); - SetPageDirty(page); - put_page(page); - } -} - static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { @@ -2194,11 +2157,6 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, 
vfree(free->arch.rmap); free->arch.rmap = NULL; } - if (!dont || free->arch.slot_phys != dont->arch.slot_phys) { - unpin_slot(free); - vfree(free->arch.slot_phys); - free->arch.slot_phys = NULL; - } } static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, @@ -2207,7 +2165,6 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); if (!slot->arch.rmap) return -ENOMEM; - slot->arch.slot_phys = NULL; return 0; } @@ -2216,17 +2173,6 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem) { - unsigned long *phys; - - /* Allocate a slot_phys array if needed */ - phys = memslot->arch.slot_phys; - if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) { - phys = vzalloc(memslot->npages * sizeof(unsigned long)); - if (!phys) - return -ENOMEM; - memslot->arch.slot_phys = phys; - } - return 0; } @@ -2284,17 +2230,11 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { int err = 0; struct kvm *kvm = vcpu->kvm; - struct kvm_rma_info *ri = NULL; unsigned long hva; struct kvm_memory_slot *memslot; struct vm_area_struct *vma; unsigned long lpcr = 0, senc; - unsigned long lpcr_mask = 0; unsigned long psize, porder; - unsigned long rma_size; - unsigned long rmls; - unsigned long *physp; - unsigned long i, npages; int srcu_idx; mutex_lock(&kvm->lock); @@ -2329,88 +2269,25 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) psize = vma_kernel_pagesize(vma); porder = __ilog2(psize); - /* Is this one of our preallocated RMAs? */ - if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops && - hva == vma->vm_start) - ri = vma->vm_file->private_data; - up_read(¤t->mm->mmap_sem); - if (!ri) { - /* On POWER7, use VRMA; on PPC970, give up */ - err = -EPERM; - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - pr_err("KVM: CPU requires an RMO\n"); - goto out_srcu; - } + /* We can handle 4k, 64k or 16M pages in the VRMA */ + err = -EINVAL; + if (!(psize == 0x1000 || psize == 0x10000 || + psize == 0x1000000)) + goto out_srcu; - /* We can handle 4k, 64k or 16M pages in the VRMA */ - err = -EINVAL; - if (!(psize == 0x1000 || psize == 0x10000 || - psize == 0x1000000)) - goto out_srcu; + /* Update VRMASD field in the LPCR */ + senc = slb_pgsize_encoding(psize); + kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + /* the -4 is to account for senc values starting at 0x10 */ + lpcr = senc << (LPCR_VRMASD_SH - 4); - /* Update VRMASD field in the LPCR */ - senc = slb_pgsize_encoding(psize); - kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | - (VRMA_VSID << SLB_VSID_SHIFT_1T); - lpcr_mask = LPCR_VRMASD; - /* the -4 is to account for senc values starting at 0x10 */ - lpcr = senc << (LPCR_VRMASD_SH - 4); + /* Create HPTEs in the hash page table for the VRMA */ + kvmppc_map_vrma(vcpu, memslot, porder); - /* Create HPTEs in the hash page table for the VRMA */ - kvmppc_map_vrma(vcpu, memslot, porder); - - } else { - /* Set up to use an RMO region */ - rma_size = kvm_rma_pages; - if (rma_size > memslot->npages) - rma_size = memslot->npages; - rma_size <<= PAGE_SHIFT; - rmls = lpcr_rmls(rma_size); - err = -EINVAL; - if ((long)rmls < 0) { - pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); - goto out_srcu; - } - atomic_inc(&ri->use_count); - kvm->arch.rma = ri; - - /* Update LPCR and RMOR */ - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - /* PPC970; insert RMLS value (split field) in HID4 */ - lpcr_mask = 
(1ul << HID4_RMLS0_SH) | - (3ul << HID4_RMLS2_SH) | HID4_RMOR; - lpcr = ((rmls >> 2) << HID4_RMLS0_SH) | - ((rmls & 3) << HID4_RMLS2_SH); - /* RMOR is also in HID4 */ - lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff) - << HID4_RMOR_SH; - } else { - /* POWER7 */ - lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS; - lpcr = rmls << LPCR_RMLS_SH; - kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT; - } - pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", - ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); - - /* Initialize phys addrs of pages in RMO */ - npages = kvm_rma_pages; - porder = __ilog2(npages); - physp = memslot->arch.slot_phys; - if (physp) { - if (npages > memslot->npages) - npages = memslot->npages; - spin_lock(&kvm->arch.slot_phys_lock); - for (i = 0; i < npages; ++i) - physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + - porder; - spin_unlock(&kvm->arch.slot_phys_lock); - } - } - - kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); + kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ smp_wmb(); @@ -2449,35 +2326,21 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, sizeof(kvm->arch.enabled_hcalls)); - kvm->arch.rma = NULL; - kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - /* PPC970; HID4 is effectively the LPCR */ - kvm->arch.host_lpid = 0; - kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4); - lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH)); - lpcr |= ((lpid >> 4) << HID4_LPID1_SH) | - ((lpid & 0xf) << HID4_LPID5_SH); - } else { - /* POWER7; init LPCR for virtual RMA mode */ - kvm->arch.host_lpid = mfspr(SPRN_LPID); - kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); - lpcr &= LPCR_PECE | LPCR_LPES; - lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | - LPCR_VPM0 | LPCR_VPM1; - kvm->arch.vrma_slb_v = SLB_VSID_B_1T | - (VRMA_VSID << SLB_VSID_SHIFT_1T); - /* On POWER8 turn on online bit to enable PURR/SPURR */ - if (cpu_has_feature(CPU_FTR_ARCH_207S)) - lpcr |= LPCR_ONL; - } + /* Init LPCR for virtual RMA mode */ + kvm->arch.host_lpid = mfspr(SPRN_LPID); + kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); + lpcr &= LPCR_PECE | LPCR_LPES; + lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | + LPCR_VPM0 | LPCR_VPM1; + kvm->arch.vrma_slb_v = SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + /* On POWER8 turn on online bit to enable PURR/SPURR */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + lpcr |= LPCR_ONL; kvm->arch.lpcr = lpcr; - kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206); - spin_lock_init(&kvm->arch.slot_phys_lock); - /* * Track that we now have a HV mode VM active. This blocks secondary * CPU threads from coming online. 
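The VRMA-only path kept above boils down to: derive the SLB page-size encoding (senc) for the backing page size, then place it in the VRMASD field of the LPCR. Below is a minimal userspace sketch of that computation; the constant values and the shape of slb_pgsize_encoding() are assumptions for illustration, not the authoritative definitions from asm/reg.h and the kernel's SLB helpers.

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Stand-in constants, assumed for illustration only; the real
	 * definitions live in asm/reg.h and the kernel's SLB encoding
	 * helpers.
	 */
	#define LPCR_VRMASD_SH	47ULL    /* assumed position of VRMASD   */
	#define SLB_VSID_L	0x100ULL /* assumed SLB "large page" bit */
	#define SLB_VSID_LP_01	0x010ULL /* assumed LP=01 (64k) encoding */

	/* Toy analogue of slb_pgsize_encoding(): 4k -> 0, 64k/16M -> L|LP */
	static uint64_t slb_pgsize_encoding(unsigned long psize)
	{
		if (psize >= 0x10000)
			return SLB_VSID_L |
			       (psize == 0x10000 ? SLB_VSID_LP_01 : 0);
		return 0;
	}

	int main(void)
	{
		unsigned long sizes[] = { 0x1000, 0x10000, 0x1000000 };

		for (int i = 0; i < 3; i++) {
			uint64_t senc = slb_pgsize_encoding(sizes[i]);
			/* "-4" because senc values start at 0x10 */
			uint64_t vrmasd = senc << (LPCR_VRMASD_SH - 4);

			printf("psize=%#lx senc=%#llx vrmasd=%#llx\n",
			       sizes[i], (unsigned long long)senc,
			       (unsigned long long)vrmasd);
		}
		return 0;
	}

Note that only the page-size decode lands in VRMASD; the 1T-segment selection goes into vrma_slb_v separately, as the hunk above shows.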
@@ -2507,10 +2370,6 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) kvm_hv_vm_deactivated(); kvmppc_free_vcores(kvm); - if (kvm->arch.rma) { - kvm_release_rma(kvm->arch.rma); - kvm->arch.rma = NULL; - } kvmppc_free_hpt(kvm); } @@ -2536,7 +2395,8 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn, static int kvmppc_core_check_processor_compat_hv(void) { - if (!cpu_has_feature(CPU_FTR_HVMODE)) + if (!cpu_has_feature(CPU_FTR_HVMODE) || + !cpu_has_feature(CPU_FTR_ARCH_206)) return -EIO; return 0; } @@ -2550,16 +2410,6 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, switch (ioctl) { - case KVM_ALLOCATE_RMA: { - struct kvm_allocate_rma rma; - struct kvm *kvm = filp->private_data; - - r = kvm_vm_ioctl_allocate_rma(kvm, &rma); - if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) - r = -EFAULT; - break; - } - case KVM_PPC_ALLOCATE_HTAB: { u32 htab_order; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 3f1bb5a36c2..1f083ff8a61 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -16,6 +16,7 @@ #include <linux/memblock.h> #include <linux/sizes.h> #include <linux/cma.h> +#include <linux/bitops.h> #include <asm/cputable.h> #include <asm/kvm_ppc.h> @@ -32,95 +33,9 @@ * By default we reserve 5% of memory for hash pagetable allocation. */ static unsigned long kvm_cma_resv_ratio = 5; -/* - * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area. - * Each RMA has to be physically contiguous and of a size that the - * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, - * and other larger sizes. Since we are unlikely to be allocate that - * much physically contiguous memory after the system is up and running, - * we preallocate a set of RMAs in early boot using CMA. - * should be power of 2. - */ -unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ -EXPORT_SYMBOL_GPL(kvm_rma_pages); static struct cma *kvm_cma; -/* Work out RMLS (real mode limit selector) field value for a given RMA size. - Assumes POWER7 or PPC970. 
*/ -static inline int lpcr_rmls(unsigned long rma_size) -{ - switch (rma_size) { - case 32ul << 20: /* 32 MB */ - if (cpu_has_feature(CPU_FTR_ARCH_206)) - return 8; /* only supported on POWER7 */ - return -1; - case 64ul << 20: /* 64 MB */ - return 3; - case 128ul << 20: /* 128 MB */ - return 7; - case 256ul << 20: /* 256 MB */ - return 4; - case 1ul << 30: /* 1 GB */ - return 2; - case 16ul << 30: /* 16 GB */ - return 1; - case 256ul << 30: /* 256 GB */ - return 0; - default: - return -1; - } -} - -static int __init early_parse_rma_size(char *p) -{ - unsigned long kvm_rma_size; - - pr_debug("%s(%s)\n", __func__, p); - if (!p) - return -EINVAL; - kvm_rma_size = memparse(p, &p); - /* - * Check that the requested size is one supported in hardware - */ - if (lpcr_rmls(kvm_rma_size) < 0) { - pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); - return -EINVAL; - } - kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT; - return 0; -} -early_param("kvm_rma_size", early_parse_rma_size); - -struct kvm_rma_info *kvm_alloc_rma() -{ - struct page *page; - struct kvm_rma_info *ri; - - ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); - if (!ri) - return NULL; - page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages)); - if (!page) - goto err_out; - atomic_set(&ri->use_count, 1); - ri->base_pfn = page_to_pfn(page); - return ri; -err_out: - kfree(ri); - return NULL; -} -EXPORT_SYMBOL_GPL(kvm_alloc_rma); - -void kvm_release_rma(struct kvm_rma_info *ri) -{ - if (atomic_dec_and_test(&ri->use_count)) { - cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages); - kfree(ri); - } -} -EXPORT_SYMBOL_GPL(kvm_release_rma); - static int __init early_parse_kvm_cma_resv(char *p) { pr_debug("%s(%s)\n", __func__, p); @@ -132,14 +47,9 @@ early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv); struct page *kvm_alloc_hpt(unsigned long nr_pages) { - unsigned long align_pages = HPT_ALIGN_PAGES; - VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); - /* Old CPUs require HPT aligned on a multiple of its size */ - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - align_pages = nr_pages; - return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages)); + return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES)); } EXPORT_SYMBOL_GPL(kvm_alloc_hpt); @@ -180,22 +90,44 @@ void __init kvm_cma_reserve(void) if (selected_size) { pr_debug("%s: reserving %ld MiB for global area\n", __func__, (unsigned long)selected_size / SZ_1M); - /* - * Old CPUs require HPT aligned on a multiple of its size. So for them - * make the alignment as max size we could request. - */ - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - align_size = __rounddown_pow_of_two(selected_size); - else - align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; - - align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); + align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; cma_declare_contiguous(0, selected_size, 0, align_size, KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma); } } /* + * Real-mode H_CONFER implementation. + * We check if we are the only vcpu out of this virtual core + * still running in the guest and not ceded. If so, we pop up + * to the virtual-mode implementation; if not, just return to + * the guest. 
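+ * The polling loop below waits at most about 10 microseconds
+ * (10 * tb_ticks_per_usec timebase ticks) for the other threads to
+ * cede or confer; if that doesn't happen, we return H_SUCCESS so the
+ * vcpu simply keeps running in the guest.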
+ */ +long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, + unsigned int yield_count) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + int threads_running; + int threads_ceded; + int threads_conferring; + u64 stop = get_tb() + 10 * tb_ticks_per_usec; + int rv = H_SUCCESS; /* => don't yield */ + + set_bit(vcpu->arch.ptid, &vc->conferring_threads); + while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) { + threads_running = VCORE_ENTRY_COUNT(vc); + threads_ceded = hweight32(vc->napping_threads); + threads_conferring = hweight32(vc->conferring_threads); + if (threads_ceded + threads_conferring >= threads_running) { + rv = H_TOO_HARD; /* => do yield */ + break; + } + } + clear_bit(vcpu->arch.ptid, &vc->conferring_threads); + return rv; +} + +/* * When running HV mode KVM we need to block certain operations while KVM VMs * exist in the system. We use a counter of VMs to track this. * diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 731be7478b2..36540a99d17 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -52,10 +52,8 @@ _GLOBAL(__kvmppc_vcore_entry) std r3, _CCR(r1) /* Save host DSCR */ -BEGIN_FTR_SECTION mfspr r3, SPRN_DSCR std r3, HSTATE_DSCR(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION /* Save host DABR */ @@ -84,11 +82,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r7, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */ mfspr r6, SPRN_MMCRA -BEGIN_FTR_SECTION - /* On P7, clear MMCRA in order to disable SDAR updates */ + /* Clear MMCRA in order to disable SDAR updates */ li r5, 0 mtspr SPRN_MMCRA, r5 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ lbz r5, LPPACA_PMCINUSE(r3) @@ -113,20 +109,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r7, SPRN_PMC4 mfspr r8, SPRN_PMC5 mfspr r9, SPRN_PMC6 -BEGIN_FTR_SECTION - mfspr r10, SPRN_PMC7 - mfspr r11, SPRN_PMC8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r3, HSTATE_PMC(r13) stw r5, HSTATE_PMC + 4(r13) stw r6, HSTATE_PMC + 8(r13) stw r7, HSTATE_PMC + 12(r13) stw r8, HSTATE_PMC + 16(r13) stw r9, HSTATE_PMC + 20(r13) -BEGIN_FTR_SECTION - stw r10, HSTATE_PMC + 24(r13) - stw r11, HSTATE_PMC + 28(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 31: /* @@ -140,31 +128,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) add r8,r8,r7 std r8,HSTATE_DECEXP(r13) -#ifdef CONFIG_SMP - /* - * On PPC970, if the guest vcpu has an external interrupt pending, - * send ourselves an IPI so as to interrupt the guest once it - * enables interrupts. (It must have interrupts disabled, - * otherwise we would already have delivered the interrupt.) - * - * XXX If this is a UP build, smp_send_reschedule is not available, - * so the interrupt will be delayed until the next time the vcpu - * enters the guest with interrupts enabled. - */ -BEGIN_FTR_SECTION - ld r4, HSTATE_KVM_VCPU(r13) - ld r0, VCPU_PENDING_EXC(r4) - li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL) - oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h - and. 
r0, r0, r7 - beq 32f - lhz r3, PACAPACAINDEX(r13) - bl smp_send_reschedule - nop -32: -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -#endif /* CONFIG_SMP */ - /* Jump to partition switch code */ bl kvmppc_hv_entry_trampoline nop diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index d562c8e2bc3..60081bd7584 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -138,8 +138,5 @@ out: long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) { - if (cpu_has_feature(CPU_FTR_ARCH_206)) - return kvmppc_realmode_mc_power7(vcpu); - - return 0; + return kvmppc_realmode_mc_power7(vcpu); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 084ad54c73c..510bdfbc407 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -45,16 +45,12 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags) * as indicated by local_paca->kvm_hstate.kvm_vcpu being set, * we can use tlbiel as long as we mark all other physical * cores as potentially having stale TLB entries for this lpid. - * If we're not using MMU notifiers, we never take pages away - * from the guest, so we can use tlbiel if requested. * Otherwise, don't use tlbiel. */ if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu) global = 0; - else if (kvm->arch.using_mmu_notifiers) - global = 1; else - global = !(flags & H_LOCAL); + global = 1; if (!global) { /* any other core might now have stale TLB entries... */ @@ -170,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, struct revmap_entry *rev; unsigned long g_ptel; struct kvm_memory_slot *memslot; - unsigned long *physp, pte_size; + unsigned long pte_size; unsigned long is_io; unsigned long *rmap; pte_t pte; @@ -198,9 +194,6 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, is_io = ~0ul; rmap = NULL; if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { - /* PPC970 can't do emulated MMIO */ - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - return H_PARAMETER; /* Emulated MMIO - mark this with key=31 */ pteh |= HPTE_V_ABSENT; ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO; @@ -213,37 +206,20 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, slot_fn = gfn - memslot->base_gfn; rmap = &memslot->arch.rmap[slot_fn]; - if (!kvm->arch.using_mmu_notifiers) { - physp = memslot->arch.slot_phys; - if (!physp) - return H_PARAMETER; - physp += slot_fn; - if (realmode) - physp = real_vmalloc_addr(physp); - pa = *physp; - if (!pa) - return H_TOO_HARD; - is_io = pa & (HPTE_R_I | HPTE_R_W); - pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); - pa &= PAGE_MASK; + /* Translate to host virtual address */ + hva = __gfn_to_hva_memslot(memslot, gfn); + + /* Look up the Linux PTE for the backing page */ + pte_size = psize; + pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size); + if (pte_present(pte) && !pte_numa(pte)) { + if (writing && !pte_write(pte)) + /* make the actual HPTE be read-only */ + ptel = hpte_make_readonly(ptel); + is_io = hpte_cache_bits(pte_val(pte)); + pa = pte_pfn(pte) << PAGE_SHIFT; + pa |= hva & (pte_size - 1); pa |= gpa & ~PAGE_MASK; - } else { - /* Translate to host virtual address */ - hva = __gfn_to_hva_memslot(memslot, gfn); - - /* Look up the Linux PTE for the backing page */ - pte_size = psize; - pte = lookup_linux_pte_and_update(pgdir, hva, writing, - &pte_size); - if (pte_present(pte) && !pte_numa(pte)) { - if (writing && !pte_write(pte)) - /* make the actual HPTE be 
read-only */ - ptel = hpte_make_readonly(ptel); - is_io = hpte_cache_bits(pte_val(pte)); - pa = pte_pfn(pte) << PAGE_SHIFT; - pa |= hva & (pte_size - 1); - pa |= gpa & ~PAGE_MASK; - } } if (pte_size < psize) @@ -337,8 +313,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, rmap = real_vmalloc_addr(rmap); lock_rmap(rmap); /* Check for pending invalidations under the rmap chain lock */ - if (kvm->arch.using_mmu_notifiers && - mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_notifier_retry(kvm, mmu_seq)) { /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; @@ -395,61 +370,11 @@ static inline int try_lock_tlbie(unsigned int *lock) return old == 0; } -/* - * tlbie/tlbiel is a bit different on the PPC970 compared to later - * processors such as POWER7; the large page bit is in the instruction - * not RB, and the top 16 bits and the bottom 12 bits of the VA - * in RB must be 0. - */ -static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues, - long npages, int global, bool need_sync) -{ - long i; - - if (global) { - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - if (need_sync) - asm volatile("ptesync" : : : "memory"); - for (i = 0; i < npages; ++i) { - unsigned long rb = rbvalues[i]; - - if (rb & 1) /* large page */ - asm volatile("tlbie %0,1" : : - "r" (rb & 0x0000fffffffff000ul)); - else - asm volatile("tlbie %0,0" : : - "r" (rb & 0x0000fffffffff000ul)); - } - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; - } else { - if (need_sync) - asm volatile("ptesync" : : : "memory"); - for (i = 0; i < npages; ++i) { - unsigned long rb = rbvalues[i]; - - if (rb & 1) /* large page */ - asm volatile("tlbiel %0,1" : : - "r" (rb & 0x0000fffffffff000ul)); - else - asm volatile("tlbiel %0,0" : : - "r" (rb & 0x0000fffffffff000ul)); - } - asm volatile("ptesync" : : : "memory"); - } -} - static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, long npages, int global, bool need_sync) { long i; - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - /* PPC970 tlbie instruction is a bit different */ - do_tlbies_970(kvm, rbvalues, npages, global, need_sync); - return; - } if (global) { while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) cpu_relax(); @@ -667,40 +592,29 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, rev->guest_rpte = r; note_hpte_modification(kvm, rev); } - r = (be64_to_cpu(hpte[1]) & ~mask) | bits; /* Update HPTE */ if (v & HPTE_V_VALID) { - rb = compute_tlbie_rb(v, r, pte_index); - hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID); - do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* - * If the host has this page as readonly but the guest - * wants to make it read/write, reduce the permissions. - * Checking the host permissions involves finding the - * memslot and then the Linux PTE for the page. + * If the page is valid, don't let it transition from + * readonly to writable. If it should be writable, we'll + * take a trap and let the page fault code sort it out. 
*/ - if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) { - unsigned long psize, gfn, hva; - struct kvm_memory_slot *memslot; - pgd_t *pgdir = vcpu->arch.pgdir; - pte_t pte; - - psize = hpte_page_size(v, r); - gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; - memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); - if (memslot) { - hva = __gfn_to_hva_memslot(memslot, gfn); - pte = lookup_linux_pte_and_update(pgdir, hva, - 1, &psize); - if (pte_present(pte) && !pte_write(pte)) - r = hpte_make_readonly(r); - } + pte = be64_to_cpu(hpte[1]); + r = (pte & ~mask) | bits; + if (hpte_is_writable(r) && !hpte_is_writable(pte)) + r = hpte_make_readonly(r); + /* If the PTE is changing, invalidate it first */ + if (r != pte) { + rb = compute_tlbie_rb(v, r, pte_index); + hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) | + HPTE_V_ABSENT); + do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), + true); + hpte[1] = cpu_to_be64(r); } } - hpte[1] = cpu_to_be64(r); - eieio(); - hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK); + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); asm volatile("ptesync" : : : "memory"); return H_SUCCESS; } diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 3ee38e6e884..7b066f6b02a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -183,8 +183,10 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp, * state update in HW (ie bus transactions) so we can handle them * separately here as well. */ - if (resend) + if (resend) { icp->rm_action |= XICS_RM_CHECK_RESEND; + icp->rm_resend_icp = icp; + } } @@ -254,10 +256,25 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, * nothing needs to be done as there can be no XISR to * reject. * + * ICP state: Check_IPI + * * If the CPPR is less favored, then we might be replacing - * an interrupt, and thus need to possibly reject it as in + * an interrupt, and thus need to possibly reject it. * - * ICP state: Check_IPI + * ICP State: IPI + * + * Besides rejecting any pending interrupts, we also + * update XISR and pending_pri to mark IPI as pending. + * + * PAPR does not describe this state, but if the MFRR is being + * made less favored than its earlier value, there might be + * a previously-rejected interrupt needing to be resent. + * Ideally, we would want to resend only if + * prio(pending_interrupt) < mfrr && + * prio(pending_interrupt) < cppr + * where pending interrupt is the one that was rejected. But + * we don't have that state, so we simply trigger a resend + * whenever the MFRR is made less favored. 
*/ do { old_state = new_state = ACCESS_ONCE(icp->state); @@ -270,13 +287,14 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, resend = false; if (mfrr < new_state.cppr) { /* Reject a pending interrupt if not an IPI */ - if (mfrr <= new_state.pending_pri) + if (mfrr <= new_state.pending_pri) { reject = new_state.xisr; - new_state.pending_pri = mfrr; - new_state.xisr = XICS_IPI; + new_state.pending_pri = mfrr; + new_state.xisr = XICS_IPI; + } } - if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { + if (mfrr > old_state.mfrr) { resend = new_state.need_resend; new_state.need_resend = 0; } @@ -289,8 +307,10 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, } /* Pass resends to virtual mode */ - if (resend) + if (resend) { this_icp->rm_action |= XICS_RM_CHECK_RESEND; + this_icp->rm_resend_icp = icp; + } return check_too_hard(xics, this_icp); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 65c105b17a2..10554df1385 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -94,20 +94,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) lwz r6, HSTATE_PMC + 12(r13) lwz r8, HSTATE_PMC + 16(r13) lwz r9, HSTATE_PMC + 20(r13) -BEGIN_FTR_SECTION - lwz r10, HSTATE_PMC + 24(r13) - lwz r11, HSTATE_PMC + 28(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_PMC1, r3 mtspr SPRN_PMC2, r4 mtspr SPRN_PMC3, r5 mtspr SPRN_PMC4, r6 mtspr SPRN_PMC5, r8 mtspr SPRN_PMC6, r9 -BEGIN_FTR_SECTION - mtspr SPRN_PMC7, r10 - mtspr SPRN_PMC8, r11 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, HSTATE_MMCR(r13) ld r4, HSTATE_MMCR + 8(r13) ld r5, HSTATE_MMCR + 16(r13) @@ -153,11 +145,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL -BEGIN_FTR_SECTION beq 11f cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI beq cr2, 14f /* HMI check */ -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* RFI into the highmem handler, or branch to interrupt handler */ mfmsr r6 @@ -166,7 +156,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) mtmsrd r6, 1 /* Clear RI in MSR */ mtsrr0 r8 mtsrr1 r7 - beqa 0x500 /* external interrupt (PPC970) */ beq cr1, 13f /* machine check */ RFI @@ -393,11 +382,8 @@ kvmppc_hv_entry: slbia ptesync -BEGIN_FTR_SECTION - b 30f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) /* - * POWER7 host -> guest partition switch code. + * POWER7/POWER8 host -> guest partition switch code. * We don't have to lock against concurrent tlbies, * but we do have to coordinate across hardware threads. */ @@ -505,97 +491,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi r3,512 /* 1 microsecond */ li r12,BOOK3S_INTERRUPT_HV_DECREMENTER blt hdec_soon - b 31f - - /* - * PPC970 host -> guest partition switch code. - * We have to lock against concurrent tlbies, - * using native_tlbie_lock to lock against host tlbies - * and kvm->arch.tlbie_lock to lock against guest tlbies. - * We also have to invalidate the TLB since its - * entries aren't tagged with the LPID. - */ -30: ld r5,HSTATE_KVM_VCORE(r13) - ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ - - /* first take native_tlbie_lock */ - .section ".toc","aw" -toc_tlbie_lock: - .tc native_tlbie_lock[TC],native_tlbie_lock - .previous - ld r3,toc_tlbie_lock@toc(r2) -#ifdef __BIG_ENDIAN__ - lwz r8,PACA_LOCK_TOKEN(r13) -#else - lwz r8,PACAPACAINDEX(r13) -#endif -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. 
r8,0,r3 - bne 24b - isync - - ld r5,HSTATE_KVM_VCORE(r13) - ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */ - li r0,0x18f - rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ - or r0,r7,r0 - ptesync - sync - mtspr SPRN_HID4,r0 /* switch to reserved LPID */ - isync - li r0,0 - stw r0,0(r3) /* drop native_tlbie_lock */ - - /* invalidate the whole TLB */ - li r0,256 - mtctr r0 - li r6,0 -25: tlbiel r6 - addi r6,r6,0x1000 - bdnz 25b - ptesync - /* Take the guest's tlbie_lock */ - addi r3,r9,KVM_TLBIE_LOCK -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. r8,0,r3 - bne 24b - isync - ld r6,KVM_SDR1(r9) - mtspr SPRN_SDR1,r6 /* switch to partition page table */ - - /* Set up HID4 with the guest's LPID etc. */ - sync - mtspr SPRN_HID4,r7 - isync - - /* drop the guest's tlbie_lock */ - li r0,0 - stw r0,0(r3) - - /* Check if HDEC expires soon */ - mfspr r3,SPRN_HDEC - cmpwi r3,10 - li r12,BOOK3S_INTERRUPT_HV_DECREMENTER - blt hdec_soon - - /* Enable HDEC interrupts */ - mfspr r0,SPRN_HID0 - li r3,1 - rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 - sync - mtspr SPRN_HID0,r0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 -31: /* Do we have a guest vcpu to run? */ cmpdi r4, 0 beq kvmppc_primary_no_guest @@ -625,7 +521,6 @@ kvmppc_got_guest: stb r6, VCPU_VPA_DIRTY(r4) 25: -BEGIN_FTR_SECTION /* Save purr/spurr */ mfspr r5,SPRN_PURR mfspr r6,SPRN_SPURR @@ -635,7 +530,6 @@ BEGIN_FTR_SECTION ld r8,VCPU_SPURR(r4) mtspr SPRN_PURR,r7 mtspr SPRN_SPURR,r8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION /* Set partition DABR */ @@ -644,9 +538,7 @@ BEGIN_FTR_SECTION ld r6,VCPU_DABR(r4) mtspr SPRN_DABRX,r5 mtspr SPRN_DABR,r6 - BEGIN_FTR_SECTION_NESTED(89) isync - END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89) END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -777,20 +669,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) lwz r7, VCPU_PMC + 12(r4) lwz r8, VCPU_PMC + 16(r4) lwz r9, VCPU_PMC + 20(r4) -BEGIN_FTR_SECTION - lwz r10, VCPU_PMC + 24(r4) - lwz r11, VCPU_PMC + 28(r4) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_PMC1, r3 mtspr SPRN_PMC2, r5 mtspr SPRN_PMC3, r6 mtspr SPRN_PMC4, r7 mtspr SPRN_PMC5, r8 mtspr SPRN_PMC6, r9 -BEGIN_FTR_SECTION - mtspr SPRN_PMC7, r10 - mtspr SPRN_PMC8, r11 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, VCPU_MMCR(r4) ld r5, VCPU_MMCR + 8(r4) ld r6, VCPU_MMCR + 16(r4) @@ -837,14 +721,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ld r30, VCPU_GPR(R30)(r4) ld r31, VCPU_GPR(R31)(r4) -BEGIN_FTR_SECTION /* Switch DSCR to guest value */ ld r5, VCPU_DSCR(r4) mtspr SPRN_DSCR, r5 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION - /* Skip next section on POWER7 or PPC970 */ + /* Skip next section on POWER7 */ b 8f END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ @@ -920,7 +802,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_DAR, r5 mtspr SPRN_DSISR, r6 -BEGIN_FTR_SECTION /* Restore AMR and UAMOR, set AMOR to all 1s */ ld r5,VCPU_AMR(r4) ld r6,VCPU_UAMOR(r4) @@ -928,7 +809,6 @@ BEGIN_FTR_SECTION mtspr SPRN_AMR,r5 mtspr SPRN_UAMOR,r6 mtspr SPRN_AMOR,r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Restore state of CTRL run bit; assume 1 on entry */ lwz r5,VCPU_CTRL(r4) @@ -963,13 +843,11 @@ deliver_guest_interrupt: rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 cmpdi cr1, r0, 0 andi. 
r8, r11, MSR_EE -BEGIN_FTR_SECTION mfspr r8, SPRN_LPCR /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */ rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH mtspr SPRN_LPCR, r8 isync -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) beq 5f li r0, BOOK3S_INTERRUPT_EXTERNAL bne cr1, 12f @@ -1124,15 +1002,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) stw r12,VCPU_TRAP(r9) - /* Save HEIR (HV emulation assist reg) in last_inst + /* Save HEIR (HV emulation assist reg) in emul_inst if this is an HEI (HV emulation interrupt, e40) */ li r3,KVM_INST_FETCH_FAILED -BEGIN_FTR_SECTION cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST bne 11f mfspr r3,SPRN_HEIR -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) -11: stw r3,VCPU_LAST_INST(r9) +11: stw r3,VCPU_HEIR(r9) /* these are volatile across C function calls */ mfctr r3 @@ -1140,13 +1016,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r3, VCPU_CTR(r9) stw r4, VCPU_XER(r9) -BEGIN_FTR_SECTION /* If this is a page table miss then see if it's theirs or ours */ cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE beq kvmppc_hdsi cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE beq kvmppc_hisi -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* See if this is a leftover HDEC interrupt */ cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER @@ -1159,11 +1033,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) cmpwi r12,BOOK3S_INTERRUPT_SYSCALL beq hcall_try_real_mode - /* Only handle external interrupts here on arch 206 and later */ -BEGIN_FTR_SECTION - b ext_interrupt_to_host -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) - /* External interrupt ? */ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL bne+ ext_interrupt_to_host @@ -1193,11 +1062,9 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ mfdsisr r7 std r6, VCPU_DAR(r9) stw r7, VCPU_DSISR(r9) -BEGIN_FTR_SECTION /* don't overwrite fault_dar/fault_dsisr if HDSI */ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE beq 6f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r6, VCPU_FAULT_DAR(r9) stw r7, VCPU_FAULT_DSISR(r9) @@ -1236,7 +1103,6 @@ mc_cont: /* * Save the guest PURR/SPURR */ -BEGIN_FTR_SECTION mfspr r5,SPRN_PURR mfspr r6,SPRN_SPURR ld r7,VCPU_PURR(r9) @@ -1256,7 +1122,6 @@ BEGIN_FTR_SECTION add r4,r4,r6 mtspr SPRN_PURR,r3 mtspr SPRN_SPURR,r4 -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) /* Save DEC */ mfspr r5,SPRN_DEC @@ -1306,22 +1171,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 8: /* Save and reset AMR and UAMOR before turning on the MMU */ -BEGIN_FTR_SECTION mfspr r5,SPRN_AMR mfspr r6,SPRN_UAMOR std r5,VCPU_AMR(r9) std r6,VCPU_UAMOR(r9) li r6,0 mtspr SPRN_AMR,r6 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Switch DSCR back to host value */ -BEGIN_FTR_SECTION mfspr r8, SPRN_DSCR ld r7, HSTATE_DSCR(r13) std r8, VCPU_DSCR(r9) mtspr SPRN_DSCR, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Save non-volatile GPRs */ std r14, VCPU_GPR(R14)(r9) @@ -1503,11 +1364,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r4, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ mfspr r6, SPRN_MMCRA -BEGIN_FTR_SECTION - /* On P7, clear MMCRA in order to disable SDAR updates */ + /* Clear MMCRA in order to disable SDAR updates */ li r7, 0 mtspr SPRN_MMCRA, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync beq 21f /* if no VPA, save PMU stuff anyway */ lbz r7, LPPACA_PMCINUSE(r8) @@ -1532,10 +1391,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r6, SPRN_PMC4 mfspr r7, SPRN_PMC5 mfspr r8, SPRN_PMC6 -BEGIN_FTR_SECTION - mfspr r10, SPRN_PMC7 - mfspr r11, SPRN_PMC8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r3, VCPU_PMC(r9) stw r4, VCPU_PMC + 4(r9) stw 
r5, VCPU_PMC + 8(r9) @@ -1543,10 +1398,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r7, VCPU_PMC + 16(r9) stw r8, VCPU_PMC + 20(r9) BEGIN_FTR_SECTION - stw r10, VCPU_PMC + 24(r9) - stw r11, VCPU_PMC + 28(r9) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -BEGIN_FTR_SECTION mfspr r5, SPRN_SIER mfspr r6, SPRN_SPMC1 mfspr r7, SPRN_SPMC2 @@ -1566,11 +1417,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ptesync hdec_soon: /* r12 = trap, r13 = paca */ -BEGIN_FTR_SECTION - b 32f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) /* - * POWER7 guest -> host partition switch code. + * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do * have to coordinate the hardware threads. */ @@ -1698,87 +1546,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 16: ld r8,KVM_HOST_LPCR(r4) mtspr SPRN_LPCR,r8 isync - b 33f - - /* - * PPC970 guest -> host partition switch code. - * We have to lock against concurrent tlbies, and - * we have to flush the whole TLB. - */ -32: ld r5,HSTATE_KVM_VCORE(r13) - ld r4,VCORE_KVM(r5) /* pointer to struct kvm */ - - /* Take the guest's tlbie_lock */ -#ifdef __BIG_ENDIAN__ - lwz r8,PACA_LOCK_TOKEN(r13) -#else - lwz r8,PACAPACAINDEX(r13) -#endif - addi r3,r4,KVM_TLBIE_LOCK -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. r8,0,r3 - bne 24b - isync - - ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */ - li r0,0x18f - rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ - or r0,r7,r0 - ptesync - sync - mtspr SPRN_HID4,r0 /* switch to reserved LPID */ - isync - li r0,0 - stw r0,0(r3) /* drop guest tlbie_lock */ - - /* invalidate the whole TLB */ - li r0,256 - mtctr r0 - li r6,0 -25: tlbiel r6 - addi r6,r6,0x1000 - bdnz 25b - ptesync - - /* take native_tlbie_lock */ - ld r3,toc_tlbie_lock@toc(2) -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. 
r8,0,r3 - bne 24b - isync - - ld r6,KVM_HOST_SDR1(r4) - mtspr SPRN_SDR1,r6 /* switch to host page table */ - - /* Set up host HID4 value */ - sync - mtspr SPRN_HID4,r7 - isync - li r0,0 - stw r0,0(r3) /* drop native_tlbie_lock */ - - lis r8,0x7fff /* MAX_INT@h */ - mtspr SPRN_HDEC,r8 - - /* Disable HDEC interrupts */ - mfspr r0,SPRN_HID0 - li r3,0 - rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 - sync - mtspr SPRN_HID0,r0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 /* load host SLB entries */ -33: ld r8,PACA_SLBSHADOWPTR(r13) + ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED li r3, SLBSHADOW_SAVEAREA @@ -2047,7 +1817,7 @@ hcall_real_table: .long 0 /* 0xd8 */ .long 0 /* 0xdc */ .long DOTSYM(kvmppc_h_cede) - hcall_real_table - .long 0 /* 0xe4 */ + .long DOTSYM(kvmppc_rm_h_confer) - hcall_real_table .long 0 /* 0xe8 */ .long 0 /* 0xec */ .long 0 /* 0xf0 */ @@ -2126,9 +1896,6 @@ _GLOBAL(kvmppc_h_cede) stw r0,VCPU_TRAP(r3) li r0,H_SUCCESS std r0,VCPU_GPR(R3)(r3) -BEGIN_FTR_SECTION - b kvm_cede_exit /* just send it up to host on 970 */ -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) /* * Set our bit in the bitmask of napping threads unless all the @@ -2455,7 +2222,6 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif mtmsrd r8 - isync addi r3,r3,VCPU_FPRS bl store_fp_state #ifdef CONFIG_ALTIVEC @@ -2491,7 +2257,6 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif mtmsrd r8 - isync addi r3,r4,VCPU_FPRS bl load_fp_state #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index bfb8035314e..bd6ab1672ae 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -352,14 +352,6 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb) return kvmppc_get_field(inst, msb + 32, lsb + 32); } -/* - * Replaces inst bits with ordering according to spec. - */ -static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value) -{ - return kvmppc_set_field(inst, msb + 32, lsb + 32, value); -} - bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) { if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cf2eb16846d..f57383941d0 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -644,11 +644,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } -static inline int get_fpr_index(int i) -{ - return i * TS_FPRWIDTH; -} - /* Give up external provider (FPU, Altivec, VSX) */ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) { diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index eaeb78047fb..807351f76f8 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -613,10 +613,25 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, * there might be a previously-rejected interrupt needing * to be resent. * + * ICP state: Check_IPI + * * If the CPPR is less favored, then we might be replacing - * an interrupt, and thus need to possibly reject it as in + * an interrupt, and thus need to possibly reject it. * - * ICP state: Check_IPI + * ICP State: IPI + * + * Besides rejecting any pending interrupts, we also + * update XISR and pending_pri to mark IPI as pending. 
+ * + * PAPR does not describe this state, but if the MFRR is being + * made less favored than its earlier value, there might be + * a previously-rejected interrupt needing to be resent. + * Ideally, we would want to resend only if + * prio(pending_interrupt) < mfrr && + * prio(pending_interrupt) < cppr + * where pending interrupt is the one that was rejected. But + * we don't have that state, so we simply trigger a resend + * whenever the MFRR is made less favored. */ do { old_state = new_state = ACCESS_ONCE(icp->state); @@ -629,13 +644,14 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, resend = false; if (mfrr < new_state.cppr) { /* Reject a pending interrupt if not an IPI */ - if (mfrr <= new_state.pending_pri) + if (mfrr <= new_state.pending_pri) { reject = new_state.xisr; - new_state.pending_pri = mfrr; - new_state.xisr = XICS_IPI; + new_state.pending_pri = mfrr; + new_state.xisr = XICS_IPI; + } } - if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { + if (mfrr > old_state.mfrr) { resend = new_state.need_resend; new_state.need_resend = 0; } @@ -789,7 +805,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) if (icp->rm_action & XICS_RM_KICK_VCPU) kvmppc_fast_vcpu_kick(icp->rm_kick_target); if (icp->rm_action & XICS_RM_CHECK_RESEND) - icp_check_resend(xics, icp); + icp_check_resend(xics, icp->rm_resend_icp); if (icp->rm_action & XICS_RM_REJECT) icp_deliver_irq(xics, icp, icp->rm_reject); if (icp->rm_action & XICS_RM_NOTIFY_EOI) diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index e8aaa7a3f20..73f0f2723c0 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -74,6 +74,7 @@ struct kvmppc_icp { #define XICS_RM_NOTIFY_EOI 0x8 u32 rm_action; struct kvm_vcpu *rm_kick_target; + struct kvmppc_icp *rm_resend_icp; u32 rm_reject; u32 rm_eoied_irq; diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 16095841afe..b29ce752c7d 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -78,7 +78,7 @@ static inline int local_sid_setup_one(struct id *entry) sid = __this_cpu_inc_return(pcpu_last_used_sid); if (sid < NUM_TIDS) { - __this_cpu_write(pcpu_sids)entry[sid], entry); + __this_cpu_write(pcpu_sids.entry[sid], entry); entry->val = sid; entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]); ret = sid; @@ -299,14 +299,6 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) kvmppc_e500_recalc_shadow_pid(to_e500(vcpu)); } -void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) -{ -} - static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu) { kvmppc_booke_vcpu_load(vcpu, cpu); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c1f8f53cd31..c45eaab752b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -527,18 +527,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 0; break; case KVM_CAP_PPC_RMA: - r = hv_enabled; - /* PPC970 requires an RMA */ - if (r && cpu_has_feature(CPU_FTR_ARCH_201)) - r = 2; + r = 0; break; #endif case KVM_CAP_SYNC_MMU: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - if (hv_enabled) - r = cpu_has_feature(CPU_FTR_ARCH_206) ? 
1 : 0; - else - r = 0; + r = hv_enabled; #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER) r = 1; #else diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h new file mode 100644 index 00000000000..f647ce0f428 --- /dev/null +++ b/arch/powerpc/kvm/trace_book3s.h @@ -0,0 +1,32 @@ +#if !defined(_TRACE_KVM_BOOK3S_H) +#define _TRACE_KVM_BOOK3S_H + +/* + * Common defines used by the trace macros in trace_pr.h and trace_hv.h + */ + +#define kvm_trace_symbol_exit \ + {0x100, "SYSTEM_RESET"}, \ + {0x200, "MACHINE_CHECK"}, \ + {0x300, "DATA_STORAGE"}, \ + {0x380, "DATA_SEGMENT"}, \ + {0x400, "INST_STORAGE"}, \ + {0x480, "INST_SEGMENT"}, \ + {0x500, "EXTERNAL"}, \ + {0x501, "EXTERNAL_LEVEL"}, \ + {0x502, "EXTERNAL_HV"}, \ + {0x600, "ALIGNMENT"}, \ + {0x700, "PROGRAM"}, \ + {0x800, "FP_UNAVAIL"}, \ + {0x900, "DECREMENTER"}, \ + {0x980, "HV_DECREMENTER"}, \ + {0xc00, "SYSCALL"}, \ + {0xd00, "TRACE"}, \ + {0xe00, "H_DATA_STORAGE"}, \ + {0xe20, "H_INST_STORAGE"}, \ + {0xe40, "H_EMUL_ASSIST"}, \ + {0xf00, "PERFMON"}, \ + {0xf20, "ALTIVEC"}, \ + {0xf40, "VSX"} + +#endif diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h index f7537cf26ce..7ec534d1db9 100644 --- a/arch/powerpc/kvm/trace_booke.h +++ b/arch/powerpc/kvm/trace_booke.h @@ -151,6 +151,47 @@ TRACE_EVENT(kvm_booke206_ref_release, __entry->pfn, __entry->flags) ); +#ifdef CONFIG_SPE_POSSIBLE +#define kvm_trace_symbol_irqprio_spe \ + {BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \ + {BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \ + {BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"}, +#else +#define kvm_trace_symbol_irqprio_spe +#endif + +#ifdef CONFIG_PPC_E500MC +#define kvm_trace_symbol_irqprio_e500mc \ + {BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \ + {BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"}, +#else +#define kvm_trace_symbol_irqprio_e500mc +#endif + +#define kvm_trace_symbol_irqprio \ + kvm_trace_symbol_irqprio_spe \ + kvm_trace_symbol_irqprio_e500mc \ + {BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \ + {BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \ + {BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \ + {BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \ + {BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \ + {BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \ + {BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \ + {BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \ + {BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \ + {BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \ + {BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \ + {BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \ + {BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \ + {BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \ + {BOOKE_IRQPRIO_FIT, "FIT"}, \ + {BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \ + {BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \ + {BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \ + {BOOKE_IRQPRIO_DBELL, "DBELL"}, \ + {BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"} \ + TRACE_EVENT(kvm_booke_queue_irqprio, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), TP_ARGS(vcpu, priority), @@ -167,8 +208,10 @@ TRACE_EVENT(kvm_booke_queue_irqprio, __entry->pending = vcpu->arch.pending_exceptions; ), - TP_printk("vcpu=%x prio=%x pending=%lx", - __entry->cpu_nr, __entry->priority, __entry->pending) + TP_printk("vcpu=%x prio=%s pending=%lx", + __entry->cpu_nr, + __print_symbolic(__entry->priority, kvm_trace_symbol_irqprio), + __entry->pending) ); #endif diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h new file mode 100644 index 00000000000..33d9daff578 --- /dev/null +++ b/arch/powerpc/kvm/trace_hv.h @@ -0,0 +1,477 @@ +#if 
!defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_HV_H + +#include <linux/tracepoint.h> +#include "trace_book3s.h" +#include <asm/hvcall.h> +#include <asm/kvm_asm.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm_hv +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_hv + +#define kvm_trace_symbol_hcall \ + {H_REMOVE, "H_REMOVE"}, \ + {H_ENTER, "H_ENTER"}, \ + {H_READ, "H_READ"}, \ + {H_CLEAR_MOD, "H_CLEAR_MOD"}, \ + {H_CLEAR_REF, "H_CLEAR_REF"}, \ + {H_PROTECT, "H_PROTECT"}, \ + {H_GET_TCE, "H_GET_TCE"}, \ + {H_PUT_TCE, "H_PUT_TCE"}, \ + {H_SET_SPRG0, "H_SET_SPRG0"}, \ + {H_SET_DABR, "H_SET_DABR"}, \ + {H_PAGE_INIT, "H_PAGE_INIT"}, \ + {H_SET_ASR, "H_SET_ASR"}, \ + {H_ASR_ON, "H_ASR_ON"}, \ + {H_ASR_OFF, "H_ASR_OFF"}, \ + {H_LOGICAL_CI_LOAD, "H_LOGICAL_CI_LOAD"}, \ + {H_LOGICAL_CI_STORE, "H_LOGICAL_CI_STORE"}, \ + {H_LOGICAL_CACHE_LOAD, "H_LOGICAL_CACHE_LOAD"}, \ + {H_LOGICAL_CACHE_STORE, "H_LOGICAL_CACHE_STORE"}, \ + {H_LOGICAL_ICBI, "H_LOGICAL_ICBI"}, \ + {H_LOGICAL_DCBF, "H_LOGICAL_DCBF"}, \ + {H_GET_TERM_CHAR, "H_GET_TERM_CHAR"}, \ + {H_PUT_TERM_CHAR, "H_PUT_TERM_CHAR"}, \ + {H_REAL_TO_LOGICAL, "H_REAL_TO_LOGICAL"}, \ + {H_HYPERVISOR_DATA, "H_HYPERVISOR_DATA"}, \ + {H_EOI, "H_EOI"}, \ + {H_CPPR, "H_CPPR"}, \ + {H_IPI, "H_IPI"}, \ + {H_IPOLL, "H_IPOLL"}, \ + {H_XIRR, "H_XIRR"}, \ + {H_PERFMON, "H_PERFMON"}, \ + {H_MIGRATE_DMA, "H_MIGRATE_DMA"}, \ + {H_REGISTER_VPA, "H_REGISTER_VPA"}, \ + {H_CEDE, "H_CEDE"}, \ + {H_CONFER, "H_CONFER"}, \ + {H_PROD, "H_PROD"}, \ + {H_GET_PPP, "H_GET_PPP"}, \ + {H_SET_PPP, "H_SET_PPP"}, \ + {H_PURR, "H_PURR"}, \ + {H_PIC, "H_PIC"}, \ + {H_REG_CRQ, "H_REG_CRQ"}, \ + {H_FREE_CRQ, "H_FREE_CRQ"}, \ + {H_VIO_SIGNAL, "H_VIO_SIGNAL"}, \ + {H_SEND_CRQ, "H_SEND_CRQ"}, \ + {H_COPY_RDMA, "H_COPY_RDMA"}, \ + {H_REGISTER_LOGICAL_LAN, "H_REGISTER_LOGICAL_LAN"}, \ + {H_FREE_LOGICAL_LAN, "H_FREE_LOGICAL_LAN"}, \ + {H_ADD_LOGICAL_LAN_BUFFER, "H_ADD_LOGICAL_LAN_BUFFER"}, \ + {H_SEND_LOGICAL_LAN, "H_SEND_LOGICAL_LAN"}, \ + {H_BULK_REMOVE, "H_BULK_REMOVE"}, \ + {H_MULTICAST_CTRL, "H_MULTICAST_CTRL"}, \ + {H_SET_XDABR, "H_SET_XDABR"}, \ + {H_STUFF_TCE, "H_STUFF_TCE"}, \ + {H_PUT_TCE_INDIRECT, "H_PUT_TCE_INDIRECT"}, \ + {H_CHANGE_LOGICAL_LAN_MAC, "H_CHANGE_LOGICAL_LAN_MAC"}, \ + {H_VTERM_PARTNER_INFO, "H_VTERM_PARTNER_INFO"}, \ + {H_REGISTER_VTERM, "H_REGISTER_VTERM"}, \ + {H_FREE_VTERM, "H_FREE_VTERM"}, \ + {H_RESET_EVENTS, "H_RESET_EVENTS"}, \ + {H_ALLOC_RESOURCE, "H_ALLOC_RESOURCE"}, \ + {H_FREE_RESOURCE, "H_FREE_RESOURCE"}, \ + {H_MODIFY_QP, "H_MODIFY_QP"}, \ + {H_QUERY_QP, "H_QUERY_QP"}, \ + {H_REREGISTER_PMR, "H_REREGISTER_PMR"}, \ + {H_REGISTER_SMR, "H_REGISTER_SMR"}, \ + {H_QUERY_MR, "H_QUERY_MR"}, \ + {H_QUERY_MW, "H_QUERY_MW"}, \ + {H_QUERY_HCA, "H_QUERY_HCA"}, \ + {H_QUERY_PORT, "H_QUERY_PORT"}, \ + {H_MODIFY_PORT, "H_MODIFY_PORT"}, \ + {H_DEFINE_AQP1, "H_DEFINE_AQP1"}, \ + {H_GET_TRACE_BUFFER, "H_GET_TRACE_BUFFER"}, \ + {H_DEFINE_AQP0, "H_DEFINE_AQP0"}, \ + {H_RESIZE_MR, "H_RESIZE_MR"}, \ + {H_ATTACH_MCQP, "H_ATTACH_MCQP"}, \ + {H_DETACH_MCQP, "H_DETACH_MCQP"}, \ + {H_CREATE_RPT, "H_CREATE_RPT"}, \ + {H_REMOVE_RPT, "H_REMOVE_RPT"}, \ + {H_REGISTER_RPAGES, "H_REGISTER_RPAGES"}, \ + {H_DISABLE_AND_GETC, "H_DISABLE_AND_GETC"}, \ + {H_ERROR_DATA, "H_ERROR_DATA"}, \ + {H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \ + {H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \ + {H_MANAGE_TRACE, "H_MANAGE_TRACE"}, \ + {H_FREE_LOGICAL_LAN_BUFFER, "H_FREE_LOGICAL_LAN_BUFFER"}, \ + {H_QUERY_INT_STATE, "H_QUERY_INT_STATE"}, \ + {H_POLL_PENDING, 
"H_POLL_PENDING"}, \ + {H_ILLAN_ATTRIBUTES, "H_ILLAN_ATTRIBUTES"}, \ + {H_MODIFY_HEA_QP, "H_MODIFY_HEA_QP"}, \ + {H_QUERY_HEA_QP, "H_QUERY_HEA_QP"}, \ + {H_QUERY_HEA, "H_QUERY_HEA"}, \ + {H_QUERY_HEA_PORT, "H_QUERY_HEA_PORT"}, \ + {H_MODIFY_HEA_PORT, "H_MODIFY_HEA_PORT"}, \ + {H_REG_BCMC, "H_REG_BCMC"}, \ + {H_DEREG_BCMC, "H_DEREG_BCMC"}, \ + {H_REGISTER_HEA_RPAGES, "H_REGISTER_HEA_RPAGES"}, \ + {H_DISABLE_AND_GET_HEA, "H_DISABLE_AND_GET_HEA"}, \ + {H_GET_HEA_INFO, "H_GET_HEA_INFO"}, \ + {H_ALLOC_HEA_RESOURCE, "H_ALLOC_HEA_RESOURCE"}, \ + {H_ADD_CONN, "H_ADD_CONN"}, \ + {H_DEL_CONN, "H_DEL_CONN"}, \ + {H_JOIN, "H_JOIN"}, \ + {H_VASI_STATE, "H_VASI_STATE"}, \ + {H_ENABLE_CRQ, "H_ENABLE_CRQ"}, \ + {H_GET_EM_PARMS, "H_GET_EM_PARMS"}, \ + {H_SET_MPP, "H_SET_MPP"}, \ + {H_GET_MPP, "H_GET_MPP"}, \ + {H_HOME_NODE_ASSOCIATIVITY, "H_HOME_NODE_ASSOCIATIVITY"}, \ + {H_BEST_ENERGY, "H_BEST_ENERGY"}, \ + {H_XIRR_X, "H_XIRR_X"}, \ + {H_RANDOM, "H_RANDOM"}, \ + {H_COP, "H_COP"}, \ + {H_GET_MPP_X, "H_GET_MPP_X"}, \ + {H_SET_MODE, "H_SET_MODE"}, \ + {H_RTAS, "H_RTAS"} + +#define kvm_trace_symbol_kvmret \ + {RESUME_GUEST, "RESUME_GUEST"}, \ + {RESUME_GUEST_NV, "RESUME_GUEST_NV"}, \ + {RESUME_HOST, "RESUME_HOST"}, \ + {RESUME_HOST_NV, "RESUME_HOST_NV"} + +#define kvm_trace_symbol_hcall_rc \ + {H_SUCCESS, "H_SUCCESS"}, \ + {H_BUSY, "H_BUSY"}, \ + {H_CLOSED, "H_CLOSED"}, \ + {H_NOT_AVAILABLE, "H_NOT_AVAILABLE"}, \ + {H_CONSTRAINED, "H_CONSTRAINED"}, \ + {H_PARTIAL, "H_PARTIAL"}, \ + {H_IN_PROGRESS, "H_IN_PROGRESS"}, \ + {H_PAGE_REGISTERED, "H_PAGE_REGISTERED"}, \ + {H_PARTIAL_STORE, "H_PARTIAL_STORE"}, \ + {H_PENDING, "H_PENDING"}, \ + {H_CONTINUE, "H_CONTINUE"}, \ + {H_LONG_BUSY_START_RANGE, "H_LONG_BUSY_START_RANGE"}, \ + {H_LONG_BUSY_ORDER_1_MSEC, "H_LONG_BUSY_ORDER_1_MSEC"}, \ + {H_LONG_BUSY_ORDER_10_MSEC, "H_LONG_BUSY_ORDER_10_MSEC"}, \ + {H_LONG_BUSY_ORDER_100_MSEC, "H_LONG_BUSY_ORDER_100_MSEC"}, \ + {H_LONG_BUSY_ORDER_1_SEC, "H_LONG_BUSY_ORDER_1_SEC"}, \ + {H_LONG_BUSY_ORDER_10_SEC, "H_LONG_BUSY_ORDER_10_SEC"}, \ + {H_LONG_BUSY_ORDER_100_SEC, "H_LONG_BUSY_ORDER_100_SEC"}, \ + {H_LONG_BUSY_END_RANGE, "H_LONG_BUSY_END_RANGE"}, \ + {H_TOO_HARD, "H_TOO_HARD"}, \ + {H_HARDWARE, "H_HARDWARE"}, \ + {H_FUNCTION, "H_FUNCTION"}, \ + {H_PRIVILEGE, "H_PRIVILEGE"}, \ + {H_PARAMETER, "H_PARAMETER"}, \ + {H_BAD_MODE, "H_BAD_MODE"}, \ + {H_PTEG_FULL, "H_PTEG_FULL"}, \ + {H_NOT_FOUND, "H_NOT_FOUND"}, \ + {H_RESERVED_DABR, "H_RESERVED_DABR"}, \ + {H_NO_MEM, "H_NO_MEM"}, \ + {H_AUTHORITY, "H_AUTHORITY"}, \ + {H_PERMISSION, "H_PERMISSION"}, \ + {H_DROPPED, "H_DROPPED"}, \ + {H_SOURCE_PARM, "H_SOURCE_PARM"}, \ + {H_DEST_PARM, "H_DEST_PARM"}, \ + {H_REMOTE_PARM, "H_REMOTE_PARM"}, \ + {H_RESOURCE, "H_RESOURCE"}, \ + {H_ADAPTER_PARM, "H_ADAPTER_PARM"}, \ + {H_RH_PARM, "H_RH_PARM"}, \ + {H_RCQ_PARM, "H_RCQ_PARM"}, \ + {H_SCQ_PARM, "H_SCQ_PARM"}, \ + {H_EQ_PARM, "H_EQ_PARM"}, \ + {H_RT_PARM, "H_RT_PARM"}, \ + {H_ST_PARM, "H_ST_PARM"}, \ + {H_SIGT_PARM, "H_SIGT_PARM"}, \ + {H_TOKEN_PARM, "H_TOKEN_PARM"}, \ + {H_MLENGTH_PARM, "H_MLENGTH_PARM"}, \ + {H_MEM_PARM, "H_MEM_PARM"}, \ + {H_MEM_ACCESS_PARM, "H_MEM_ACCESS_PARM"}, \ + {H_ATTR_PARM, "H_ATTR_PARM"}, \ + {H_PORT_PARM, "H_PORT_PARM"}, \ + {H_MCG_PARM, "H_MCG_PARM"}, \ + {H_VL_PARM, "H_VL_PARM"}, \ + {H_TSIZE_PARM, "H_TSIZE_PARM"}, \ + {H_TRACE_PARM, "H_TRACE_PARM"}, \ + {H_MASK_PARM, "H_MASK_PARM"}, \ + {H_MCG_FULL, "H_MCG_FULL"}, \ + {H_ALIAS_EXIST, "H_ALIAS_EXIST"}, \ + {H_P_COUNTER, "H_P_COUNTER"}, \ + {H_TABLE_FULL, "H_TABLE_FULL"}, \ + {H_ALT_TABLE, "H_ALT_TABLE"}, \ + 
{H_MR_CONDITION, "H_MR_CONDITION"}, \ + {H_NOT_ENOUGH_RESOURCES, "H_NOT_ENOUGH_RESOURCES"}, \ + {H_R_STATE, "H_R_STATE"}, \ + {H_RESCINDED, "H_RESCINDED"}, \ + {H_P2, "H_P2"}, \ + {H_P3, "H_P3"}, \ + {H_P4, "H_P4"}, \ + {H_P5, "H_P5"}, \ + {H_P6, "H_P6"}, \ + {H_P7, "H_P7"}, \ + {H_P8, "H_P8"}, \ + {H_P9, "H_P9"}, \ + {H_TOO_BIG, "H_TOO_BIG"}, \ + {H_OVERLAP, "H_OVERLAP"}, \ + {H_INTERRUPT, "H_INTERRUPT"}, \ + {H_BAD_DATA, "H_BAD_DATA"}, \ + {H_NOT_ACTIVE, "H_NOT_ACTIVE"}, \ + {H_SG_LIST, "H_SG_LIST"}, \ + {H_OP_MODE, "H_OP_MODE"}, \ + {H_COP_HW, "H_COP_HW"}, \ + {H_UNSUPPORTED_FLAG_START, "H_UNSUPPORTED_FLAG_START"}, \ + {H_UNSUPPORTED_FLAG_END, "H_UNSUPPORTED_FLAG_END"}, \ + {H_MULTI_THREADS_ACTIVE, "H_MULTI_THREADS_ACTIVE"}, \ + {H_OUTSTANDING_COP_OPS, "H_OUTSTANDING_COP_OPS"} + +TRACE_EVENT(kvm_guest_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, pc) + __field(unsigned long, pending_exceptions) + __field(u8, ceded) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->ceded = vcpu->arch.ceded; + __entry->pending_exceptions = vcpu->arch.pending_exceptions; + ), + + TP_printk("VCPU %d: pc=0x%lx pexcp=0x%lx ceded=%d", + __entry->vcpu_id, + __entry->pc, + __entry->pending_exceptions, __entry->ceded) +); + +TRACE_EVENT(kvm_guest_exit, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, trap) + __field(unsigned long, pc) + __field(unsigned long, msr) + __field(u8, ceded) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->trap = vcpu->arch.trap; + __entry->ceded = vcpu->arch.ceded; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->msr = vcpu->arch.shregs.msr; + ), + + TP_printk("VCPU %d: trap=%s pc=0x%lx msr=0x%lx, ceded=%d", + __entry->vcpu_id, + __print_symbolic(__entry->trap, kvm_trace_symbol_exit), + __entry->pc, __entry->msr, __entry->ceded + ) +); + +TRACE_EVENT(kvm_page_fault_enter, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, + struct kvm_memory_slot *memslot, unsigned long ea, + unsigned long dsisr), + + TP_ARGS(vcpu, hptep, memslot, ea, dsisr), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, hpte_v) + __field(unsigned long, hpte_r) + __field(unsigned long, gpte_r) + __field(unsigned long, ea) + __field(u64, base_gfn) + __field(u32, slot_flags) + __field(u32, dsisr) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->hpte_v = hptep[0]; + __entry->hpte_r = hptep[1]; + __entry->gpte_r = hptep[2]; + __entry->ea = ea; + __entry->dsisr = dsisr; + __entry->base_gfn = memslot ? memslot->base_gfn : -1UL; + __entry->slot_flags = memslot ? 
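/* as with base_gfn just above: a fault that matched no memslot is still
 * traced, with neutral values (base_gfn of -1, empty flags) rather than
 * being skipped */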
memslot->flags : 0; + ), + + TP_printk("VCPU %d: hpte=0x%lx:0x%lx guest=0x%lx ea=0x%lx,%x slot=0x%llx,0x%x", + __entry->vcpu_id, + __entry->hpte_v, __entry->hpte_r, __entry->gpte_r, + __entry->ea, __entry->dsisr, + __entry->base_gfn, __entry->slot_flags) +); + +TRACE_EVENT(kvm_page_fault_exit, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, long ret), + + TP_ARGS(vcpu, hptep, ret), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, hpte_v) + __field(unsigned long, hpte_r) + __field(long, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->hpte_v = hptep[0]; + __entry->hpte_r = hptep[1]; + __entry->ret = ret; + ), + + TP_printk("VCPU %d: hpte=0x%lx:0x%lx ret=0x%lx", + __entry->vcpu_id, + __entry->hpte_v, __entry->hpte_r, __entry->ret) +); + +TRACE_EVENT(kvm_hcall_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, req) + __field(unsigned long, gpr4) + __field(unsigned long, gpr5) + __field(unsigned long, gpr6) + __field(unsigned long, gpr7) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->req = kvmppc_get_gpr(vcpu, 3); + __entry->gpr4 = kvmppc_get_gpr(vcpu, 4); + __entry->gpr5 = kvmppc_get_gpr(vcpu, 5); + __entry->gpr6 = kvmppc_get_gpr(vcpu, 6); + __entry->gpr7 = kvmppc_get_gpr(vcpu, 7); + ), + + TP_printk("VCPU %d: hcall=%s GPR4-7=0x%lx,0x%lx,0x%lx,0x%lx", + __entry->vcpu_id, + __print_symbolic(__entry->req, kvm_trace_symbol_hcall), + __entry->gpr4, __entry->gpr5, __entry->gpr6, __entry->gpr7) +); + +TRACE_EVENT(kvm_hcall_exit, + TP_PROTO(struct kvm_vcpu *vcpu, int ret), + + TP_ARGS(vcpu, ret), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, ret) + __field(unsigned long, hcall_rc) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->ret = ret; + __entry->hcall_rc = kvmppc_get_gpr(vcpu, 3); + ), + + TP_printk("VCPU %d: ret=%s hcall_rc=%s", + __entry->vcpu_id, + __print_symbolic(__entry->ret, kvm_trace_symbol_kvmret), + __print_symbolic(__entry->ret & RESUME_FLAG_HOST ? + H_TOO_HARD : __entry->hcall_rc, + kvm_trace_symbol_hcall_rc)) +); + +TRACE_EVENT(kvmppc_run_core, + TP_PROTO(struct kvmppc_vcore *vc, int where), + + TP_ARGS(vc, where), + + TP_STRUCT__entry( + __field(int, n_runnable) + __field(int, runner_vcpu) + __field(int, where) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->runner_vcpu = vc->runner->vcpu_id; + __entry->n_runnable = vc->n_runnable; + __entry->where = where; + __entry->tgid = current->tgid; + ), + + TP_printk("%s runner_vcpu==%d runnable=%d tgid=%d", + __entry->where ? "Exit" : "Enter", + __entry->runner_vcpu, __entry->n_runnable, __entry->tgid) +); + +TRACE_EVENT(kvmppc_vcore_blocked, + TP_PROTO(struct kvmppc_vcore *vc, int where), + + TP_ARGS(vc, where), + + TP_STRUCT__entry( + __field(int, n_runnable) + __field(int, runner_vcpu) + __field(int, where) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->runner_vcpu = vc->runner->vcpu_id; + __entry->n_runnable = vc->n_runnable; + __entry->where = where; + __entry->tgid = current->tgid; + ), + + TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d", + __entry->where ? 
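/* same encoding as kvmppc_run_core above: where == 0 is logged as
 * "Enter", any other value as "Exit" */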
"Exit" : "Enter", + __entry->runner_vcpu, __entry->n_runnable, __entry->tgid) +); + +TRACE_EVENT(kvmppc_run_vcpu_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->tgid = current->tgid; + ), + + TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid) +); + +TRACE_EVENT(kvmppc_run_vcpu_exit, + TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run), + + TP_ARGS(vcpu, run), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, exit) + __field(int, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->exit = run->exit_reason; + __entry->ret = vcpu->arch.ret; + ), + + TP_printk("VCPU %d: exit=%d, ret=%d", + __entry->vcpu_id, __entry->exit, __entry->ret) +); + +#endif /* _TRACE_KVM_HV_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index e1357cd8dc1..810507cb688 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -3,36 +3,13 @@ #define _TRACE_KVM_PR_H #include <linux/tracepoint.h> +#include "trace_book3s.h" #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_pr #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE trace_pr -#define kvm_trace_symbol_exit \ - {0x100, "SYSTEM_RESET"}, \ - {0x200, "MACHINE_CHECK"}, \ - {0x300, "DATA_STORAGE"}, \ - {0x380, "DATA_SEGMENT"}, \ - {0x400, "INST_STORAGE"}, \ - {0x480, "INST_SEGMENT"}, \ - {0x500, "EXTERNAL"}, \ - {0x501, "EXTERNAL_LEVEL"}, \ - {0x502, "EXTERNAL_HV"}, \ - {0x600, "ALIGNMENT"}, \ - {0x700, "PROGRAM"}, \ - {0x800, "FP_UNAVAIL"}, \ - {0x900, "DECREMENTER"}, \ - {0x980, "HV_DECREMENTER"}, \ - {0xc00, "SYSCALL"}, \ - {0xd00, "TRACE"}, \ - {0xe00, "H_DATA_STORAGE"}, \ - {0xe20, "H_INST_STORAGE"}, \ - {0xe40, "H_EMUL_ASSIST"}, \ - {0xf00, "PERFMON"}, \ - {0xf20, "ALTIVEC"}, \ - {0xf40, "VSX"} - TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, struct kvm_vcpu *vcpu), TP_ARGS(r, vcpu), diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 2175f911a73..9cba74d5d85 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -123,7 +123,7 @@ struct kvm_s390_sie_block { #define ICPT_PARTEXEC 0x38 #define ICPT_IOINST 0x40 __u8 icptcode; /* 0x0050 */ - __u8 reserved51; /* 0x0051 */ + __u8 icptstatus; /* 0x0051 */ __u16 ihcpu; /* 0x0052 */ __u8 reserved54[2]; /* 0x0054 */ __u16 ipa; /* 0x0056 */ @@ -226,10 +226,17 @@ struct kvm_vcpu_stat { u32 instruction_sigp_sense_running; u32 instruction_sigp_external_call; u32 instruction_sigp_emergency; + u32 instruction_sigp_cond_emergency; + u32 instruction_sigp_start; u32 instruction_sigp_stop; + u32 instruction_sigp_stop_store_status; + u32 instruction_sigp_store_status; u32 instruction_sigp_arch; u32 instruction_sigp_prefix; u32 instruction_sigp_restart; + u32 instruction_sigp_init_cpu_reset; + u32 instruction_sigp_cpu_reset; + u32 instruction_sigp_unknown; u32 diagnose_10; u32 diagnose_44; u32 diagnose_9c; @@ -288,6 +295,79 @@ struct kvm_vcpu_stat { #define PGM_PER 0x80 #define PGM_CRYPTO_OPERATION 0x119 +/* irq types in order of priority */ +enum irq_types { + IRQ_PEND_MCHK_EX = 0, + IRQ_PEND_SVC, + IRQ_PEND_PROG, + IRQ_PEND_MCHK_REP, + IRQ_PEND_EXT_IRQ_KEY, + IRQ_PEND_EXT_MALFUNC, + IRQ_PEND_EXT_EMERGENCY, + IRQ_PEND_EXT_EXTERNAL, + IRQ_PEND_EXT_CLOCK_COMP, + IRQ_PEND_EXT_CPU_TIMER, + IRQ_PEND_EXT_TIMING, + IRQ_PEND_EXT_SERVICE, + 
IRQ_PEND_EXT_HOST, + IRQ_PEND_PFAULT_INIT, + IRQ_PEND_PFAULT_DONE, + IRQ_PEND_VIRTIO, + IRQ_PEND_IO_ISC_0, + IRQ_PEND_IO_ISC_1, + IRQ_PEND_IO_ISC_2, + IRQ_PEND_IO_ISC_3, + IRQ_PEND_IO_ISC_4, + IRQ_PEND_IO_ISC_5, + IRQ_PEND_IO_ISC_6, + IRQ_PEND_IO_ISC_7, + IRQ_PEND_SIGP_STOP, + IRQ_PEND_RESTART, + IRQ_PEND_SET_PREFIX, + IRQ_PEND_COUNT +}; + +/* + * Repressible (non-floating) machine check interrupts + * subclass bits in MCIC + */ +#define MCHK_EXTD_BIT 58 +#define MCHK_DEGR_BIT 56 +#define MCHK_WARN_BIT 55 +#define MCHK_REP_MASK ((1UL << MCHK_DEGR_BIT) | \ + (1UL << MCHK_EXTD_BIT) | \ + (1UL << MCHK_WARN_BIT)) + +/* Exigent machine check interrupts subclass bits in MCIC */ +#define MCHK_SD_BIT 63 +#define MCHK_PD_BIT 62 +#define MCHK_EX_MASK ((1UL << MCHK_SD_BIT) | (1UL << MCHK_PD_BIT)) + +#define IRQ_PEND_EXT_MASK ((1UL << IRQ_PEND_EXT_IRQ_KEY) | \ + (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \ + (1UL << IRQ_PEND_EXT_CPU_TIMER) | \ + (1UL << IRQ_PEND_EXT_MALFUNC) | \ + (1UL << IRQ_PEND_EXT_EMERGENCY) | \ + (1UL << IRQ_PEND_EXT_EXTERNAL) | \ + (1UL << IRQ_PEND_EXT_TIMING) | \ + (1UL << IRQ_PEND_EXT_HOST) | \ + (1UL << IRQ_PEND_EXT_SERVICE) | \ + (1UL << IRQ_PEND_VIRTIO) | \ + (1UL << IRQ_PEND_PFAULT_INIT) | \ + (1UL << IRQ_PEND_PFAULT_DONE)) + +#define IRQ_PEND_IO_MASK ((1UL << IRQ_PEND_IO_ISC_0) | \ + (1UL << IRQ_PEND_IO_ISC_1) | \ + (1UL << IRQ_PEND_IO_ISC_2) | \ + (1UL << IRQ_PEND_IO_ISC_3) | \ + (1UL << IRQ_PEND_IO_ISC_4) | \ + (1UL << IRQ_PEND_IO_ISC_5) | \ + (1UL << IRQ_PEND_IO_ISC_6) | \ + (1UL << IRQ_PEND_IO_ISC_7)) + +#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \ + (1UL << IRQ_PEND_MCHK_EX)) + struct kvm_s390_interrupt_info { struct list_head list; u64 type; @@ -306,14 +386,25 @@ struct kvm_s390_interrupt_info { #define ACTION_STORE_ON_STOP (1<<0) #define ACTION_STOP_ON_STOP (1<<1) +struct kvm_s390_irq_payload { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + struct kvm_s390_mchk_info mchk; +}; + struct kvm_s390_local_interrupt { spinlock_t lock; - struct list_head list; - atomic_t active; struct kvm_s390_float_interrupt *float_int; wait_queue_head_t *wq; atomic_t *cpuflags; unsigned int action_bits; + DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); + struct kvm_s390_irq_payload irq; + unsigned long pending_irqs; }; struct kvm_s390_float_interrupt { @@ -434,6 +525,8 @@ struct kvm_arch{ int user_cpu_state_ctrl; struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; wait_queue_head_t ipte_wq; + int ipte_lock_count; + struct mutex ipte_mutex; spinlock_t start_stop_lock; struct kvm_s390_crypto crypto; }; diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index e510b9460ef..3009c2ba46d 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -24,6 +24,7 @@ void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned long key, bool nq); +unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr); static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 49576115dbb..fad4ae23ece 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -10,6 +10,7 @@ #define SIGP_RESTART 6 #define SIGP_STOP_AND_STORE_STATUS 9 #define 
SIGP_INITIAL_CPU_RESET 11 +#define SIGP_CPU_RESET 12 #define SIGP_SET_PREFIX 13 #define SIGP_STORE_STATUS_AT_ADDRESS 14 #define SIGP_SET_ARCHITECTURE 18 diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index ca38139423a..437e6115927 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -249,7 +249,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis struct group_info *group_info; int retval; - if (!capable(CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 0f961a1c64b..8b9ccf02a2c 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -207,8 +207,6 @@ union raddress { unsigned long pfra : 52; /* Page-Frame Real Address */ }; -static int ipte_lock_count; -static DEFINE_MUTEX(ipte_mutex); int ipte_lock_held(struct kvm_vcpu *vcpu) { @@ -216,47 +214,51 @@ int ipte_lock_held(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->eca & 1) return ic->kh != 0; - return ipte_lock_count != 0; + return vcpu->kvm->arch.ipte_lock_count != 0; } static void ipte_lock_simple(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - mutex_lock(&ipte_mutex); - ipte_lock_count++; - if (ipte_lock_count > 1) + mutex_lock(&vcpu->kvm->arch.ipte_mutex); + vcpu->kvm->arch.ipte_lock_count++; + if (vcpu->kvm->arch.ipte_lock_count > 1) goto out; ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = ACCESS_ONCE(*ic); + old = *ic; + barrier(); while (old.k) { cond_resched(); - old = ACCESS_ONCE(*ic); + old = *ic; + barrier(); } new = old; new.k = 1; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); out: - mutex_unlock(&ipte_mutex); + mutex_unlock(&vcpu->kvm->arch.ipte_mutex); } static void ipte_unlock_simple(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - mutex_lock(&ipte_mutex); - ipte_lock_count--; - if (ipte_lock_count) + mutex_lock(&vcpu->kvm->arch.ipte_mutex); + vcpu->kvm->arch.ipte_lock_count--; + if (vcpu->kvm->arch.ipte_lock_count) goto out; ic = &vcpu->kvm->arch.sca->ipte_control; do { - new = old = ACCESS_ONCE(*ic); + old = *ic; + barrier(); + new = old; new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); wake_up(&vcpu->kvm->arch.ipte_wq); out: - mutex_unlock(&ipte_mutex); + mutex_unlock(&vcpu->kvm->arch.ipte_mutex); } static void ipte_lock_siif(struct kvm_vcpu *vcpu) @@ -265,10 +267,12 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = ACCESS_ONCE(*ic); + old = *ic; + barrier(); while (old.kg) { cond_resched(); - old = ACCESS_ONCE(*ic); + old = *ic; + barrier(); } new = old; new.k = 1; @@ -282,7 +286,9 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) ic = &vcpu->kvm->arch.sca->ipte_control; do { - new = old = ACCESS_ONCE(*ic); + old = *ic; + barrier(); + new = old; new.kh--; if (!new.kh) new.k = 0; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index eaf46291d36..81c77ab8102 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -38,6 +38,19 @@ static const intercept_handler_t instruction_handlers[256] = { [0xeb] = kvm_s390_handle_eb, }; +void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) +{ + struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; + + /* Use the length of the EXECUTE instruction if necessary */ + if (sie_block->icptstatus & 1) { + ilc = (sie_block->icptstatus >> 4) & 0x6; + if (!ilc) + ilc = 4; + } + 
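/*
 * Worked example of the decode above: with icptstatus bit 0 set, the
 * intercepted instruction was the target of an EXECUTE, and
 * (icptstatus >> 4) & 0x6 yields 0, 2, 4 or 6; the 0 case is mapped to
 * 4 bytes. The PSW address is then stepped back by that many bytes, so
 * the guest presumably re-executes the intercepted instruction on the
 * next SIE entry.
 */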
sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc); +} + static int handle_noop(struct kvm_vcpu *vcpu) { switch (vcpu->arch.sie_block->icptcode) { @@ -244,7 +257,7 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) static int handle_external_interrupt(struct kvm_vcpu *vcpu) { u16 eic = vcpu->arch.sie_block->eic; - struct kvm_s390_interrupt irq; + struct kvm_s390_irq irq; psw_t newpsw; int rc; @@ -269,7 +282,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) if (kvm_s390_si_ext_call_pending(vcpu)) return 0; irq.type = KVM_S390_INT_EXTERNAL_CALL; - irq.parm = vcpu->arch.sie_block->extcpuaddr; + irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr; break; default: return -EOPNOTSUPP; @@ -288,7 +301,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) */ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; unsigned long srcaddr, dstaddr; int reg1, reg2, rc; @@ -310,7 +322,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) if (rc != 0) return rc; - psw->addr = __rewind_psw(*psw, 4); + kvm_s390_rewind_psw(vcpu, 4); return 0; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a39838457f0..f00f31e66cd 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -16,6 +16,7 @@ #include <linux/mmu_context.h> #include <linux/signal.h> #include <linux/slab.h> +#include <linux/bitmap.h> #include <asm/asm-offsets.h> #include <asm/uaccess.h> #include "kvm-s390.h" @@ -27,8 +28,8 @@ #define IOINT_CSSID_MASK 0x03fc0000 #define IOINT_AI_MASK 0x04000000 #define PFAULT_INIT 0x0600 - -static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu); +#define PFAULT_DONE 0x0680 +#define VIRTIO_PARAM 0x0d00 static int is_ioint(u64 type) { @@ -136,6 +137,31 @@ static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu, return 0; } +static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.local_int.pending_irqs; +} + +static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu) +{ + unsigned long active_mask = pending_local_irqs(vcpu); + + if (psw_extint_disabled(vcpu)) + active_mask &= ~IRQ_PEND_EXT_MASK; + if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul)) + __clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask); + if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul)) + __clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask); + if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) + __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask); + if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul)) + __clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask); + if (psw_mchk_disabled(vcpu)) + active_mask &= ~IRQ_PEND_MCHK_MASK; + + return active_mask; +} + static void __set_cpu_idle(struct kvm_vcpu *vcpu) { atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); @@ -170,26 +196,45 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags); } +static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) +{ + if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK)) + return; + if (psw_extint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_EXT_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR0; +} + +static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) +{ + if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK)) + return; + if (psw_mchk_disabled(vcpu)) + vcpu->arch.sie_block->ictl |= ICTL_LPSW; + else + vcpu->arch.sie_block->lctl |= LCTL_CR14; +} + +/* Set interception request for non-deliverable local 
interrupts */ +static void set_intercept_indicators_local(struct kvm_vcpu *vcpu) +{ + set_intercept_indicators_ext(vcpu); + set_intercept_indicators_mchk(vcpu); +} + static void __set_intercept_indicator(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { switch (inti->type) { - case KVM_S390_INT_EXTERNAL_CALL: - case KVM_S390_INT_EMERGENCY: case KVM_S390_INT_SERVICE: - case KVM_S390_INT_PFAULT_INIT: case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: - case KVM_S390_INT_CLOCK_COMP: - case KVM_S390_INT_CPU_TIMER: if (psw_extint_disabled(vcpu)) __set_cpuflag(vcpu, CPUSTAT_EXT_INT); else vcpu->arch.sie_block->lctl |= LCTL_CR0; break; - case KVM_S390_SIGP_STOP: - __set_cpuflag(vcpu, CPUSTAT_STOP_INT); - break; case KVM_S390_MCHK: if (psw_mchk_disabled(vcpu)) vcpu->arch.sie_block->ictl |= ICTL_LPSW; @@ -226,13 +271,236 @@ static u16 get_ilc(struct kvm_vcpu *vcpu) } } -static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, - struct kvm_s390_pgm_info *pgm_info) +static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, + 0, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, + 0, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP, + (u16 __user *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_ext_info ext; + int rc; + + spin_lock(&li->lock); + ext = li->irq.ext; + clear_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs); + li->irq.ext.ext_params2 = 0; + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx", + 0, ext.ext_params2); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_PFAULT_INIT, + 0, ext.ext_params2); + + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *) __LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, ext.ext_params2, (u64 *) __LC_EXT_PARAMS2); + return rc ? 
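/* as in the other __deliver_* helpers: the put/read/write_guest_lc
 * return codes were OR-ed into rc above, so any failed lowcore access
 * collapses the whole delivery into -EFAULT */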
-EFAULT : 0; +} + +static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_mchk_info mchk; + int rc; + + spin_lock(&li->lock); + mchk = li->irq.mchk; + /* + * If there was an exigent machine check pending, then any repressible + * machine checks that might have been pending are indicated along + * with it, so always clear both bits + */ + clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); + clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); + memset(&li->irq.mchk, 0, sizeof(mchk)); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + mchk.mcic); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, + mchk.cr14, mchk.mcic); + + rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); + rc |= put_guest_lc(vcpu, mchk.mcic, + (u64 __user *) __LC_MCCK_CODE); + rc |= put_guest_lc(vcpu, mchk.failing_storage_address, + (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); + rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, + &mchk.fixed_logout, sizeof(mchk.fixed_logout)); + rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_restart(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); + vcpu->stat.deliver_restart_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); + + rc = write_guest_lc(vcpu, + offsetof(struct _lowcore, restart_old_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + clear_bit(IRQ_PEND_RESTART, &li->pending_irqs); + return rc ? 
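/* restart carries no payload; only the PSW pair at restart_old_psw /
 * restart_psw is swapped, which is why no __LC_EXT_* parameter fields
 * are written here */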
-EFAULT : 0; +} + +static int __must_check __deliver_stop(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); + vcpu->stat.deliver_stop_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP, + 0, 0); + + __set_cpuflag(vcpu, CPUSTAT_STOP_INT); + clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs); + return 0; +} + +static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_prefix_info prefix; + + spin_lock(&li->lock); + prefix = li->irq.prefix; + li->irq.prefix.address = 0; + clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address); + vcpu->stat.deliver_prefix_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_SIGP_SET_PREFIX, + prefix.address, 0); + + kvm_s390_set_prefix(vcpu, prefix.address); + return 0; +} + +static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + int cpu_addr; + + spin_lock(&li->lock); + cpu_addr = find_first_bit(li->sigp_emerg_pending, KVM_MAX_VCPUS); + clear_bit(cpu_addr, li->sigp_emerg_pending); + if (bitmap_empty(li->sigp_emerg_pending, KVM_MAX_VCPUS)) + clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); + vcpu->stat.deliver_emergency_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, + cpu_addr, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, cpu_addr, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_extcall_info extcall; + int rc; + + spin_lock(&li->lock); + extcall = li->irq.extcall; + li->irq.extcall.code = 0; + clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); + vcpu->stat.deliver_external_call++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_EXTERNAL_CALL, + extcall.code, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, extcall.code, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + return rc ? 
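/* note the pattern shared with the helpers above: the payload is
 * snapshotted and the pending bit cleared under li->lock first, and
 * guest lowcore memory is only touched afterwards, outside the lock */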
-EFAULT : 0; +} + +static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) { + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_pgm_info pgm_info; int rc = 0; u16 ilc = get_ilc(vcpu); - switch (pgm_info->code & ~PGM_PER) { + spin_lock(&li->lock); + pgm_info = li->irq.pgm; + clear_bit(IRQ_PEND_PROG, &li->pending_irqs); + memset(&li->irq.pgm, 0, sizeof(pgm_info)); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", + pgm_info.code, ilc); + vcpu->stat.deliver_program_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, + pgm_info.code, 0); + + switch (pgm_info.code & ~PGM_PER) { case PGM_AFX_TRANSLATION: case PGM_ASX_TRANSLATION: case PGM_EX_TRANSLATION: @@ -243,7 +511,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, case PGM_PRIMARY_AUTHORITY: case PGM_SECONDARY_AUTHORITY: case PGM_SPACE_SWITCH: - rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + rc = put_guest_lc(vcpu, pgm_info.trans_exc_code, (u64 *)__LC_TRANS_EXC_CODE); break; case PGM_ALEN_TRANSLATION: @@ -252,7 +520,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, case PGM_ASTE_SEQUENCE: case PGM_ASTE_VALIDITY: case PGM_EXTENDED_AUTHORITY: - rc = put_guest_lc(vcpu, pgm_info->exc_access_id, + rc = put_guest_lc(vcpu, pgm_info.exc_access_id, (u8 *)__LC_EXC_ACCESS_ID); break; case PGM_ASCE_TYPE: @@ -261,247 +529,208 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: case PGM_SEGMENT_TRANSLATION: - rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + rc = put_guest_lc(vcpu, pgm_info.trans_exc_code, (u64 *)__LC_TRANS_EXC_CODE); - rc |= put_guest_lc(vcpu, pgm_info->exc_access_id, + rc |= put_guest_lc(vcpu, pgm_info.exc_access_id, (u8 *)__LC_EXC_ACCESS_ID); - rc |= put_guest_lc(vcpu, pgm_info->op_access_id, + rc |= put_guest_lc(vcpu, pgm_info.op_access_id, (u8 *)__LC_OP_ACCESS_ID); break; case PGM_MONITOR: - rc = put_guest_lc(vcpu, pgm_info->mon_class_nr, - (u64 *)__LC_MON_CLASS_NR); - rc |= put_guest_lc(vcpu, pgm_info->mon_code, + rc = put_guest_lc(vcpu, pgm_info.mon_class_nr, + (u16 *)__LC_MON_CLASS_NR); + rc |= put_guest_lc(vcpu, pgm_info.mon_code, (u64 *)__LC_MON_CODE); break; case PGM_DATA: - rc = put_guest_lc(vcpu, pgm_info->data_exc_code, + rc = put_guest_lc(vcpu, pgm_info.data_exc_code, (u32 *)__LC_DATA_EXC_CODE); break; case PGM_PROTECTION: - rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + rc = put_guest_lc(vcpu, pgm_info.trans_exc_code, (u64 *)__LC_TRANS_EXC_CODE); - rc |= put_guest_lc(vcpu, pgm_info->exc_access_id, + rc |= put_guest_lc(vcpu, pgm_info.exc_access_id, (u8 *)__LC_EXC_ACCESS_ID); break; } - if (pgm_info->code & PGM_PER) { - rc |= put_guest_lc(vcpu, pgm_info->per_code, + if (pgm_info.code & PGM_PER) { + rc |= put_guest_lc(vcpu, pgm_info.per_code, (u8 *) __LC_PER_CODE); - rc |= put_guest_lc(vcpu, pgm_info->per_atmid, + rc |= put_guest_lc(vcpu, pgm_info.per_atmid, (u8 *)__LC_PER_ATMID); - rc |= put_guest_lc(vcpu, pgm_info->per_address, + rc |= put_guest_lc(vcpu, pgm_info.per_address, (u64 *) __LC_PER_ADDRESS); - rc |= put_guest_lc(vcpu, pgm_info->per_access_id, + rc |= put_guest_lc(vcpu, pgm_info.per_access_id, (u8 *) __LC_PER_ACCESS_ID); } rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); - rc |= put_guest_lc(vcpu, pgm_info->code, + rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_INT_CODE); rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= 
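/* classical PSW swap: the old PSW was stored at __LC_PGM_OLD_PSW just
 * above, and the guest's program-check new PSW is now loaded into the
 * SIE block, so the vcpu resumes in its program-check handler */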
read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? -EFAULT : 0; +} - return rc; +static int __must_check __deliver_service(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + int rc; + + VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", + inti->ext.ext_params); + vcpu->stat.deliver_service_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + return rc ? -EFAULT : 0; } -static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) { - const unsigned short table[] = { 2, 4, 4, 6 }; - int rc = 0; + int rc; + + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_PFAULT_DONE, 0, + inti->ext.ext_params2); + + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + int rc; + + VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", + inti->ext.ext_params, inti->ext.ext_params2); + vcpu->stat.deliver_virtio_interrupt++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, + inti->ext.ext_params2); + + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_io(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + int rc; + + VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); + vcpu->stat.deliver_io_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + ((__u32)inti->io.subchannel_id << 16) | + inti->io.subchannel_nr, + ((__u64)inti->io.io_int_parm << 32) | + inti->io.io_int_word); + + rc = put_guest_lc(vcpu, inti->io.subchannel_id, + (u16 *)__LC_SUBCHANNEL_ID); + rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, + (u16 *)__LC_SUBCHANNEL_NR); + rc |= put_guest_lc(vcpu, inti->io.io_int_parm, + (u32 *)__LC_IO_INT_PARM); + rc |= put_guest_lc(vcpu, inti->io.io_int_word, + (u32 *)__LC_IO_INT_WORD); + rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? 
-EFAULT : 0; +} + +static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_mchk_info *mchk = &inti->mchk; + int rc; + + VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + mchk->mcic); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, + mchk->cr14, mchk->mcic); + + rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); + rc |= put_guest_lc(vcpu, mchk->mcic, + (u64 __user *) __LC_MCCK_CODE); + rc |= put_guest_lc(vcpu, mchk->failing_storage_address, + (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); + rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, + &mchk->fixed_logout, sizeof(mchk->fixed_logout)); + rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? -EFAULT : 0; +} + +typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu); + +static const deliver_irq_t deliver_irq_funcs[] = { + [IRQ_PEND_MCHK_EX] = __deliver_machine_check, + [IRQ_PEND_PROG] = __deliver_prog, + [IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal, + [IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call, + [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc, + [IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer, + [IRQ_PEND_RESTART] = __deliver_restart, + [IRQ_PEND_SIGP_STOP] = __deliver_stop, + [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix, + [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init, +}; + +static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + int rc; switch (inti->type) { - case KVM_S390_INT_EMERGENCY: - VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); - vcpu->stat.deliver_emergency_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->emerg.code, 0); - rc = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, inti->emerg.code, - (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - break; - case KVM_S390_INT_EXTERNAL_CALL: - VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); - vcpu->stat.deliver_external_call++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->extcall.code, 0); - rc = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, inti->extcall.code, - (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - break; - case KVM_S390_INT_CLOCK_COMP: - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, 0); - rc = deliver_ckc_interrupt(vcpu); - break; - case KVM_S390_INT_CPU_TIMER: - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, 0); - rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER, - (u16 *)__LC_EXT_INT_CODE); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); - break; case KVM_S390_INT_SERVICE: - VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", - inti->ext.ext_params); - vcpu->stat.deliver_service_signal++; - 
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, 0); - rc = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); - break; - case KVM_S390_INT_PFAULT_INIT: - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, - inti->ext.ext_params2); - rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, - (u16 *) __LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *) __LC_EXT_PARAMS2); + rc = __deliver_service(vcpu, inti); break; case KVM_S390_INT_PFAULT_DONE: - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, - inti->ext.ext_params2); - rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *)__LC_EXT_PARAMS2); + rc = __deliver_pfault_done(vcpu, inti); break; case KVM_S390_INT_VIRTIO: - VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", - inti->ext.ext_params, inti->ext.ext_params2); - vcpu->stat.deliver_virtio_interrupt++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, - inti->ext.ext_params2); - rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *)__LC_EXT_PARAMS2); - break; - case KVM_S390_SIGP_STOP: - VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); - vcpu->stat.deliver_stop_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - 0, 0); - __set_intercept_indicator(vcpu, inti); - break; - - case KVM_S390_SIGP_SET_PREFIX: - VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", - inti->prefix.address); - vcpu->stat.deliver_prefix_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->prefix.address, 0); - kvm_s390_set_prefix(vcpu, inti->prefix.address); - break; - - case KVM_S390_RESTART: - VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); - vcpu->stat.deliver_restart_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - 0, 0); - rc = write_guest_lc(vcpu, - offsetof(struct _lowcore, restart_old_psw), - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw), - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); + rc = __deliver_virtio(vcpu, inti); break; - case KVM_S390_PROGRAM_INT: - VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", - inti->pgm.code, - table[vcpu->arch.sie_block->ipa >> 14]); - vcpu->stat.deliver_program_int++; - 
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->pgm.code, 0); - rc = __deliver_prog_irq(vcpu, &inti->pgm); - break; - case KVM_S390_MCHK: - VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", - inti->mchk.mcic); - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->mchk.cr14, - inti->mchk.mcic); - rc = kvm_s390_vcpu_store_status(vcpu, - KVM_S390_STORE_STATUS_PREFIXED); - rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE); - rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc = __deliver_mchk_floating(vcpu, inti); break; - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - { - __u32 param0 = ((__u32)inti->io.subchannel_id << 16) | - inti->io.subchannel_nr; - __u64 param1 = ((__u64)inti->io.io_int_parm << 32) | - inti->io.io_int_word; - VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); - vcpu->stat.deliver_io_int++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - param0, param1); - rc = put_guest_lc(vcpu, inti->io.subchannel_id, - (u16 *)__LC_SUBCHANNEL_ID); - rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, - (u16 *)__LC_SUBCHANNEL_NR); - rc |= put_guest_lc(vcpu, inti->io.io_int_parm, - (u32 *)__LC_IO_INT_PARM); - rc |= put_guest_lc(vcpu, inti->io.io_int_word, - (u32 *)__LC_IO_INT_WORD); - rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); + rc = __deliver_io(vcpu, inti); break; - } default: BUG(); } @@ -509,19 +738,6 @@ static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu, return rc; } -static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu) -{ - int rc; - - rc = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - return rc; -} - /* Check whether SIGP interpretation facility has an external call pending */ int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu) { @@ -538,20 +754,11 @@ int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu) int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) { - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; struct kvm_s390_interrupt_info *inti; - int rc = 0; + int rc; - if (atomic_read(&li->active)) { - spin_lock(&li->lock); - list_for_each_entry(inti, &li->list, list) - if (__interrupt_is_deliverable(vcpu, inti)) { - rc = 1; - break; - } - spin_unlock(&li->lock); - } + rc = !!deliverable_local_irqs(vcpu); if ((!rc) && atomic_read(&fi->active)) { spin_lock(&fi->lock); @@ -643,18 +850,15 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_interrupt_info *n, *inti = NULL; spin_lock(&li->lock); - list_for_each_entry_safe(inti, n, &li->list, list) { - list_del(&inti->list); - kfree(inti); - } - atomic_set(&li->active, 0); + li->pending_irqs = 0; + bitmap_zero(li->sigp_emerg_pending, KVM_MAX_VCPUS); + memset(&li->irq, 0, sizeof(li->irq)); spin_unlock(&li->lock); /* clear pending external calls set by sigp interpretation facility */ - 
atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags); atomic_clear_mask(SIGP_CTRL_C, &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); } @@ -664,34 +868,35 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; struct kvm_s390_interrupt_info *n, *inti = NULL; + deliver_irq_t func; int deliver; int rc = 0; + unsigned long irq_type; + unsigned long deliverable_irqs; __reset_intercept_indicators(vcpu); - if (atomic_read(&li->active)) { - do { - deliver = 0; - spin_lock(&li->lock); - list_for_each_entry_safe(inti, n, &li->list, list) { - if (__interrupt_is_deliverable(vcpu, inti)) { - list_del(&inti->list); - deliver = 1; - break; - } - __set_intercept_indicator(vcpu, inti); - } - if (list_empty(&li->list)) - atomic_set(&li->active, 0); - spin_unlock(&li->lock); - if (deliver) { - rc = __do_deliver_interrupt(vcpu, inti); - kfree(inti); - } - } while (!rc && deliver); - } - if (!rc && kvm_cpu_has_pending_timer(vcpu)) - rc = deliver_ckc_interrupt(vcpu); + /* pending ckc conditions might have been invalidated */ + clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + if (kvm_cpu_has_pending_timer(vcpu)) + set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + + do { + deliverable_irqs = deliverable_local_irqs(vcpu); + /* bits are in the order of interrupt priority */ + irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT); + if (irq_type == IRQ_PEND_COUNT) + break; + func = deliver_irq_funcs[irq_type]; + if (!func) { + WARN_ON_ONCE(func == NULL); + clear_bit(irq_type, &li->pending_irqs); + continue; + } + rc = func(vcpu); + } while (!rc && irq_type != IRQ_PEND_COUNT); + + set_intercept_indicators_local(vcpu); if (!rc && atomic_read(&fi->active)) { do { @@ -710,7 +915,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) atomic_set(&fi->active, 0); spin_unlock(&fi->lock); if (deliver) { - rc = __do_deliver_interrupt(vcpu, inti); + rc = __deliver_floating_interrupt(vcpu, inti); kfree(inti); } } while (!rc && deliver); @@ -719,23 +924,26 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) return rc; } -int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) +static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_interrupt_info *inti; - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) - return -ENOMEM; + li->irq.pgm = irq->u.pgm; + set_bit(IRQ_PEND_PROG, &li->pending_irqs); + return 0; +} - inti->type = KVM_S390_PROGRAM_INT; - inti->pgm.code = code; +int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_irq irq; VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code, + 0, 1); spin_lock(&li->lock); - list_add(&inti->list, &li->list); - atomic_set(&li->active, 1); + irq.u.pgm.code = code; + __inject_prog(vcpu, &irq); BUG_ON(waitqueue_active(li->wq)); spin_unlock(&li->lock); return 0; @@ -745,27 +953,166 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, struct kvm_s390_pgm_info *pgm_info) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - 
struct kvm_s390_interrupt_info *inti; - - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) - return -ENOMEM; + struct kvm_s390_irq irq; + int rc; VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)", pgm_info->code); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, pgm_info->code, 0, 1); - - inti->type = KVM_S390_PROGRAM_INT; - memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm)); spin_lock(&li->lock); - list_add(&inti->list, &li->list); - atomic_set(&li->active, 1); + irq.u.pgm = *pgm_info; + rc = __inject_prog(vcpu, &irq); BUG_ON(waitqueue_active(li->wq)); spin_unlock(&li->lock); + return rc; +} + +static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx", + irq->u.ext.ext_params, irq->u.ext.ext_params2); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT, + irq->u.ext.ext_params, + irq->u.ext.ext_params2, 2); + + li->irq.ext = irq->u.ext; + set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); return 0; } +int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_extcall_info *extcall = &li->irq.extcall; + + VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", + irq->u.extcall.code); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, + irq->u.extcall.code, 0, 2); + + *extcall = irq->u.extcall; + set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + return 0; +} + +static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_prefix_info *prefix = &li->irq.prefix; + + VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", + prefix->address); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX, + prefix->address, 0, 2); + + *prefix = irq->u.prefix; + set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); + return 0; +} + +static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2); + + li->action_bits |= ACTION_STOP_ON_STOP; + set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); + return 0; +} + +static int __inject_sigp_restart(struct kvm_vcpu *vcpu, + struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2); + + set_bit(IRQ_PEND_RESTART, &li->pending_irqs); + return 0; +} + +static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, + struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_emerg_info *emerg = &li->irq.emerg; + + VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", + irq->u.emerg.code); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, + emerg->code, 0, 2); + + set_bit(emerg->code, li->sigp_emerg_pending); + set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + return 0; +} + +static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + 
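/* mchk below aliases the accumulated li->irq.mchk state, so the
 * VCPU_EVENT/trace calls print the previously pending mcic value, not
 * yet the bits of the irq being injected; the new bits are only OR-ed
 * in afterwards */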
struct kvm_s390_mchk_info *mchk = &li->irq.mchk; + + VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", + mchk->mcic); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0, + mchk->mcic, 2); + + /* + * Because repressible machine checks can be indicated along with + * exigent machine checks (PoP, Chapter 11, Interruption action) + * we need to combine cr14, mcic and external damage code. + * Failing storage address and the logout area should not be or'ed + * together, we just indicate the last occurrence of the corresponding + * machine check + */ + mchk->cr14 |= irq->u.mchk.cr14; + mchk->mcic |= irq->u.mchk.mcic; + mchk->ext_damage_code |= irq->u.mchk.ext_damage_code; + mchk->failing_storage_address = irq->u.mchk.failing_storage_address; + memcpy(&mchk->fixed_logout, &irq->u.mchk.fixed_logout, + sizeof(mchk->fixed_logout)); + if (mchk->mcic & MCHK_EX_MASK) + set_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); + else if (mchk->mcic & MCHK_REP_MASK) + set_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); + return 0; +} + +static int __inject_ckc(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, + 0, 0, 2); + + set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + return 0; +} + +static int __inject_cpu_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, + 0, 0, 2); + + set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + return 0; +} + + struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid) { @@ -851,7 +1198,17 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) dst_vcpu = kvm_get_vcpu(kvm, sigcpu); li = &dst_vcpu->arch.local_int; spin_lock(&li->lock); - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + switch (inti->type) { + case KVM_S390_MCHK: + atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags); + break; + default: + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + break; + } spin_unlock(&li->lock); kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); unlock_fi: @@ -920,92 +1277,85 @@ void kvm_s390_reinject_io_int(struct kvm *kvm, __inject_vm(kvm, inti); } -int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt *s390int) +int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, + struct kvm_s390_irq *irq) { - struct kvm_s390_local_interrupt *li; - struct kvm_s390_interrupt_info *inti; + irq->type = s390int->type; + switch (irq->type) { + case KVM_S390_PROGRAM_INT: + if (s390int->parm & 0xffff0000) + return -EINVAL; + irq->u.pgm.code = s390int->parm; + break; + case KVM_S390_SIGP_SET_PREFIX: + irq->u.prefix.address = s390int->parm; + break; + case KVM_S390_INT_EXTERNAL_CALL: + if (irq->u.extcall.code & 0xffff0000) + return -EINVAL; + irq->u.extcall.code = s390int->parm; + break; + case KVM_S390_INT_EMERGENCY: + if (irq->u.emerg.code & 0xffff0000) + return -EINVAL; + irq->u.emerg.code = s390int->parm; + break; + case KVM_S390_MCHK: + irq->u.mchk.mcic = s390int->parm64; + break; + } + return 0; +} - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) 
- return -ENOMEM; +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; - switch (s390int->type) { + spin_lock(&li->lock); + switch (irq->type) { case KVM_S390_PROGRAM_INT: - if (s390int->parm & 0xffff0000) { - kfree(inti); - return -EINVAL; - } - inti->type = s390int->type; - inti->pgm.code = s390int->parm; VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", - s390int->parm); + irq->u.pgm.code); + rc = __inject_prog(vcpu, irq); break; case KVM_S390_SIGP_SET_PREFIX: - inti->prefix.address = s390int->parm; - inti->type = s390int->type; - VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", - s390int->parm); + rc = __inject_set_prefix(vcpu, irq); break; case KVM_S390_SIGP_STOP: + rc = __inject_sigp_stop(vcpu, irq); + break; case KVM_S390_RESTART: + rc = __inject_sigp_restart(vcpu, irq); + break; case KVM_S390_INT_CLOCK_COMP: + rc = __inject_ckc(vcpu); + break; case KVM_S390_INT_CPU_TIMER: - VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); - inti->type = s390int->type; + rc = __inject_cpu_timer(vcpu); break; case KVM_S390_INT_EXTERNAL_CALL: - if (s390int->parm & 0xffff0000) { - kfree(inti); - return -EINVAL; - } - VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", - s390int->parm); - inti->type = s390int->type; - inti->extcall.code = s390int->parm; + rc = __inject_extcall(vcpu, irq); break; case KVM_S390_INT_EMERGENCY: - if (s390int->parm & 0xffff0000) { - kfree(inti); - return -EINVAL; - } - VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm); - inti->type = s390int->type; - inti->emerg.code = s390int->parm; + rc = __inject_sigp_emergency(vcpu, irq); break; case KVM_S390_MCHK: - VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", - s390int->parm64); - inti->type = s390int->type; - inti->mchk.mcic = s390int->parm64; + rc = __inject_mchk(vcpu, irq); break; case KVM_S390_INT_PFAULT_INIT: - inti->type = s390int->type; - inti->ext.ext_params2 = s390int->parm64; + rc = __inject_pfault_init(vcpu, irq); break; case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: default: - kfree(inti); - return -EINVAL; + rc = -EINVAL; } - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm, - s390int->parm64, 2); - - li = &vcpu->arch.local_int; - spin_lock(&li->lock); - if (inti->type == KVM_S390_PROGRAM_INT) - list_add(&inti->list, &li->list); - else - list_add_tail(&inti->list, &li->list); - atomic_set(&li->active, 1); - if (inti->type == KVM_S390_SIGP_STOP) - li->action_bits |= ACTION_STOP_ON_STOP; - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); spin_unlock(&li->lock); - kvm_s390_vcpu_wakeup(vcpu); - return 0; + if (!rc) + kvm_s390_vcpu_wakeup(vcpu); + return rc; } void kvm_s390_clear_float_irqs(struct kvm *kvm) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6b049ee75a5..3e09801e310 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -81,10 +81,17 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, + { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) }, + { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) }, { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, + { 
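s390int_to_s390irq() above maps the legacy kvm_s390_interrupt layout, where every payload rides in parm/parm64, onto the typed kvm_s390_irq union. One detail worth noting: for the external-call and emergency cases the hunk tests irq->u.*.code before that field has been assigned. The sketch below, which uses abbreviated stand-in structs rather than the real ABI, validates the source parm instead.

#include <errno.h>
#include <stdint.h>

/* Abbreviated stand-ins, not the real uapi structs. */
struct legacy_int { uint32_t type; uint32_t parm; uint64_t parm64; };
struct typed_irq {
    uint32_t type;
    union {
        struct { uint16_t code; } pgm, emerg, extcall;
        struct { uint32_t address; } prefix;
        struct { uint64_t mcic; } mchk;
    } u;
};

enum { T_PROGRAM = 1, T_PREFIX, T_EXTCALL, T_EMERGENCY, T_MCHK };

static int legacy_to_typed(const struct legacy_int *in, struct typed_irq *out)
{
    out->type = in->type;
    switch (in->type) {
    case T_PROGRAM:
    case T_EXTCALL:
    case T_EMERGENCY:
        if (in->parm & 0xffff0000u)    /* these codes are 16 bit */
            return -EINVAL;
        break;
    }
    switch (in->type) {
    case T_PROGRAM:   out->u.pgm.code = (uint16_t)in->parm; break;
    case T_PREFIX:    out->u.prefix.address = in->parm; break;
    case T_EXTCALL:   out->u.extcall.code = (uint16_t)in->parm; break;
    case T_EMERGENCY: out->u.emerg.code = (uint16_t)in->parm; break;
    case T_MCHK:      out->u.mchk.mcic = in->parm64; break;
    }
    return 0;
}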
"instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, + { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) }, + { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) }, + { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) }, { "diagnose_10", VCPU_STAT(diagnose_10) }, { "diagnose_44", VCPU_STAT(diagnose_44) }, { "diagnose_9c", VCPU_STAT(diagnose_9c) }, @@ -453,6 +460,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) spin_lock_init(&kvm->arch.float_int.lock); INIT_LIST_HEAD(&kvm->arch.float_int.list); init_waitqueue_head(&kvm->arch.ipte_wq); + mutex_init(&kvm->arch.ipte_mutex); debug_register_view(kvm->arch.dbf, &debug_sprintf_view); VM_EVENT(kvm, 3, "%s", "vm created"); @@ -711,7 +719,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, } spin_lock_init(&vcpu->arch.local_int.lock); - INIT_LIST_HEAD(&vcpu->arch.local_int.list); vcpu->arch.local_int.float_int = &kvm->arch.float_int; vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; @@ -1114,13 +1121,15 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, unsigned long token) { struct kvm_s390_interrupt inti; - inti.parm64 = token; + struct kvm_s390_irq irq; if (start_token) { - inti.type = KVM_S390_INT_PFAULT_INIT; - WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti)); + irq.u.ext.ext_params2 = token; + irq.type = KVM_S390_INT_PFAULT_INIT; + WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); } else { inti.type = KVM_S390_INT_PFAULT_DONE; + inti.parm64 = token; WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); } } @@ -1614,11 +1623,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, switch (ioctl) { case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int; + struct kvm_s390_irq s390irq; r = -EFAULT; if (copy_from_user(&s390int, argp, sizeof(s390int))) break; - r = kvm_s390_inject_vcpu(vcpu, &s390int); + if (s390int_to_s390irq(&s390int, &s390irq)) + return -EINVAL; + r = kvm_s390_inject_vcpu(vcpu, &s390irq); break; } case KVM_S390_STORE_STATUS: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 244d0230318..a8f3d9b71c1 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -24,8 +24,6 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); /* declare vfacilities extern */ extern unsigned long *vfacilities; -int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); - /* Transactional Memory Execution related macros */ #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) #define TDB_FORMAT1 1 @@ -144,7 +142,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm); int __must_check kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int); int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt *s390int); + struct kvm_s390_irq *irq); int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); @@ -152,6 +150,10 @@ void kvm_s390_reinject_io_int(struct kvm *kvm, struct kvm_s390_interrupt_info *inti); int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); +/* 
implemented in intercept.c */ +void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc); +int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); + /* implemented in priv.c */ int is_valid_psw(psw_t *psw); int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); @@ -222,6 +224,9 @@ static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc) return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); } +int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, + struct kvm_s390_irq *s390irq); + /* implemented in interrupt.c */ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int psw_extint_disabled(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index f47cb0c6d90..1be578d64df 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -180,21 +180,18 @@ static int handle_skey(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - vcpu->arch.sie_block->gpsw.addr = - __rewind_psw(vcpu->arch.sie_block->gpsw, 4); + kvm_s390_rewind_psw(vcpu, 4); VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return 0; } static int handle_ipte_interlock(struct kvm_vcpu *vcpu) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; - vcpu->stat.instruction_ipte_interlock++; - if (psw_bits(*psw).p) + if (psw_bits(vcpu->arch.sie_block->gpsw).p) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); - psw->addr = __rewind_psw(*psw, 4); + kvm_s390_rewind_psw(vcpu, 4); VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation"); return 0; } @@ -650,10 +647,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; - if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { - if (kvm_s390_check_low_addr_protection(vcpu, start)) - return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); - } + start = kvm_s390_logical_to_effective(vcpu, start); switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { case 0x00000000: @@ -669,6 +663,12 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) default: return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); } + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (kvm_s390_check_low_addr_protection(vcpu, start)) + return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); + } + while (start < end) { unsigned long useraddr, abs_addr; @@ -725,8 +725,7 @@ static int handle_essa(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* Rewind PSW to repeat the ESSA instruction */ - vcpu->arch.sie_block->gpsw.addr = - __rewind_psw(vcpu->arch.sie_block->gpsw, 4); + kvm_s390_rewind_psw(vcpu, 4); vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); down_read(&gmap->mm->mmap_sem); @@ -769,8 +768,8 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) { int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u32 val = 0; - int reg, rc; + int reg, rc, nr_regs; + u32 ctl_array[16]; u64 ga; vcpu->stat.instruction_lctl++; @@ -786,19 +785,20 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga); + nr_regs = ((reg3 - reg1) & 0xf) + 1; + rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); reg = reg1; + nr_regs = 0; 
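The LCTL rewrite above replaces up to sixteen single-word guest reads with one read_guest() sized by the wrap-around distance from reg1 to reg3, so an access fault is reported before any control register changes. A small sketch of the count calculation and the masked merge loop, in plain userspace C with gcr[] standing in for the SIE control-register block:

#include <stdint.h>

/* Wrap-around register count: LCTL loads reg1..reg3 modulo 16, so
 * r1=14, r3=1 names four registers: 14, 15, 0, 1. */
static int nr_ctl_regs(int reg1, int reg3)
{
    return ((reg3 - reg1) & 0xf) + 1;
}

/* Merge 32-bit values into the low halves of the 64-bit control
 * registers, mirroring the loop in the hunk above. */
static void load_ctl32(uint64_t gcr[16], int reg1, int reg3,
                       const uint32_t *ctl_array)
{
    int reg = reg1, i = 0;

    do {
        gcr[reg] &= 0xffffffff00000000ull;
        gcr[reg] |= ctl_array[i++];
        if (reg == reg3)
            break;
        reg = (reg + 1) % 16;
    } while (1);
}

int main(void)
{
    uint64_t gcr[16] = {0};
    uint32_t vals[4] = {1, 2, 3, 4};

    load_ctl32(gcr, 14, 1, vals);       /* loads CRs 14, 15, 0, 1 */
    return nr_ctl_regs(14, 1) == 4 ? 0 : 1;
}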
do { - rc = read_guest(vcpu, ga, &val, sizeof(val)); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; - vcpu->arch.sie_block->gcr[reg] |= val; - ga += 4; + vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++]; if (reg == reg3) break; reg = (reg + 1) % 16; } while (1); - + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } @@ -806,9 +806,9 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) { int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + int reg, rc, nr_regs; + u32 ctl_array[16]; u64 ga; - u32 val; - int reg, rc; vcpu->stat.instruction_stctl++; @@ -824,26 +824,24 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga); reg = reg1; + nr_regs = 0; do { - val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful; - rc = write_guest(vcpu, ga, &val, sizeof(val)); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); - ga += 4; + ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg]; if (reg == reg3) break; reg = (reg + 1) % 16; } while (1); - - return 0; + rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32)); + return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0; } static int handle_lctlg(struct kvm_vcpu *vcpu) { int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 ga, val; - int reg, rc; + int reg, rc, nr_regs; + u64 ctl_array[16]; + u64 ga; vcpu->stat.instruction_lctlg++; @@ -855,22 +853,22 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - reg = reg1; - VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga); + nr_regs = ((reg3 - reg1) & 0xf) + 1; + rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + reg = reg1; + nr_regs = 0; do { - rc = read_guest(vcpu, ga, &val, sizeof(val)); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); - vcpu->arch.sie_block->gcr[reg] = val; - ga += 8; + vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++]; if (reg == reg3) break; reg = (reg + 1) % 16; } while (1); - + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } @@ -878,8 +876,9 @@ static int handle_stctg(struct kvm_vcpu *vcpu) { int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 ga, val; - int reg, rc; + int reg, rc, nr_regs; + u64 ctl_array[16]; + u64 ga; vcpu->stat.instruction_stctg++; @@ -891,23 +890,19 @@ static int handle_stctg(struct kvm_vcpu *vcpu) if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - reg = reg1; - VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga); + reg = reg1; + nr_regs = 0; do { - val = vcpu->arch.sie_block->gcr[reg]; - rc = write_guest(vcpu, ga, &val, sizeof(val)); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); - ga += 8; + ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg]; if (reg == reg3) break; reg = (reg + 1) % 16; } while (1); - - return 0; + rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64)); + return rc ? 
kvm_s390_inject_prog_cond(vcpu, rc) : 0; } static const intercept_handler_t eb_handlers[256] = { diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index cf243ba3d50..6651f9f7397 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -20,20 +20,13 @@ #include "kvm-s390.h" #include "trace.h" -static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, +static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, u64 *reg) { struct kvm_s390_local_interrupt *li; - struct kvm_vcpu *dst_vcpu = NULL; int cpuflags; int rc; - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; - - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; cpuflags = atomic_read(li->cpuflags); @@ -48,55 +41,53 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, rc = SIGP_CC_STATUS_STORED; } - VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); + VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", dst_vcpu->vcpu_id, + rc); return rc; } -static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) +static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu) { - struct kvm_s390_interrupt s390int = { + struct kvm_s390_irq irq = { .type = KVM_S390_INT_EMERGENCY, - .parm = vcpu->vcpu_id, + .u.emerg.code = vcpu->vcpu_id, }; - struct kvm_vcpu *dst_vcpu = NULL; int rc = 0; - if (cpu_addr < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; - - rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int); + rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); if (!rc) - VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); + VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", + dst_vcpu->vcpu_id); return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; } -static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, +static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu) +{ + return __inject_sigp_emergency(vcpu, dst_vcpu); +} + +static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u16 asn, u64 *reg) { - struct kvm_vcpu *dst_vcpu = NULL; const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT; u16 p_asn, s_asn; psw_t *psw; u32 flags; - if (cpu_addr < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags); psw = &dst_vcpu->arch.sie_block->gpsw; p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */ s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */ - /* Deliver the emergency signal? */ + /* Inject the emergency signal? 
*/ if (!(flags & CPUSTAT_STOPPED) || (psw->mask & psw_int_mask) != psw_int_mask || ((flags & CPUSTAT_WAIT) && psw->addr != 0) || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) { - return __sigp_emergency(vcpu, cpu_addr); + return __inject_sigp_emergency(vcpu, dst_vcpu); } else { *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; @@ -104,23 +95,19 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, } } -static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) +static int __sigp_external_call(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu) { - struct kvm_s390_interrupt s390int = { + struct kvm_s390_irq irq = { .type = KVM_S390_INT_EXTERNAL_CALL, - .parm = vcpu->vcpu_id, + .u.extcall.code = vcpu->vcpu_id, }; - struct kvm_vcpu *dst_vcpu = NULL; int rc; - if (cpu_addr < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; - - rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int); + rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); if (!rc) - VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); + VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", + dst_vcpu->vcpu_id); return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; } @@ -128,29 +115,20 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action) { struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int; - struct kvm_s390_interrupt_info *inti; int rc = SIGP_CC_ORDER_CODE_ACCEPTED; - inti = kzalloc(sizeof(*inti), GFP_ATOMIC); - if (!inti) - return -ENOMEM; - inti->type = KVM_S390_SIGP_STOP; - spin_lock(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) { /* another SIGP STOP is pending */ - kfree(inti); rc = SIGP_CC_BUSY; goto out; } if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { - kfree(inti); if ((action & ACTION_STORE_ON_STOP) != 0) rc = -ESHUTDOWN; goto out; } - list_add_tail(&inti->list, &li->list); - atomic_set(&li->active, 1); + set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); li->action_bits |= action; atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); kvm_s390_vcpu_wakeup(dst_vcpu); @@ -160,23 +138,27 @@ out: return rc; } -static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) +static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu) { - struct kvm_vcpu *dst_vcpu = NULL; int rc; - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; + rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP); + VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id); - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; + return rc; +} - rc = __inject_sigp_stop(dst_vcpu, action); +static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u64 *reg) +{ + int rc; - VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); + rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP | + ACTION_STORE_ON_STOP); + VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x", + dst_vcpu->vcpu_id); - if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) { + if (rc == -ESHUTDOWN) { /* If the CPU has already been stopped, we still have * to save the status when doing stop-and-store. This * has to be done after unlocking all spinlocks. 
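With the kzalloc() gone, injecting SIGP STOP above can no longer fail with -ENOMEM: it is either accepted, busy because a stop is already pending, or short-circuited with -ESHUTDOWN when the target is already stopped and a store was requested. A lock-free sketch of those outcomes, with stand-in field names and bit index:

#include <errno.h>

#define ACTION_STORE_ON_STOP 1
#define ACTION_STOP_ON_STOP  2
#define IRQ_PEND_SIGP_STOP   0          /* stand-in bit index */

enum { CC_ACCEPTED = 0, CC_BUSY = 2 };  /* SIGP condition codes */

struct dst_state {
    unsigned long pending_irqs;
    unsigned int action_bits;
    int stopped;                        /* stand-in for CPUSTAT_STOPPED */
};

static int inject_sigp_stop(struct dst_state *d, int action)
{
    if (d->action_bits & ACTION_STOP_ON_STOP)
        return CC_BUSY;                 /* a STOP is already in flight */
    if (d->stopped)                     /* nothing to stop; maybe store */
        return (action & ACTION_STORE_ON_STOP) ? -ESHUTDOWN : CC_ACCEPTED;
    d->pending_irqs |= 1UL << IRQ_PEND_SIGP_STOP;
    d->action_bits |= action;
    return CC_ACCEPTED;
}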
*/ @@ -212,18 +194,12 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) return rc; } -static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, - u64 *reg) +static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, + u32 address, u64 *reg) { struct kvm_s390_local_interrupt *li; - struct kvm_vcpu *dst_vcpu = NULL; - struct kvm_s390_interrupt_info *inti; int rc; - if (cpu_addr < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; /* @@ -238,46 +214,34 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, return SIGP_CC_STATUS_STORED; } - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) - return SIGP_CC_BUSY; - spin_lock(&li->lock); /* cpu must be in stopped state */ if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; rc = SIGP_CC_STATUS_STORED; - kfree(inti); goto out_li; } - inti->type = KVM_S390_SIGP_SET_PREFIX; - inti->prefix.address = address; - - list_add_tail(&inti->list, &li->list); - atomic_set(&li->active, 1); + li->irq.prefix.address = address; + set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); kvm_s390_vcpu_wakeup(dst_vcpu); rc = SIGP_CC_ORDER_CODE_ACCEPTED; - VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); + VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id, + address); out_li: spin_unlock(&li->lock); return rc; } -static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, - u32 addr, u64 *reg) +static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, + u32 addr, u64 *reg) { - struct kvm_vcpu *dst_vcpu = NULL; int flags; int rc; - if (cpu_id < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; - spin_lock(&dst_vcpu->arch.local_int.lock); flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); spin_unlock(&dst_vcpu->arch.local_int.lock); @@ -297,19 +261,12 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, return rc; } -static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, - u64 *reg) +static int __sigp_sense_running(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u64 *reg) { struct kvm_s390_local_interrupt *li; - struct kvm_vcpu *dst_vcpu = NULL; int rc; - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; - - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { /* running */ @@ -321,26 +278,19 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, rc = SIGP_CC_STATUS_STORED; } - VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, - rc); + VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", + dst_vcpu->vcpu_id, rc); return rc; } -/* Test whether the destination CPU is available and not busy */ -static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) +static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u8 order_code) { - struct kvm_s390_local_interrupt *li; - int rc = SIGP_CC_ORDER_CODE_ACCEPTED; - struct kvm_vcpu *dst_vcpu = NULL; - - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; + struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int; + /* handle (RE)START in user space */ 
+ int rc = -EOPNOTSUPP; - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; - li = &dst_vcpu->arch.local_int; spin_lock(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) rc = SIGP_CC_BUSY; @@ -349,90 +299,131 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) return rc; } -int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) +static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u8 order_code) { - int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; - int r3 = vcpu->arch.sie_block->ipa & 0x000f; - u32 parameter; - u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; - u8 order_code; - int rc; + /* handle (INITIAL) CPU RESET in user space */ + return -EOPNOTSUPP; +} - /* sigp in userspace can exit */ - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); +static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu) +{ + /* handle unknown orders in user space */ + return -EOPNOTSUPP; +} - order_code = kvm_s390_get_base_disp_rs(vcpu); +static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, + u16 cpu_addr, u32 parameter, u64 *status_reg) +{ + int rc; + struct kvm_vcpu *dst_vcpu; - if (r1 % 2) - parameter = vcpu->run->s.regs.gprs[r1]; - else - parameter = vcpu->run->s.regs.gprs[r1 + 1]; + if (cpu_addr >= KVM_MAX_VCPUS) + return SIGP_CC_NOT_OPERATIONAL; + + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; - trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter); switch (order_code) { case SIGP_SENSE: vcpu->stat.instruction_sigp_sense++; - rc = __sigp_sense(vcpu, cpu_addr, - &vcpu->run->s.regs.gprs[r1]); + rc = __sigp_sense(vcpu, dst_vcpu, status_reg); break; case SIGP_EXTERNAL_CALL: vcpu->stat.instruction_sigp_external_call++; - rc = __sigp_external_call(vcpu, cpu_addr); + rc = __sigp_external_call(vcpu, dst_vcpu); break; case SIGP_EMERGENCY_SIGNAL: vcpu->stat.instruction_sigp_emergency++; - rc = __sigp_emergency(vcpu, cpu_addr); + rc = __sigp_emergency(vcpu, dst_vcpu); break; case SIGP_STOP: vcpu->stat.instruction_sigp_stop++; - rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP); + rc = __sigp_stop(vcpu, dst_vcpu); break; case SIGP_STOP_AND_STORE_STATUS: - vcpu->stat.instruction_sigp_stop++; - rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | - ACTION_STOP_ON_STOP); + vcpu->stat.instruction_sigp_stop_store_status++; + rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg); break; case SIGP_STORE_STATUS_AT_ADDRESS: - rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter, - &vcpu->run->s.regs.gprs[r1]); - break; - case SIGP_SET_ARCHITECTURE: - vcpu->stat.instruction_sigp_arch++; - rc = __sigp_set_arch(vcpu, parameter); + vcpu->stat.instruction_sigp_store_status++; + rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter, + status_reg); break; case SIGP_SET_PREFIX: vcpu->stat.instruction_sigp_prefix++; - rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, - &vcpu->run->s.regs.gprs[r1]); + rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg); break; case SIGP_COND_EMERGENCY_SIGNAL: - rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter, - &vcpu->run->s.regs.gprs[r1]); + vcpu->stat.instruction_sigp_cond_emergency++; + rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter, + status_reg); break; case SIGP_SENSE_RUNNING: vcpu->stat.instruction_sigp_sense_running++; - rc = __sigp_sense_running(vcpu, cpu_addr, - 
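handle_sigp_dst() above hoists the one check every order handler used to repeat, validating cpu_addr and resolving the destination VCPU, so the individual __sigp_* helpers now take a guaranteed-valid pointer. Roughly, as a sketch with hypothetical simplified types and lookup:

#define SIGP_CC_NOT_OPERATIONAL 3
#define MAX_VCPUS 64                    /* stand-in for KVM_MAX_VCPUS */

struct vcpu { int id; };

static struct vcpu *get_vcpu(struct vcpu *const *table, unsigned int addr)
{
    return addr < MAX_VCPUS ? table[addr] : (struct vcpu *)0;
}

/* One addressing check up front; every order handler then receives a
 * destination that is guaranteed to exist. */
static int handle_order(struct vcpu *const *table, unsigned int cpu_addr,
                        int (*order)(struct vcpu *dst))
{
    struct vcpu *dst = get_vcpu(table, cpu_addr);

    return dst ? order(dst) : SIGP_CC_NOT_OPERATIONAL;
}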
&vcpu->run->s.regs.gprs[r1]); + rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg); break; case SIGP_START: - rc = sigp_check_callable(vcpu, cpu_addr); - if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) - rc = -EOPNOTSUPP; /* Handle START in user space */ + vcpu->stat.instruction_sigp_start++; + rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code); break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; - rc = sigp_check_callable(vcpu, cpu_addr); - if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) { - VCPU_EVENT(vcpu, 4, - "sigp restart %x to handle userspace", - cpu_addr); - /* user space must know about restart */ - rc = -EOPNOTSUPP; - } + rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code); + break; + case SIGP_INITIAL_CPU_RESET: + vcpu->stat.instruction_sigp_init_cpu_reset++; + rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code); + break; + case SIGP_CPU_RESET: + vcpu->stat.instruction_sigp_cpu_reset++; + rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code); + break; + default: + vcpu->stat.instruction_sigp_unknown++; + rc = __prepare_sigp_unknown(vcpu, dst_vcpu); + } + + if (rc == -EOPNOTSUPP) + VCPU_EVENT(vcpu, 4, + "sigp order %u -> cpu %x: handled in user space", + order_code, dst_vcpu->vcpu_id); + + return rc; +} + +int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) +{ + int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int r3 = vcpu->arch.sie_block->ipa & 0x000f; + u32 parameter; + u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; + u8 order_code; + int rc; + + /* sigp in userspace can exit */ + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + order_code = kvm_s390_get_base_disp_rs(vcpu); + + if (r1 % 2) + parameter = vcpu->run->s.regs.gprs[r1]; + else + parameter = vcpu->run->s.regs.gprs[r1 + 1]; + + trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter); + switch (order_code) { + case SIGP_SET_ARCHITECTURE: + vcpu->stat.instruction_sigp_arch++; + rc = __sigp_set_arch(vcpu, parameter); break; default: - return -EOPNOTSUPP; + rc = handle_sigp_dst(vcpu, order_code, cpu_addr, + parameter, + &vcpu->run->s.regs.gprs[r1]); } if (rc < 0) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 71c7eff2c89..be99357d238 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -844,7 +844,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, down_read(&mm->mmap_sem); retry: - ptep = get_locked_pte(current->mm, addr, &ptl); + ptep = get_locked_pte(mm, addr, &ptl); if (unlikely(!ptep)) { up_read(&mm->mmap_sem); return -EFAULT; @@ -888,6 +888,45 @@ retry: } EXPORT_SYMBOL(set_guest_storage_key); +unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) +{ + spinlock_t *ptl; + pgste_t pgste; + pte_t *ptep; + uint64_t physaddr; + unsigned long key = 0; + + down_read(&mm->mmap_sem); + ptep = get_locked_pte(mm, addr, &ptl); + if (unlikely(!ptep)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + pgste = pgste_get_lock(ptep); + + if (pte_val(*ptep) & _PAGE_INVALID) { + key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; + key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; + key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48; + key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48; + } else { + physaddr = pte_val(*ptep) & PAGE_MASK; + key = page_get_storage_key(physaddr); + + /* Reflect guest's logical view, not physical */ + if (pgste_val(pgste) & PGSTE_GR_BIT) + key |= _PAGE_REFERENCED; + if (pgste_val(pgste) & PGSTE_GC_BIT) + key |= _PAGE_CHANGED; + } + + 
pgste_set_unlock(ptep, pgste); + pte_unmap_unlock(ptep, ptl); + up_read(&mm->mmap_sem); + return key; +} +EXPORT_SYMBOL(get_guest_storage_key); + #else /* CONFIG_PGSTE */ static inline int page_table_with_pgste(struct page *page) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6ed0c30d6a0..d89c6b828c9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -33,7 +33,7 @@ #define KVM_MAX_VCPUS 255 #define KVM_SOFT_MAX_VCPUS 160 -#define KVM_USER_MEM_SLOTS 125 +#define KVM_USER_MEM_SLOTS 509 /* memory slots that are not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 3 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) @@ -51,6 +51,7 @@ | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL +#define CR3_PCID_INVD (1UL << 63) #define CR4_RESERVED_BITS \ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ @@ -361,6 +362,7 @@ struct kvm_vcpu_arch { int mp_state; u64 ia32_misc_enable_msr; bool tpr_access_reporting; + u64 ia32_xss; /* * Paging state of the vcpu @@ -542,7 +544,7 @@ struct kvm_apic_map { struct rcu_head rcu; u8 ldr_bits; /* fields bellow are used to decode ldr values in different modes */ - u32 cid_shift, cid_mask, lid_mask; + u32 cid_shift, cid_mask, lid_mask, broadcast; struct kvm_lapic *phys_map[256]; /* first index is cluster id second is cpu id in a cluster */ struct kvm_lapic *logical_map[16][16]; @@ -602,6 +604,9 @@ struct kvm_arch { struct kvm_xen_hvm_config xen_hvm_config; + /* reads protected by irq_srcu, writes by irq_lock */ + struct hlist_head mask_notifier_list; + /* fields used by HYPER-V emulation */ u64 hv_guest_os_id; u64 hv_hypercall; @@ -659,6 +664,16 @@ struct msr_data { u64 data; }; +struct kvm_lapic_irq { + u32 vector; + u32 delivery_mode; + u32 dest_mode; + u32 level; + u32 trig_mode; + u32 shorthand; + u32 dest_id; +}; + struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ @@ -767,6 +782,7 @@ struct kvm_x86_ops { enum x86_intercept_stage stage); void (*handle_external_intr)(struct kvm_vcpu *vcpu); bool (*mpx_supported)(void); + bool (*xsaves_supported)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); @@ -818,6 +834,19 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); +struct kvm_irq_mask_notifier { + void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); + int irq; + struct hlist_node link; +}; + +void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, + bool mask); + extern bool tdp_enabled; u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); @@ -863,7 +892,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); -void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector); +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); @@ -895,6 +924,7 @@ 
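get_guest_storage_key() above composes the guest-visible key from two sources: for invalid PTEs the whole key is read back from the software PGSTE, while for mapped pages it starts from the real frame's hardware key and folds in the guest's logical referenced/changed view. A sketch of that composition with an invented bit layout (the real PGSTE encoding differs):

#include <stdint.h>

/* Invented layout for the demo; not the real PGSTE bit positions. */
#define PG_ACC   0x0fULL    /* access-control bits */
#define PG_FP    0x10ULL    /* fetch protection */
#define PG_GR    0x20ULL    /* guest referenced */
#define PG_GC    0x40ULL    /* guest changed */
#define KEY_REF  0x04ULL
#define KEY_CHG  0x02ULL

static uint64_t guest_storage_key(int pte_valid, uint64_t pgste,
                                  uint64_t hw_key)
{
    uint64_t key;

    if (!pte_valid)
        /* No backing frame: the whole key lives in software. */
        key = pgste & (PG_ACC | PG_FP);
    else
        /* Mapped: start from the real frame's key... */
        key = hw_key;

    /* ...and reflect the guest's logical R/C view either way. */
    if (pgste & PG_GR)
        key |= KEY_REF;
    if (pgste & PG_GC)
        key |= KEY_CHG;
    return key;
}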
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t gfn, void *data, int offset, int len, u32 access); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); +bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); static inline int __kvm_irq_line_state(unsigned long *irq_state, int irq_source_id, int level) @@ -1066,6 +1096,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index bcbfade26d8..45afaee9555 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -69,6 +69,7 @@ #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_XSAVES 0x00100000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 @@ -159,6 +160,8 @@ enum vmcs_field { EOI_EXIT_BITMAP3_HIGH = 0x00002023, VMREAD_BITMAP = 0x00002026, VMWRITE_BITMAP = 0x00002028, + XSS_EXIT_BITMAP = 0x0000202C, + XSS_EXIT_BITMAP_HIGH = 0x0000202D, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 7e7a79ada65..5fa9770035d 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -16,6 +16,7 @@ #define XSTATE_Hi16_ZMM 0x80 #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) +#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) /* Bit 63 of XCR0 is reserved for future expansion */ #define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 990a2fe1588..b813bf9da1e 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -72,6 +72,8 @@ #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 #define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -116,6 +118,8 @@ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ { EXIT_REASON_INVD, "INVD" }, \ { EXIT_REASON_INVVPID, "INVVPID" }, \ - { EXIT_REASON_INVPCID, "INVPCID" } + { EXIT_REASON_INVPCID, "INVPCID" }, \ + { EXIT_REASON_XSAVES, "XSAVES" }, \ + { EXIT_REASON_XRSTORS, "XRSTORS" } #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f6945bef2cd..94f64348430 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -283,7 +283,14 @@ NOKPROBE_SYMBOL(do_async_page_fault); static void __init paravirt_ops_setup(void) { pv_info.name = "KVM"; - pv_info.paravirt_enabled = 1; + + /* + * KVM isn't paravirt in the sense of paravirt_enabled. A KVM + * guest kernel works like a bare metal kernel with additional + * features, and paravirt_enabled is about features that are + * missing. 
+ */ + pv_info.paravirt_enabled = 0; if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_cpu_ops.io_delay = kvm_io_delay; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d9156ceecdf..42caaef897c 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -59,13 +59,12 @@ static void kvm_get_wallclock(struct timespec *now) native_write_msr(msr_kvm_wall_clock, low, high); - preempt_disable(); - cpu = smp_processor_id(); + cpu = get_cpu(); vcpu_time = &hv_clock[cpu].pvti; pvclock_read_wallclock(&wall_clock, vcpu_time, now); - preempt_enable(); + put_cpu(); } static int kvm_set_wallclock(const struct timespec *now) @@ -107,11 +106,10 @@ static unsigned long kvm_get_tsc_khz(void) int cpu; unsigned long tsc_khz; - preempt_disable(); - cpu = smp_processor_id(); + cpu = get_cpu(); src = &hv_clock[cpu].pvti; tsc_khz = pvclock_tsc_khz(src); - preempt_enable(); + put_cpu(); return tsc_khz; } @@ -263,7 +261,6 @@ void __init kvmclock_init(void) #endif kvm_get_preset_lpj(); clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); - pv_info.paravirt_enabled = 1; pv_info.name = "KVM"; if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) @@ -284,23 +281,22 @@ int __init kvm_setup_vsyscall_timeinfo(void) size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); - preempt_disable(); - cpu = smp_processor_id(); + cpu = get_cpu(); vcpu_time = &hv_clock[cpu].pvti; flags = pvclock_read_flags(vcpu_time); if (!(flags & PVCLOCK_TSC_STABLE_BIT)) { - preempt_enable(); + put_cpu(); return 1; } if ((ret = pvclock_init_vsyscall(hv_clock, size))) { - preempt_enable(); + put_cpu(); return ret; } - preempt_enable(); + put_cpu(); kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; #endif diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 4c540c4719d..0de1fae2bdf 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -738,3 +738,4 @@ void *get_xsave_addr(struct xsave_struct *xsave, int xstate) return (void *)xsave + xstate_comp_offsets[feature]; } +EXPORT_SYMBOL_GPL(get_xsave_addr); diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 25d22b2d650..08f790dfadc 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -7,14 +7,13 @@ CFLAGS_vmx.o := -I. 
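The kvmclock hunks above swap preempt_disable()/smp_processor_id() pairs for get_cpu()/put_cpu(), which bundle the preemption guard and the CPU-id read into one call. A userspace mock of the idiom (GNU C statement expressions, as in the kernel's own definition):

#include <stdio.h>

/* Userspace mock: in the kernel, get_cpu() is essentially
 * ({ preempt_disable(); smp_processor_id(); }) and put_cpu() is
 * preempt_enable(), so the id fetch and the guard cannot be split. */
static int preempt_count;
static void preempt_disable(void) { preempt_count++; }
static void preempt_enable(void)  { preempt_count--; }
static int  smp_processor_id(void) { return 0; }

#define get_cpu()  ({ preempt_disable(); smp_processor_id(); })
#define put_cpu()  preempt_enable()

int main(void)
{
    int cpu = get_cpu();

    printf("on cpu %d, preempt_count=%d\n", cpu, preempt_count);
    put_cpu();
    return preempt_count;               /* 0 when balanced */
}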
KVM := ../../../virt/kvm -kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ - $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ +kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o -kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ - i8254.o cpuid.o pmu.o + i8254.o ioapic.o irq_comm.o cpuid.o pmu.o +kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o kvm-intel-y += vmx.o kvm-amd-y += svm.o diff --git a/virt/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index e05000e200d..6eb5c20ee37 100644 --- a/virt/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c @@ -20,6 +20,32 @@ #include <linux/namei.h> #include <linux/fs.h> #include "irq.h" +#include "assigned-dev.h" + +struct kvm_assigned_dev_kernel { + struct kvm_irq_ack_notifier ack_notifier; + struct list_head list; + int assigned_dev_id; + int host_segnr; + int host_busnr; + int host_devfn; + unsigned int entries_nr; + int host_irq; + bool host_irq_disabled; + bool pci_2_3; + struct msix_entry *host_msix_entries; + int guest_irq; + struct msix_entry *guest_msix_entries; + unsigned long irq_requested_type; + int irq_source_id; + int flags; + struct pci_dev *dev; + struct kvm *kvm; + spinlock_t intx_lock; + spinlock_t intx_mask_lock; + char irq_name[32]; + struct pci_saved_state *pci_saved_state; +}; static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, int assigned_dev_id) @@ -748,7 +774,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, if (r) goto out_list_del; } - r = kvm_assign_device(kvm, match); + r = kvm_assign_device(kvm, match->dev); if (r) goto out_list_del; @@ -790,7 +816,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, goto out; } - kvm_deassign_device(kvm, match); + kvm_deassign_device(kvm, match->dev); kvm_free_assigned_device(kvm, match); diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h new file mode 100644 index 00000000000..a428c1a211b --- /dev/null +++ b/arch/x86/kvm/assigned-dev.h @@ -0,0 +1,32 @@ +#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H +#define ARCH_X86_KVM_ASSIGNED_DEV_H + +#include <linux/kvm_host.h> + +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT +int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev); +int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev); + +int kvm_iommu_map_guest(struct kvm *kvm); +int kvm_iommu_unmap_guest(struct kvm *kvm); + +long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg); + +void kvm_free_all_assigned_devices(struct kvm *kvm); +#else +static inline int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + return 0; +} + +static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg) +{ + return -ENOTTY; +} + +static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {} +#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */ + +#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 976e3a57f9e..8a80737ee6e 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -23,7 +23,7 @@ #include "mmu.h" #include "trace.h" -static u32 xstate_required_size(u64 xstate_bv) +static u32 xstate_required_size(u64 xstate_bv, bool compacted) { int feature_bit = 0; u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; @@ -31,9 +31,10 @@ static u32 xstate_required_size(u64 xstate_bv) xstate_bv &= XSTATE_EXTEND_MASK; while (xstate_bv) { if (xstate_bv & 0x1) 
{ - u32 eax, ebx, ecx, edx; + u32 eax, ebx, ecx, edx, offset; cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx); - ret = max(ret, eax + ebx); + offset = compacted ? ret : ebx; + ret = max(ret, offset + eax); } xstate_bv >>= 1; @@ -53,6 +54,8 @@ u64 kvm_supported_xcr0(void) return xcr0; } +#define F(x) bit(X86_FEATURE_##x) + int kvm_update_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -64,13 +67,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) /* Update OSXSAVE bit */ if (cpu_has_xsave && best->function == 0x1) { - best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); + best->ecx &= ~F(OSXSAVE); if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) - best->ecx |= bit(X86_FEATURE_OSXSAVE); + best->ecx |= F(OSXSAVE); } if (apic) { - if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER)) + if (best->ecx & F(TSC_DEADLINE_TIMER)) apic->lapic_timer.timer_mode_mask = 3 << 17; else apic->lapic_timer.timer_mode_mask = 1 << 17; @@ -85,9 +88,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) (best->eax | ((u64)best->edx << 32)) & kvm_supported_xcr0(); vcpu->arch.guest_xstate_size = best->ebx = - xstate_required_size(vcpu->arch.xcr0); + xstate_required_size(vcpu->arch.xcr0, false); } + best = kvm_find_cpuid_entry(vcpu, 0xD, 1); + if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) + best->ebx = xstate_required_size(vcpu->arch.xcr0, true); + /* * The existing code assumes virtual address is 48-bit in the canonical * address checks; exit if it is ever changed. @@ -122,8 +129,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) break; } } - if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) { - entry->edx &= ~bit(X86_FEATURE_NX); + if (entry && (entry->edx & F(NX)) && !is_efer_nx()) { + entry->edx &= ~F(NX); printk(KERN_INFO "kvm: guest NX capability removed\n"); } } @@ -227,8 +234,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->flags = 0; } -#define F(x) bit(X86_FEATURE_##x) - static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, u32 func, u32 index, int *nent, int maxnent) { @@ -267,6 +272,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0; + unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? 
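xstate_required_size() above now distinguishes the standard XSAVE layout, where CPUID.0xD reports each feature's fixed offset in ebx, from the compacted XSAVES/XSAVEC layout, where enabled features are packed back to back and the offset is the running size. A sketch of both cases; the table is a stand-in for cpuid_count(), and compaction alignment is ignored for brevity:

#include <stdint.h>

#define XSAVE_HDR_OFFSET 512            /* legacy x87/SSE region */
#define XSAVE_HDR_SIZE    64

struct xfeat { uint32_t size, offset; }; /* CPUID.0xD[i].eax / .ebx */

static uint32_t xstate_required_size(uint64_t xstate_bv, int compacted,
                                     const struct xfeat *tab)
{
    uint32_t ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
    int bit = 2;                        /* first extended feature */

    xstate_bv >>= 2;                    /* x87/SSE live in the legacy area */
    while (xstate_bv) {
        if (xstate_bv & 1) {
            /* Standard: architectural offset from CPUID.
             * Compacted: features are packed, offset = running size. */
            uint32_t offset = compacted ? ret : tab[bit].offset;

            if (offset + tab[bit].size > ret)
                ret = offset + tab[bit].size;
        }
        xstate_bv >>= 1;
        bit++;
    }
    return ret;
}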
F(XSAVES) : 0; /* cpuid 1.edx */ const u32 kvm_supported_word0_x86_features = @@ -317,7 +323,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, const u32 kvm_supported_word9_x86_features = F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | - F(ADX) | F(SMAP); + F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | + F(AVX512CD); + + /* cpuid 0xD.1.eax */ + const u32 kvm_supported_word10_x86_features = + F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves; /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -453,16 +464,34 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, u64 supported = kvm_supported_xcr0(); entry->eax &= supported; + entry->ebx = xstate_required_size(supported, false); + entry->ecx = entry->ebx; entry->edx &= supported >> 32; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + if (!supported) + break; + for (idx = 1, i = 1; idx < 64; ++idx) { u64 mask = ((u64)1 << idx); if (*nent >= maxnent) goto out; do_cpuid_1_ent(&entry[i], function, idx); - if (entry[i].eax == 0 || !(supported & mask)) - continue; + if (idx == 1) { + entry[i].eax &= kvm_supported_word10_x86_features; + entry[i].ebx = 0; + if (entry[i].eax & (F(XSAVES)|F(XSAVEC))) + entry[i].ebx = + xstate_required_size(supported, + true); + } else { + if (entry[i].eax == 0 || !(supported & mask)) + continue; + if (WARN_ON_ONCE(entry[i].ecx & 1)) + continue; + } + entry[i].ecx = 0; + entry[i].edx = 0; entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; ++*nent; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9f8a2faf504..169b09d76dd 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -123,6 +123,7 @@ #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ #define Escape (5<<15) /* Escape to coprocessor instruction */ +#define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */ #define Sse (1<<18) /* SSE Vector instruction */ /* Generic ModRM decode. */ #define ModRM (1<<19) @@ -166,6 +167,8 @@ #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */ #define NoBigReal ((u64)1 << 50) /* No big real mode */ #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */ +#define NearBranch ((u64)1 << 52) /* Near branches */ +#define No16 ((u64)1 << 53) /* No 16 bit operand */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -209,6 +212,7 @@ struct opcode { const struct group_dual *gdual; const struct gprefix *gprefix; const struct escape *esc; + const struct instr_dual *idual; void (*fastop)(struct fastop *fake); } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); @@ -231,6 +235,11 @@ struct escape { struct opcode high[64]; }; +struct instr_dual { + struct opcode mod012; + struct opcode mod3; +}; + /* EFLAGS bit definitions. 
*/ #define EFLG_ID (1<<21) #define EFLG_VIP (1<<20) @@ -379,6 +388,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); ON64(FOP2E(op##q, rax, cl)) \ FOP_END +/* 2 operand, src and dest are reversed */ +#define FASTOP2R(op, name) \ + FOP_START(name) \ + FOP2E(op##b, dl, al) \ + FOP2E(op##w, dx, ax) \ + FOP2E(op##l, edx, eax) \ + ON64(FOP2E(op##q, rdx, rax)) \ + FOP_END + #define FOP3E(op, dst, src, src2) \ FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET @@ -477,9 +495,9 @@ address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg) } static inline unsigned long -register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg) +register_address(struct x86_emulate_ctxt *ctxt, int reg) { - return address_mask(ctxt, reg); + return address_mask(ctxt, reg_read(ctxt, reg)); } static void masked_increment(ulong *reg, ulong mask, int inc) @@ -488,7 +506,7 @@ static void masked_increment(ulong *reg, ulong mask, int inc) } static inline void -register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc) +register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc) { ulong mask; @@ -496,7 +514,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in mask = ~0UL; else mask = ad_mask(ctxt); - masked_increment(reg, mask, inc); + masked_increment(reg_rmw(ctxt, reg), mask, inc); } static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) @@ -564,40 +582,6 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } -static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, - int cs_l) -{ - switch (ctxt->op_bytes) { - case 2: - ctxt->_eip = (u16)dst; - break; - case 4: - ctxt->_eip = (u32)dst; - break; -#ifdef CONFIG_X86_64 - case 8: - if ((cs_l && is_noncanonical_address(dst)) || - (!cs_l && (dst >> 32) != 0)) - return emulate_gp(ctxt, 0); - ctxt->_eip = dst; - break; -#endif - default: - WARN(1, "unsupported eip assignment size\n"); - } - return X86EMUL_CONTINUE; -} - -static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) -{ - return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64); -} - -static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) -{ - return assign_eip_near(ctxt, ctxt->_eip + rel); -} - static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) { u16 selector; @@ -641,25 +625,24 @@ static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size) return true; } -static int __linearize(struct x86_emulate_ctxt *ctxt, - struct segmented_address addr, - unsigned *max_size, unsigned size, - bool write, bool fetch, - ulong *linear) +static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + unsigned *max_size, unsigned size, + bool write, bool fetch, + enum x86emul_mode mode, ulong *linear) { struct desc_struct desc; bool usable; ulong la; u32 lim; u16 sel; - unsigned cpl; la = seg_base(ctxt, addr.seg) + addr.ea; *max_size = 0; - switch (ctxt->mode) { + switch (mode) { case X86EMUL_MODE_PROT64: - if (((signed long)la << 16) >> 16 != la) - return emulate_gp(ctxt, 0); + if (is_noncanonical_address(la)) + goto bad; *max_size = min_t(u64, ~0u, (1ull << 48) - la); if (size > *max_size) @@ -678,46 +661,20 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, if (!fetch && (desc.type & 8) && !(desc.type & 2)) goto bad; lim = desc_limit_scaled(&desc); - if ((ctxt->mode == X86EMUL_MODE_REAL) && 
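The __linearize() change above swaps an open-coded sign-extension test for is_noncanonical_address(). Both say the same thing: on x86-64 with 48-bit virtual addresses, a canonical address must equal itself after sign-extending bit 47, which the shift-up/shift-down trick reproduces. A self-contained check:

#include <stdint.h>
#include <stdio.h>

static int noncanonical(uint64_t la)
{
    /* Sign-extend bit 47 and compare with the original. */
    return (uint64_t)((int64_t)(la << 16) >> 16) != la;
}

int main(void)
{
    printf("%d\n", noncanonical(0x00007fffffffffffULL)); /* 0: canonical   */
    printf("%d\n", noncanonical(0xffff800000000000ULL)); /* 0: canonical   */
    printf("%d\n", noncanonical(0x0000800000000000ULL)); /* 1: in the hole */
    return 0;
}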
!fetch && - (ctxt->d & NoBigReal)) { - /* la is between zero and 0xffff */ - if (la > 0xffff) - goto bad; - *max_size = 0x10000 - la; - } else if ((desc.type & 8) || !(desc.type & 4)) { - /* expand-up segment */ - if (addr.ea > lim) - goto bad; - *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea); - } else { + if (!(desc.type & 8) && (desc.type & 4)) { /* expand-down segment */ if (addr.ea <= lim) goto bad; lim = desc.d ? 0xffffffff : 0xffff; - if (addr.ea > lim) - goto bad; - *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea); } + if (addr.ea > lim) + goto bad; + *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea); if (size > *max_size) goto bad; - cpl = ctxt->ops->cpl(ctxt); - if (!(desc.type & 8)) { - /* data segment */ - if (cpl > desc.dpl) - goto bad; - } else if ((desc.type & 8) && !(desc.type & 4)) { - /* nonconforming code segment */ - if (cpl != desc.dpl) - goto bad; - } else if ((desc.type & 8) && (desc.type & 4)) { - /* conforming code segment */ - if (cpl < desc.dpl) - goto bad; - } + la &= (u32)-1; break; } - if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8) - la &= (u32)-1; if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0)) return emulate_gp(ctxt, 0); *linear = la; @@ -735,9 +692,55 @@ static int linearize(struct x86_emulate_ctxt *ctxt, ulong *linear) { unsigned max_size; - return __linearize(ctxt, addr, &max_size, size, write, false, linear); + return __linearize(ctxt, addr, &max_size, size, write, false, + ctxt->mode, linear); +} + +static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, + enum x86emul_mode mode) +{ + ulong linear; + int rc; + unsigned max_size; + struct segmented_address addr = { .seg = VCPU_SREG_CS, + .ea = dst }; + + if (ctxt->op_bytes != sizeof(unsigned long)) + addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1); + rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear); + if (rc == X86EMUL_CONTINUE) + ctxt->_eip = addr.ea; + return rc; +} + +static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) +{ + return assign_eip(ctxt, dst, ctxt->mode); } +static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, + const struct desc_struct *cs_desc) +{ + enum x86emul_mode mode = ctxt->mode; + +#ifdef CONFIG_X86_64 + if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) { + u64 efer = 0; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + if (efer & EFER_LMA) + mode = X86EMUL_MODE_PROT64; + } +#endif + if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32) + mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + return assign_eip(ctxt, dst, mode); +} + +static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) +{ + return assign_eip_near(ctxt, ctxt->_eip + rel); +} static int segmented_read_std(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, @@ -776,7 +779,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) * boundary check itself. Instead, we use max_size to check * against op_size. 
*/ - rc = __linearize(ctxt, addr, &max_size, 0, false, true, &linear); + rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode, + &linear); if (unlikely(rc != X86EMUL_CONTINUE)) return rc; @@ -911,6 +915,8 @@ FASTOP2W(btc); FASTOP2(xadd); +FASTOP2R(cmp, cmp_r); + static u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; @@ -1221,6 +1227,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, if (index_reg != 4) modrm_ea += reg_read(ctxt, index_reg) << scale; } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { + modrm_ea += insn_fetch(s32, ctxt); if (ctxt->mode == X86EMUL_MODE_PROT64) ctxt->rip_relative = 1; } else { @@ -1229,10 +1236,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, adjust_modrm_seg(ctxt, base_reg); } switch (ctxt->modrm_mod) { - case 0: - if (ctxt->modrm_rm == 5) - modrm_ea += insn_fetch(s32, ctxt); - break; case 1: modrm_ea += insn_fetch(s8, ctxt); break; @@ -1284,7 +1287,8 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt) else sv = (s64)ctxt->src.val & (s64)mask; - ctxt->dst.addr.mem.ea += (sv >> 3); + ctxt->dst.addr.mem.ea = address_mask(ctxt, + ctxt->dst.addr.mem.ea + (sv >> 3)); } /* only subword offset */ @@ -1610,6 +1614,9 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, sizeof(base3), &ctxt->exception); if (ret != X86EMUL_CONTINUE) return ret; + if (is_noncanonical_address(get_desc_base(&seg_desc) | + ((u64)base3 << 32))) + return emulate_gp(ctxt, 0); } load: ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); @@ -1807,6 +1814,10 @@ static int em_push_sreg(struct x86_emulate_ctxt *ctxt) int seg = ctxt->src2.val; ctxt->src.val = get_segment_selector(ctxt, seg); + if (ctxt->op_bytes == 4) { + rsp_increment(ctxt, -2); + ctxt->op_bytes = 2; + } return em_push(ctxt); } @@ -1850,7 +1861,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt) static int em_pushf(struct x86_emulate_ctxt *ctxt) { - ctxt->src.val = (unsigned long)ctxt->eflags; + ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM; return em_push(ctxt); } @@ -2035,7 +2046,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); + rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); if (rc != X86EMUL_CONTINUE) { WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); /* assigning eip failed; restore the old cs */ @@ -2045,31 +2056,22 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) return rc; } -static int em_grp45(struct x86_emulate_ctxt *ctxt) +static int em_jmp_abs(struct x86_emulate_ctxt *ctxt) { - int rc = X86EMUL_CONTINUE; + return assign_eip_near(ctxt, ctxt->src.val); +} - switch (ctxt->modrm_reg) { - case 2: /* call near abs */ { - long int old_eip; - old_eip = ctxt->_eip; - rc = assign_eip_near(ctxt, ctxt->src.val); - if (rc != X86EMUL_CONTINUE) - break; - ctxt->src.val = old_eip; - rc = em_push(ctxt); - break; - } - case 4: /* jmp abs */ - rc = assign_eip_near(ctxt, ctxt->src.val); - break; - case 5: /* jmp far */ - rc = em_jmp_far(ctxt); - break; - case 6: /* push */ - rc = em_push(ctxt); - break; - } +static int em_call_near_abs(struct x86_emulate_ctxt *ctxt) +{ + int rc; + long int old_eip; + + old_eip = ctxt->_eip; + rc = assign_eip_near(ctxt, ctxt->src.val); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->src.val = old_eip; + rc = em_push(ctxt); return rc; } @@ -2128,11 +2130,11 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) /* Outer-privilege level return is not implemented */ if (ctxt->mode 
>= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) return X86EMUL_UNHANDLEABLE; - rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false, + rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false, &new_desc); if (rc != X86EMUL_CONTINUE) return rc; - rc = assign_eip_far(ctxt, eip, new_desc.l); + rc = assign_eip_far(ctxt, eip, &new_desc); if (rc != X86EMUL_CONTINUE) { WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); @@ -2316,6 +2318,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); ctxt->eflags &= ~msr_data; + ctxt->eflags |= EFLG_RESERVED_ONE_MASK; #endif } else { /* legacy mode */ @@ -2349,11 +2352,9 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) && !vendor_intel(ctxt)) return emulate_ud(ctxt); - /* XXX sysenter/sysexit have not been tested in 64bit mode. - * Therefore, we inject an #UD. - */ + /* sysenter/sysexit have not been tested in 64bit mode. */ if (ctxt->mode == X86EMUL_MODE_PROT64) - return emulate_ud(ctxt); + return X86EMUL_UNHANDLEABLE; setup_syscalls_segments(ctxt, &cs, &ss); @@ -2425,6 +2426,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) if ((msr_data & 0xfffc) == 0x0) return emulate_gp(ctxt, 0); ss_sel = (u16)(msr_data + 24); + rcx = (u32)rcx; + rdx = (u32)rdx; break; case X86EMUL_MODE_PROT64: cs_sel = (u16)(msr_data + 32); @@ -2599,7 +2602,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; save_state_to_tss16(ctxt, &tss_seg); @@ -2607,13 +2609,11 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; if (old_tss_sel != 0xffff) { @@ -2624,7 +2624,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, sizeof tss_seg.prev_task_link, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; } @@ -2813,7 +2812,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, * * 1. jmp/call/int to task gate: Check against DPL of the task gate * 2. Exception/IRQ/iret: No check is performed - * 3. jmp/call to TSS: Check against DPL of the TSS + * 3. jmp/call to TSS/task-gate: No check is performed since the + * hardware checks it before exiting. */ if (reason == TASK_SWITCH_GATE) { if (idt_index != -1) { @@ -2830,13 +2830,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl) return emulate_gp(ctxt, (idt_index << 3) | 0x2); } - } else if (reason != TASK_SWITCH_IRET) { - int dpl = next_tss_desc.dpl; - if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl) - return emulate_gp(ctxt, tss_selector); } - desc_limit = desc_limit_scaled(&next_tss_desc); if (!next_tss_desc.p || ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || @@ -2913,8 +2908,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg, { int df = (ctxt->eflags & EFLG_DF) ? 
-op->count : op->count; - register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes); - op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg)); + register_address_increment(ctxt, reg, df * op->bytes); + op->addr.mem.ea = register_address(ctxt, reg); } static int em_das(struct x86_emulate_ctxt *ctxt) @@ -3025,7 +3020,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return X86EMUL_CONTINUE; - rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); + rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); if (rc != X86EMUL_CONTINUE) goto fail; @@ -3215,6 +3210,8 @@ static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt) return emulate_ud(ctxt); ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg); + if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM) + ctxt->dst.bytes = 2; return X86EMUL_CONTINUE; } @@ -3317,7 +3314,7 @@ static int em_sidt(struct x86_emulate_ctxt *ctxt) return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt); } -static int em_lgdt(struct x86_emulate_ctxt *ctxt) +static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt) { struct desc_ptr desc_ptr; int rc; @@ -3329,12 +3326,23 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt) ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; - ctxt->ops->set_gdt(ctxt, &desc_ptr); + if (ctxt->mode == X86EMUL_MODE_PROT64 && + is_noncanonical_address(desc_ptr.address)) + return emulate_gp(ctxt, 0); + if (lgdt) + ctxt->ops->set_gdt(ctxt, &desc_ptr); + else + ctxt->ops->set_idt(ctxt, &desc_ptr); /* Disable writeback. */ ctxt->dst.type = OP_NONE; return X86EMUL_CONTINUE; } +static int em_lgdt(struct x86_emulate_ctxt *ctxt) +{ + return em_lgdt_lidt(ctxt, true); +} + static int em_vmmcall(struct x86_emulate_ctxt *ctxt) { int rc; @@ -3348,20 +3356,7 @@ static int em_vmmcall(struct x86_emulate_ctxt *ctxt) static int em_lidt(struct x86_emulate_ctxt *ctxt) { - struct desc_ptr desc_ptr; - int rc; - - if (ctxt->mode == X86EMUL_MODE_PROT64) - ctxt->op_bytes = 8; - rc = read_descriptor(ctxt, ctxt->src.addr.mem, - &desc_ptr.size, &desc_ptr.address, - ctxt->op_bytes); - if (rc != X86EMUL_CONTINUE) - return rc; - ctxt->ops->set_idt(ctxt, &desc_ptr); - /* Disable writeback. 
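
/*
 * A user-space sketch of the em_lgdt_lidt() consolidation above: the
 * separate LGDT/LIDT handlers collapse into one helper selected by a
 * flag, and both gain the check hardware performs in 64-bit mode -- a
 * non-canonical descriptor-table base raises #GP(0). Assumes 48-bit
 * virtual addresses (4-level paging); set_gdt()/set_idt() are
 * stand-ins for the ctxt->ops callbacks.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct desc_ptr {
    uint16_t size;
    uint64_t address;
};

/* canonical iff bits 63:47 are all zero or all one */
static bool is_noncanonical_address(uint64_t la)
{
    uint64_t upper = la >> 47;

    return upper != 0 && upper != 0x1ffff;
}

static void set_gdt(const struct desc_ptr *dt) { (void)dt; puts("GDTR loaded"); }
static void set_idt(const struct desc_ptr *dt) { (void)dt; puts("IDTR loaded"); }

static int em_lgdt_lidt(const struct desc_ptr *dt, bool lgdt, bool long_mode)
{
    if (long_mode && is_noncanonical_address(dt->address))
        return -1;                      /* emulate_gp(ctxt, 0) */
    if (lgdt)
        set_gdt(dt);
    else
        set_idt(dt);
    return 0;
}

int main(void)
{
    struct desc_ptr dt = { 0x7f, 0xffff800000001000ull };

    printf("lgdt rc=%d\n", em_lgdt_lidt(&dt, true, true));
    dt.address = 0x0000800000000000ull; /* inside the canonical hole */
    printf("lidt rc=%d\n", em_lgdt_lidt(&dt, false, true));
    return 0;
}
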
*/ - ctxt->dst.type = OP_NONE; - return X86EMUL_CONTINUE; + return em_lgdt_lidt(ctxt, false); } static int em_smsw(struct x86_emulate_ctxt *ctxt) @@ -3384,7 +3379,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt) { int rc = X86EMUL_CONTINUE; - register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); + register_address_increment(ctxt, VCPU_REGS_RCX, -1); if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) && (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) rc = jmp_rel(ctxt, ctxt->src.val); @@ -3554,7 +3549,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); if (efer & EFER_LMA) - rsvd = CR3_L_MODE_RESERVED_BITS; + rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD; if (new_val & rsvd) return emulate_gp(ctxt, 0); @@ -3596,8 +3591,15 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt) if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) return emulate_ud(ctxt); - if (check_dr7_gd(ctxt)) + if (check_dr7_gd(ctxt)) { + ulong dr6; + + ctxt->ops->get_dr(ctxt, 6, &dr6); + dr6 &= ~15; + dr6 |= DR6_BD | DR6_RTM; + ctxt->ops->set_dr(ctxt, 6, dr6); return emulate_db(ctxt); + } return X86EMUL_CONTINUE; } @@ -3684,6 +3686,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } +#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) } #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } @@ -3780,11 +3783,11 @@ static const struct opcode group4[] = { static const struct opcode group5[] = { F(DstMem | SrcNone | Lock, em_inc), F(DstMem | SrcNone | Lock, em_dec), - I(SrcMem | Stack, em_grp45), + I(SrcMem | NearBranch, em_call_near_abs), I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), - I(SrcMem | Stack, em_grp45), - I(SrcMemFAddr | ImplicitOps, em_grp45), - I(SrcMem | Stack, em_grp45), D(Undefined), + I(SrcMem | NearBranch, em_jmp_abs), + I(SrcMemFAddr | ImplicitOps, em_jmp_far), + I(SrcMem | Stack, em_push), D(Undefined), }; static const struct opcode group6[] = { @@ -3845,8 +3848,12 @@ static const struct gprefix pfx_0f_6f_0f_7f = { I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), }; +static const struct instr_dual instr_dual_0f_2b = { + I(0, em_mov), N +}; + static const struct gprefix pfx_0f_2b = { - I(0, em_mov), I(0, em_mov), N, N, + ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N, }; static const struct gprefix pfx_0f_28_0f_29 = { @@ -3920,6 +3927,10 @@ static const struct escape escape_dd = { { N, N, N, N, N, N, N, N, } }; +static const struct instr_dual instr_dual_0f_c3 = { + I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N +}; + static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ F6ALU(Lock, em_add), @@ -3964,7 +3975,7 @@ static const struct opcode opcode_table[256] = { I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */ I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ - X16(D(SrcImmByte)), + X16(D(SrcImmByte | NearBranch)), /* 0x80 - 0x87 */ G(ByteOp | DstMem | SrcImm, group1), G(DstMem | SrcImm, group1), @@ -3991,20 +4002,20 @@ static const struct opcode opcode_table[256] = { I2bv(DstAcc | 
SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), I2bv(SrcSI | DstDI | Mov | String, em_mov), - F2bv(SrcSI | DstDI | String | NoWrite, em_cmp), + F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r), /* 0xA8 - 0xAF */ F2bv(DstAcc | SrcImm | NoWrite, em_test), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp), + F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ X8(I(DstReg | SrcImm64 | Mov, em_mov)), /* 0xC0 - 0xC7 */ G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), - I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), - I(ImplicitOps | Stack, em_ret), + I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm), + I(ImplicitOps | NearBranch, em_ret), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), G(ByteOp, group11), G(0, group11), @@ -4024,13 +4035,14 @@ static const struct opcode opcode_table[256] = { /* 0xD8 - 0xDF */ N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, /* 0xE0 - 0xE7 */ - X3(I(SrcImmByte, em_loop)), - I(SrcImmByte, em_jcxz), + X3(I(SrcImmByte | NearBranch, em_loop)), + I(SrcImmByte | NearBranch, em_jcxz), I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in), I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out), /* 0xE8 - 0xEF */ - I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps), - I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps), + I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch), + I(SrcImmFAddr | No64, em_jmp_far), + D(SrcImmByte | ImplicitOps | NearBranch), I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in), I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out), /* 0xF0 - 0xF7 */ @@ -4090,7 +4102,7 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x80 - 0x8F */ - X16(D(SrcImm)), + X16(D(SrcImm | NearBranch)), /* 0x90 - 0x9F */ X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), /* 0xA0 - 0xA7 */ @@ -4121,7 +4133,7 @@ static const struct opcode twobyte_table[256] = { D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xC0 - 0xC7 */ F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), - N, D(DstMem | SrcReg | ModRM | Mov), + N, ID(0, &instr_dual_0f_c3), N, N, N, GD(0, &group9), /* 0xC8 - 0xCF */ X8(I(DstReg, em_bswap)), @@ -4134,12 +4146,20 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N }; +static const struct instr_dual instr_dual_0f_38_f0 = { + I(DstReg | SrcMem | Mov, em_movbe), N +}; + +static const struct instr_dual instr_dual_0f_38_f1 = { + I(DstMem | SrcReg | Mov, em_movbe), N +}; + static const struct gprefix three_byte_0f_38_f0 = { - I(DstReg | SrcMem | Mov, em_movbe), N, N, N + ID(0, &instr_dual_0f_38_f0), N, N, N }; static const struct gprefix three_byte_0f_38_f1 = { - I(DstMem | SrcReg | Mov, em_movbe), N, N, N + ID(0, &instr_dual_0f_38_f1), N, N, N }; /* @@ -4152,8 +4172,8 @@ static const struct opcode opcode_map_0f_38[256] = { /* 0x80 - 0xef */ X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), /* 0xf0 - 0xf1 */ - GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0), - GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1), + GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0), + GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1), /* 0xf2 - 0xff */ N, N, X4(N), X8(N) 
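
/*
 * The new InstrDual entries above pick one of two decodings by the mod
 * field of ModRM: mod == 3 selects the register form, anything else
 * the memory form, matching the decoder's InstrDual case that tests
 * (ctxt->modrm >> 6). A sketch using the movbe split; the table
 * strings are illustrative, only the selection logic mirrors the
 * patch.
 */
#include <stdio.h>

struct opcode {
    const char *name;
};

struct instr_dual {
    struct opcode mod012;   /* memory-operand forms (mod 0, 1, 2) */
    struct opcode mod3;     /* register-operand form */
};

static const struct instr_dual dual_0f_38_f0 = {
    .mod012 = { "movbe reg, mem" },
    .mod3   = { "(undefined)" },
};

static struct opcode select_dual(const struct instr_dual *d, unsigned modrm)
{
    return (modrm >> 6) == 3 ? d->mod3 : d->mod012;
}

int main(void)
{
    printf("%s\n", select_dual(&dual_0f_38_f0, 0x06).name);  /* mod = 0 */
    printf("%s\n", select_dual(&dual_0f_38_f0, 0xc6).name);  /* mod = 3 */
    return 0;
}
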
}; @@ -4275,7 +4295,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = - register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI)); + register_address(ctxt, VCPU_REGS_RDI); op->addr.mem.seg = VCPU_SREG_ES; op->val = 0; op->count = 1; @@ -4329,7 +4349,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = - register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI)); + register_address(ctxt, VCPU_REGS_RSI); op->addr.mem.seg = ctxt->seg_override; op->val = 0; op->count = 1; @@ -4338,7 +4358,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = - register_address(ctxt, + address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RBX) + (reg_read(ctxt, VCPU_REGS_RAX) & 0xff)); op->addr.mem.seg = ctxt->seg_override; @@ -4510,8 +4530,7 @@ done_prefixes: /* vex-prefix instructions are not implemented */ if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) && - (mode == X86EMUL_MODE_PROT64 || - (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) { + (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) { ctxt->d = NotImpl; } @@ -4549,6 +4568,12 @@ done_prefixes: else opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; break; + case InstrDual: + if ((ctxt->modrm >> 6) == 3) + opcode = opcode.u.idual->mod3; + else + opcode = opcode.u.idual->mod012; + break; default: return EMULATION_FAILED; } @@ -4567,7 +4592,8 @@ done_prefixes: return EMULATION_FAILED; if (unlikely(ctxt->d & - (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) { + (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch| + No16))) { /* * These are copied unconditionally here, and checked unconditionally * in x86_emulate_insn. @@ -4578,8 +4604,12 @@ done_prefixes: if (ctxt->d & NotImpl) return EMULATION_FAILED; - if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) - ctxt->op_bytes = 8; + if (mode == X86EMUL_MODE_PROT64) { + if (ctxt->op_bytes == 4 && (ctxt->d & Stack)) + ctxt->op_bytes = 8; + else if (ctxt->d & NearBranch) + ctxt->op_bytes = 8; + } if (ctxt->d & Op3264) { if (mode == X86EMUL_MODE_PROT64) @@ -4588,6 +4618,9 @@ done_prefixes: ctxt->op_bytes = 4; } + if ((ctxt->d & No16) && ctxt->op_bytes == 2) + ctxt->op_bytes = 4; + if (ctxt->d & Sse) ctxt->op_bytes = 16; else if (ctxt->d & Mmx) @@ -4631,7 +4664,8 @@ done_prefixes: rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); if (ctxt->rip_relative) - ctxt->memopp->addr.mem.ea += ctxt->_eip; + ctxt->memopp->addr.mem.ea = address_mask(ctxt, + ctxt->memopp->addr.mem.ea + ctxt->_eip); done: return (rc != X86EMUL_CONTINUE) ? 
EMULATION_FAILED : EMULATION_OK; @@ -4775,6 +4809,12 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + /* Instruction can only be executed in protected mode */ + if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { + rc = emulate_ud(ctxt); + goto done; + } + /* Privileged instruction can be executed only in CPL=0 */ if ((ctxt->d & Priv) && ops->cpl(ctxt)) { if (ctxt->d & PrivUD) @@ -4784,12 +4824,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - /* Instruction can only be executed in protected mode */ - if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { - rc = emulate_ud(ctxt); - goto done; - } - /* Do instruction specific permission checks */ if (ctxt->d & CheckPerm) { rc = ctxt->check_perm(ctxt); @@ -4974,8 +5008,7 @@ writeback: count = ctxt->src.count; else count = ctxt->dst.count; - register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), - -count); + register_address_increment(ctxt, VCPU_REGS_RCX, -count); if (!string_insn_completed(ctxt)) { /* @@ -5053,11 +5086,6 @@ twobyte_insn: ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val : (s16) ctxt->src.val; break; - case 0xc3: /* movnti */ - ctxt->dst.bytes = ctxt->op_bytes; - ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val : - (u32) ctxt->src.val; - break; default: goto cannot_emulate; } diff --git a/virt/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 0ba4057d271..b1947e0f3e1 100644 --- a/virt/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -270,7 +270,6 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, spin_unlock(&ioapic->lock); } -#ifdef CONFIG_X86 void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; @@ -279,12 +278,6 @@ void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) return; kvm_make_scan_ioapic_request(kvm); } -#else -void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) -{ - return; -} -#endif static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { @@ -586,11 +579,6 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, case IOAPIC_REG_WINDOW: ioapic_write_indirect(ioapic, data); break; -#ifdef CONFIG_IA64 - case IOAPIC_REG_EOI: - __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG); - break; -#endif default: break; diff --git a/virt/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index e23b70634f1..3c9195535ff 100644 --- a/virt/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -19,7 +19,6 @@ struct kvm_vcpu; /* Direct registers. */ #define IOAPIC_REG_SELECT 0x00 #define IOAPIC_REG_WINDOW 0x10 -#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */ /* Indirect registers. 
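
/*
 * The reordering above runs the "protected mode only" test before the
 * CPL test, presumably so a guest in vm86 mode (not protected mode in
 * the emulator's terms, but CPL 3) that hits a privileged,
 * protected-mode-only opcode sees #UD rather than #GP. A sketch of the
 * reordered gate; the flag bits are illustrative, not the emulator's
 * actual Prot/Priv encoding.
 */
#include <stdio.h>

enum fault { FAULT_NONE, FAULT_UD, FAULT_GP };

#define PROT (1u << 0)  /* instruction exists only in protected mode */
#define PRIV (1u << 1)  /* instruction requires CPL 0 */

static enum fault pre_execute_checks(unsigned d, int protected_mode, int cpl)
{
    if ((d & PROT) && !protected_mode)
        return FAULT_UD;                /* now checked first */
    if ((d & PRIV) && cpl != 0)
        return FAULT_GP;
    return FAULT_NONE;
}

int main(void)
{
    /* vm86-like case: not protected mode, CPL == 3 -> #UD, not #GP */
    printf("fault=%d (1 == #UD)\n", pre_execute_checks(PROT | PRIV, 0, 3));
    return 0;
}
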
*/ #define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ @@ -45,6 +44,23 @@ struct rtc_status { DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS); }; +union kvm_ioapic_redirect_entry { + u64 bits; + struct { + u8 vector; + u8 delivery_mode:3; + u8 dest_mode:1; + u8 delivery_status:1; + u8 polarity:1; + u8 remote_irr:1; + u8 trig_mode:1; + u8 mask:1; + u8 reserve:7; + u8 reserved[4]; + u8 dest_id; + } fields; +}; + struct kvm_ioapic { u64 base_address; u32 ioregsel; @@ -83,7 +99,7 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode); + int short_hand, unsigned int dest, int dest_mode); int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode); @@ -97,7 +113,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, u32 *tmr); diff --git a/virt/kvm/iommu.c b/arch/x86/kvm/iommu.c index c1e6ae989a4..17b73eeac8a 100644 --- a/virt/kvm/iommu.c +++ b/arch/x86/kvm/iommu.c @@ -31,6 +31,7 @@ #include <linux/dmar.h> #include <linux/iommu.h> #include <linux/intel-iommu.h> +#include "assigned-dev.h" static bool allow_unsafe_assigned_interrupts; module_param_named(allow_unsafe_assigned_interrupts, @@ -169,10 +170,8 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) return r; } -int kvm_assign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) +int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev) { - struct pci_dev *pdev = NULL; struct iommu_domain *domain = kvm->arch.iommu_domain; int r; bool noncoherent; @@ -181,7 +180,6 @@ int kvm_assign_device(struct kvm *kvm, if (!domain) return 0; - pdev = assigned_dev->dev; if (pdev == NULL) return -ENODEV; @@ -212,17 +210,14 @@ out_unmap: return r; } -int kvm_deassign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) +int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev) { struct iommu_domain *domain = kvm->arch.iommu_domain; - struct pci_dev *pdev = NULL; /* check if iommu exists and in use */ if (!domain) return 0; - pdev = assigned_dev->dev; if (pdev == NULL) return -ENODEV; diff --git a/virt/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 963b8995a9e..72298b3ac02 100644 --- a/virt/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -26,9 +26,6 @@ #include <trace/events/kvm.h> #include <asm/msidef.h> -#ifdef CONFIG_IA64 -#include <asm/iosapic.h> -#endif #include "irq.h" @@ -38,12 +35,8 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) { -#ifdef CONFIG_X86 struct kvm_pic *pic = pic_irqchip(kvm); return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); -#else - return -1; -#endif } static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, @@ -57,12 +50,7 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) { -#ifdef CONFIG_IA64 - return irq->delivery_mode == - (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); 
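
/*
 * The redirection-entry layout that moves into ioapic.h above, as a
 * standalone pack/unpack demo. The bit-field order assumes a
 * little-endian, LSB-first compiler layout, which is what the x86
 * kernel relies on for this union.
 */
#include <stdint.h>
#include <stdio.h>

union kvm_ioapic_redirect_entry {
    uint64_t bits;
    struct {
        uint8_t vector;
        uint8_t delivery_mode:3;
        uint8_t dest_mode:1;
        uint8_t delivery_status:1;
        uint8_t polarity:1;
        uint8_t remote_irr:1;
        uint8_t trig_mode:1;
        uint8_t mask:1;
        uint8_t reserve:7;
        uint8_t reserved[4];
        uint8_t dest_id;
    } fields;
};

int main(void)
{
    union kvm_ioapic_redirect_entry e = { .bits = 0 };

    e.fields.vector = 0x31;
    e.fields.trig_mode = 1;             /* level triggered */
    e.fields.mask = 1;
    e.fields.dest_id = 3;

    printf("raw=%#llx vector=%#x masked=%u dest=%u\n",
           (unsigned long long)e.bits, (unsigned)e.fields.vector,
           (unsigned)e.fields.mask, (unsigned)e.fields.dest_id);
    return 0;
}
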
-#else return irq->delivery_mode == APIC_DM_LOWEST; -#endif } int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, @@ -202,9 +190,7 @@ int kvm_request_irq_source_id(struct kvm *kvm) } ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); -#ifdef CONFIG_X86 ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); -#endif set_bit(irq_source_id, bitmap); unlock: mutex_unlock(&kvm->irq_lock); @@ -215,9 +201,7 @@ unlock: void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) { ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); -#ifdef CONFIG_X86 ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); -#endif mutex_lock(&kvm->irq_lock); if (irq_source_id < 0 || @@ -230,9 +214,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) goto unlock; kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); -#ifdef CONFIG_X86 kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id); -#endif unlock: mutex_unlock(&kvm->irq_lock); } @@ -242,7 +224,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, { mutex_lock(&kvm->irq_lock); kimn->irq = irq; - hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list); + hlist_add_head_rcu(&kimn->link, &kvm->arch.mask_notifier_list); mutex_unlock(&kvm->irq_lock); } @@ -264,7 +246,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, idx = srcu_read_lock(&kvm->irq_srcu); gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) - hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) + hlist_for_each_entry_rcu(kimn, &kvm->arch.mask_notifier_list, link) if (kimn->irq == gsi) kimn->func(kimn, mask); srcu_read_unlock(&kvm->irq_srcu, idx); @@ -322,16 +304,11 @@ out: .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) -#ifdef CONFIG_X86 -# define PIC_ROUTING_ENTRY(irq) \ +#define PIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } } -# define ROUTING_ENTRY2(irq) \ +#define ROUTING_ENTRY2(irq) \ IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) -#else -# define ROUTING_ENTRY2(irq) \ - IOAPIC_ROUTING_ENTRY(irq) -#endif static const struct kvm_irq_routing_entry default_routing[] = { ROUTING_ENTRY2(0), ROUTING_ENTRY2(1), @@ -346,20 +323,6 @@ static const struct kvm_irq_routing_entry default_routing[] = { ROUTING_ENTRY1(18), ROUTING_ENTRY1(19), ROUTING_ENTRY1(20), ROUTING_ENTRY1(21), ROUTING_ENTRY1(22), ROUTING_ENTRY1(23), -#ifdef CONFIG_IA64 - ROUTING_ENTRY1(24), ROUTING_ENTRY1(25), - ROUTING_ENTRY1(26), ROUTING_ENTRY1(27), - ROUTING_ENTRY1(28), ROUTING_ENTRY1(29), - ROUTING_ENTRY1(30), ROUTING_ENTRY1(31), - ROUTING_ENTRY1(32), ROUTING_ENTRY1(33), - ROUTING_ENTRY1(34), ROUTING_ENTRY1(35), - ROUTING_ENTRY1(36), ROUTING_ENTRY1(37), - ROUTING_ENTRY1(38), ROUTING_ENTRY1(39), - ROUTING_ENTRY1(40), ROUTING_ENTRY1(41), - ROUTING_ENTRY1(42), ROUTING_ENTRY1(43), - ROUTING_ENTRY1(44), ROUTING_ENTRY1(45), - ROUTING_ENTRY1(46), ROUTING_ENTRY1(47), -#endif }; int kvm_setup_default_irq_routing(struct kvm *kvm) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index b8345dd41b2..4f0c0b95468 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -68,6 +68,9 @@ #define MAX_APIC_VECTOR 256 #define APIC_VECTORS_PER_REG 32 +#define APIC_BROADCAST 0xFF +#define X2APIC_BROADCAST 0xFFFFFFFFul + #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) @@ -129,8 +132,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) return 
(kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } -#define KVM_X2APIC_CID_BITS 0 - static void recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -149,42 +150,56 @@ static void recalculate_apic_map(struct kvm *kvm) new->cid_shift = 8; new->cid_mask = 0; new->lid_mask = 0xff; + new->broadcast = APIC_BROADCAST; kvm_for_each_vcpu(i, vcpu, kvm) { struct kvm_lapic *apic = vcpu->arch.apic; - u16 cid, lid; - u32 ldr; if (!kvm_apic_present(vcpu)) continue; + if (apic_x2apic_mode(apic)) { + new->ldr_bits = 32; + new->cid_shift = 16; + new->cid_mask = new->lid_mask = 0xffff; + new->broadcast = X2APIC_BROADCAST; + } else if (kvm_apic_get_reg(apic, APIC_LDR)) { + if (kvm_apic_get_reg(apic, APIC_DFR) == + APIC_DFR_CLUSTER) { + new->cid_shift = 4; + new->cid_mask = 0xf; + new->lid_mask = 0xf; + } else { + new->cid_shift = 8; + new->cid_mask = 0; + new->lid_mask = 0xff; + } + } + /* * All APICs have to be configured in the same mode by an OS. * We take advatage of this while building logical id loockup - * table. After reset APICs are in xapic/flat mode, so if we - * find apic with different setting we assume this is the mode + * table. After reset APICs are in software disabled mode, so if + * we find apic with different setting we assume this is the mode * OS wants all apics to be in; build lookup table accordingly. */ - if (apic_x2apic_mode(apic)) { - new->ldr_bits = 32; - new->cid_shift = 16; - new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1; - new->lid_mask = 0xffff; - } else if (kvm_apic_sw_enabled(apic) && - !new->cid_mask /* flat mode */ && - kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) { - new->cid_shift = 4; - new->cid_mask = 0xf; - new->lid_mask = 0xf; - } + if (kvm_apic_sw_enabled(apic)) + break; + } - new->phys_map[kvm_apic_id(apic)] = apic; + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvm_lapic *apic = vcpu->arch.apic; + u16 cid, lid; + u32 ldr, aid; + aid = kvm_apic_id(apic); ldr = kvm_apic_get_reg(apic, APIC_LDR); cid = apic_cluster_id(new, ldr); lid = apic_logical_id(new, ldr); - if (lid) + if (aid < ARRAY_SIZE(new->phys_map)) + new->phys_map[aid] = apic; + if (lid && cid < ARRAY_SIZE(new->logical_map)) new->logical_map[cid][ffs(lid) - 1] = apic; } out: @@ -201,11 +216,13 @@ out: static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) { - u32 prev = kvm_apic_get_reg(apic, APIC_SPIV); + bool enabled = val & APIC_SPIV_APIC_ENABLED; apic_set_reg(apic, APIC_SPIV, val); - if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) { - if (val & APIC_SPIV_APIC_ENABLED) { + + if (enabled != apic->sw_enabled) { + apic->sw_enabled = enabled; + if (enabled) { static_key_slow_dec_deferred(&apic_sw_disabled); recalculate_apic_map(apic->vcpu->kvm); } else @@ -237,21 +254,17 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) { - return ((kvm_apic_get_reg(apic, APIC_LVTT) & - apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT); + return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT; } static inline int apic_lvtt_period(struct kvm_lapic *apic) { - return ((kvm_apic_get_reg(apic, APIC_LVTT) & - apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC); + return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC; } static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) { - return ((kvm_apic_get_reg(apic, APIC_LVTT) & - apic->lapic_timer.timer_mode_mask) == - APIC_LVT_TIMER_TSCDEADLINE); + return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE; 
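
/*
 * How the rewritten recalculate_apic_map() above derives the logical
 * geometry: x2APIC splits a 32-bit LDR 16:16 into cluster:logical,
 * xAPIC cluster mode splits the 8-bit LDR 4:4, and xAPIC flat mode has
 * no cluster part. The geometry values mirror the patch; the helper
 * bodies here are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

struct apic_map_geom {
    int ldr_bits;
    int cid_shift;
    uint32_t cid_mask, lid_mask;
};

static const struct apic_map_geom x2apic        = { 32, 16, 0xffff, 0xffff };
static const struct apic_map_geom xapic_cluster = {  8,  4, 0xf,    0xf    };
static const struct apic_map_geom xapic_flat    = {  8,  8, 0,      0xff   };

static unsigned cluster_id(const struct apic_map_geom *g, uint32_t ldr)
{
    ldr >>= 32 - g->ldr_bits;           /* xAPIC LDR lives in bits 31:24 */
    return (ldr >> g->cid_shift) & g->cid_mask;
}

static unsigned logical_id(const struct apic_map_geom *g, uint32_t ldr)
{
    ldr >>= 32 - g->ldr_bits;
    return ldr & g->lid_mask;
}

int main(void)
{
    uint32_t ldr = 0x23000000;          /* xAPIC: cluster 2, logical 0x3 */

    printf("x2apic:  cluster %u logical %#x\n",
           cluster_id(&x2apic, 0x00020003), logical_id(&x2apic, 0x00020003));
    printf("cluster: cluster %u logical %#x\n",
           cluster_id(&xapic_cluster, ldr), logical_id(&xapic_cluster, ldr));
    printf("flat:    cluster %u logical %#x\n",
           cluster_id(&xapic_flat, ldr), logical_id(&xapic_flat, ldr));
    return 0;
}
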
} static inline int apic_lvt_nmi_mode(u32 lvt_val) @@ -326,8 +339,12 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_irr); static inline void apic_set_irr(int vec, struct kvm_lapic *apic) { - apic->irr_pending = true; apic_set_vector(vec, apic->regs + APIC_IRR); + /* + * irr_pending must be true if any interrupt is pending; set it after + * APIC_IRR to avoid race with apic_clear_irr + */ + apic->irr_pending = true; } static inline int apic_search_irr(struct kvm_lapic *apic) @@ -359,13 +376,15 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) vcpu = apic->vcpu; - apic_clear_vector(vec, apic->regs + APIC_IRR); - if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) { /* try to update RVI */ + apic_clear_vector(vec, apic->regs + APIC_IRR); kvm_make_request(KVM_REQ_EVENT, vcpu); - else { - vec = apic_search_irr(apic); - apic->irr_pending = (vec != -1); + } else { + apic->irr_pending = false; + apic_clear_vector(vec, apic->regs + APIC_IRR); + if (apic_search_irr(apic) != -1) + apic->irr_pending = true; } } @@ -558,16 +577,25 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) apic_update_ppr(apic); } -int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) +static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest) +{ + return dest == (apic_x2apic_mode(apic) ? + X2APIC_BROADCAST : APIC_BROADCAST); +} + +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest) { - return dest == 0xff || kvm_apic_id(apic) == dest; + return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest); } -int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) { int result = 0; u32 logical_id; + if (kvm_apic_broadcast(apic, mda)) + return 1; + if (apic_x2apic_mode(apic)) { logical_id = kvm_apic_get_reg(apic, APIC_LDR); return logical_id & mda; @@ -595,7 +623,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) } int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode) + int short_hand, unsigned int dest, int dest_mode) { int result = 0; struct kvm_lapic *target = vcpu->arch.apic; @@ -657,15 +685,24 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, if (!map) goto out; + if (irq->dest_id == map->broadcast) + goto out; + + ret = true; + if (irq->dest_mode == 0) { /* physical mode */ - if (irq->delivery_mode == APIC_DM_LOWEST || - irq->dest_id == 0xff) + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) goto out; - dst = &map->phys_map[irq->dest_id & 0xff]; + + dst = &map->phys_map[irq->dest_id]; } else { u32 mda = irq->dest_id << (32 - map->ldr_bits); + u16 cid = apic_cluster_id(map, mda); + + if (cid >= ARRAY_SIZE(map->logical_map)) + goto out; - dst = map->logical_map[apic_cluster_id(map, mda)]; + dst = map->logical_map[cid]; bitmap = apic_logical_id(map, mda); @@ -691,8 +728,6 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, *r = 0; *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); } - - ret = true; out: rcu_read_unlock(); return ret; @@ -1034,6 +1069,26 @@ static void update_divide_count(struct kvm_lapic *apic) apic->divide_count); } +static void apic_timer_expired(struct kvm_lapic *apic) +{ + struct kvm_vcpu *vcpu = apic->vcpu; + wait_queue_head_t *q = &vcpu->wq; + + /* + * Note: KVM_REQ_PENDING_TIMER is implicitly checked in + * vcpu_enter_guest. 
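
/*
 * The ordering the new apic_set_irr()/apic_clear_irr() comments above
 * insist on, made explicit with C11 atomics (the kernel relies on its
 * own ordering guarantees, not these): the IRR bit must be visible
 * before irr_pending is set, so a racing clear that trusts
 * irr_pending also sees the vector bit; the clear side conservatively
 * drops the hint first and re-derives it afterwards.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic unsigned long irr;       /* one word of the real bitmap */
static atomic_bool irr_pending;

static void apic_set_irr(int vec)
{
    atomic_fetch_or(&irr, 1ul << vec);  /* bit first... */
    atomic_store(&irr_pending, true);   /* ...hint second */
}

static void apic_clear_irr(int vec)
{
    atomic_store(&irr_pending, false);  /* hint off before the bit goes */
    atomic_fetch_and(&irr, ~(1ul << vec));
    if (atomic_load(&irr) != 0)         /* re-check, like apic_search_irr */
        atomic_store(&irr_pending, true);
}

int main(void)
{
    apic_set_irr(3);
    apic_set_irr(5);
    apic_clear_irr(3);
    printf("pending=%d irr=%#lx\n", (int)atomic_load(&irr_pending),
           atomic_load(&irr));
    return 0;
}
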
+ */ + if (atomic_read(&apic->lapic_timer.pending)) + return; + + atomic_inc(&apic->lapic_timer.pending); + /* FIXME: this code should not know anything about vcpus */ + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + + if (waitqueue_active(q)) + wake_up_interruptible(q); +} + static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now; @@ -1096,9 +1151,10 @@ static void start_apic_timer(struct kvm_lapic *apic) if (likely(tscdeadline > guest_tsc)) { ns = (tscdeadline - guest_tsc) * 1000000ULL; do_div(ns, this_tsc_khz); - } - hrtimer_start(&apic->lapic_timer.timer, - ktime_add_ns(now, ns), HRTIMER_MODE_ABS); + hrtimer_start(&apic->lapic_timer.timer, + ktime_add_ns(now, ns), HRTIMER_MODE_ABS); + } else + apic_timer_expired(apic); local_irq_restore(flags); } @@ -1203,17 +1259,20 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) break; - case APIC_LVTT: - if ((kvm_apic_get_reg(apic, APIC_LVTT) & - apic->lapic_timer.timer_mode_mask) != - (val & apic->lapic_timer.timer_mode_mask)) + case APIC_LVTT: { + u32 timer_mode = val & apic->lapic_timer.timer_mode_mask; + + if (apic->lapic_timer.timer_mode != timer_mode) { + apic->lapic_timer.timer_mode = timer_mode; hrtimer_cancel(&apic->lapic_timer.timer); + } if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); apic_set_reg(apic, APIC_LVTT, val); break; + } case APIC_TMICT: if (apic_lvtt_tscdeadline(apic)) @@ -1320,7 +1379,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) static_key_slow_dec_deferred(&apic_hw_disabled); - if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED)) + if (!apic->sw_enabled) static_key_slow_dec_deferred(&apic_sw_disabled); if (apic->regs) @@ -1355,9 +1414,6 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) return; hrtimer_cancel(&apic->lapic_timer.timer); - /* Inject here so clearing tscdeadline won't override new value */ - if (apic_has_pending_timer(vcpu)) - kvm_inject_apic_timer_irqs(vcpu); apic->lapic_timer.tscdeadline = data; start_apic_timer(apic); } @@ -1422,6 +1478,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) apic->base_address = apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_BASE; + if ((value & MSR_IA32_APICBASE_ENABLE) && + apic->base_address != APIC_DEFAULT_PHYS_BASE) + pr_warn_once("APIC base relocation is unsupported by KVM"); + /* with FSB delivery interrupt, we can restart APIC functionality */ apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is " "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address); @@ -1447,6 +1507,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) for (i = 0; i < APIC_LVT_NUM; i++) apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); + apic->lapic_timer.timer_mode = 0; apic_set_reg(apic, APIC_LVT0, SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); @@ -1538,23 +1599,8 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) { struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); - struct kvm_vcpu *vcpu = apic->vcpu; - wait_queue_head_t *q = &vcpu->wq; - - /* - * There is a race window between reading and incrementing, but we do - * not care about potentially losing timer events in the !reinject - * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked - * in vcpu_enter_guest. 
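
/*
 * The TSC-deadline fix above moves hrtimer_start() inside the
 * "deadline still ahead" branch and fires immediately otherwise,
 * instead of arming a timer with a stale ns value when the deadline
 * has already passed. A sketch with stub callbacks; the tsc_khz value
 * and printouts are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

static void apic_timer_expired(void)
{
    puts("timer fired immediately");
}

static void hrtimer_start_ns(uint64_t ns)
{
    printf("hrtimer armed for +%llu ns\n", (unsigned long long)ns);
}

static void start_tscdeadline_timer(uint64_t tscdeadline, uint64_t guest_tsc,
                                    uint64_t tsc_khz)
{
    if (tscdeadline > guest_tsc) {
        /* delta cycles -> ns, as the patched code does with do_div() */
        uint64_t ns = (tscdeadline - guest_tsc) * 1000000ull / tsc_khz;
        hrtimer_start_ns(ns);
    } else {
        apic_timer_expired();
    }
}

int main(void)
{
    start_tscdeadline_timer(3000000, 1000000, 2000000); /* 2 GHz, +1 ms */
    start_tscdeadline_timer(1000000, 3000000, 2000000); /* already past */
    return 0;
}
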
- */ - if (!atomic_read(&ktimer->pending)) { - atomic_inc(&ktimer->pending); - /* FIXME: this code should not know anything about vcpus */ - kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); - } - if (waitqueue_active(q)) - wake_up_interruptible(q); + apic_timer_expired(apic); if (lapic_is_periodic(apic)) { hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); @@ -1693,6 +1739,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ? 1 : count_vectors(apic->regs + APIC_ISR); apic->highest_isr_cache = -1; + if (kvm_x86_ops->hwapic_irr_update) + kvm_x86_ops->hwapic_irr_update(vcpu, + apic_find_highest_irr(apic)); kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_rtc_eoi_tracking_restore_one(vcpu); @@ -1837,8 +1886,11 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) return 1; + if (reg == APIC_ICR2) + return 1; + /* if this is ICR write vector before command */ - if (msr == 0x830) + if (reg == APIC_ICR) apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); return apic_reg_write(apic, reg, (u32)data); } @@ -1851,9 +1903,15 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) return 1; + if (reg == APIC_DFR || reg == APIC_ICR2) { + apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n", + reg); + return 1; + } + if (apic_reg_read(apic, reg, 4, &low)) return 1; - if (msr == 0x830) + if (reg == APIC_ICR) apic_reg_read(apic, APIC_ICR2, 4, &high); *data = (((u64)high) << 32) | low; @@ -1908,7 +1966,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) void kvm_apic_accept_events(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - unsigned int sipi_vector; + u8 sipi_vector; unsigned long pe; if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 6a11845fd8b..c674fce53cf 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -11,6 +11,7 @@ struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ + u32 timer_mode; u32 timer_mode_mask; u64 tscdeadline; atomic_t pending; /* accumulated triggered timers */ @@ -22,6 +23,7 @@ struct kvm_lapic { struct kvm_timer lapic_timer; u32 divide_count; struct kvm_vcpu *vcpu; + bool sw_enabled; bool irr_pending; /* Number of bits set in ISR. 
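
/*
 * Background for the reg checks above: x2APIC MSR 0x800 + n maps to
 * the xAPIC register at offset n << 4 (the mapping itself is not in
 * this hunk -- it is the architectural rule the reg values rely on).
 * ICR is the one 64-bit x2APIC register, so a write is split into
 * ICR2 (the destination half) then ICR; DFR and ICR2 do not exist as
 * x2APIC MSRs and must fault, which is what the added checks enforce.
 */
#include <stdint.h>
#include <stdio.h>

#define APIC_ICR  0x300
#define APIC_ICR2 0x310

static int x2apic_msr_write(uint32_t msr, uint64_t data)
{
    uint32_t reg = (msr - 0x800) << 4;

    if (reg == APIC_ICR2)
        return 1;                           /* reserved -> #GP */
    if (reg == APIC_ICR)
        printf("ICR2 <- %#x (destination)\n", (uint32_t)(data >> 32));
    printf("reg %#x <- %#x\n", reg, (uint32_t)data);
    return 0;
}

int main(void)
{
    /* MSR 0x830 is ICR: destination 3, vector 0x41 */
    x2apic_msr_write(0x830, 0x0000000300000041ull);
    printf("ICR2 write rc=%d\n", x2apic_msr_write(0x831, 0));
    return 0;
}
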
*/ s16 isr_count; @@ -55,8 +57,8 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu); void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); -int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); -int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest); +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); @@ -119,11 +121,11 @@ static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic) extern struct static_key_deferred apic_sw_disabled; -static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic) +static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic) { if (static_key_false(&apic_sw_disabled.key)) - return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; - return APIC_SPIV_APIC_ENABLED; + return apic->sw_enabled; + return true; } static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) @@ -152,8 +154,6 @@ static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) ldr >>= 32 - map->ldr_bits; cid = (ldr >> map->cid_shift) & map->cid_mask; - BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); - return cid; } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 978f402006e..10fbed126b1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -214,13 +214,12 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); #define MMIO_GEN_LOW_SHIFT 10 #define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2) #define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) -#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) static u64 generation_mmio_spte_mask(unsigned int gen) { u64 mask; - WARN_ON(gen > MMIO_MAX_GEN); + WARN_ON(gen & ~MMIO_GEN_MASK); mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT; mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT; @@ -263,13 +262,13 @@ static bool is_mmio_spte(u64 spte) static gfn_t get_mmio_spte_gfn(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask; + u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask; return (spte & ~mask) >> PAGE_SHIFT; } static unsigned get_mmio_spte_access(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask; + u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask; return (spte & ~mask) & ~PAGE_MASK; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7527cefc5a4..41dd0387ccc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1056,9 +1056,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho { struct vcpu_svm *svm = to_svm(vcpu); - WARN_ON(adjustment < 0); - if (host) - adjustment = svm_scale_tsc(vcpu, adjustment); + if (host) { + if (svm->tsc_ratio != TSC_RATIO_DEFAULT) + WARN_ON(adjustment < 0); + adjustment = svm_scale_tsc(vcpu, (u64)adjustment); + } svm->vmcb->control.tsc_offset += adjustment; if (is_guest_mode(vcpu)) @@ -2999,7 +3001,6 @@ static int dr_interception(struct vcpu_svm *svm) { int reg, dr; unsigned long val; - int err; if (svm->vcpu.guest_debug == 0) { /* @@ -3019,12 +3020,15 @@ static int dr_interception(struct vcpu_svm *svm) dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0; if (dr >= 16) { /* mov to DRn */ + if (!kvm_require_dr(&svm->vcpu, dr - 16)) + return 1; val = kvm_register_read(&svm->vcpu, reg); 
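
/*
 * The MMIO generation number is stored split across two spte bit
 * ranges; the mmu.c hunk above replaces MMIO_MAX_GEN with masking by
 * MMIO_GEN_MASK so the WARN fires on any bits outside the field. The
 * MMIO_SPTE_GEN_*_SHIFT values and MMIO_GEN_SHIFT below are
 * assumptions matching the mmu.c of this era, not part of this hunk.
 */
#include <stdint.h>
#include <stdio.h>

#define MMIO_SPTE_GEN_LOW_SHIFT   2   /* assumed spte bit positions */
#define MMIO_SPTE_GEN_HIGH_SHIFT  52
#define MMIO_GEN_SHIFT            20  /* assumed total field width */
#define MMIO_GEN_LOW_SHIFT        10
#define MMIO_GEN_LOW_MASK   ((1u << MMIO_GEN_LOW_SHIFT) - 2)
#define MMIO_GEN_MASK       ((1u << MMIO_GEN_SHIFT) - 1)

static uint64_t generation_mmio_spte_mask(unsigned int gen)
{
    uint64_t mask;

    if (gen & ~MMIO_GEN_MASK)           /* the reworked WARN_ON condition */
        fprintf(stderr, "WARN: generation out of range\n");

    mask  = (uint64_t)(gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
    mask |= ((uint64_t)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
    return mask;
}

int main(void)
{
    unsigned int gen = 0x5a5a4 & MMIO_GEN_MASK;

    printf("gen %#x -> spte gen mask %#llx\n", gen,
           (unsigned long long)generation_mmio_spte_mask(gen));
    return 0;
}
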
kvm_set_dr(&svm->vcpu, dr - 16, val); } else { - err = kvm_get_dr(&svm->vcpu, dr, &val); - if (!err) - kvm_register_write(&svm->vcpu, reg, val); + if (!kvm_require_dr(&svm->vcpu, dr)) + return 1; + kvm_get_dr(&svm->vcpu, dr, &val); + kvm_register_write(&svm->vcpu, reg, val); } skip_emulated_instruction(&svm->vcpu); @@ -4123,6 +4127,11 @@ static bool svm_mpx_supported(void) return false; } +static bool svm_xsaves_supported(void) +{ + return false; +} + static bool svm_has_wbinvd_exit(void) { return true; @@ -4410,6 +4419,7 @@ static struct kvm_x86_ops svm_x86_ops = { .rdtscp_supported = svm_rdtscp_supported, .invpcid_supported = svm_invpcid_supported, .mpx_supported = svm_mpx_supported, + .xsaves_supported = svm_xsaves_supported, .set_supported_cpuid = svm_set_supported_cpuid, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6b06ab8748d..c2a34bb5ad9 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -5,6 +5,7 @@ #include <asm/vmx.h> #include <asm/svm.h> #include <asm/clocksource.h> +#include <asm/pvclock-abi.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm @@ -877,6 +878,42 @@ TRACE_EVENT(kvm_ple_window, #define trace_kvm_ple_window_shrink(vcpu_id, new, old) \ trace_kvm_ple_window(false, vcpu_id, new, old) +TRACE_EVENT(kvm_pvclock_update, + TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock), + TP_ARGS(vcpu_id, pvclock), + + TP_STRUCT__entry( + __field( unsigned int, vcpu_id ) + __field( __u32, version ) + __field( __u64, tsc_timestamp ) + __field( __u64, system_time ) + __field( __u32, tsc_to_system_mul ) + __field( __s8, tsc_shift ) + __field( __u8, flags ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->version = pvclock->version; + __entry->tsc_timestamp = pvclock->tsc_timestamp; + __entry->system_time = pvclock->system_time; + __entry->tsc_to_system_mul = pvclock->tsc_to_system_mul; + __entry->tsc_shift = pvclock->tsc_shift; + __entry->flags = pvclock->flags; + ), + + TP_printk("vcpu_id %u, pvclock { version %u, tsc_timestamp 0x%llx, " + "system_time 0x%llx, tsc_to_system_mul 0x%x, tsc_shift %d, " + "flags 0x%x }", + __entry->vcpu_id, + __entry->version, + __entry->tsc_timestamp, + __entry->system_time, + __entry->tsc_to_system_mul, + __entry->tsc_shift, + __entry->flags) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3e556c68351..feb852b0459 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -99,13 +99,15 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); static bool __read_mostly nested = 0; module_param(nested, bool, S_IRUGO); +static u64 __read_mostly host_xss; + #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) #define KVM_VM_CR0_ALWAYS_ON \ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) #define KVM_CR4_GUEST_OWNED_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT) + | X86_CR4_OSXMMEXCPT | X86_CR4_TSD) #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) @@ -214,6 +216,7 @@ struct __packed vmcs12 { u64 virtual_apic_page_addr; u64 apic_access_addr; u64 ept_pointer; + u64 xss_exit_bitmap; u64 guest_physical_address; u64 vmcs_link_pointer; u64 guest_ia32_debugctl; @@ -616,6 +619,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD64(VIRTUAL_APIC_PAGE_ADDR, 
virtual_apic_page_addr), FIELD64(APIC_ACCESS_ADDR, apic_access_addr), FIELD64(EPT_POINTER, ept_pointer), + FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl), @@ -720,12 +724,15 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD(HOST_RSP, host_rsp), FIELD(HOST_RIP, host_rip), }; -static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table); static inline short vmcs_field_to_offset(unsigned long field) { - if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0) - return -1; + BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); + + if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || + vmcs_field_to_offset_table[field] == 0) + return -ENOENT; + return vmcs_field_to_offset_table[field]; } @@ -758,6 +765,7 @@ static u64 construct_eptp(unsigned long root_hpa); static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); static bool vmx_mpx_supported(void); +static bool vmx_xsaves_supported(void); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -1098,6 +1106,12 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); } +static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) +{ + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) && + vmx_xsaves_supported(); +} + static inline bool is_exception(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) @@ -1659,12 +1673,20 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) vmx->guest_msrs[efer_offset].mask = ~ignore_bits; clear_atomic_switch_msr(vmx, MSR_EFER); - /* On ept, can't emulate nx, and must switch nx atomically */ - if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) { + + /* + * On EPT, we can't emulate NX, so we must switch EFER atomically. + * On CPUs that support "load IA32_EFER", always switch EFER + * atomically, since it's faster than switching it manually. 
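
/*
 * The decision the surrounding update_transition_efer() hunk makes,
 * as a standalone predicate: use the VMCS atomic EFER switch when the
 * CPU can load IA32_EFER natively (faster than the shared-MSR path)
 * or when EPT is enabled and guest/host disagree on NX (correctness:
 * NX cannot be emulated under EPT) -- and even then, skip the MSR
 * slot entirely if the two values end up identical. Function and flag
 * names are stand-ins.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EFER_LME (1ull << 8)
#define EFER_LMA (1ull << 10)
#define EFER_NX  (1ull << 11)

static bool use_atomic_efer_switch(bool cpu_has_load_ia32_efer,
                                   bool enable_ept,
                                   uint64_t guest_efer, uint64_t host_efer,
                                   bool *need_msr_slot)
{
    if (cpu_has_load_ia32_efer ||
        (enable_ept && ((guest_efer ^ host_efer) & EFER_NX))) {
        if (!(guest_efer & EFER_LMA))
            guest_efer &= ~EFER_LME;    /* LME without LMA is dropped */
        *need_msr_slot = guest_efer != host_efer;
        return true;
    }
    return false;                       /* fall back to user-return MSRs */
}

int main(void)
{
    bool need = false;
    bool atomic = use_atomic_efer_switch(true, true,
                                         EFER_LMA | EFER_LME,
                                         EFER_LMA | EFER_LME | EFER_NX,
                                         &need);

    printf("atomic=%d need_msr_slot=%d\n", atomic, need);
    return 0;
}
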
+ */ + if (cpu_has_load_ia32_efer || + (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { guest_efer = vmx->vcpu.arch.efer; if (!(guest_efer & EFER_LMA)) guest_efer &= ~EFER_LME; - add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer); + if (guest_efer != host_efer) + add_atomic_switch_msr(vmx, MSR_EFER, + guest_efer, host_efer); return false; } @@ -2377,12 +2399,13 @@ static __init void nested_vmx_setup_ctls_msrs(void) nested_vmx_secondary_ctls_low = 0; nested_vmx_secondary_ctls_high &= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_UNRESTRICTED_GUEST | - SECONDARY_EXEC_WBINVD_EXITING; + SECONDARY_EXEC_WBINVD_EXITING | + SECONDARY_EXEC_XSAVES; if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ - nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; + nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT | + SECONDARY_EXEC_UNRESTRICTED_GUEST; nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | VMX_EPT_INVEPT_BIT; @@ -2558,6 +2581,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) if (!nested_vmx_allowed(vcpu)) return 1; return vmx_get_vmx_msr(vcpu, msr_index, pdata); + case MSR_IA32_XSS: + if (!vmx_xsaves_supported()) + return 1; + data = vcpu->arch.ia32_xss; + break; case MSR_TSC_AUX: if (!to_vmx(vcpu)->rdtscp_enabled) return 1; @@ -2649,6 +2677,22 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: return 1; /* they are read-only */ + case MSR_IA32_XSS: + if (!vmx_xsaves_supported()) + return 1; + /* + * The only supported bit as of Skylake is bit 8, but + * it is not supported on KVM. + */ + if (data != 0) + return 1; + vcpu->arch.ia32_xss = data; + if (vcpu->arch.ia32_xss != host_xss) + add_atomic_switch_msr(vmx, MSR_IA32_XSS, + vcpu->arch.ia32_xss, host_xss); + else + clear_atomic_switch_msr(vmx, MSR_IA32_XSS); + break; case MSR_TSC_AUX: if (!vmx->rdtscp_enabled) return 1; @@ -2884,7 +2928,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_SHADOW_VMCS; + SECONDARY_EXEC_SHADOW_VMCS | + SECONDARY_EXEC_XSAVES; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -3007,6 +3052,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) } } + if (cpu_has_xsaves) + rdmsrl(MSR_IA32_XSS, host_xss); + return 0; } @@ -3110,76 +3158,6 @@ static __init int alloc_kvm_area(void) return 0; } -static __init int hardware_setup(void) -{ - if (setup_vmcs_config(&vmcs_config) < 0) - return -EIO; - - if (boot_cpu_has(X86_FEATURE_NX)) - kvm_enable_efer_bits(EFER_NX); - - if (!cpu_has_vmx_vpid()) - enable_vpid = 0; - if (!cpu_has_vmx_shadow_vmcs()) - enable_shadow_vmcs = 0; - if (enable_shadow_vmcs) - init_vmcs_shadow_fields(); - - if (!cpu_has_vmx_ept() || - !cpu_has_vmx_ept_4levels()) { - enable_ept = 0; - enable_unrestricted_guest = 0; - enable_ept_ad_bits = 0; - } - - if (!cpu_has_vmx_ept_ad_bits()) - enable_ept_ad_bits = 0; - - if (!cpu_has_vmx_unrestricted_guest()) - enable_unrestricted_guest = 0; - - if (!cpu_has_vmx_flexpriority()) { - flexpriority_enabled = 0; - - /* - * set_apic_access_page_addr() is used to reload apic access - * page upon invalidation. No need to do anything if the - * processor does not have the APIC_ACCESS_ADDR VMCS field. 
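
/*
 * The MSR_IA32_XSS write path added above, reduced to its logic: only
 * the value 0 is accepted for now (no XSS feature bits are exposed to
 * guests), and the atomic entry/exit MSR switch slot is used only
 * when guest and host values differ. The switch-area calls are stubs.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t host_xss;               /* read once via rdmsrl at setup */

static void add_atomic_switch_xss(uint64_t guest, uint64_t host)
{
    printf("switch XSS %#llx <-> %#llx on entry/exit\n",
           (unsigned long long)guest, (unsigned long long)host);
}

static void clear_atomic_switch_xss(void)
{
    puts("no XSS switching needed");
}

static int set_xss(uint64_t *guest_xss, uint64_t data)
{
    if (data != 0)
        return 1;                       /* #GP: no supported XSS bits */
    *guest_xss = data;
    if (*guest_xss != host_xss)
        add_atomic_switch_xss(*guest_xss, host_xss);
    else
        clear_atomic_switch_xss();
    return 0;
}

int main(void)
{
    uint64_t guest_xss = 0;

    host_xss = 0x100;                   /* pretend the host uses bit 8 */
    printf("rc=%d\n", set_xss(&guest_xss, 0));
    printf("rc=%d\n", set_xss(&guest_xss, 0x100));
    return 0;
}
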
- */ - kvm_x86_ops->set_apic_access_page_addr = NULL; - } - - if (!cpu_has_vmx_tpr_shadow()) - kvm_x86_ops->update_cr8_intercept = NULL; - - if (enable_ept && !cpu_has_vmx_ept_2m_page()) - kvm_disable_largepages(); - - if (!cpu_has_vmx_ple()) - ple_gap = 0; - - if (!cpu_has_vmx_apicv()) - enable_apicv = 0; - - if (enable_apicv) - kvm_x86_ops->update_cr8_intercept = NULL; - else { - kvm_x86_ops->hwapic_irr_update = NULL; - kvm_x86_ops->deliver_posted_interrupt = NULL; - kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; - } - - if (nested) - nested_vmx_setup_ctls_msrs(); - - return alloc_kvm_area(); -} - -static __exit void hardware_unsetup(void) -{ - free_kvm_area(); -} - static bool emulation_required(struct kvm_vcpu *vcpu) { return emulate_invalid_guest_state && !guest_state_valid(vcpu); @@ -4396,6 +4374,7 @@ static void ept_set_mmio_spte_mask(void) kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull); } +#define VMX_XSS_EXIT_BITMAP 0 /* * Sets up the vmcs for emulated real mode. */ @@ -4505,6 +4484,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); set_cr4_guest_host_mask(vmx); + if (vmx_xsaves_supported()) + vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); + return 0; } @@ -5163,13 +5145,20 @@ static int handle_cr(struct kvm_vcpu *vcpu) static int handle_dr(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; - int dr, reg; + int dr, dr7, reg; + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + dr = exit_qualification & DEBUG_REG_ACCESS_NUM; + + /* First, if DR does not exist, trigger UD */ + if (!kvm_require_dr(vcpu, dr)) + return 1; /* Do not handle if the CPL > 0, will trigger GP on re-entry */ if (!kvm_require_cpl(vcpu, 0)) return 1; - dr = vmcs_readl(GUEST_DR7); - if (dr & DR7_GD) { + dr7 = vmcs_readl(GUEST_DR7); + if (dr7 & DR7_GD) { /* * As the vm-exit takes precedence over the debug trap, we * need to emulate the latter, either for the host or the @@ -5177,17 +5166,14 @@ static int handle_dr(struct kvm_vcpu *vcpu) */ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; - vcpu->run->debug.arch.dr7 = dr; - vcpu->run->debug.arch.pc = - vmcs_readl(GUEST_CS_BASE) + - vmcs_readl(GUEST_RIP); + vcpu->run->debug.arch.dr7 = dr7; + vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); vcpu->run->debug.arch.exception = DB_VECTOR; vcpu->run->exit_reason = KVM_EXIT_DEBUG; return 0; } else { - vcpu->arch.dr7 &= ~DR7_GD; + vcpu->arch.dr6 &= ~15; vcpu->arch.dr6 |= DR6_BD | DR6_RTM; - vmcs_writel(GUEST_DR7, vcpu->arch.dr7); kvm_queue_exception(vcpu, DB_VECTOR); return 1; } @@ -5209,8 +5195,6 @@ static int handle_dr(struct kvm_vcpu *vcpu) return 1; } - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - dr = exit_qualification & DEBUG_REG_ACCESS_NUM; reg = DEBUG_REG_ACCESS_REG(exit_qualification); if (exit_qualification & TYPE_MOV_FROM_DR) { unsigned long val; @@ -5391,6 +5375,20 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) return 1; } +static int handle_xsaves(struct kvm_vcpu *vcpu) +{ + skip_emulated_instruction(vcpu); + WARN(1, "this should never happen\n"); + return 1; +} + +static int handle_xrstors(struct kvm_vcpu *vcpu) +{ + skip_emulated_instruction(vcpu); + WARN(1, "this should never happen\n"); + return 1; +} + static int handle_apic_access(struct kvm_vcpu *vcpu) { if (likely(fasteoi)) { @@ -5492,7 +5490,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) } /* clear all local breakpoint enable flags */ - vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55); + 
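
/*
 * What the PFERR_* rewrite above computes: the EPT-violation exit
 * qualification carries write in bit 1, instruction fetch in bit 2,
 * and "translation was present" in bit 3, and those map onto the page
 * fault error code as bit 1, bit 4 and bit 0 respectively. The shifts
 * are unchanged by the patch, only spelled with named masks now.
 */
#include <stdint.h>
#include <stdio.h>

#define PFERR_PRESENT_MASK (1u << 0)
#define PFERR_WRITE_MASK   (1u << 1)
#define PFERR_FETCH_MASK   (1u << 4)

static uint32_t ept_violation_error_code(uint64_t exit_qualification)
{
    uint32_t error_code;

    error_code  = exit_qualification & PFERR_WRITE_MASK;         /* 1 -> 1 */
    error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;   /* 2 -> 4 */
    error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK; /* 3 -> 0 */
    return error_code;
}

int main(void)
{
    /* write to a present translation: qualification bits 1 and 3 */
    printf("error_code=%#x\n",
           ept_violation_error_code((1u << 1) | (1u << 3)));
    return 0;
}
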
vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155); /* * TODO: What about debug traps on tss switch? @@ -5539,11 +5537,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) trace_kvm_page_fault(gpa, exit_qualification); /* It is a write fault? */ - error_code = exit_qualification & (1U << 1); + error_code = exit_qualification & PFERR_WRITE_MASK; /* It is a fetch fault? */ - error_code |= (exit_qualification & (1U << 2)) << 2; + error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK; /* ept page table is present? */ - error_code |= (exit_qualification >> 3) & 0x1; + error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK; vcpu->arch.exit_qualification = exit_qualification; @@ -5785,6 +5783,204 @@ static void update_ple_window_actual_max(void) ple_window_grow, INT_MIN); } +static __init int hardware_setup(void) +{ + int r = -ENOMEM, i, msr; + + rdmsrl_safe(MSR_EFER, &host_efer); + + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) + kvm_define_shared_msr(i, vmx_msr_index[i]); + + vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_io_bitmap_a) + return r; + + vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_io_bitmap_b) + goto out; + + vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_legacy) + goto out1; + + vmx_msr_bitmap_legacy_x2apic = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_legacy_x2apic) + goto out2; + + vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_longmode) + goto out3; + + vmx_msr_bitmap_longmode_x2apic = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_longmode_x2apic) + goto out4; + vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmread_bitmap) + goto out5; + + vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmwrite_bitmap) + goto out6; + + memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); + memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); + + /* + * Allow direct access to the PC debug port (it is often used for I/O + * delays, but the vmexits simply slow things down). + */ + memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); + clear_bit(0x80, vmx_io_bitmap_a); + + memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); + + memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); + memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); + + vmx_disable_intercept_for_msr(MSR_FS_BASE, false); + vmx_disable_intercept_for_msr(MSR_GS_BASE, false); + vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); + vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); + + memcpy(vmx_msr_bitmap_legacy_x2apic, + vmx_msr_bitmap_legacy, PAGE_SIZE); + memcpy(vmx_msr_bitmap_longmode_x2apic, + vmx_msr_bitmap_longmode, PAGE_SIZE); + + if (enable_apicv) { + for (msr = 0x800; msr <= 0x8ff; msr++) + vmx_disable_intercept_msr_read_x2apic(msr); + + /* According SDM, in x2apic mode, the whole id reg is used. + * But in KVM, it only use the highest eight bits. 
Need to + * intercept it */ + vmx_enable_intercept_msr_read_x2apic(0x802); + /* TMCCT */ + vmx_enable_intercept_msr_read_x2apic(0x839); + /* TPR */ + vmx_disable_intercept_msr_write_x2apic(0x808); + /* EOI */ + vmx_disable_intercept_msr_write_x2apic(0x80b); + /* SELF-IPI */ + vmx_disable_intercept_msr_write_x2apic(0x83f); + } + + if (enable_ept) { + kvm_mmu_set_mask_ptes(0ull, + (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, + (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull, + 0ull, VMX_EPT_EXECUTABLE_MASK); + ept_set_mmio_spte_mask(); + kvm_enable_tdp(); + } else + kvm_disable_tdp(); + + update_ple_window_actual_max(); + + if (setup_vmcs_config(&vmcs_config) < 0) { + r = -EIO; + goto out7; + } + + if (boot_cpu_has(X86_FEATURE_NX)) + kvm_enable_efer_bits(EFER_NX); + + if (!cpu_has_vmx_vpid()) + enable_vpid = 0; + if (!cpu_has_vmx_shadow_vmcs()) + enable_shadow_vmcs = 0; + if (enable_shadow_vmcs) + init_vmcs_shadow_fields(); + + if (!cpu_has_vmx_ept() || + !cpu_has_vmx_ept_4levels()) { + enable_ept = 0; + enable_unrestricted_guest = 0; + enable_ept_ad_bits = 0; + } + + if (!cpu_has_vmx_ept_ad_bits()) + enable_ept_ad_bits = 0; + + if (!cpu_has_vmx_unrestricted_guest()) + enable_unrestricted_guest = 0; + + if (!cpu_has_vmx_flexpriority()) { + flexpriority_enabled = 0; + + /* + * set_apic_access_page_addr() is used to reload apic access + * page upon invalidation. No need to do anything if the + * processor does not have the APIC_ACCESS_ADDR VMCS field. + */ + kvm_x86_ops->set_apic_access_page_addr = NULL; + } + + if (!cpu_has_vmx_tpr_shadow()) + kvm_x86_ops->update_cr8_intercept = NULL; + + if (enable_ept && !cpu_has_vmx_ept_2m_page()) + kvm_disable_largepages(); + + if (!cpu_has_vmx_ple()) + ple_gap = 0; + + if (!cpu_has_vmx_apicv()) + enable_apicv = 0; + + if (enable_apicv) + kvm_x86_ops->update_cr8_intercept = NULL; + else { + kvm_x86_ops->hwapic_irr_update = NULL; + kvm_x86_ops->deliver_posted_interrupt = NULL; + kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; + } + + if (nested) + nested_vmx_setup_ctls_msrs(); + + return alloc_kvm_area(); + +out7: + free_page((unsigned long)vmx_vmwrite_bitmap); +out6: + free_page((unsigned long)vmx_vmread_bitmap); +out5: + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); +out4: + free_page((unsigned long)vmx_msr_bitmap_longmode); +out3: + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); +out2: + free_page((unsigned long)vmx_msr_bitmap_legacy); +out1: + free_page((unsigned long)vmx_io_bitmap_b); +out: + free_page((unsigned long)vmx_io_bitmap_a); + + return r; +} + +static __exit void hardware_unsetup(void) +{ + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); + free_page((unsigned long)vmx_msr_bitmap_legacy); + free_page((unsigned long)vmx_msr_bitmap_longmode); + free_page((unsigned long)vmx_io_bitmap_b); + free_page((unsigned long)vmx_io_bitmap_a); + free_page((unsigned long)vmx_vmwrite_bitmap); + free_page((unsigned long)vmx_vmread_bitmap); + + free_kvm_area(); +} + /* * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE * exiting, so only get here on cpu with PAUSE-Loop-Exiting. @@ -6361,58 +6557,60 @@ static inline int vmcs_field_readonly(unsigned long field) * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of * 64-bit fields are to be returned). 
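 * (For example, GUEST_RIP is a natural-width field: vmcs12 stores it
 * as a u64, but a 32-bit L1 that VMREADs it only ends up consuming
 * the low 32 bits of the value returned here.)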
 */
-static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
-                   unsigned long field, u64 *ret)
+static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
+                  unsigned long field, u64 *ret)
 {
     short offset = vmcs_field_to_offset(field);
     char *p;
 
     if (offset < 0)
-        return 0;
+        return offset;
 
     p = ((char *)(get_vmcs12(vcpu))) + offset;
 
     switch (vmcs_field_type(field)) {
     case VMCS_FIELD_TYPE_NATURAL_WIDTH:
         *ret = *((natural_width *)p);
-        return 1;
+        return 0;
     case VMCS_FIELD_TYPE_U16:
         *ret = *((u16 *)p);
-        return 1;
+        return 0;
     case VMCS_FIELD_TYPE_U32:
         *ret = *((u32 *)p);
-        return 1;
+        return 0;
     case VMCS_FIELD_TYPE_U64:
         *ret = *((u64 *)p);
-        return 1;
+        return 0;
     default:
-        return 0; /* can never happen. */
+        WARN_ON(1);
+        return -ENOENT;
     }
 }
 
-static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
-                    unsigned long field, u64 field_value){
+static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
+                   unsigned long field, u64 field_value){
     short offset = vmcs_field_to_offset(field);
     char *p = ((char *) get_vmcs12(vcpu)) + offset;
 
     if (offset < 0)
-        return false;
+        return offset;
 
     switch (vmcs_field_type(field)) {
     case VMCS_FIELD_TYPE_U16:
         *(u16 *)p = field_value;
-        return true;
+        return 0;
     case VMCS_FIELD_TYPE_U32:
         *(u32 *)p = field_value;
-        return true;
+        return 0;
     case VMCS_FIELD_TYPE_U64:
         *(u64 *)p = field_value;
-        return true;
+        return 0;
     case VMCS_FIELD_TYPE_NATURAL_WIDTH:
         *(natural_width *)p = field_value;
-        return true;
+        return 0;
     default:
-        return false; /* can never happen. */
+        WARN_ON(1);
+        return -ENOENT;
     }
 }
 
@@ -6445,6 +6643,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
         case VMCS_FIELD_TYPE_NATURAL_WIDTH:
             field_value = vmcs_readl(field);
             break;
+        default:
+            WARN_ON(1);
+            continue;
         }
         vmcs12_write_any(&vmx->vcpu, field, field_value);
     }
@@ -6490,6 +6691,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
         case VMCS_FIELD_TYPE_NATURAL_WIDTH:
             vmcs_writel(field, (long)field_value);
             break;
+        default:
+            WARN_ON(1);
+            break;
         }
     }
 }
@@ -6528,7 +6732,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
     /* Decode instruction info and find the field to read */
     field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
     /* Read the field, zero-extended to a u64 field_value */
-    if (!vmcs12_read_any(vcpu, field, &field_value)) {
+    if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
         nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
         skip_emulated_instruction(vcpu);
         return 1;
@@ -6598,7 +6802,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
         return 1;
     }
 
-    if (!vmcs12_write_any(vcpu, field, field_value)) {
+    if (vmcs12_write_any(vcpu, field, field_value) < 0) {
         nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
         skip_emulated_instruction(vcpu);
         return 1;
@@ -6802,6 +7006,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
     [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
     [EXIT_REASON_INVEPT]                  = handle_invept,
     [EXIT_REASON_INVVPID]                 = handle_invvpid,
+    [EXIT_REASON_XSAVES]                  = handle_xsaves,
+    [EXIT_REASON_XRSTORS]                 = handle_xrstors,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7089,6 +7295,14 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
         return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
     case EXIT_REASON_XSETBV:
         return 1;
+    case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
+        /*
+         * This should never happen, since it is not possible to
+         * set XSS to a non-zero value---neither in L1 nor in L2.
+         * If it were, XSS would have to be checked against
+         * the XSS exit bitmap in vmcs12.
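+         * (In practice VMX_XSS_EXIT_BITMAP is 0 and, as noted above,
+         * the guest can never load a non-zero IA32_XSS, so this exit
+         * reason stays dead code; handle_xsaves() and handle_xrstors()
+         * earlier in the patch WARN for the same reason.)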
+ */ + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); default: return 1; } @@ -7277,6 +7491,9 @@ static void vmx_set_rvi(int vector) u16 status; u8 old; + if (vector == -1) + vector = 0; + status = vmcs_read16(GUEST_INTR_STATUS); old = (u8)status & 0xff; if ((u8)vector != old) { @@ -7288,22 +7505,23 @@ static void vmx_set_rvi(int vector) static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) { + if (!is_guest_mode(vcpu)) { + vmx_set_rvi(max_irr); + return; + } + if (max_irr == -1) return; /* - * If a vmexit is needed, vmx_check_nested_events handles it. + * In guest mode. If a vmexit is needed, vmx_check_nested_events + * handles it. */ - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) + if (nested_exit_on_intr(vcpu)) return; - if (!is_guest_mode(vcpu)) { - vmx_set_rvi(max_irr); - return; - } - /* - * Fall back to pre-APICv interrupt injection since L2 + * Else, fall back to pre-APICv interrupt injection since L2 * is run without virtual interrupt delivery. */ if (!kvm_event_needs_reinjection(vcpu) && @@ -7400,6 +7618,12 @@ static bool vmx_mpx_supported(void) (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS); } +static bool vmx_xsaves_supported(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_XSAVES; +} + static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -8135,6 +8359,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); + if (nested_cpu_has_xsaves(vmcs12)) + vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap); vmcs_write64(VMCS_LINK_POINTER, -1ull); exec_control = vmcs12->pin_based_vm_exec_control; @@ -8775,6 +9001,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); if (vmx_mpx_supported()) vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); + if (nested_cpu_has_xsaves(vmcs12)) + vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); /* update exit information fields: */ @@ -9176,6 +9404,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .check_intercept = vmx_check_intercept, .handle_external_intr = vmx_handle_external_intr, .mpx_supported = vmx_mpx_supported, + .xsaves_supported = vmx_xsaves_supported, .check_nested_events = vmx_check_nested_events, @@ -9184,150 +9413,21 @@ static struct kvm_x86_ops vmx_x86_ops = { static int __init vmx_init(void) { - int r, i, msr; - - rdmsrl_safe(MSR_EFER, &host_efer); - - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) - kvm_define_shared_msr(i, vmx_msr_index[i]); - - vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_io_bitmap_a) - return -ENOMEM; - - r = -ENOMEM; - - vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_io_bitmap_b) - goto out; - - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy) - goto out1; - - vmx_msr_bitmap_legacy_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy_x2apic) - goto out2; - - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode) - goto out3; - - vmx_msr_bitmap_longmode_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode_x2apic) - goto out4; - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_vmread_bitmap) - goto out5; - - vmx_vmwrite_bitmap = (unsigned long 
*)__get_free_page(GFP_KERNEL); - if (!vmx_vmwrite_bitmap) - goto out6; - - memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); - memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); - - /* - * Allow direct access to the PC debug port (it is often used for I/O - * delays, but the vmexits simply slow things down). - */ - memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); - clear_bit(0x80, vmx_io_bitmap_a); - - memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - - r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), - __alignof__(struct vcpu_vmx), THIS_MODULE); + int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), + __alignof__(struct vcpu_vmx), THIS_MODULE); if (r) - goto out7; + return r; #ifdef CONFIG_KEXEC rcu_assign_pointer(crash_vmclear_loaded_vmcss, crash_vmclear_local_loaded_vmcss); #endif - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); - - memcpy(vmx_msr_bitmap_legacy_x2apic, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic, - vmx_msr_bitmap_longmode, PAGE_SIZE); - - if (enable_apicv) { - for (msr = 0x800; msr <= 0x8ff; msr++) - vmx_disable_intercept_msr_read_x2apic(msr); - - /* According SDM, in x2apic mode, the whole id reg is used. - * But in KVM, it only use the highest eight bits. Need to - * intercept it */ - vmx_enable_intercept_msr_read_x2apic(0x802); - /* TMCCT */ - vmx_enable_intercept_msr_read_x2apic(0x839); - /* TPR */ - vmx_disable_intercept_msr_write_x2apic(0x808); - /* EOI */ - vmx_disable_intercept_msr_write_x2apic(0x80b); - /* SELF-IPI */ - vmx_disable_intercept_msr_write_x2apic(0x83f); - } - - if (enable_ept) { - kvm_mmu_set_mask_ptes(0ull, - (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, - (enable_ept_ad_bits) ? 
VMX_EPT_DIRTY_BIT : 0ull, - 0ull, VMX_EPT_EXECUTABLE_MASK); - ept_set_mmio_spte_mask(); - kvm_enable_tdp(); - } else - kvm_disable_tdp(); - - update_ple_window_actual_max(); - return 0; - -out7: - free_page((unsigned long)vmx_vmwrite_bitmap); -out6: - free_page((unsigned long)vmx_vmread_bitmap); -out5: - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); -out4: - free_page((unsigned long)vmx_msr_bitmap_longmode); -out3: - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); -out2: - free_page((unsigned long)vmx_msr_bitmap_legacy); -out1: - free_page((unsigned long)vmx_io_bitmap_b); -out: - free_page((unsigned long)vmx_io_bitmap_a); - return r; } static void __exit vmx_exit(void) { - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); - free_page((unsigned long)vmx_msr_bitmap_legacy); - free_page((unsigned long)vmx_msr_bitmap_longmode); - free_page((unsigned long)vmx_io_bitmap_b); - free_page((unsigned long)vmx_io_bitmap_a); - free_page((unsigned long)vmx_vmwrite_bitmap); - free_page((unsigned long)vmx_vmread_bitmap); - #ifdef CONFIG_KEXEC RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); synchronize_rcu(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0033df32a74..c259814200b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -27,6 +27,7 @@ #include "kvm_cache_regs.h" #include "x86.h" #include "cpuid.h" +#include "assigned-dev.h" #include <linux/clocksource.h> #include <linux/interrupt.h> @@ -353,6 +354,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, if (!vcpu->arch.exception.pending) { queue: + if (has_error && !is_protmode(vcpu)) + has_error = false; vcpu->arch.exception.pending = true; vcpu->arch.exception.has_error_code = has_error; vcpu->arch.exception.nr = nr; @@ -455,6 +458,16 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) } EXPORT_SYMBOL_GPL(kvm_require_cpl); +bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr) +{ + if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE)) + return true; + + kvm_queue_exception(vcpu, UD_VECTOR); + return false; +} +EXPORT_SYMBOL_GPL(kvm_require_dr); + /* * This function will be used to read from the physical memory of the currently * running guest. 
The difference to kvm_read_guest_page is that this function @@ -656,6 +669,12 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR))) return 1; + if (xcr0 & XSTATE_AVX512) { + if (!(xcr0 & XSTATE_YMM)) + return 1; + if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512) + return 1; + } kvm_put_guest_xcr0(vcpu); vcpu->arch.xcr0 = xcr0; @@ -732,6 +751,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { +#ifdef CONFIG_X86_64 + cr3 &= ~CR3_PCID_INVD; +#endif + if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { kvm_mmu_sync_roots(vcpu); kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); @@ -811,8 +834,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) vcpu->arch.eff_db[dr] = val; break; case 4: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return 1; /* #UD */ /* fall through */ case 6: if (val & 0xffffffff00000000ULL) @@ -821,8 +842,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) kvm_update_dr6(vcpu); break; case 5: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return 1; /* #UD */ /* fall through */ default: /* 7 */ if (val & 0xffffffff00000000ULL) @@ -837,27 +856,21 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { - int res; - - res = __kvm_set_dr(vcpu, dr, val); - if (res > 0) - kvm_queue_exception(vcpu, UD_VECTOR); - else if (res < 0) + if (__kvm_set_dr(vcpu, dr, val)) { kvm_inject_gp(vcpu, 0); - - return res; + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(kvm_set_dr); -static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) +int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) { switch (dr) { case 0 ... 3: *val = vcpu->arch.db[dr]; break; case 4: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return 1; /* fall through */ case 6: if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) @@ -866,23 +879,11 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) *val = kvm_x86_ops->get_dr6(vcpu); break; case 5: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return 1; /* fall through */ default: /* 7 */ *val = vcpu->arch.dr7; break; } - - return 0; -} - -int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) -{ - if (_kvm_get_dr(vcpu, dr, val)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } return 0; } EXPORT_SYMBOL_GPL(kvm_get_dr); @@ -1237,21 +1238,22 @@ void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 bool vcpus_matched; - bool do_request = false; struct kvm_arch *ka = &vcpu->kvm->arch; struct pvclock_gtod_data *gtod = &pvclock_gtod_data; vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 == atomic_read(&vcpu->kvm->online_vcpus)); - if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC) - if (!ka->use_master_clock) - do_request = 1; - - if (!vcpus_matched && ka->use_master_clock) - do_request = 1; - - if (do_request) + /* + * Once the masterclock is enabled, always perform request in + * order to update it. + * + * In order to enable masterclock, the host clocksource must be TSC + * and the vcpus need to have matched TSCs. When that happens, + * perform request to enable masterclock. 
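+     * (The update handler re-computes these conditions, so the same
+     * request also serves to switch the masterclock off again once
+     * the host clocksource or the TSC matching stops qualifying.)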
+ */ + if (ka->use_master_clock || + (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched)) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc, @@ -1637,16 +1639,16 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; vcpu->last_guest_tsc = tsc_timestamp; + if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, + &guest_hv_clock, sizeof(guest_hv_clock)))) + return 0; + /* * The interface expects us to write an even number signaling that the * update is finished. Since the guest won't see the intermediate * state, we just increase by 2 at the end. */ - vcpu->hv_clock.version += 2; - - if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, - &guest_hv_clock, sizeof(guest_hv_clock)))) - return 0; + vcpu->hv_clock.version = guest_hv_clock.version + 2; /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); @@ -1662,6 +1664,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.flags = pvclock_flags; + trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, sizeof(vcpu->hv_clock)); @@ -2140,7 +2144,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC_ADJUST: if (guest_cpuid_has_tsc_adjust(vcpu)) { if (!msr_info->host_initiated) { - u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; + s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true); } vcpu->arch.ia32_tsc_adjust_msr = data; @@ -3106,7 +3110,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, unsigned long val; memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); - _kvm_get_dr(vcpu, 6, &val); + kvm_get_dr(vcpu, 6, &val); dbgregs->dr6 = val; dbgregs->dr7 = vcpu->arch.dr7; dbgregs->flags = 0; @@ -3128,15 +3132,89 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, return 0; } +#define XSTATE_COMPACTION_ENABLED (1ULL << 63) + +static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) +{ + struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; + u64 xstate_bv = xsave->xsave_hdr.xstate_bv; + u64 valid; + + /* + * Copy legacy XSAVE area, to avoid complications with CPUID + * leaves 0 and 1 in the loop below. + */ + memcpy(dest, xsave, XSAVE_HDR_OFFSET); + + /* Set XSTATE_BV */ + *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv; + + /* + * Copy each region from the possibly compacted offset to the + * non-compacted offset. + */ + valid = xstate_bv & ~XSTATE_FPSSE; + while (valid) { + u64 feature = valid & -valid; + int index = fls64(feature) - 1; + void *src = get_xsave_addr(xsave, feature); + + if (src) { + u32 size, offset, ecx, edx; + cpuid_count(XSTATE_CPUID, index, + &size, &offset, &ecx, &edx); + memcpy(dest + offset, src, size); + } + + valid -= feature; + } +} + +static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) +{ + struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; + u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET); + u64 valid; + + /* + * Copy legacy XSAVE area, to avoid complications with CPUID + * leaves 0 and 1 in the loop below. + */ + memcpy(xsave, src, XSAVE_HDR_OFFSET); + + /* Set XSTATE_BV and possibly XCOMP_BV. 
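+     * Bit 63 of XCOMP_BV marks the save area as compacted; the
+     * remaining bits mirror host_xcr0 so that XRSTORS accepts the
+     * buffer. fill_xsave() and load_xsave() convert between the
+     * compacted and standard layouts using CPUID leaf 0xD, whose
+     * subleaf i reports state component i's size (EAX) and its
+     * standard-format offset (EBX).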
*/ + xsave->xsave_hdr.xstate_bv = xstate_bv; + if (cpu_has_xsaves) + xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; + + /* + * Copy each region from the non-compacted offset to the + * possibly compacted offset. + */ + valid = xstate_bv & ~XSTATE_FPSSE; + while (valid) { + u64 feature = valid & -valid; + int index = fls64(feature) - 1; + void *dest = get_xsave_addr(xsave, feature); + + if (dest) { + u32 size, offset, ecx, edx; + cpuid_count(XSTATE_CPUID, index, + &size, &offset, &ecx, &edx); + memcpy(dest, src + offset, size); + } else + WARN_ON_ONCE(1); + + valid -= feature; + } +} + static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { if (cpu_has_xsave) { - memcpy(guest_xsave->region, - &vcpu->arch.guest_fpu.state->xsave, - vcpu->arch.guest_xstate_size); - *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &= - vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE; + memset(guest_xsave, 0, sizeof(struct kvm_xsave)); + fill_xsave((u8 *) guest_xsave->region, vcpu); } else { memcpy(guest_xsave->region, &vcpu->arch.guest_fpu.state->fxsave, @@ -3160,8 +3238,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, */ if (xstate_bv & ~kvm_supported_xcr0()) return -EINVAL; - memcpy(&vcpu->arch.guest_fpu.state->xsave, - guest_xsave->region, vcpu->arch.guest_xstate_size); + load_xsave(vcpu, (u8 *)guest_xsave->region); } else { if (xstate_bv & ~XSTATE_FPSSE) return -EINVAL; @@ -4004,7 +4081,7 @@ long kvm_arch_vm_ioctl(struct file *filp, } default: - ; + r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); } out: return r; @@ -4667,7 +4744,7 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { - return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); + return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); } int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) @@ -5211,21 +5288,17 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) { - struct kvm_run *kvm_run = vcpu->run; - unsigned long eip = vcpu->arch.emulate_ctxt.eip; - u32 dr6 = 0; - if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { - dr6 = kvm_vcpu_check_hw_bp(eip, 0, + struct kvm_run *kvm_run = vcpu->run; + unsigned long eip = kvm_get_linear_rip(vcpu); + u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, vcpu->arch.guest_debug_dr7, vcpu->arch.eff_db); if (dr6 != 0) { kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; - kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + - get_segment_base(vcpu, VCPU_SREG_CS); - + kvm_run->debug.arch.pc = eip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; *r = EMULATE_USER_EXIT; @@ -5235,7 +5308,8 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) && !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) { - dr6 = kvm_vcpu_check_hw_bp(eip, 0, + unsigned long eip = kvm_get_linear_rip(vcpu); + u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, vcpu->arch.dr7, vcpu->arch.db); @@ -5365,7 +5439,9 @@ restart: kvm_rip_write(vcpu, ctxt->eip); if (r == EMULATE_DONE) kvm_vcpu_check_singlestep(vcpu, rflags, &r); - __kvm_set_rflags(vcpu, ctxt->eflags); + if (!ctxt->have_exception || + exception_type(ctxt->exception.vector) == EXCPT_TRAP) + __kvm_set_rflags(vcpu, ctxt->eflags); /* * For STI, interrupts are shadowed; so 
KVM_REQ_EVENT will @@ -5965,6 +6041,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | X86_EFLAGS_RF); + if (vcpu->arch.exception.nr == DB_VECTOR && + (vcpu->arch.dr7 & DR7_GD)) { + vcpu->arch.dr7 &= ~DR7_GD; + kvm_update_dr7(vcpu); + } + kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code, @@ -6873,6 +6955,9 @@ int fx_init(struct kvm_vcpu *vcpu) return err; fpu_finit(&vcpu->arch.guest_fpu); + if (cpu_has_xsaves) + vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv = + host_xcr0 | XSTATE_COMPACTION_ENABLED; /* * Ensure guest xcr0 is valid for loading @@ -7024,7 +7109,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_reset(vcpu); } -void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) { struct kvm_segment cs; @@ -7256,6 +7341,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type) return -EINVAL; + INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); @@ -7536,12 +7622,18 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) return kvm_x86_ops->interrupt_allowed(vcpu); } -bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) +unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu) { - unsigned long current_rip = kvm_rip_read(vcpu) + - get_segment_base(vcpu, VCPU_SREG_CS); + if (is_64_bit_mode(vcpu)) + return kvm_rip_read(vcpu); + return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) + + kvm_rip_read(vcpu)); +} +EXPORT_SYMBOL_GPL(kvm_get_linear_rip); - return current_rip == linear_rip; +bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) +{ + return kvm_get_linear_rip(vcpu) == linear_rip; } EXPORT_SYMBOL_GPL(kvm_is_linear_rip); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 7cb9c45a5fe..cc1d61af614 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -162,7 +162,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ - | XSTATE_BNDREGS | XSTATE_BNDCSR) + | XSTATE_BNDREGS | XSTATE_BNDCSR \ + | XSTATE_AVX512) extern u64 host_xcr0; extern u64 kvm_supported_xcr0(void); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b74a7e130b0..38dcec403b4 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1247,7 +1247,7 @@ good_area: } /* User mode? Just return to handle the fatal exception */ - if (fault & FAULT_FLAG_USER) + if (flags & FAULT_FLAG_USER) return; /* Not returning to user mode? 
Handle exceptions or die: */ diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 27b71a0b72d..3ec85dfce12 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2370,8 +2370,12 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request, opcode = CEPH_OSD_OP_READ; } - osd_req_op_extent_init(osd_request, num_ops, opcode, offset, length, - 0, 0); + if (opcode == CEPH_OSD_OP_DELETE) + osd_req_op_init(osd_request, num_ops, opcode); + else + osd_req_op_extent_init(osd_request, num_ops, opcode, + offset, length, 0, 0); + if (obj_request->type == OBJ_REQUEST_BIO) osd_req_op_extent_osd_data_bio(osd_request, num_ops, obj_request->bio_list, length); @@ -3405,8 +3409,7 @@ err_rq: if (result) rbd_warn(rbd_dev, "%s %llx at %llx result %d", obj_op_name(op_type), length, offset, result); - if (snapc) - ceph_put_snap_context(snapc); + ceph_put_snap_context(snapc); blk_end_request_all(rq, result); } diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index 6753fd940c7..fe41d5ae7cb 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -177,6 +177,10 @@ static struct attribute *lm75_attrs[] = { }; ATTRIBUTE_GROUPS(lm75); +static const struct thermal_zone_of_device_ops lm75_of_thermal_ops = { + .get_temp = lm75_read_temp, +}; + /*-----------------------------------------------------------------------*/ /* device probe and removal */ @@ -296,10 +300,9 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) if (IS_ERR(data->hwmon_dev)) return PTR_ERR(data->hwmon_dev); - data->tz = thermal_zone_of_sensor_register(data->hwmon_dev, - 0, + data->tz = thermal_zone_of_sensor_register(data->hwmon_dev, 0, data->hwmon_dev, - lm75_read_temp, NULL); + &lm75_of_thermal_ops); if (IS_ERR(data->tz)) data->tz = NULL; diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index fd9a945fe8d..112e4d45e4a 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -486,6 +486,10 @@ static const struct attribute_group ntc_attr_group = { .attrs = ntc_attributes, }; +static const struct thermal_zone_of_device_ops ntc_of_thermal_ops = { + .get_temp = ntc_read_temp, +}; + static int ntc_thermistor_probe(struct platform_device *pdev) { const struct of_device_id *of_id = @@ -579,7 +583,7 @@ static int ntc_thermistor_probe(struct platform_device *pdev) pdev_id->name); data->tz = thermal_zone_of_sensor_register(data->dev, 0, data->dev, - ntc_read_temp, NULL); + &ntc_of_thermal_ops); if (IS_ERR(data->tz)) { dev_dbg(&pdev->dev, "Failed to register to thermal fw.\n"); data->tz = NULL; diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c index 51719956cc0..ba9f478f64e 100644 --- a/drivers/hwmon/tmp102.c +++ b/drivers/hwmon/tmp102.c @@ -158,6 +158,10 @@ ATTRIBUTE_GROUPS(tmp102); #define TMP102_CONFIG (TMP102_CONF_TM | TMP102_CONF_EM | TMP102_CONF_CR1) #define TMP102_CONFIG_RD_ONLY (TMP102_CONF_R0 | TMP102_CONF_R1 | TMP102_CONF_AL) +static const struct thermal_zone_of_device_ops tmp102_of_thermal_ops = { + .get_temp = tmp102_read_temp, +}; + static int tmp102_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -215,7 +219,7 @@ static int tmp102_probe(struct i2c_client *client, } tmp102->hwmon_dev = hwmon_dev; tmp102->tz = thermal_zone_of_sensor_register(hwmon_dev, 0, hwmon_dev, - tmp102_read_temp, NULL); + &tmp102_of_thermal_ops); if (IS_ERR(tmp102->tz)) tmp102->tz = NULL; diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index e29c04e2aff..e853a213468 100644 --- 
a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -527,14 +527,14 @@ EXPORT_SYMBOL(gameport_set_phys); */ static void gameport_init_port(struct gameport *gameport) { - static atomic_t gameport_no = ATOMIC_INIT(0); + static atomic_t gameport_no = ATOMIC_INIT(-1); __module_get(THIS_MODULE); mutex_init(&gameport->drv_mutex); device_initialize(&gameport->dev); dev_set_name(&gameport->dev, "gameport%lu", - (unsigned long)atomic_inc_return(&gameport_no) - 1); + (unsigned long)atomic_inc_return(&gameport_no)); gameport->dev.bus = &gameport_bus; gameport->dev.release = gameport_release_port; if (gameport->parent) diff --git a/drivers/input/input.c b/drivers/input/input.c index 0f175f55782..04217c2e345 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1775,7 +1775,7 @@ EXPORT_SYMBOL_GPL(input_class); */ struct input_dev *input_allocate_device(void) { - static atomic_t input_no = ATOMIC_INIT(0); + static atomic_t input_no = ATOMIC_INIT(-1); struct input_dev *dev; dev = kzalloc(sizeof(struct input_dev), GFP_KERNEL); @@ -1790,7 +1790,7 @@ struct input_dev *input_allocate_device(void) INIT_LIST_HEAD(&dev->node); dev_set_name(&dev->dev, "input%lu", - (unsigned long) atomic_inc_return(&input_no) - 1); + (unsigned long)atomic_inc_return(&input_no)); __module_get(THIS_MODULE); } diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index fc55f0d15b7..3aa2f3f3da5 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -886,8 +886,8 @@ static void xpad_led_set(struct led_classdev *led_cdev, static int xpad_led_probe(struct usb_xpad *xpad) { - static atomic_t led_seq = ATOMIC_INIT(0); - long led_no; + static atomic_t led_seq = ATOMIC_INIT(-1); + unsigned long led_no; struct xpad_led *led; struct led_classdev *led_cdev; int error; @@ -899,9 +899,9 @@ static int xpad_led_probe(struct usb_xpad *xpad) if (!led) return -ENOMEM; - led_no = (long)atomic_inc_return(&led_seq) - 1; + led_no = atomic_inc_return(&led_seq); - snprintf(led->name, sizeof(led->name), "xpad%ld", led_no); + snprintf(led->name, sizeof(led->name), "xpad%lu", led_no); led->xpad = xpad; led_cdev = &led->led_cdev; diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index a3958c63d7d..96ee26c555e 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -665,14 +665,14 @@ config KEYBOARD_CROS_EC To compile this driver as a module, choose M here: the module will be called cros_ec_keyb. -config KEYBOARD_CAP1106 - tristate "Microchip CAP1106 touch sensor" +config KEYBOARD_CAP11XX + tristate "Microchip CAP11XX based touch sensors" depends on OF && I2C select REGMAP_I2C help - Say Y here to enable the CAP1106 touch sensor driver. + Say Y here to enable the CAP11XX touch sensor driver. To compile this driver as a module, choose M here: the - module will be called cap1106. + module will be called cap11xx. 
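An aside on the gameport, input core and xpad hunks above: all three convert their instance-numbering counters from ATOMIC_INIT(0) plus atomic_inc_return(...) - 1 to ATOMIC_INIT(-1) plus a bare atomic_inc_return(). The generated names (gameport0, input0, xpad0, ...) are unchanged; starting at -1 simply lets the post-increment value be used as the id directly, dropping the trailing subtraction and the cast gymnastics in xpad. A minimal user-space sketch of the idiom, using C11 stdatomic in place of the kernel's atomic_t purely for illustration (next_id() and the printed names here are hypothetical):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Start at -1 so the first post-increment value is 0, mirroring
     * "static atomic_t input_no = ATOMIC_INIT(-1)" in the patch. */
    static atomic_long seq = ATOMIC_VAR_INIT(-1);

    static long next_id(void)
    {
        /* The kernel's atomic_inc_return() returns the value after the
         * increment; C11 atomic_fetch_add() returns the value before
         * it, hence the + 1. */
        return atomic_fetch_add(&seq, 1) + 1;
    }

    int main(void)
    {
        for (int i = 0; i < 3; i++)
            printf("input%ld\n", next_id()); /* input0, input1, input2 */
        return 0;
    }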
endif diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 0a3345634d7..febafa527eb 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_KEYBOARD_AMIGA) += amikbd.o obj-$(CONFIG_KEYBOARD_ATARI) += atakbd.o obj-$(CONFIG_KEYBOARD_ATKBD) += atkbd.o obj-$(CONFIG_KEYBOARD_BFIN) += bf54x-keys.o -obj-$(CONFIG_KEYBOARD_CAP1106) += cap1106.o +obj-$(CONFIG_KEYBOARD_CAP11XX) += cap11xx.o obj-$(CONFIG_KEYBOARD_CLPS711X) += clps711x-keypad.o obj-$(CONFIG_KEYBOARD_CROS_EC) += cros_ec_keyb.o obj-$(CONFIG_KEYBOARD_DAVINCI) += davinci_keyscan.o diff --git a/drivers/input/keyboard/amikbd.c b/drivers/input/keyboard/amikbd.c index d3b8c58fcfd..e04a3b4e55d 100644 --- a/drivers/input/keyboard/amikbd.c +++ b/drivers/input/keyboard/amikbd.c @@ -45,6 +45,7 @@ MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>"); MODULE_DESCRIPTION("Amiga keyboard driver"); MODULE_LICENSE("GPL"); +#ifdef CONFIG_HW_CONSOLE static unsigned char amikbd_keycode[0x78] __initdata = { [0] = KEY_GRAVE, [1] = KEY_1, @@ -144,6 +145,32 @@ static unsigned char amikbd_keycode[0x78] __initdata = { [103] = KEY_RIGHTMETA }; +static void __init amikbd_init_console_keymaps(void) +{ + /* We can spare 512 bytes on stack for temp_map in init path. */ + unsigned short temp_map[NR_KEYS]; + int i, j; + + for (i = 0; i < MAX_NR_KEYMAPS; i++) { + if (!key_maps[i]) + continue; + memset(temp_map, 0, sizeof(temp_map)); + for (j = 0; j < 0x78; j++) { + if (!amikbd_keycode[j]) + continue; + temp_map[j] = key_maps[i][amikbd_keycode[j]]; + } + for (j = 0; j < NR_KEYS; j++) { + if (!temp_map[j]) + temp_map[j] = 0xf200; + } + memcpy(key_maps[i], temp_map, sizeof(temp_map)); + } +} +#else /* !CONFIG_HW_CONSOLE */ +static inline void amikbd_init_console_keymaps(void) {} +#endif /* !CONFIG_HW_CONSOLE */ + static const char *amikbd_messages[8] = { [0] = KERN_ALERT "amikbd: Ctrl-Amiga-Amiga reset warning!!\n", [1] = KERN_WARNING "amikbd: keyboard lost sync\n", @@ -186,7 +213,7 @@ static irqreturn_t amikbd_interrupt(int irq, void *data) static int __init amikbd_probe(struct platform_device *pdev) { struct input_dev *dev; - int i, j, err; + int i, err; dev = input_allocate_device(); if (!dev) { @@ -207,22 +234,8 @@ static int __init amikbd_probe(struct platform_device *pdev) for (i = 0; i < 0x78; i++) set_bit(i, dev->keybit); - for (i = 0; i < MAX_NR_KEYMAPS; i++) { - static u_short temp_map[NR_KEYS] __initdata; - if (!key_maps[i]) - continue; - memset(temp_map, 0, sizeof(temp_map)); - for (j = 0; j < 0x78; j++) { - if (!amikbd_keycode[j]) - continue; - temp_map[j] = key_maps[i][amikbd_keycode[j]]; - } - for (j = 0; j < NR_KEYS; j++) { - if (!temp_map[j]) - temp_map[j] = 0xf200; - } - memcpy(key_maps[i], temp_map, sizeof(temp_map)); - } + amikbd_init_console_keymaps(); + ciaa.cra &= ~0x41; /* serial data in, turn off TA */ err = request_irq(IRQ_AMIGA_CIAA_SP, amikbd_interrupt, 0, "amikbd", dev); diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 6f5d7956913..e27a25892db 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -456,8 +456,9 @@ static irqreturn_t atkbd_interrupt(struct serio *serio, unsigned char data, keycode = atkbd->keycode[code]; - if (keycode != ATKBD_KEY_NULL) - input_event(dev, EV_MSC, MSC_SCAN, code); + if (!(atkbd->release && test_bit(code, atkbd->force_release_mask))) + if (keycode != ATKBD_KEY_NULL) + input_event(dev, EV_MSC, MSC_SCAN, code); switch (keycode) { case ATKBD_KEY_NULL: @@ -511,6 +512,7 @@ 
static irqreturn_t atkbd_interrupt(struct serio *serio, unsigned char data, input_sync(dev); if (value && test_bit(code, atkbd->force_release_mask)) { + input_event(dev, EV_MSC, MSC_SCAN, code); input_report_key(dev, keycode, 0); input_sync(dev); } diff --git a/drivers/input/keyboard/cap1106.c b/drivers/input/keyboard/cap1106.c deleted file mode 100644 index d70b65a14ce..00000000000 --- a/drivers/input/keyboard/cap1106.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Input driver for Microchip CAP1106, 6 channel capacitive touch sensor - * - * http://www.microchip.com/wwwproducts/Devices.aspx?product=CAP1106 - * - * (c) 2014 Daniel Mack <linux@zonque.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/interrupt.h> -#include <linux/input.h> -#include <linux/of_irq.h> -#include <linux/regmap.h> -#include <linux/i2c.h> -#include <linux/gpio/consumer.h> - -#define CAP1106_REG_MAIN_CONTROL 0x00 -#define CAP1106_REG_MAIN_CONTROL_GAIN_SHIFT (6) -#define CAP1106_REG_MAIN_CONTROL_GAIN_MASK (0xc0) -#define CAP1106_REG_MAIN_CONTROL_DLSEEP BIT(4) -#define CAP1106_REG_GENERAL_STATUS 0x02 -#define CAP1106_REG_SENSOR_INPUT 0x03 -#define CAP1106_REG_NOISE_FLAG_STATUS 0x0a -#define CAP1106_REG_SENOR_DELTA(X) (0x10 + (X)) -#define CAP1106_REG_SENSITIVITY_CONTROL 0x1f -#define CAP1106_REG_CONFIG 0x20 -#define CAP1106_REG_SENSOR_ENABLE 0x21 -#define CAP1106_REG_SENSOR_CONFIG 0x22 -#define CAP1106_REG_SENSOR_CONFIG2 0x23 -#define CAP1106_REG_SAMPLING_CONFIG 0x24 -#define CAP1106_REG_CALIBRATION 0x26 -#define CAP1106_REG_INT_ENABLE 0x27 -#define CAP1106_REG_REPEAT_RATE 0x28 -#define CAP1106_REG_MT_CONFIG 0x2a -#define CAP1106_REG_MT_PATTERN_CONFIG 0x2b -#define CAP1106_REG_MT_PATTERN 0x2d -#define CAP1106_REG_RECALIB_CONFIG 0x2f -#define CAP1106_REG_SENSOR_THRESH(X) (0x30 + (X)) -#define CAP1106_REG_SENSOR_NOISE_THRESH 0x38 -#define CAP1106_REG_STANDBY_CHANNEL 0x40 -#define CAP1106_REG_STANDBY_CONFIG 0x41 -#define CAP1106_REG_STANDBY_SENSITIVITY 0x42 -#define CAP1106_REG_STANDBY_THRESH 0x43 -#define CAP1106_REG_CONFIG2 0x44 -#define CAP1106_REG_SENSOR_BASE_CNT(X) (0x50 + (X)) -#define CAP1106_REG_SENSOR_CALIB (0xb1 + (X)) -#define CAP1106_REG_SENSOR_CALIB_LSB1 0xb9 -#define CAP1106_REG_SENSOR_CALIB_LSB2 0xba -#define CAP1106_REG_PRODUCT_ID 0xfd -#define CAP1106_REG_MANUFACTURER_ID 0xfe -#define CAP1106_REG_REVISION 0xff - -#define CAP1106_NUM_CHN 6 -#define CAP1106_PRODUCT_ID 0x55 -#define CAP1106_MANUFACTURER_ID 0x5d - -struct cap1106_priv { - struct regmap *regmap; - struct input_dev *idev; - - /* config */ - unsigned short keycodes[CAP1106_NUM_CHN]; -}; - -static const struct reg_default cap1106_reg_defaults[] = { - { CAP1106_REG_MAIN_CONTROL, 0x00 }, - { CAP1106_REG_GENERAL_STATUS, 0x00 }, - { CAP1106_REG_SENSOR_INPUT, 0x00 }, - { CAP1106_REG_NOISE_FLAG_STATUS, 0x00 }, - { CAP1106_REG_SENSITIVITY_CONTROL, 0x2f }, - { CAP1106_REG_CONFIG, 0x20 }, - { CAP1106_REG_SENSOR_ENABLE, 0x3f }, - { CAP1106_REG_SENSOR_CONFIG, 0xa4 }, - { CAP1106_REG_SENSOR_CONFIG2, 0x07 }, - { CAP1106_REG_SAMPLING_CONFIG, 0x39 }, - { CAP1106_REG_CALIBRATION, 0x00 }, - { CAP1106_REG_INT_ENABLE, 0x3f }, - { CAP1106_REG_REPEAT_RATE, 0x3f }, - { CAP1106_REG_MT_CONFIG, 0x80 }, - { CAP1106_REG_MT_PATTERN_CONFIG, 0x00 }, - { CAP1106_REG_MT_PATTERN, 0x3f }, - { CAP1106_REG_RECALIB_CONFIG, 0x8a }, - { CAP1106_REG_SENSOR_THRESH(0), 
0x40 }, - { CAP1106_REG_SENSOR_THRESH(1), 0x40 }, - { CAP1106_REG_SENSOR_THRESH(2), 0x40 }, - { CAP1106_REG_SENSOR_THRESH(3), 0x40 }, - { CAP1106_REG_SENSOR_THRESH(4), 0x40 }, - { CAP1106_REG_SENSOR_THRESH(5), 0x40 }, - { CAP1106_REG_SENSOR_NOISE_THRESH, 0x01 }, - { CAP1106_REG_STANDBY_CHANNEL, 0x00 }, - { CAP1106_REG_STANDBY_CONFIG, 0x39 }, - { CAP1106_REG_STANDBY_SENSITIVITY, 0x02 }, - { CAP1106_REG_STANDBY_THRESH, 0x40 }, - { CAP1106_REG_CONFIG2, 0x40 }, - { CAP1106_REG_SENSOR_CALIB_LSB1, 0x00 }, - { CAP1106_REG_SENSOR_CALIB_LSB2, 0x00 }, -}; - -static bool cap1106_volatile_reg(struct device *dev, unsigned int reg) -{ - switch (reg) { - case CAP1106_REG_MAIN_CONTROL: - case CAP1106_REG_SENSOR_INPUT: - case CAP1106_REG_SENOR_DELTA(0): - case CAP1106_REG_SENOR_DELTA(1): - case CAP1106_REG_SENOR_DELTA(2): - case CAP1106_REG_SENOR_DELTA(3): - case CAP1106_REG_SENOR_DELTA(4): - case CAP1106_REG_SENOR_DELTA(5): - case CAP1106_REG_PRODUCT_ID: - case CAP1106_REG_MANUFACTURER_ID: - case CAP1106_REG_REVISION: - return true; - } - - return false; -} - -static const struct regmap_config cap1106_regmap_config = { - .reg_bits = 8, - .val_bits = 8, - - .max_register = CAP1106_REG_REVISION, - .reg_defaults = cap1106_reg_defaults, - - .num_reg_defaults = ARRAY_SIZE(cap1106_reg_defaults), - .cache_type = REGCACHE_RBTREE, - .volatile_reg = cap1106_volatile_reg, -}; - -static irqreturn_t cap1106_thread_func(int irq_num, void *data) -{ - struct cap1106_priv *priv = data; - unsigned int status; - int ret, i; - - /* - * Deassert interrupt. This needs to be done before reading the status - * registers, which will not carry valid values otherwise. - */ - ret = regmap_update_bits(priv->regmap, CAP1106_REG_MAIN_CONTROL, 1, 0); - if (ret < 0) - goto out; - - ret = regmap_read(priv->regmap, CAP1106_REG_SENSOR_INPUT, &status); - if (ret < 0) - goto out; - - for (i = 0; i < CAP1106_NUM_CHN; i++) - input_report_key(priv->idev, priv->keycodes[i], - status & (1 << i)); - - input_sync(priv->idev); - -out: - return IRQ_HANDLED; -} - -static int cap1106_set_sleep(struct cap1106_priv *priv, bool sleep) -{ - return regmap_update_bits(priv->regmap, CAP1106_REG_MAIN_CONTROL, - CAP1106_REG_MAIN_CONTROL_DLSEEP, - sleep ? 
CAP1106_REG_MAIN_CONTROL_DLSEEP : 0); -} - -static int cap1106_input_open(struct input_dev *idev) -{ - struct cap1106_priv *priv = input_get_drvdata(idev); - - return cap1106_set_sleep(priv, false); -} - -static void cap1106_input_close(struct input_dev *idev) -{ - struct cap1106_priv *priv = input_get_drvdata(idev); - - cap1106_set_sleep(priv, true); -} - -static int cap1106_i2c_probe(struct i2c_client *i2c_client, - const struct i2c_device_id *id) -{ - struct device *dev = &i2c_client->dev; - struct cap1106_priv *priv; - struct device_node *node; - int i, error, irq, gain = 0; - unsigned int val, rev; - u32 gain32, keycodes[CAP1106_NUM_CHN]; - - priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->regmap = devm_regmap_init_i2c(i2c_client, &cap1106_regmap_config); - if (IS_ERR(priv->regmap)) - return PTR_ERR(priv->regmap); - - error = regmap_read(priv->regmap, CAP1106_REG_PRODUCT_ID, &val); - if (error) - return error; - - if (val != CAP1106_PRODUCT_ID) { - dev_err(dev, "Product ID: Got 0x%02x, expected 0x%02x\n", - val, CAP1106_PRODUCT_ID); - return -ENODEV; - } - - error = regmap_read(priv->regmap, CAP1106_REG_MANUFACTURER_ID, &val); - if (error) - return error; - - if (val != CAP1106_MANUFACTURER_ID) { - dev_err(dev, "Manufacturer ID: Got 0x%02x, expected 0x%02x\n", - val, CAP1106_MANUFACTURER_ID); - return -ENODEV; - } - - error = regmap_read(priv->regmap, CAP1106_REG_REVISION, &rev); - if (error < 0) - return error; - - dev_info(dev, "CAP1106 detected, revision 0x%02x\n", rev); - i2c_set_clientdata(i2c_client, priv); - node = dev->of_node; - - if (!of_property_read_u32(node, "microchip,sensor-gain", &gain32)) { - if (is_power_of_2(gain32) && gain32 <= 8) - gain = ilog2(gain32); - else - dev_err(dev, "Invalid sensor-gain value %d\n", gain32); - } - - BUILD_BUG_ON(ARRAY_SIZE(keycodes) != ARRAY_SIZE(priv->keycodes)); - - /* Provide some useful defaults */ - for (i = 0; i < ARRAY_SIZE(keycodes); i++) - keycodes[i] = KEY_A + i; - - of_property_read_u32_array(node, "linux,keycodes", - keycodes, ARRAY_SIZE(keycodes)); - - for (i = 0; i < ARRAY_SIZE(keycodes); i++) - priv->keycodes[i] = keycodes[i]; - - error = regmap_update_bits(priv->regmap, CAP1106_REG_MAIN_CONTROL, - CAP1106_REG_MAIN_CONTROL_GAIN_MASK, - gain << CAP1106_REG_MAIN_CONTROL_GAIN_SHIFT); - if (error) - return error; - - /* Disable autorepeat. The Linux input system has its own handling. */ - error = regmap_write(priv->regmap, CAP1106_REG_REPEAT_RATE, 0); - if (error) - return error; - - priv->idev = devm_input_allocate_device(dev); - if (!priv->idev) - return -ENOMEM; - - priv->idev->name = "CAP1106 capacitive touch sensor"; - priv->idev->id.bustype = BUS_I2C; - priv->idev->evbit[0] = BIT_MASK(EV_KEY); - - if (of_property_read_bool(node, "autorepeat")) - __set_bit(EV_REP, priv->idev->evbit); - - for (i = 0; i < CAP1106_NUM_CHN; i++) - __set_bit(priv->keycodes[i], priv->idev->keybit); - - __clear_bit(KEY_RESERVED, priv->idev->keybit); - - priv->idev->keycode = priv->keycodes; - priv->idev->keycodesize = sizeof(priv->keycodes[0]); - priv->idev->keycodemax = ARRAY_SIZE(priv->keycodes); - - priv->idev->id.vendor = CAP1106_MANUFACTURER_ID; - priv->idev->id.product = CAP1106_PRODUCT_ID; - priv->idev->id.version = rev; - - priv->idev->open = cap1106_input_open; - priv->idev->close = cap1106_input_close; - - input_set_drvdata(priv->idev, priv); - - /* - * Put the device in deep sleep mode for now. - * ->open() will bring it back once the it is actually needed. 
- */ - cap1106_set_sleep(priv, true); - - error = input_register_device(priv->idev); - if (error) - return error; - - irq = irq_of_parse_and_map(node, 0); - if (!irq) { - dev_err(dev, "Unable to parse or map IRQ\n"); - return -ENXIO; - } - - error = devm_request_threaded_irq(dev, irq, NULL, cap1106_thread_func, - IRQF_ONESHOT, dev_name(dev), priv); - if (error) - return error; - - return 0; -} - -static const struct of_device_id cap1106_dt_ids[] = { - { .compatible = "microchip,cap1106", }, - {} -}; -MODULE_DEVICE_TABLE(of, cap1106_dt_ids); - -static const struct i2c_device_id cap1106_i2c_ids[] = { - { "cap1106", 0 }, - {} -}; -MODULE_DEVICE_TABLE(i2c, cap1106_i2c_ids); - -static struct i2c_driver cap1106_i2c_driver = { - .driver = { - .name = "cap1106", - .owner = THIS_MODULE, - .of_match_table = cap1106_dt_ids, - }, - .id_table = cap1106_i2c_ids, - .probe = cap1106_i2c_probe, -}; - -module_i2c_driver(cap1106_i2c_driver); - -MODULE_ALIAS("platform:cap1106"); -MODULE_DESCRIPTION("Microchip CAP1106 driver"); -MODULE_AUTHOR("Daniel Mack <linux@zonque.org>"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/input/keyboard/cap11xx.c b/drivers/input/keyboard/cap11xx.c new file mode 100644 index 00000000000..4f59f0bab28 --- /dev/null +++ b/drivers/input/keyboard/cap11xx.c @@ -0,0 +1,376 @@ +/* + * Input driver for Microchip CAP11xx based capacitive touch sensors + * + * (c) 2014 Daniel Mack <linux@zonque.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/input.h> +#include <linux/of_irq.h> +#include <linux/regmap.h> +#include <linux/i2c.h> +#include <linux/gpio/consumer.h> + +#define CAP11XX_REG_MAIN_CONTROL 0x00 +#define CAP11XX_REG_MAIN_CONTROL_GAIN_SHIFT (6) +#define CAP11XX_REG_MAIN_CONTROL_GAIN_MASK (0xc0) +#define CAP11XX_REG_MAIN_CONTROL_DLSEEP BIT(4) +#define CAP11XX_REG_GENERAL_STATUS 0x02 +#define CAP11XX_REG_SENSOR_INPUT 0x03 +#define CAP11XX_REG_NOISE_FLAG_STATUS 0x0a +#define CAP11XX_REG_SENOR_DELTA(X) (0x10 + (X)) +#define CAP11XX_REG_SENSITIVITY_CONTROL 0x1f +#define CAP11XX_REG_CONFIG 0x20 +#define CAP11XX_REG_SENSOR_ENABLE 0x21 +#define CAP11XX_REG_SENSOR_CONFIG 0x22 +#define CAP11XX_REG_SENSOR_CONFIG2 0x23 +#define CAP11XX_REG_SAMPLING_CONFIG 0x24 +#define CAP11XX_REG_CALIBRATION 0x26 +#define CAP11XX_REG_INT_ENABLE 0x27 +#define CAP11XX_REG_REPEAT_RATE 0x28 +#define CAP11XX_REG_MT_CONFIG 0x2a +#define CAP11XX_REG_MT_PATTERN_CONFIG 0x2b +#define CAP11XX_REG_MT_PATTERN 0x2d +#define CAP11XX_REG_RECALIB_CONFIG 0x2f +#define CAP11XX_REG_SENSOR_THRESH(X) (0x30 + (X)) +#define CAP11XX_REG_SENSOR_NOISE_THRESH 0x38 +#define CAP11XX_REG_STANDBY_CHANNEL 0x40 +#define CAP11XX_REG_STANDBY_CONFIG 0x41 +#define CAP11XX_REG_STANDBY_SENSITIVITY 0x42 +#define CAP11XX_REG_STANDBY_THRESH 0x43 +#define CAP11XX_REG_CONFIG2 0x44 +#define CAP11XX_REG_CONFIG2_ALT_POL BIT(6) +#define CAP11XX_REG_SENSOR_BASE_CNT(X) (0x50 + (X)) +#define CAP11XX_REG_SENSOR_CALIB (0xb1 + (X)) +#define CAP11XX_REG_SENSOR_CALIB_LSB1 0xb9 +#define CAP11XX_REG_SENSOR_CALIB_LSB2 0xba +#define CAP11XX_REG_PRODUCT_ID 0xfd +#define CAP11XX_REG_MANUFACTURER_ID 0xfe +#define CAP11XX_REG_REVISION 0xff + +#define CAP11XX_MANUFACTURER_ID 0x5d + +struct cap11xx_priv { + struct regmap *regmap; + struct input_dev *idev; + + /* config */ + u32 keycodes[]; +}; + +struct cap11xx_hw_model { 
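+    /* expected PRODUCT_ID register value and channel count for one
+     * supported model; indexed by i2c_device_id->driver_data below */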
+ u8 product_id; + unsigned int num_channels; +}; + +enum { + CAP1106, + CAP1126, + CAP1188, +}; + +static const struct cap11xx_hw_model cap11xx_devices[] = { + [CAP1106] = { .product_id = 0x55, .num_channels = 6 }, + [CAP1126] = { .product_id = 0x53, .num_channels = 6 }, + [CAP1188] = { .product_id = 0x50, .num_channels = 8 }, +}; + +static const struct reg_default cap11xx_reg_defaults[] = { + { CAP11XX_REG_MAIN_CONTROL, 0x00 }, + { CAP11XX_REG_GENERAL_STATUS, 0x00 }, + { CAP11XX_REG_SENSOR_INPUT, 0x00 }, + { CAP11XX_REG_NOISE_FLAG_STATUS, 0x00 }, + { CAP11XX_REG_SENSITIVITY_CONTROL, 0x2f }, + { CAP11XX_REG_CONFIG, 0x20 }, + { CAP11XX_REG_SENSOR_ENABLE, 0x3f }, + { CAP11XX_REG_SENSOR_CONFIG, 0xa4 }, + { CAP11XX_REG_SENSOR_CONFIG2, 0x07 }, + { CAP11XX_REG_SAMPLING_CONFIG, 0x39 }, + { CAP11XX_REG_CALIBRATION, 0x00 }, + { CAP11XX_REG_INT_ENABLE, 0x3f }, + { CAP11XX_REG_REPEAT_RATE, 0x3f }, + { CAP11XX_REG_MT_CONFIG, 0x80 }, + { CAP11XX_REG_MT_PATTERN_CONFIG, 0x00 }, + { CAP11XX_REG_MT_PATTERN, 0x3f }, + { CAP11XX_REG_RECALIB_CONFIG, 0x8a }, + { CAP11XX_REG_SENSOR_THRESH(0), 0x40 }, + { CAP11XX_REG_SENSOR_THRESH(1), 0x40 }, + { CAP11XX_REG_SENSOR_THRESH(2), 0x40 }, + { CAP11XX_REG_SENSOR_THRESH(3), 0x40 }, + { CAP11XX_REG_SENSOR_THRESH(4), 0x40 }, + { CAP11XX_REG_SENSOR_THRESH(5), 0x40 }, + { CAP11XX_REG_SENSOR_NOISE_THRESH, 0x01 }, + { CAP11XX_REG_STANDBY_CHANNEL, 0x00 }, + { CAP11XX_REG_STANDBY_CONFIG, 0x39 }, + { CAP11XX_REG_STANDBY_SENSITIVITY, 0x02 }, + { CAP11XX_REG_STANDBY_THRESH, 0x40 }, + { CAP11XX_REG_CONFIG2, 0x40 }, + { CAP11XX_REG_SENSOR_CALIB_LSB1, 0x00 }, + { CAP11XX_REG_SENSOR_CALIB_LSB2, 0x00 }, +}; + +static bool cap11xx_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case CAP11XX_REG_MAIN_CONTROL: + case CAP11XX_REG_SENSOR_INPUT: + case CAP11XX_REG_SENOR_DELTA(0): + case CAP11XX_REG_SENOR_DELTA(1): + case CAP11XX_REG_SENOR_DELTA(2): + case CAP11XX_REG_SENOR_DELTA(3): + case CAP11XX_REG_SENOR_DELTA(4): + case CAP11XX_REG_SENOR_DELTA(5): + case CAP11XX_REG_PRODUCT_ID: + case CAP11XX_REG_MANUFACTURER_ID: + case CAP11XX_REG_REVISION: + return true; + } + + return false; +} + +static const struct regmap_config cap11xx_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + + .max_register = CAP11XX_REG_REVISION, + .reg_defaults = cap11xx_reg_defaults, + + .num_reg_defaults = ARRAY_SIZE(cap11xx_reg_defaults), + .cache_type = REGCACHE_RBTREE, + .volatile_reg = cap11xx_volatile_reg, +}; + +static irqreturn_t cap11xx_thread_func(int irq_num, void *data) +{ + struct cap11xx_priv *priv = data; + unsigned int status; + int ret, i; + + /* + * Deassert interrupt. This needs to be done before reading the status + * registers, which will not carry valid values otherwise. + */ + ret = regmap_update_bits(priv->regmap, CAP11XX_REG_MAIN_CONTROL, 1, 0); + if (ret < 0) + goto out; + + ret = regmap_read(priv->regmap, CAP11XX_REG_SENSOR_INPUT, &status); + if (ret < 0) + goto out; + + for (i = 0; i < priv->idev->keycodemax; i++) + input_report_key(priv->idev, priv->keycodes[i], + status & (1 << i)); + + input_sync(priv->idev); + +out: + return IRQ_HANDLED; +} + +static int cap11xx_set_sleep(struct cap11xx_priv *priv, bool sleep) +{ + return regmap_update_bits(priv->regmap, CAP11XX_REG_MAIN_CONTROL, + CAP11XX_REG_MAIN_CONTROL_DLSEEP, + sleep ? 
CAP11XX_REG_MAIN_CONTROL_DLSEEP : 0); +} + +static int cap11xx_input_open(struct input_dev *idev) +{ + struct cap11xx_priv *priv = input_get_drvdata(idev); + + return cap11xx_set_sleep(priv, false); +} + +static void cap11xx_input_close(struct input_dev *idev) +{ + struct cap11xx_priv *priv = input_get_drvdata(idev); + + cap11xx_set_sleep(priv, true); +} + +static int cap11xx_i2c_probe(struct i2c_client *i2c_client, + const struct i2c_device_id *id) +{ + struct device *dev = &i2c_client->dev; + struct cap11xx_priv *priv; + struct device_node *node; + const struct cap11xx_hw_model *cap; + int i, error, irq, gain = 0; + unsigned int val, rev; + u32 gain32; + + if (id->driver_data >= ARRAY_SIZE(cap11xx_devices)) { + dev_err(dev, "Invalid device ID %lu\n", id->driver_data); + return -EINVAL; + } + + cap = &cap11xx_devices[id->driver_data]; + if (!cap || !cap->num_channels) { + dev_err(dev, "Invalid device configuration\n"); + return -EINVAL; + } + + priv = devm_kzalloc(dev, + sizeof(*priv) + + cap->num_channels * sizeof(priv->keycodes[0]), + GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->regmap = devm_regmap_init_i2c(i2c_client, &cap11xx_regmap_config); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); + + error = regmap_read(priv->regmap, CAP11XX_REG_PRODUCT_ID, &val); + if (error) + return error; + + if (val != cap->product_id) { + dev_err(dev, "Product ID: Got 0x%02x, expected 0x%02x\n", + val, cap->product_id); + return -ENXIO; + } + + error = regmap_read(priv->regmap, CAP11XX_REG_MANUFACTURER_ID, &val); + if (error) + return error; + + if (val != CAP11XX_MANUFACTURER_ID) { + dev_err(dev, "Manufacturer ID: Got 0x%02x, expected 0x%02x\n", + val, CAP11XX_MANUFACTURER_ID); + return -ENXIO; + } + + error = regmap_read(priv->regmap, CAP11XX_REG_REVISION, &rev); + if (error < 0) + return error; + + dev_info(dev, "CAP11XX detected, revision 0x%02x\n", rev); + i2c_set_clientdata(i2c_client, priv); + node = dev->of_node; + + if (!of_property_read_u32(node, "microchip,sensor-gain", &gain32)) { + if (is_power_of_2(gain32) && gain32 <= 8) + gain = ilog2(gain32); + else + dev_err(dev, "Invalid sensor-gain value %d\n", gain32); + } + + if (of_property_read_bool(node, "microchip,irq-active-high")) { + error = regmap_update_bits(priv->regmap, CAP11XX_REG_CONFIG2, + CAP11XX_REG_CONFIG2_ALT_POL, 0); + if (error) + return error; + } + + /* Provide some useful defaults */ + for (i = 0; i < cap->num_channels; i++) + priv->keycodes[i] = KEY_A + i; + + of_property_read_u32_array(node, "linux,keycodes", + priv->keycodes, cap->num_channels); + + error = regmap_update_bits(priv->regmap, CAP11XX_REG_MAIN_CONTROL, + CAP11XX_REG_MAIN_CONTROL_GAIN_MASK, + gain << CAP11XX_REG_MAIN_CONTROL_GAIN_SHIFT); + if (error) + return error; + + /* Disable autorepeat. The Linux input system has its own handling. 
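+     * (With the chip's repeat engine left running, held keys would be
+     * reported twice once EV_REP soft-repeat is enabled as well.)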
*/ + error = regmap_write(priv->regmap, CAP11XX_REG_REPEAT_RATE, 0); + if (error) + return error; + + priv->idev = devm_input_allocate_device(dev); + if (!priv->idev) + return -ENOMEM; + + priv->idev->name = "CAP11XX capacitive touch sensor"; + priv->idev->id.bustype = BUS_I2C; + priv->idev->evbit[0] = BIT_MASK(EV_KEY); + + if (of_property_read_bool(node, "autorepeat")) + __set_bit(EV_REP, priv->idev->evbit); + + for (i = 0; i < cap->num_channels; i++) + __set_bit(priv->keycodes[i], priv->idev->keybit); + + __clear_bit(KEY_RESERVED, priv->idev->keybit); + + priv->idev->keycode = priv->keycodes; + priv->idev->keycodesize = sizeof(priv->keycodes[0]); + priv->idev->keycodemax = cap->num_channels; + + priv->idev->id.vendor = CAP11XX_MANUFACTURER_ID; + priv->idev->id.product = cap->product_id; + priv->idev->id.version = rev; + + priv->idev->open = cap11xx_input_open; + priv->idev->close = cap11xx_input_close; + + input_set_drvdata(priv->idev, priv); + + /* + * Put the device in deep sleep mode for now. + * ->open() will bring it back once it is actually needed. + */ + cap11xx_set_sleep(priv, true); + + error = input_register_device(priv->idev); + if (error) + return error; + + irq = irq_of_parse_and_map(node, 0); + if (!irq) { + dev_err(dev, "Unable to parse or map IRQ\n"); + return -ENXIO; + } + + error = devm_request_threaded_irq(dev, irq, NULL, cap11xx_thread_func, + IRQF_ONESHOT, dev_name(dev), priv); + if (error) + return error; + + return 0; +} + +static const struct of_device_id cap11xx_dt_ids[] = { + { .compatible = "microchip,cap1106", }, + { .compatible = "microchip,cap1126", }, + { .compatible = "microchip,cap1188", }, + {} +}; +MODULE_DEVICE_TABLE(of, cap11xx_dt_ids); + +static const struct i2c_device_id cap11xx_i2c_ids[] = { + { "cap1106", CAP1106 }, + { "cap1126", CAP1126 }, + { "cap1188", CAP1188 }, + {} +}; +MODULE_DEVICE_TABLE(i2c, cap11xx_i2c_ids); + +static struct i2c_driver cap11xx_i2c_driver = { + .driver = { + .name = "cap11xx", + .owner = THIS_MODULE, + .of_match_table = cap11xx_dt_ids, + }, + .id_table = cap11xx_i2c_ids, + .probe = cap11xx_i2c_probe, +}; + +module_i2c_driver(cap11xx_i2c_driver); + +MODULE_ALIAS("platform:cap11xx"); +MODULE_DESCRIPTION("Microchip CAP11XX driver"); +MODULE_AUTHOR("Daniel Mack <linux@zonque.org>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 8f3a24e1540..d4dd78a7d56 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -29,6 +29,7 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/of_gpio.h> +#include <linux/of_irq.h> #include <linux/spinlock.h> struct gpio_button_data { @@ -617,27 +618,31 @@ gpio_keys_get_devtree_pdata(struct device *dev) i = 0; for_each_child_of_node(node, pp) { - int gpio; + int gpio = -1; enum of_gpio_flags flags; - if (!of_find_property(pp, "gpios", NULL)) { - pdata->nbuttons--; - dev_warn(dev, "Found button without gpios\n"); - continue; - } + button = &pdata->buttons[i++]; - gpio = of_get_gpio_flags(pp, 0, &flags); - if (gpio < 0) { - error = gpio; - if (error != -EPROBE_DEFER) - dev_err(dev, - "Failed to get gpio flags, error: %d\n", - error); - return ERR_PTR(error); + if (!of_find_property(pp, "gpios", NULL)) { + button->irq = irq_of_parse_and_map(pp, 0); + if (button->irq == 0) { + i--; + pdata->nbuttons--; + dev_warn(dev, "Found button without gpios or irqs\n"); + continue; + } + } else { + gpio = of_get_gpio_flags(pp, 0, &flags); + if (gpio < 0) { + error = gpio; + if (error
!= -EPROBE_DEFER) + dev_err(dev, + "Failed to get gpio flags, error: %d\n", + error); + return ERR_PTR(error); + } } - button = &pdata->buttons[i++]; - button->gpio = gpio; button->active_low = flags & OF_GPIO_ACTIVE_LOW; diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c index cb32e2b506b..21bea52d436 100644 --- a/drivers/input/keyboard/lm8323.c +++ b/drivers/input/keyboard/lm8323.c @@ -616,6 +616,8 @@ static ssize_t lm8323_set_disable(struct device *dev, unsigned int i; ret = kstrtouint(buf, 10, &i); + if (ret) + return ret; mutex_lock(&lm->lock); lm->kp_enabled = !i; diff --git a/drivers/input/keyboard/lpc32xx-keys.c b/drivers/input/keyboard/lpc32xx-keys.c index 8c079371c2e..265d641c40e 100644 --- a/drivers/input/keyboard/lpc32xx-keys.c +++ b/drivers/input/keyboard/lpc32xx-keys.c @@ -66,7 +66,6 @@ struct lpc32xx_kscan_drv { struct input_dev *input; struct clk *clk; - struct resource *iores; void __iomem *kscan_base; unsigned int irq; @@ -188,32 +187,27 @@ static int lpc32xx_kscan_probe(struct platform_device *pdev) return -EINVAL; } - kscandat = kzalloc(sizeof(struct lpc32xx_kscan_drv), GFP_KERNEL); - if (!kscandat) { - dev_err(&pdev->dev, "failed to allocate memory\n"); + kscandat = devm_kzalloc(&pdev->dev, sizeof(*kscandat), + GFP_KERNEL); + if (!kscandat) return -ENOMEM; - } error = lpc32xx_parse_dt(&pdev->dev, kscandat); if (error) { dev_err(&pdev->dev, "failed to parse device tree\n"); - goto err_free_mem; + return error; } keymap_size = sizeof(kscandat->keymap[0]) * (kscandat->matrix_sz << kscandat->row_shift); - kscandat->keymap = kzalloc(keymap_size, GFP_KERNEL); - if (!kscandat->keymap) { - dev_err(&pdev->dev, "could not allocate memory for keymap\n"); - error = -ENOMEM; - goto err_free_mem; - } + kscandat->keymap = devm_kzalloc(&pdev->dev, keymap_size, GFP_KERNEL); + if (!kscandat->keymap) + return -ENOMEM; - kscandat->input = input = input_allocate_device(); + kscandat->input = input = devm_input_allocate_device(&pdev->dev); if (!input) { dev_err(&pdev->dev, "failed to allocate input device\n"); - error = -ENOMEM; - goto err_free_keymap; + return -ENOMEM; } /* Setup key input */ @@ -234,39 +228,26 @@ static int lpc32xx_kscan_probe(struct platform_device *pdev) kscandat->keymap, kscandat->input); if (error) { dev_err(&pdev->dev, "failed to build keymap\n"); - goto err_free_input; + return error; } input_set_drvdata(kscandat->input, kscandat); - kscandat->iores = request_mem_region(res->start, resource_size(res), - pdev->name); - if (!kscandat->iores) { - dev_err(&pdev->dev, "failed to request I/O memory\n"); - error = -EBUSY; - goto err_free_input; - } - - kscandat->kscan_base = ioremap(kscandat->iores->start, - resource_size(kscandat->iores)); - if (!kscandat->kscan_base) { - dev_err(&pdev->dev, "failed to remap I/O memory\n"); - error = -EBUSY; - goto err_release_memregion; - } + kscandat->kscan_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(kscandat->kscan_base)) + return PTR_ERR(kscandat->kscan_base); /* Get the key scanner clock */ - kscandat->clk = clk_get(&pdev->dev, NULL); + kscandat->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(kscandat->clk)) { dev_err(&pdev->dev, "failed to get clock\n"); - error = PTR_ERR(kscandat->clk); - goto err_unmap; + return PTR_ERR(kscandat->clk); } /* Configure the key scanner */ error = clk_prepare_enable(kscandat->clk); if (error) - goto err_clk_put; + return error; writel(kscandat->deb_clks, LPC32XX_KS_DEB(kscandat->kscan_base)); writel(kscandat->scan_delay, 
LPC32XX_KS_SCAN_CTL(kscandat->kscan_base)); @@ -277,52 +258,20 @@ static int lpc32xx_kscan_probe(struct platform_device *pdev) writel(1, LPC32XX_KS_IRQ(kscandat->kscan_base)); clk_disable_unprepare(kscandat->clk); - error = request_irq(irq, lpc32xx_kscan_irq, 0, pdev->name, kscandat); + error = devm_request_irq(&pdev->dev, irq, lpc32xx_kscan_irq, 0, + pdev->name, kscandat); if (error) { dev_err(&pdev->dev, "failed to request irq\n"); - goto err_clk_put; + return error; } error = input_register_device(kscandat->input); if (error) { dev_err(&pdev->dev, "failed to register input device\n"); - goto err_free_irq; + return error; } platform_set_drvdata(pdev, kscandat); - return 0; - -err_free_irq: - free_irq(irq, kscandat); -err_clk_put: - clk_put(kscandat->clk); -err_unmap: - iounmap(kscandat->kscan_base); -err_release_memregion: - release_mem_region(kscandat->iores->start, - resource_size(kscandat->iores)); -err_free_input: - input_free_device(kscandat->input); -err_free_keymap: - kfree(kscandat->keymap); -err_free_mem: - kfree(kscandat); - - return error; -} - -static int lpc32xx_kscan_remove(struct platform_device *pdev) -{ - struct lpc32xx_kscan_drv *kscandat = platform_get_drvdata(pdev); - - free_irq(platform_get_irq(pdev, 0), kscandat); - clk_put(kscandat->clk); - iounmap(kscandat->kscan_base); - release_mem_region(kscandat->iores->start, - resource_size(kscandat->iores)); - input_unregister_device(kscandat->input); - kfree(kscandat->keymap); - kfree(kscandat); return 0; } @@ -378,7 +327,6 @@ MODULE_DEVICE_TABLE(of, lpc32xx_kscan_match); static struct platform_driver lpc32xx_kscan_driver = { .probe = lpc32xx_kscan_probe, - .remove = lpc32xx_kscan_remove, .driver = { .name = DRV_NAME, .pm = &lpc32xx_kscan_pm_ops, diff --git a/drivers/input/keyboard/mpr121_touchkey.c b/drivers/input/keyboard/mpr121_touchkey.c index 009c82256e8..3aa2ec45bca 100644 --- a/drivers/input/keyboard/mpr121_touchkey.c +++ b/drivers/input/keyboard/mpr121_touchkey.c @@ -214,13 +214,14 @@ static int mpr_touchkey_probe(struct i2c_client *client, return -EINVAL; } - mpr121 = kzalloc(sizeof(struct mpr121_touchkey), GFP_KERNEL); - input_dev = input_allocate_device(); - if (!mpr121 || !input_dev) { - dev_err(&client->dev, "Failed to allocate memory\n"); - error = -ENOMEM; - goto err_free_mem; - } + mpr121 = devm_kzalloc(&client->dev, sizeof(*mpr121), + GFP_KERNEL); + if (!mpr121) + return -ENOMEM; + + input_dev = devm_input_allocate_device(&client->dev); + if (!input_dev) + return -ENOMEM; mpr121->client = client; mpr121->input_dev = input_dev; @@ -243,44 +244,26 @@ static int mpr_touchkey_probe(struct i2c_client *client, error = mpr121_phys_init(pdata, mpr121, client); if (error) { dev_err(&client->dev, "Failed to init register\n"); - goto err_free_mem; + return error; } - error = request_threaded_irq(client->irq, NULL, + error = devm_request_threaded_irq(&client->dev, client->irq, NULL, mpr_touchkey_interrupt, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, client->dev.driver->name, mpr121); if (error) { dev_err(&client->dev, "Failed to register interrupt\n"); - goto err_free_mem; + return error; } error = input_register_device(input_dev); if (error) - goto err_free_irq; + return error; i2c_set_clientdata(client, mpr121); device_init_wakeup(&client->dev, pdata->wakeup); return 0; - -err_free_irq: - free_irq(client->irq, mpr121); -err_free_mem: - input_free_device(input_dev); - kfree(mpr121); - return error; -} - -static int mpr_touchkey_remove(struct i2c_client *client) -{ - struct mpr121_touchkey *mpr121 = 
i2c_get_clientdata(client); - - free_irq(client->irq, mpr121); - input_unregister_device(mpr121->input_dev); - kfree(mpr121); - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -327,7 +310,6 @@ static struct i2c_driver mpr_touchkey_driver = { }, .id_table = mpr121_id, .probe = mpr_touchkey_probe, - .remove = mpr_touchkey_remove, }; module_i2c_driver(mpr_touchkey_driver); diff --git a/drivers/input/keyboard/pxa27x_keypad.c b/drivers/input/keyboard/pxa27x_keypad.c index 6ab3e7c9632..a90d6bdc499 100644 --- a/drivers/input/keyboard/pxa27x_keypad.c +++ b/drivers/input/keyboard/pxa27x_keypad.c @@ -741,37 +741,27 @@ static int pxa27x_keypad_probe(struct platform_device *pdev) return -ENXIO; } - keypad = kzalloc(sizeof(struct pxa27x_keypad), GFP_KERNEL); - input_dev = input_allocate_device(); - if (!keypad || !input_dev) { - dev_err(&pdev->dev, "failed to allocate memory\n"); - error = -ENOMEM; - goto failed_free; - } + keypad = devm_kzalloc(&pdev->dev, sizeof(*keypad), + GFP_KERNEL); + if (!keypad) + return -ENOMEM; + + input_dev = devm_input_allocate_device(&pdev->dev); + if (!input_dev) + return -ENOMEM; keypad->pdata = pdata; keypad->input_dev = input_dev; keypad->irq = irq; - res = request_mem_region(res->start, resource_size(res), pdev->name); - if (res == NULL) { - dev_err(&pdev->dev, "failed to request I/O memory\n"); - error = -EBUSY; - goto failed_free; - } - - keypad->mmio_base = ioremap(res->start, resource_size(res)); - if (keypad->mmio_base == NULL) { - dev_err(&pdev->dev, "failed to remap I/O memory\n"); - error = -ENXIO; - goto failed_free_mem; - } + keypad->mmio_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(keypad->mmio_base)) + return PTR_ERR(keypad->mmio_base); - keypad->clk = clk_get(&pdev->dev, NULL); + keypad->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(keypad->clk)) { dev_err(&pdev->dev, "failed to get keypad clock\n"); - error = PTR_ERR(keypad->clk); - goto failed_free_io; + return PTR_ERR(keypad->clk); } input_dev->name = pdev->name; @@ -802,7 +792,7 @@ static int pxa27x_keypad_probe(struct platform_device *pdev) } if (error) { dev_err(&pdev->dev, "failed to build keycode\n"); - goto failed_put_clk; + return error; } keypad->row_shift = get_count_order(pdata->matrix_key_cols); @@ -812,61 +802,26 @@ static int pxa27x_keypad_probe(struct platform_device *pdev) input_dev->evbit[0] |= BIT_MASK(EV_REL); } - error = request_irq(irq, pxa27x_keypad_irq_handler, 0, - pdev->name, keypad); + error = devm_request_irq(&pdev->dev, irq, pxa27x_keypad_irq_handler, + 0, pdev->name, keypad); if (error) { dev_err(&pdev->dev, "failed to request IRQ\n"); - goto failed_put_clk; + return error; } /* Register the input device */ error = input_register_device(input_dev); if (error) { dev_err(&pdev->dev, "failed to register input device\n"); - goto failed_free_irq; + return error; } platform_set_drvdata(pdev, keypad); device_init_wakeup(&pdev->dev, 1); return 0; - -failed_free_irq: - free_irq(irq, keypad); -failed_put_clk: - clk_put(keypad->clk); -failed_free_io: - iounmap(keypad->mmio_base); -failed_free_mem: - release_mem_region(res->start, resource_size(res)); -failed_free: - input_free_device(input_dev); - kfree(keypad); - return error; } -static int pxa27x_keypad_remove(struct platform_device *pdev) -{ - struct pxa27x_keypad *keypad = platform_get_drvdata(pdev); - struct resource *res; - - free_irq(keypad->irq, keypad); - clk_put(keypad->clk); - - input_unregister_device(keypad->input_dev); - iounmap(keypad->mmio_base); - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - 
release_mem_region(res->start, resource_size(res)); - - kfree(keypad); - - return 0; -} - -/* work with hotplug and coldplug */ -MODULE_ALIAS("platform:pxa27x-keypad"); - #ifdef CONFIG_OF static const struct of_device_id pxa27x_keypad_dt_match[] = { { .compatible = "marvell,pxa27x-keypad" }, @@ -877,7 +832,6 @@ MODULE_DEVICE_TABLE(of, pxa27x_keypad_dt_match); static struct platform_driver pxa27x_keypad_driver = { .probe = pxa27x_keypad_probe, - .remove = pxa27x_keypad_remove, .driver = { .name = "pxa27x-keypad", .of_match_table = of_match_ptr(pxa27x_keypad_dt_match), @@ -888,3 +842,5 @@ module_platform_driver(pxa27x_keypad_driver); MODULE_DESCRIPTION("PXA27x Keypad Controller Driver"); MODULE_LICENSE("GPL"); +/* work with hotplug and coldplug */ +MODULE_ALIAS("platform:pxa27x-keypad"); diff --git a/drivers/input/misc/88pm860x_onkey.c b/drivers/input/misc/88pm860x_onkey.c index cfdca6e9977..cc87443aa2e 100644 --- a/drivers/input/misc/88pm860x_onkey.c +++ b/drivers/input/misc/88pm860x_onkey.c @@ -112,8 +112,7 @@ static int pm860x_onkey_probe(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int pm860x_onkey_suspend(struct device *dev) +static int __maybe_unused pm860x_onkey_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent); @@ -122,7 +121,7 @@ static int pm860x_onkey_suspend(struct device *dev) chip->wakeup_flag |= 1 << PM8607_IRQ_ONKEY; return 0; } -static int pm860x_onkey_resume(struct device *dev) +static int __maybe_unused pm860x_onkey_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent); @@ -131,7 +130,6 @@ static int pm860x_onkey_resume(struct device *dev) chip->wakeup_flag &= ~(1 << PM8607_IRQ_ONKEY); return 0; } -#endif static SIMPLE_DEV_PM_OPS(pm860x_onkey_pm_ops, pm860x_onkey_suspend, pm860x_onkey_resume); diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c index e0f522516ef..189bdc8e91a 100644 --- a/drivers/input/misc/ad714x-i2c.c +++ b/drivers/input/misc/ad714x-i2c.c @@ -13,17 +13,15 @@ #include <linux/pm.h> #include "ad714x.h" -#ifdef CONFIG_PM_SLEEP -static int ad714x_i2c_suspend(struct device *dev) +static int __maybe_unused ad714x_i2c_suspend(struct device *dev) { return ad714x_disable(i2c_get_clientdata(to_i2c_client(dev))); } -static int ad714x_i2c_resume(struct device *dev) +static int __maybe_unused ad714x_i2c_resume(struct device *dev) { return ad714x_enable(i2c_get_clientdata(to_i2c_client(dev))); } -#endif static SIMPLE_DEV_PM_OPS(ad714x_i2c_pm, ad714x_i2c_suspend, ad714x_i2c_resume); diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c index 3a90b710e30..a79e50b58bf 100644 --- a/drivers/input/misc/ad714x-spi.c +++ b/drivers/input/misc/ad714x-spi.c @@ -16,17 +16,15 @@ #define AD714x_SPI_CMD_PREFIX 0xE000 /* bits 15:11 */ #define AD714x_SPI_READ BIT(10) -#ifdef CONFIG_PM_SLEEP -static int ad714x_spi_suspend(struct device *dev) +static int __maybe_unused ad714x_spi_suspend(struct device *dev) { return ad714x_disable(spi_get_drvdata(to_spi_device(dev))); } -static int ad714x_spi_resume(struct device *dev) +static int __maybe_unused ad714x_spi_resume(struct device *dev) { return ad714x_enable(spi_get_drvdata(to_spi_device(dev))); } -#endif static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume); diff --git a/drivers/input/misc/adxl34x-i2c.c b/drivers/input/misc/adxl34x-i2c.c index 
416f47ddcc9..470bfd6f083 100644 --- a/drivers/input/misc/adxl34x-i2c.c +++ b/drivers/input/misc/adxl34x-i2c.c @@ -105,8 +105,7 @@ static int adxl34x_i2c_remove(struct i2c_client *client) return adxl34x_remove(ac); } -#ifdef CONFIG_PM_SLEEP -static int adxl34x_i2c_suspend(struct device *dev) +static int __maybe_unused adxl34x_i2c_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct adxl34x *ac = i2c_get_clientdata(client); @@ -116,7 +115,7 @@ static int adxl34x_i2c_suspend(struct device *dev) return 0; } -static int adxl34x_i2c_resume(struct device *dev) +static int __maybe_unused adxl34x_i2c_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct adxl34x *ac = i2c_get_clientdata(client); @@ -125,7 +124,6 @@ static int adxl34x_i2c_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(adxl34x_i2c_pm, adxl34x_i2c_suspend, adxl34x_i2c_resume); diff --git a/drivers/input/misc/adxl34x-spi.c b/drivers/input/misc/adxl34x-spi.c index 76dc0679d3b..da6e76b58da 100644 --- a/drivers/input/misc/adxl34x-spi.c +++ b/drivers/input/misc/adxl34x-spi.c @@ -94,8 +94,7 @@ static int adxl34x_spi_remove(struct spi_device *spi) return adxl34x_remove(ac); } -#ifdef CONFIG_PM_SLEEP -static int adxl34x_spi_suspend(struct device *dev) +static int __maybe_unused adxl34x_spi_suspend(struct device *dev) { struct spi_device *spi = to_spi_device(dev); struct adxl34x *ac = spi_get_drvdata(spi); @@ -105,7 +104,7 @@ static int adxl34x_spi_suspend(struct device *dev) return 0; } -static int adxl34x_spi_resume(struct device *dev) +static int __maybe_unused adxl34x_spi_resume(struct device *dev) { struct spi_device *spi = to_spi_device(dev); struct adxl34x *ac = spi_get_drvdata(spi); @@ -114,7 +113,6 @@ static int adxl34x_spi_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(adxl34x_spi_pm, adxl34x_spi_suspend, adxl34x_spi_resume); diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c index cab87f5ce6d..a364e109ca7 100644 --- a/drivers/input/misc/drv260x.c +++ b/drivers/input/misc/drv260x.c @@ -639,8 +639,7 @@ static int drv260x_probe(struct i2c_client *client, return 0; } -#ifdef CONFIG_PM_SLEEP -static int drv260x_suspend(struct device *dev) +static int __maybe_unused drv260x_suspend(struct device *dev) { struct drv260x_data *haptics = dev_get_drvdata(dev); int ret = 0; @@ -672,7 +671,7 @@ out: return ret; } -static int drv260x_resume(struct device *dev) +static int __maybe_unused drv260x_resume(struct device *dev) { struct drv260x_data *haptics = dev_get_drvdata(dev); int ret = 0; @@ -702,7 +701,6 @@ out: mutex_unlock(&haptics->input_dev->mutex); return ret; } -#endif static SIMPLE_DEV_PM_OPS(drv260x_pm_ops, drv260x_suspend, drv260x_resume); diff --git a/drivers/input/misc/drv2667.c b/drivers/input/misc/drv2667.c index 0f437581cc0..a021744e608 100644 --- a/drivers/input/misc/drv2667.c +++ b/drivers/input/misc/drv2667.c @@ -406,8 +406,7 @@ static int drv2667_probe(struct i2c_client *client, return 0; } -#ifdef CONFIG_PM_SLEEP -static int drv2667_suspend(struct device *dev) +static int __maybe_unused drv2667_suspend(struct device *dev) { struct drv2667_data *haptics = dev_get_drvdata(dev); int ret = 0; @@ -436,7 +435,7 @@ out: return ret; } -static int drv2667_resume(struct device *dev) +static int __maybe_unused drv2667_resume(struct device *dev) { struct drv2667_data *haptics = dev_get_drvdata(dev); int ret = 0; @@ -464,7 +463,6 @@ out: mutex_unlock(&haptics->input_dev->mutex); return ret; } -#endif static 
SIMPLE_DEV_PM_OPS(drv2667_pm_ops, drv2667_suspend, drv2667_resume); diff --git a/drivers/input/misc/gp2ap002a00f.c b/drivers/input/misc/gp2ap002a00f.c index de21e317da3..0ac176d66a6 100644 --- a/drivers/input/misc/gp2ap002a00f.c +++ b/drivers/input/misc/gp2ap002a00f.c @@ -225,8 +225,7 @@ static int gp2a_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int gp2a_suspend(struct device *dev) +static int __maybe_unused gp2a_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct gp2a_data *dt = i2c_get_clientdata(client); @@ -244,7 +243,7 @@ static int gp2a_suspend(struct device *dev) return retval; } -static int gp2a_resume(struct device *dev) +static int __maybe_unused gp2a_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct gp2a_data *dt = i2c_get_clientdata(client); @@ -261,7 +260,6 @@ static int gp2a_resume(struct device *dev) return retval; } -#endif static SIMPLE_DEV_PM_OPS(gp2a_pm, gp2a_suspend, gp2a_resume); diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index afed8e2b2f9..ac1fa5f4458 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1851,7 +1851,7 @@ static int ims_pcu_identify_type(struct ims_pcu *pcu, u8 *device_id) static int ims_pcu_init_application_mode(struct ims_pcu *pcu) { - static atomic_t device_no = ATOMIC_INIT(0); + static atomic_t device_no = ATOMIC_INIT(-1); const struct ims_pcu_device_info *info; int error; @@ -1882,7 +1882,7 @@ static int ims_pcu_init_application_mode(struct ims_pcu *pcu) } /* Device appears to be operable, complete initialization */ - pcu->device_no = atomic_inc_return(&device_no) - 1; + pcu->device_no = atomic_inc_return(&device_no); /* * PCU-B devices, both GEN_1 and GEN_2 do not have OFN sensor diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c index d708478bc5b..6e29349da53 100644 --- a/drivers/input/misc/kxtj9.c +++ b/drivers/input/misc/kxtj9.c @@ -615,8 +615,7 @@ static int kxtj9_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int kxtj9_suspend(struct device *dev) +static int __maybe_unused kxtj9_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct kxtj9_data *tj9 = i2c_get_clientdata(client); @@ -631,7 +630,7 @@ static int kxtj9_suspend(struct device *dev) return 0; } -static int kxtj9_resume(struct device *dev) +static int __maybe_unused kxtj9_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct kxtj9_data *tj9 = i2c_get_clientdata(client); @@ -646,7 +645,6 @@ static int kxtj9_resume(struct device *dev) mutex_unlock(&input_dev->mutex); return retval; } -#endif static SIMPLE_DEV_PM_OPS(kxtj9_pm_ops, kxtj9_suspend, kxtj9_resume); diff --git a/drivers/input/misc/max77693-haptic.c b/drivers/input/misc/max77693-haptic.c index 034093ee63b..39e930c10eb 100644 --- a/drivers/input/misc/max77693-haptic.c +++ b/drivers/input/misc/max77693-haptic.c @@ -309,8 +309,7 @@ static int max77693_haptic_probe(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int max77693_haptic_suspend(struct device *dev) +static int __maybe_unused max77693_haptic_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct max77693_haptic *haptic = platform_get_drvdata(pdev); @@ -323,7 +322,7 @@ static int max77693_haptic_suspend(struct device *dev) return 0; } -static int max77693_haptic_resume(struct device *dev) +static int __maybe_unused max77693_haptic_resume(struct 
device *dev) { struct platform_device *pdev = to_platform_device(dev); struct max77693_haptic *haptic = platform_get_drvdata(pdev); @@ -335,7 +334,6 @@ static int max77693_haptic_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(max77693_haptic_pm_ops, max77693_haptic_suspend, max77693_haptic_resume); diff --git a/drivers/input/misc/max8925_onkey.c b/drivers/input/misc/max8925_onkey.c index 297e2a9169d..7c49b8d2389 100644 --- a/drivers/input/misc/max8925_onkey.c +++ b/drivers/input/misc/max8925_onkey.c @@ -133,8 +133,7 @@ static int max8925_onkey_probe(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int max8925_onkey_suspend(struct device *dev) +static int __maybe_unused max8925_onkey_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct max8925_onkey_info *info = platform_get_drvdata(pdev); @@ -148,7 +147,7 @@ static int max8925_onkey_suspend(struct device *dev) return 0; } -static int max8925_onkey_resume(struct device *dev) +static int __maybe_unused max8925_onkey_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct max8925_onkey_info *info = platform_get_drvdata(pdev); @@ -161,7 +160,6 @@ static int max8925_onkey_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(max8925_onkey_pm_ops, max8925_onkey_suspend, max8925_onkey_resume); diff --git a/drivers/input/misc/max8997_haptic.c b/drivers/input/misc/max8997_haptic.c index 5b3154edf82..d0f68728133 100644 --- a/drivers/input/misc/max8997_haptic.c +++ b/drivers/input/misc/max8997_haptic.c @@ -378,8 +378,7 @@ static int max8997_haptic_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int max8997_haptic_suspend(struct device *dev) +static int __maybe_unused max8997_haptic_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct max8997_haptic *chip = platform_get_drvdata(pdev); @@ -388,7 +387,6 @@ static int max8997_haptic_suspend(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(max8997_haptic_pm_ops, max8997_haptic_suspend, NULL); diff --git a/drivers/input/misc/palmas-pwrbutton.c b/drivers/input/misc/palmas-pwrbutton.c index 066c5ab632c..1f9b5ee9274 100644 --- a/drivers/input/misc/palmas-pwrbutton.c +++ b/drivers/input/misc/palmas-pwrbutton.c @@ -260,7 +260,6 @@ static int palmas_pwron_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP /** * palmas_pwron_suspend() - suspend handler * @dev: power button device @@ -269,7 +268,7 @@ static int palmas_pwron_remove(struct platform_device *pdev) * * Return: 0 */ -static int palmas_pwron_suspend(struct device *dev) +static int __maybe_unused palmas_pwron_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct palmas_pwron *pwron = platform_get_drvdata(pdev); @@ -290,7 +289,7 @@ static int palmas_pwron_suspend(struct device *dev) * * Return: 0 */ -static int palmas_pwron_resume(struct device *dev) +static int __maybe_unused palmas_pwron_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct palmas_pwron *pwron = platform_get_drvdata(pdev); @@ -300,7 +299,6 @@ static int palmas_pwron_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(palmas_pwron_pm, palmas_pwron_suspend, palmas_pwron_resume); diff --git a/drivers/input/misc/pm8xxx-vibrator.c b/drivers/input/misc/pm8xxx-vibrator.c index e9c77a95717..5113877153d 100644 --- 
a/drivers/input/misc/pm8xxx-vibrator.c +++ b/drivers/input/misc/pm8xxx-vibrator.c @@ -199,8 +199,7 @@ static int pm8xxx_vib_probe(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int pm8xxx_vib_suspend(struct device *dev) +static int __maybe_unused pm8xxx_vib_suspend(struct device *dev) { struct pm8xxx_vib *vib = dev_get_drvdata(dev); @@ -209,7 +208,6 @@ static int pm8xxx_vib_suspend(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(pm8xxx_vib_pm_ops, pm8xxx_vib_suspend, NULL); diff --git a/drivers/input/misc/pmic8xxx-pwrkey.c b/drivers/input/misc/pmic8xxx-pwrkey.c index cb799177cbd..c4ca20e6322 100644 --- a/drivers/input/misc/pmic8xxx-pwrkey.c +++ b/drivers/input/misc/pmic8xxx-pwrkey.c @@ -53,8 +53,7 @@ static irqreturn_t pwrkey_release_irq(int irq, void *_pwr) return IRQ_HANDLED; } -#ifdef CONFIG_PM_SLEEP -static int pmic8xxx_pwrkey_suspend(struct device *dev) +static int __maybe_unused pmic8xxx_pwrkey_suspend(struct device *dev) { struct pmic8xxx_pwrkey *pwrkey = dev_get_drvdata(dev); @@ -64,7 +63,7 @@ static int pmic8xxx_pwrkey_suspend(struct device *dev) return 0; } -static int pmic8xxx_pwrkey_resume(struct device *dev) +static int __maybe_unused pmic8xxx_pwrkey_resume(struct device *dev) { struct pmic8xxx_pwrkey *pwrkey = dev_get_drvdata(dev); @@ -73,7 +72,6 @@ static int pmic8xxx_pwrkey_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(pm8xxx_pwr_key_pm_ops, pmic8xxx_pwrkey_suspend, pmic8xxx_pwrkey_resume); diff --git a/drivers/input/misc/pwm-beeper.c b/drivers/input/misc/pwm-beeper.c index 294aa48bad5..a28ee70ff15 100644 --- a/drivers/input/misc/pwm-beeper.c +++ b/drivers/input/misc/pwm-beeper.c @@ -144,8 +144,7 @@ static int pwm_beeper_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int pwm_beeper_suspend(struct device *dev) +static int __maybe_unused pwm_beeper_suspend(struct device *dev) { struct pwm_beeper *beeper = dev_get_drvdata(dev); @@ -155,7 +154,7 @@ static int pwm_beeper_suspend(struct device *dev) return 0; } -static int pwm_beeper_resume(struct device *dev) +static int __maybe_unused pwm_beeper_resume(struct device *dev) { struct pwm_beeper *beeper = dev_get_drvdata(dev); @@ -170,6 +169,7 @@ static int pwm_beeper_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(pwm_beeper_pm_ops, pwm_beeper_suspend, pwm_beeper_resume); +#ifdef CONFIG_PM_SLEEP #define PWM_BEEPER_PM_OPS (&pwm_beeper_pm_ops) #else #define PWM_BEEPER_PM_OPS NULL diff --git a/drivers/input/misc/sirfsoc-onkey.c b/drivers/input/misc/sirfsoc-onkey.c index 4faf9f8d124..9d5b89befe6 100644 --- a/drivers/input/misc/sirfsoc-onkey.c +++ b/drivers/input/misc/sirfsoc-onkey.c @@ -179,8 +179,7 @@ static int sirfsoc_pwrc_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int sirfsoc_pwrc_resume(struct device *dev) +static int __maybe_unused sirfsoc_pwrc_resume(struct device *dev) { struct sirfsoc_pwrc_drvdata *pwrcdrv = dev_get_drvdata(dev); struct input_dev *input = pwrcdrv->input; @@ -196,7 +195,6 @@ static int sirfsoc_pwrc_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(sirfsoc_pwrc_pm_ops, NULL, sirfsoc_pwrc_resume); diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c index ccd6dd18f8f..fc17b9592f5 100644 --- a/drivers/input/misc/twl4030-vibra.c +++ b/drivers/input/misc/twl4030-vibra.c @@ -157,8 +157,7 @@ static void twl4030_vibra_close(struct input_dev *input) } /*** Module ***/ -#ifdef CONFIG_PM_SLEEP -static int 
twl4030_vibra_suspend(struct device *dev) +static int __maybe_unused twl4030_vibra_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct vibra_info *info = platform_get_drvdata(pdev); @@ -169,12 +168,11 @@ static int twl4030_vibra_suspend(struct device *dev) return 0; } -static int twl4030_vibra_resume(struct device *dev) +static int __maybe_unused twl4030_vibra_resume(struct device *dev) { vibra_disable_leds(); return 0; } -#endif static SIMPLE_DEV_PM_OPS(twl4030_vibra_pm_ops, twl4030_vibra_suspend, twl4030_vibra_resume); diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c index 96e0e0c0ccb..0e0d094df2e 100644 --- a/drivers/input/misc/twl6040-vibra.c +++ b/drivers/input/misc/twl6040-vibra.c @@ -236,8 +236,7 @@ static void twl6040_vibra_close(struct input_dev *input) mutex_unlock(&info->mutex); } -#ifdef CONFIG_PM_SLEEP -static int twl6040_vibra_suspend(struct device *dev) +static int __maybe_unused twl6040_vibra_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct vibra_info *info = platform_get_drvdata(pdev); @@ -251,7 +250,6 @@ static int twl6040_vibra_suspend(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(twl6040_vibra_pm_ops, twl6040_vibra_suspend, NULL); diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig index 366fc7ad5eb..d8b46b0f2db 100644 --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -215,6 +215,36 @@ config MOUSE_CYAPA To compile this driver as a module, choose M here: the module will be called cyapa. +config MOUSE_ELAN_I2C + tristate "ELAN I2C Touchpad support" + depends on I2C + help + This driver adds support for Elan I2C/SMBus Trackpads. + + Say Y here if you have an ELAN I2C/SMBus Touchpad. + + To compile this driver as a module, choose M here: the module will be + called elan_i2c. + +config MOUSE_ELAN_I2C_I2C + bool "Enable I2C support" + depends on MOUSE_ELAN_I2C + default y + help + Say Y here if the Elan Touchpad in your system is connected to + a standard I2C controller. + + If unsure, say Y. + +config MOUSE_ELAN_I2C_SMBUS + bool "Enable SMBus support" + depends on MOUSE_ELAN_I2C + help + Say Y here if the Elan Touchpad in your system is connected to + an SMBus adapter. + + If unsure, say Y.
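
The two bool options above select which transport implementations get linked into the composite elan_i2c module (see the Makefile hunk later in this patch); the core then picks one at probe time according to what the host adapter can actually do. A minimal sketch of that runtime selection, assuming the elan_i2c_ops/elan_smbus_ops tables declared in elan_i2c.h later in this series; the helper name and the exact SMBus functionality mask are illustrative assumptions, not code from this patch:

    /*
     * Sketch: choose a transport by querying adapter capabilities.
     * elan_transport_ops, elan_i2c_ops and elan_smbus_ops are from
     * elan_i2c.h below; the functionality mask is an assumed example.
     */
    static int example_pick_transport(struct i2c_client *client,
                                      const struct elan_transport_ops **ops)
    {
            if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
                    *ops = &elan_i2c_ops;   /* raw I2C transfers available */
            else if (i2c_check_functionality(client->adapter,
                                             I2C_FUNC_SMBUS_BYTE_DATA |
                                             I2C_FUNC_SMBUS_BLOCK_DATA |
                                             I2C_FUNC_SMBUS_I2C_BLOCK))
                    *ops = &elan_smbus_ops; /* fall back to SMBus commands */
            else
                    return -ENODEV;         /* adapter cannot drive the pad */

            return 0;
    }

Because both transport options are bool rather than tristate, whichever backends are configured in are built into the one elan_i2c.ko and the choice costs nothing at runtime beyond this probe-time check.
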
+ config MOUSE_INPORT tristate "InPort/MS/ATIXL busmouse" depends on ISA diff --git a/drivers/input/mouse/Makefile b/drivers/input/mouse/Makefile index dda507f8b3a..560003dcac3 100644 --- a/drivers/input/mouse/Makefile +++ b/drivers/input/mouse/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_MOUSE_APPLETOUCH) += appletouch.o obj-$(CONFIG_MOUSE_ATARI) += atarimouse.o obj-$(CONFIG_MOUSE_BCM5974) += bcm5974.o obj-$(CONFIG_MOUSE_CYAPA) += cyapa.o +obj-$(CONFIG_MOUSE_ELAN_I2C) += elan_i2c.o obj-$(CONFIG_MOUSE_GPIO) += gpio_mouse.o obj-$(CONFIG_MOUSE_INPORT) += inport.o obj-$(CONFIG_MOUSE_LOGIBM) += logibm.o @@ -34,3 +35,7 @@ psmouse-$(CONFIG_MOUSE_PS2_SENTELIC) += sentelic.o psmouse-$(CONFIG_MOUSE_PS2_TRACKPOINT) += trackpoint.o psmouse-$(CONFIG_MOUSE_PS2_TOUCHKIT) += touchkit_ps2.o psmouse-$(CONFIG_MOUSE_PS2_CYPRESS) += cypress_ps2.o + +elan_i2c-objs := elan_i2c_core.o +elan_i2c-$(CONFIG_MOUSE_ELAN_I2C_I2C) += elan_i2c_i2c.o +elan_i2c-$(CONFIG_MOUSE_ELAN_I2C_SMBUS) += elan_i2c_smbus.o diff --git a/drivers/input/mouse/cyapa.c b/drivers/input/mouse/cyapa.c index b409c3d7d4f..1bece8cad46 100644 --- a/drivers/input/mouse/cyapa.c +++ b/drivers/input/mouse/cyapa.c @@ -6,7 +6,7 @@ * Daniel Kurtz <djkurtz@chromium.org> * Benson Leung <bleung@chromium.org> * - * Copyright (C) 2011-2012 Cypress Semiconductor, Inc. + * Copyright (C) 2011-2014 Cypress Semiconductor, Inc. * Copyright (C) 2011-2012 Google, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -206,7 +206,6 @@ struct cyapa { struct i2c_client *client; struct input_dev *input; char phys[32]; /* device physical location */ - int irq; bool irq_wake; /* irq wake is enabled */ bool smbus; @@ -422,8 +421,8 @@ static ssize_t cyapa_read_block(struct cyapa *cyapa, u8 cmd_idx, u8 *values) */ static int cyapa_get_state(struct cyapa *cyapa) { - int ret; u8 status[BL_STATUS_SIZE]; + int error; cyapa->state = CYAPA_STATE_NO_DEVICE; @@ -433,18 +432,18 @@ static int cyapa_get_state(struct cyapa *cyapa) * If the device is in operation mode, this will be the DATA regs. * */ - ret = cyapa_i2c_reg_read_block(cyapa, BL_HEAD_OFFSET, BL_STATUS_SIZE, - status); + error = cyapa_i2c_reg_read_block(cyapa, BL_HEAD_OFFSET, BL_STATUS_SIZE, + status); /* * On smbus systems in OP mode, the i2c_reg_read will fail with * -ETIMEDOUT. In this case, try again using the smbus equivalent * command. This should return a BL_HEAD indicating CYAPA_STATE_OP. */ - if (cyapa->smbus && (ret == -ETIMEDOUT || ret == -ENXIO)) - ret = cyapa_read_block(cyapa, CYAPA_CMD_BL_STATUS, status); + if (cyapa->smbus && (error == -ETIMEDOUT || error == -ENXIO)) + error = cyapa_read_block(cyapa, CYAPA_CMD_BL_STATUS, status); - if (ret != BL_STATUS_SIZE) + if (error != BL_STATUS_SIZE) goto error; if ((status[REG_OP_STATUS] & OP_STATUS_SRC) == OP_STATUS_SRC) { @@ -454,7 +453,7 @@ static int cyapa_get_state(struct cyapa *cyapa) cyapa->state = CYAPA_STATE_OP; break; default: - ret = -EAGAIN; + error = -EAGAIN; goto error; } } else { @@ -468,7 +467,7 @@ static int cyapa_get_state(struct cyapa *cyapa) return 0; error: - return (ret < 0) ? ret : -EAGAIN; + return (error < 0) ? 
error : -EAGAIN; } /* @@ -487,31 +486,31 @@ error: */ static int cyapa_poll_state(struct cyapa *cyapa, unsigned int timeout) { - int ret; + int error; int tries = timeout / 100; - ret = cyapa_get_state(cyapa); - while ((ret || cyapa->state >= CYAPA_STATE_BL_BUSY) && tries--) { + error = cyapa_get_state(cyapa); + while ((error || cyapa->state >= CYAPA_STATE_BL_BUSY) && tries--) { msleep(100); - ret = cyapa_get_state(cyapa); + error = cyapa_get_state(cyapa); } - return (ret == -EAGAIN || ret == -ETIMEDOUT) ? -ETIMEDOUT : ret; + return (error == -EAGAIN || error == -ETIMEDOUT) ? -ETIMEDOUT : error; } static int cyapa_bl_deactivate(struct cyapa *cyapa) { - int ret; + int error; - ret = cyapa_i2c_reg_write_block(cyapa, 0, sizeof(bl_deactivate), - bl_deactivate); - if (ret < 0) - return ret; + error = cyapa_i2c_reg_write_block(cyapa, 0, sizeof(bl_deactivate), + bl_deactivate); + if (error) + return error; /* wait for bootloader to switch to idle state; should take < 100ms */ msleep(100); - ret = cyapa_poll_state(cyapa, 500); - if (ret < 0) - return ret; + error = cyapa_poll_state(cyapa, 500); + if (error) + return error; if (cyapa->state != CYAPA_STATE_BL_IDLE) return -EAGAIN; return 0; @@ -532,11 +531,11 @@ static int cyapa_bl_deactivate(struct cyapa *cyapa) */ static int cyapa_bl_exit(struct cyapa *cyapa) { - int ret; + int error; - ret = cyapa_i2c_reg_write_block(cyapa, 0, sizeof(bl_exit), bl_exit); - if (ret < 0) - return ret; + error = cyapa_i2c_reg_write_block(cyapa, 0, sizeof(bl_exit), bl_exit); + if (error) + return error; /* * Wait for bootloader to exit, and operation mode to start. @@ -548,9 +547,9 @@ static int cyapa_bl_exit(struct cyapa *cyapa) * updated to new firmware, it must first calibrate its sensors, which * can take up to an additional 2 seconds. 
*/ - ret = cyapa_poll_state(cyapa, 2000); - if (ret < 0) - return ret; + error = cyapa_poll_state(cyapa, 2000); + if (error < 0) + return error; if (cyapa->state != CYAPA_STATE_OP) return -EAGAIN; @@ -577,10 +576,13 @@ static int cyapa_set_power_mode(struct cyapa *cyapa, u8 power_mode) power = ret & ~PWR_MODE_MASK; power |= power_mode & PWR_MODE_MASK; ret = cyapa_write_byte(cyapa, CYAPA_CMD_POWER_MODE, power); - if (ret < 0) + if (ret < 0) { dev_err(dev, "failed to set power_mode 0x%02x err = %d\n", power_mode, ret); - return ret; + return ret; + } + + return 0; } static int cyapa_get_query_data(struct cyapa *cyapa) @@ -637,28 +639,28 @@ static int cyapa_check_is_operational(struct cyapa *cyapa) { struct device *dev = &cyapa->client->dev; static const char unique_str[] = "CYTRA"; - int ret; + int error; - ret = cyapa_poll_state(cyapa, 2000); - if (ret < 0) - return ret; + error = cyapa_poll_state(cyapa, 2000); + if (error) + return error; switch (cyapa->state) { case CYAPA_STATE_BL_ACTIVE: - ret = cyapa_bl_deactivate(cyapa); - if (ret) - return ret; + error = cyapa_bl_deactivate(cyapa); + if (error) + return error; /* Fallthrough state */ case CYAPA_STATE_BL_IDLE: - ret = cyapa_bl_exit(cyapa); - if (ret) - return ret; + error = cyapa_bl_exit(cyapa); + if (error) + return error; /* Fallthrough state */ case CYAPA_STATE_OP: - ret = cyapa_get_query_data(cyapa); - if (ret < 0) - return ret; + error = cyapa_get_query_data(cyapa); + if (error) + return error; /* only support firmware protocol gen3 */ if (cyapa->gen != CYAPA_GEN3) { @@ -753,18 +755,42 @@ static u8 cyapa_check_adapter_functionality(struct i2c_client *client) return ret; } +static int cyapa_open(struct input_dev *input) +{ + struct cyapa *cyapa = input_get_drvdata(input); + struct i2c_client *client = cyapa->client; + int error; + + error = cyapa_set_power_mode(cyapa, PWR_MODE_FULL_ACTIVE); + if (error) { + dev_err(&client->dev, "set active power failed: %d\n", error); + return error; + } + + enable_irq(client->irq); + return 0; +} + +static void cyapa_close(struct input_dev *input) +{ + struct cyapa *cyapa = input_get_drvdata(input); + + disable_irq(cyapa->client->irq); + cyapa_set_power_mode(cyapa, PWR_MODE_OFF); +} + static int cyapa_create_input_dev(struct cyapa *cyapa) { struct device *dev = &cyapa->client->dev; - int ret; struct input_dev *input; + int error; if (!cyapa->physical_size_x || !cyapa->physical_size_y) return -EINVAL; - input = cyapa->input = input_allocate_device(); + input = devm_input_allocate_device(dev); if (!input) { - dev_err(dev, "allocate memory for input device failed\n"); + dev_err(dev, "failed to allocate memory for input device.\n"); return -ENOMEM; } @@ -772,14 +798,17 @@ static int cyapa_create_input_dev(struct cyapa *cyapa) input->phys = cyapa->phys; input->id.bustype = BUS_I2C; input->id.version = 1; - input->id.product = 0; /* means any product in eventcomm. */ + input->id.product = 0; /* Means any product in eventcomm. 
*/ input->dev.parent = &cyapa->client->dev; + input->open = cyapa_open; + input->close = cyapa_close; + input_set_drvdata(input, cyapa); __set_bit(EV_ABS, input->evbit); - /* finger position */ + /* Finger position */ input_set_abs_params(input, ABS_MT_POSITION_X, 0, cyapa->max_abs_x, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, 0, cyapa->max_abs_y, 0, @@ -801,35 +830,25 @@ static int cyapa_create_input_dev(struct cyapa *cyapa) if (cyapa->btn_capability == CAPABILITY_LEFT_BTN_MASK) __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); - /* handle pointer emulation and unused slots in core */ - ret = input_mt_init_slots(input, CYAPA_MAX_MT_SLOTS, - INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED); - if (ret) { - dev_err(dev, "allocate memory for MT slots failed, %d\n", ret); - goto err_free_device; + /* Handle pointer emulation and unused slots in core */ + error = input_mt_init_slots(input, CYAPA_MAX_MT_SLOTS, + INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED); + if (error) { + dev_err(dev, "failed to initialize MT slots: %d\n", error); + return error; } - /* Register the device in input subsystem */ - ret = input_register_device(input); - if (ret) { - dev_err(dev, "input device register failed, %d\n", ret); - goto err_free_device; - } + cyapa->input = input; return 0; - -err_free_device: - input_free_device(input); - cyapa->input = NULL; - return ret; } static int cyapa_probe(struct i2c_client *client, const struct i2c_device_id *dev_id) { - int ret; - u8 adapter_func; - struct cyapa *cyapa; struct device *dev = &client->dev; + struct cyapa *cyapa; + u8 adapter_func; + int error; adapter_func = cyapa_check_adapter_functionality(client); if (adapter_func == CYAPA_ADAPTER_FUNC_NONE) { @@ -837,11 +856,9 @@ static int cyapa_probe(struct i2c_client *client, return -EIO; } - cyapa = kzalloc(sizeof(struct cyapa), GFP_KERNEL); - if (!cyapa) { - dev_err(dev, "allocate memory for cyapa failed\n"); + cyapa = devm_kzalloc(dev, sizeof(struct cyapa), GFP_KERNEL); + if (!cyapa) return -ENOMEM; - } cyapa->gen = CYAPA_GEN3; cyapa->client = client; @@ -852,67 +869,61 @@ static int cyapa_probe(struct i2c_client *client, /* i2c isn't supported, use smbus */ if (adapter_func == CYAPA_ADAPTER_FUNC_SMBUS) cyapa->smbus = true; + cyapa->state = CYAPA_STATE_NO_DEVICE; - ret = cyapa_check_is_operational(cyapa); - if (ret) { - dev_err(dev, "device not operational, %d\n", ret); - goto err_mem_free; - } - ret = cyapa_create_input_dev(cyapa); - if (ret) { - dev_err(dev, "create input_dev instance failed, %d\n", ret); - goto err_mem_free; + error = cyapa_check_is_operational(cyapa); + if (error) { + dev_err(dev, "device not operational, %d\n", error); + return error; } - ret = cyapa_set_power_mode(cyapa, PWR_MODE_FULL_ACTIVE); - if (ret) { - dev_err(dev, "set active power failed, %d\n", ret); - goto err_unregister_device; + /* Power down the device until we need it */ + error = cyapa_set_power_mode(cyapa, PWR_MODE_OFF); + if (error) { + dev_err(dev, "failed to quiesce the device: %d\n", error); + return error; } - cyapa->irq = client->irq; - ret = request_threaded_irq(cyapa->irq, - NULL, - cyapa_irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - "cyapa", - cyapa); - if (ret) { - dev_err(dev, "IRQ request failed: %d\n, ", ret); - goto err_unregister_device; + error = cyapa_create_input_dev(cyapa); + if (error) + return error; + + error = devm_request_threaded_irq(dev, client->irq, + NULL, cyapa_irq, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + "cyapa", cyapa); + if (error) { + dev_err(dev, "failed to request threaded irq: %d\n", error); + 
return error; } - return 0; + /* Disable IRQ until the device is opened */ + disable_irq(client->irq); -err_unregister_device: - input_unregister_device(cyapa->input); -err_mem_free: - kfree(cyapa); - - return ret; -} - -static int cyapa_remove(struct i2c_client *client) -{ - struct cyapa *cyapa = i2c_get_clientdata(client); - - free_irq(cyapa->irq, cyapa); - input_unregister_device(cyapa->input); - cyapa_set_power_mode(cyapa, PWR_MODE_OFF); - kfree(cyapa); + /* Register the device in input subsystem */ + error = input_register_device(cyapa->input); + if (error) { + dev_err(dev, "failed to register input device: %d\n", error); + return error; + } return 0; } -#ifdef CONFIG_PM_SLEEP -static int cyapa_suspend(struct device *dev) +static int __maybe_unused cyapa_suspend(struct device *dev) { - int ret; + struct i2c_client *client = to_i2c_client(dev); + struct cyapa *cyapa = i2c_get_clientdata(client); + struct input_dev *input = cyapa->input; u8 power_mode; - struct cyapa *cyapa = dev_get_drvdata(dev); + int error; - disable_irq(cyapa->irq); + error = mutex_lock_interruptible(&input->mutex); + if (error) + return error; + + disable_irq(client->irq); /* * Set trackpad device to idle mode if wakeup is allowed, @@ -920,31 +931,44 @@ static int cyapa_suspend(struct device *dev) */ power_mode = device_may_wakeup(dev) ? PWR_MODE_IDLE : PWR_MODE_OFF; - ret = cyapa_set_power_mode(cyapa, power_mode); - if (ret < 0) - dev_err(dev, "set power mode failed, %d\n", ret); + error = cyapa_set_power_mode(cyapa, power_mode); + if (error) + dev_err(dev, "suspend: set power mode to %d failed: %d\n", + power_mode, error); if (device_may_wakeup(dev)) - cyapa->irq_wake = (enable_irq_wake(cyapa->irq) == 0); + cyapa->irq_wake = (enable_irq_wake(client->irq) == 0); + + mutex_unlock(&input->mutex); + return 0; } -static int cyapa_resume(struct device *dev) +static int __maybe_unused cyapa_resume(struct device *dev) { - int ret; - struct cyapa *cyapa = dev_get_drvdata(dev); + struct i2c_client *client = to_i2c_client(dev); + struct cyapa *cyapa = i2c_get_clientdata(client); + struct input_dev *input = cyapa->input; + u8 power_mode; + int error; + + mutex_lock(&input->mutex); if (device_may_wakeup(dev) && cyapa->irq_wake) - disable_irq_wake(cyapa->irq); + disable_irq_wake(client->irq); + + power_mode = input->users ? PWR_MODE_FULL_ACTIVE : PWR_MODE_OFF; + error = cyapa_set_power_mode(cyapa, power_mode); + if (error) + dev_warn(dev, "resume: set power mode to %d failed: %d\n", + power_mode, error); + + enable_irq(client->irq); - ret = cyapa_set_power_mode(cyapa, PWR_MODE_FULL_ACTIVE); - if (ret) - dev_warn(dev, "resume active power failed, %d\n", ret); + mutex_unlock(&input->mutex); - enable_irq(cyapa->irq); return 0; } -#endif /* CONFIG_PM_SLEEP */ static SIMPLE_DEV_PM_OPS(cyapa_pm_ops, cyapa_suspend, cyapa_resume); @@ -962,7 +986,6 @@ static struct i2c_driver cyapa_driver = { }, .probe = cyapa_probe, - .remove = cyapa_remove, .id_table = cyapa_id_table, };
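
A pattern worth calling out here: nearly every driver touched in this series drops its #ifdef CONFIG_PM_SLEEP guards and tags the suspend/resume handlers __maybe_unused instead. SIMPLE_DEV_PM_OPS() always emits the dev_pm_ops table (it is simply empty when CONFIG_PM_SLEEP is off), and the attribute stops the compiler from warning about handlers that end up unreferenced in that configuration. A minimal self-contained sketch of the idiom ("foo" is a placeholder name, not a driver from this series):

    #include <linux/device.h>
    #include <linux/pm.h>

    /* Compiled unconditionally; __maybe_unused silences
     * -Wunused-function when CONFIG_PM_SLEEP is disabled. */
    static int __maybe_unused foo_suspend(struct device *dev)
    {
            /* quiesce the hardware */
            return 0;
    }

    static int __maybe_unused foo_resume(struct device *dev)
    {
            /* bring the hardware back up */
            return 0;
    }

    /* Expands to an empty table without CONFIG_PM_SLEEP;
     * hook up with .driver.pm = &foo_pm_ops. */
    static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);

The net effect is the same generated code with far less preprocessor noise.
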
diff --git a/drivers/input/mouse/elan_i2c.h b/drivers/input/mouse/elan_i2c.h new file mode 100644 index 00000000000..2e838626205 --- /dev/null +++ b/drivers/input/mouse/elan_i2c.h @@ -0,0 +1,86 @@ +/* + * Elan I2C/SMBus Touchpad driver + * + * Copyright (c) 2013 ELAN Microelectronics Corp. + * + * Author: 林政維 (Duson Lin) <dusonlin@emc.com.tw> + * Version: 1.5.5 + * + * Based on cyapa driver: + * copyright (c) 2011-2012 Cypress Semiconductor, Inc. + * copyright (c) 2011-2012 Google, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Trademarks are the property of their respective owners. + */ + +#ifndef _ELAN_I2C_H +#define _ELAN_I2C_H + +#include <linux/types.h> + +#define ETP_ENABLE_ABS 0x0001 +#define ETP_ENABLE_CALIBRATE 0x0002 +#define ETP_DISABLE_CALIBRATE 0x0000 +#define ETP_DISABLE_POWER 0x0001 + +/* IAP Firmware handling */ +#define ETP_FW_NAME "elan_i2c.bin" +#define ETP_IAP_START_ADDR 0x0083 +#define ETP_FW_IAP_PAGE_ERR (1 << 5) +#define ETP_FW_IAP_INTF_ERR (1 << 4) +#define ETP_FW_PAGE_SIZE 64 +#define ETP_FW_PAGE_COUNT 768 +#define ETP_FW_SIZE (ETP_FW_PAGE_SIZE * ETP_FW_PAGE_COUNT) + +struct i2c_client; +struct completion; + +enum tp_mode { + IAP_MODE = 1, + MAIN_MODE +}; + +struct elan_transport_ops { + int (*initialize)(struct i2c_client *client); + int (*sleep_control)(struct i2c_client *, bool sleep); + int (*power_control)(struct i2c_client *, bool enable); + int (*set_mode)(struct i2c_client *client, u8 mode); + + int (*calibrate)(struct i2c_client *client); + int (*calibrate_result)(struct i2c_client *client, u8 *val); + + int (*get_baseline_data)(struct i2c_client *client, + bool max_baseline, u8 *value); + + int (*get_version)(struct i2c_client *client, bool iap, u8 *version); + int (*get_sm_version)(struct i2c_client *client, u8 *version); + int (*get_checksum)(struct i2c_client *client, bool iap, u16 *csum); + int (*get_product_id)(struct i2c_client *client, u8 *id); + + int (*get_max)(struct i2c_client *client, + unsigned int *max_x, unsigned int *max_y); + int (*get_resolution)(struct i2c_client *client, + u8 *hw_res_x, u8 *hw_res_y); + int (*get_num_traces)(struct i2c_client *client, + unsigned int *x_tracenum, + unsigned int *y_tracenum); + + int (*iap_get_mode)(struct i2c_client *client, enum tp_mode *mode); + int (*iap_reset)(struct i2c_client *client); + + int (*prepare_fw_update)(struct i2c_client *client); + int (*write_fw_block)(struct i2c_client *client, + const u8 *page, u16 checksum, int idx); + int (*finish_fw_update)(struct i2c_client *client, + struct completion *reset_done); + + int (*get_report)(struct i2c_client *client, u8 *report); +}; + +extern const struct elan_transport_ops elan_smbus_ops, elan_i2c_ops; + +#endif /* _ELAN_I2C_H */ diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c new file mode 100644 index 00000000000..0cb2be48d53 --- /dev/null +++ b/drivers/input/mouse/elan_i2c_core.c @@ -0,0 +1,1137 @@ +/* + * Elan I2C/SMBus Touchpad driver + * + * Copyright (c) 2013 ELAN Microelectronics Corp. + * + * Author: 林政維 (Duson Lin) <dusonlin@emc.com.tw> + * Version: 1.5.5 + * + * Based on cyapa driver: + * copyright (c) 2011-2012 Cypress Semiconductor, Inc. + * copyright (c) 2011-2012 Google, Inc.
+ */ + +#include <linux/acpi.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/firmware.h> +#include <linux/i2c.h> +#include <linux/init.h> +#include <linux/input/mt.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/input.h> +#include <linux/uaccess.h> +#include <linux/jiffies.h> +#include <linux/completion.h> +#include <linux/of.h> +#include <linux/regulator/consumer.h> +#include <asm/unaligned.h> + +#include "elan_i2c.h" + +#define DRIVER_NAME "elan_i2c" +#define ELAN_DRIVER_VERSION "1.5.5" +#define ETP_PRESSURE_OFFSET 25 +#define ETP_MAX_PRESSURE 255 +#define ETP_FWIDTH_REDUCE 90 +#define ETP_FINGER_WIDTH 15 +#define ETP_RETRY_COUNT 3 + +#define ETP_MAX_FINGERS 5 +#define ETP_FINGER_DATA_LEN 5 +#define ETP_REPORT_ID 0x5D +#define ETP_REPORT_ID_OFFSET 2 +#define ETP_TOUCH_INFO_OFFSET 3 +#define ETP_FINGER_DATA_OFFSET 4 +#define ETP_MAX_REPORT_LEN 34 + +/* The main device structure */ +struct elan_tp_data { + struct i2c_client *client; + struct input_dev *input; + struct regulator *vcc; + + const struct elan_transport_ops *ops; + + /* for fw update */ + struct completion fw_completion; + bool in_fw_update; + + struct mutex sysfs_mutex; + + unsigned int max_x; + unsigned int max_y; + unsigned int width_x; + unsigned int width_y; + unsigned int x_res; + unsigned int y_res; + + u8 product_id; + u8 fw_version; + u8 sm_version; + u8 iap_version; + u16 fw_checksum; + + u8 mode; + + bool irq_wake; + + u8 min_baseline; + u8 max_baseline; + bool baseline_ready; +}; + +static int elan_enable_power(struct elan_tp_data *data) +{ + int repeat = ETP_RETRY_COUNT; + int error; + + error = regulator_enable(data->vcc); + if (error) { + dev_err(&data->client->dev, + "Failed to enable regulator: %d\n", error); + return error; + } + + do { + error = data->ops->power_control(data->client, true); + if (error >= 0) + return 0; + + msleep(30); + } while (--repeat > 0); + + return error; +} + +static int elan_disable_power(struct elan_tp_data *data) +{ + int repeat = ETP_RETRY_COUNT; + int error; + + do { + error = data->ops->power_control(data->client, false); + if (!error) { + error = regulator_disable(data->vcc); + if (error) { + dev_err(&data->client->dev, + "Failed to disable regulator: %d\n", + error); + /* Attempt to power the chip back up */ + data->ops->power_control(data->client, true); + break; + } + + return 0; + } + + msleep(30); + } while (--repeat > 0); + + return error; +} + +static int elan_sleep(struct elan_tp_data *data) +{ + int repeat = ETP_RETRY_COUNT; + int error; + + do { + error = data->ops->sleep_control(data->client, true); + if (!error) + return 0; + + msleep(30); + } while (--repeat > 0); + + return error; +} + +static int __elan_initialize(struct elan_tp_data *data) +{ + struct i2c_client *client = data->client; + int error; + + error = data->ops->initialize(client); + if (error) { + dev_err(&client->dev, "device initialize failed: %d\n", error); + return error; + } + + data->mode |= ETP_ENABLE_ABS; + error = data->ops->set_mode(client, data->mode); + if (error) { + dev_err(&client->dev, + "failed to switch to absolute mode: %d\n", error); + return error; + } + + error = data->ops->sleep_control(client, false); + if (error) { + dev_err(&client->dev, + "failed to wake device up: %d\n", error); + return error; + } + + return 0; +} + +static int elan_initialize(struct elan_tp_data *data) +{ + int repeat = ETP_RETRY_COUNT; + int error; + + do { + error = 
__elan_initialize(data); + if (!error) + return 0; + + msleep(30); + } while (--repeat > 0); + + return error; +} + +static int elan_query_device_info(struct elan_tp_data *data) +{ + int error; + + error = data->ops->get_product_id(data->client, &data->product_id); + if (error) + return error; + + error = data->ops->get_version(data->client, false, &data->fw_version); + if (error) + return error; + + error = data->ops->get_checksum(data->client, false, + &data->fw_checksum); + if (error) + return error; + + error = data->ops->get_sm_version(data->client, &data->sm_version); + if (error) + return error; + + error = data->ops->get_version(data->client, true, &data->iap_version); + if (error) + return error; + + return 0; +} + +static unsigned int elan_convert_resolution(u8 val) +{ + /* + * (value from firmware) * 10 + 790 = dpi + * + * We also have to convert dpi to dots/mm (*10/254 to avoid floating + * point). + */ + + return ((int)(char)val * 10 + 790) * 10 / 254; +} + +static int elan_query_device_parameters(struct elan_tp_data *data) +{ + unsigned int x_traces, y_traces; + u8 hw_x_res, hw_y_res; + int error; + + error = data->ops->get_max(data->client, &data->max_x, &data->max_y); + if (error) + return error; + + error = data->ops->get_num_traces(data->client, &x_traces, &y_traces); + if (error) + return error; + + data->width_x = data->max_x / x_traces; + data->width_y = data->max_y / y_traces; + + error = data->ops->get_resolution(data->client, &hw_x_res, &hw_y_res); + if (error) + return error; + + data->x_res = elan_convert_resolution(hw_x_res); + data->y_res = elan_convert_resolution(hw_y_res); + + return 0; +} + +/* + ********************************************************** + * IAP firmware updater related routines + ********************************************************** + */ +static int elan_write_fw_block(struct elan_tp_data *data, + const u8 *page, u16 checksum, int idx) +{ + int retry = ETP_RETRY_COUNT; + int error; + + do { + error = data->ops->write_fw_block(data->client, + page, checksum, idx); + if (!error) + return 0; + + dev_dbg(&data->client->dev, + "IAP retrying page %d (error: %d)\n", idx, error); + } while (--retry > 0); + + return error; +} + +static int __elan_update_firmware(struct elan_tp_data *data, + const struct firmware *fw) +{ + struct i2c_client *client = data->client; + struct device *dev = &client->dev; + int i, j; + int error; + u16 iap_start_addr; + u16 boot_page_count; + u16 sw_checksum = 0, fw_checksum = 0; + + error = data->ops->prepare_fw_update(client); + if (error) + return error; + + iap_start_addr = get_unaligned_le16(&fw->data[ETP_IAP_START_ADDR * 2]); + + boot_page_count = (iap_start_addr * 2) / ETP_FW_PAGE_SIZE; + for (i = boot_page_count; i < ETP_FW_PAGE_COUNT; i++) { + u16 checksum = 0; + const u8 *page = &fw->data[i * ETP_FW_PAGE_SIZE]; + + for (j = 0; j < ETP_FW_PAGE_SIZE; j += 2) + checksum += ((page[j + 1] << 8) | page[j]); + + error = elan_write_fw_block(data, page, checksum, i); + if (error) { + dev_err(dev, "write page %d fail: %d\n", i, error); + return error; + } + + sw_checksum += checksum; + } + + /* Wait for WDT reset and power-on reset */ + msleep(600); + + error = data->ops->finish_fw_update(client, &data->fw_completion); + if (error) + return error; + + error = data->ops->get_checksum(client, true, &fw_checksum); + if (error) + return error; + + if (sw_checksum != fw_checksum) { + dev_err(dev, "checksum diff sw=[%04X], fw=[%04X]\n", + sw_checksum, fw_checksum); + return -EIO; + } + + return 0; +}
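
The resolution comment and the per-page checksum loop above are easy to sanity-check in isolation. A small worked example as plain userspace C (sample values invented for illustration; this mirrors, but is not, the driver code):

    #include <stdint.h>
    #include <stdio.h>

    #define FW_PAGE_SIZE 64 /* mirrors ETP_FW_PAGE_SIZE */

    /* (signed firmware value) * 10 + 790 gives dpi; *10/254 then
     * converts dpi to dots/mm without floating point. */
    static unsigned int convert_resolution(uint8_t val)
    {
            return ((int)(signed char)val * 10 + 790) * 10 / 254;
    }

    /* 16-bit little-endian word sum over one firmware page, as in
     * __elan_update_firmware() above. */
    static uint16_t page_checksum(const uint8_t *page)
    {
            uint16_t sum = 0;
            int i;

            for (i = 0; i < FW_PAGE_SIZE; i += 2)
                    sum += (uint16_t)((page[i + 1] << 8) | page[i]);
            return sum;
    }

    int main(void)
    {
            uint8_t page[FW_PAGE_SIZE] = { 0x34, 0x12, 0x78, 0x56 };

            printf("res(0)  = %u dots/mm\n", convert_resolution(0));  /* 31 */
            printf("res(10) = %u dots/mm\n", convert_resolution(10)); /* 35 */
            printf("page checksum = 0x%04x\n", page_checksum(page));  /* 0x68ac */
            return 0;
    }

The checksum read back from the device after the update must match the running sum of these per-page values, or the driver declares the flash attempt failed.

+ +static int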
elan_update_firmware(struct elan_tp_data *data, + const struct firmware *fw) +{ + struct i2c_client *client = data->client; + int retval; + + dev_dbg(&client->dev, "Starting firmware update....\n"); + + disable_irq(client->irq); + data->in_fw_update = true; + + retval = __elan_update_firmware(data, fw); + if (retval) { + dev_err(&client->dev, "firmware update failed: %d\n", retval); + data->ops->iap_reset(client); + } else { + /* Reinitialize TP after fw is updated */ + elan_initialize(data); + elan_query_device_info(data); + } + + data->in_fw_update = false; + enable_irq(client->irq); + + return retval; +} + +/* + ******************************************************************* + * SYSFS attributes + ******************************************************************* + */ +static ssize_t elan_sysfs_read_fw_checksum(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + + return sprintf(buf, "0x%04x\n", data->fw_checksum); +} + +static ssize_t elan_sysfs_read_product_id(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + + return sprintf(buf, "%d.0\n", data->product_id); +} + +static ssize_t elan_sysfs_read_fw_ver(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + + return sprintf(buf, "%d.0\n", data->fw_version); +} + +static ssize_t elan_sysfs_read_sm_ver(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + + return sprintf(buf, "%d.0\n", data->sm_version); +} + +static ssize_t elan_sysfs_read_iap_ver(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + + return sprintf(buf, "%d.0\n", data->iap_version); +} + +static ssize_t elan_sysfs_update_fw(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + const struct firmware *fw; + int error; + + error = request_firmware(&fw, ETP_FW_NAME, dev); + if (error) { + dev_err(dev, "cannot load firmware %s: %d\n", + ETP_FW_NAME, error); + return error; + } + + /* Firmware must be exactly PAGE_NUM * PAGE_SIZE bytes */ + if (fw->size != ETP_FW_SIZE) { + dev_err(dev, "invalid firmware size = %zu, expected %d.\n", + fw->size, ETP_FW_SIZE); + error = -EBADF; + goto out_release_fw; + } + + error = mutex_lock_interruptible(&data->sysfs_mutex); + if (error) + goto out_release_fw; + + error = elan_update_firmware(data, fw); + + mutex_unlock(&data->sysfs_mutex); + +out_release_fw: + release_firmware(fw); + return error ?: count; +} + +static ssize_t calibrate_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + int tries = 20; + int retval; + int error; + u8 val[3]; + + retval = mutex_lock_interruptible(&data->sysfs_mutex); + if (retval) + return retval; + + disable_irq(client->irq); + + data->mode |= ETP_ENABLE_CALIBRATE; + retval = 
data->ops->set_mode(client, data->mode);
+ if (retval) {
+ dev_err(dev, "failed to enable calibration mode: %d\n",
+ retval);
+ goto out;
+ }
+
+ retval = data->ops->calibrate(client);
+ if (retval) {
+ dev_err(dev, "failed to start calibration: %d\n",
+ retval);
+ goto out_disable_calibrate;
+ }
+
+ val[0] = 0xff;
+ do {
+ /* Wait 250ms before checking if calibration has completed. */
+ msleep(250);
+
+ retval = data->ops->calibrate_result(client, val);
+ if (retval)
+ dev_err(dev, "failed to check calibration result: %d\n",
+ retval);
+ else if (val[0] == 0)
+ break; /* calibration done */
+
+ } while (--tries);
+
+ if (tries == 0) {
+ dev_err(dev, "failed to calibrate. Timeout.\n");
+ retval = -ETIMEDOUT;
+ }
+
+out_disable_calibrate:
+ data->mode &= ~ETP_ENABLE_CALIBRATE;
+ error = data->ops->set_mode(data->client, data->mode);
+ if (error) {
+ dev_err(dev, "failed to disable calibration mode: %d\n",
+ error);
+ if (!retval)
+ retval = error;
+ }
+out:
+ enable_irq(client->irq);
+ mutex_unlock(&data->sysfs_mutex);
+ return retval ?: count;
+}
+
+static ssize_t elan_sysfs_read_mode(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct elan_tp_data *data = i2c_get_clientdata(client);
+ int error;
+ enum tp_mode mode;
+
+ error = mutex_lock_interruptible(&data->sysfs_mutex);
+ if (error)
+ return error;
+
+ error = data->ops->iap_get_mode(data->client, &mode);
+
+ mutex_unlock(&data->sysfs_mutex);
+
+ if (error)
+ return error;
+
+ return sprintf(buf, "%d\n", (int)mode);
+}
+
+static DEVICE_ATTR(product_id, S_IRUGO, elan_sysfs_read_product_id, NULL);
+static DEVICE_ATTR(firmware_version, S_IRUGO, elan_sysfs_read_fw_ver, NULL);
+static DEVICE_ATTR(sample_version, S_IRUGO, elan_sysfs_read_sm_ver, NULL);
+static DEVICE_ATTR(iap_version, S_IRUGO, elan_sysfs_read_iap_ver, NULL);
+static DEVICE_ATTR(fw_checksum, S_IRUGO, elan_sysfs_read_fw_checksum, NULL);
+static DEVICE_ATTR(mode, S_IRUGO, elan_sysfs_read_mode, NULL);
+static DEVICE_ATTR(update_fw, S_IWUSR, NULL, elan_sysfs_update_fw);
+
+static DEVICE_ATTR_WO(calibrate);
+
+static struct attribute *elan_sysfs_entries[] = {
+ &dev_attr_product_id.attr,
+ &dev_attr_firmware_version.attr,
+ &dev_attr_sample_version.attr,
+ &dev_attr_iap_version.attr,
+ &dev_attr_fw_checksum.attr,
+ &dev_attr_calibrate.attr,
+ &dev_attr_mode.attr,
+ &dev_attr_update_fw.attr,
+ NULL,
+};
+
+static const struct attribute_group elan_sysfs_group = {
+ .attrs = elan_sysfs_entries,
+};
+
+static ssize_t acquire_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct elan_tp_data *data = i2c_get_clientdata(client);
+ int error;
+ int retval;
+
+ retval = mutex_lock_interruptible(&data->sysfs_mutex);
+ if (retval)
+ return retval;
+
+ disable_irq(client->irq);
+
+ data->baseline_ready = false;
+
+ data->mode |= ETP_ENABLE_CALIBRATE;
+ retval = data->ops->set_mode(data->client, data->mode);
+ if (retval) {
+ dev_err(dev, "Failed to enable calibration mode to get baseline: %d\n",
+ retval);
+ goto out;
+ }
+
+ msleep(250);
+
+ retval = data->ops->get_baseline_data(data->client, true,
+ &data->max_baseline);
+ if (retval) {
+ dev_err(dev, "Failed to read max baseline from device: %d\n",
+ retval);
+ goto out_disable_calibrate;
+ }
+
+ retval = data->ops->get_baseline_data(data->client, false,
+ &data->min_baseline);
+ if (retval) {
+ dev_err(dev, "Failed to read min baseline from device: %d\n",
+ retval);
+
goto out_disable_calibrate; + } + + data->baseline_ready = true; + +out_disable_calibrate: + data->mode &= ~ETP_ENABLE_CALIBRATE; + error = data->ops->set_mode(data->client, data->mode); + if (error) { + dev_err(dev, "Failed to disable calibration mode after acquiring baseline: %d\n", + error); + if (!retval) + retval = error; + } +out: + enable_irq(client->irq); + mutex_unlock(&data->sysfs_mutex); + return retval ?: count; +} + +static ssize_t min_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + int retval; + + retval = mutex_lock_interruptible(&data->sysfs_mutex); + if (retval) + return retval; + + if (!data->baseline_ready) { + retval = -ENODATA; + goto out; + } + + retval = snprintf(buf, PAGE_SIZE, "%d", data->min_baseline); + +out: + mutex_unlock(&data->sysfs_mutex); + return retval; +} + +static ssize_t max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + int retval; + + retval = mutex_lock_interruptible(&data->sysfs_mutex); + if (retval) + return retval; + + if (!data->baseline_ready) { + retval = -ENODATA; + goto out; + } + + retval = snprintf(buf, PAGE_SIZE, "%d", data->max_baseline); + +out: + mutex_unlock(&data->sysfs_mutex); + return retval; +} + + +static DEVICE_ATTR_WO(acquire); +static DEVICE_ATTR_RO(min); +static DEVICE_ATTR_RO(max); + +static struct attribute *elan_baseline_sysfs_entries[] = { + &dev_attr_acquire.attr, + &dev_attr_min.attr, + &dev_attr_max.attr, + NULL, +}; + +static const struct attribute_group elan_baseline_sysfs_group = { + .name = "baseline", + .attrs = elan_baseline_sysfs_entries, +}; + +static const struct attribute_group *elan_sysfs_groups[] = { + &elan_sysfs_group, + &elan_baseline_sysfs_group, + NULL +}; + +/* + ****************************************************************** + * Elan isr functions + ****************************************************************** + */ +static void elan_report_contact(struct elan_tp_data *data, + int contact_num, bool contact_valid, + u8 *finger_data) +{ + struct input_dev *input = data->input; + unsigned int pos_x, pos_y; + unsigned int pressure, mk_x, mk_y; + unsigned int area_x, area_y, major, minor, new_pressure; + + + if (contact_valid) { + pos_x = ((finger_data[0] & 0xf0) << 4) | + finger_data[1]; + pos_y = ((finger_data[0] & 0x0f) << 8) | + finger_data[2]; + mk_x = (finger_data[3] & 0x0f); + mk_y = (finger_data[3] >> 4); + pressure = finger_data[4]; + + if (pos_x > data->max_x || pos_y > data->max_y) { + dev_dbg(input->dev.parent, + "[%d] x=%d y=%d over max (%d, %d)", + contact_num, pos_x, pos_y, + data->max_x, data->max_y); + return; + } + + /* + * To avoid treating large finger as palm, let's reduce the + * width x and y per trace. 
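+ * For example (illustrative numbers only): with max_x = 3000 spread
+ * over 20 traces, width_x is 150; subtracting ETP_FWIDTH_REDUCE (90)
+ * leaves 60 units per trace, so a contact covering mk_x = 3 traces
+ * reports a major axis of about 180 instead of 450.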
+ */ + area_x = mk_x * (data->width_x - ETP_FWIDTH_REDUCE); + area_y = mk_y * (data->width_y - ETP_FWIDTH_REDUCE); + + major = max(area_x, area_y); + minor = min(area_x, area_y); + + new_pressure = pressure + ETP_PRESSURE_OFFSET; + if (new_pressure > ETP_MAX_PRESSURE) + new_pressure = ETP_MAX_PRESSURE; + + input_mt_slot(input, contact_num); + input_mt_report_slot_state(input, MT_TOOL_FINGER, true); + input_report_abs(input, ABS_MT_POSITION_X, pos_x); + input_report_abs(input, ABS_MT_POSITION_Y, data->max_y - pos_y); + input_report_abs(input, ABS_MT_PRESSURE, new_pressure); + input_report_abs(input, ABS_TOOL_WIDTH, mk_x); + input_report_abs(input, ABS_MT_TOUCH_MAJOR, major); + input_report_abs(input, ABS_MT_TOUCH_MINOR, minor); + } else { + input_mt_slot(input, contact_num); + input_mt_report_slot_state(input, MT_TOOL_FINGER, false); + } +} + +static void elan_report_absolute(struct elan_tp_data *data, u8 *packet) +{ + struct input_dev *input = data->input; + u8 *finger_data = &packet[ETP_FINGER_DATA_OFFSET]; + int i; + u8 tp_info = packet[ETP_TOUCH_INFO_OFFSET]; + bool contact_valid; + + for (i = 0; i < ETP_MAX_FINGERS; i++) { + contact_valid = tp_info & (1U << (3 + i)); + elan_report_contact(data, i, contact_valid, finger_data); + + if (contact_valid) + finger_data += ETP_FINGER_DATA_LEN; + } + + input_report_key(input, BTN_LEFT, tp_info & 0x01); + input_mt_report_pointer_emulation(input, true); + input_sync(input); +} + +static irqreturn_t elan_isr(int irq, void *dev_id) +{ + struct elan_tp_data *data = dev_id; + struct device *dev = &data->client->dev; + int error; + u8 report[ETP_MAX_REPORT_LEN]; + + /* + * When device is connected to i2c bus, when all IAP page writes + * complete, the driver will receive interrupt and must read + * 0000 to confirm that IAP is finished. 
+ */ + if (data->in_fw_update) { + complete(&data->fw_completion); + goto out; + } + + error = data->ops->get_report(data->client, report); + if (error) + goto out; + + if (report[ETP_REPORT_ID_OFFSET] != ETP_REPORT_ID) + dev_err(dev, "invalid report id data (%x)\n", + report[ETP_REPORT_ID_OFFSET]); + else + elan_report_absolute(data, report); + +out: + return IRQ_HANDLED; +} + +/* + ****************************************************************** + * Elan initialization functions + ****************************************************************** + */ +static int elan_setup_input_device(struct elan_tp_data *data) +{ + struct device *dev = &data->client->dev; + struct input_dev *input; + unsigned int max_width = max(data->width_x, data->width_y); + unsigned int min_width = min(data->width_x, data->width_y); + int error; + + input = devm_input_allocate_device(dev); + if (!input) + return -ENOMEM; + + input->name = "Elan Touchpad"; + input->id.bustype = BUS_I2C; + input_set_drvdata(input, data); + + error = input_mt_init_slots(input, ETP_MAX_FINGERS, + INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED); + if (error) { + dev_err(dev, "failed to initialize MT slots: %d\n", error); + return error; + } + + __set_bit(EV_ABS, input->evbit); + __set_bit(INPUT_PROP_POINTER, input->propbit); + __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); + __set_bit(BTN_LEFT, input->keybit); + + /* Set up ST parameters */ + input_set_abs_params(input, ABS_X, 0, data->max_x, 0, 0); + input_set_abs_params(input, ABS_Y, 0, data->max_y, 0, 0); + input_abs_set_res(input, ABS_X, data->x_res); + input_abs_set_res(input, ABS_Y, data->y_res); + input_set_abs_params(input, ABS_PRESSURE, 0, ETP_MAX_PRESSURE, 0, 0); + input_set_abs_params(input, ABS_TOOL_WIDTH, 0, ETP_FINGER_WIDTH, 0, 0); + + /* And MT parameters */ + input_set_abs_params(input, ABS_MT_POSITION_X, 0, data->max_x, 0, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, 0, data->max_y, 0, 0); + input_abs_set_res(input, ABS_MT_POSITION_X, data->x_res); + input_abs_set_res(input, ABS_MT_POSITION_Y, data->y_res); + input_set_abs_params(input, ABS_MT_PRESSURE, 0, + ETP_MAX_PRESSURE, 0, 0); + input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, + ETP_FINGER_WIDTH * max_width, 0, 0); + input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, + ETP_FINGER_WIDTH * min_width, 0, 0); + + data->input = input; + + return 0; +} + +static void elan_disable_regulator(void *_data) +{ + struct elan_tp_data *data = _data; + + regulator_disable(data->vcc); +} + +static void elan_remove_sysfs_groups(void *_data) +{ + struct elan_tp_data *data = _data; + + sysfs_remove_groups(&data->client->dev.kobj, elan_sysfs_groups); +} + +static int elan_probe(struct i2c_client *client, + const struct i2c_device_id *dev_id) +{ + const struct elan_transport_ops *transport_ops; + struct device *dev = &client->dev; + struct elan_tp_data *data; + unsigned long irqflags; + int error; + + if (IS_ENABLED(CONFIG_MOUSE_ELAN_I2C_I2C) && + i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + transport_ops = &elan_i2c_ops; + } else if (IS_ENABLED(CONFIG_MOUSE_ELAN_I2C_SMBUS) && + i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE_DATA | + I2C_FUNC_SMBUS_BLOCK_DATA | + I2C_FUNC_SMBUS_I2C_BLOCK)) { + transport_ops = &elan_smbus_ops; + } else { + dev_err(dev, "not a supported I2C/SMBus adapter\n"); + return -EIO; + } + + data = devm_kzalloc(&client->dev, sizeof(struct elan_tp_data), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + i2c_set_clientdata(client, data); + + data->ops = transport_ops; + 
data->client = client; + init_completion(&data->fw_completion); + mutex_init(&data->sysfs_mutex); + + data->vcc = devm_regulator_get(&client->dev, "vcc"); + if (IS_ERR(data->vcc)) { + error = PTR_ERR(data->vcc); + if (error != -EPROBE_DEFER) + dev_err(&client->dev, + "Failed to get 'vcc' regulator: %d\n", + error); + return error; + } + + error = regulator_enable(data->vcc); + if (error) { + dev_err(&client->dev, + "Failed to enable regulator: %d\n", error); + return error; + } + + error = devm_add_action(&client->dev, + elan_disable_regulator, data); + if (error) { + regulator_disable(data->vcc); + dev_err(&client->dev, + "Failed to add disable regulator action: %d\n", + error); + return error; + } + + /* Initialize the touchpad. */ + error = elan_initialize(data); + if (error) + return error; + + error = elan_query_device_info(data); + if (error) + return error; + + error = elan_query_device_parameters(data); + if (error) + return error; + + dev_dbg(&client->dev, + "Elan Touchpad Information:\n" + " Module product ID: 0x%04x\n" + " Firmware Version: 0x%04x\n" + " Sample Version: 0x%04x\n" + " IAP Version: 0x%04x\n" + " Max ABS X,Y: %d,%d\n" + " Width X,Y: %d,%d\n" + " Resolution X,Y: %d,%d (dots/mm)\n", + data->product_id, + data->fw_version, + data->sm_version, + data->iap_version, + data->max_x, data->max_y, + data->width_x, data->width_y, + data->x_res, data->y_res); + + /* Set up input device properties based on queried parameters. */ + error = elan_setup_input_device(data); + if (error) + return error; + + /* + * Systems using device tree should set up interrupt via DTS, + * the rest will use the default falling edge interrupts. + */ + irqflags = client->dev.of_node ? 0 : IRQF_TRIGGER_FALLING; + + error = devm_request_threaded_irq(&client->dev, client->irq, + NULL, elan_isr, + irqflags | IRQF_ONESHOT, + client->name, data); + if (error) { + dev_err(&client->dev, "cannot register irq=%d\n", client->irq); + return error; + } + + error = sysfs_create_groups(&client->dev.kobj, elan_sysfs_groups); + if (error) { + dev_err(&client->dev, "failed to create sysfs attributes: %d\n", + error); + return error; + } + + error = devm_add_action(&client->dev, + elan_remove_sysfs_groups, data); + if (error) { + elan_remove_sysfs_groups(data); + dev_err(&client->dev, + "Failed to add sysfs cleanup action: %d\n", + error); + return error; + } + + error = input_register_device(data->input); + if (error) { + dev_err(&client->dev, "failed to register input device: %d\n", + error); + return error; + } + + /* + * Systems using device tree should set up wakeup via DTS, + * the rest will configure device as wakeup source by default. + */ + if (!client->dev.of_node) + device_init_wakeup(&client->dev, true); + + return 0; +} + +static int __maybe_unused elan_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + int ret; + + /* + * We are taking the mutex to make sure sysfs operations are + * complete before we attempt to bring the device into low[er] + * power mode. 
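+ * Taking it interruptibly also means the wait can be aborted: if a
+ * sysfs operation is stuck holding the mutex, the suspend attempt
+ * fails with an error instead of blocking forever.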
+ */ + ret = mutex_lock_interruptible(&data->sysfs_mutex); + if (ret) + return ret; + + disable_irq(client->irq); + + if (device_may_wakeup(dev)) { + ret = elan_sleep(data); + /* Enable wake from IRQ */ + data->irq_wake = (enable_irq_wake(client->irq) == 0); + } else { + ret = elan_disable_power(data); + } + + mutex_unlock(&data->sysfs_mutex); + return ret; +} + +static int __maybe_unused elan_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + int error; + + if (device_may_wakeup(dev) && data->irq_wake) { + disable_irq_wake(client->irq); + data->irq_wake = false; + } + + error = elan_enable_power(data); + if (error) + dev_err(dev, "power up when resuming failed: %d\n", error); + + error = elan_initialize(data); + if (error) + dev_err(dev, "initialize when resuming failed: %d\n", error); + + enable_irq(data->client->irq); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(elan_pm_ops, elan_suspend, elan_resume); + +static const struct i2c_device_id elan_id[] = { + { DRIVER_NAME, 0 }, + { }, +}; +MODULE_DEVICE_TABLE(i2c, elan_id); + +#ifdef CONFIG_ACPI +static const struct acpi_device_id elan_acpi_id[] = { + { "ELAN0000", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, elan_acpi_id); +#endif + +#ifdef CONFIG_OF +static const struct of_device_id elan_of_match[] = { + { .compatible = "elan,ekth3000" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, elan_of_match); +#endif + +static struct i2c_driver elan_driver = { + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + .pm = &elan_pm_ops, + .acpi_match_table = ACPI_PTR(elan_acpi_id), + .of_match_table = of_match_ptr(elan_of_match), + }, + .probe = elan_probe, + .id_table = elan_id, +}; + +module_i2c_driver(elan_driver); + +MODULE_AUTHOR("Duson Lin <dusonlin@emc.com.tw>"); +MODULE_DESCRIPTION("Elan I2C/SMBus Touchpad driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(ELAN_DRIVER_VERSION); diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c new file mode 100644 index 00000000000..97d4937fc24 --- /dev/null +++ b/drivers/input/mouse/elan_i2c_i2c.c @@ -0,0 +1,611 @@ +/* + * Elan I2C/SMBus Touchpad driver - I2C interface + * + * Copyright (c) 2013 ELAN Microelectronics Corp. + * + * Author: æž—æ”¿ç¶ (Duson Lin) <dusonlin@emc.com.tw> + * Version: 1.5.5 + * + * Based on cyapa driver: + * copyright (c) 2011-2012 Cypress Semiconductor, Inc. + * copyright (c) 2011-2012 Google, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Trademarks are the property of their respective owners. 
+ */ + +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/i2c.h> +#include <linux/interrupt.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <asm/unaligned.h> + +#include "elan_i2c.h" + +/* Elan i2c commands */ +#define ETP_I2C_RESET 0x0100 +#define ETP_I2C_WAKE_UP 0x0800 +#define ETP_I2C_SLEEP 0x0801 +#define ETP_I2C_DESC_CMD 0x0001 +#define ETP_I2C_REPORT_DESC_CMD 0x0002 +#define ETP_I2C_STAND_CMD 0x0005 +#define ETP_I2C_UNIQUEID_CMD 0x0101 +#define ETP_I2C_FW_VERSION_CMD 0x0102 +#define ETP_I2C_SM_VERSION_CMD 0x0103 +#define ETP_I2C_XY_TRACENUM_CMD 0x0105 +#define ETP_I2C_MAX_X_AXIS_CMD 0x0106 +#define ETP_I2C_MAX_Y_AXIS_CMD 0x0107 +#define ETP_I2C_RESOLUTION_CMD 0x0108 +#define ETP_I2C_IAP_VERSION_CMD 0x0110 +#define ETP_I2C_SET_CMD 0x0300 +#define ETP_I2C_POWER_CMD 0x0307 +#define ETP_I2C_FW_CHECKSUM_CMD 0x030F +#define ETP_I2C_IAP_CTRL_CMD 0x0310 +#define ETP_I2C_IAP_CMD 0x0311 +#define ETP_I2C_IAP_RESET_CMD 0x0314 +#define ETP_I2C_IAP_CHECKSUM_CMD 0x0315 +#define ETP_I2C_CALIBRATE_CMD 0x0316 +#define ETP_I2C_MAX_BASELINE_CMD 0x0317 +#define ETP_I2C_MIN_BASELINE_CMD 0x0318 + +#define ETP_I2C_REPORT_LEN 34 +#define ETP_I2C_DESC_LENGTH 30 +#define ETP_I2C_REPORT_DESC_LENGTH 158 +#define ETP_I2C_INF_LENGTH 2 +#define ETP_I2C_IAP_PASSWORD 0x1EA5 +#define ETP_I2C_IAP_RESET 0xF0F0 +#define ETP_I2C_MAIN_MODE_ON (1 << 9) +#define ETP_I2C_IAP_REG_L 0x01 +#define ETP_I2C_IAP_REG_H 0x06 + +static int elan_i2c_read_block(struct i2c_client *client, + u16 reg, u8 *val, u16 len) +{ + __le16 buf[] = { + cpu_to_le16(reg), + }; + struct i2c_msg msgs[] = { + { + .addr = client->addr, + .flags = client->flags & I2C_M_TEN, + .len = sizeof(buf), + .buf = (u8 *)buf, + }, + { + .addr = client->addr, + .flags = (client->flags & I2C_M_TEN) | I2C_M_RD, + .len = len, + .buf = val, + } + }; + int ret; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + return ret == ARRAY_SIZE(msgs) ? 0 : (ret < 0 ? ret : -EIO); +} + +static int elan_i2c_read_cmd(struct i2c_client *client, u16 reg, u8 *val) +{ + int retval; + + retval = elan_i2c_read_block(client, reg, val, ETP_I2C_INF_LENGTH); + if (retval < 0) { + dev_err(&client->dev, "reading cmd (0x%04x) fail.\n", reg); + return retval; + } + + return 0; +} + +static int elan_i2c_write_cmd(struct i2c_client *client, u16 reg, u16 cmd) +{ + __le16 buf[] = { + cpu_to_le16(reg), + cpu_to_le16(cmd), + }; + struct i2c_msg msg = { + .addr = client->addr, + .flags = client->flags & I2C_M_TEN, + .len = sizeof(buf), + .buf = (u8 *)buf, + }; + int ret; + + ret = i2c_transfer(client->adapter, &msg, 1); + return ret == 1 ? 0 : (ret < 0 ? 
ret : -EIO); +} + +static int elan_i2c_initialize(struct i2c_client *client) +{ + struct device *dev = &client->dev; + int error; + u8 val[256]; + + error = elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD, ETP_I2C_RESET); + if (error) { + dev_err(dev, "device reset failed: %d\n", error); + return error; + } + + /* Wait for the device to reset */ + msleep(100); + + /* get reset acknowledgement 0000 */ + error = i2c_master_recv(client, val, ETP_I2C_INF_LENGTH); + if (error < 0) { + dev_err(dev, "failed to read reset response: %d\n", error); + return error; + } + + error = elan_i2c_read_block(client, ETP_I2C_DESC_CMD, + val, ETP_I2C_DESC_LENGTH); + if (error) { + dev_err(dev, "cannot get device descriptor: %d\n", error); + return error; + } + + error = elan_i2c_read_block(client, ETP_I2C_REPORT_DESC_CMD, + val, ETP_I2C_REPORT_DESC_LENGTH); + if (error) { + dev_err(dev, "fetching report descriptor failed.: %d\n", error); + return error; + } + + return 0; +} + +static int elan_i2c_sleep_control(struct i2c_client *client, bool sleep) +{ + return elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD, + sleep ? ETP_I2C_SLEEP : ETP_I2C_WAKE_UP); +} + +static int elan_i2c_power_control(struct i2c_client *client, bool enable) +{ + u8 val[2]; + u16 reg; + int error; + + error = elan_i2c_read_cmd(client, ETP_I2C_POWER_CMD, val); + if (error) { + dev_err(&client->dev, + "failed to read current power state: %d\n", + error); + return error; + } + + reg = le16_to_cpup((__le16 *)val); + if (enable) + reg &= ~ETP_DISABLE_POWER; + else + reg |= ETP_DISABLE_POWER; + + error = elan_i2c_write_cmd(client, ETP_I2C_POWER_CMD, reg); + if (error) { + dev_err(&client->dev, + "failed to write current power state: %d\n", + error); + return error; + } + + return 0; +} + +static int elan_i2c_set_mode(struct i2c_client *client, u8 mode) +{ + return elan_i2c_write_cmd(client, ETP_I2C_SET_CMD, mode); +} + + +static int elan_i2c_calibrate(struct i2c_client *client) +{ + return elan_i2c_write_cmd(client, ETP_I2C_CALIBRATE_CMD, 1); +} + +static int elan_i2c_calibrate_result(struct i2c_client *client, u8 *val) +{ + return elan_i2c_read_block(client, ETP_I2C_CALIBRATE_CMD, val, 1); +} + +static int elan_i2c_get_baseline_data(struct i2c_client *client, + bool max_baseline, u8 *value) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, + max_baseline ? ETP_I2C_MAX_BASELINE_CMD : + ETP_I2C_MIN_BASELINE_CMD, + val); + if (error) + return error; + + *value = le16_to_cpup((__le16 *)val); + + return 0; +} + +static int elan_i2c_get_version(struct i2c_client *client, + bool iap, u8 *version) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, + iap ? ETP_I2C_IAP_VERSION_CMD : + ETP_I2C_FW_VERSION_CMD, + val); + if (error) { + dev_err(&client->dev, "failed to get %s version: %d\n", + iap ? 
"IAP" : "FW", error); + return error; + } + + *version = val[0]; + return 0; +} + +static int elan_i2c_get_sm_version(struct i2c_client *client, u8 *version) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, ETP_I2C_SM_VERSION_CMD, val); + if (error) { + dev_err(&client->dev, "failed to get SM version: %d\n", error); + return error; + } + + *version = val[0]; + return 0; +} + +static int elan_i2c_get_product_id(struct i2c_client *client, u8 *id) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, ETP_I2C_UNIQUEID_CMD, val); + if (error) { + dev_err(&client->dev, "failed to get product ID: %d\n", error); + return error; + } + + *id = val[0]; + return 0; +} + +static int elan_i2c_get_checksum(struct i2c_client *client, + bool iap, u16 *csum) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, + iap ? ETP_I2C_IAP_CHECKSUM_CMD : + ETP_I2C_FW_CHECKSUM_CMD, + val); + if (error) { + dev_err(&client->dev, "failed to get %s checksum: %d\n", + iap ? "IAP" : "FW", error); + return error; + } + + *csum = le16_to_cpup((__le16 *)val); + return 0; +} + +static int elan_i2c_get_max(struct i2c_client *client, + unsigned int *max_x, unsigned int *max_y) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, ETP_I2C_MAX_X_AXIS_CMD, val); + if (error) { + dev_err(&client->dev, "failed to get X dimension: %d\n", error); + return error; + } + + *max_x = le16_to_cpup((__le16 *)val) & 0x0fff; + + error = elan_i2c_read_cmd(client, ETP_I2C_MAX_Y_AXIS_CMD, val); + if (error) { + dev_err(&client->dev, "failed to get Y dimension: %d\n", error); + return error; + } + + *max_y = le16_to_cpup((__le16 *)val) & 0x0fff; + + return 0; +} + +static int elan_i2c_get_resolution(struct i2c_client *client, + u8 *hw_res_x, u8 *hw_res_y) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, ETP_I2C_RESOLUTION_CMD, val); + if (error) { + dev_err(&client->dev, "failed to get resolution: %d\n", error); + return error; + } + + *hw_res_x = val[0]; + *hw_res_y = val[1]; + + return 0; +} + +static int elan_i2c_get_num_traces(struct i2c_client *client, + unsigned int *x_traces, + unsigned int *y_traces) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, ETP_I2C_XY_TRACENUM_CMD, val); + if (error) { + dev_err(&client->dev, "failed to get trace info: %d\n", error); + return error; + } + + *x_traces = val[0] - 1; + *y_traces = val[1] - 1; + + return 0; +} + +static int elan_i2c_iap_get_mode(struct i2c_client *client, enum tp_mode *mode) +{ + int error; + u16 constant; + u8 val[3]; + + error = elan_i2c_read_cmd(client, ETP_I2C_IAP_CTRL_CMD, val); + if (error) { + dev_err(&client->dev, + "failed to read iap control register: %d\n", + error); + return error; + } + + constant = le16_to_cpup((__le16 *)val); + dev_dbg(&client->dev, "iap control reg: 0x%04x.\n", constant); + + *mode = (constant & ETP_I2C_MAIN_MODE_ON) ? 
MAIN_MODE : IAP_MODE; + + return 0; +} + +static int elan_i2c_iap_reset(struct i2c_client *client) +{ + int error; + + error = elan_i2c_write_cmd(client, ETP_I2C_IAP_RESET_CMD, + ETP_I2C_IAP_RESET); + if (error) { + dev_err(&client->dev, "cannot reset IC: %d\n", error); + return error; + } + + return 0; +} + +static int elan_i2c_set_flash_key(struct i2c_client *client) +{ + int error; + + error = elan_i2c_write_cmd(client, ETP_I2C_IAP_CMD, + ETP_I2C_IAP_PASSWORD); + if (error) { + dev_err(&client->dev, "cannot set flash key: %d\n", error); + return error; + } + + return 0; +} + +static int elan_i2c_prepare_fw_update(struct i2c_client *client) +{ + struct device *dev = &client->dev; + int error; + enum tp_mode mode; + u8 val[3]; + u16 password; + + /* Get FW in which mode (IAP_MODE/MAIN_MODE) */ + error = elan_i2c_iap_get_mode(client, &mode); + if (error) + return error; + + if (mode == IAP_MODE) { + /* Reset IC */ + error = elan_i2c_iap_reset(client); + if (error) + return error; + + msleep(30); + } + + /* Set flash key*/ + error = elan_i2c_set_flash_key(client); + if (error) + return error; + + /* Wait for F/W IAP initialization */ + msleep(mode == MAIN_MODE ? 100 : 30); + + /* Check if we are in IAP mode or not */ + error = elan_i2c_iap_get_mode(client, &mode); + if (error) + return error; + + if (mode == MAIN_MODE) { + dev_err(dev, "wrong mode: %d\n", mode); + return -EIO; + } + + /* Set flash key again */ + error = elan_i2c_set_flash_key(client); + if (error) + return error; + + /* Wait for F/W IAP initialization */ + msleep(30); + + /* read back to check we actually enabled successfully. */ + error = elan_i2c_read_cmd(client, ETP_I2C_IAP_CMD, val); + if (error) { + dev_err(dev, "cannot read iap password: %d\n", + error); + return error; + } + + password = le16_to_cpup((__le16 *)val); + if (password != ETP_I2C_IAP_PASSWORD) { + dev_err(dev, "wrong iap password: 0x%X\n", password); + return -EIO; + } + + return 0; +} + +static int elan_i2c_write_fw_block(struct i2c_client *client, + const u8 *page, u16 checksum, int idx) +{ + struct device *dev = &client->dev; + u8 page_store[ETP_FW_PAGE_SIZE + 4]; + u8 val[3]; + u16 result; + int ret, error; + + page_store[0] = ETP_I2C_IAP_REG_L; + page_store[1] = ETP_I2C_IAP_REG_H; + memcpy(&page_store[2], page, ETP_FW_PAGE_SIZE); + /* recode checksum at last two bytes */ + put_unaligned_le16(checksum, &page_store[ETP_FW_PAGE_SIZE + 2]); + + ret = i2c_master_send(client, page_store, sizeof(page_store)); + if (ret != sizeof(page_store)) { + error = ret < 0 ? ret : -EIO; + dev_err(dev, "Failed to write page %d: %d\n", idx, error); + return error; + } + + /* Wait for F/W to update one page ROM data. 
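+ * (For reference, the buffer just sent is laid out as
+ * [IAP_REG_L][IAP_REG_H][ETP_FW_PAGE_SIZE-byte page][csum lo][csum hi],
+ * i.e. ETP_FW_PAGE_SIZE + 4 bytes in total.)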
*/
+ msleep(20);
+
+ error = elan_i2c_read_cmd(client, ETP_I2C_IAP_CTRL_CMD, val);
+ if (error) {
+ dev_err(dev, "Failed to read IAP write result: %d\n", error);
+ return error;
+ }
+
+ result = le16_to_cpup((__le16 *)val);
+ if (result & (ETP_FW_IAP_PAGE_ERR | ETP_FW_IAP_INTF_ERR)) {
+ dev_err(dev, "IAP reports failed write: %04hx\n",
+ result);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int elan_i2c_finish_fw_update(struct i2c_client *client,
+ struct completion *completion)
+{
+ struct device *dev = &client->dev;
+ long ret;
+ int error;
+ int len;
+ u8 buffer[ETP_I2C_INF_LENGTH];
+
+ reinit_completion(completion);
+ enable_irq(client->irq);
+
+ error = elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD, ETP_I2C_RESET);
+ if (!error)
+ ret = wait_for_completion_interruptible_timeout(completion,
+ msecs_to_jiffies(300));
+ disable_irq(client->irq);
+
+ if (error) {
+ dev_err(dev, "device reset failed: %d\n", error);
+ return error;
+ } else if (ret == 0) {
+ dev_err(dev, "timeout waiting for device reset\n");
+ return -ETIMEDOUT;
+ } else if (ret < 0) {
+ error = ret;
+ dev_err(dev, "error waiting for device reset: %d\n", error);
+ return error;
+ }
+
+ len = i2c_master_recv(client, buffer, ETP_I2C_INF_LENGTH);
+ if (len != ETP_I2C_INF_LENGTH) {
+ error = len < 0 ? len : -EIO;
+ dev_err(dev, "failed to read INT signal: %d (%d)\n",
+ error, len);
+ return error;
+ }
+
+ return 0;
+}
+
+static int elan_i2c_get_report(struct i2c_client *client, u8 *report)
+{
+ int len;
+
+ len = i2c_master_recv(client, report, ETP_I2C_REPORT_LEN);
+ if (len < 0) {
+ dev_err(&client->dev, "failed to read report data: %d\n", len);
+ return len;
+ }
+
+ if (len != ETP_I2C_REPORT_LEN) {
+ dev_err(&client->dev,
+ "wrong report length (%d vs %d expected)\n",
+ len, ETP_I2C_REPORT_LEN);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+const struct elan_transport_ops elan_i2c_ops = {
+ .initialize = elan_i2c_initialize,
+ .sleep_control = elan_i2c_sleep_control,
+ .power_control = elan_i2c_power_control,
+ .set_mode = elan_i2c_set_mode,
+
+ .calibrate = elan_i2c_calibrate,
+ .calibrate_result = elan_i2c_calibrate_result,
+
+ .get_baseline_data = elan_i2c_get_baseline_data,
+
+ .get_version = elan_i2c_get_version,
+ .get_sm_version = elan_i2c_get_sm_version,
+ .get_product_id = elan_i2c_get_product_id,
+ .get_checksum = elan_i2c_get_checksum,
+
+ .get_max = elan_i2c_get_max,
+ .get_resolution = elan_i2c_get_resolution,
+ .get_num_traces = elan_i2c_get_num_traces,
+
+ .iap_get_mode = elan_i2c_iap_get_mode,
+ .iap_reset = elan_i2c_iap_reset,
+
+ .prepare_fw_update = elan_i2c_prepare_fw_update,
+ .write_fw_block = elan_i2c_write_fw_block,
+ .finish_fw_update = elan_i2c_finish_fw_update,
+
+ .get_report = elan_i2c_get_report,
+};
diff --git a/drivers/input/mouse/elan_i2c_smbus.c b/drivers/input/mouse/elan_i2c_smbus.c
new file mode 100644
index 00000000000..359bf8583d5
--- /dev/null
+++ b/drivers/input/mouse/elan_i2c_smbus.c
@@ -0,0 +1,514 @@
+/*
+ * Elan I2C/SMBus Touchpad driver - SMBus interface
+ *
+ * Copyright (c) 2013 ELAN Microelectronics Corp.
+ *
+ * Author: 林政維 (Duson Lin) <dusonlin@emc.com.tw>
+ * Version: 1.5.5
+ *
+ * Based on cyapa driver:
+ * copyright (c) 2011-2012 Cypress Semiconductor, Inc.
+ * copyright (c) 2011-2012 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Trademarks are the property of their respective owners.
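+ *
+ * Bus format, in brief: unlike the raw-I2C transport, every query here
+ * is an SMBus block read of a single-byte command, and multi-byte
+ * values come back big-endian; hence the be16_to_cpup() calls below,
+ * where elan_i2c_i2c.c uses le16_to_cpup().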
+ */ + +#include <linux/delay.h> +#include <linux/i2c.h> +#include <linux/init.h> +#include <linux/kernel.h> + +#include "elan_i2c.h" + +/* Elan SMbus commands */ +#define ETP_SMBUS_IAP_CMD 0x00 +#define ETP_SMBUS_ENABLE_TP 0x20 +#define ETP_SMBUS_SLEEP_CMD 0x21 +#define ETP_SMBUS_IAP_PASSWORD_WRITE 0x29 +#define ETP_SMBUS_IAP_PASSWORD_READ 0x80 +#define ETP_SMBUS_WRITE_FW_BLOCK 0x2A +#define ETP_SMBUS_IAP_RESET_CMD 0x2B +#define ETP_SMBUS_RANGE_CMD 0xA0 +#define ETP_SMBUS_FW_VERSION_CMD 0xA1 +#define ETP_SMBUS_XY_TRACENUM_CMD 0xA2 +#define ETP_SMBUS_SM_VERSION_CMD 0xA3 +#define ETP_SMBUS_UNIQUEID_CMD 0xA3 +#define ETP_SMBUS_RESOLUTION_CMD 0xA4 +#define ETP_SMBUS_HELLOPACKET_CMD 0xA7 +#define ETP_SMBUS_PACKET_QUERY 0xA8 +#define ETP_SMBUS_IAP_VERSION_CMD 0xAC +#define ETP_SMBUS_IAP_CTRL_CMD 0xAD +#define ETP_SMBUS_IAP_CHECKSUM_CMD 0xAE +#define ETP_SMBUS_FW_CHECKSUM_CMD 0xAF +#define ETP_SMBUS_MAX_BASELINE_CMD 0xC3 +#define ETP_SMBUS_MIN_BASELINE_CMD 0xC4 +#define ETP_SMBUS_CALIBRATE_QUERY 0xC5 + +#define ETP_SMBUS_REPORT_LEN 32 +#define ETP_SMBUS_REPORT_OFFSET 2 +#define ETP_SMBUS_HELLOPACKET_LEN 5 +#define ETP_SMBUS_IAP_PASSWORD 0x1234 +#define ETP_SMBUS_IAP_MODE_ON (1 << 6) + +static int elan_smbus_initialize(struct i2c_client *client) +{ + u8 check[ETP_SMBUS_HELLOPACKET_LEN] = { 0x55, 0x55, 0x55, 0x55, 0x55 }; + u8 values[ETP_SMBUS_HELLOPACKET_LEN] = { 0, 0, 0, 0, 0 }; + int len, error; + + /* Get hello packet */ + len = i2c_smbus_read_block_data(client, + ETP_SMBUS_HELLOPACKET_CMD, values); + if (len != ETP_SMBUS_HELLOPACKET_LEN) { + dev_err(&client->dev, "hello packet length fail: %d\n", len); + error = len < 0 ? len : -EIO; + return error; + } + + /* compare hello packet */ + if (memcmp(values, check, ETP_SMBUS_HELLOPACKET_LEN)) { + dev_err(&client->dev, "hello packet fail [%*px]\n", + ETP_SMBUS_HELLOPACKET_LEN, values); + return -ENXIO; + } + + /* enable tp */ + error = i2c_smbus_write_byte(client, ETP_SMBUS_ENABLE_TP); + if (error) { + dev_err(&client->dev, "failed to enable touchpad: %d\n", error); + return error; + } + + return 0; +} + +static int elan_smbus_set_mode(struct i2c_client *client, u8 mode) +{ + u8 cmd[4] = { 0x00, 0x07, 0x00, mode }; + + return i2c_smbus_write_block_data(client, ETP_SMBUS_IAP_CMD, + sizeof(cmd), cmd); +} + +static int elan_smbus_sleep_control(struct i2c_client *client, bool sleep) +{ + if (sleep) + return i2c_smbus_write_byte(client, ETP_SMBUS_SLEEP_CMD); + else + return 0; /* XXX should we send ETP_SMBUS_ENABLE_TP here? */ +} + +static int elan_smbus_power_control(struct i2c_client *client, bool enable) +{ + return 0; /* A no-op */ +} + +static int elan_smbus_calibrate(struct i2c_client *client) +{ + u8 cmd[4] = { 0x00, 0x08, 0x00, 0x01 }; + + return i2c_smbus_write_block_data(client, ETP_SMBUS_IAP_CMD, + sizeof(cmd), cmd); +} + +static int elan_smbus_calibrate_result(struct i2c_client *client, u8 *val) +{ + int error; + + error = i2c_smbus_read_block_data(client, + ETP_SMBUS_CALIBRATE_QUERY, val); + if (error < 0) + return error; + + return 0; +} + +static int elan_smbus_get_baseline_data(struct i2c_client *client, + bool max_baseline, u8 *value) +{ + int error; + u8 val[3]; + + error = i2c_smbus_read_block_data(client, + max_baseline ? 
+ ETP_SMBUS_MAX_BASELINE_CMD :
+ ETP_SMBUS_MIN_BASELINE_CMD,
+ val);
+ if (error < 0)
+ return error;
+
+ *value = be16_to_cpup((__be16 *)val);
+
+ return 0;
+}
+
+static int elan_smbus_get_version(struct i2c_client *client,
+ bool iap, u8 *version)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client,
+ iap ? ETP_SMBUS_IAP_VERSION_CMD :
+ ETP_SMBUS_FW_VERSION_CMD,
+ val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get %s version: %d\n",
+ iap ? "IAP" : "FW", error);
+ return error;
+ }
+
+ *version = val[2];
+ return 0;
+}
+
+static int elan_smbus_get_sm_version(struct i2c_client *client, u8 *version)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client,
+ ETP_SMBUS_SM_VERSION_CMD, val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get SM version: %d\n", error);
+ return error;
+ }
+
+ *version = val[0]; /* XXX Why 0 and not 2 as in IAP/FW versions? */
+ return 0;
+}
+
+static int elan_smbus_get_product_id(struct i2c_client *client, u8 *id)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client,
+ ETP_SMBUS_UNIQUEID_CMD, val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get product ID: %d\n", error);
+ return error;
+ }
+
+ *id = val[1];
+ return 0;
+}
+
+static int elan_smbus_get_checksum(struct i2c_client *client,
+ bool iap, u16 *csum)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client,
+ iap ? ETP_SMBUS_FW_CHECKSUM_CMD :
+ ETP_SMBUS_IAP_CHECKSUM_CMD,
+ val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get %s checksum: %d\n",
+ iap ? "IAP" : "FW", error);
+ return error;
+ }
+
+ *csum = be16_to_cpup((__be16 *)val);
+ return 0;
+}
+
+static int elan_smbus_get_max(struct i2c_client *client,
+ unsigned int *max_x, unsigned int *max_y)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client, ETP_SMBUS_RANGE_CMD, val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get dimensions: %d\n", error);
+ return error;
+ }
+
+ *max_x = (0x0f & val[0]) << 8 | val[1];
+ *max_y = (0xf0 & val[0]) << 4 | val[2];
+
+ return 0;
+}
+
+static int elan_smbus_get_resolution(struct i2c_client *client,
+ u8 *hw_res_x, u8 *hw_res_y)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client,
+ ETP_SMBUS_RESOLUTION_CMD, val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get resolution: %d\n", error);
+ return error;
+ }
+
+ *hw_res_x = val[1] & 0x0F;
+ *hw_res_y = (val[1] & 0xF0) >> 4;
+
+ return 0;
+}
+
+static int elan_smbus_get_num_traces(struct i2c_client *client,
+ unsigned int *x_traces,
+ unsigned int *y_traces)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client,
+ ETP_SMBUS_XY_TRACENUM_CMD, val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get trace info: %d\n", error);
+ return error;
+ }
+
+ *x_traces = val[1] - 1;
+ *y_traces = val[2] - 1;
+
+ return 0;
+}
+
+static int elan_smbus_iap_get_mode(struct i2c_client *client,
+ enum tp_mode *mode)
+{
+ int error;
+ u16 constant;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client, ETP_SMBUS_IAP_CTRL_CMD, val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to read iap control register: %d\n",
+ error);
+ return error;
+ }
+
+ constant = be16_to_cpup((__be16 *)val);
+ dev_dbg(&client->dev, "iap control reg: 0x%04x.\n", constant);
+
+ *mode = (constant & ETP_SMBUS_IAP_MODE_ON) ?
IAP_MODE : MAIN_MODE; + + return 0; +} + +static int elan_smbus_iap_reset(struct i2c_client *client) +{ + int error; + + error = i2c_smbus_write_byte(client, ETP_SMBUS_IAP_RESET_CMD); + if (error) { + dev_err(&client->dev, "cannot reset IC: %d\n", error); + return error; + } + + return 0; +} + +static int elan_smbus_set_flash_key(struct i2c_client *client) +{ + int error; + u8 cmd[4] = { 0x00, 0x0B, 0x00, 0x5A }; + + error = i2c_smbus_write_block_data(client, ETP_SMBUS_IAP_CMD, + sizeof(cmd), cmd); + if (error) { + dev_err(&client->dev, "cannot set flash key: %d\n", error); + return error; + } + + return 0; +} + +static int elan_smbus_prepare_fw_update(struct i2c_client *client) +{ + struct device *dev = &client->dev; + int len; + int error; + enum tp_mode mode; + u8 val[3]; + u8 cmd[4] = {0x0F, 0x78, 0x00, 0x06}; + u16 password; + + /* Get FW in which mode (IAP_MODE/MAIN_MODE) */ + error = elan_smbus_iap_get_mode(client, &mode); + if (error) + return error; + + if (mode == MAIN_MODE) { + + /* set flash key */ + error = elan_smbus_set_flash_key(client); + if (error) + return error; + + /* write iap password */ + if (i2c_smbus_write_byte(client, + ETP_SMBUS_IAP_PASSWORD_WRITE) < 0) { + dev_err(dev, "cannot write iap password\n"); + return -EIO; + } + + error = i2c_smbus_write_block_data(client, ETP_SMBUS_IAP_CMD, + sizeof(cmd), cmd); + if (error) { + dev_err(dev, "failed to write iap password: %d\n", + error); + return error; + } + + /* + * Read back password to make sure we enabled flash + * successfully. + */ + len = i2c_smbus_read_block_data(client, + ETP_SMBUS_IAP_PASSWORD_READ, + val); + if (len < sizeof(u16)) { + error = len < 0 ? len : -EIO; + dev_err(dev, "failed to read iap password: %d\n", + error); + return error; + } + + password = be16_to_cpup((__be16 *)val); + if (password != ETP_SMBUS_IAP_PASSWORD) { + dev_err(dev, "wrong iap password = 0x%X\n", password); + return -EIO; + } + + /* Wait 30ms for MAIN_MODE change to IAP_MODE */ + msleep(30); + } + + error = elan_smbus_set_flash_key(client); + if (error) + return error; + + /* Reset IC */ + error = elan_smbus_iap_reset(client); + if (error) + return error; + + return 0; +} + + +static int elan_smbus_write_fw_block(struct i2c_client *client, + const u8 *page, u16 checksum, int idx) +{ + struct device *dev = &client->dev; + int error; + u16 result; + u8 val[3]; + + /* + * Due to the limitation of smbus protocol limiting + * transfer to 32 bytes at a time, we must split block + * in 2 transfers. + */ + error = i2c_smbus_write_block_data(client, + ETP_SMBUS_WRITE_FW_BLOCK, + ETP_FW_PAGE_SIZE / 2, + page); + if (error) { + dev_err(dev, "Failed to write page %d (part %d): %d\n", + idx, 1, error); + return error; + } + + error = i2c_smbus_write_block_data(client, + ETP_SMBUS_WRITE_FW_BLOCK, + ETP_FW_PAGE_SIZE / 2, + page + ETP_FW_PAGE_SIZE / 2); + if (error) { + dev_err(dev, "Failed to write page %d (part %d): %d\n", + idx, 2, error); + return error; + } + + + /* Wait for F/W to update one page ROM data. 
*/ + usleep_range(8000, 10000); + + error = i2c_smbus_read_block_data(client, + ETP_SMBUS_IAP_CTRL_CMD, val); + if (error < 0) { + dev_err(dev, "Failed to read IAP write result: %d\n", + error); + return error; + } + + result = be16_to_cpup((__be16 *)val); + if (result & (ETP_FW_IAP_PAGE_ERR | ETP_FW_IAP_INTF_ERR)) { + dev_err(dev, "IAP reports failed write: %04hx\n", + result); + return -EIO; + } + + return 0; +} + +static int elan_smbus_get_report(struct i2c_client *client, u8 *report) +{ + int len; + + len = i2c_smbus_read_block_data(client, + ETP_SMBUS_PACKET_QUERY, + &report[ETP_SMBUS_REPORT_OFFSET]); + if (len < 0) { + dev_err(&client->dev, "failed to read report data: %d\n", len); + return len; + } + + if (len != ETP_SMBUS_REPORT_LEN) { + dev_err(&client->dev, + "wrong report length (%d vs %d expected)\n", + len, ETP_SMBUS_REPORT_LEN); + return -EIO; + } + + return 0; +} + +static int elan_smbus_finish_fw_update(struct i2c_client *client, + struct completion *fw_completion) +{ + /* No special handling unlike I2C transport */ + return 0; +} + +const struct elan_transport_ops elan_smbus_ops = { + .initialize = elan_smbus_initialize, + .sleep_control = elan_smbus_sleep_control, + .power_control = elan_smbus_power_control, + .set_mode = elan_smbus_set_mode, + + .calibrate = elan_smbus_calibrate, + .calibrate_result = elan_smbus_calibrate_result, + + .get_baseline_data = elan_smbus_get_baseline_data, + + .get_version = elan_smbus_get_version, + .get_sm_version = elan_smbus_get_sm_version, + .get_product_id = elan_smbus_get_product_id, + .get_checksum = elan_smbus_get_checksum, + + .get_max = elan_smbus_get_max, + .get_resolution = elan_smbus_get_resolution, + .get_num_traces = elan_smbus_get_num_traces, + + .iap_get_mode = elan_smbus_iap_get_mode, + .iap_reset = elan_smbus_iap_reset, + + .prepare_fw_update = elan_smbus_prepare_fw_update, + .write_fw_block = elan_smbus_write_fw_block, + .finish_fw_update = elan_smbus_finish_fw_update, + + .get_report = elan_smbus_get_report, +}; diff --git a/drivers/input/mouse/lifebook.h b/drivers/input/mouse/lifebook.h index 4c4326c6f50..0baf02a70a9 100644 --- a/drivers/input/mouse/lifebook.h +++ b/drivers/input/mouse/lifebook.h @@ -16,14 +16,14 @@ void lifebook_module_init(void); int lifebook_detect(struct psmouse *psmouse, bool set_properties); int lifebook_init(struct psmouse *psmouse); #else -inline void lifebook_module_init(void) +static inline void lifebook_module_init(void) { } -inline int lifebook_detect(struct psmouse *psmouse, bool set_properties) +static inline int lifebook_detect(struct psmouse *psmouse, bool set_properties) { return -ENOSYS; } -inline int lifebook_init(struct psmouse *psmouse) +static inline int lifebook_init(struct psmouse *psmouse) { return -ENOSYS; } diff --git a/drivers/input/mouse/navpoint.c b/drivers/input/mouse/navpoint.c index 2a0360f5b5f..d6e8f58a1de 100644 --- a/drivers/input/mouse/navpoint.c +++ b/drivers/input/mouse/navpoint.c @@ -318,8 +318,7 @@ static int navpoint_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int navpoint_suspend(struct device *dev) +static int __maybe_unused navpoint_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct navpoint *navpoint = platform_get_drvdata(pdev); @@ -333,7 +332,7 @@ static int navpoint_suspend(struct device *dev) return 0; } -static int navpoint_resume(struct device *dev) +static int __maybe_unused navpoint_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct 
navpoint *navpoint = platform_get_drvdata(pdev); @@ -346,7 +345,6 @@ static int navpoint_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(navpoint_pm_ops, navpoint_suspend, navpoint_resume); diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c index ad822608f6e..878f18498f3 100644 --- a/drivers/input/mouse/synaptics_i2c.c +++ b/drivers/input/mouse/synaptics_i2c.c @@ -614,8 +614,7 @@ static int synaptics_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int synaptics_i2c_suspend(struct device *dev) +static int __maybe_unused synaptics_i2c_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct synaptics_i2c *touch = i2c_get_clientdata(client); @@ -628,7 +627,7 @@ static int synaptics_i2c_suspend(struct device *dev) return 0; } -static int synaptics_i2c_resume(struct device *dev) +static int __maybe_unused synaptics_i2c_resume(struct device *dev) { int ret; struct i2c_client *client = to_i2c_client(dev); @@ -643,7 +642,6 @@ static int synaptics_i2c_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(synaptics_i2c_pm, synaptics_i2c_suspend, synaptics_i2c_resume); diff --git a/drivers/input/serio/altera_ps2.c b/drivers/input/serio/altera_ps2.c index 8921c96589b..131d7826dc6 100644 --- a/drivers/input/serio/altera_ps2.c +++ b/drivers/input/serio/altera_ps2.c @@ -24,9 +24,7 @@ struct ps2if { struct serio *io; - struct resource *iomem_res; void __iomem *base; - unsigned irq; }; /* @@ -83,16 +81,34 @@ static void altera_ps2_close(struct serio *io) static int altera_ps2_probe(struct platform_device *pdev) { struct ps2if *ps2if; + struct resource *res; struct serio *serio; int error, irq; - ps2if = kzalloc(sizeof(struct ps2if), GFP_KERNEL); - serio = kzalloc(sizeof(struct serio), GFP_KERNEL); - if (!ps2if || !serio) { - error = -ENOMEM; - goto err_free_mem; + ps2if = devm_kzalloc(&pdev->dev, sizeof(struct ps2if), GFP_KERNEL); + if (!ps2if) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ps2if->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ps2if->base)) + return PTR_ERR(ps2if->base); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return -ENXIO; + + error = devm_request_irq(&pdev->dev, irq, altera_ps2_rxint, 0, + pdev->name, ps2if); + if (error) { + dev_err(&pdev->dev, "could not request IRQ %d\n", irq); + return error; } + serio = kzalloc(sizeof(struct serio), GFP_KERNEL); + if (!serio) + return -ENOMEM; + serio->id.type = SERIO_8042; serio->write = altera_ps2_write; serio->open = altera_ps2_open; @@ -103,56 +119,12 @@ static int altera_ps2_probe(struct platform_device *pdev) serio->dev.parent = &pdev->dev; ps2if->io = serio; - ps2if->iomem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (ps2if->iomem_res == NULL) { - error = -ENOENT; - goto err_free_mem; - } - - - irq = platform_get_irq(pdev, 0); - if (irq < 0) { - error = -ENXIO; - goto err_free_mem; - } - ps2if->irq = irq; - - if (!request_mem_region(ps2if->iomem_res->start, - resource_size(ps2if->iomem_res), pdev->name)) { - error = -EBUSY; - goto err_free_mem; - } - - ps2if->base = ioremap(ps2if->iomem_res->start, - resource_size(ps2if->iomem_res)); - if (!ps2if->base) { - error = -ENOMEM; - goto err_free_res; - } - - error = request_irq(ps2if->irq, altera_ps2_rxint, 0, pdev->name, ps2if); - if (error) { - dev_err(&pdev->dev, "could not allocate IRQ %d: %d\n", - ps2if->irq, error); - goto err_unmap; - } - - dev_info(&pdev->dev, "base %p, irq %d\n", 
ps2if->base, ps2if->irq); + dev_info(&pdev->dev, "base %p, irq %d\n", ps2if->base, irq); serio_register_port(ps2if->io); platform_set_drvdata(pdev, ps2if); return 0; - - err_unmap: - iounmap(ps2if->base); - err_free_res: - release_mem_region(ps2if->iomem_res->start, - resource_size(ps2if->iomem_res)); - err_free_mem: - kfree(ps2if); - kfree(serio); - return error; } /* @@ -163,11 +135,6 @@ static int altera_ps2_remove(struct platform_device *pdev) struct ps2if *ps2if = platform_get_drvdata(pdev); serio_unregister_port(ps2if->io); - free_irq(ps2if->irq, ps2if); - iounmap(ps2if->base); - release_mem_region(ps2if->iomem_res->start, - resource_size(ps2if->iomem_res)); - kfree(ps2if); return 0; } diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index faeeb137246..c66d1b53843 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -579,6 +579,16 @@ static const struct dmi_system_id __initconst i8042_dmi_nopnp_table[] = { }, }, { + /* + * Intel NUC D54250WYK - does not have i8042 controller but + * declares PS/2 devices in DSDT. + */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "D54250WYK"), + DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + }, + }, + { /* MSI Wind U-100 */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "U-100"), diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index d399b8b0f00..a05a5179da3 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -514,7 +514,7 @@ static void serio_release_port(struct device *dev) */ static void serio_init_port(struct serio *serio) { - static atomic_t serio_no = ATOMIC_INIT(0); + static atomic_t serio_no = ATOMIC_INIT(-1); __module_get(THIS_MODULE); @@ -525,7 +525,7 @@ static void serio_init_port(struct serio *serio) mutex_init(&serio->drv_mutex); device_initialize(&serio->dev); dev_set_name(&serio->dev, "serio%lu", - (unsigned long)atomic_inc_return(&serio_no) - 1); + (unsigned long)atomic_inc_return(&serio_no)); serio->dev.bus = &serio_bus; serio->dev.release = serio_release_port; serio->dev.groups = serio_device_attr_groups; diff --git a/drivers/input/serio/serio_raw.c b/drivers/input/serio/serio_raw.c index c9a02fe5757..71ef5d65a0c 100644 --- a/drivers/input/serio/serio_raw.c +++ b/drivers/input/serio/serio_raw.c @@ -292,7 +292,7 @@ static irqreturn_t serio_raw_interrupt(struct serio *serio, unsigned char data, static int serio_raw_connect(struct serio *serio, struct serio_driver *drv) { - static atomic_t serio_raw_no = ATOMIC_INIT(0); + static atomic_t serio_raw_no = ATOMIC_INIT(-1); struct serio_raw *serio_raw; int err; @@ -303,7 +303,7 @@ static int serio_raw_connect(struct serio *serio, struct serio_driver *drv) } snprintf(serio_raw->name, sizeof(serio_raw->name), - "serio_raw%ld", (long)atomic_inc_return(&serio_raw_no) - 1); + "serio_raw%ld", (long)atomic_inc_return(&serio_raw_no)); kref_init(&serio_raw->kref); INIT_LIST_HEAD(&serio_raw->client_list); init_waitqueue_head(&serio_raw->wait); diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index e1d8003d01f..58917525126 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -295,6 +295,19 @@ config TOUCHSCREEN_FUJITSU To compile this driver as a module, choose M here: the module will be called fujitsu-ts. 
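+# Example (illustrative) .config fragment: the two I2C touchscreen
+# drivers introduced below can both be built as modules:
+#   CONFIG_TOUCHSCREEN_GOODIX=m
+#   CONFIG_TOUCHSCREEN_ELAN=m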
+config TOUCHSCREEN_GOODIX + tristate "Goodix I2C touchscreen" + depends on I2C && ACPI + help + Say Y here if you have the Goodix touchscreen (such as one + installed in Onda v975w tablets) connected to your + system. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called goodix. + config TOUCHSCREEN_ILI210X tristate "Ilitek ILI210X based touchscreen" depends on I2C @@ -334,6 +347,18 @@ config TOUCHSCREEN_GUNZE To compile this driver as a module, choose M here: the module will be called gunze. +config TOUCHSCREEN_ELAN + tristate "Elan eKTH I2C touchscreen" + depends on I2C + help + Say Y here if you have an Elan eKTH I2C touchscreen + connected to your system. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called elants_i2c. + config TOUCHSCREEN_ELO tristate "Elo serial touchscreens" select SERIO diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index 090e61cc917..0242fea2102 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -31,9 +31,11 @@ obj-$(CONFIG_TOUCHSCREEN_EDT_FT5X06) += edt-ft5x06.o obj-$(CONFIG_TOUCHSCREEN_HAMPSHIRE) += hampshire.o obj-$(CONFIG_TOUCHSCREEN_GUNZE) += gunze.o obj-$(CONFIG_TOUCHSCREEN_EETI) += eeti_ts.o +obj-$(CONFIG_TOUCHSCREEN_ELAN) += elants_i2c.o obj-$(CONFIG_TOUCHSCREEN_ELO) += elo.o obj-$(CONFIG_TOUCHSCREEN_EGALAX) += egalax_ts.o obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o +obj-$(CONFIG_TOUCHSCREEN_GOODIX) += goodix.o obj-$(CONFIG_TOUCHSCREEN_ILI210X) += ili210x.o obj-$(CONFIG_TOUCHSCREEN_INEXIO) += inexio.o obj-$(CONFIG_TOUCHSCREEN_INTEL_MID) += intel-mid-touch.o diff --git a/drivers/input/touchscreen/ad7877.c b/drivers/input/touchscreen/ad7877.c index 523865daa1d..da4e5bb5e04 100644 --- a/drivers/input/touchscreen/ad7877.c +++ b/drivers/input/touchscreen/ad7877.c @@ -820,8 +820,7 @@ static int ad7877_remove(struct spi_device *spi) return 0; } -#ifdef CONFIG_PM_SLEEP -static int ad7877_suspend(struct device *dev) +static int __maybe_unused ad7877_suspend(struct device *dev) { struct ad7877 *ts = dev_get_drvdata(dev); @@ -830,7 +829,7 @@ static int ad7877_suspend(struct device *dev) return 0; } -static int ad7877_resume(struct device *dev) +static int __maybe_unused ad7877_resume(struct device *dev) { struct ad7877 *ts = dev_get_drvdata(dev); @@ -838,7 +837,6 @@ static int ad7877_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(ad7877_pm, ad7877_suspend, ad7877_resume); diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c index 1eb9d3c2088..fec66ad8051 100644 --- a/drivers/input/touchscreen/ad7879.c +++ b/drivers/input/touchscreen/ad7879.c @@ -284,8 +284,7 @@ static void ad7879_close(struct input_dev* input) __ad7879_disable(ts); } -#ifdef CONFIG_PM_SLEEP -static int ad7879_suspend(struct device *dev) +static int __maybe_unused ad7879_suspend(struct device *dev) { struct ad7879 *ts = dev_get_drvdata(dev); @@ -301,7 +300,7 @@ static int ad7879_suspend(struct device *dev) return 0; } -static int ad7879_resume(struct device *dev) +static int __maybe_unused ad7879_resume(struct device *dev) { struct ad7879 *ts = dev_get_drvdata(dev); @@ -316,7 +315,6 @@ static int ad7879_resume(struct device *dev) return 0; } -#endif SIMPLE_DEV_PM_OPS(ad7879_pm_ops, ad7879_suspend, ad7879_resume); EXPORT_SYMBOL(ad7879_pm_ops); diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index e57ba52bf48..e4eb8a6c658 
100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -883,8 +883,7 @@ static irqreturn_t ads7846_irq(int irq, void *handle) return IRQ_HANDLED; } -#ifdef CONFIG_PM_SLEEP -static int ads7846_suspend(struct device *dev) +static int __maybe_unused ads7846_suspend(struct device *dev) { struct ads7846 *ts = dev_get_drvdata(dev); @@ -906,7 +905,7 @@ static int ads7846_suspend(struct device *dev) return 0; } -static int ads7846_resume(struct device *dev) +static int __maybe_unused ads7846_resume(struct device *dev) { struct ads7846 *ts = dev_get_drvdata(dev); @@ -927,7 +926,6 @@ static int ads7846_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(ads7846_pm, ads7846_suspend, ads7846_resume); diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index aaacf8bfa61..bb070206223 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -2244,8 +2244,7 @@ static int mxt_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int mxt_suspend(struct device *dev) +static int __maybe_unused mxt_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct mxt_data *data = i2c_get_clientdata(client); @@ -2261,7 +2260,7 @@ static int mxt_suspend(struct device *dev) return 0; } -static int mxt_resume(struct device *dev) +static int __maybe_unused mxt_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct mxt_data *data = i2c_get_clientdata(client); @@ -2276,7 +2275,6 @@ static int mxt_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(mxt_pm_ops, mxt_suspend, mxt_resume); diff --git a/drivers/input/touchscreen/auo-pixcir-ts.c b/drivers/input/touchscreen/auo-pixcir-ts.c index 7f3c9478778..40e02dd5b2f 100644 --- a/drivers/input/touchscreen/auo-pixcir-ts.c +++ b/drivers/input/touchscreen/auo-pixcir-ts.c @@ -417,8 +417,7 @@ static void auo_pixcir_input_close(struct input_dev *dev) return; } -#ifdef CONFIG_PM_SLEEP -static int auo_pixcir_suspend(struct device *dev) +static int __maybe_unused auo_pixcir_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct auo_pixcir_ts *ts = i2c_get_clientdata(client); @@ -450,7 +449,7 @@ unlock: return ret; } -static int auo_pixcir_resume(struct device *dev) +static int __maybe_unused auo_pixcir_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct auo_pixcir_ts *ts = i2c_get_clientdata(client); @@ -479,7 +478,6 @@ unlock: return ret; } -#endif static SIMPLE_DEV_PM_OPS(auo_pixcir_pm_ops, auo_pixcir_suspend, auo_pixcir_resume); diff --git a/drivers/input/touchscreen/cy8ctmg110_ts.c b/drivers/input/touchscreen/cy8ctmg110_ts.c index 5bf1aeeea82..f2119ee0e21 100644 --- a/drivers/input/touchscreen/cy8ctmg110_ts.c +++ b/drivers/input/touchscreen/cy8ctmg110_ts.c @@ -291,8 +291,7 @@ err_free_mem: return err; } -#ifdef CONFIG_PM_SLEEP -static int cy8ctmg110_suspend(struct device *dev) +static int __maybe_unused cy8ctmg110_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct cy8ctmg110 *ts = i2c_get_clientdata(client); @@ -306,7 +305,7 @@ static int cy8ctmg110_suspend(struct device *dev) return 0; } -static int cy8ctmg110_resume(struct device *dev) +static int __maybe_unused cy8ctmg110_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct cy8ctmg110 *ts = i2c_get_clientdata(client); @@ -319,7 +318,6 @@ static int 
cy8ctmg110_resume(struct device *dev) } return 0; } -#endif static SIMPLE_DEV_PM_OPS(cy8ctmg110_pm, cy8ctmg110_suspend, cy8ctmg110_resume); diff --git a/drivers/input/touchscreen/cyttsp_core.c b/drivers/input/touchscreen/cyttsp_core.c index eee656f77a2..5b74e8b84e7 100644 --- a/drivers/input/touchscreen/cyttsp_core.c +++ b/drivers/input/touchscreen/cyttsp_core.c @@ -472,8 +472,7 @@ static int cyttsp_disable(struct cyttsp *ts) return 0; } -#ifdef CONFIG_PM_SLEEP -static int cyttsp_suspend(struct device *dev) +static int __maybe_unused cyttsp_suspend(struct device *dev) { struct cyttsp *ts = dev_get_drvdata(dev); int retval = 0; @@ -491,7 +490,7 @@ static int cyttsp_suspend(struct device *dev) return retval; } -static int cyttsp_resume(struct device *dev) +static int __maybe_unused cyttsp_resume(struct device *dev) { struct cyttsp *ts = dev_get_drvdata(dev); @@ -507,8 +506,6 @@ static int cyttsp_resume(struct device *dev) return 0; } -#endif - SIMPLE_DEV_PM_OPS(cyttsp_pm_ops, cyttsp_suspend, cyttsp_resume); EXPORT_SYMBOL_GPL(cyttsp_pm_ops); diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index ee3434f1e94..3793fcc7e5d 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -1092,8 +1092,7 @@ static int edt_ft5x06_ts_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int edt_ft5x06_ts_suspend(struct device *dev) +static int __maybe_unused edt_ft5x06_ts_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -1103,7 +1102,7 @@ static int edt_ft5x06_ts_suspend(struct device *dev) return 0; } -static int edt_ft5x06_ts_resume(struct device *dev) +static int __maybe_unused edt_ft5x06_ts_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -1112,7 +1111,6 @@ static int edt_ft5x06_ts_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(edt_ft5x06_ts_pm_ops, edt_ft5x06_ts_suspend, edt_ft5x06_ts_resume); diff --git a/drivers/input/touchscreen/eeti_ts.c b/drivers/input/touchscreen/eeti_ts.c index b1884ddd7a8..09be6ced715 100644 --- a/drivers/input/touchscreen/eeti_ts.c +++ b/drivers/input/touchscreen/eeti_ts.c @@ -264,8 +264,7 @@ static int eeti_ts_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int eeti_ts_suspend(struct device *dev) +static int __maybe_unused eeti_ts_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct eeti_ts_priv *priv = i2c_get_clientdata(client); @@ -284,7 +283,7 @@ static int eeti_ts_suspend(struct device *dev) return 0; } -static int eeti_ts_resume(struct device *dev) +static int __maybe_unused eeti_ts_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct eeti_ts_priv *priv = i2c_get_clientdata(client); @@ -302,7 +301,6 @@ static int eeti_ts_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(eeti_ts_pm, eeti_ts_suspend, eeti_ts_resume); diff --git a/drivers/input/touchscreen/egalax_ts.c b/drivers/input/touchscreen/egalax_ts.c index c8057847d71..4c56299284e 100644 --- a/drivers/input/touchscreen/egalax_ts.c +++ b/drivers/input/touchscreen/egalax_ts.c @@ -239,8 +239,7 @@ static const struct i2c_device_id egalax_ts_id[] = { }; MODULE_DEVICE_TABLE(i2c, egalax_ts_id); -#ifdef CONFIG_PM_SLEEP -static int egalax_ts_suspend(struct device *dev) +static int __maybe_unused egalax_ts_suspend(struct device *dev) { static const u8 suspend_cmd[MAX_I2C_DATA_LEN] = { 0x3, 0x6, 0xa, 0x3, 0x36, 0x3f, 
0x2, 0, 0, 0 @@ -252,13 +251,12 @@ static int egalax_ts_suspend(struct device *dev) return ret > 0 ? 0 : ret; } -static int egalax_ts_resume(struct device *dev) +static int __maybe_unused egalax_ts_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); return egalax_wake_up_device(client); } -#endif static SIMPLE_DEV_PM_OPS(egalax_ts_pm_ops, egalax_ts_suspend, egalax_ts_resume); diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c new file mode 100644 index 00000000000..a510f7ef9b6 --- /dev/null +++ b/drivers/input/touchscreen/elants_i2c.c @@ -0,0 +1,1271 @@ +/* + * Elan Microelectronics touch panels with I2C interface + * + * Copyright (C) 2014 Elan Microelectronics Corporation. + * Scott Liu <scott.liu@emc.com.tw> + * + * This code is partly based on hid-multitouch.c: + * + * Copyright (c) 2010-2012 Stephane Chatty <chatty@enac.fr> + * Copyright (c) 2010-2012 Benjamin Tissoires <benjamin.tissoires@gmail.com> + * Copyright (c) 2010-2012 Ecole Nationale de l'Aviation Civile, France + * + * + * This code is partly based on i2c-hid.c: + * + * Copyright (c) 2012 Benjamin Tissoires <benjamin.tissoires@gmail.com> + * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France + * Copyright (c) 2012 Red Hat, Inc + */ + +/* + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + */ + +#include <linux/module.h> +#include <linux/input.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/async.h> +#include <linux/i2c.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <linux/buffer_head.h> +#include <linux/version.h> +#include <linux/slab.h> +#include <linux/firmware.h> +#include <linux/version.h> +#include <linux/input/mt.h> +#include <linux/acpi.h> +#include <linux/of.h> +#include <asm/unaligned.h> + +/* Device, Driver information */ +#define DEVICE_NAME "elants_i2c" +#define DRV_VERSION "1.0.9" + +/* Convert from rows or columns into resolution */ +#define ELAN_TS_RESOLUTION(n, m) (((n) - 1) * (m)) + +/* FW header data */ +#define HEADER_SIZE 4 +#define FW_HDR_TYPE 0 +#define FW_HDR_COUNT 1 +#define FW_HDR_LENGTH 2 + +/* Buffer mode Queue Header information */ +#define QUEUE_HEADER_SINGLE 0x62 +#define QUEUE_HEADER_NORMAL 0X63 +#define QUEUE_HEADER_WAIT 0x64 + +/* Command header definition */ +#define CMD_HEADER_WRITE 0x54 +#define CMD_HEADER_READ 0x53 +#define CMD_HEADER_6B_READ 0x5B +#define CMD_HEADER_RESP 0x52 +#define CMD_HEADER_6B_RESP 0x9B +#define CMD_HEADER_HELLO 0x55 +#define CMD_HEADER_REK 0x66 + +/* FW position data */ +#define PACKET_SIZE 55 +#define MAX_CONTACT_NUM 10 +#define FW_POS_HEADER 0 +#define FW_POS_STATE 1 +#define FW_POS_TOTAL 2 +#define FW_POS_XY 3 +#define FW_POS_CHECKSUM 34 +#define FW_POS_WIDTH 35 +#define FW_POS_PRESSURE 45 + +#define HEADER_REPORT_10_FINGER 0x62 + +/* Header (4 bytes) plus 3 fill 10-finger packets */ +#define MAX_PACKET_SIZE 169 + +#define BOOT_TIME_DELAY_MS 50 + +/* FW read command, 0x53 0x?? 
0x0, 0x01 */ +#define E_ELAN_INFO_FW_VER 0x00 +#define E_ELAN_INFO_BC_VER 0x10 +#define E_ELAN_INFO_TEST_VER 0xE0 +#define E_ELAN_INFO_FW_ID 0xF0 +#define E_INFO_OSR 0xD6 +#define E_INFO_PHY_SCAN 0xD7 +#define E_INFO_PHY_DRIVER 0xD8 + +#define MAX_RETRIES 3 +#define MAX_FW_UPDATE_RETRIES 30 + +#define ELAN_FW_PAGESIZE 132 +#define ELAN_FW_FILENAME "elants_i2c.bin" + +/* calibration timeout definition */ +#define ELAN_CALI_TIMEOUT_MSEC 10000 + +enum elants_state { + ELAN_STATE_NORMAL, + ELAN_WAIT_QUEUE_HEADER, + ELAN_WAIT_RECALIBRATION, +}; + +enum elants_iap_mode { + ELAN_IAP_OPERATIONAL, + ELAN_IAP_RECOVERY, +}; + +/* struct elants_data - represents state of Elan touchscreen device */ +struct elants_data { + struct i2c_client *client; + struct input_dev *input; + + u16 fw_version; + u8 test_version; + u8 solution_version; + u8 bc_version; + u8 iap_version; + u16 hw_version; + unsigned int x_res; /* resolution in units/mm */ + unsigned int y_res; + unsigned int x_max; + unsigned int y_max; + + enum elants_state state; + enum elants_iap_mode iap_mode; + + /* Guards against concurrent access to the device via sysfs */ + struct mutex sysfs_mutex; + + u8 cmd_resp[HEADER_SIZE]; + struct completion cmd_done; + + u8 buf[MAX_PACKET_SIZE]; + + bool wake_irq_enabled; +}; + +static int elants_i2c_send(struct i2c_client *client, + const void *data, size_t size) +{ + int ret; + + ret = i2c_master_send(client, data, size); + if (ret == size) + return 0; + + if (ret >= 0) + ret = -EIO; + + dev_err(&client->dev, "%s failed (%*ph): %d\n", + __func__, (int)size, data, ret); + + return ret; +} + +static int elants_i2c_read(struct i2c_client *client, void *data, size_t size) +{ + int ret; + + ret = i2c_master_recv(client, data, size); + if (ret == size) + return 0; + + if (ret >= 0) + ret = -EIO; + + dev_err(&client->dev, "%s failed: %d\n", __func__, ret); + + return ret; +} + +static int elants_i2c_execute_command(struct i2c_client *client, + const u8 *cmd, size_t cmd_size, + u8 *resp, size_t resp_size) +{ + struct i2c_msg msgs[2]; + int ret; + u8 expected_response; + + switch (cmd[0]) { + case CMD_HEADER_READ: + expected_response = CMD_HEADER_RESP; + break; + + case CMD_HEADER_6B_READ: + expected_response = CMD_HEADER_6B_RESP; + break; + + default: + dev_err(&client->dev, "%s: invalid command %*ph\n", + __func__, (int)cmd_size, cmd); + return -EINVAL; + } + + msgs[0].addr = client->addr; + msgs[0].flags = client->flags & I2C_M_TEN; + msgs[0].len = cmd_size; + msgs[0].buf = (u8 *)cmd; + + msgs[1].addr = client->addr; + msgs[1].flags = client->flags & I2C_M_TEN; + msgs[1].flags |= I2C_M_RD; + msgs[1].len = resp_size; + msgs[1].buf = resp; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (ret < 0) + return ret; + + if (ret != ARRAY_SIZE(msgs) || resp[FW_HDR_TYPE] != expected_response) + return -EIO; + + return 0; +} + +static int elants_i2c_calibrate(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + int ret, error; + static const u8 w_flashkey[] = { 0x54, 0xC0, 0xE1, 0x5A }; + static const u8 rek[] = { 0x54, 0x29, 0x00, 0x01 }; + static const u8 rek_resp[] = { CMD_HEADER_REK, 0x66, 0x66, 0x66 }; + + disable_irq(client->irq); + + ts->state = ELAN_WAIT_RECALIBRATION; + reinit_completion(&ts->cmd_done); + + elants_i2c_send(client, w_flashkey, sizeof(w_flashkey)); + elants_i2c_send(client, rek, sizeof(rek)); + + enable_irq(client->irq); + + ret = wait_for_completion_interruptible_timeout(&ts->cmd_done, + msecs_to_jiffies(ELAN_CALI_TIMEOUT_MSEC)); + + ts->state = 
ELAN_STATE_NORMAL; + + if (ret <= 0) { + error = ret < 0 ? ret : -ETIMEDOUT; + dev_err(&client->dev, + "error while waiting for calibration to complete: %d\n", + error); + return error; + } + + if (memcmp(rek_resp, ts->cmd_resp, sizeof(rek_resp))) { + dev_err(&client->dev, + "unexpected calibration response: %*ph\n", + (int)sizeof(ts->cmd_resp), ts->cmd_resp); + return -EINVAL; + } + + return 0; +} + +static int elants_i2c_sw_reset(struct i2c_client *client) +{ + const u8 soft_rst_cmd[] = { 0x77, 0x77, 0x77, 0x77 }; + int error; + + error = elants_i2c_send(client, soft_rst_cmd, + sizeof(soft_rst_cmd)); + if (error) { + dev_err(&client->dev, "software reset failed: %d\n", error); + return error; + } + + /* + * We should wait at least 10 msec (but no more than 40) before + * sending fastboot or IAP command to the device. + */ + msleep(30); + + return 0; +} + +static u16 elants_i2c_parse_version(u8 *buf) +{ + return get_unaligned_be32(buf) >> 4; +} + +static int elants_i2c_query_fw_id(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + int error, retry_cnt; + const u8 cmd[] = { CMD_HEADER_READ, E_ELAN_INFO_FW_ID, 0x00, 0x01 }; + u8 resp[HEADER_SIZE]; + + for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) { + error = elants_i2c_execute_command(client, cmd, sizeof(cmd), + resp, sizeof(resp)); + if (!error) { + ts->hw_version = elants_i2c_parse_version(resp); + if (ts->hw_version != 0xffff) + return 0; + } + + dev_dbg(&client->dev, "read fw id error=%d, buf=%*phC\n", + error, (int)sizeof(resp), resp); + } + + dev_err(&client->dev, + "Failed to read fw id or fw id is invalid\n"); + + return -EINVAL; +} + +static int elants_i2c_query_fw_version(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + int error, retry_cnt; + const u8 cmd[] = { CMD_HEADER_READ, E_ELAN_INFO_FW_VER, 0x00, 0x01 }; + u8 resp[HEADER_SIZE]; + + for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) { + error = elants_i2c_execute_command(client, cmd, sizeof(cmd), + resp, sizeof(resp)); + if (!error) { + ts->fw_version = elants_i2c_parse_version(resp); + if (ts->fw_version != 0x0000 && + ts->fw_version != 0xffff) + return 0; + } + + dev_dbg(&client->dev, "read fw version error=%d, buf=%*phC\n", + error, (int)sizeof(resp), resp); + } + + dev_err(&client->dev, + "Failed to read fw version or fw version is invalid\n"); + + return -EINVAL; +} + +static int elants_i2c_query_test_version(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + int error, retry_cnt; + u16 version; + const u8 cmd[] = { CMD_HEADER_READ, E_ELAN_INFO_TEST_VER, 0x00, 0x01 }; + u8 resp[HEADER_SIZE]; + + for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) { + error = elants_i2c_execute_command(client, cmd, sizeof(cmd), + resp, sizeof(resp)); + if (!error) { + version = elants_i2c_parse_version(resp); + ts->test_version = version >> 8; + ts->solution_version = version & 0xff; + + return 0; + } + + dev_dbg(&client->dev, + "read test version error rc=%d, buf=%*phC\n", + error, (int)sizeof(resp), resp); + } + + dev_err(&client->dev, "Failed to read test version\n"); + + return -EINVAL; +} + +static int elants_i2c_query_bc_version(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + const u8 cmd[] = { CMD_HEADER_READ, E_ELAN_INFO_BC_VER, 0x00, 0x01 }; + u8 resp[HEADER_SIZE]; + u16 version; + int error; + + error = elants_i2c_execute_command(client, cmd, sizeof(cmd), + resp, sizeof(resp)); + if (error) { + dev_err(&client->dev, + "read BC version error=%d, buf=%*phC\n", + error, 
(int)sizeof(resp), resp); + return error; + } + + version = elants_i2c_parse_version(resp); + ts->bc_version = version >> 8; + ts->iap_version = version & 0xff; + + return 0; +} + +static int elants_i2c_query_ts_info(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + int error; + u8 resp[17]; + u16 phy_x, phy_y, rows, cols, osr; + const u8 get_resolution_cmd[] = { + CMD_HEADER_6B_READ, 0x00, 0x00, 0x00, 0x00, 0x00 + }; + const u8 get_osr_cmd[] = { + CMD_HEADER_READ, E_INFO_OSR, 0x00, 0x01 + }; + const u8 get_physical_scan_cmd[] = { + CMD_HEADER_READ, E_INFO_PHY_SCAN, 0x00, 0x01 + }; + const u8 get_physical_drive_cmd[] = { + CMD_HEADER_READ, E_INFO_PHY_DRIVER, 0x00, 0x01 + }; + + /* Get trace number */ + error = elants_i2c_execute_command(client, + get_resolution_cmd, + sizeof(get_resolution_cmd), + resp, sizeof(resp)); + if (error) { + dev_err(&client->dev, "get resolution command failed: %d\n", + error); + return error; + } + + rows = resp[2] + resp[6] + resp[10]; + cols = resp[3] + resp[7] + resp[11]; + + /* Process mm_to_pixel information */ + error = elants_i2c_execute_command(client, + get_osr_cmd, sizeof(get_osr_cmd), + resp, sizeof(resp)); + if (error) { + dev_err(&client->dev, "get osr command failed: %d\n", + error); + return error; + } + + osr = resp[3]; + + error = elants_i2c_execute_command(client, + get_physical_scan_cmd, + sizeof(get_physical_scan_cmd), + resp, sizeof(resp)); + if (error) { + dev_err(&client->dev, "get physical scan command failed: %d\n", + error); + return error; + } + + phy_x = get_unaligned_be16(&resp[2]); + + error = elants_i2c_execute_command(client, + get_physical_drive_cmd, + sizeof(get_physical_drive_cmd), + resp, sizeof(resp)); + if (error) { + dev_err(&client->dev, "get physical drive command failed: %d\n", + error); + return error; + } + + phy_y = get_unaligned_be16(&resp[2]); + + dev_dbg(&client->dev, "phy_x=%d, phy_y=%d\n", phy_x, phy_y); + + if (rows == 0 || cols == 0 || osr == 0) { + dev_warn(&client->dev, + "invalid trace number data: %d, %d, %d\n", + rows, cols, osr); + } else { + /* translate trace number to TS resolution */ + ts->x_max = ELAN_TS_RESOLUTION(rows, osr); + ts->x_res = DIV_ROUND_CLOSEST(ts->x_max, phy_x); + ts->y_max = ELAN_TS_RESOLUTION(cols, osr); + ts->y_res = DIV_ROUND_CLOSEST(ts->y_max, phy_y); + } + + return 0; +} + +static int elants_i2c_fastboot(struct i2c_client *client) +{ + const u8 boot_cmd[] = { 0x4D, 0x61, 0x69, 0x6E }; + int error; + + error = elants_i2c_send(client, boot_cmd, sizeof(boot_cmd)); + if (error) { + dev_err(&client->dev, "boot failed: %d\n", error); + return error; + } + + dev_dbg(&client->dev, "boot success -- 0x%x\n", client->addr); + return 0; +} + +static int elants_i2c_initialize(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + int error, retry_cnt; + const u8 hello_packet[] = { 0x55, 0x55, 0x55, 0x55 }; + const u8 recov_packet[] = { 0x55, 0x55, 0x80, 0x80 }; + u8 buf[HEADER_SIZE]; + + for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) { + error = elants_i2c_sw_reset(client); + if (error) { + /* Continue initializing if it's the last try */ + if (retry_cnt < MAX_RETRIES - 1) + continue; + } + + error = elants_i2c_fastboot(client); + if (error) { + /* Continue initializing if it's the last try */ + if (retry_cnt < MAX_RETRIES - 1) + continue; + } + + /* Wait for Hello packet */ + msleep(BOOT_TIME_DELAY_MS); + + error = elants_i2c_read(client, buf, sizeof(buf)); + if (error) { + dev_err(&client->dev, + "failed to read 'hello' packet: %d\n", error); 
+ } else if (!memcmp(buf, hello_packet, sizeof(hello_packet))) { + ts->iap_mode = ELAN_IAP_OPERATIONAL; + break; + } else if (!memcmp(buf, recov_packet, sizeof(recov_packet))) { + /* + * Setting error code will mark device + * in recovery mode below. + */ + error = -EIO; + break; + } else { + error = -EINVAL; + dev_err(&client->dev, + "invalid 'hello' packet: %*ph\n", + (int)sizeof(buf), buf); + } + } + + if (!error) + error = elants_i2c_query_fw_id(ts); + if (!error) + error = elants_i2c_query_fw_version(ts); + + if (error) { + ts->iap_mode = ELAN_IAP_RECOVERY; + } else { + elants_i2c_query_test_version(ts); + elants_i2c_query_bc_version(ts); + elants_i2c_query_ts_info(ts); + } + + return 0; +} + +/* + * Firmware update interface. + */ + +static int elants_i2c_fw_write_page(struct i2c_client *client, + const void *page) +{ + const u8 ack_ok[] = { 0xaa, 0xaa }; + u8 buf[2]; + int retry; + int error; + + for (retry = 0; retry < MAX_FW_UPDATE_RETRIES; retry++) { + error = elants_i2c_send(client, page, ELAN_FW_PAGESIZE); + if (error) { + dev_err(&client->dev, + "IAP Write Page failed: %d\n", error); + continue; + } + + error = elants_i2c_read(client, buf, 2); + if (error) { + dev_err(&client->dev, + "IAP Ack read failed: %d\n", error); + return error; + } + + if (!memcmp(buf, ack_ok, sizeof(ack_ok))) + return 0; + + error = -EIO; + dev_err(&client->dev, + "IAP Get Ack Error [%02x:%02x]\n", + buf[0], buf[1]); + } + + return error; +} + +static int elants_i2c_do_update_firmware(struct i2c_client *client, + const struct firmware *fw, + bool force) +{ + const u8 enter_iap[] = { 0x45, 0x49, 0x41, 0x50 }; + const u8 enter_iap2[] = { 0x54, 0x00, 0x12, 0x34 }; + const u8 iap_ack[] = { 0x55, 0xaa, 0x33, 0xcc }; + u8 buf[HEADER_SIZE]; + u16 send_id; + int page, n_fw_pages; + int error; + + /* Recovery mode detection! 
*/ + if (force) { + dev_dbg(&client->dev, "Recovery mode procedure\n"); + error = elants_i2c_send(client, enter_iap2, sizeof(enter_iap2)); + } else { + /* Start IAP Procedure */ + dev_dbg(&client->dev, "Normal IAP procedure\n"); + elants_i2c_sw_reset(client); + + error = elants_i2c_send(client, enter_iap, sizeof(enter_iap)); + } + + if (error) { + dev_err(&client->dev, "failed to enter IAP mode: %d\n", error); + return error; + } + + msleep(20); + + /* check IAP state */ + error = elants_i2c_read(client, buf, 4); + if (error) { + dev_err(&client->dev, + "failed to read IAP acknowledgement: %d\n", + error); + return error; + } + + if (memcmp(buf, iap_ack, sizeof(iap_ack))) { + dev_err(&client->dev, + "failed to enter IAP: %*ph (expected %*ph)\n", + (int)sizeof(buf), buf, (int)sizeof(iap_ack), iap_ack); + return -EIO; + } + + dev_info(&client->dev, "successfully entered IAP mode"); + + send_id = client->addr; + error = elants_i2c_send(client, &send_id, 1); + if (error) { + dev_err(&client->dev, "sending dummy byte failed: %d\n", + error); + return error; + } + + /* Clear the last page of Master */ + error = elants_i2c_send(client, fw->data, ELAN_FW_PAGESIZE); + if (error) { + dev_err(&client->dev, "clearing of the last page failed: %d\n", + error); + return error; + } + + error = elants_i2c_read(client, buf, 2); + if (error) { + dev_err(&client->dev, + "failed to read ACK for clearing the last page: %d\n", + error); + return error; + } + + n_fw_pages = fw->size / ELAN_FW_PAGESIZE; + dev_dbg(&client->dev, "IAP Pages = %d\n", n_fw_pages); + + for (page = 0; page < n_fw_pages; page++) { + error = elants_i2c_fw_write_page(client, + fw->data + page * ELAN_FW_PAGESIZE); + if (error) { + dev_err(&client->dev, + "failed to write FW page %d: %d\n", + page, error); + return error; + } + } + + /* Old iap needs to wait 200ms for WDT and rest is for hello packets */ + msleep(300); + + dev_info(&client->dev, "firmware update completed\n"); + return 0; +} + +static int elants_i2c_fw_update(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + const struct firmware *fw; + int error; + + error = request_firmware(&fw, ELAN_FW_FILENAME, &client->dev); + if (error) { + dev_err(&client->dev, "failed to request firmware %s: %d\n", + ELAN_FW_FILENAME, error); + return error; + } + + if (fw->size % ELAN_FW_PAGESIZE) { + dev_err(&client->dev, "invalid firmware length: %zu\n", + fw->size); + error = -EINVAL; + goto out; + } + + disable_irq(client->irq); + + error = elants_i2c_do_update_firmware(client, fw, + ts->iap_mode == ELAN_IAP_RECOVERY); + if (error) { + dev_err(&client->dev, "firmware update failed: %d\n", error); + ts->iap_mode = ELAN_IAP_RECOVERY; + goto out_enable_irq; + } + + error = elants_i2c_initialize(ts); + if (error) { + dev_err(&client->dev, + "failed to initialize device after firmware update: %d\n", + error); + ts->iap_mode = ELAN_IAP_RECOVERY; + goto out_enable_irq; + } + + ts->iap_mode = ELAN_IAP_OPERATIONAL; + +out_enable_irq: + ts->state = ELAN_STATE_NORMAL; + enable_irq(client->irq); + msleep(100); + + if (!error) + elants_i2c_calibrate(ts); +out: + release_firmware(fw); + return error; +} + +/* + * Event reporting. 
+ */ + +static void elants_i2c_mt_event(struct elants_data *ts, u8 *buf) +{ + struct input_dev *input = ts->input; + unsigned int n_fingers; + u16 finger_state; + int i; + + n_fingers = buf[FW_POS_STATE + 1] & 0x0f; + finger_state = ((buf[FW_POS_STATE + 1] & 0x30) << 4) | + buf[FW_POS_STATE]; + + dev_dbg(&ts->client->dev, + "n_fingers: %u, state: %04x\n", n_fingers, finger_state); + + for (i = 0; i < MAX_CONTACT_NUM && n_fingers; i++) { + if (finger_state & 1) { + unsigned int x, y, p, w; + u8 *pos; + + pos = &buf[FW_POS_XY + i * 3]; + x = (((u16)pos[0] & 0xf0) << 4) | pos[1]; + y = (((u16)pos[0] & 0x0f) << 8) | pos[2]; + p = buf[FW_POS_PRESSURE + i]; + w = buf[FW_POS_WIDTH + i]; + + dev_dbg(&ts->client->dev, "i=%d x=%d y=%d p=%d w=%d\n", + i, x, y, p, w); + + input_mt_slot(input, i); + input_mt_report_slot_state(input, MT_TOOL_FINGER, true); + input_event(input, EV_ABS, ABS_MT_POSITION_X, x); + input_event(input, EV_ABS, ABS_MT_POSITION_Y, y); + input_event(input, EV_ABS, ABS_MT_PRESSURE, p); + input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, w); + + n_fingers--; + } + + finger_state >>= 1; + } + + input_mt_sync_frame(input); + input_sync(input); +} + +static u8 elants_i2c_calculate_checksum(u8 *buf) +{ + u8 checksum = 0; + u8 i; + + for (i = 0; i < FW_POS_CHECKSUM; i++) + checksum += buf[i]; + + return checksum; +} + +static void elants_i2c_event(struct elants_data *ts, u8 *buf) +{ + u8 checksum = elants_i2c_calculate_checksum(buf); + + if (unlikely(buf[FW_POS_CHECKSUM] != checksum)) + dev_warn(&ts->client->dev, + "%s: invalid checksum for packet %02x: %02x vs. %02x\n", + __func__, buf[FW_POS_HEADER], + checksum, buf[FW_POS_CHECKSUM]); + else if (unlikely(buf[FW_POS_HEADER] != HEADER_REPORT_10_FINGER)) + dev_warn(&ts->client->dev, + "%s: unknown packet type: %02x\n", + __func__, buf[FW_POS_HEADER]); + else + elants_i2c_mt_event(ts, buf); +} + +static irqreturn_t elants_i2c_irq(int irq, void *_dev) +{ + const u8 wait_packet[] = { 0x64, 0x64, 0x64, 0x64 }; + struct elants_data *ts = _dev; + struct i2c_client *client = ts->client; + int report_count, report_len; + int i; + int len; + + len = i2c_master_recv(client, ts->buf, sizeof(ts->buf)); + if (len < 0) { + dev_err(&client->dev, "%s: failed to read data: %d\n", + __func__, len); + goto out; + } + + dev_dbg(&client->dev, "%s: packet %*ph\n", + __func__, HEADER_SIZE, ts->buf); + + switch (ts->state) { + case ELAN_WAIT_RECALIBRATION: + if (ts->buf[FW_HDR_TYPE] == CMD_HEADER_REK) { + memcpy(ts->cmd_resp, ts->buf, sizeof(ts->cmd_resp)); + complete(&ts->cmd_done); + ts->state = ELAN_STATE_NORMAL; + } + break; + + case ELAN_WAIT_QUEUE_HEADER: + if (ts->buf[FW_HDR_TYPE] != QUEUE_HEADER_NORMAL) + break; + + ts->state = ELAN_STATE_NORMAL; + /* fall through */ + + case ELAN_STATE_NORMAL: + + switch (ts->buf[FW_HDR_TYPE]) { + case CMD_HEADER_HELLO: + case CMD_HEADER_RESP: + case CMD_HEADER_REK: + break; + + case QUEUE_HEADER_WAIT: + if (memcmp(ts->buf, wait_packet, sizeof(wait_packet))) { + dev_err(&client->dev, + "invalid wait packet %*ph\n", + HEADER_SIZE, ts->buf); + } else { + ts->state = ELAN_WAIT_QUEUE_HEADER; + udelay(30); + } + break; + + case QUEUE_HEADER_SINGLE: + elants_i2c_event(ts, &ts->buf[HEADER_SIZE]); + break; + + case QUEUE_HEADER_NORMAL: + report_count = ts->buf[FW_HDR_COUNT]; + if (report_count > 3) { + dev_err(&client->dev, + "too large report count: %*ph\n", + HEADER_SIZE, ts->buf); + break; + } + + report_len = ts->buf[FW_HDR_LENGTH] / report_count; + if (report_len != PACKET_SIZE) { + dev_err(&client->dev, + "mismatching report 
length: %*ph\n", + HEADER_SIZE, ts->buf); + break; + } + + for (i = 0; i < report_count; i++) { + u8 *buf = ts->buf + HEADER_SIZE + + i * PACKET_SIZE; + elants_i2c_event(ts, buf); + } + break; + + default: + dev_err(&client->dev, "unknown packet %*ph\n", + HEADER_SIZE, ts->buf); + break; + } + break; + } + +out: + return IRQ_HANDLED; +} + +/* + * sysfs interface + */ +static ssize_t calibrate_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elants_data *ts = i2c_get_clientdata(client); + int error; + + error = mutex_lock_interruptible(&ts->sysfs_mutex); + if (error) + return error; + + error = elants_i2c_calibrate(ts); + + mutex_unlock(&ts->sysfs_mutex); + return error ?: count; +} + +static ssize_t write_update_fw(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elants_data *ts = i2c_get_clientdata(client); + int error; + + error = mutex_lock_interruptible(&ts->sysfs_mutex); + if (error) + return error; + + error = elants_i2c_fw_update(ts); + dev_dbg(dev, "firmware update result: %d\n", error); + + mutex_unlock(&ts->sysfs_mutex); + return error ?: count; +} + +static ssize_t show_iap_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elants_data *ts = i2c_get_clientdata(client); + + return sprintf(buf, "%s\n", + ts->iap_mode == ELAN_IAP_OPERATIONAL ? + "Normal" : "Recovery"); +} + +static DEVICE_ATTR(calibrate, S_IWUSR, NULL, calibrate_store); +static DEVICE_ATTR(iap_mode, S_IRUGO, show_iap_mode, NULL); +static DEVICE_ATTR(update_fw, S_IWUSR, NULL, write_update_fw); + +struct elants_version_attribute { + struct device_attribute dattr; + size_t field_offset; + size_t field_size; +}; + +#define __ELANTS_FIELD_SIZE(_field) \ + sizeof(((struct elants_data *)NULL)->_field) +#define __ELANTS_VERIFY_SIZE(_field) \ + (BUILD_BUG_ON_ZERO(__ELANTS_FIELD_SIZE(_field) > 2) + \ + __ELANTS_FIELD_SIZE(_field)) +#define ELANTS_VERSION_ATTR(_field) \ + struct elants_version_attribute elants_ver_attr_##_field = { \ + .dattr = __ATTR(_field, S_IRUGO, \ + elants_version_attribute_show, NULL), \ + .field_offset = offsetof(struct elants_data, _field), \ + .field_size = __ELANTS_VERIFY_SIZE(_field), \ + } + +static ssize_t elants_version_attribute_show(struct device *dev, + struct device_attribute *dattr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elants_data *ts = i2c_get_clientdata(client); + struct elants_version_attribute *attr = + container_of(dattr, struct elants_version_attribute, dattr); + u8 *field = (u8 *)((char *)ts + attr->field_offset); + unsigned int fmt_size; + unsigned int val; + + if (attr->field_size == 1) { + val = *field; + fmt_size = 2; /* 2 HEX digits */ + } else { + val = *(u16 *)field; + fmt_size = 4; /* 4 HEX digits */ + } + + return sprintf(buf, "%0*x\n", fmt_size, val); +} + +static ELANTS_VERSION_ATTR(fw_version); +static ELANTS_VERSION_ATTR(hw_version); +static ELANTS_VERSION_ATTR(test_version); +static ELANTS_VERSION_ATTR(solution_version); +static ELANTS_VERSION_ATTR(bc_version); +static ELANTS_VERSION_ATTR(iap_version); + +static struct attribute *elants_attributes[] = { + &dev_attr_calibrate.attr, + &dev_attr_update_fw.attr, + &dev_attr_iap_mode.attr, + + &elants_ver_attr_fw_version.dattr.attr, + &elants_ver_attr_hw_version.dattr.attr, + 
&elants_ver_attr_test_version.dattr.attr, + &elants_ver_attr_solution_version.dattr.attr, + &elants_ver_attr_bc_version.dattr.attr, + &elants_ver_attr_iap_version.dattr.attr, + NULL +}; + +static struct attribute_group elants_attribute_group = { + .attrs = elants_attributes, +}; + +static void elants_i2c_remove_sysfs_group(void *_data) +{ + struct elants_data *ts = _data; + + sysfs_remove_group(&ts->client->dev.kobj, &elants_attribute_group); +} + +static int elants_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + union i2c_smbus_data dummy; + struct elants_data *ts; + unsigned long irqflags; + int error; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + dev_err(&client->dev, + "%s: i2c check functionality error\n", DEVICE_NAME); + return -ENXIO; + } + + /* Make sure there is something at this address */ + if (i2c_smbus_xfer(client->adapter, client->addr, 0, + I2C_SMBUS_READ, 0, I2C_SMBUS_BYTE, &dummy) < 0) { + dev_err(&client->dev, "nothing at this address\n"); + return -ENXIO; + } + + ts = devm_kzalloc(&client->dev, sizeof(struct elants_data), GFP_KERNEL); + if (!ts) + return -ENOMEM; + + mutex_init(&ts->sysfs_mutex); + init_completion(&ts->cmd_done); + + ts->client = client; + i2c_set_clientdata(client, ts); + + error = elants_i2c_initialize(ts); + if (error) { + dev_err(&client->dev, "failed to initialize: %d\n", error); + return error; + } + + ts->input = devm_input_allocate_device(&client->dev); + if (!ts->input) { + dev_err(&client->dev, "Failed to allocate input device\n"); + return -ENOMEM; + } + + ts->input->name = "Elan Touchscreen"; + ts->input->id.bustype = BUS_I2C; + + __set_bit(BTN_TOUCH, ts->input->keybit); + __set_bit(EV_ABS, ts->input->evbit); + __set_bit(EV_KEY, ts->input->evbit); + + /* Single touch input params setup */ + input_set_abs_params(ts->input, ABS_X, 0, ts->x_max, 0, 0); + input_set_abs_params(ts->input, ABS_Y, 0, ts->y_max, 0, 0); + input_set_abs_params(ts->input, ABS_PRESSURE, 0, 255, 0, 0); + input_abs_set_res(ts->input, ABS_X, ts->x_res); + input_abs_set_res(ts->input, ABS_Y, ts->y_res); + + /* Multitouch input params setup */ + error = input_mt_init_slots(ts->input, MAX_CONTACT_NUM, + INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED); + if (error) { + dev_err(&client->dev, + "failed to initialize MT slots: %d\n", error); + return error; + } + + input_set_abs_params(ts->input, ABS_MT_POSITION_X, 0, ts->x_max, 0, 0); + input_set_abs_params(ts->input, ABS_MT_POSITION_Y, 0, ts->y_max, 0, 0); + input_set_abs_params(ts->input, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0); + input_set_abs_params(ts->input, ABS_MT_PRESSURE, 0, 255, 0, 0); + input_abs_set_res(ts->input, ABS_MT_POSITION_X, ts->x_res); + input_abs_set_res(ts->input, ABS_MT_POSITION_Y, ts->y_res); + + input_set_drvdata(ts->input, ts); + + error = input_register_device(ts->input); + if (error) { + dev_err(&client->dev, + "unable to register input device: %d\n", error); + return error; + } + + /* + * Systems using device tree should set up interrupt via DTS, + * the rest will use the default falling edge interrupts. + */ + irqflags = client->dev.of_node ? 0 : IRQF_TRIGGER_FALLING; + + error = devm_request_threaded_irq(&client->dev, client->irq, + NULL, elants_i2c_irq, + irqflags | IRQF_ONESHOT, + client->name, ts); + if (error) { + dev_err(&client->dev, "Failed to register interrupt\n"); + return error; + } + + /* + * Systems using device tree should set up wakeup via DTS, + * the rest will configure device as wakeup source by default. 
+ */ + if (!client->dev.of_node) + device_init_wakeup(&client->dev, true); + + error = sysfs_create_group(&client->dev.kobj, &elants_attribute_group); + if (error) { + dev_err(&client->dev, "failed to create sysfs attributes: %d\n", + error); + return error; + } + + error = devm_add_action(&client->dev, + elants_i2c_remove_sysfs_group, ts); + if (error) { + elants_i2c_remove_sysfs_group(ts); + dev_err(&client->dev, + "Failed to add sysfs cleanup action: %d\n", + error); + return error; + } + + return 0; +} + +static int __maybe_unused elants_i2c_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elants_data *ts = i2c_get_clientdata(client); + const u8 set_sleep_cmd[] = { 0x54, 0x50, 0x00, 0x01 }; + int retry_cnt; + int error; + + /* Command not support in IAP recovery mode */ + if (ts->iap_mode != ELAN_IAP_OPERATIONAL) + return -EBUSY; + + disable_irq(client->irq); + + for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) { + error = elants_i2c_send(client, set_sleep_cmd, + sizeof(set_sleep_cmd)); + if (!error) + break; + + dev_err(&client->dev, "suspend command failed: %d\n", error); + } + + if (device_may_wakeup(dev)) + ts->wake_irq_enabled = (enable_irq_wake(client->irq) == 0); + + return 0; +} + +static int __maybe_unused elants_i2c_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elants_data *ts = i2c_get_clientdata(client); + const u8 set_active_cmd[] = { 0x54, 0x58, 0x00, 0x01 }; + int retry_cnt; + int error; + + if (device_may_wakeup(dev) && ts->wake_irq_enabled) + disable_irq_wake(client->irq); + + for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) { + error = elants_i2c_send(client, set_active_cmd, + sizeof(set_active_cmd)); + if (!error) + break; + + dev_err(&client->dev, "resume command failed: %d\n", error); + } + + ts->state = ELAN_STATE_NORMAL; + enable_irq(client->irq); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(elants_i2c_pm_ops, + elants_i2c_suspend, elants_i2c_resume); + +static const struct i2c_device_id elants_i2c_id[] = { + { DEVICE_NAME, 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, elants_i2c_id); + +#ifdef CONFIG_ACPI +static const struct acpi_device_id elants_acpi_id[] = { + { "ELAN0001", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, elants_acpi_id); +#endif + +#ifdef CONFIG_OF +static const struct of_device_id elants_of_match[] = { + { .compatible = "elan,ekth3500" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, elants_of_match); +#endif + +static struct i2c_driver elants_i2c_driver = { + .probe = elants_i2c_probe, + .id_table = elants_i2c_id, + .driver = { + .name = DEVICE_NAME, + .owner = THIS_MODULE, + .pm = &elants_i2c_pm_ops, + .acpi_match_table = ACPI_PTR(elants_acpi_id), + .of_match_table = of_match_ptr(elants_of_match), + }, +}; +module_i2c_driver(elants_i2c_driver); + +MODULE_AUTHOR("Scott Liu <scott.liu@emc.com.tw>"); +MODULE_DESCRIPTION("Elan I2c Touchscreen driver"); +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("GPL"); diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c new file mode 100644 index 00000000000..ca196689f02 --- /dev/null +++ b/drivers/input/touchscreen/goodix.c @@ -0,0 +1,395 @@ +/* + * Driver for Goodix Touchscreens + * + * Copyright (c) 2014 Red Hat Inc. + * + * This code is based on gt9xx.c authored by andrew@goodix.com: + * + * 2010 - 2012 Goodix Technology. 
+ */ + +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; version 2 of the License. + */ + +#include <linux/kernel.h> +#include <linux/i2c.h> +#include <linux/input.h> +#include <linux/input/mt.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <asm/unaligned.h> + +struct goodix_ts_data { + struct i2c_client *client; + struct input_dev *input_dev; + int abs_x_max; + int abs_y_max; + unsigned int max_touch_num; + unsigned int int_trigger_type; +}; + +#define GOODIX_MAX_HEIGHT 4096 +#define GOODIX_MAX_WIDTH 4096 +#define GOODIX_INT_TRIGGER 1 +#define GOODIX_CONTACT_SIZE 8 +#define GOODIX_MAX_CONTACTS 10 + +#define GOODIX_CONFIG_MAX_LENGTH 240 + +/* Register defines */ +#define GOODIX_READ_COOR_ADDR 0x814E +#define GOODIX_REG_CONFIG_DATA 0x8047 +#define GOODIX_REG_VERSION 0x8140 + +#define RESOLUTION_LOC 1 +#define TRIGGER_LOC 6 + +static const unsigned long goodix_irq_flags[] = { + IRQ_TYPE_EDGE_RISING, + IRQ_TYPE_EDGE_FALLING, + IRQ_TYPE_LEVEL_LOW, + IRQ_TYPE_LEVEL_HIGH, +}; + +/** + * goodix_i2c_read - read data from a register of the i2c slave device. + * + * @client: i2c device. + * @reg: the register to read from. + * @buf: raw write data buffer. + * @len: length of the buffer to write + */ +static int goodix_i2c_read(struct i2c_client *client, + u16 reg, u8 *buf, int len) +{ + struct i2c_msg msgs[2]; + u16 wbuf = cpu_to_be16(reg); + int ret; + + msgs[0].flags = 0; + msgs[0].addr = client->addr; + msgs[0].len = 2; + msgs[0].buf = (u8 *) &wbuf; + + msgs[1].flags = I2C_M_RD; + msgs[1].addr = client->addr; + msgs[1].len = len; + msgs[1].buf = buf; + + ret = i2c_transfer(client->adapter, msgs, 2); + return ret < 0 ? ret : (ret != ARRAY_SIZE(msgs) ? -EIO : 0); +} + +static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data) +{ + int touch_num; + int error; + + error = goodix_i2c_read(ts->client, GOODIX_READ_COOR_ADDR, data, + GOODIX_CONTACT_SIZE + 1); + if (error) { + dev_err(&ts->client->dev, "I2C transfer error: %d\n", error); + return error; + } + + touch_num = data[0] & 0x0f; + if (touch_num > GOODIX_MAX_CONTACTS) + return -EPROTO; + + if (touch_num > 1) { + data += 1 + GOODIX_CONTACT_SIZE; + error = goodix_i2c_read(ts->client, + GOODIX_READ_COOR_ADDR + + 1 + GOODIX_CONTACT_SIZE, + data, + GOODIX_CONTACT_SIZE * (touch_num - 1)); + if (error) + return error; + } + + return touch_num; +} + +static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data) +{ + int id = coor_data[0] & 0x0F; + int input_x = get_unaligned_le16(&coor_data[1]); + int input_y = get_unaligned_le16(&coor_data[3]); + int input_w = get_unaligned_le16(&coor_data[5]); + + input_mt_slot(ts->input_dev, id); + input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, true); + input_report_abs(ts->input_dev, ABS_MT_POSITION_X, input_x); + input_report_abs(ts->input_dev, ABS_MT_POSITION_Y, input_y); + input_report_abs(ts->input_dev, ABS_MT_TOUCH_MAJOR, input_w); + input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w); +} + +/** + * goodix_process_events - Process incoming events + * + * @ts: our goodix_ts_data pointer + * + * Called when the IRQ is triggered. Read the current device state, and push + * the input events to the user space. 
+ */ +static void goodix_process_events(struct goodix_ts_data *ts) +{ + u8 point_data[1 + GOODIX_CONTACT_SIZE * GOODIX_MAX_CONTACTS]; + int touch_num; + int i; + + touch_num = goodix_ts_read_input_report(ts, point_data); + if (touch_num < 0) + return; + + for (i = 0; i < touch_num; i++) + goodix_ts_report_touch(ts, + &point_data[1 + GOODIX_CONTACT_SIZE * i]); + + input_mt_sync_frame(ts->input_dev); + input_sync(ts->input_dev); +} + +/** + * goodix_ts_irq_handler - The IRQ handler + * + * @irq: interrupt number. + * @dev_id: private data pointer. + */ +static irqreturn_t goodix_ts_irq_handler(int irq, void *dev_id) +{ + static const u8 end_cmd[] = { + GOODIX_READ_COOR_ADDR >> 8, + GOODIX_READ_COOR_ADDR & 0xff, + 0 + }; + struct goodix_ts_data *ts = dev_id; + + goodix_process_events(ts); + + if (i2c_master_send(ts->client, end_cmd, sizeof(end_cmd)) < 0) + dev_err(&ts->client->dev, "I2C write end_cmd error\n"); + + return IRQ_HANDLED; +} + +/** + * goodix_read_config - Read the embedded configuration of the panel + * + * @ts: our goodix_ts_data pointer + * + * Must be called during probe + */ +static void goodix_read_config(struct goodix_ts_data *ts) +{ + u8 config[GOODIX_CONFIG_MAX_LENGTH]; + int error; + + error = goodix_i2c_read(ts->client, GOODIX_REG_CONFIG_DATA, + config, + GOODIX_CONFIG_MAX_LENGTH); + if (error) { + dev_warn(&ts->client->dev, + "Error reading config (%d), using defaults\n", + error); + ts->abs_x_max = GOODIX_MAX_WIDTH; + ts->abs_y_max = GOODIX_MAX_HEIGHT; + ts->int_trigger_type = GOODIX_INT_TRIGGER; + return; + } + + ts->abs_x_max = get_unaligned_le16(&config[RESOLUTION_LOC]); + ts->abs_y_max = get_unaligned_le16(&config[RESOLUTION_LOC + 2]); + ts->int_trigger_type = (config[TRIGGER_LOC]) & 0x03; + if (!ts->abs_x_max || !ts->abs_y_max) { + dev_err(&ts->client->dev, + "Invalid config, using defaults\n"); + ts->abs_x_max = GOODIX_MAX_WIDTH; + ts->abs_y_max = GOODIX_MAX_HEIGHT; + } +} + + +/** + * goodix_read_version - Read goodix touchscreen version + * + * @client: the i2c client + * @version: output buffer containing the version on success + */ +static int goodix_read_version(struct i2c_client *client, u16 *version) +{ + int error; + u8 buf[6]; + + error = goodix_i2c_read(client, GOODIX_REG_VERSION, buf, sizeof(buf)); + if (error) { + dev_err(&client->dev, "read version failed: %d\n", error); + return error; + } + + if (version) + *version = get_unaligned_le16(&buf[4]); + + dev_info(&client->dev, "IC VERSION: %6ph\n", buf); + + return 0; +} + +/** + * goodix_i2c_test - I2C test function to check if the device answers. 
+ * + * @client: the i2c client + */ +static int goodix_i2c_test(struct i2c_client *client) +{ + int retry = 0; + int error; + u8 test; + + while (retry++ < 2) { + error = goodix_i2c_read(client, GOODIX_REG_CONFIG_DATA, + &test, 1); + if (!error) + return 0; + + dev_err(&client->dev, "i2c test failed attempt %d: %d\n", + retry, error); + msleep(20); + } + + return error; +} + +/** + * goodix_request_input_dev - Allocate, populate and register the input device + * + * @ts: our goodix_ts_data pointer + * + * Must be called during probe + */ +static int goodix_request_input_dev(struct goodix_ts_data *ts) +{ + int error; + + ts->input_dev = devm_input_allocate_device(&ts->client->dev); + if (!ts->input_dev) { + dev_err(&ts->client->dev, "Failed to allocate input device."); + return -ENOMEM; + } + + ts->input_dev->evbit[0] = BIT_MASK(EV_SYN) | + BIT_MASK(EV_KEY) | + BIT_MASK(EV_ABS); + + input_set_abs_params(ts->input_dev, ABS_MT_POSITION_X, 0, + ts->abs_x_max, 0, 0); + input_set_abs_params(ts->input_dev, ABS_MT_POSITION_Y, 0, + ts->abs_y_max, 0, 0); + input_set_abs_params(ts->input_dev, ABS_MT_WIDTH_MAJOR, 0, 255, 0, 0); + input_set_abs_params(ts->input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0); + + input_mt_init_slots(ts->input_dev, GOODIX_MAX_CONTACTS, + INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED); + + ts->input_dev->name = "Goodix Capacitive TouchScreen"; + ts->input_dev->phys = "input/ts"; + ts->input_dev->id.bustype = BUS_I2C; + ts->input_dev->id.vendor = 0x0416; + ts->input_dev->id.product = 0x1001; + ts->input_dev->id.version = 10427; + + error = input_register_device(ts->input_dev); + if (error) { + dev_err(&ts->client->dev, + "Failed to register input device: %d", error); + return error; + } + + return 0; +} + +static int goodix_ts_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct goodix_ts_data *ts; + unsigned long irq_flags; + int error; + u16 version_info; + + dev_dbg(&client->dev, "I2C Address: 0x%02x\n", client->addr); + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + dev_err(&client->dev, "I2C check functionality failed.\n"); + return -ENXIO; + } + + ts = devm_kzalloc(&client->dev, sizeof(*ts), GFP_KERNEL); + if (!ts) + return -ENOMEM; + + ts->client = client; + i2c_set_clientdata(client, ts); + + error = goodix_i2c_test(client); + if (error) { + dev_err(&client->dev, "I2C communication failure: %d\n", error); + return error; + } + + error = goodix_read_version(client, &version_info); + if (error) { + dev_err(&client->dev, "Read version failed.\n"); + return error; + } + + goodix_read_config(ts); + + error = goodix_request_input_dev(ts); + if (error) + return error; + + irq_flags = goodix_irq_flags[ts->int_trigger_type] | IRQF_ONESHOT; + error = devm_request_threaded_irq(&ts->client->dev, client->irq, + NULL, goodix_ts_irq_handler, + irq_flags, client->name, ts); + if (error) { + dev_err(&client->dev, "request IRQ failed: %d\n", error); + return error; + } + + return 0; +} + +static const struct i2c_device_id goodix_ts_id[] = { + { "GDIX1001:00", 0 }, + { } +}; + +static const struct acpi_device_id goodix_acpi_match[] = { + { "GDIX1001", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, goodix_acpi_match); + +static struct i2c_driver goodix_ts_driver = { + .probe = goodix_ts_probe, + .id_table = goodix_ts_id, + .driver = { + .name = "Goodix-TS", + .owner = THIS_MODULE, + .acpi_match_table = goodix_acpi_match, + }, +}; +module_i2c_driver(goodix_ts_driver); + +MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>"); 
+MODULE_AUTHOR("Bastien Nocera <hadess@hadess.net>"); +MODULE_DESCRIPTION("Goodix touchscreen driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 2a508913981..da6dc819c84 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -311,8 +311,7 @@ static int ili210x_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int ili210x_i2c_suspend(struct device *dev) +static int __maybe_unused ili210x_i2c_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -322,7 +321,7 @@ static int ili210x_i2c_suspend(struct device *dev) return 0; } -static int ili210x_i2c_resume(struct device *dev) +static int __maybe_unused ili210x_i2c_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -331,7 +330,6 @@ static int ili210x_i2c_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(ili210x_i2c_pm, ili210x_i2c_suspend, ili210x_i2c_resume); diff --git a/drivers/input/touchscreen/ipaq-micro-ts.c b/drivers/input/touchscreen/ipaq-micro-ts.c index 62c8976e616..33c134820ef 100644 --- a/drivers/input/touchscreen/ipaq-micro-ts.c +++ b/drivers/input/touchscreen/ipaq-micro-ts.c @@ -122,8 +122,7 @@ static int micro_ts_probe(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int micro_ts_suspend(struct device *dev) +static int __maybe_unused micro_ts_suspend(struct device *dev) { struct touchscreen_data *ts = dev_get_drvdata(dev); @@ -132,7 +131,7 @@ static int micro_ts_suspend(struct device *dev) return 0; } -static int micro_ts_resume(struct device *dev) +static int __maybe_unused micro_ts_resume(struct device *dev) { struct touchscreen_data *ts = dev_get_drvdata(dev); struct input_dev *input = ts->input; @@ -146,7 +145,6 @@ static int micro_ts_resume(struct device *dev) return 0; } -#endif static const struct dev_pm_ops micro_ts_dev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(micro_ts_suspend, micro_ts_resume) diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c index 372bbf7658f..67c0d31613d 100644 --- a/drivers/input/touchscreen/mms114.c +++ b/drivers/input/touchscreen/mms114.c @@ -515,8 +515,7 @@ static int mms114_probe(struct i2c_client *client, return 0; } -#ifdef CONFIG_PM_SLEEP -static int mms114_suspend(struct device *dev) +static int __maybe_unused mms114_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct mms114_data *data = i2c_get_clientdata(client); @@ -540,7 +539,7 @@ static int mms114_suspend(struct device *dev) return 0; } -static int mms114_resume(struct device *dev) +static int __maybe_unused mms114_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct mms114_data *data = i2c_get_clientdata(client); @@ -559,7 +558,6 @@ static int mms114_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(mms114_pm_ops, mms114_suspend, mms114_resume); diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c index fc49c75317d..4fb5537fdd4 100644 --- a/drivers/input/touchscreen/pixcir_i2c_ts.c +++ b/drivers/input/touchscreen/pixcir_i2c_ts.c @@ -347,8 +347,7 @@ static void pixcir_input_close(struct input_dev *dev) pixcir_stop(ts); } -#ifdef CONFIG_PM_SLEEP -static int pixcir_i2c_ts_suspend(struct device *dev) +static int __maybe_unused pixcir_i2c_ts_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct 
pixcir_i2c_ts_data *ts = i2c_get_clientdata(client); @@ -377,7 +376,7 @@ unlock: return ret; } -static int pixcir_i2c_ts_resume(struct device *dev) +static int __maybe_unused pixcir_i2c_ts_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct pixcir_i2c_ts_data *ts = i2c_get_clientdata(client); @@ -405,7 +404,6 @@ unlock: return ret; } -#endif static SIMPLE_DEV_PM_OPS(pixcir_dev_pm_ops, pixcir_i2c_ts_suspend, pixcir_i2c_ts_resume); diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c index 3c0f57efe7b..697e26e52d5 100644 --- a/drivers/input/touchscreen/st1232.c +++ b/drivers/input/touchscreen/st1232.c @@ -243,8 +243,7 @@ static int st1232_ts_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int st1232_ts_suspend(struct device *dev) +static int __maybe_unused st1232_ts_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct st1232_ts_data *ts = i2c_get_clientdata(client); @@ -259,7 +258,7 @@ static int st1232_ts_suspend(struct device *dev) return 0; } -static int st1232_ts_resume(struct device *dev) +static int __maybe_unused st1232_ts_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct st1232_ts_data *ts = i2c_get_clientdata(client); @@ -274,8 +273,6 @@ static int st1232_ts_resume(struct device *dev) return 0; } -#endif - static SIMPLE_DEV_PM_OPS(st1232_ts_pm_ops, st1232_ts_suspend, st1232_ts_resume); diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c index 52380b68ebd..72657c57943 100644 --- a/drivers/input/touchscreen/tsc2005.c +++ b/drivers/input/touchscreen/tsc2005.c @@ -773,8 +773,7 @@ static int tsc2005_remove(struct spi_device *spi) return 0; } -#ifdef CONFIG_PM_SLEEP -static int tsc2005_suspend(struct device *dev) +static int __maybe_unused tsc2005_suspend(struct device *dev) { struct spi_device *spi = to_spi_device(dev); struct tsc2005 *ts = spi_get_drvdata(spi); @@ -791,7 +790,7 @@ static int tsc2005_suspend(struct device *dev) return 0; } -static int tsc2005_resume(struct device *dev) +static int __maybe_unused tsc2005_resume(struct device *dev) { struct spi_device *spi = to_spi_device(dev); struct tsc2005 *ts = spi_get_drvdata(spi); @@ -807,7 +806,6 @@ static int tsc2005_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(tsc2005_pm_ops, tsc2005_suspend, tsc2005_resume); diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c index 0eca00da584..c1e23cfc615 100644 --- a/drivers/input/touchscreen/ucb1400_ts.c +++ b/drivers/input/touchscreen/ucb1400_ts.c @@ -406,8 +406,7 @@ static int ucb1400_ts_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int ucb1400_ts_suspend(struct device *dev) +static int __maybe_unused ucb1400_ts_suspend(struct device *dev) { struct ucb1400_ts *ucb = dev_get_platdata(dev); struct input_dev *idev = ucb->ts_idev; @@ -421,7 +420,7 @@ static int ucb1400_ts_suspend(struct device *dev) return 0; } -static int ucb1400_ts_resume(struct device *dev) +static int __maybe_unused ucb1400_ts_resume(struct device *dev) { struct ucb1400_ts *ucb = dev_get_platdata(dev); struct input_dev *idev = ucb->ts_idev; @@ -434,7 +433,6 @@ static int ucb1400_ts_resume(struct device *dev) mutex_unlock(&idev->mutex); return 0; } -#endif static SIMPLE_DEV_PM_OPS(ucb1400_ts_pm_ops, ucb1400_ts_suspend, ucb1400_ts_resume); diff --git a/drivers/input/touchscreen/wacom_i2c.c b/drivers/input/touchscreen/wacom_i2c.c index 
7ccaa1b12b0..32f8ac00393 100644 --- a/drivers/input/touchscreen/wacom_i2c.c +++ b/drivers/input/touchscreen/wacom_i2c.c @@ -242,8 +242,7 @@ static int wacom_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int wacom_i2c_suspend(struct device *dev) +static int __maybe_unused wacom_i2c_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -252,7 +251,7 @@ static int wacom_i2c_suspend(struct device *dev) return 0; } -static int wacom_i2c_resume(struct device *dev) +static int __maybe_unused wacom_i2c_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -260,7 +259,6 @@ static int wacom_i2c_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(wacom_i2c_pm, wacom_i2c_suspend, wacom_i2c_resume); diff --git a/drivers/input/touchscreen/zforce_ts.c b/drivers/input/touchscreen/zforce_ts.c index 8ba48f5eff7..19880c7385e 100644 --- a/drivers/input/touchscreen/zforce_ts.c +++ b/drivers/input/touchscreen/zforce_ts.c @@ -602,8 +602,7 @@ static void zforce_input_close(struct input_dev *dev) return; } -#ifdef CONFIG_PM_SLEEP -static int zforce_suspend(struct device *dev) +static int __maybe_unused zforce_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct zforce_ts *ts = i2c_get_clientdata(client); @@ -648,7 +647,7 @@ unlock: return ret; } -static int zforce_resume(struct device *dev) +static int __maybe_unused zforce_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct zforce_ts *ts = i2c_get_clientdata(client); @@ -685,7 +684,6 @@ unlock: return ret; } -#endif static SIMPLE_DEV_PM_OPS(zforce_pm_ops, zforce_suspend, zforce_resume); diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c index 3d5d792d5cb..410c3974987 100644 --- a/drivers/memory/fsl_ifc.c +++ b/drivers/memory/fsl_ifc.c @@ -61,7 +61,7 @@ int fsl_ifc_find(phys_addr_t addr_base) if (!fsl_ifc_ctrl_dev || !fsl_ifc_ctrl_dev->regs) return -ENODEV; - for (i = 0; i < ARRAY_SIZE(fsl_ifc_ctrl_dev->regs->cspr_cs); i++) { + for (i = 0; i < fsl_ifc_ctrl_dev->banks; i++) { u32 cspr = in_be32(&fsl_ifc_ctrl_dev->regs->cspr_cs[i].cspr); if (cspr & CSPR_V && (cspr & CSPR_BA) == convert_ifc_address(addr_base)) @@ -213,7 +213,7 @@ static irqreturn_t fsl_ifc_ctrl_irq(int irqno, void *data) static int fsl_ifc_ctrl_probe(struct platform_device *dev) { int ret = 0; - + int version, banks; dev_info(&dev->dev, "Freescale Integrated Flash Controller\n"); @@ -231,6 +231,15 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) goto err; } + version = ioread32be(&fsl_ifc_ctrl_dev->regs->ifc_rev) & + FSL_IFC_VERSION_MASK; + banks = (version == FSL_IFC_VERSION_1_0_0) ? 4 : 8; + dev_info(&dev->dev, "IFC version %d.%d, %d banks\n", + version >> 24, (version >> 16) & 0xf, banks); + + fsl_ifc_ctrl_dev->version = version; + fsl_ifc_ctrl_dev->banks = banks; + /* get the Controller level irq */ fsl_ifc_ctrl_dev->irq = irq_of_parse_and_map(dev->dev.of_node, 0); if (fsl_ifc_ctrl_dev->irq == NO_IRQ) { diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index 94b821042d9..71fea895ce3 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -133,7 +133,7 @@ config MTD_OF_PARTS help This provides a partition parsing function which derives the partition map from the children of the flash node, - as described in Documentation/devicetree/booting-without-of.txt. + as described in Documentation/devicetree/bindings/mtd/partition.txt. 
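The #ifdef CONFIG_PM_SLEEP removals running through the touchscreen drivers above (ad7877 through zforce_ts) all lean on the same property: SIMPLE_DEV_PM_OPS() expands through SET_SYSTEM_SLEEP_PM_OPS(), which references the suspend/resume callbacks only when CONFIG_PM_SLEEP is enabled. Tagging the functions __maybe_unused therefore silences the "defined but not used" warning in !PM_SLEEP builds, and the preprocessor guards can go. A minimal skeleton of the pattern; the foo_* names are placeholders, not from any driver in this patch.

#include <linux/device.h>
#include <linux/pm.h>

static int __maybe_unused foo_suspend(struct device *dev)
{
        /* Quiesce the hardware; only ever called with CONFIG_PM_SLEEP=y. */
        return 0;
}

static int __maybe_unused foo_resume(struct device *dev)
{
        /* Bring the hardware back up. */
        return 0;
}

/*
 * With CONFIG_PM_SLEEP disabled, SET_SYSTEM_SLEEP_PM_OPS() inside this
 * macro expands to nothing, leaving foo_suspend()/foo_resume()
 * unreferenced; __maybe_unused keeps the compiler quiet about that
 * without wrapping both functions in #ifdef CONFIG_PM_SLEEP.
 */
static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);

The ops struct is then hooked up via .driver.pm = &foo_pm_ops, exactly as the converted drivers already do.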
config MTD_AR7_PARTS tristate "TI AR7 partitioning support" diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c index 8057f52a45b..cc13ea5ce4d 100644 --- a/drivers/mtd/bcm47xxpart.c +++ b/drivers/mtd/bcm47xxpart.c @@ -15,8 +15,12 @@ #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> -/* 10 parts were found on sflash on Netgear WNDR4500 */ -#define BCM47XXPART_MAX_PARTS 12 +/* + * NAND flash on Netgear R6250 was verified to contain 15 partitions. + * This will result in allocating too big array for some old devices, but the + * memory will be freed soon anyway (see mtd_device_parse_register). + */ +#define BCM47XXPART_MAX_PARTS 20 /* * Amount of bytes we read when analyzing each block of flash memory. @@ -168,18 +172,26 @@ static int bcm47xxpart_parse(struct mtd_info *master, i++; } - bcm47xxpart_add_part(&parts[curr_part++], "linux", - offset + trx->offset[i], 0); - i++; + if (trx->offset[i]) { + bcm47xxpart_add_part(&parts[curr_part++], + "linux", + offset + trx->offset[i], + 0); + i++; + } /* * Pure rootfs size is known and can be calculated as: * trx->length - trx->offset[i]. We don't fill it as * we want to have jffs2 (overlay) in the same mtd. */ - bcm47xxpart_add_part(&parts[curr_part++], "rootfs", - offset + trx->offset[i], 0); - i++; + if (trx->offset[i]) { + bcm47xxpart_add_part(&parts[curr_part++], + "rootfs", + offset + trx->offset[i], + 0); + i++; + } last_trx_part = curr_part - 1; diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index 3096f3ded3a..286b97a304c 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -2654,8 +2654,7 @@ static void cfi_intelext_destroy(struct mtd_info *mtd) kfree(cfi); for (i = 0; i < mtd->numeraseregions; i++) { region = &mtd->eraseregions[i]; - if (region->lockmap) - kfree(region->lockmap); + kfree(region->lockmap); } kfree(mtd->eraseregions); } diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c index 72346048532..448ce42f951 100644 --- a/drivers/mtd/devices/docg3.c +++ b/drivers/mtd/devices/docg3.c @@ -22,6 +22,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/string.h> #include <linux/slab.h> @@ -1655,22 +1656,21 @@ static int dbg_flashctrl_show(struct seq_file *s, void *p) { struct docg3 *docg3 = (struct docg3 *)s->private; - int pos = 0; u8 fctrl; mutex_lock(&docg3->cascade->lock); fctrl = doc_register_readb(docg3, DOC_FLASHCONTROL); mutex_unlock(&docg3->cascade->lock); - pos += seq_printf(s, - "FlashControl : 0x%02x (%s,CE# %s,%s,%s,flash %s)\n", - fctrl, - fctrl & DOC_CTRL_VIOLATION ? "protocol violation" : "-", - fctrl & DOC_CTRL_CE ? "active" : "inactive", - fctrl & DOC_CTRL_PROTECTION_ERROR ? "protection error" : "-", - fctrl & DOC_CTRL_SEQUENCE_ERROR ? "sequence error" : "-", - fctrl & DOC_CTRL_FLASHREADY ? "ready" : "not ready"); - return pos; + seq_printf(s, "FlashControl : 0x%02x (%s,CE# %s,%s,%s,flash %s)\n", + fctrl, + fctrl & DOC_CTRL_VIOLATION ? "protocol violation" : "-", + fctrl & DOC_CTRL_CE ? "active" : "inactive", + fctrl & DOC_CTRL_PROTECTION_ERROR ? "protection error" : "-", + fctrl & DOC_CTRL_SEQUENCE_ERROR ? "sequence error" : "-", + fctrl & DOC_CTRL_FLASHREADY ? 
"ready" : "not ready"); + + return 0; } DEBUGFS_RO_ATTR(flashcontrol, dbg_flashctrl_show); @@ -1678,58 +1678,56 @@ static int dbg_asicmode_show(struct seq_file *s, void *p) { struct docg3 *docg3 = (struct docg3 *)s->private; - int pos = 0, pctrl, mode; + int pctrl, mode; mutex_lock(&docg3->cascade->lock); pctrl = doc_register_readb(docg3, DOC_ASICMODE); mode = pctrl & 0x03; mutex_unlock(&docg3->cascade->lock); - pos += seq_printf(s, - "%04x : RAM_WE=%d,RSTIN_RESET=%d,BDETCT_RESET=%d,WRITE_ENABLE=%d,POWERDOWN=%d,MODE=%d%d (", - pctrl, - pctrl & DOC_ASICMODE_RAM_WE ? 1 : 0, - pctrl & DOC_ASICMODE_RSTIN_RESET ? 1 : 0, - pctrl & DOC_ASICMODE_BDETCT_RESET ? 1 : 0, - pctrl & DOC_ASICMODE_MDWREN ? 1 : 0, - pctrl & DOC_ASICMODE_POWERDOWN ? 1 : 0, - mode >> 1, mode & 0x1); + seq_printf(s, + "%04x : RAM_WE=%d,RSTIN_RESET=%d,BDETCT_RESET=%d,WRITE_ENABLE=%d,POWERDOWN=%d,MODE=%d%d (", + pctrl, + pctrl & DOC_ASICMODE_RAM_WE ? 1 : 0, + pctrl & DOC_ASICMODE_RSTIN_RESET ? 1 : 0, + pctrl & DOC_ASICMODE_BDETCT_RESET ? 1 : 0, + pctrl & DOC_ASICMODE_MDWREN ? 1 : 0, + pctrl & DOC_ASICMODE_POWERDOWN ? 1 : 0, + mode >> 1, mode & 0x1); switch (mode) { case DOC_ASICMODE_RESET: - pos += seq_puts(s, "reset"); + seq_puts(s, "reset"); break; case DOC_ASICMODE_NORMAL: - pos += seq_puts(s, "normal"); + seq_puts(s, "normal"); break; case DOC_ASICMODE_POWERDOWN: - pos += seq_puts(s, "powerdown"); + seq_puts(s, "powerdown"); break; } - pos += seq_puts(s, ")\n"); - return pos; + seq_puts(s, ")\n"); + return 0; } DEBUGFS_RO_ATTR(asic_mode, dbg_asicmode_show); static int dbg_device_id_show(struct seq_file *s, void *p) { struct docg3 *docg3 = (struct docg3 *)s->private; - int pos = 0; int id; mutex_lock(&docg3->cascade->lock); id = doc_register_readb(docg3, DOC_DEVICESELECT); mutex_unlock(&docg3->cascade->lock); - pos += seq_printf(s, "DeviceId = %d\n", id); - return pos; + seq_printf(s, "DeviceId = %d\n", id); + return 0; } DEBUGFS_RO_ATTR(device_id, dbg_device_id_show); static int dbg_protection_show(struct seq_file *s, void *p) { struct docg3 *docg3 = (struct docg3 *)s->private; - int pos = 0; int protect, dps0, dps0_low, dps0_high, dps1, dps1_low, dps1_high; mutex_lock(&docg3->cascade->lock); @@ -1742,45 +1740,40 @@ static int dbg_protection_show(struct seq_file *s, void *p) dps1_high = doc_register_readw(docg3, DOC_DPS1_ADDRHIGH); mutex_unlock(&docg3->cascade->lock); - pos += seq_printf(s, "Protection = 0x%02x (", - protect); + seq_printf(s, "Protection = 0x%02x (", protect); if (protect & DOC_PROTECT_FOUNDRY_OTP_LOCK) - pos += seq_puts(s, "FOUNDRY_OTP_LOCK,"); + seq_puts(s, "FOUNDRY_OTP_LOCK,"); if (protect & DOC_PROTECT_CUSTOMER_OTP_LOCK) - pos += seq_puts(s, "CUSTOMER_OTP_LOCK,"); + seq_puts(s, "CUSTOMER_OTP_LOCK,"); if (protect & DOC_PROTECT_LOCK_INPUT) - pos += seq_puts(s, "LOCK_INPUT,"); + seq_puts(s, "LOCK_INPUT,"); if (protect & DOC_PROTECT_STICKY_LOCK) - pos += seq_puts(s, "STICKY_LOCK,"); + seq_puts(s, "STICKY_LOCK,"); if (protect & DOC_PROTECT_PROTECTION_ENABLED) - pos += seq_puts(s, "PROTECTION ON,"); + seq_puts(s, "PROTECTION ON,"); if (protect & DOC_PROTECT_IPL_DOWNLOAD_LOCK) - pos += seq_puts(s, "IPL_DOWNLOAD_LOCK,"); + seq_puts(s, "IPL_DOWNLOAD_LOCK,"); if (protect & DOC_PROTECT_PROTECTION_ERROR) - pos += seq_puts(s, "PROTECT_ERR,"); + seq_puts(s, "PROTECT_ERR,"); else - pos += seq_puts(s, "NO_PROTECT_ERR"); - pos += seq_puts(s, ")\n"); - - pos += seq_printf(s, "DPS0 = 0x%02x : " - "Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, " - "WRITE=%d, HW_LOCK=%d, KEY_OK=%d\n", - dps0, dps0_low, dps0_high, - 
!!(dps0 & DOC_DPS_OTP_PROTECTED), - !!(dps0 & DOC_DPS_READ_PROTECTED), - !!(dps0 & DOC_DPS_WRITE_PROTECTED), - !!(dps0 & DOC_DPS_HW_LOCK_ENABLED), - !!(dps0 & DOC_DPS_KEY_OK)); - pos += seq_printf(s, "DPS1 = 0x%02x : " - "Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, " - "WRITE=%d, HW_LOCK=%d, KEY_OK=%d\n", - dps1, dps1_low, dps1_high, - !!(dps1 & DOC_DPS_OTP_PROTECTED), - !!(dps1 & DOC_DPS_READ_PROTECTED), - !!(dps1 & DOC_DPS_WRITE_PROTECTED), - !!(dps1 & DOC_DPS_HW_LOCK_ENABLED), - !!(dps1 & DOC_DPS_KEY_OK)); - return pos; + seq_puts(s, "NO_PROTECT_ERR"); + seq_puts(s, ")\n"); + + seq_printf(s, "DPS0 = 0x%02x : Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, WRITE=%d, HW_LOCK=%d, KEY_OK=%d\n", + dps0, dps0_low, dps0_high, + !!(dps0 & DOC_DPS_OTP_PROTECTED), + !!(dps0 & DOC_DPS_READ_PROTECTED), + !!(dps0 & DOC_DPS_WRITE_PROTECTED), + !!(dps0 & DOC_DPS_HW_LOCK_ENABLED), + !!(dps0 & DOC_DPS_KEY_OK)); + seq_printf(s, "DPS1 = 0x%02x : Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, WRITE=%d, HW_LOCK=%d, KEY_OK=%d\n", + dps1, dps1_low, dps1_high, + !!(dps1 & DOC_DPS_OTP_PROTECTED), + !!(dps1 & DOC_DPS_READ_PROTECTED), + !!(dps1 & DOC_DPS_WRITE_PROTECTED), + !!(dps1 & DOC_DPS_HW_LOCK_ENABLED), + !!(dps1 & DOC_DPS_KEY_OK)); + return 0; } DEBUGFS_RO_ATTR(protection, dbg_protection_show); @@ -2126,9 +2119,18 @@ static int __exit docg3_release(struct platform_device *pdev) return 0; } +#ifdef CONFIG_OF +static struct of_device_id docg3_dt_ids[] = { + { .compatible = "m-systems,diskonchip-g3" }, + {} +}; +MODULE_DEVICE_TABLE(of, docg3_dt_ids); +#endif + static struct platform_driver g3_driver = { .driver = { .name = "docg3", + .of_match_table = of_match_ptr(docg3_dt_ids), }, .suspend = docg3_suspend, .resume = docg3_resume, diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index ed827cf894e..85e35467fba 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -128,13 +128,10 @@ static int m25p80_read(struct spi_nor *nor, loff_t from, size_t len, struct spi_device *spi = flash->spi; struct spi_transfer t[2]; struct spi_message m; - int dummy = nor->read_dummy; - int ret; + unsigned int dummy = nor->read_dummy; - /* Wait till previous write/erase is done. */ - ret = nor->wait_till_ready(nor); - if (ret) - return ret; + /* convert the dummy cycles to the number of bytes */ + dummy /= 8; spi_message_init(&m); memset(t, 0, (sizeof t)); @@ -160,21 +157,10 @@ static int m25p80_read(struct spi_nor *nor, loff_t from, size_t len, static int m25p80_erase(struct spi_nor *nor, loff_t offset) { struct m25p *flash = nor->priv; - int ret; dev_dbg(nor->dev, "%dKiB at 0x%08x\n", flash->mtd.erasesize / 1024, (u32)offset); - /* Wait until finished previous write command. */ - ret = nor->wait_till_ready(nor); - if (ret) - return ret; - - /* Send write enable, then erase commands. */ - ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0); - if (ret) - return ret; - /* Set up command buffer. */ flash->command[0] = nor->erase_opcode; m25p_addr2cmd(nor, offset, flash->command); @@ -260,7 +246,6 @@ static int m25p_remove(struct spi_device *spi) return mtd_device_unregister(&flash->mtd); } - /* * XXX This needs to be kept in sync with spi_nor_ids. 
We can't share * it with spi-nor, because if this is built as a module then modpost @@ -287,7 +272,7 @@ static const struct spi_device_id m25p_ids[] = { {"s25fl512s"}, {"s70fl01gs"}, {"s25sl12800"}, {"s25sl12801"}, {"s25fl129p0"}, {"s25fl129p1"}, {"s25sl004a"}, {"s25sl008a"}, {"s25sl016a"}, {"s25sl032a"}, {"s25sl064a"}, {"s25fl008k"}, - {"s25fl016k"}, {"s25fl064k"}, + {"s25fl016k"}, {"s25fl064k"}, {"s25fl132k"}, {"sst25vf040b"},{"sst25vf080b"},{"sst25vf016b"},{"sst25vf032b"}, {"sst25vf064c"},{"sst25wf512"}, {"sst25wf010"}, {"sst25wf020"}, {"sst25wf040"}, @@ -300,17 +285,16 @@ static const struct spi_device_id m25p_ids[] = { {"m45pe10"}, {"m45pe80"}, {"m45pe16"}, {"m25pe20"}, {"m25pe80"}, {"m25pe16"}, {"m25px16"}, {"m25px32"}, {"m25px32-s0"}, {"m25px32-s1"}, - {"m25px64"}, + {"m25px64"}, {"m25px80"}, {"w25x10"}, {"w25x20"}, {"w25x40"}, {"w25x80"}, {"w25x16"}, {"w25x32"}, {"w25q32"}, {"w25q32dw"}, - {"w25x64"}, {"w25q64"}, {"w25q128"}, {"w25q80"}, - {"w25q80bl"}, {"w25q128"}, {"w25q256"}, {"cat25c11"}, + {"w25x64"}, {"w25q64"}, {"w25q80"}, {"w25q80bl"}, + {"w25q128"}, {"w25q256"}, {"cat25c11"}, {"cat25c03"}, {"cat25c09"}, {"cat25c17"}, {"cat25128"}, { }, }; MODULE_DEVICE_TABLE(spi, m25p_ids); - static struct spi_driver m25p80_driver = { .driver = { .name = "m25p80", diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c index dd22ce2cc9a..0099aba72a8 100644 --- a/drivers/mtd/devices/mtd_dataflash.c +++ b/drivers/mtd/devices/mtd_dataflash.c @@ -149,7 +149,7 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr) { struct dataflash *priv = mtd->priv; struct spi_device *spi = priv->spi; - struct spi_transfer x = { .tx_dma = 0, }; + struct spi_transfer x = { }; struct spi_message msg; unsigned blocksize = priv->page_size << 3; uint8_t *command; @@ -235,7 +235,7 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct dataflash *priv = mtd->priv; - struct spi_transfer x[2] = { { .tx_dma = 0, }, }; + struct spi_transfer x[2] = { }; struct spi_message msg; unsigned int addr; uint8_t *command; @@ -301,7 +301,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len, { struct dataflash *priv = mtd->priv; struct spi_device *spi = priv->spi; - struct spi_transfer x[2] = { { .tx_dma = 0, }, }; + struct spi_transfer x[2] = { }; struct spi_message msg; unsigned int pageaddr, addr, offset, writelen; size_t remaining = len; diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c index effd9a4ef7e..8b66e52ca3c 100644 --- a/drivers/mtd/devices/phram.c +++ b/drivers/mtd/devices/phram.c @@ -17,7 +17,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <asm/io.h> +#include <linux/io.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/list.h> diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c index f02603e1bfe..708b7e8c8b1 100644 --- a/drivers/mtd/devices/pmc551.c +++ b/drivers/mtd/devices/pmc551.c @@ -812,8 +812,7 @@ static int __init init_pmc551(void) } /* Exited early, reference left over */ - if (PCI_Device) - pci_dev_put(PCI_Device); + pci_dev_put(PCI_Device); if (!pmc551list) { printk(KERN_NOTICE "pmc551: not detected\n"); diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c index 487e64f411a..1388c8d7f30 100644 --- a/drivers/mtd/inftlmount.c +++ b/drivers/mtd/inftlmount.c @@ -518,7 +518,7 @@ void INFTL_dumpVUchains(struct INFTLrecord *s) pr_debug("INFTL Virtual Unit Chains:\n"); for (logical = 0; logical < 
s->nb_blocks; logical++) { block = s->VUtable[logical]; - if (block > s->nb_blocks) + if (block >= s->nb_blocks) continue; pr_debug(" LOGICAL %d --> %d ", logical, block); for (i = 0; i < s->nb_blocks; i++) { diff --git a/drivers/mtd/maps/bfin-async-flash.c b/drivers/mtd/maps/bfin-async-flash.c index 6ea51e54904..41730feeace 100644 --- a/drivers/mtd/maps/bfin-async-flash.c +++ b/drivers/mtd/maps/bfin-async-flash.c @@ -126,7 +126,6 @@ static const char * const part_probe_types[] = { static int bfin_flash_probe(struct platform_device *pdev) { - int ret; struct physmap_flash_data *pdata = dev_get_platdata(&pdev->dev); struct resource *memory = platform_get_resource(pdev, IORESOURCE_MEM, 0); struct resource *flash_ambctl = platform_get_resource(pdev, IORESOURCE_MEM, 1); diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index 991d0cb871f..f35cd208131 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -47,14 +47,12 @@ static int of_flash_remove(struct platform_device *dev) return 0; dev_set_drvdata(&dev->dev, NULL); - if (info->cmtd != info->list[0].mtd) { + if (info->cmtd) { mtd_device_unregister(info->cmtd); - mtd_concat_destroy(info->cmtd); + if (info->cmtd != info->list[0].mtd) + mtd_concat_destroy(info->cmtd); } - if (info->cmtd) - mtd_device_unregister(info->cmtd); - for (i = 0; i < info->list_size; i++) { if (info->list[i].mtd) map_destroy(info->list[i].mtd); diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index dd10646982a..7d0150d2043 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -75,10 +75,12 @@ config MTD_NAND_DENALI_SCRATCH_REG_ADDR boards, the scratch register is at 0xFF108018. config MTD_NAND_GPIO - tristate "GPIO NAND Flash driver" + tristate "GPIO assisted NAND Flash driver" depends on GPIOLIB help - This enables a GPIO based NAND flash driver. + This enables a NAND flash driver where control signals are + connected to GPIO pins, and commands and data are communicated + via a memory mapped interface. config MTD_NAND_AMS_DELTA tristate "NAND Flash device on Amstrad E3" @@ -516,4 +518,10 @@ config MTD_NAND_XWAY Enables support for NAND Flash chips on Lantiq XWAY SoCs. NAND is attached to the External Bus Unit (EBU). +config MTD_NAND_SUNXI + tristate "Support for NAND on Allwinner SoCs" + depends on ARCH_SUNXI + help + Enables support for NAND Flash chips on Allwinner SoCs. 
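The docg3 debugfs hunks earlier in this series drop the pos += seq_printf(...) accumulation because the seq_printf()/seq_puts() return values are being phased out; a seq_file show() callback should simply emit its output and return 0. A minimal sketch of the resulting style, with a hypothetical foo_dev structure standing in for driver state handed to single_open():

#include <linux/seq_file.h>

struct foo_dev {
	u8 fctrl;	/* hypothetical cached status register */
};

static int foo_status_show(struct seq_file *s, void *p)
{
	struct foo_dev *foo = s->private;

	/*
	 * No byte counting needed: the seq_file core buffers the output
	 * and transparently re-invokes show() with a larger buffer on
	 * overflow, so the return value is only for error reporting.
	 */
	seq_printf(s, "Status: 0x%02x (%s)\n", foo->fctrl,
		   foo->fctrl & 0x01 ? "ready" : "busy");
	return 0;
}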
+ endif # MTD_NAND diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index 9c847e469ca..bd38f21d2e2 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -50,5 +50,6 @@ obj-$(CONFIG_MTD_NAND_JZ4740) += jz4740_nand.o obj-$(CONFIG_MTD_NAND_GPMI_NAND) += gpmi-nand/ obj-$(CONFIG_MTD_NAND_XWAY) += xway_nand.o obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH) += bcm47xxnflash/ +obj-$(CONFIG_MTD_NAND_SUNXI) += sunxi_nand.o nand-objs := nand_base.o nand_bbt.o nand_timings.o diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index 84c38f3c65b..a345e7b2463 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -92,7 +92,7 @@ static struct nand_ecclayout atmel_oobinfo_small = { struct atmel_nfc { void __iomem *base_cmd_regs; void __iomem *hsmc_regs; - void __iomem *sram_bank0; + void *sram_bank0; dma_addr_t sram_bank0_phys; bool use_nfc_sram; bool write_by_sram; @@ -105,7 +105,7 @@ struct atmel_nfc { struct completion comp_xfer_done; /* Point to the sram bank which include readed data via NFC */ - void __iomem *data_in_sram; + void *data_in_sram; bool will_write_sram; }; static struct atmel_nfc nand_nfc; @@ -127,6 +127,7 @@ struct atmel_nand_host { bool has_pmecc; u8 pmecc_corr_cap; u16 pmecc_sector_size; + bool has_no_lookup_table; u32 pmecc_lookup_table_offset; u32 pmecc_lookup_table_offset_512; u32 pmecc_lookup_table_offset_1024; @@ -256,26 +257,6 @@ static int atmel_nand_set_enable_ready_pins(struct mtd_info *mtd) return res; } -static void memcpy32_fromio(void *trg, const void __iomem *src, size_t size) -{ - int i; - u32 *t = trg; - const __iomem u32 *s = src; - - for (i = 0; i < (size >> 2); i++) - *t++ = readl_relaxed(s++); -} - -static void memcpy32_toio(void __iomem *trg, const void *src, int size) -{ - int i; - u32 __iomem *t = trg; - const u32 *s = src; - - for (i = 0; i < (size >> 2); i++) - writel_relaxed(*s++, t++); -} - /* * Minimal-overhead PIO for data access. 
*/ @@ -285,7 +266,7 @@ static void atmel_read_buf8(struct mtd_info *mtd, u8 *buf, int len) struct atmel_nand_host *host = nand_chip->priv; if (host->nfc && host->nfc->use_nfc_sram && host->nfc->data_in_sram) { - memcpy32_fromio(buf, host->nfc->data_in_sram, len); + memcpy(buf, host->nfc->data_in_sram, len); host->nfc->data_in_sram += len; } else { __raw_readsb(nand_chip->IO_ADDR_R, buf, len); @@ -298,7 +279,7 @@ static void atmel_read_buf16(struct mtd_info *mtd, u8 *buf, int len) struct atmel_nand_host *host = nand_chip->priv; if (host->nfc && host->nfc->use_nfc_sram && host->nfc->data_in_sram) { - memcpy32_fromio(buf, host->nfc->data_in_sram, len); + memcpy(buf, host->nfc->data_in_sram, len); host->nfc->data_in_sram += len; } else { __raw_readsw(nand_chip->IO_ADDR_R, buf, len / 2); @@ -1112,12 +1093,66 @@ static int pmecc_choose_ecc(struct atmel_nand_host *host, return 0; } +static inline int deg(unsigned int poly) +{ + /* polynomial degree is the most-significant bit index */ + return fls(poly) - 1; +} + +static int build_gf_tables(int mm, unsigned int poly, + int16_t *index_of, int16_t *alpha_to) +{ + unsigned int i, x = 1; + const unsigned int k = 1 << deg(poly); + unsigned int nn = (1 << mm) - 1; + + /* primitive polynomial must be of degree m */ + if (k != (1u << mm)) + return -EINVAL; + + for (i = 0; i < nn; i++) { + alpha_to[i] = x; + index_of[x] = i; + if (i && (x == 1)) + /* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */ + return -EINVAL; + x <<= 1; + if (x & k) + x ^= poly; + } + alpha_to[nn] = 1; + index_of[0] = 0; + + return 0; +} + +static uint16_t *create_lookup_table(struct device *dev, int sector_size) +{ + int degree = (sector_size == 512) ? + PMECC_GF_DIMENSION_13 : + PMECC_GF_DIMENSION_14; + unsigned int poly = (sector_size == 512) ? + PMECC_GF_13_PRIMITIVE_POLY : + PMECC_GF_14_PRIMITIVE_POLY; + int table_size = (sector_size == 512) ? 
+ PMECC_LOOKUP_TABLE_SIZE_512 : + PMECC_LOOKUP_TABLE_SIZE_1024; + + int16_t *addr = devm_kzalloc(dev, 2 * table_size * sizeof(uint16_t), + GFP_KERNEL); + if (addr && build_gf_tables(degree, poly, addr, addr + table_size)) + return NULL; + + return addr; +} + static int atmel_pmecc_nand_init_params(struct platform_device *pdev, struct atmel_nand_host *host) { struct mtd_info *mtd = &host->mtd; struct nand_chip *nand_chip = &host->nand_chip; struct resource *regs, *regs_pmerr, *regs_rom; + uint16_t *galois_table; int cap, sector_size, err_no; err_no = pmecc_choose_ecc(host, &cap, §or_size); @@ -1163,8 +1198,24 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev, regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3); host->pmecc_rom_base = devm_ioremap_resource(&pdev->dev, regs_rom); if (IS_ERR(host->pmecc_rom_base)) { - err_no = PTR_ERR(host->pmecc_rom_base); - goto err; + if (!host->has_no_lookup_table) + /* Don't display the information again */ + dev_err(host->dev, "Can not get I/O resource for ROM, will build a lookup table in runtime!\n"); + + host->has_no_lookup_table = true; + } + + if (host->has_no_lookup_table) { + /* Build the look-up table in runtime */ + galois_table = create_lookup_table(host->dev, sector_size); + if (!galois_table) { + dev_err(host->dev, "Failed to build a lookup table in runtime!\n"); + err_no = -EINVAL; + goto err; + } + + host->pmecc_rom_base = (void __iomem *)galois_table; + host->pmecc_lookup_table_offset = 0; } nand_chip->ecc.size = sector_size; @@ -1501,8 +1552,10 @@ static int atmel_of_init_port(struct atmel_nand_host *host, if (of_property_read_u32_array(np, "atmel,pmecc-lookup-table-offset", offset, 2) != 0) { - dev_err(host->dev, "Cannot get PMECC lookup table offset\n"); - return -EINVAL; + dev_err(host->dev, "Cannot get PMECC lookup table offset, will build a lookup table in runtime.\n"); + host->has_no_lookup_table = true; + /* Will build a lookup table and initialize the offset later */ + return 0; } if (!offset[0] && !offset[1]) { dev_err(host->dev, "Invalid PMECC lookup table offset\n"); @@ -1899,7 +1952,7 @@ static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip, int cfg, len; int status = 0; struct atmel_nand_host *host = chip->priv; - void __iomem *sram = host->nfc->sram_bank0 + nfc_get_sram_off(host); + void *sram = host->nfc->sram_bank0 + nfc_get_sram_off(host); /* Subpage write is not supported */ if (offset || (data_len < mtd->writesize)) @@ -1910,14 +1963,14 @@ static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip, if (use_dma) { if (atmel_nand_dma_op(mtd, (void *)buf, len, 0) != 0) /* Fall back to use cpu copy */ - memcpy32_toio(sram, buf, len); + memcpy(sram, buf, len); } else { - memcpy32_toio(sram, buf, len); + memcpy(sram, buf, len); } cfg = nfc_readl(host->nfc->hsmc_regs, CFG); if (unlikely(raw) && oob_required) { - memcpy32_toio(sram + len, chip->oob_poi, mtd->oobsize); + memcpy(sram + len, chip->oob_poi, mtd->oobsize); len += mtd->oobsize; nfc_writel(host->nfc->hsmc_regs, CFG, cfg | NFC_CFG_WSPARE); } else { @@ -2260,7 +2313,8 @@ static int atmel_nand_nfc_probe(struct platform_device *pdev) nfc_sram = platform_get_resource(pdev, IORESOURCE_MEM, 2); if (nfc_sram) { - nfc->sram_bank0 = devm_ioremap_resource(&pdev->dev, nfc_sram); + nfc->sram_bank0 = (void * __force) + devm_ioremap_resource(&pdev->dev, nfc_sram); if (IS_ERR(nfc->sram_bank0)) { dev_warn(&pdev->dev, "Fail to ioremap the NFC sram with error: %ld. 
So disable NFC sram.\n", PTR_ERR(nfc->sram_bank0)); diff --git a/drivers/mtd/nand/atmel_nand_ecc.h b/drivers/mtd/nand/atmel_nand_ecc.h index 8a1e9a68675..d4035e335ad 100644 --- a/drivers/mtd/nand/atmel_nand_ecc.h +++ b/drivers/mtd/nand/atmel_nand_ecc.h @@ -142,6 +142,10 @@ #define PMECC_GF_DIMENSION_13 13 #define PMECC_GF_DIMENSION_14 14 +/* Primitive Polynomial used by PMECC */ +#define PMECC_GF_13_PRIMITIVE_POLY 0x201b +#define PMECC_GF_14_PRIMITIVE_POLY 0x4443 + #define PMECC_LOOKUP_TABLE_SIZE_512 0x2000 #define PMECC_LOOKUP_TABLE_SIZE_1024 0x4000 diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c index 4e66726da9a..9a0f45f1d93 100644 --- a/drivers/mtd/nand/cafe_nand.c +++ b/drivers/mtd/nand/cafe_nand.c @@ -529,50 +529,6 @@ static int cafe_nand_write_page_lowlevel(struct mtd_info *mtd, return 0; } -static int cafe_nand_write_page(struct mtd_info *mtd, struct nand_chip *chip, - uint32_t offset, int data_len, const uint8_t *buf, - int oob_required, int page, int cached, int raw) -{ - int status; - - chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page); - - if (unlikely(raw)) - status = chip->ecc.write_page_raw(mtd, chip, buf, oob_required); - else - status = chip->ecc.write_page(mtd, chip, buf, oob_required); - - if (status < 0) - return status; - - /* - * Cached progamming disabled for now, Not sure if its worth the - * trouble. The speed gain is not very impressive. (2.3->2.6Mib/s) - */ - cached = 0; - - if (!cached || !(chip->options & NAND_CACHEPRG)) { - - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - status = chip->waitfunc(mtd, chip); - /* - * See if operation failed and additional status checks are - * available - */ - if ((status & NAND_STATUS_FAIL) && (chip->errstat)) - status = chip->errstat(mtd, chip, FL_WRITING, status, - page); - - if (status & NAND_STATUS_FAIL) - return -EIO; - } else { - chip->cmdfunc(mtd, NAND_CMD_CACHEDPROG, -1, -1); - status = chip->waitfunc(mtd, chip); - } - - return 0; -} - static int cafe_nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip) { return 0; @@ -800,7 +756,6 @@ static int cafe_nand_probe(struct pci_dev *pdev, cafe->nand.ecc.hwctl = (void *)cafe_nand_bug; cafe->nand.ecc.calculate = (void *)cafe_nand_bug; cafe->nand.ecc.correct = (void *)cafe_nand_bug; - cafe->nand.write_page = cafe_nand_write_page; cafe->nand.ecc.write_page = cafe_nand_write_page_lowlevel; cafe->nand.ecc.write_oob = cafe_nand_write_oob; cafe->nand.ecc.read_page = cafe_nand_read_page; diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index b9ef7a6bba4..4c05f4f6a5c 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -31,7 +31,6 @@ #include <linux/mtd/nand_ecc.h> #include <linux/fsl_ifc.h> -#define FSL_IFC_V1_1_0 0x01010000 #define ERR_BYTE 0xFF /* Value returned for read bytes when read failed */ #define IFC_TIMEOUT_MSECS 500 /* Maximum number of mSecs to wait @@ -877,7 +876,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) struct fsl_ifc_regs __iomem *ifc = ctrl->regs; struct nand_chip *chip = &priv->chip; struct nand_ecclayout *layout; - u32 csor, ver; + u32 csor; /* Fill in fsl_ifc_mtd structure */ priv->mtd.priv = chip; @@ -984,8 +983,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->ecc.mode = NAND_ECC_SOFT; } - ver = ioread32be(&ifc->ifc_rev); - if (ver == FSL_IFC_V1_1_0) + if (ctrl->version == FSL_IFC_VERSION_1_1_0) fsl_ifc_sram_init(priv); return 0; @@ -1045,12 +1043,12 @@ static int fsl_ifc_nand_probe(struct platform_device *dev) } /* find which chip 
select it is connected to */ - for (bank = 0; bank < FSL_IFC_BANK_COUNT; bank++) { + for (bank = 0; bank < fsl_ifc_ctrl_dev->banks; bank++) { if (match_bank(ifc, bank, res.start)) break; } - if (bank >= FSL_IFC_BANK_COUNT) { + if (bank >= fsl_ifc_ctrl_dev->banks) { dev_err(&dev->dev, "%s: address did not match any chip selects\n", __func__); return -ENODEV; diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c index 918283999a4..73c4048c3a5 100644 --- a/drivers/mtd/nand/gpio.c +++ b/drivers/mtd/nand/gpio.c @@ -8,7 +8,9 @@ * * © 2004 Simtec Electronics * - * Device driver for NAND connected via GPIO + * Device driver for NAND flash that uses a memory mapped interface to + * read/write the NAND commands and data, and GPIO pins for control signals + * (the DT binding refers to this as "GPIO assisted NAND flash") * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c index 87e658ce23e..27f272ed502 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c @@ -1353,3 +1353,156 @@ int gpmi_read_page(struct gpmi_nand_data *this, set_dma_type(this, DMA_FOR_READ_ECC_PAGE); return start_dma_with_bch_irq(this, desc); } + +/** + * gpmi_copy_bits - copy bits from one memory region to another + * @dst: destination buffer + * @dst_bit_off: bit offset we're starting to write at + * @src: source buffer + * @src_bit_off: bit offset we're starting to read from + * @nbits: number of bits to copy + * + * This functions copies bits from one memory region to another, and is used by + * the GPMI driver to copy ECC sections which are not guaranteed to be byte + * aligned. + * + * src and dst should not overlap. + * + */ +void gpmi_copy_bits(u8 *dst, size_t dst_bit_off, + const u8 *src, size_t src_bit_off, + size_t nbits) +{ + size_t i; + size_t nbytes; + u32 src_buffer = 0; + size_t bits_in_src_buffer = 0; + + if (!nbits) + return; + + /* + * Move src and dst pointers to the closest byte pointer and store bit + * offsets within a byte. + */ + src += src_bit_off / 8; + src_bit_off %= 8; + + dst += dst_bit_off / 8; + dst_bit_off %= 8; + + /* + * Initialize the src_buffer value with bits available in the first + * byte of data so that we end up with a byte aligned src pointer. + */ + if (src_bit_off) { + src_buffer = src[0] >> src_bit_off; + if (nbits >= (8 - src_bit_off)) { + bits_in_src_buffer += 8 - src_bit_off; + } else { + src_buffer &= GENMASK(nbits - 1, 0); + bits_in_src_buffer += nbits; + } + nbits -= bits_in_src_buffer; + src++; + } + + /* Calculate the number of bytes that can be copied from src to dst. */ + nbytes = nbits / 8; + + /* Try to align dst to a byte boundary. */ + if (dst_bit_off) { + if (bits_in_src_buffer < (8 - dst_bit_off) && nbytes) { + src_buffer |= src[0] << bits_in_src_buffer; + bits_in_src_buffer += 8; + src++; + nbytes--; + } + + if (bits_in_src_buffer >= (8 - dst_bit_off)) { + dst[0] &= GENMASK(dst_bit_off - 1, 0); + dst[0] |= src_buffer << dst_bit_off; + src_buffer >>= (8 - dst_bit_off); + bits_in_src_buffer -= (8 - dst_bit_off); + dst_bit_off = 0; + dst++; + if (bits_in_src_buffer > 7) { + bits_in_src_buffer -= 8; + dst[0] = src_buffer; + dst++; + src_buffer >>= 8; + } + } + } + + if (!bits_in_src_buffer && !dst_bit_off) { + /* + * Both src and dst pointers are byte aligned, thus we can + * just use the optimized memcpy function. 
+ */ + if (nbytes) + memcpy(dst, src, nbytes); + } else { + /* + * src buffer is not byte aligned, hence we have to copy each + * src byte to the src_buffer variable before extracting a byte + * to store in dst. + */ + for (i = 0; i < nbytes; i++) { + src_buffer |= src[i] << bits_in_src_buffer; + dst[i] = src_buffer; + src_buffer >>= 8; + } + } + /* Update dst and src pointers */ + dst += nbytes; + src += nbytes; + + /* + * nbits is the number of remaining bits. It should not exceed 8 as + * we've already copied as much bytes as possible. + */ + nbits %= 8; + + /* + * If there's no more bits to copy to the destination and src buffer + * was already byte aligned, then we're done. + */ + if (!nbits && !bits_in_src_buffer) + return; + + /* Copy the remaining bits to src_buffer */ + if (nbits) + src_buffer |= (*src & GENMASK(nbits - 1, 0)) << + bits_in_src_buffer; + bits_in_src_buffer += nbits; + + /* + * In case there were not enough bits to get a byte aligned dst buffer + * prepare the src_buffer variable to match the dst organization (shift + * src_buffer by dst_bit_off and retrieve the least significant bits + * from dst). + */ + if (dst_bit_off) + src_buffer = (src_buffer << dst_bit_off) | + (*dst & GENMASK(dst_bit_off - 1, 0)); + bits_in_src_buffer += dst_bit_off; + + /* + * Keep most significant bits from dst if we end up with an unaligned + * number of bits. + */ + nbytes = bits_in_src_buffer / 8; + if (bits_in_src_buffer % 8) { + src_buffer |= (dst[nbytes] & + GENMASK(7, bits_in_src_buffer % 8)) << + (nbytes * 8); + nbytes++; + } + + /* Copy the remaining bytes to dst */ + for (i = 0; i < nbytes; i++) { + dst[i] = src_buffer; + src_buffer >>= 8; + } +} diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 959cb9b7031..4f3851a24bb 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -791,6 +791,7 @@ static void gpmi_free_dma_buffer(struct gpmi_nand_data *this) this->page_buffer_phys); kfree(this->cmd_buffer); kfree(this->data_buffer_dma); + kfree(this->raw_buffer); this->cmd_buffer = NULL; this->data_buffer_dma = NULL; @@ -837,6 +838,9 @@ static int gpmi_alloc_dma_buffer(struct gpmi_nand_data *this) if (!this->page_buffer_virt) goto error_alloc; + this->raw_buffer = kzalloc(mtd->writesize + mtd->oobsize, GFP_KERNEL); + if (!this->raw_buffer) + goto error_alloc; /* Slice up the page buffer. */ this->payload_virt = this->page_buffer_virt; @@ -1347,6 +1351,199 @@ gpmi_ecc_write_oob(struct mtd_info *mtd, struct nand_chip *chip, int page) return status & NAND_STATUS_FAIL ? -EIO : 0; } +/* + * This function reads a NAND page without involving the ECC engine (no HW + * ECC correction). + * The tricky part in the GPMI/BCH controller is that it stores ECC bits + * inline (interleaved with payload DATA), and do not align data chunk on + * byte boundaries. + * We thus need to take care moving the payload data and ECC bits stored in the + * page into the provided buffers, which is why we're using gpmi_copy_bits. + * + * See set_geometry_by_ecc_info inline comments to have a full description + * of the layout used by the GPMI controller. 
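+ *
+ * As a purely illustrative example (figures invented here, not taken
+ * from any supported chip): with a 10-byte metadata section and a
+ * per-chunk ECC of ecc_strength * gf_len = 2 * 13 = 26 bits protecting
+ * 512-byte chunks, chunk 0 occupies bits 80..4175, its ECC bits occupy
+ * bits 4176..4201, and chunk 1 therefore starts at bit 4202, which is
+ * not a byte boundary. This is why a plain memcpy cannot extract the
+ * payload and ECC sections.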
+ */ +static int gpmi_ecc_read_page_raw(struct mtd_info *mtd, + struct nand_chip *chip, uint8_t *buf, + int oob_required, int page) +{ + struct gpmi_nand_data *this = chip->priv; + struct bch_geometry *nfc_geo = &this->bch_geometry; + int eccsize = nfc_geo->ecc_chunk_size; + int eccbits = nfc_geo->ecc_strength * nfc_geo->gf_len; + u8 *tmp_buf = this->raw_buffer; + size_t src_bit_off; + size_t oob_bit_off; + size_t oob_byte_off; + uint8_t *oob = chip->oob_poi; + int step; + + chip->read_buf(mtd, tmp_buf, + mtd->writesize + mtd->oobsize); + + /* + * If required, swap the bad block marker and the data stored in the + * metadata section, so that we don't wrongly consider a block as bad. + * + * See the layout description for a detailed explanation on why this + * is needed. + */ + if (this->swap_block_mark) { + u8 swap = tmp_buf[0]; + + tmp_buf[0] = tmp_buf[mtd->writesize]; + tmp_buf[mtd->writesize] = swap; + } + + /* + * Copy the metadata section into the oob buffer (this section is + * guaranteed to be aligned on a byte boundary). + */ + if (oob_required) + memcpy(oob, tmp_buf, nfc_geo->metadata_size); + + oob_bit_off = nfc_geo->metadata_size * 8; + src_bit_off = oob_bit_off; + + /* Extract interleaved payload data and ECC bits */ + for (step = 0; step < nfc_geo->ecc_chunk_count; step++) { + if (buf) + gpmi_copy_bits(buf, step * eccsize * 8, + tmp_buf, src_bit_off, + eccsize * 8); + src_bit_off += eccsize * 8; + + /* Align last ECC block to align a byte boundary */ + if (step == nfc_geo->ecc_chunk_count - 1 && + (oob_bit_off + eccbits) % 8) + eccbits += 8 - ((oob_bit_off + eccbits) % 8); + + if (oob_required) + gpmi_copy_bits(oob, oob_bit_off, + tmp_buf, src_bit_off, + eccbits); + + src_bit_off += eccbits; + oob_bit_off += eccbits; + } + + if (oob_required) { + oob_byte_off = oob_bit_off / 8; + + if (oob_byte_off < mtd->oobsize) + memcpy(oob + oob_byte_off, + tmp_buf + mtd->writesize + oob_byte_off, + mtd->oobsize - oob_byte_off); + } + + return 0; +} + +/* + * This function writes a NAND page without involving the ECC engine (no HW + * ECC generation). + * The tricky part in the GPMI/BCH controller is that it stores ECC bits + * inline (interleaved with payload DATA), and do not align data chunk on + * byte boundaries. + * We thus need to take care moving the OOB area at the right place in the + * final page, which is why we're using gpmi_copy_bits. + * + * See set_geometry_by_ecc_info inline comments to have a full description + * of the layout used by the GPMI controller. + */ +static int gpmi_ecc_write_page_raw(struct mtd_info *mtd, + struct nand_chip *chip, + const uint8_t *buf, + int oob_required) +{ + struct gpmi_nand_data *this = chip->priv; + struct bch_geometry *nfc_geo = &this->bch_geometry; + int eccsize = nfc_geo->ecc_chunk_size; + int eccbits = nfc_geo->ecc_strength * nfc_geo->gf_len; + u8 *tmp_buf = this->raw_buffer; + uint8_t *oob = chip->oob_poi; + size_t dst_bit_off; + size_t oob_bit_off; + size_t oob_byte_off; + int step; + + /* + * Initialize all bits to 1 in case we don't have a buffer for the + * payload or oob data in order to leave unspecified bits of data + * to their initial state. + */ + if (!buf || !oob_required) + memset(tmp_buf, 0xff, mtd->writesize + mtd->oobsize); + + /* + * First copy the metadata section (stored in oob buffer) at the + * beginning of the page, as imposed by the GPMI layout. 
+ */ + memcpy(tmp_buf, oob, nfc_geo->metadata_size); + oob_bit_off = nfc_geo->metadata_size * 8; + dst_bit_off = oob_bit_off; + + /* Interleave payload data and ECC bits */ + for (step = 0; step < nfc_geo->ecc_chunk_count; step++) { + if (buf) + gpmi_copy_bits(tmp_buf, dst_bit_off, + buf, step * eccsize * 8, eccsize * 8); + dst_bit_off += eccsize * 8; + + /* Align last ECC block to align a byte boundary */ + if (step == nfc_geo->ecc_chunk_count - 1 && + (oob_bit_off + eccbits) % 8) + eccbits += 8 - ((oob_bit_off + eccbits) % 8); + + if (oob_required) + gpmi_copy_bits(tmp_buf, dst_bit_off, + oob, oob_bit_off, eccbits); + + dst_bit_off += eccbits; + oob_bit_off += eccbits; + } + + oob_byte_off = oob_bit_off / 8; + + if (oob_required && oob_byte_off < mtd->oobsize) + memcpy(tmp_buf + mtd->writesize + oob_byte_off, + oob + oob_byte_off, mtd->oobsize - oob_byte_off); + + /* + * If required, swap the bad block marker and the first byte of the + * metadata section, so that we don't modify the bad block marker. + * + * See the layout description for a detailed explanation on why this + * is needed. + */ + if (this->swap_block_mark) { + u8 swap = tmp_buf[0]; + + tmp_buf[0] = tmp_buf[mtd->writesize]; + tmp_buf[mtd->writesize] = swap; + } + + chip->write_buf(mtd, tmp_buf, mtd->writesize + mtd->oobsize); + + return 0; +} + +static int gpmi_ecc_read_oob_raw(struct mtd_info *mtd, struct nand_chip *chip, + int page) +{ + chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); + + return gpmi_ecc_read_page_raw(mtd, chip, NULL, 1, page); +} + +static int gpmi_ecc_write_oob_raw(struct mtd_info *mtd, struct nand_chip *chip, + int page) +{ + chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0, page); + + return gpmi_ecc_write_page_raw(mtd, chip, NULL, 1); +} + static int gpmi_block_markbad(struct mtd_info *mtd, loff_t ofs) { struct nand_chip *chip = mtd->priv; @@ -1664,6 +1861,10 @@ static int gpmi_init_last(struct gpmi_nand_data *this) ecc->write_page = gpmi_ecc_write_page; ecc->read_oob = gpmi_ecc_read_oob; ecc->write_oob = gpmi_ecc_write_oob; + ecc->read_page_raw = gpmi_ecc_read_page_raw; + ecc->write_page_raw = gpmi_ecc_write_page_raw; + ecc->read_oob_raw = gpmi_ecc_read_oob_raw; + ecc->write_oob_raw = gpmi_ecc_write_oob_raw; ecc->mode = NAND_ECC_HW; ecc->size = bch_geo->ecc_chunk_size; ecc->strength = bch_geo->ecc_strength; diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h index 32c6ba49f98..544062f6502 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h @@ -189,6 +189,8 @@ struct gpmi_nand_data { void *auxiliary_virt; dma_addr_t auxiliary_phys; + void *raw_buffer; + /* DMA channels */ #define DMA_CHANS 8 struct dma_chan *dma_chans[DMA_CHANS]; @@ -290,6 +292,10 @@ extern int gpmi_send_page(struct gpmi_nand_data *, extern int gpmi_read_page(struct gpmi_nand_data *, dma_addr_t payload, dma_addr_t auxiliary); +void gpmi_copy_bits(u8 *dst, size_t dst_bit_off, + const u8 *src, size_t src_bit_off, + size_t nbits); + /* BCH : Status Block Completion Codes */ #define STATUS_GOOD 0x00 #define STATUS_ERASED 0xff diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index e1d56beeca7..a8f550fec35 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -280,14 +280,10 @@ static void memcpy32_fromio(void *trg, const void __iomem *src, size_t size) *t++ = __raw_readl(s++); } -static void memcpy32_toio(void __iomem *trg, const void *src, int size) +static inline void memcpy32_toio(void __iomem *trg, const void *src, int 
size) { - int i; - u32 __iomem *t = trg; - const u32 *s = src; - - for (i = 0; i < (size >> 2); i++) - __raw_writel(*s++, t++); + /* __iowrite32_copy() takes its size in 32-bit words, so divide the byte count by 4 */ + __iowrite32_copy(trg, src, size / 4); } static int check_int_v3(struct mxc_nand_host *host) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 5b5c6271281..41585dfb206 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -485,11 +485,11 @@ static int nand_check_wp(struct mtd_info *mtd) } /** - * nand_block_checkbad - [GENERIC] Check if a block is marked bad + * nand_block_isreserved - [GENERIC] Check if a block is marked reserved. * @mtd: MTD device structure * @ofs: offset from device start * - * Check if the block is mark as reserved. + * Check if the block is marked as reserved. */ static int nand_block_isreserved(struct mtd_info *mtd, loff_t ofs) { @@ -720,7 +720,7 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command, /* * Program and erase have their own busy handlers status, sequential - * in, and deplete1 need no delay. + * in and status need no delay. */ switch (command) { @@ -3765,9 +3765,9 @@ ident_done: pr_info("%s %s\n", nand_manuf_ids[maf_idx].name, type->name); - pr_info("%dMiB, %s, page size: %d, OOB size: %d\n", + pr_info("%d MiB, %s, erase size: %d KiB, page size: %d, OOB size: %d\n", (int)(chip->chipsize >> 20), nand_is_slc(chip) ? "SLC" : "MLC", - mtd->writesize, mtd->oobsize); + mtd->erasesize >> 10, mtd->writesize, mtd->oobsize); return type; } @@ -4035,7 +4035,7 @@ int nand_scan_tail(struct mtd_info *mtd) */ if (!ecc->size && (mtd->oobsize >= 64)) { ecc->size = 512; - ecc->bytes = 7; + ecc->bytes = DIV_ROUND_UP(13 * ecc->strength, 8); } ecc->priv = nand_bch_init(mtd, ecc->size, ecc->bytes, &ecc->layout); diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c index fbde8910524..dd620c19c61 100644 --- a/drivers/mtd/nand/nand_ids.c +++ b/drivers/mtd/nand/nand_ids.c @@ -178,6 +178,7 @@ struct nand_manufacturers nand_manuf_ids[] = { {NAND_MFR_EON, "Eon"}, {NAND_MFR_SANDISK, "SanDisk"}, {NAND_MFR_INTEL, "Intel"}, + {NAND_MFR_ATO, "ATO"}, {0x0, "Unknown"} }; diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 7dc1dd28d89..ab5bbf56743 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -87,10 +87,6 @@ #define CONFIG_NANDSIM_MAX_PARTS 32 #endif -static uint first_id_byte = CONFIG_NANDSIM_FIRST_ID_BYTE; -static uint second_id_byte = CONFIG_NANDSIM_SECOND_ID_BYTE; -static uint third_id_byte = CONFIG_NANDSIM_THIRD_ID_BYTE; -static uint fourth_id_byte = CONFIG_NANDSIM_FOURTH_ID_BYTE; static uint access_delay = CONFIG_NANDSIM_ACCESS_DELAY; static uint programm_delay = CONFIG_NANDSIM_PROGRAMM_DELAY; static uint erase_delay = CONFIG_NANDSIM_ERASE_DELAY; @@ -111,11 +107,19 @@ static unsigned int overridesize = 0; static char *cache_file = NULL; static unsigned int bbt; static unsigned int bch; +static u_char id_bytes[8] = { + [0] = CONFIG_NANDSIM_FIRST_ID_BYTE, + [1] = CONFIG_NANDSIM_SECOND_ID_BYTE, + [2] = CONFIG_NANDSIM_THIRD_ID_BYTE, + [3] = CONFIG_NANDSIM_FOURTH_ID_BYTE, + [4 ...
7] = 0xFF, +}; -module_param(first_id_byte, uint, 0400); -module_param(second_id_byte, uint, 0400); -module_param(third_id_byte, uint, 0400); -module_param(fourth_id_byte, uint, 0400); +module_param_array(id_bytes, byte, NULL, 0400); +module_param_named(first_id_byte, id_bytes[0], byte, 0400); +module_param_named(second_id_byte, id_bytes[1], byte, 0400); +module_param_named(third_id_byte, id_bytes[2], byte, 0400); +module_param_named(fourth_id_byte, id_bytes[3], byte, 0400); module_param(access_delay, uint, 0400); module_param(programm_delay, uint, 0400); module_param(erase_delay, uint, 0400); @@ -136,10 +140,11 @@ module_param(cache_file, charp, 0400); module_param(bbt, uint, 0400); module_param(bch, uint, 0400); -MODULE_PARM_DESC(first_id_byte, "The first byte returned by NAND Flash 'read ID' command (manufacturer ID)"); -MODULE_PARM_DESC(second_id_byte, "The second byte returned by NAND Flash 'read ID' command (chip ID)"); -MODULE_PARM_DESC(third_id_byte, "The third byte returned by NAND Flash 'read ID' command"); -MODULE_PARM_DESC(fourth_id_byte, "The fourth byte returned by NAND Flash 'read ID' command"); +MODULE_PARM_DESC(id_bytes, "The ID bytes returned by NAND Flash 'read ID' command"); +MODULE_PARM_DESC(first_id_byte, "The first byte returned by NAND Flash 'read ID' command (manufacturer ID) (obsolete)"); +MODULE_PARM_DESC(second_id_byte, "The second byte returned by NAND Flash 'read ID' command (chip ID) (obsolete)"); +MODULE_PARM_DESC(third_id_byte, "The third byte returned by NAND Flash 'read ID' command (obsolete)"); +MODULE_PARM_DESC(fourth_id_byte, "The fourth byte returned by NAND Flash 'read ID' command (obsolete)"); MODULE_PARM_DESC(access_delay, "Initial page access delay (microseconds)"); MODULE_PARM_DESC(programm_delay, "Page programm delay (microseconds"); MODULE_PARM_DESC(erase_delay, "Sector erase delay (milliseconds)"); @@ -304,7 +309,7 @@ struct nandsim { unsigned int nbparts; uint busw; /* flash chip bus width (8 or 16) */ - u_char ids[4]; /* chip's ID bytes */ + u_char ids[8]; /* chip's ID bytes */ uint32_t options; /* chip's characteristic bits */ uint32_t state; /* current chip state */ uint32_t nxstate; /* next expected state */ @@ -2279,17 +2284,18 @@ static int __init ns_init_module(void) * Perform minimum nandsim structure initialization to handle * the initial ID read command correctly */ - if (third_id_byte != 0xFF || fourth_id_byte != 0xFF) + if (id_bytes[6] != 0xFF || id_bytes[7] != 0xFF) + nand->geom.idbytes = 8; + else if (id_bytes[4] != 0xFF || id_bytes[5] != 0xFF) + nand->geom.idbytes = 6; + else if (id_bytes[2] != 0xFF || id_bytes[3] != 0xFF) nand->geom.idbytes = 4; else nand->geom.idbytes = 2; nand->regs.status = NS_STATUS_OK(nand); nand->nxstate = STATE_UNKNOWN; nand->options |= OPT_PAGE512; /* temporary value */ - nand->ids[0] = first_id_byte; - nand->ids[1] = second_id_byte; - nand->ids[2] = third_id_byte; - nand->ids[3] = fourth_id_byte; + memcpy(nand->ids, id_bytes, sizeof(nand->ids)); if (bus_width == 16) { nand->busw = 16; chip->options |= NAND_BUSWIDTH_16; diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 6d74b56dd9f..63f858e6bf3 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -144,11 +144,13 @@ static u_char bch8_vector[] = {0xf3, 0xdb, 0x14, 0x16, 0x8b, 0xd2, 0xbe, 0xcc, 0xac, 0x6b, 0xff, 0x99, 0x7b}; static u_char bch4_vector[] = {0x00, 0x6b, 0x31, 0xdd, 0x41, 0xbc, 0x10}; -/* oob info generated runtime depending on ecc algorithm and layout selected */ -static struct nand_ecclayout 
omap_oobinfo; +/* Shared among all NAND instances to synchronize access to the ECC Engine */ +static struct nand_hw_control omap_gpmc_controller = { + .lock = __SPIN_LOCK_UNLOCKED(omap_gpmc_controller.lock), + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(omap_gpmc_controller.wq), +}; struct omap_nand_info { - struct nand_hw_control controller; struct omap_nand_platform_data *pdata; struct mtd_info mtd; struct nand_chip nand; @@ -168,6 +170,8 @@ struct omap_nand_info { u_char *buf; int buf_len; struct gpmc_nand_regs reg; + /* generated at runtime depending on ECC algorithm and layout selected */ + struct nand_ecclayout oobinfo; /* fields specific for BCHx_HW ECC scheme */ struct device *elm_dev; struct device_node *of_node; @@ -1686,9 +1690,6 @@ static int omap_nand_probe(struct platform_device *pdev) platform_set_drvdata(pdev, info); - spin_lock_init(&info->controller.lock); - init_waitqueue_head(&info->controller.wq); - info->pdev = pdev; info->gpmc_cs = pdata->cs; info->reg = pdata->reg; @@ -1708,7 +1709,7 @@ static int omap_nand_probe(struct platform_device *pdev) info->phys_base = res->start; - nand_chip->controller = &info->controller; + nand_chip->controller = &omap_gpmc_controller; nand_chip->IO_ADDR_W = nand_chip->IO_ADDR_R; nand_chip->cmd_ctrl = omap_hwcontrol; @@ -1741,13 +1742,6 @@ static int omap_nand_probe(struct platform_device *pdev) goto return_error; } - /* check for small page devices */ - if ((mtd->oobsize < 64) && (pdata->ecc_opt != OMAP_ECC_HAM1_CODE_HW)) { - dev_err(&info->pdev->dev, "small page devices are not supported\n"); - err = -EINVAL; - goto return_error; - } - /* re-populate low-level callbacks based on xfer modes */ switch (pdata->xfer_type) { case NAND_OMAP_PREFETCH_POLLED: @@ -1840,7 +1834,7 @@ static int omap_nand_probe(struct platform_device *pdev) } /* populate MTD interface based on ECC scheme */ - ecclayout = &omap_oobinfo; + ecclayout = &info->oobinfo; switch (info->ecc_opt) { case OMAP_ECC_HAM1_CODE_SW: nand_chip->ecc.mode = NAND_ECC_SOFT; diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c index c53e36956bf..c3c6d305caa 100644 --- a/drivers/mtd/nand/orion_nand.c +++ b/drivers/mtd/nand/orion_nand.c @@ -19,7 +19,7 @@ #include <linux/mtd/partitions.h> #include <linux/clk.h> #include <linux/err.h> -#include <asm/io.h> +#include <linux/io.h> #include <asm/sizes.h> #include <linux/platform_data/mtd-orion_nand.h> @@ -85,33 +85,24 @@ static int __init orion_nand_probe(struct platform_device *pdev) int ret = 0; u32 val = 0; - nc = kzalloc(sizeof(struct nand_chip) + sizeof(struct mtd_info), GFP_KERNEL); - if (!nc) { - ret = -ENOMEM; - goto no_res; - } + nc = devm_kzalloc(&pdev->dev, + sizeof(struct nand_chip) + sizeof(struct mtd_info), + GFP_KERNEL); + if (!nc) + return -ENOMEM; mtd = (struct mtd_info *)(nc + 1); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - ret = -ENODEV; - goto no_res; - } + io_base = devm_ioremap_resource(&pdev->dev, res); - io_base = ioremap(res->start, resource_size(res)); - if (!io_base) { - dev_err(&pdev->dev, "ioremap failed\n"); - ret = -EIO; - goto no_res; - } + if (IS_ERR(io_base)) + return PTR_ERR(io_base); if (pdev->dev.of_node) { board = devm_kzalloc(&pdev->dev, sizeof(struct orion_nand_data), GFP_KERNEL); - if (!board) { - ret = -ENOMEM; - goto no_res; - } + if (!board) + return -ENOMEM; if (!of_property_read_u32(pdev->dev.of_node, "cle", &val)) board->cle = (u8)val; else @@ -185,9 +176,6 @@ no_dev: clk_disable_unprepare(clk); clk_put(clk); } - iounmap(io_base); -no_res: - kfree(nc); 
return ret; } @@ -195,15 +183,10 @@ no_res: static int orion_nand_remove(struct platform_device *pdev) { struct mtd_info *mtd = platform_get_drvdata(pdev); - struct nand_chip *nc = mtd->priv; struct clk *clk; nand_release(mtd); - iounmap(nc->IO_ADDR_W); - - kfree(nc); - clk = clk_get(&pdev->dev, NULL); if (!IS_ERR(clk)) { clk_disable_unprepare(clk); diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c new file mode 100644 index 00000000000..ccaa8e28338 --- /dev/null +++ b/drivers/mtd/nand/sunxi_nand.c @@ -0,0 +1,1432 @@ +/* + * Copyright (C) 2013 Boris BREZILLON <b.brezillon.dev@gmail.com> + * + * Derived from: + * https://github.com/yuq/sunxi-nfc-mtd + * Copyright (C) 2013 Qiang Yu <yuq825@gmail.com> + * + * https://github.com/hno/Allwinner-Info + * Copyright (C) 2013 Henrik Nordström <Henrik Nordström> + * + * Copyright (C) 2013 Dmitriy B. <rzk333@gmail.com> + * Copyright (C) 2013 Sergey Lapin <slapin@ossfans.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/platform_device.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_gpio.h> +#include <linux/of_mtd.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/nand.h> +#include <linux/mtd/partitions.h> +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/dmaengine.h> +#include <linux/gpio.h> +#include <linux/interrupt.h> +#include <linux/io.h> + +#define NFC_REG_CTL 0x0000 +#define NFC_REG_ST 0x0004 +#define NFC_REG_INT 0x0008 +#define NFC_REG_TIMING_CTL 0x000C +#define NFC_REG_TIMING_CFG 0x0010 +#define NFC_REG_ADDR_LOW 0x0014 +#define NFC_REG_ADDR_HIGH 0x0018 +#define NFC_REG_SECTOR_NUM 0x001C +#define NFC_REG_CNT 0x0020 +#define NFC_REG_CMD 0x0024 +#define NFC_REG_RCMD_SET 0x0028 +#define NFC_REG_WCMD_SET 0x002C +#define NFC_REG_IO_DATA 0x0030 +#define NFC_REG_ECC_CTL 0x0034 +#define NFC_REG_ECC_ST 0x0038 +#define NFC_REG_DEBUG 0x003C +#define NFC_REG_ECC_CNT0 0x0040 +#define NFC_REG_ECC_CNT1 0x0044 +#define NFC_REG_ECC_CNT2 0x0048 +#define NFC_REG_ECC_CNT3 0x004c +#define NFC_REG_USER_DATA_BASE 0x0050 +#define NFC_REG_SPARE_AREA 0x00A0 +#define NFC_RAM0_BASE 0x0400 +#define NFC_RAM1_BASE 0x0800 + +/* define bit use in NFC_CTL */ +#define NFC_EN BIT(0) +#define NFC_RESET BIT(1) +#define NFC_BUS_WIDYH BIT(2) +#define NFC_RB_SEL BIT(3) +#define NFC_CE_SEL GENMASK(26, 24) +#define NFC_CE_CTL BIT(6) +#define NFC_CE_CTL1 BIT(7) +#define NFC_PAGE_SIZE GENMASK(11, 8) +#define NFC_SAM BIT(12) +#define NFC_RAM_METHOD BIT(14) +#define NFC_DEBUG_CTL BIT(31) + +/* define bit use in NFC_ST */ +#define NFC_RB_B2R BIT(0) +#define NFC_CMD_INT_FLAG BIT(1) +#define NFC_DMA_INT_FLAG BIT(2) +#define NFC_CMD_FIFO_STATUS BIT(3) +#define NFC_STA BIT(4) +#define NFC_NATCH_INT_FLAG BIT(5) +#define NFC_RB_STATE0 BIT(8) +#define NFC_RB_STATE1 BIT(9) +#define NFC_RB_STATE2 BIT(10) +#define NFC_RB_STATE3 BIT(11) + +/* define bit use in NFC_INT */ +#define NFC_B2R_INT_ENABLE BIT(0) +#define NFC_CMD_INT_ENABLE 
BIT(1) +#define NFC_DMA_INT_ENABLE BIT(2) +#define NFC_INT_MASK (NFC_B2R_INT_ENABLE | \ + NFC_CMD_INT_ENABLE | \ + NFC_DMA_INT_ENABLE) + +/* define bit use in NFC_CMD */ +#define NFC_CMD_LOW_BYTE GENMASK(7, 0) +#define NFC_CMD_HIGH_BYTE GENMASK(15, 8) +#define NFC_ADR_NUM GENMASK(18, 16) +#define NFC_SEND_ADR BIT(19) +#define NFC_ACCESS_DIR BIT(20) +#define NFC_DATA_TRANS BIT(21) +#define NFC_SEND_CMD1 BIT(22) +#define NFC_WAIT_FLAG BIT(23) +#define NFC_SEND_CMD2 BIT(24) +#define NFC_SEQ BIT(25) +#define NFC_DATA_SWAP_METHOD BIT(26) +#define NFC_ROW_AUTO_INC BIT(27) +#define NFC_SEND_CMD3 BIT(28) +#define NFC_SEND_CMD4 BIT(29) +#define NFC_CMD_TYPE GENMASK(31, 30) + +/* define bit use in NFC_RCMD_SET */ +#define NFC_READ_CMD GENMASK(7, 0) +#define NFC_RANDOM_READ_CMD0 GENMASK(15, 8) +#define NFC_RANDOM_READ_CMD1 GENMASK(23, 16) + +/* define bit use in NFC_WCMD_SET */ +#define NFC_PROGRAM_CMD GENMASK(7, 0) +#define NFC_RANDOM_WRITE_CMD GENMASK(15, 8) +#define NFC_READ_CMD0 GENMASK(23, 16) +#define NFC_READ_CMD1 GENMASK(31, 24) + +/* define bit use in NFC_ECC_CTL */ +#define NFC_ECC_EN BIT(0) +#define NFC_ECC_PIPELINE BIT(3) +#define NFC_ECC_EXCEPTION BIT(4) +#define NFC_ECC_BLOCK_SIZE BIT(5) +#define NFC_RANDOM_EN BIT(9) +#define NFC_RANDOM_DIRECTION BIT(10) +#define NFC_ECC_MODE_SHIFT 12 +#define NFC_ECC_MODE GENMASK(15, 12) +#define NFC_RANDOM_SEED GENMASK(30, 16) + +#define NFC_DEFAULT_TIMEOUT_MS 1000 + +#define NFC_SRAM_SIZE 1024 + +#define NFC_MAX_CS 7 + +/* + * Ready/Busy detection type: describes the Ready/Busy detection modes + * + * @RB_NONE: no external detection available, rely on STATUS command + * and software timeouts + * @RB_NATIVE: use sunxi NAND controller Ready/Busy support. The Ready/Busy + * pin of the NAND flash chip must be connected to one of the + * native NAND R/B pins (those which can be muxed to the NAND + * Controller) + * @RB_GPIO: use a simple GPIO to handle Ready/Busy status. The Ready/Busy + * pin of the NAND flash chip must be connected to a GPIO capable + * pin. + */ +enum sunxi_nand_rb_type { + RB_NONE, + RB_NATIVE, + RB_GPIO, +}; + +/* + * Ready/Busy structure: stores information related to Ready/Busy detection + * + * @type: the Ready/Busy detection mode + * @info: information related to the R/B detection mode. Either a gpio + * id or a native R/B id (those supported by the NAND controller). 
+ */ +struct sunxi_nand_rb { + enum sunxi_nand_rb_type type; + union { + int gpio; + int nativeid; + } info; +}; + +/* + * Chip Select structure: stores information related to NAND Chip Select + * + * @cs: the NAND CS id used to communicate with a NAND Chip + * @rb: the Ready/Busy description + */ +struct sunxi_nand_chip_sel { + u8 cs; + struct sunxi_nand_rb rb; +}; + +/* + * sunxi HW ECC infos: stores information related to HW ECC support + * + * @mode: the sunxi ECC mode field deduced from ECC requirements + * @layout: the OOB layout depending on the ECC requirements and the + * selected ECC mode + */ +struct sunxi_nand_hw_ecc { + int mode; + struct nand_ecclayout layout; +}; + +/* + * NAND chip structure: stores NAND chip device related information + * + * @node: used to store NAND chips into a list + * @nand: base NAND chip structure + * @mtd: base MTD structure + * @clk_rate: clk_rate required for this NAND chip + * @selected: current active CS + * @nsels: number of CS lines required by the NAND chip + * @sels: array of CS lines descriptions + */ +struct sunxi_nand_chip { + struct list_head node; + struct nand_chip nand; + struct mtd_info mtd; + unsigned long clk_rate; + int selected; + int nsels; + struct sunxi_nand_chip_sel sels[0]; +}; + +static inline struct sunxi_nand_chip *to_sunxi_nand(struct nand_chip *nand) +{ + return container_of(nand, struct sunxi_nand_chip, nand); +} + +/* + * NAND Controller structure: stores sunxi NAND controller information + * + * @controller: base controller structure + * @dev: parent device (used to print error messages) + * @regs: NAND controller registers + * @ahb_clk: NAND Controller AHB clock + * @mod_clk: NAND Controller mod clock + * @assigned_cs: bitmask describing already assigned CS lines + * @clk_rate: NAND controller current clock rate + * @chips: a list containing all the NAND chips attached to + * this NAND controller + * @complete: a completion object used to wait for NAND + * controller events + */ +struct sunxi_nfc { + struct nand_hw_control controller; + struct device *dev; + void __iomem *regs; + struct clk *ahb_clk; + struct clk *mod_clk; + unsigned long assigned_cs; + unsigned long clk_rate; + struct list_head chips; + struct completion complete; +}; + +static inline struct sunxi_nfc *to_sunxi_nfc(struct nand_hw_control *ctrl) +{ + return container_of(ctrl, struct sunxi_nfc, controller); +} + +static irqreturn_t sunxi_nfc_interrupt(int irq, void *dev_id) +{ + struct sunxi_nfc *nfc = dev_id; + u32 st = readl(nfc->regs + NFC_REG_ST); + u32 ien = readl(nfc->regs + NFC_REG_INT); + + if (!(ien & st)) + return IRQ_NONE; + + if ((ien & st) == ien) + complete(&nfc->complete); + + writel(st & NFC_INT_MASK, nfc->regs + NFC_REG_ST); + writel(~st & ien & NFC_INT_MASK, nfc->regs + NFC_REG_INT); + + return IRQ_HANDLED; +} + +static int sunxi_nfc_wait_int(struct sunxi_nfc *nfc, u32 flags, + unsigned int timeout_ms) +{ + init_completion(&nfc->complete); + + writel(flags, nfc->regs + NFC_REG_INT); + + if (!timeout_ms) + timeout_ms = NFC_DEFAULT_TIMEOUT_MS; + + if (!wait_for_completion_timeout(&nfc->complete, + msecs_to_jiffies(timeout_ms))) { + dev_err(nfc->dev, "wait interrupt timedout\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int sunxi_nfc_wait_cmd_fifo_empty(struct sunxi_nfc *nfc) +{ + unsigned long timeout = jiffies + + msecs_to_jiffies(NFC_DEFAULT_TIMEOUT_MS); + + do { + if (!(readl(nfc->regs + NFC_REG_ST) & NFC_CMD_FIFO_STATUS)) + return 0; + } while (time_before(jiffies, timeout)); + + dev_err(nfc->dev, "wait for empty cmd 
FIFO timedout\n"); + return -ETIMEDOUT; +} + +static int sunxi_nfc_rst(struct sunxi_nfc *nfc) +{ + unsigned long timeout = jiffies + + msecs_to_jiffies(NFC_DEFAULT_TIMEOUT_MS); + + writel(0, nfc->regs + NFC_REG_ECC_CTL); + writel(NFC_RESET, nfc->regs + NFC_REG_CTL); + + do { + if (!(readl(nfc->regs + NFC_REG_CTL) & NFC_RESET)) + return 0; + } while (time_before(jiffies, timeout)); + + dev_err(nfc->dev, "wait for NAND controller reset timedout\n"); + return -ETIMEDOUT; +} + +static int sunxi_nfc_dev_ready(struct mtd_info *mtd) +{ + struct nand_chip *nand = mtd->priv; + struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand); + struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller); + struct sunxi_nand_rb *rb; + unsigned long timeo = (sunxi_nand->nand.state == FL_ERASING ? 400 : 20); + int ret; + + if (sunxi_nand->selected < 0) + return 0; + + rb = &sunxi_nand->sels[sunxi_nand->selected].rb; + + switch (rb->type) { + case RB_NATIVE: + ret = !!(readl(nfc->regs + NFC_REG_ST) & + (NFC_RB_STATE0 << rb->info.nativeid)); + if (ret) + break; + + sunxi_nfc_wait_int(nfc, NFC_RB_B2R, timeo); + ret = !!(readl(nfc->regs + NFC_REG_ST) & + (NFC_RB_STATE0 << rb->info.nativeid)); + break; + case RB_GPIO: + ret = gpio_get_value(rb->info.gpio); + break; + case RB_NONE: + default: + ret = 0; + dev_err(nfc->dev, "cannot check R/B NAND status!\n"); + break; + } + + return ret; +} + +static void sunxi_nfc_select_chip(struct mtd_info *mtd, int chip) +{ + struct nand_chip *nand = mtd->priv; + struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand); + struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller); + struct sunxi_nand_chip_sel *sel; + u32 ctl; + + if (chip > 0 && chip >= sunxi_nand->nsels) + return; + + if (chip == sunxi_nand->selected) + return; + + ctl = readl(nfc->regs + NFC_REG_CTL) & + ~(NFC_CE_SEL | NFC_RB_SEL | NFC_EN); + + if (chip >= 0) { + sel = &sunxi_nand->sels[chip]; + + ctl |= (sel->cs << 24) | NFC_EN | + (((nand->page_shift - 10) & 0xf) << 8); + if (sel->rb.type == RB_NONE) { + nand->dev_ready = NULL; + } else { + nand->dev_ready = sunxi_nfc_dev_ready; + if (sel->rb.type == RB_NATIVE) + ctl |= (sel->rb.info.nativeid << 3); + } + + writel(mtd->writesize, nfc->regs + NFC_REG_SPARE_AREA); + + if (nfc->clk_rate != sunxi_nand->clk_rate) { + clk_set_rate(nfc->mod_clk, sunxi_nand->clk_rate); + nfc->clk_rate = sunxi_nand->clk_rate; + } + } + + writel(ctl, nfc->regs + NFC_REG_CTL); + + sunxi_nand->selected = chip; +} + +static void sunxi_nfc_read_buf(struct mtd_info *mtd, uint8_t *buf, int len) +{ + struct nand_chip *nand = mtd->priv; + struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand); + struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller); + int ret; + int cnt; + int offs = 0; + u32 tmp; + + while (len > offs) { + cnt = min(len - offs, NFC_SRAM_SIZE); + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + break; + + writel(cnt, nfc->regs + NFC_REG_CNT); + tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD; + writel(tmp, nfc->regs + NFC_REG_CMD); + + ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); + if (ret) + break; + + if (buf) + memcpy_fromio(buf + offs, nfc->regs + NFC_RAM0_BASE, + cnt); + offs += cnt; + } +} + +static void sunxi_nfc_write_buf(struct mtd_info *mtd, const uint8_t *buf, + int len) +{ + struct nand_chip *nand = mtd->priv; + struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand); + struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller); + int ret; + int cnt; + int offs = 0; + u32 tmp; + + while (len > offs) { + cnt = min(len - 
offs, NFC_SRAM_SIZE); + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + break; + + writel(cnt, nfc->regs + NFC_REG_CNT); + memcpy_toio(nfc->regs + NFC_RAM0_BASE, buf + offs, cnt); + tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | + NFC_ACCESS_DIR; + writel(tmp, nfc->regs + NFC_REG_CMD); + + ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); + if (ret) + break; + + offs += cnt; + } +} + +static uint8_t sunxi_nfc_read_byte(struct mtd_info *mtd) +{ + uint8_t ret; + + sunxi_nfc_read_buf(mtd, &ret, 1); + + return ret; +} + +static void sunxi_nfc_cmd_ctrl(struct mtd_info *mtd, int dat, + unsigned int ctrl) +{ + struct nand_chip *nand = mtd->priv; + struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand); + struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller); + int ret; + u32 tmp; + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + return; + + if (ctrl & NAND_CTRL_CHANGE) { + tmp = readl(nfc->regs + NFC_REG_CTL); + if (ctrl & NAND_NCE) + tmp |= NFC_CE_CTL; + else + tmp &= ~NFC_CE_CTL; + writel(tmp, nfc->regs + NFC_REG_CTL); + } + + if (dat == NAND_CMD_NONE) + return; + + if (ctrl & NAND_CLE) { + writel(NFC_SEND_CMD1 | dat, nfc->regs + NFC_REG_CMD); + } else { + writel(dat, nfc->regs + NFC_REG_ADDR_LOW); + writel(NFC_SEND_ADR, nfc->regs + NFC_REG_CMD); + } + + sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); +} + +static int sunxi_nfc_hw_ecc_read_page(struct mtd_info *mtd, + struct nand_chip *chip, uint8_t *buf, + int oob_required, int page) +{ + struct sunxi_nfc *nfc = to_sunxi_nfc(chip->controller); + struct nand_ecc_ctrl *ecc = &chip->ecc; + struct nand_ecclayout *layout = ecc->layout; + struct sunxi_nand_hw_ecc *data = ecc->priv; + unsigned int max_bitflips = 0; + int offset; + int ret; + u32 tmp; + int i; + int cnt; + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~(NFC_ECC_MODE | NFC_ECC_PIPELINE | NFC_ECC_BLOCK_SIZE); + tmp |= NFC_ECC_EN | (data->mode << NFC_ECC_MODE_SHIFT) | + NFC_ECC_EXCEPTION; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + for (i = 0; i < ecc->steps; i++) { + if (i) + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, i * ecc->size, -1); + + offset = mtd->writesize + layout->eccpos[i * ecc->bytes] - 4; + + chip->read_buf(mtd, NULL, ecc->size); + + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1); + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + return ret; + + tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | (1 << 30); + writel(tmp, nfc->regs + NFC_REG_CMD); + + ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); + if (ret) + return ret; + + memcpy_fromio(buf + (i * ecc->size), + nfc->regs + NFC_RAM0_BASE, ecc->size); + + if (readl(nfc->regs + NFC_REG_ECC_ST) & 0x1) { + mtd->ecc_stats.failed++; + } else { + tmp = readl(nfc->regs + NFC_REG_ECC_CNT0) & 0xff; + mtd->ecc_stats.corrected += tmp; + max_bitflips = max_t(unsigned int, max_bitflips, tmp); + } + + if (oob_required) { + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1); + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + return ret; + + offset -= mtd->writesize; + chip->read_buf(mtd, chip->oob_poi + offset, + ecc->bytes + 4); + } + } + + if (oob_required) { + cnt = ecc->layout->oobfree[ecc->steps].length; + if (cnt > 0) { + offset = mtd->writesize + + ecc->layout->oobfree[ecc->steps].offset; + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1); + offset -= mtd->writesize; + chip->read_buf(mtd, chip->oob_poi + offset, cnt); + } + } + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~NFC_ECC_EN; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + return max_bitflips; +} + +static int 
sunxi_nfc_hw_ecc_write_page(struct mtd_info *mtd, + struct nand_chip *chip, + const uint8_t *buf, int oob_required) +{ + struct sunxi_nfc *nfc = to_sunxi_nfc(chip->controller); + struct nand_ecc_ctrl *ecc = &chip->ecc; + struct nand_ecclayout *layout = ecc->layout; + struct sunxi_nand_hw_ecc *data = ecc->priv; + int offset; + int ret; + u32 tmp; + int i; + int cnt; + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~(NFC_ECC_MODE | NFC_ECC_PIPELINE | NFC_ECC_BLOCK_SIZE); + tmp |= NFC_ECC_EN | (data->mode << NFC_ECC_MODE_SHIFT) | + NFC_ECC_EXCEPTION; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + for (i = 0; i < ecc->steps; i++) { + if (i) + chip->cmdfunc(mtd, NAND_CMD_RNDIN, i * ecc->size, -1); + + chip->write_buf(mtd, buf + (i * ecc->size), ecc->size); + + offset = layout->eccpos[i * ecc->bytes] - 4 + mtd->writesize; + + /* Fill OOB data in */ + if (oob_required) { + tmp = 0xffffffff; + memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, &tmp, + 4); + } else { + memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, + chip->oob_poi + offset - mtd->writesize, + 4); + } + + chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset, -1); + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + return ret; + + tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | NFC_ACCESS_DIR | + (1 << 30); + writel(tmp, nfc->regs + NFC_REG_CMD); + ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); + if (ret) + return ret; + } + + if (oob_required) { + cnt = ecc->layout->oobfree[i].length; + if (cnt > 0) { + offset = mtd->writesize + + ecc->layout->oobfree[i].offset; + chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset, -1); + offset -= mtd->writesize; + chip->write_buf(mtd, chip->oob_poi + offset, cnt); + } + } + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~NFC_ECC_EN; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + return 0; +} + +static int sunxi_nfc_hw_syndrome_ecc_read_page(struct mtd_info *mtd, + struct nand_chip *chip, + uint8_t *buf, int oob_required, + int page) +{ + struct sunxi_nfc *nfc = to_sunxi_nfc(chip->controller); + struct nand_ecc_ctrl *ecc = &chip->ecc; + struct sunxi_nand_hw_ecc *data = ecc->priv; + unsigned int max_bitflips = 0; + uint8_t *oob = chip->oob_poi; + int offset = 0; + int ret; + int cnt; + u32 tmp; + int i; + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~(NFC_ECC_MODE | NFC_ECC_PIPELINE | NFC_ECC_BLOCK_SIZE); + tmp |= NFC_ECC_EN | (data->mode << NFC_ECC_MODE_SHIFT) | + NFC_ECC_EXCEPTION; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + for (i = 0; i < ecc->steps; i++) { + chip->read_buf(mtd, NULL, ecc->size); + + tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | (1 << 30); + writel(tmp, nfc->regs + NFC_REG_CMD); + + ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); + if (ret) + return ret; + + memcpy_fromio(buf, nfc->regs + NFC_RAM0_BASE, ecc->size); + buf += ecc->size; + offset += ecc->size; + + if (readl(nfc->regs + NFC_REG_ECC_ST) & 0x1) { + mtd->ecc_stats.failed++; + } else { + tmp = readl(nfc->regs + NFC_REG_ECC_CNT0) & 0xff; + mtd->ecc_stats.corrected += tmp; + max_bitflips = max_t(unsigned int, max_bitflips, tmp); + } + + if (oob_required) { + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1); + chip->read_buf(mtd, oob, ecc->bytes + ecc->prepad); + oob += ecc->bytes + ecc->prepad; + } + + offset += ecc->bytes + ecc->prepad; + } + + if (oob_required) { + cnt = mtd->oobsize - (oob - chip->oob_poi); + if (cnt > 0) { + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1); + chip->read_buf(mtd, oob, cnt); + } + } + + writel(readl(nfc->regs + NFC_REG_ECC_CTL) & ~NFC_ECC_EN, + nfc->regs + 
NFC_REG_ECC_CTL); + + return max_bitflips; +} + +static int sunxi_nfc_hw_syndrome_ecc_write_page(struct mtd_info *mtd, + struct nand_chip *chip, + const uint8_t *buf, + int oob_required) +{ + struct sunxi_nfc *nfc = to_sunxi_nfc(chip->controller); + struct nand_ecc_ctrl *ecc = &chip->ecc; + struct sunxi_nand_hw_ecc *data = ecc->priv; + uint8_t *oob = chip->oob_poi; + int offset = 0; + int ret; + int cnt; + u32 tmp; + int i; + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~(NFC_ECC_MODE | NFC_ECC_PIPELINE | NFC_ECC_BLOCK_SIZE); + tmp |= NFC_ECC_EN | (data->mode << NFC_ECC_MODE_SHIFT) | + NFC_ECC_EXCEPTION; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + for (i = 0; i < ecc->steps; i++) { + chip->write_buf(mtd, buf + (i * ecc->size), ecc->size); + offset += ecc->size; + + /* Fill OOB data in */ + if (oob_required) { + tmp = 0xffffffff; + memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, &tmp, + 4); + } else { + memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, oob, + 4); + } + + tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | NFC_ACCESS_DIR | + (1 << 30); + writel(tmp, nfc->regs + NFC_REG_CMD); + + ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); + if (ret) + return ret; + + offset += ecc->bytes + ecc->prepad; + oob += ecc->bytes + ecc->prepad; + } + + if (oob_required) { + cnt = mtd->oobsize - (oob - chip->oob_poi); + if (cnt > 0) { + chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset, -1); + chip->write_buf(mtd, oob, cnt); + } + } + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~NFC_ECC_EN; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + return 0; +} + +static int sunxi_nand_chip_set_timings(struct sunxi_nand_chip *chip, + const struct nand_sdr_timings *timings) +{ + u32 min_clk_period = 0; + + /* T1 <=> tCLS */ + if (timings->tCLS_min > min_clk_period) + min_clk_period = timings->tCLS_min; + + /* T2 <=> tCLH */ + if (timings->tCLH_min > min_clk_period) + min_clk_period = timings->tCLH_min; + + /* T3 <=> tCS */ + if (timings->tCS_min > min_clk_period) + min_clk_period = timings->tCS_min; + + /* T4 <=> tCH */ + if (timings->tCH_min > min_clk_period) + min_clk_period = timings->tCH_min; + + /* T5 <=> tWP */ + if (timings->tWP_min > min_clk_period) + min_clk_period = timings->tWP_min; + + /* T6 <=> tWH */ + if (timings->tWH_min > min_clk_period) + min_clk_period = timings->tWH_min; + + /* T7 <=> tALS */ + if (timings->tALS_min > min_clk_period) + min_clk_period = timings->tALS_min; + + /* T8 <=> tDS */ + if (timings->tDS_min > min_clk_period) + min_clk_period = timings->tDS_min; + + /* T9 <=> tDH */ + if (timings->tDH_min > min_clk_period) + min_clk_period = timings->tDH_min; + + /* T10 <=> tRR */ + if (timings->tRR_min > (min_clk_period * 3)) + min_clk_period = DIV_ROUND_UP(timings->tRR_min, 3); + + /* T11 <=> tALH */ + if (timings->tALH_min > min_clk_period) + min_clk_period = timings->tALH_min; + + /* T12 <=> tRP */ + if (timings->tRP_min > min_clk_period) + min_clk_period = timings->tRP_min; + + /* T13 <=> tREH */ + if (timings->tREH_min > min_clk_period) + min_clk_period = timings->tREH_min; + + /* T14 <=> tRC */ + if (timings->tRC_min > (min_clk_period * 2)) + min_clk_period = DIV_ROUND_UP(timings->tRC_min, 2); + + /* T15 <=> tWC */ + if (timings->tWC_min > (min_clk_period * 2)) + min_clk_period = DIV_ROUND_UP(timings->tWC_min, 2); + + + /* Convert min_clk_period from picoseconds to nanoseconds */ + min_clk_period = DIV_ROUND_UP(min_clk_period, 1000); + + /* + * Convert min_clk_period into a clk frequency, then get the + * appropriate rate for the NAND controller IP given this 
formula + * (specified in the datasheet): + * nand clk_rate = 2 * min_clk_rate + */ + chip->clk_rate = (2 * NSEC_PER_SEC) / min_clk_period; + + /* TODO: configure T16-T19 */ + + return 0; +} + +static int sunxi_nand_chip_init_timings(struct sunxi_nand_chip *chip, + struct device_node *np) +{ + const struct nand_sdr_timings *timings; + int ret; + int mode; + + mode = onfi_get_async_timing_mode(&chip->nand); + if (mode == ONFI_TIMING_MODE_UNKNOWN) { + mode = chip->nand.onfi_timing_mode_default; + } else { + uint8_t feature[ONFI_SUBFEATURE_PARAM_LEN] = {}; + + mode = fls(mode) - 1; + if (mode < 0) + mode = 0; + + feature[0] = mode; + ret = chip->nand.onfi_set_features(&chip->mtd, &chip->nand, + ONFI_FEATURE_ADDR_TIMING_MODE, + feature); + if (ret) + return ret; + } + + timings = onfi_async_timing_mode_to_sdr_timings(mode); + if (IS_ERR(timings)) + return PTR_ERR(timings); + + return sunxi_nand_chip_set_timings(chip, timings); +} + +static int sunxi_nand_hw_common_ecc_ctrl_init(struct mtd_info *mtd, + struct nand_ecc_ctrl *ecc, + struct device_node *np) +{ + static const u8 strengths[] = { 16, 24, 28, 32, 40, 48, 56, 60, 64 }; + struct nand_chip *nand = mtd->priv; + struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand); + struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller); + struct sunxi_nand_hw_ecc *data; + struct nand_ecclayout *layout; + int nsectors; + int ret; + int i; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* Add ECC info retrieval from DT */ + for (i = 0; i < ARRAY_SIZE(strengths); i++) { + if (ecc->strength <= strengths[i]) + break; + } + + if (i >= ARRAY_SIZE(strengths)) { + dev_err(nfc->dev, "unsupported strength\n"); + ret = -ENOTSUPP; + goto err; + } + + data->mode = i; + + /* HW ECC always request ECC bytes for 1024 bytes blocks */ + ecc->bytes = DIV_ROUND_UP(ecc->strength * fls(8 * 1024), 8); + + /* HW ECC always work with even numbers of ECC bytes */ + ecc->bytes = ALIGN(ecc->bytes, 2); + + layout = &data->layout; + nsectors = mtd->writesize / ecc->size; + + if (mtd->oobsize < ((ecc->bytes + 4) * nsectors)) { + ret = -EINVAL; + goto err; + } + + layout->eccbytes = (ecc->bytes * nsectors); + + ecc->layout = layout; + ecc->priv = data; + + return 0; + +err: + kfree(data); + + return ret; +} + +static void sunxi_nand_hw_common_ecc_ctrl_cleanup(struct nand_ecc_ctrl *ecc) +{ + kfree(ecc->priv); +} + +static int sunxi_nand_hw_ecc_ctrl_init(struct mtd_info *mtd, + struct nand_ecc_ctrl *ecc, + struct device_node *np) +{ + struct nand_ecclayout *layout; + int nsectors; + int i, j; + int ret; + + ret = sunxi_nand_hw_common_ecc_ctrl_init(mtd, ecc, np); + if (ret) + return ret; + + ecc->read_page = sunxi_nfc_hw_ecc_read_page; + ecc->write_page = sunxi_nfc_hw_ecc_write_page; + layout = ecc->layout; + nsectors = mtd->writesize / ecc->size; + + for (i = 0; i < nsectors; i++) { + if (i) { + layout->oobfree[i].offset = + layout->oobfree[i - 1].offset + + layout->oobfree[i - 1].length + + ecc->bytes; + layout->oobfree[i].length = 4; + } else { + /* + * The first 2 bytes are used for BB markers, hence we + * only have 2 bytes available in the first user data + * section. 
+ */ + layout->oobfree[i].length = 2; + layout->oobfree[i].offset = 2; + } + + for (j = 0; j < ecc->bytes; j++) + layout->eccpos[(ecc->bytes * i) + j] = + layout->oobfree[i].offset + + layout->oobfree[i].length + j; + } + + if (mtd->oobsize > (ecc->bytes + 4) * nsectors) { + layout->oobfree[nsectors].offset = + layout->oobfree[nsectors - 1].offset + + layout->oobfree[nsectors - 1].length + + ecc->bytes; + layout->oobfree[nsectors].length = mtd->oobsize - + ((ecc->bytes + 4) * nsectors); + } + + return 0; +} + +static int sunxi_nand_hw_syndrome_ecc_ctrl_init(struct mtd_info *mtd, + struct nand_ecc_ctrl *ecc, + struct device_node *np) +{ + struct nand_ecclayout *layout; + int nsectors; + int i; + int ret; + + ret = sunxi_nand_hw_common_ecc_ctrl_init(mtd, ecc, np); + if (ret) + return ret; + + ecc->prepad = 4; + ecc->read_page = sunxi_nfc_hw_syndrome_ecc_read_page; + ecc->write_page = sunxi_nfc_hw_syndrome_ecc_write_page; + + layout = ecc->layout; + nsectors = mtd->writesize / ecc->size; + + for (i = 0; i < (ecc->bytes * nsectors); i++) + layout->eccpos[i] = i; + + layout->oobfree[0].length = mtd->oobsize - i; + layout->oobfree[0].offset = i; + + return 0; +} + +static void sunxi_nand_ecc_cleanup(struct nand_ecc_ctrl *ecc) +{ + switch (ecc->mode) { + case NAND_ECC_HW: + case NAND_ECC_HW_SYNDROME: + sunxi_nand_hw_common_ecc_ctrl_cleanup(ecc); + break; + case NAND_ECC_NONE: + kfree(ecc->layout); + default: + break; + } +} + +static int sunxi_nand_ecc_init(struct mtd_info *mtd, struct nand_ecc_ctrl *ecc, + struct device_node *np) +{ + struct nand_chip *nand = mtd->priv; + int strength; + int blk_size; + int ret; + + blk_size = of_get_nand_ecc_step_size(np); + strength = of_get_nand_ecc_strength(np); + if (blk_size > 0 && strength > 0) { + ecc->size = blk_size; + ecc->strength = strength; + } else { + ecc->size = nand->ecc_step_ds; + ecc->strength = nand->ecc_strength_ds; + } + + if (!ecc->size || !ecc->strength) + return -EINVAL; + + ecc->mode = NAND_ECC_HW; + + ret = of_get_nand_ecc_mode(np); + if (ret >= 0) + ecc->mode = ret; + + switch (ecc->mode) { + case NAND_ECC_SOFT_BCH: + ecc->bytes = DIV_ROUND_UP(ecc->strength * fls(8 * ecc->size), + 8); + break; + case NAND_ECC_HW: + ret = sunxi_nand_hw_ecc_ctrl_init(mtd, ecc, np); + if (ret) + return ret; + break; + case NAND_ECC_HW_SYNDROME: + ret = sunxi_nand_hw_syndrome_ecc_ctrl_init(mtd, ecc, np); + if (ret) + return ret; + break; + case NAND_ECC_NONE: + ecc->layout = kzalloc(sizeof(*ecc->layout), GFP_KERNEL); + if (!ecc->layout) + return -ENOMEM; + ecc->layout->oobfree[0].length = mtd->oobsize; + case NAND_ECC_SOFT: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc, + struct device_node *np) +{ + const struct nand_sdr_timings *timings; + struct sunxi_nand_chip *chip; + struct mtd_part_parser_data ppdata; + struct mtd_info *mtd; + struct nand_chip *nand; + int nsels; + int ret; + int i; + u32 tmp; + + if (!of_get_property(np, "reg", &nsels)) + return -EINVAL; + + nsels /= sizeof(u32); + if (!nsels) { + dev_err(dev, "invalid reg property size\n"); + return -EINVAL; + } + + chip = devm_kzalloc(dev, + sizeof(*chip) + + (nsels * sizeof(struct sunxi_nand_chip_sel)), + GFP_KERNEL); + if (!chip) { + dev_err(dev, "could not allocate chip\n"); + return -ENOMEM; + } + + chip->nsels = nsels; + chip->selected = -1; + + for (i = 0; i < nsels; i++) { + ret = of_property_read_u32_index(np, "reg", i, &tmp); + if (ret) { + dev_err(dev, "could not retrieve reg property: %d\n", + 
ret); + return ret; + } + + if (tmp > NFC_MAX_CS) { + dev_err(dev, + "invalid reg value: %u (max CS = 7)\n", + tmp); + return -EINVAL; + } + + if (test_and_set_bit(tmp, &nfc->assigned_cs)) { + dev_err(dev, "CS %d already assigned\n", tmp); + return -EINVAL; + } + + chip->sels[i].cs = tmp; + + if (!of_property_read_u32_index(np, "allwinner,rb", i, &tmp) && + tmp < 2) { + chip->sels[i].rb.type = RB_NATIVE; + chip->sels[i].rb.info.nativeid = tmp; + } else { + ret = of_get_named_gpio(np, "rb-gpios", i); + if (ret >= 0) { + tmp = ret; + chip->sels[i].rb.type = RB_GPIO; + chip->sels[i].rb.info.gpio = tmp; + ret = devm_gpio_request(dev, tmp, "nand-rb"); + if (ret) + return ret; + + ret = gpio_direction_input(tmp); + if (ret) + return ret; + } else { + chip->sels[i].rb.type = RB_NONE; + } + } + } + + timings = onfi_async_timing_mode_to_sdr_timings(0); + if (IS_ERR(timings)) { + ret = PTR_ERR(timings); + dev_err(dev, + "could not retrieve timings for ONFI mode 0: %d\n", + ret); + return ret; + } + + ret = sunxi_nand_chip_set_timings(chip, timings); + if (ret) { + dev_err(dev, "could not configure chip timings: %d\n", ret); + return ret; + } + + nand = &chip->nand; + /* Default tR value specified in the ONFI spec (chapter 4.15.1) */ + nand->chip_delay = 200; + nand->controller = &nfc->controller; + nand->select_chip = sunxi_nfc_select_chip; + nand->cmd_ctrl = sunxi_nfc_cmd_ctrl; + nand->read_buf = sunxi_nfc_read_buf; + nand->write_buf = sunxi_nfc_write_buf; + nand->read_byte = sunxi_nfc_read_byte; + + if (of_get_nand_on_flash_bbt(np)) + nand->bbt_options |= NAND_BBT_USE_FLASH | NAND_BBT_NO_OOB; + + mtd = &chip->mtd; + mtd->dev.parent = dev; + mtd->priv = nand; + mtd->owner = THIS_MODULE; + + ret = nand_scan_ident(mtd, nsels, NULL); + if (ret) + return ret; + + ret = sunxi_nand_chip_init_timings(chip, np); + if (ret) { + dev_err(dev, "could not configure chip timings: %d\n", ret); + return ret; + } + + ret = sunxi_nand_ecc_init(mtd, &nand->ecc, np); + if (ret) { + dev_err(dev, "ECC init failed: %d\n", ret); + return ret; + } + + ret = nand_scan_tail(mtd); + if (ret) { + dev_err(dev, "nand_scan_tail failed: %d\n", ret); + return ret; + } + + ppdata.of_node = np; + ret = mtd_device_parse_register(mtd, NULL, &ppdata, NULL, 0); + if (ret) { + dev_err(dev, "failed to register mtd device: %d\n", ret); + nand_release(mtd); + return ret; + } + + list_add_tail(&chip->node, &nfc->chips); + + return 0; +} + +static int sunxi_nand_chips_init(struct device *dev, struct sunxi_nfc *nfc) +{ + struct device_node *np = dev->of_node; + struct device_node *nand_np; + int nchips = of_get_child_count(np); + int ret; + + if (nchips > 8) { + dev_err(dev, "too many NAND chips: %d (max = 8)\n", nchips); + return -EINVAL; + } + + for_each_child_of_node(np, nand_np) { + ret = sunxi_nand_chip_init(dev, nfc, nand_np); + if (ret) + return ret; + } + + return 0; +} + +static void sunxi_nand_chips_cleanup(struct sunxi_nfc *nfc) +{ + struct sunxi_nand_chip *chip; + + while (!list_empty(&nfc->chips)) { + chip = list_first_entry(&nfc->chips, struct sunxi_nand_chip, + node); + nand_release(&chip->mtd); + sunxi_nand_ecc_cleanup(&chip->nand.ecc); + } +} + +static int sunxi_nfc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *r; + struct sunxi_nfc *nfc; + int irq; + int ret; + + nfc = devm_kzalloc(dev, sizeof(*nfc), GFP_KERNEL); + if (!nfc) + return -ENOMEM; + + nfc->dev = dev; + spin_lock_init(&nfc->controller.lock); + init_waitqueue_head(&nfc->controller.wq); + INIT_LIST_HEAD(&nfc->chips); + + 
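/*
 * Descriptive note on the probe sequence that follows: it maps the
 * controller registers, looks up the IRQ, then prepares/enables the
 * "ahb" and "mod" clocks before issuing the controller reset and
 * registering the interrupt handler. The out_mod_clk_unprepare and
 * out_ahb_clk_unprepare labels unwind the two clocks in reverse order
 * on any failure.
 */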
r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + nfc->regs = devm_ioremap_resource(dev, r); + if (IS_ERR(nfc->regs)) + return PTR_ERR(nfc->regs); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "failed to retrieve irq\n"); + return irq; + } + + nfc->ahb_clk = devm_clk_get(dev, "ahb"); + if (IS_ERR(nfc->ahb_clk)) { + dev_err(dev, "failed to retrieve ahb clk\n"); + return PTR_ERR(nfc->ahb_clk); + } + + ret = clk_prepare_enable(nfc->ahb_clk); + if (ret) + return ret; + + nfc->mod_clk = devm_clk_get(dev, "mod"); + if (IS_ERR(nfc->mod_clk)) { + dev_err(dev, "failed to retrieve mod clk\n"); + ret = PTR_ERR(nfc->mod_clk); + goto out_ahb_clk_unprepare; + } + + ret = clk_prepare_enable(nfc->mod_clk); + if (ret) + goto out_ahb_clk_unprepare; + + ret = sunxi_nfc_rst(nfc); + if (ret) + goto out_mod_clk_unprepare; + + writel(0, nfc->regs + NFC_REG_INT); + ret = devm_request_irq(dev, irq, sunxi_nfc_interrupt, + 0, "sunxi-nand", nfc); + if (ret) + goto out_mod_clk_unprepare; + + platform_set_drvdata(pdev, nfc); + + /* + * TODO: replace these magic values with proper flags as soon as we + * know what they are encoding. + */ + writel(0x100, nfc->regs + NFC_REG_TIMING_CTL); + writel(0x7ff, nfc->regs + NFC_REG_TIMING_CFG); + + ret = sunxi_nand_chips_init(dev, nfc); + if (ret) { + dev_err(dev, "failed to init nand chips\n"); + goto out_mod_clk_unprepare; + } + + return 0; + +out_mod_clk_unprepare: + clk_disable_unprepare(nfc->mod_clk); +out_ahb_clk_unprepare: + clk_disable_unprepare(nfc->ahb_clk); + + return ret; +} + +static int sunxi_nfc_remove(struct platform_device *pdev) +{ + struct sunxi_nfc *nfc = platform_get_drvdata(pdev); + + sunxi_nand_chips_cleanup(nfc); + + return 0; +} + +static const struct of_device_id sunxi_nfc_ids[] = { + { .compatible = "allwinner,sun4i-a10-nand" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, sunxi_nfc_ids); + +static struct platform_driver sunxi_nfc_driver = { + .driver = { + .name = "sunxi_nand", + .of_match_table = sunxi_nfc_ids, + }, + .probe = sunxi_nfc_probe, + .remove = sunxi_nfc_remove, +}; +module_platform_driver(sunxi_nfc_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Boris BREZILLON"); +MODULE_DESCRIPTION("Allwinner NAND Flash Controller driver"); +MODULE_ALIAS("platform:sunxi_nand"); diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c index 2fb07eced2b..39763b94f67 100644 --- a/drivers/mtd/spi-nor/fsl-quadspi.c +++ b/drivers/mtd/spi-nor/fsl-quadspi.c @@ -719,16 +719,10 @@ static int fsl_qspi_read(struct spi_nor *nor, loff_t from, { struct fsl_qspi *q = nor->priv; u8 cmd = nor->read_opcode; - int ret; dev_dbg(q->dev, "cmd [%x],read from (0x%p, 0x%.8x, 0x%.8x),len:%d\n", cmd, q->ahb_base, q->chip_base_addr, (unsigned int)from, len); - /* Wait until the previous command is finished. */ - ret = nor->wait_till_ready(nor); - if (ret) - return ret; - /* Read out the data directly from the AHB buffer.*/ memcpy(buf, q->ahb_base + q->chip_base_addr + from, len); @@ -744,16 +738,6 @@ static int fsl_qspi_erase(struct spi_nor *nor, loff_t offs) dev_dbg(nor->dev, "%dKiB at 0x%08x:0x%08x\n", nor->mtd->erasesize / 1024, q->chip_base_addr, (u32)offs); - /* Wait until finished previous write command. */ - ret = nor->wait_till_ready(nor); - if (ret) - return ret; - - /* Send write enable, then erase commands. 
*/ - ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0); - if (ret) - return ret; - ret = fsl_qspi_runcmd(q, nor->erase_opcode, offs, 0); if (ret) return ret; @@ -849,9 +833,8 @@ static int fsl_qspi_probe(struct platform_device *pdev) ret = clk_prepare_enable(q->clk); if (ret) { - clk_disable_unprepare(q->clk_en); dev_err(dev, "can not enable the qspi clock\n"); - goto map_failed; + goto clk_failed; } /* find the irq */ @@ -905,7 +888,8 @@ static int fsl_qspi_probe(struct platform_device *pdev) nor->prepare = fsl_qspi_prep; nor->unprepare = fsl_qspi_unprep; - if (of_modalias_node(np, modalias, sizeof(modalias)) < 0) + ret = of_modalias_node(np, modalias, sizeof(modalias)); + if (ret < 0) goto map_failed; ret = of_property_read_u32(np, "spi-max-frequency", @@ -964,6 +948,7 @@ last_init_failed: irq_failed: clk_disable_unprepare(q->clk); +clk_failed: clk_disable_unprepare(q->clk_en); map_failed: dev_err(dev, "Freescale QuadSPI probe failed\n"); diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index c51ee52386a..0f8ec3c2d01 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -26,7 +26,38 @@ /* Define max times to check status register before we give up. */ #define MAX_READY_WAIT_JIFFIES (40 * HZ) /* M25P16 specs 40s max chip erase */ -#define JEDEC_MFR(_jedec_id) ((_jedec_id) >> 16) +#define SPI_NOR_MAX_ID_LEN 6 + +struct flash_info { + /* + * This array stores the ID bytes. + * The first three bytes are the JEDIC ID. + * JEDEC ID zero means "no ID" (mostly older chips). + */ + u8 id[SPI_NOR_MAX_ID_LEN]; + u8 id_len; + + /* The size listed here is what works with SPINOR_OP_SE, which isn't + * necessarily called a "sector" by the vendor. + */ + unsigned sector_size; + u16 n_sectors; + + u16 page_size; + u16 addr_width; + + u16 flags; +#define SECT_4K 0x01 /* SPINOR_OP_BE_4K works uniformly */ +#define SPI_NOR_NO_ERASE 0x02 /* No erase command needed */ +#define SST_WRITE 0x04 /* use SST byte programming */ +#define SPI_NOR_NO_FR 0x08 /* Can't do fastread */ +#define SECT_4K_PMC 0x10 /* SPINOR_OP_BE_4K_PMC works uniformly */ +#define SPI_NOR_DUAL_READ 0x20 /* Flash supports Dual Read */ +#define SPI_NOR_QUAD_READ 0x40 /* Flash supports Quad Read */ +#define USE_FSR 0x80 /* use flag status register */ +}; + +#define JEDEC_MFR(info) ((info)->id[0]) static const struct spi_device_id *spi_nor_match_id(const char *name); @@ -98,7 +129,7 @@ static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor) case SPI_NOR_FAST: case SPI_NOR_DUAL: case SPI_NOR_QUAD: - return 1; + return 8; case SPI_NOR_NORMAL: return 0; } @@ -138,13 +169,14 @@ static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd) } /* Enable/disable 4-byte addressing mode. 
*/ -static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable) +static inline int set_4byte(struct spi_nor *nor, struct flash_info *info, + int enable) { int status; bool need_wren = false; u8 cmd; - switch (JEDEC_MFR(jedec_id)) { + switch (JEDEC_MFR(info)) { case CFI_MFR_ST: /* Micron, actually */ /* Some Micron need WREN command; all will accept it */ need_wren = true; @@ -165,81 +197,74 @@ static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable) return nor->write_reg(nor, SPINOR_OP_BRWR, nor->cmd_buf, 1, 0); } } - -static int spi_nor_wait_till_ready(struct spi_nor *nor) +static inline int spi_nor_sr_ready(struct spi_nor *nor) { - unsigned long deadline; - int sr; - - deadline = jiffies + MAX_READY_WAIT_JIFFIES; - - do { - cond_resched(); + int sr = read_sr(nor); + if (sr < 0) + return sr; + else + return !(sr & SR_WIP); +} - sr = read_sr(nor); - if (sr < 0) - break; - else if (!(sr & SR_WIP)) - return 0; - } while (!time_after_eq(jiffies, deadline)); +static inline int spi_nor_fsr_ready(struct spi_nor *nor) +{ + int fsr = read_fsr(nor); + if (fsr < 0) + return fsr; + else + return fsr & FSR_READY; +} - return -ETIMEDOUT; +static int spi_nor_ready(struct spi_nor *nor) +{ + int sr, fsr; + sr = spi_nor_sr_ready(nor); + if (sr < 0) + return sr; + fsr = nor->flags & SNOR_F_USE_FSR ? spi_nor_fsr_ready(nor) : 1; + if (fsr < 0) + return fsr; + return sr && fsr; } -static int spi_nor_wait_till_fsr_ready(struct spi_nor *nor) +/* + * Service routine to read status register until ready, or timeout occurs. + * Returns non-zero if error. + */ +static int spi_nor_wait_till_ready(struct spi_nor *nor) { unsigned long deadline; - int sr; - int fsr; + int timeout = 0, ret; deadline = jiffies + MAX_READY_WAIT_JIFFIES; - do { + while (!timeout) { + if (time_after_eq(jiffies, deadline)) + timeout = 1; + + ret = spi_nor_ready(nor); + if (ret < 0) + return ret; + if (ret) + return 0; + cond_resched(); + } - sr = read_sr(nor); - if (sr < 0) { - break; - } else if (!(sr & SR_WIP)) { - fsr = read_fsr(nor); - if (fsr < 0) - break; - if (fsr & FSR_READY) - return 0; - } - } while (!time_after_eq(jiffies, deadline)); + dev_err(nor->dev, "flash operation timed out\n"); return -ETIMEDOUT; } /* - * Service routine to read status register until ready, or timeout occurs. - * Returns non-zero if error. - */ -static int wait_till_ready(struct spi_nor *nor) -{ - return nor->wait_till_ready(nor); -} - -/* * Erase the whole flash memory * * Returns 0 if successful, non-zero otherwise. */ static int erase_chip(struct spi_nor *nor) { - int ret; - dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd->size >> 10)); - /* Wait until finished previous write command. */ - ret = wait_till_ready(nor); - if (ret) - return ret; - - /* Send write enable, then erase commands. */ - write_enable(nor); - return nor->write_reg(nor, SPINOR_OP_CHIP_ERASE, NULL, 0, 0); } @@ -294,11 +319,17 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr) /* whole-chip erase? */ if (len == mtd->size) { + write_enable(nor); + if (erase_chip(nor)) { ret = -EIO; goto erase_err; } + ret = spi_nor_wait_till_ready(nor); + if (ret) + goto erase_err; + /* REVISIT in some cases we could speed up erasing large regions * by using SPINOR_OP_SE instead of SPINOR_OP_BE_4K. We may have set up * to use "small sector erase", but that's not always optimal. 
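The hunks above invert the readiness protocol: rather than each operation first draining the previous command through nor->wait_till_ready(), the core now brackets every erase itself, issuing WREN before the opcode and polling status after it; spi_nor_ready() also folds in the flag status register when SNOR_F_USE_FSR is set. A minimal sketch of the resulting per-sector sequence, with the caveat that sketch_erase_sector is an illustrative name and not a function added by this patch:

/* Sketch of the erase sequence the refactored core performs per sector. */
static int sketch_erase_sector(struct spi_nor *nor, loff_t addr)
{
	int ret;

	write_enable(nor);		/* WREN before each erase */

	ret = nor->erase(nor, addr);	/* driver sends the erase opcode */
	if (ret)
		return ret;

	/* Poll SR (and FSR too when SNOR_F_USE_FSR is set) until idle */
	return spi_nor_wait_till_ready(nor);
}

Pushing the wait into the core in this way is what allows fsl_qspi_read() and fsl_qspi_erase() to drop their private wait_till_ready calls earlier in this patch.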
@@ -307,6 +338,8 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr) /* "sector"-at-a-time erase */ } else { while (len) { + write_enable(nor); + if (nor->erase(nor, addr)) { ret = -EIO; goto erase_err; @@ -314,9 +347,15 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr) addr += mtd->erasesize; len -= mtd->erasesize; + + ret = spi_nor_wait_till_ready(nor); + if (ret) + goto erase_err; } } + write_disable(nor); + spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_ERASE); instr->state = MTD_ERASE_DONE; @@ -341,11 +380,6 @@ static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) if (ret) return ret; - /* Wait until finished previous command */ - ret = wait_till_ready(nor); - if (ret) - goto err; - status_old = read_sr(nor); if (offset < mtd->size - (mtd->size / 2)) @@ -388,11 +422,6 @@ static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) if (ret) return ret; - /* Wait until finished previous command */ - ret = wait_till_ready(nor); - if (ret) - goto err; - status_old = read_sr(nor); if (offset+len > mtd->size - (mtd->size / 64)) @@ -424,38 +453,34 @@ err: return ret; } -struct flash_info { - /* JEDEC id zero means "no ID" (most older chips); otherwise it has - * a high byte of zero plus three data bytes: the manufacturer id, - * then a two byte device id. - */ - u32 jedec_id; - u16 ext_id; - - /* The size listed here is what works with SPINOR_OP_SE, which isn't - * necessarily called a "sector" by the vendor. - */ - unsigned sector_size; - u16 n_sectors; - - u16 page_size; - u16 addr_width; - - u16 flags; -#define SECT_4K 0x01 /* SPINOR_OP_BE_4K works uniformly */ -#define SPI_NOR_NO_ERASE 0x02 /* No erase command needed */ -#define SST_WRITE 0x04 /* use SST byte programming */ -#define SPI_NOR_NO_FR 0x08 /* Can't do fastread */ -#define SECT_4K_PMC 0x10 /* SPINOR_OP_BE_4K_PMC works uniformly */ -#define SPI_NOR_DUAL_READ 0x20 /* Flash supports Dual Read */ -#define SPI_NOR_QUAD_READ 0x40 /* Flash supports Quad Read */ -#define USE_FSR 0x80 /* use flag status register */ -}; - +/* Used when the "_ext_id" is two bytes at most */ #define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \ ((kernel_ulong_t)&(struct flash_info) { \ - .jedec_id = (_jedec_id), \ - .ext_id = (_ext_id), \ + .id = { \ + ((_jedec_id) >> 16) & 0xff, \ + ((_jedec_id) >> 8) & 0xff, \ + (_jedec_id) & 0xff, \ + ((_ext_id) >> 8) & 0xff, \ + (_ext_id) & 0xff, \ + }, \ + .id_len = (!(_jedec_id) ? 0 : (3 + ((_ext_id) ? 
2 : 0))), \ + .sector_size = (_sector_size), \ + .n_sectors = (_n_sectors), \ + .page_size = 256, \ + .flags = (_flags), \ + }) + +#define INFO6(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \ + ((kernel_ulong_t)&(struct flash_info) { \ + .id = { \ + ((_jedec_id) >> 16) & 0xff, \ + ((_jedec_id) >> 8) & 0xff, \ + (_jedec_id) & 0xff, \ + ((_ext_id) >> 16) & 0xff, \ + ((_ext_id) >> 8) & 0xff, \ + (_ext_id) & 0xff, \ + }, \ + .id_len = 6, \ .sector_size = (_sector_size), \ .n_sectors = (_n_sectors), \ .page_size = 256, \ @@ -507,6 +532,9 @@ static const struct spi_device_id spi_nor_ids[] = { { "mr25h256", CAT25_INFO( 32 * 1024, 1, 256, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) }, { "mr25h10", CAT25_INFO(128 * 1024, 1, 256, 3, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) }, + /* Fujitsu */ + { "mb85rs1mt", INFO(0x047f27, 0, 128 * 1024, 1, SPI_NOR_NO_ERASE) }, + /* GigaDevice */ { "gd25q32", INFO(0xc84016, 0, 64 * 1024, 64, SECT_4K) }, { "gd25q64", INFO(0xc84017, 0, 64 * 1024, 128, SECT_4K) }, @@ -532,6 +560,7 @@ static const struct spi_device_id spi_nor_ids[] = { { "mx66l1g55g", INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) }, /* Micron */ + { "n25q032", INFO(0x20ba16, 0, 64 * 1024, 64, 0) }, { "n25q064", INFO(0x20ba17, 0, 64 * 1024, 128, 0) }, { "n25q128a11", INFO(0x20bb18, 0, 64 * 1024, 256, 0) }, { "n25q128a13", INFO(0x20ba18, 0, 64 * 1024, 256, 0) }, @@ -556,6 +585,7 @@ static const struct spi_device_id spi_nor_ids[] = { { "s70fl01gs", INFO(0x010221, 0x4d00, 256 * 1024, 256, 0) }, { "s25sl12800", INFO(0x012018, 0x0300, 256 * 1024, 64, 0) }, { "s25sl12801", INFO(0x012018, 0x0301, 64 * 1024, 256, 0) }, + { "s25fl128s", INFO6(0x012018, 0x4d0180, 64 * 1024, 256, SPI_NOR_QUAD_READ) }, { "s25fl129p0", INFO(0x012018, 0x4d00, 256 * 1024, 64, 0) }, { "s25fl129p1", INFO(0x012018, 0x4d01, 64 * 1024, 256, 0) }, { "s25sl004a", INFO(0x010212, 0, 64 * 1024, 8, 0) }, @@ -566,6 +596,7 @@ static const struct spi_device_id spi_nor_ids[] = { { "s25fl008k", INFO(0xef4014, 0, 64 * 1024, 16, SECT_4K) }, { "s25fl016k", INFO(0xef4015, 0, 64 * 1024, 32, SECT_4K) }, { "s25fl064k", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) }, + { "s25fl132k", INFO(0x014016, 0, 64 * 1024, 64, 0) }, /* SST -- large erase sizes are "overlays", "sectors" are 4K */ { "sst25vf040b", INFO(0xbf258d, 0, 64 * 1024, 8, SECT_4K | SST_WRITE) }, @@ -577,6 +608,7 @@ static const struct spi_device_id spi_nor_ids[] = { { "sst25wf010", INFO(0xbf2502, 0, 64 * 1024, 2, SECT_4K | SST_WRITE) }, { "sst25wf020", INFO(0xbf2503, 0, 64 * 1024, 4, SECT_4K | SST_WRITE) }, { "sst25wf040", INFO(0xbf2504, 0, 64 * 1024, 8, SECT_4K | SST_WRITE) }, + { "sst25wf080", INFO(0xbf2505, 0, 64 * 1024, 16, SECT_4K | SST_WRITE) }, /* ST Microelectronics -- newer production may have feature updates */ { "m25p05", INFO(0x202010, 0, 32 * 1024, 2, 0) }, @@ -588,7 +620,6 @@ static const struct spi_device_id spi_nor_ids[] = { { "m25p32", INFO(0x202016, 0, 64 * 1024, 64, 0) }, { "m25p64", INFO(0x202017, 0, 64 * 1024, 128, 0) }, { "m25p128", INFO(0x202018, 0, 256 * 1024, 64, 0) }, - { "n25q032", INFO(0x20ba16, 0, 64 * 1024, 64, 0) }, { "m25p05-nonjedec", INFO(0, 0, 32 * 1024, 2, 0) }, { "m25p10-nonjedec", INFO(0, 0, 32 * 1024, 4, 0) }, @@ -643,32 +674,24 @@ static const struct spi_device_id spi_nor_ids[] = { static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor) { int tmp; - u8 id[5]; - u32 jedec; - u16 ext_jedec; + u8 id[SPI_NOR_MAX_ID_LEN]; struct flash_info *info; - tmp = nor->read_reg(nor, SPINOR_OP_RDID, id, 5); + tmp = nor->read_reg(nor, SPINOR_OP_RDID, id, 
SPI_NOR_MAX_ID_LEN); if (tmp < 0) { dev_dbg(nor->dev, " error %d reading JEDEC ID\n", tmp); return ERR_PTR(tmp); } - jedec = id[0]; - jedec = jedec << 8; - jedec |= id[1]; - jedec = jedec << 8; - jedec |= id[2]; - - ext_jedec = id[3] << 8 | id[4]; for (tmp = 0; tmp < ARRAY_SIZE(spi_nor_ids) - 1; tmp++) { info = (void *)spi_nor_ids[tmp].driver_data; - if (info->jedec_id == jedec) { - if (info->ext_id == 0 || info->ext_id == ext_jedec) + if (info->id_len) { + if (!memcmp(info->id, id, info->id_len)) return &spi_nor_ids[tmp]; } } - dev_err(nor->dev, "unrecognized JEDEC id %06x\n", jedec); + dev_err(nor->dev, "unrecognized JEDEC id bytes: %02x, %2x, %2x\n", + id[0], id[1], id[2]); return ERR_PTR(-ENODEV); } @@ -703,11 +726,6 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, if (ret) return ret; - /* Wait until finished previous write command. */ - ret = wait_till_ready(nor); - if (ret) - goto time_out; - write_enable(nor); nor->sst_write_second = false; @@ -719,7 +737,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, /* write one byte. */ nor->write(nor, to, 1, retlen, buf); - ret = wait_till_ready(nor); + ret = spi_nor_wait_till_ready(nor); if (ret) goto time_out; } @@ -731,7 +749,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, /* write two bytes. */ nor->write(nor, to, 2, retlen, buf + actual); - ret = wait_till_ready(nor); + ret = spi_nor_wait_till_ready(nor); if (ret) goto time_out; to += 2; @@ -740,7 +758,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, nor->sst_write_second = false; write_disable(nor); - ret = wait_till_ready(nor); + ret = spi_nor_wait_till_ready(nor); if (ret) goto time_out; @@ -751,7 +769,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, nor->program_opcode = SPINOR_OP_BP; nor->write(nor, to, 1, retlen, buf + actual); - ret = wait_till_ready(nor); + ret = spi_nor_wait_till_ready(nor); if (ret) goto time_out; write_disable(nor); @@ -779,11 +797,6 @@ static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len, if (ret) return ret; - /* Wait until finished previous write command. 
*/ - ret = wait_till_ready(nor); - if (ret) - goto write_err; - write_enable(nor); page_offset = to & (nor->page_size - 1); @@ -802,16 +815,20 @@ static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len, if (page_size > nor->page_size) page_size = nor->page_size; - wait_till_ready(nor); + ret = spi_nor_wait_till_ready(nor); + if (ret) + goto write_err; + write_enable(nor); nor->write(nor, to + i, page_size, retlen, buf + i); } } + ret = spi_nor_wait_till_ready(nor); write_err: spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE); - return 0; + return ret; } static int macronix_quad_enable(struct spi_nor *nor) @@ -824,7 +841,7 @@ static int macronix_quad_enable(struct spi_nor *nor) nor->cmd_buf[0] = val | SR_QUAD_EN_MX; nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1, 0); - if (wait_till_ready(nor)) + if (spi_nor_wait_till_ready(nor)) return 1; ret = read_sr(nor); @@ -874,11 +891,11 @@ static int spansion_quad_enable(struct spi_nor *nor) return 0; } -static int set_quad_mode(struct spi_nor *nor, u32 jedec_id) +static int set_quad_mode(struct spi_nor *nor, struct flash_info *info) { int status; - switch (JEDEC_MFR(jedec_id)) { + switch (JEDEC_MFR(info)) { case CFI_MFR_MACRONIX: status = macronix_quad_enable(nor); if (status) { @@ -904,11 +921,6 @@ static int spi_nor_check(struct spi_nor *nor) return -EINVAL; } - if (!nor->read_id) - nor->read_id = spi_nor_read_id; - if (!nor->wait_till_ready) - nor->wait_till_ready = spi_nor_wait_till_ready; - return 0; } @@ -926,16 +938,24 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) if (ret) return ret; - id = spi_nor_match_id(name); - if (!id) + /* Try to auto-detect if chip name wasn't specified */ + if (!name) + id = spi_nor_read_id(nor); + else + id = spi_nor_match_id(name); + if (IS_ERR_OR_NULL(id)) return -ENOENT; info = (void *)id->driver_data; - if (info->jedec_id) { + /* + * If caller has specified name of flash model that can normally be + * detected using JEDEC, let's verify it. 
+ */ + if (name && info->id_len) { const struct spi_device_id *jid; - jid = nor->read_id(nor); + jid = spi_nor_read_id(nor); if (IS_ERR(jid)) { return PTR_ERR(jid); } else if (jid != id) { @@ -960,9 +980,9 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) * up with the software protection bits set */ - if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ATMEL || - JEDEC_MFR(info->jedec_id) == CFI_MFR_INTEL || - JEDEC_MFR(info->jedec_id) == CFI_MFR_SST) { + if (JEDEC_MFR(info) == CFI_MFR_ATMEL || + JEDEC_MFR(info) == CFI_MFR_INTEL || + JEDEC_MFR(info) == CFI_MFR_SST) { write_enable(nor); write_sr(nor, 0); } @@ -977,7 +997,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) mtd->_read = spi_nor_read; /* nor protection support for STmicro chips */ - if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ST) { + if (JEDEC_MFR(info) == CFI_MFR_ST) { mtd->_lock = spi_nor_lock; mtd->_unlock = spi_nor_unlock; } @@ -988,9 +1008,8 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) else mtd->_write = spi_nor_write; - if ((info->flags & USE_FSR) && - nor->wait_till_ready == spi_nor_wait_till_ready) - nor->wait_till_ready = spi_nor_wait_till_fsr_ready; + if (info->flags & USE_FSR) + nor->flags |= SNOR_F_USE_FSR; #ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS /* prefer "small sector" erase if possible */ @@ -1031,7 +1050,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) /* Quad/Dual-read mode takes precedence over fast/normal */ if (mode == SPI_NOR_QUAD && info->flags & SPI_NOR_QUAD_READ) { - ret = set_quad_mode(nor, info->jedec_id); + ret = set_quad_mode(nor, info); if (ret) { dev_err(dev, "quad mode not supported\n"); return ret; @@ -1067,7 +1086,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) else if (mtd->size > 0x1000000) { /* enable 4-byte addressing if the device exceeds 16MiB */ nor->addr_width = 4; - if (JEDEC_MFR(info->jedec_id) == CFI_MFR_AMD) { + if (JEDEC_MFR(info) == CFI_MFR_AMD) { /* Dedicated 4-byte command set */ switch (nor->flash_read) { case SPI_NOR_QUAD: @@ -1088,7 +1107,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) nor->erase_opcode = SPINOR_OP_SE_4B; mtd->erasesize = info->sector_size; } else - set_4byte(nor, info->jedec_id, 1); + set_4byte(nor, info, 1); } else { nor->addr_width = 3; } diff --git a/drivers/mtd/tests/oobtest.c b/drivers/mtd/tests/oobtest.c index dc4f9602b97..5e061186eab 100644 --- a/drivers/mtd/tests/oobtest.c +++ b/drivers/mtd/tests/oobtest.c @@ -34,8 +34,11 @@ #include "mtd_test.h" static int dev = -EINVAL; +static int bitflip_limit; module_param(dev, int, S_IRUGO); MODULE_PARM_DESC(dev, "MTD device number to use"); +module_param(bitflip_limit, int, S_IRUGO); +MODULE_PARM_DESC(bitflip_limit, "Max. allowed bitflips per page"); static struct mtd_info *mtd; static unsigned char *readbuf; @@ -115,12 +118,36 @@ static int write_whole_device(void) return 0; } +/* + * Display the address, offset and data bytes at comparison failure. + * Return number of bitflips encountered. 
+ */ +static size_t memcmpshow(loff_t addr, const void *cs, const void *ct, size_t count) +{ + const unsigned char *su1, *su2; + int res; + size_t i = 0; + size_t bitflips = 0; + + for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--, i++) { + res = *su1 ^ *su2; + if (res) { + pr_info("error @addr[0x%lx:0x%zx] 0x%x -> 0x%x diff 0x%x\n", + (unsigned long)addr, i, *su1, *su2, res); + bitflips += hweight8(res); + } + } + + return bitflips; +} + static int verify_eraseblock(int ebnum) { int i; struct mtd_oob_ops ops; int err = 0; loff_t addr = (loff_t)ebnum * mtd->erasesize; + size_t bitflips; prandom_bytes_state(&rnd_state, writebuf, use_len_max * pgcnt); for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) { @@ -139,8 +166,11 @@ static int verify_eraseblock(int ebnum) errcnt += 1; return err ? err : -1; } - if (memcmp(readbuf, writebuf + (use_len_max * i) + use_offset, - use_len)) { + + bitflips = memcmpshow(addr, readbuf, + writebuf + (use_len_max * i) + use_offset, + use_len); + if (bitflips > bitflip_limit) { pr_err("error: verify failed at %#llx\n", (long long)addr); errcnt += 1; @@ -148,7 +178,10 @@ static int verify_eraseblock(int ebnum) pr_err("error: too many errors\n"); return -1; } + } else if (bitflips) { + pr_info("ignoring error as within bitflip_limit\n"); } + if (use_offset != 0 || use_len < mtd->ecclayout->oobavail) { int k; @@ -167,9 +200,10 @@ static int verify_eraseblock(int ebnum) errcnt += 1; return err ? err : -1; } - if (memcmp(readbuf + use_offset, - writebuf + (use_len_max * i) + use_offset, - use_len)) { + bitflips = memcmpshow(addr, readbuf + use_offset, + writebuf + (use_len_max * i) + use_offset, + use_len); + if (bitflips > bitflip_limit) { pr_err("error: verify failed at %#llx\n", (long long)addr); errcnt += 1; @@ -177,7 +211,10 @@ static int verify_eraseblock(int ebnum) pr_err("error: too many errors\n"); return -1; } + } else if (bitflips) { + pr_info("ignoring error as within bitflip_limit\n"); } + for (k = 0; k < use_offset; ++k) if (readbuf[k] != 0xff) { pr_err("error: verify 0xff " @@ -216,6 +253,9 @@ static int verify_eraseblock_in_one_go(int ebnum) int err = 0; loff_t addr = (loff_t)ebnum * mtd->erasesize; size_t len = mtd->ecclayout->oobavail * pgcnt; + size_t oobavail = mtd->ecclayout->oobavail; + size_t bitflips; + int i; prandom_bytes_state(&rnd_state, writebuf, len); ops.mode = MTD_OPS_AUTO_OOB; @@ -226,6 +266,8 @@ static int verify_eraseblock_in_one_go(int ebnum) ops.ooboffs = 0; ops.datbuf = NULL; ops.oobbuf = readbuf; + + /* read entire block's OOB at one go */ err = mtd_read_oob(mtd, addr, &ops); if (err || ops.oobretlen != len) { pr_err("error: readoob failed at %#llx\n", @@ -233,13 +275,21 @@ static int verify_eraseblock_in_one_go(int ebnum) errcnt += 1; return err ? 
err : -1; } - if (memcmp(readbuf, writebuf, len)) { - pr_err("error: verify failed at %#llx\n", - (long long)addr); - errcnt += 1; - if (errcnt > 1000) { - pr_err("error: too many errors\n"); - return -1; + + /* verify one page OOB at a time for bitflip per page limit check */ + for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) { + bitflips = memcmpshow(addr, readbuf + (i * oobavail), + writebuf + (i * oobavail), oobavail); + if (bitflips > bitflip_limit) { + pr_err("error: verify failed at %#llx\n", + (long long)addr); + errcnt += 1; + if (errcnt > 1000) { + pr_err("error: too many errors\n"); + return -1; + } + } else if (bitflips) { + pr_info("ignoring error as within bitflip_limit\n"); } } @@ -610,7 +660,8 @@ static int __init mtd_oobtest_init(void) err = mtd_read_oob(mtd, addr, &ops); if (err) goto out; - if (memcmp(readbuf, writebuf, mtd->ecclayout->oobavail * 2)) { + if (memcmpshow(addr, readbuf, writebuf, + mtd->ecclayout->oobavail * 2)) { pr_err("error: verify failed at %#llx\n", (long long)addr); errcnt += 1; diff --git a/drivers/mtd/tests/torturetest.c b/drivers/mtd/tests/torturetest.c index eeab96973cf..b55bc52a134 100644 --- a/drivers/mtd/tests/torturetest.c +++ b/drivers/mtd/tests/torturetest.c @@ -264,7 +264,9 @@ static int __init tort_init(void) int i; void *patt; - mtdtest_erase_good_eraseblocks(mtd, bad_ebs, eb, ebcnt); + err = mtdtest_erase_good_eraseblocks(mtd, bad_ebs, eb, ebcnt); + if (err) + goto out; /* Check if the eraseblocks contain only 0xFF bytes */ if (check) { diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index ef2dd2e4754..a3ecf580963 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -50,6 +50,17 @@ config PWM_ATMEL To compile this driver as a module, choose M here: the module will be called pwm-atmel. +config PWM_ATMEL_HLCDC_PWM + tristate "Atmel HLCDC PWM support" + depends on MFD_ATMEL_HLCDC + help + Generic PWM framework driver for the PWM output of the HLCDC + (Atmel High-end LCD Controller). This PWM output is mainly used + to control the LCD backlight. + + To compile this driver as a module, choose M here: the module + will be called pwm-atmel-hlcdc. + config PWM_ATMEL_TCB tristate "Atmel TC Block PWM support" depends on ATMEL_TCLIB && OF @@ -71,6 +82,15 @@ config PWM_BCM_KONA To compile this driver as a module, choose M here: the module will be called pwm-bcm-kona. +config PWM_BCM2835 + tristate "BCM2835 PWM support" + depends on ARCH_BCM2835 + help + PWM framework driver for BCM2835 controller (Raspberry Pi) + + To compile this driver as a module, choose M here: the module + will be called pwm-bcm2835. + config PWM_BFIN tristate "Blackfin PWM support" depends on BFIN_GPTIMERS @@ -235,7 +255,7 @@ config PWM_ROCKCHIP config PWM_SAMSUNG tristate "Samsung PWM support" - depends on PLAT_SAMSUNG + depends on PLAT_SAMSUNG || ARCH_EXYNOS help Generic PWM framework driver for Samsung. 
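Looping back to the oobtest change above: memcmpshow() treats a mismatch as a bit count rather than a hard failure. XORing the expected byte against the byte read back isolates the flipped bits, and hweight8() tallies them, so a page only fails verification once the per-page sum exceeds bitflip_limit. Below is a self-contained illustration of that counting scheme; count_bitflips is a hypothetical stand-in, and __builtin_popcount substitutes for the kernel's hweight8():

#include <stdint.h>
#include <stdio.h>

/* XOR isolates the flipped bits; popcount tallies them, as hweight8() does. */
static size_t count_bitflips(const uint8_t *want, const uint8_t *got,
			     size_t len)
{
	size_t flips = 0;
	size_t i;

	for (i = 0; i < len; i++)
		flips += __builtin_popcount(want[i] ^ got[i]);

	return flips;
}

int main(void)
{
	const uint8_t want[] = { 0xff, 0x0f };
	const uint8_t got[]  = { 0xfd, 0x0f };	/* 0xff ^ 0xfd = 0x02 */

	printf("%zu bitflips\n", count_bitflips(want, got, sizeof(want)));
	return 0;	/* prints "1 bitflips" */
}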
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile index c458606c375..65259ac1e8d 100644 --- a/drivers/pwm/Makefile +++ b/drivers/pwm/Makefile @@ -2,8 +2,10 @@ obj-$(CONFIG_PWM) += core.o obj-$(CONFIG_PWM_SYSFS) += sysfs.o obj-$(CONFIG_PWM_AB8500) += pwm-ab8500.o obj-$(CONFIG_PWM_ATMEL) += pwm-atmel.o +obj-$(CONFIG_PWM_ATMEL_HLCDC_PWM) += pwm-atmel-hlcdc.o obj-$(CONFIG_PWM_ATMEL_TCB) += pwm-atmel-tcb.o obj-$(CONFIG_PWM_BCM_KONA) += pwm-bcm-kona.o +obj-$(CONFIG_PWM_BCM2835) += pwm-bcm2835.o obj-$(CONFIG_PWM_BFIN) += pwm-bfin.o obj-$(CONFIG_PWM_CLPS711X) += pwm-clps711x.o obj-$(CONFIG_PWM_EP93XX) += pwm-ep93xx.o diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c new file mode 100644 index 00000000000..e7a785fadcd --- /dev/null +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -0,0 +1,299 @@ +/* + * Copyright (C) 2014 Free Electrons + * Copyright (C) 2014 Atmel + * + * Author: Boris BREZILLON <boris.brezillon@free-electrons.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/mfd/atmel-hlcdc.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> +#include <linux/regmap.h> + +#define ATMEL_HLCDC_PWMCVAL_MASK GENMASK(15, 8) +#define ATMEL_HLCDC_PWMCVAL(x) (((x) << 8) & ATMEL_HLCDC_PWMCVAL_MASK) +#define ATMEL_HLCDC_PWMPOL BIT(4) +#define ATMEL_HLCDC_PWMPS_MASK GENMASK(2, 0) +#define ATMEL_HLCDC_PWMPS_MAX 0x6 +#define ATMEL_HLCDC_PWMPS(x) ((x) & ATMEL_HLCDC_PWMPS_MASK) + +struct atmel_hlcdc_pwm_errata { + bool slow_clk_erratum; + bool div1_clk_erratum; +}; + +struct atmel_hlcdc_pwm { + struct pwm_chip chip; + struct atmel_hlcdc *hlcdc; + struct clk *cur_clk; + const struct atmel_hlcdc_pwm_errata *errata; +}; + +static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip) +{ + return container_of(chip, struct atmel_hlcdc_pwm, chip); +} + +static int atmel_hlcdc_pwm_config(struct pwm_chip *c, + struct pwm_device *pwm, + int duty_ns, int period_ns) +{ + struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c); + struct atmel_hlcdc *hlcdc = chip->hlcdc; + struct clk *new_clk = hlcdc->slow_clk; + u64 pwmcval = duty_ns * 256; + unsigned long clk_freq; + u64 clk_period_ns; + u32 pwmcfg; + int pres; + + if (!chip->errata || !chip->errata->slow_clk_erratum) { + clk_freq = clk_get_rate(new_clk); + clk_period_ns = (u64)NSEC_PER_SEC * 256; + do_div(clk_period_ns, clk_freq); + } + + /* Errata: cannot use slow clk on some IP revisions */ + if ((chip->errata && chip->errata->slow_clk_erratum) || + clk_period_ns > period_ns) { + new_clk = hlcdc->sys_clk; + clk_freq = clk_get_rate(new_clk); + clk_period_ns = (u64)NSEC_PER_SEC * 256; + do_div(clk_period_ns, clk_freq); + } + + for (pres = 0; pres <= ATMEL_HLCDC_PWMPS_MAX; pres++) { + /* Errata: cannot divide by 1 on some IP revisions */ + if (!pres && chip->errata && chip->errata->div1_clk_erratum) + continue; + + if ((clk_period_ns << pres) >= period_ns) + break; + } + + if (pres > 
ATMEL_HLCDC_PWMPS_MAX) + return -EINVAL; + + pwmcfg = ATMEL_HLCDC_PWMPS(pres); + + if (new_clk != chip->cur_clk) { + u32 gencfg = 0; + int ret; + + ret = clk_prepare_enable(new_clk); + if (ret) + return ret; + + clk_disable_unprepare(chip->cur_clk); + chip->cur_clk = new_clk; + + if (new_clk == hlcdc->sys_clk) + gencfg = ATMEL_HLCDC_CLKPWMSEL; + + ret = regmap_update_bits(hlcdc->regmap, ATMEL_HLCDC_CFG(0), + ATMEL_HLCDC_CLKPWMSEL, gencfg); + if (ret) + return ret; + } + + do_div(pwmcval, period_ns); + + /* + * The PWM duty cycle is configurable from 0/256 to 255/256 of the + * period cycle. Hence we can't set a duty cycle occupying the + * whole period cycle if we're asked to. + * Set it to 255 if pwmcval is greater than 256. + */ + if (pwmcval > 255) + pwmcval = 255; + + pwmcfg |= ATMEL_HLCDC_PWMCVAL(pwmcval); + + return regmap_update_bits(hlcdc->regmap, ATMEL_HLCDC_CFG(6), + ATMEL_HLCDC_PWMCVAL_MASK | + ATMEL_HLCDC_PWMPS_MASK, + pwmcfg); +} + +static int atmel_hlcdc_pwm_set_polarity(struct pwm_chip *c, + struct pwm_device *pwm, + enum pwm_polarity polarity) +{ + struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c); + struct atmel_hlcdc *hlcdc = chip->hlcdc; + u32 cfg = 0; + + if (polarity == PWM_POLARITY_NORMAL) + cfg = ATMEL_HLCDC_PWMPOL; + + return regmap_update_bits(hlcdc->regmap, ATMEL_HLCDC_CFG(6), + ATMEL_HLCDC_PWMPOL, cfg); +} + +static int atmel_hlcdc_pwm_enable(struct pwm_chip *c, struct pwm_device *pwm) +{ + struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c); + struct atmel_hlcdc *hlcdc = chip->hlcdc; + u32 status; + int ret; + + ret = regmap_write(hlcdc->regmap, ATMEL_HLCDC_EN, ATMEL_HLCDC_PWM); + if (ret) + return ret; + + while (true) { + ret = regmap_read(hlcdc->regmap, ATMEL_HLCDC_SR, &status); + if (ret) + return ret; + + if ((status & ATMEL_HLCDC_PWM) != 0) + break; + + usleep_range(1, 10); + } + + return 0; +} + +static void atmel_hlcdc_pwm_disable(struct pwm_chip *c, + struct pwm_device *pwm) +{ + struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c); + struct atmel_hlcdc *hlcdc = chip->hlcdc; + u32 status; + int ret; + + ret = regmap_write(hlcdc->regmap, ATMEL_HLCDC_DIS, ATMEL_HLCDC_PWM); + if (ret) + return; + + while (true) { + ret = regmap_read(hlcdc->regmap, ATMEL_HLCDC_SR, &status); + if (ret) + return; + + if ((status & ATMEL_HLCDC_PWM) == 0) + break; + + usleep_range(1, 10); + } +} + +static const struct pwm_ops atmel_hlcdc_pwm_ops = { + .config = atmel_hlcdc_pwm_config, + .set_polarity = atmel_hlcdc_pwm_set_polarity, + .enable = atmel_hlcdc_pwm_enable, + .disable = atmel_hlcdc_pwm_disable, + .owner = THIS_MODULE, +}; + +static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_at91sam9x5_errata = { + .slow_clk_erratum = true, +}; + +static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_sama5d3_errata = { + .div1_clk_erratum = true, +}; + +static const struct of_device_id atmel_hlcdc_dt_ids[] = { + { + .compatible = "atmel,at91sam9x5-hlcdc", + .data = &atmel_hlcdc_pwm_at91sam9x5_errata, + }, + { + .compatible = "atmel,sama5d3-hlcdc", + .data = &atmel_hlcdc_pwm_sama5d3_errata, + }, + { /* sentinel */ }, +}; + +static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + struct device *dev = &pdev->dev; + struct atmel_hlcdc_pwm *chip; + struct atmel_hlcdc *hlcdc; + int ret; + + hlcdc = dev_get_drvdata(dev->parent); + + chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + ret = clk_prepare_enable(hlcdc->periph_clk); + if (ret) + return ret; + + match = 
of_match_node(atmel_hlcdc_dt_ids, dev->parent->of_node); + if (match) + chip->errata = match->data; + + chip->hlcdc = hlcdc; + chip->chip.ops = &atmel_hlcdc_pwm_ops; + chip->chip.dev = dev; + chip->chip.base = -1; + chip->chip.npwm = 1; + chip->chip.of_xlate = of_pwm_xlate_with_flags; + chip->chip.of_pwm_n_cells = 3; + chip->chip.can_sleep = 1; + + ret = pwmchip_add(&chip->chip); + if (ret) { + clk_disable_unprepare(hlcdc->periph_clk); + return ret; + } + + platform_set_drvdata(pdev, chip); + + return 0; +} + +static int atmel_hlcdc_pwm_remove(struct platform_device *pdev) +{ + struct atmel_hlcdc_pwm *chip = platform_get_drvdata(pdev); + int ret; + + ret = pwmchip_remove(&chip->chip); + if (ret) + return ret; + + clk_disable_unprepare(chip->hlcdc->periph_clk); + + return 0; +} + +static const struct of_device_id atmel_hlcdc_pwm_dt_ids[] = { + { .compatible = "atmel,hlcdc-pwm" }, + { /* sentinel */ }, +}; + +static struct platform_driver atmel_hlcdc_pwm_driver = { + .driver = { + .name = "atmel-hlcdc-pwm", + .of_match_table = atmel_hlcdc_pwm_dt_ids, + }, + .probe = atmel_hlcdc_pwm_probe, + .remove = atmel_hlcdc_pwm_remove, +}; +module_platform_driver(atmel_hlcdc_pwm_driver); + +MODULE_ALIAS("platform:atmel-hlcdc-pwm"); +MODULE_AUTHOR("Boris Brezillon <boris.brezillon@free-electrons.com>"); +MODULE_DESCRIPTION("Atmel HLCDC PWM driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c new file mode 100644 index 00000000000..b4c7f956b6f --- /dev/null +++ b/drivers/pwm/pwm-bcm2835.c @@ -0,0 +1,205 @@ +/* + * Copyright 2014 Bart Tanghe <bart.tanghe@thomasmore.be> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2. + */ + +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> + +#define PWM_CONTROL 0x000 +#define PWM_CONTROL_SHIFT(x) ((x) * 8) +#define PWM_CONTROL_MASK 0xff +#define PWM_MODE 0x80 /* set timer in PWM mode */ +#define PWM_ENABLE (1 << 0) +#define PWM_POLARITY (1 << 4) + +#define PERIOD(x) (((x) * 0x10) + 0x10) +#define DUTY(x) (((x) * 0x10) + 0x14) + +#define MIN_PERIOD 108 /* 9.2 MHz max. 
PWM clock */ + +struct bcm2835_pwm { + struct pwm_chip chip; + struct device *dev; + unsigned long scaler; + void __iomem *base; + struct clk *clk; +}; + +static inline struct bcm2835_pwm *to_bcm2835_pwm(struct pwm_chip *chip) +{ + return container_of(chip, struct bcm2835_pwm, chip); +} + +static int bcm2835_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); + u32 value; + + value = readl(pc->base + PWM_CONTROL); + value &= ~(PWM_CONTROL_MASK << PWM_CONTROL_SHIFT(pwm->hwpwm)); + value |= (PWM_MODE << PWM_CONTROL_SHIFT(pwm->hwpwm)); + writel(value, pc->base + PWM_CONTROL); + + return 0; +} + +static void bcm2835_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); + u32 value; + + value = readl(pc->base + PWM_CONTROL); + value &= ~(PWM_CONTROL_MASK << PWM_CONTROL_SHIFT(pwm->hwpwm)); + writel(value, pc->base + PWM_CONTROL); +} + +static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, + int duty_ns, int period_ns) +{ + struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); + + if (period_ns <= MIN_PERIOD) { + dev_err(pc->dev, "period %d not supported, minimum %d\n", + period_ns, MIN_PERIOD); + return -EINVAL; + } + + writel(duty_ns / pc->scaler, pc->base + DUTY(pwm->hwpwm)); + writel(period_ns / pc->scaler, pc->base + PERIOD(pwm->hwpwm)); + + return 0; +} + +static int bcm2835_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); + u32 value; + + value = readl(pc->base + PWM_CONTROL); + value |= PWM_ENABLE << PWM_CONTROL_SHIFT(pwm->hwpwm); + writel(value, pc->base + PWM_CONTROL); + + return 0; +} + +static void bcm2835_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); + u32 value; + + value = readl(pc->base + PWM_CONTROL); + value &= ~(PWM_ENABLE << PWM_CONTROL_SHIFT(pwm->hwpwm)); + writel(value, pc->base + PWM_CONTROL); +} + +static int bcm2835_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm, + enum pwm_polarity polarity) +{ + struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); + u32 value; + + value = readl(pc->base + PWM_CONTROL); + + if (polarity == PWM_POLARITY_NORMAL) + value &= ~(PWM_POLARITY << PWM_CONTROL_SHIFT(pwm->hwpwm)); + else + value |= PWM_POLARITY << PWM_CONTROL_SHIFT(pwm->hwpwm); + + writel(value, pc->base + PWM_CONTROL); + + return 0; +} + +static const struct pwm_ops bcm2835_pwm_ops = { + .request = bcm2835_pwm_request, + .free = bcm2835_pwm_free, + .config = bcm2835_pwm_config, + .enable = bcm2835_pwm_enable, + .disable = bcm2835_pwm_disable, + .set_polarity = bcm2835_set_polarity, + .owner = THIS_MODULE, +}; + +static int bcm2835_pwm_probe(struct platform_device *pdev) +{ + struct bcm2835_pwm *pc; + struct resource *res; + int ret; + + pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL); + if (!pc) + return -ENOMEM; + + pc->dev = &pdev->dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + pc->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(pc->base)) + return PTR_ERR(pc->base); + + pc->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(pc->clk)) { + dev_err(&pdev->dev, "clock not found: %ld\n", PTR_ERR(pc->clk)); + return PTR_ERR(pc->clk); + } + + ret = clk_prepare_enable(pc->clk); + if (ret) + return ret; + + pc->scaler = NSEC_PER_SEC / clk_get_rate(pc->clk); + + pc->chip.dev = &pdev->dev; + pc->chip.ops = &bcm2835_pwm_ops; + pc->chip.npwm = 2; + + platform_set_drvdata(pdev, pc); + + ret = 
pwmchip_add(&pc->chip); + if (ret < 0) + goto add_fail; + + return 0; + +add_fail: + clk_disable_unprepare(pc->clk); + return ret; +} + +static int bcm2835_pwm_remove(struct platform_device *pdev) +{ + struct bcm2835_pwm *pc = platform_get_drvdata(pdev); + + clk_disable_unprepare(pc->clk); + + return pwmchip_remove(&pc->chip); +} + +static const struct of_device_id bcm2835_pwm_of_match[] = { + { .compatible = "brcm,bcm2835-pwm", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, bcm2835_pwm_of_match); + +static struct platform_driver bcm2835_pwm_driver = { + .driver = { + .name = "bcm2835-pwm", + .of_match_table = bcm2835_pwm_of_match, + }, + .probe = bcm2835_pwm_probe, + .remove = bcm2835_pwm_remove, +}; +module_platform_driver(bcm2835_pwm_driver); + +MODULE_AUTHOR("Bart Tanghe <bart.tanghe@thomasmore.be>"); +MODULE_DESCRIPTION("Broadcom BCM2835 PWM driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index 0f2cc7ef778..f9dfc8b6407 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -17,6 +17,7 @@ #include <linux/mutex.h> #include <linux/of_address.h> #include <linux/platform_device.h> +#include <linux/pm.h> #include <linux/pwm.h> #include <linux/regmap.h> #include <linux/slab.h> @@ -299,7 +300,7 @@ static int fsl_counter_clock_enable(struct fsl_pwm_chip *fpc) { int ret; - if (fpc->use_count != 0) + if (fpc->use_count++ != 0) return 0; /* select counter clock source */ @@ -316,8 +317,6 @@ static int fsl_counter_clock_enable(struct fsl_pwm_chip *fpc) return ret; } - fpc->use_count++; - return 0; } @@ -399,12 +398,23 @@ static int fsl_pwm_init(struct fsl_pwm_chip *fpc) return 0; } +static bool fsl_pwm_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case FTM_CNT: + return true; + } + return false; +} + static const struct regmap_config fsl_pwm_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, .max_register = FTM_PWMLOAD, + .volatile_reg = fsl_pwm_volatile_reg, + .cache_type = REGCACHE_RBTREE, }; static int fsl_pwm_probe(struct platform_device *pdev) @@ -427,7 +437,7 @@ static int fsl_pwm_probe(struct platform_device *pdev) if (IS_ERR(base)) return PTR_ERR(base); - fpc->regmap = devm_regmap_init_mmio_clk(&pdev->dev, NULL, base, + fpc->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "ftm_sys", base, &fsl_pwm_regmap_config); if (IS_ERR(fpc->regmap)) { dev_err(&pdev->dev, "regmap init failed\n"); @@ -478,6 +488,51 @@ static int fsl_pwm_remove(struct platform_device *pdev) return pwmchip_remove(&fpc->chip); } +#ifdef CONFIG_PM_SLEEP +static int fsl_pwm_suspend(struct device *dev) +{ + struct fsl_pwm_chip *fpc = dev_get_drvdata(dev); + u32 val; + + regcache_cache_only(fpc->regmap, true); + regcache_mark_dirty(fpc->regmap); + + /* read from cache */ + regmap_read(fpc->regmap, FTM_OUTMASK, &val); + if ((val & 0xFF) != 0xFF) { + clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_CNTEN]); + clk_disable_unprepare(fpc->clk[fpc->cnt_select]); + clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_SYS]); + } + + return 0; +} + +static int fsl_pwm_resume(struct device *dev) +{ + struct fsl_pwm_chip *fpc = dev_get_drvdata(dev); + u32 val; + + /* read from cache */ + regmap_read(fpc->regmap, FTM_OUTMASK, &val); + if ((val & 0xFF) != 0xFF) { + clk_prepare_enable(fpc->clk[FSL_PWM_CLK_SYS]); + clk_prepare_enable(fpc->clk[fpc->cnt_select]); + clk_prepare_enable(fpc->clk[FSL_PWM_CLK_CNTEN]); + } + + /* restore all registers from cache */ + regcache_cache_only(fpc->regmap, false); + regcache_sync(fpc->regmap); + +
return 0; +} +#endif + +static const struct dev_pm_ops fsl_pwm_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(fsl_pwm_suspend, fsl_pwm_resume) +}; + static const struct of_device_id fsl_pwm_dt_ids[] = { { .compatible = "fsl,vf610-ftm-pwm", }, { /* sentinel */ } @@ -488,6 +543,7 @@ static struct platform_driver fsl_pwm_driver = { .driver = { .name = "fsl-ftm-pwm", .of_match_table = fsl_pwm_dt_ids, + .pm = &fsl_pwm_pm_ops, }, .probe = fsl_pwm_probe, .remove = fsl_pwm_remove, diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index f554d25b439..af40db0df58 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -112,6 +112,18 @@ config CPU_THERMAL If you want this support, you should say Y here. +config CLOCK_THERMAL + bool "Generic clock cooling support" + depends on COMMON_CLK + depends on PM_OPP + help + This entry implements the generic clock cooling mechanism through + frequency clipping. Typically used to cool off co-processors. The + device that is configured to use this cooling mechanism will be + controlled to reduce clock frequency whenever temperature is high. + + If you want this support, you should say Y here. + config THERMAL_EMULATION bool "Thermal emulation mode support" help @@ -143,6 +155,16 @@ config SPEAR_THERMAL Enable this to plug the SPEAr thermal sensor driver into the Linux thermal framework. +config ROCKCHIP_THERMAL + tristate "Rockchip thermal driver" + depends on ARCH_ROCKCHIP + depends on RESET_CONTROLLER + help + Rockchip thermal driver provides support for Temperature sensor + ADC (TS-ADC) found on Rockchip SoCs. It supports one critical + trip point. Cpufreq is used as the cooling device and will throttle + CPUs when the Temperature crosses the passive trip point. + config RCAR_THERMAL tristate "Renesas R-Car thermal driver" depends on ARCH_SHMOBILE || COMPILE_TEST @@ -185,6 +207,16 @@ config ARMADA_THERMAL Enable this option if you want to have support for thermal management controller present in Armada 370 and Armada XP SoC. +config TEGRA_SOCTHERM + tristate "Tegra SOCTHERM thermal management" + depends on ARCH_TEGRA + help + Enable this option for integrated thermal management support on NVIDIA + Tegra124 systems-on-chip. The driver supports four thermal zones + (CPU, GPU, MEM, PLLX). Cooling devices can be bound to the thermal + zones to manage temperatures. This option is also required for the + emergency thermal reset (thermtrip) feature to function. 
+ config DB8500_CPUFREQ_COOLING tristate "DB8500 cpufreq cooling" depends on ARCH_U8500 diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 39c4fe87da2..fa0dc486790 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -18,8 +18,12 @@ thermal_sys-$(CONFIG_THERMAL_GOV_USER_SPACE) += user_space.o # cpufreq cooling thermal_sys-$(CONFIG_CPU_THERMAL) += cpu_cooling.o +# clock cooling +thermal_sys-$(CONFIG_CLOCK_THERMAL) += clock_cooling.o + # platform thermal drivers obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o +obj-$(CONFIG_ROCKCHIP_THERMAL) += rockchip_thermal.o obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o obj-$(CONFIG_KIRKWOOD_THERMAL) += kirkwood_thermal.o obj-y += samsung/ @@ -34,3 +38,4 @@ obj-$(CONFIG_INTEL_SOC_DTS_THERMAL) += intel_soc_dts_thermal.o obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/ obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/ obj-$(CONFIG_ST_THERMAL) += st/ +obj-$(CONFIG_TEGRA_SOCTHERM) += tegra_soctherm.o diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c index eaaf59c98ba..c2556cf5186 100644 --- a/drivers/thermal/armada_thermal.c +++ b/drivers/thermal/armada_thermal.c @@ -35,10 +35,6 @@ #define PMU_TDC0_OTF_CAL_MASK (0x1 << 30) #define PMU_TDC0_START_CAL_MASK (0x1 << 25) -#define A375_Z1_CAL_RESET_LSB 0x8011e214 -#define A375_Z1_CAL_RESET_MSB 0x30a88019 -#define A375_Z1_WORKAROUND_BIT BIT(9) - #define A375_UNIT_CONTROL_SHIFT 27 #define A375_UNIT_CONTROL_MASK 0x7 #define A375_READOUT_INVERT BIT(15) @@ -124,24 +120,12 @@ static void armada375_init_sensor(struct platform_device *pdev, struct armada_thermal_priv *priv) { unsigned long reg; - bool quirk_needed = - !!of_device_is_compatible(pdev->dev.of_node, - "marvell,armada375-z1-thermal"); - - if (quirk_needed) { - /* Ensure these registers have the default (reset) values */ - writel(A375_Z1_CAL_RESET_LSB, priv->control); - writel(A375_Z1_CAL_RESET_MSB, priv->control + 0x4); - } reg = readl(priv->control + 4); reg &= ~(A375_UNIT_CONTROL_MASK << A375_UNIT_CONTROL_SHIFT); reg &= ~A375_READOUT_INVERT; reg &= ~A375_HW_RESETn; - if (quirk_needed) - reg |= A375_Z1_WORKAROUND_BIT; - writel(reg, priv->control + 4); mdelay(20); @@ -260,10 +244,6 @@ static const struct of_device_id armada_thermal_id_table[] = { .data = &armada375_data, }, { - .compatible = "marvell,armada375-z1-thermal", - .data = &armada375_data, - }, - { .compatible = "marvell,armada380-thermal", .data = &armada380_data, }, diff --git a/drivers/thermal/clock_cooling.c b/drivers/thermal/clock_cooling.c new file mode 100644 index 00000000000..1b4ff0f4c71 --- /dev/null +++ b/drivers/thermal/clock_cooling.c @@ -0,0 +1,485 @@ +/* + * drivers/thermal/clock_cooling.c + * + * Copyright (C) 2014 Eduardo Valentin <edubezval@gmail.com> + * + * Copyright (C) 2013 Texas Instruments Inc. + * Contact: Eduardo Valentin <eduardo.valentin@ti.com> + * + * Highly based on cpu_cooling.c. + * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com) + * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/clk.h> +#include <linux/cpufreq.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/idr.h> +#include <linux/mutex.h> +#include <linux/pm_opp.h> +#include <linux/slab.h> +#include <linux/thermal.h> +#include <linux/clock_cooling.h> + +/** + * struct clock_cooling_device - data for cooling device with clock + * @id: unique integer value corresponding to each clock_cooling_device + * registered. + * @dev: struct device pointer to the device being used to cool off using + * clock frequencies. + * @cdev: thermal_cooling_device pointer to keep track of the + * registered cooling device. + * @clk_rate_change_nb: reference to notifier block used to receive clock + * rate changes. + * @freq_table: frequency table used to keep track of available frequencies. + * @clock_state: integer value representing the current state of clock + * cooling devices. + * @clock_val: integer value representing the absolute value of the clipped + * frequency. + * @clk: struct clk reference used to enforce clock limits. + * @lock: mutex lock to protect this struct. + * + * This structure is required for keeping information of each + * clock_cooling_device registered. In order to prevent corruption of this + * structure, a mutex @lock is used. + */ +struct clock_cooling_device { + int id; + struct device *dev; + struct thermal_cooling_device *cdev; + struct notifier_block clk_rate_change_nb; + struct cpufreq_frequency_table *freq_table; + unsigned long clock_state; + unsigned long clock_val; + struct clk *clk; + struct mutex lock; /* lock to protect the content of this struct */ +}; +#define to_clock_cooling_device(x) \ + container_of(x, struct clock_cooling_device, clk_rate_change_nb) +static DEFINE_IDR(clock_idr); +static DEFINE_MUTEX(cooling_clock_lock); + +/** + * clock_cooling_get_idr - function to get a unique id. + * @id: int * value generated by this function. + * + * This function will populate @id with a unique + * id, using the idr API. + * + * Return: 0 on success, an error code on failure. + */ +static int clock_cooling_get_idr(int *id) +{ + int ret; + + mutex_lock(&cooling_clock_lock); + ret = idr_alloc(&clock_idr, NULL, 0, 0, GFP_KERNEL); + mutex_unlock(&cooling_clock_lock); + if (unlikely(ret < 0)) + return ret; + *id = ret; + + return 0; +} + +/** + * release_idr - function to free the unique id. + * @id: int value representing the unique id. + */ +static void release_idr(int id) +{ + mutex_lock(&cooling_clock_lock); + idr_remove(&clock_idr, id); + mutex_unlock(&cooling_clock_lock); +} + +/* The code below defines the functions used to operate a clock as a cooling device */ + +enum clock_cooling_property { + GET_LEVEL, + GET_FREQ, + GET_MAXL, +}; + +/** + * clock_cooling_get_property - fetch a property of interest for a given clock cooling device. + * @ccdev: clock cooling device reference + * @input: query parameter + * @output: query return + * @property: type of query (frequency, level, max level) + * + * This is the common function to + * 1. get maximum clock cooling states + * 2. translate frequency to cooling state + * 3. translate cooling state to frequency + * Note that the code may not be in good shape + * but it is written in this way in order to: + * a) reduce duplicate code as most of the code can be shared. + * b) make sure the logic is consistent when translating between + * cooling states and frequencies. + * + * Return: 0 on success, -EINVAL when invalid parameters are passed.
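+ * + * Example (hypothetical, assuming a descending three-entry frequency table + * of 996000, 792000 and 396000): GET_MAXL yields max level 2, GET_LEVEL + * with input 792000 yields level 1, and GET_FREQ with input 2 yields 396000.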
+ */ +static int clock_cooling_get_property(struct clock_cooling_device *ccdev, + unsigned long input, + unsigned long *output, + enum clock_cooling_property property) +{ + int i; + unsigned long max_level = 0, level = 0; + unsigned int freq = CPUFREQ_ENTRY_INVALID; + int descend = -1; + struct cpufreq_frequency_table *pos, *table = ccdev->freq_table; + + if (!output) + return -EINVAL; + + if (!table) + return -EINVAL; + + cpufreq_for_each_valid_entry(pos, table) { + /* ignore duplicate entry */ + if (freq == pos->frequency) + continue; + + /* get the frequency order */ + if (freq != CPUFREQ_ENTRY_INVALID && descend == -1) + descend = freq > pos->frequency; + + freq = pos->frequency; + max_level++; + } + + /* No valid cpu frequency entry */ + if (max_level == 0) + return -EINVAL; + + /* max_level is an index, not a counter */ + max_level--; + + /* get max level */ + if (property == GET_MAXL) { + *output = max_level; + return 0; + } + + if (property == GET_FREQ) + level = descend ? input : (max_level - input); + + i = 0; + cpufreq_for_each_valid_entry(pos, table) { + /* ignore duplicate entry */ + if (freq == pos->frequency) + continue; + + /* now we have a valid frequency entry */ + freq = pos->frequency; + + if (property == GET_LEVEL && (unsigned int)input == freq) { + /* get level by frequency */ + *output = descend ? i : (max_level - i); + return 0; + } + if (property == GET_FREQ && level == i) { + /* get frequency by level */ + *output = freq; + return 0; + } + i++; + } + + return -EINVAL; +} + +/** + * clock_cooling_get_level - return the cooling level of a given clock cooling device. + * @cdev: reference to a thermal cooling device used as clock cooling device + * @freq: the frequency of interest + * + * This function will match the cooling level corresponding to the + * requested @freq and return it. + * + * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID + * otherwise. + */ +unsigned long clock_cooling_get_level(struct thermal_cooling_device *cdev, + unsigned long freq) +{ + struct clock_cooling_device *ccdev = cdev->devdata; + unsigned long val; + + if (clock_cooling_get_property(ccdev, (unsigned long)freq, &val, + GET_LEVEL)) + return THERMAL_CSTATE_INVALID; + + return val; +} +EXPORT_SYMBOL_GPL(clock_cooling_get_level); + +/** + * clock_cooling_get_frequency - get the absolute value of frequency from level. + * @ccdev: clock cooling device reference + * @level: cooling level + * + * This function matches a cooling level with a frequency. Given a cooling + * level, which equals a cooling state of the cooling device, it will return + * the corresponding frequency. + * e.g. level=0 --> 1st max freq, level=1 --> 2nd max freq, etc. + * + * Return: 0 on error, the corresponding frequency otherwise. + */ +static unsigned long +clock_cooling_get_frequency(struct clock_cooling_device *ccdev, + unsigned long level) +{ + int ret = 0; + unsigned long freq; + + ret = clock_cooling_get_property(ccdev, level, &freq, GET_FREQ); + if (ret) + return 0; + + return freq; +} + +/** + * clock_cooling_apply - function to apply frequency clipping. + * @ccdev: clock_cooling_device pointer containing frequency clipping data. + * @cooling_state: value of the cooling state. + * + * Function used to make sure the clock layer is aware of current thermal + * limits. The limits are applied by updating the clock rate in case it is + * higher than the corresponding frequency based on the requested cooling_state.
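+ * + * For example (hypothetical rates): if the requested state maps to 792 MHz + * and the clock currently runs at 996 MHz, the rate is clipped down to + * 792 MHz; a clock already running at or below the limit is left untouched.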
+ * + * Return: 0 on success, an error code otherwise (-EINVAL in case wrong + * cooling state). + */ +static int clock_cooling_apply(struct clock_cooling_device *ccdev, + unsigned long cooling_state) +{ + unsigned long clip_freq, cur_freq; + int ret = 0; + + /* Here we write the clipping */ + /* Check if the old cooling action is same as new cooling action */ + if (ccdev->clock_state == cooling_state) + return 0; + + clip_freq = clock_cooling_get_frequency(ccdev, cooling_state); + if (!clip_freq) + return -EINVAL; + + cur_freq = clk_get_rate(ccdev->clk); + + mutex_lock(&ccdev->lock); + ccdev->clock_state = cooling_state; + ccdev->clock_val = clip_freq; + /* enforce clock level */ + if (cur_freq > clip_freq) + ret = clk_set_rate(ccdev->clk, clip_freq); + mutex_unlock(&ccdev->lock); + + return ret; +} + +/** + * clock_cooling_clock_notifier - notifier callback on clock rate changes. + * @nb: struct notifier_block * with callback info. + * @event: value showing clock event for which this function invoked. + * @data: callback-specific data + * + * Callback to hijack the notification on clock transition. + * Every time there is a clock change, we intercept all pre change events + * and block the transition in case the new rate infringes thermal limits. + * + * Return: NOTIFY_DONE (success) or NOTIFY_BAD (new_rate > thermal limit). + */ +static int clock_cooling_clock_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct clk_notifier_data *ndata = data; + struct clock_cooling_device *ccdev = to_clock_cooling_device(nb); + + switch (event) { + case PRE_RATE_CHANGE: + /* + * checks on current state + * TODO: current method is not best we can find as it + * allows possibly voltage transitions, in case DVFS + * layer is also hijacking clock pre notifications. + */ + if (ndata->new_rate > ccdev->clock_val) + return NOTIFY_BAD; + /* fall through */ + case POST_RATE_CHANGE: + case ABORT_RATE_CHANGE: + default: + return NOTIFY_DONE; + } +} + +/* clock cooling device thermal callback functions are defined below */ + +/** + * clock_cooling_get_max_state - callback function to get the max cooling state. + * @cdev: thermal cooling device pointer. + * @state: fill this variable with the max cooling state. + * + * Callback for the thermal cooling device to return the clock + * max cooling state. + * + * Return: 0 on success, an error code otherwise. + */ +static int clock_cooling_get_max_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + struct clock_cooling_device *ccdev = cdev->devdata; + unsigned long count = 0; + int ret; + + ret = clock_cooling_get_property(ccdev, 0, &count, GET_MAXL); + if (!ret) + *state = count; + + return ret; +} + +/** + * clock_cooling_get_cur_state - function to get the current cooling state. + * @cdev: thermal cooling device pointer. + * @state: fill this variable with the current cooling state. + * + * Callback for the thermal cooling device to return the clock + * current cooling state. + * + * Return: 0 (success) + */ +static int clock_cooling_get_cur_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + struct clock_cooling_device *ccdev = cdev->devdata; + + *state = ccdev->clock_state; + + return 0; +} + +/** + * clock_cooling_set_cur_state - function to set the current cooling state. + * @cdev: thermal cooling device pointer. + * @state: set this variable to the current cooling state. + * + * Callback for the thermal cooling device to change the clock cooling + * current cooling state. 
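+ * It simply delegates to clock_cooling_apply(), which performs the actual + * clock rate clipping.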
+ * + * Return: 0 on success, an error code otherwise. + */ +static int clock_cooling_set_cur_state(struct thermal_cooling_device *cdev, + unsigned long state) +{ + struct clock_cooling_device *clock_device = cdev->devdata; + + return clock_cooling_apply(clock_device, state); +} + +/* Bind clock callbacks to thermal cooling device ops */ +static struct thermal_cooling_device_ops const clock_cooling_ops = { + .get_max_state = clock_cooling_get_max_state, + .get_cur_state = clock_cooling_get_cur_state, + .set_cur_state = clock_cooling_set_cur_state, +}; + +/** + * clock_cooling_register - function to create clock cooling device. + * @dev: struct device pointer to the device used as clock cooling device. + * @clock_name: string containing the clock used as cooling mechanism. + * + * This interface function registers the clock cooling device with the name + * "thermal-clock-%x". The cooling device is based on clock frequencies. + * The struct device is assumed to be capable of DVFS transitions. + * The OPP layer is used to fetch and fill the available frequencies for + * the referred device. The ordered frequency table is used to control + * the clock cooling device cooling states and to limit clock transitions + * based on the cooling state requested by the thermal framework. + * + * Return: a valid struct thermal_cooling_device pointer on success, + * on failure, it returns a corresponding ERR_PTR(). + */ +struct thermal_cooling_device * +clock_cooling_register(struct device *dev, const char *clock_name) +{ + struct thermal_cooling_device *cdev; + struct clock_cooling_device *ccdev = NULL; + char dev_name[THERMAL_NAME_LENGTH]; + int ret = 0; + + ccdev = devm_kzalloc(dev, sizeof(*ccdev), GFP_KERNEL); + if (!ccdev) + return ERR_PTR(-ENOMEM); + + ccdev->dev = dev; + ccdev->clk = devm_clk_get(dev, clock_name); + if (IS_ERR(ccdev->clk)) + return ERR_CAST(ccdev->clk); + + ret = clock_cooling_get_idr(&ccdev->id); + if (ret) + return ERR_PTR(-EINVAL); + + snprintf(dev_name, sizeof(dev_name), "thermal-clock-%d", ccdev->id); + + cdev = thermal_cooling_device_register(dev_name, ccdev, + &clock_cooling_ops); + if (IS_ERR(cdev)) { + release_idr(ccdev->id); + return ERR_PTR(-EINVAL); + } + ccdev->cdev = cdev; + ccdev->clk_rate_change_nb.notifier_call = clock_cooling_clock_notifier; + + /* Assuming someone has already filled the opp table for this device */ + ret = dev_pm_opp_init_cpufreq_table(dev, &ccdev->freq_table); + if (ret) { + release_idr(ccdev->id); + return ERR_PTR(ret); + } + ccdev->clock_state = 0; + ccdev->clock_val = clock_cooling_get_frequency(ccdev, 0); + + clk_notifier_register(ccdev->clk, &ccdev->clk_rate_change_nb); + + return cdev; +} +EXPORT_SYMBOL_GPL(clock_cooling_register); + +/** + * clock_cooling_unregister - function to remove clock cooling device. + * @cdev: thermal cooling device pointer. + * + * This interface function unregisters the "thermal-clock-%x" cooling device. 
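+ * + * A typical user pairs this with clock_cooling_register(): register from + * probe() once the device's OPP table has been populated, and unregister + * from the matching remove() path.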
+ */ +void clock_cooling_unregister(struct thermal_cooling_device *cdev) +{ + struct clock_cooling_device *ccdev; + + if (!cdev) + return; + + ccdev = cdev->devdata; + + clk_notifier_unregister(ccdev->clk, &ccdev->clk_rate_change_nb); + dev_pm_opp_free_cpufreq_table(ccdev->dev, &ccdev->freq_table); + + thermal_cooling_device_unregister(ccdev->cdev); + release_idr(ccdev->id); +} +EXPORT_SYMBOL_GPL(clock_cooling_unregister); diff --git a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c index 0d8db808f0a..e4e61b3fb11 100644 --- a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c +++ b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c @@ -131,6 +131,8 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp, pr_warn("Failed to get target ACPI device\n"); } + result = 0; + *trtp = trts; /* don't count bad entries */ *trt_count -= nr_bad_entries; @@ -317,21 +319,21 @@ static long acpi_thermal_rel_ioctl(struct file *f, unsigned int cmd, { int ret = 0; unsigned long length = 0; - unsigned long count = 0; + int count = 0; char __user *arg = (void __user *)__arg; struct trt *trts; struct art *arts; switch (cmd) { case ACPI_THERMAL_GET_TRT_COUNT: - ret = acpi_parse_trt(acpi_thermal_rel_handle, (int *)&count, + ret = acpi_parse_trt(acpi_thermal_rel_handle, &count, &trts, false); kfree(trts); if (!ret) return put_user(count, (unsigned long __user *)__arg); return ret; case ACPI_THERMAL_GET_TRT_LEN: - ret = acpi_parse_trt(acpi_thermal_rel_handle, (int *)&count, + ret = acpi_parse_trt(acpi_thermal_rel_handle, &count, &trts, false); kfree(trts); length = count * sizeof(union trt_object); @@ -341,14 +343,14 @@ static long acpi_thermal_rel_ioctl(struct file *f, unsigned int cmd, case ACPI_THERMAL_GET_TRT: return fill_trt(arg); case ACPI_THERMAL_GET_ART_COUNT: - ret = acpi_parse_art(acpi_thermal_rel_handle, (int *)&count, + ret = acpi_parse_art(acpi_thermal_rel_handle, &count, &arts, false); kfree(arts); if (!ret) return put_user(count, (unsigned long __user *)__arg); return ret; case ACPI_THERMAL_GET_ART_LEN: - ret = acpi_parse_art(acpi_thermal_rel_handle, (int *)&count, + ret = acpi_parse_art(acpi_thermal_rel_handle, &count, &arts, false); kfree(arts); length = count * sizeof(union art_object); diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c index edc1cce117b..dcb306ea14a 100644 --- a/drivers/thermal/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/int340x_thermal/int3400_thermal.c @@ -43,6 +43,74 @@ struct int3400_thermal_priv { struct trt *trts; u8 uuid_bitmap; int rel_misc_dev_res; + int current_uuid_index; +}; + +static ssize_t available_uuids_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct int3400_thermal_priv *priv = platform_get_drvdata(pdev); + int i; + int length = 0; + + for (i = 0; i < INT3400_THERMAL_MAXIMUM_UUID; i++) { + if (priv->uuid_bitmap & (1 << i)) + if (PAGE_SIZE - length > 0) + length += snprintf(&buf[length], + PAGE_SIZE - length, + "%s\n", + int3400_thermal_uuids[i]); + } + + return length; +} + +static ssize_t current_uuid_show(struct device *dev, + struct device_attribute *devattr, char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct int3400_thermal_priv *priv = platform_get_drvdata(pdev); + + if (priv->uuid_bitmap & (1 << priv->current_uuid_index)) + return sprintf(buf, "%s\n", + 
int3400_thermal_uuids[priv->current_uuid_index]); + else + return sprintf(buf, "INVALID\n"); +} + +static ssize_t current_uuid_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct platform_device *pdev = to_platform_device(dev); + struct int3400_thermal_priv *priv = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < INT3400_THERMAL_MAXIMUM_UUID; ++i) { + if ((priv->uuid_bitmap & (1 << i)) && + !(strncmp(buf, int3400_thermal_uuids[i], + sizeof(int3400_thermal_uuids[i]) - 1))) { + priv->current_uuid_index = i; + return count; + } + } + + return -EINVAL; +} + +static DEVICE_ATTR(current_uuid, 0644, current_uuid_show, current_uuid_store); +static DEVICE_ATTR_RO(available_uuids); +static struct attribute *uuid_attrs[] = { + &dev_attr_available_uuids.attr, + &dev_attr_current_uuid.attr, + NULL +}; + +static struct attribute_group uuid_attribute_group = { + .attrs = uuid_attrs, + .name = "uuids" }; static int int3400_thermal_get_uuids(struct int3400_thermal_priv *priv) @@ -160,9 +228,9 @@ static int int3400_thermal_set_mode(struct thermal_zone_device *thermal, if (enable != priv->mode) { priv->mode = enable; - /* currently, only PASSIVE COOLING is supported */ result = int3400_thermal_run_osc(priv->adev->handle, - INT3400_THERMAL_PASSIVE_1, enable); + priv->current_uuid_index, + enable); } return result; } @@ -223,7 +291,14 @@ static int int3400_thermal_probe(struct platform_device *pdev) priv->rel_misc_dev_res = acpi_thermal_rel_misc_device_add( priv->adev->handle); + result = sysfs_create_group(&pdev->dev.kobj, &uuid_attribute_group); + if (result) + goto free_zone; + return 0; + +free_zone: + thermal_zone_device_unregister(priv->thermal); free_trt: kfree(priv->trts); free_art: @@ -240,6 +315,7 @@ static int int3400_thermal_remove(struct platform_device *pdev) if (!priv->rel_misc_dev_res) acpi_thermal_rel_misc_device_remove(priv->adev->handle); + sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group); thermal_zone_device_unregister(priv->thermal); kfree(priv->trts); kfree(priv->arts); diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c index 6e9fb62eb81..1bfa6a69e77 100644 --- a/drivers/thermal/int340x_thermal/int3403_thermal.c +++ b/drivers/thermal/int340x_thermal/int3403_thermal.c @@ -293,8 +293,7 @@ static int int3403_sensor_add(struct int3403_priv *priv) return 0; err_free_obj: - if (obj->tzone) - thermal_zone_device_unregister(obj->tzone); + thermal_zone_device_unregister(obj->tzone); return result; } @@ -471,7 +470,6 @@ static struct platform_driver int3403_driver = { .remove = int3403_remove, .driver = { .name = "int3403 thermal", - .owner = THIS_MODULE, .acpi_match_table = int3403_device_ids, }, }; diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index 95cb7fc20e1..b46c706e1ca 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -689,6 +689,7 @@ static const struct x86_cpu_id intel_powerclamp_ids[] = { { X86_VENDOR_INTEL, 6, 0x3f}, { X86_VENDOR_INTEL, 6, 0x45}, { X86_VENDOR_INTEL, 6, 0x46}, + { X86_VENDOR_INTEL, 6, 0x4c}, {} }; MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); diff --git a/drivers/thermal/intel_soc_dts_thermal.c b/drivers/thermal/intel_soc_dts_thermal.c index a6a0a18ec0a..5580f5b24eb 100644 --- a/drivers/thermal/intel_soc_dts_thermal.c +++ b/drivers/thermal/intel_soc_dts_thermal.c @@ -360,6 +360,9 @@ static void proc_thermal_interrupt(void) u32 sticky_out; int status; u32 
ptmc_out; + unsigned long flags; + + spin_lock_irqsave(&intr_notify_lock, flags); /* Clear APIC interrupt */ status = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ, @@ -378,21 +381,20 @@ static void proc_thermal_interrupt(void) /* reset sticky bit */ status = iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE, SOC_DTS_OFFSET_PTTSS, sticky_out); + spin_unlock_irqrestore(&intr_notify_lock, flags); + for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) { pr_debug("TZD update for zone %d\n", i); thermal_zone_device_update(soc_dts[i]->tzone); } - } + } else + spin_unlock_irqrestore(&intr_notify_lock, flags); } static irqreturn_t soc_irq_thread_fn(int irq, void *dev_data) { - unsigned long flags; - - spin_lock_irqsave(&intr_notify_lock, flags); proc_thermal_interrupt(); - spin_unlock_irqrestore(&intr_notify_lock, flags); pr_debug("proc_thermal_interrupt\n"); return IRQ_HANDLED; diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index 62143ba3100..e145b66df44 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -30,27 +30,13 @@ #include <linux/err.h> #include <linux/export.h> #include <linux/string.h> +#include <linux/thermal.h> #include "thermal_core.h" /*** Private data structures to represent thermal device tree data ***/ /** - * struct __thermal_trip - representation of a point in temperature domain - * @np: pointer to struct device_node that this trip point was created from - * @temperature: temperature value in miliCelsius - * @hysteresis: relative hysteresis in miliCelsius - * @type: trip point type - */ - -struct __thermal_trip { - struct device_node *np; - unsigned long int temperature; - unsigned long int hysteresis; - enum thermal_trip_type type; -}; - -/** * struct __thermal_bind_param - a match between trip and cooling device * @cooling_device: a pointer to identify the referred cooling device * @trip_id: the trip point index @@ -77,8 +63,7 @@ struct __thermal_bind_params { * @num_tbps: number of thermal bind params * @tbps: an array of thermal bind params (0..num_tbps - 1) * @sensor_data: sensor private data used while reading temperature and trend - * @get_temp: sensor callback to read temperature - * @get_trend: sensor callback to read temperature trend + * @ops: set of callbacks to handle the thermal zone based on DT */ struct __thermal_zone { @@ -88,7 +73,7 @@ struct __thermal_zone { /* trip data */ int ntrips; - struct __thermal_trip *trips; + struct thermal_trip *trips; /* cooling binding data */ int num_tbps; @@ -96,8 +81,7 @@ struct __thermal_zone { /* sensor interface */ void *sensor_data; - int (*get_temp)(void *, long *); - int (*get_trend)(void *, long *); + const struct thermal_zone_of_device_ops *ops; }; /*** DT thermal zone device callbacks ***/ @@ -107,10 +91,96 @@ static int of_thermal_get_temp(struct thermal_zone_device *tz, { struct __thermal_zone *data = tz->devdata; - if (!data->get_temp) + if (!data->ops->get_temp) return -EINVAL; - return data->get_temp(data->sensor_data, temp); + return data->ops->get_temp(data->sensor_data, temp); +} + +/** + * of_thermal_get_ntrips - function to export number of available trip + * points. 
+ * @tz: pointer to a thermal zone + * + * This function is a globally visible wrapper to get number of trip points + * stored in the local struct __thermal_zone + * + * Return: number of available trip points, -ENODEV when data not available + */ +int of_thermal_get_ntrips(struct thermal_zone_device *tz) +{ + struct __thermal_zone *data = tz->devdata; + + if (!data || IS_ERR(data)) + return -ENODEV; + + return data->ntrips; +} +EXPORT_SYMBOL_GPL(of_thermal_get_ntrips); + +/** + * of_thermal_is_trip_valid - function to check if trip point is valid + * + * @tz: pointer to a thermal zone + * @trip: trip point to evaluate + * + * This function is responsible for checking if passed trip point is valid + * + * Return: true if trip point is valid, false otherwise + */ +bool of_thermal_is_trip_valid(struct thermal_zone_device *tz, int trip) +{ + struct __thermal_zone *data = tz->devdata; + + if (!data || trip >= data->ntrips || trip < 0) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(of_thermal_is_trip_valid); + +/** + * of_thermal_get_trip_points - function to get access to a globally exported + * trip points + * + * @tz: pointer to a thermal zone + * + * This function provides a pointer to trip points table + * + * Return: pointer to trip points table, NULL otherwise + */ +const struct thermal_trip * const +of_thermal_get_trip_points(struct thermal_zone_device *tz) +{ + struct __thermal_zone *data = tz->devdata; + + if (!data) + return NULL; + + return data->trips; +} +EXPORT_SYMBOL_GPL(of_thermal_get_trip_points); + +/** + * of_thermal_set_emul_temp - function to set emulated temperature + * + * @tz: pointer to a thermal zone + * @temp: temperature to set + * + * This function gives the ability to set emulated value of temperature, + * which is handy for debugging + * + * Return: zero on success, error code otherwise + */ +static int of_thermal_set_emul_temp(struct thermal_zone_device *tz, + unsigned long temp) +{ + struct __thermal_zone *data = tz->devdata; + + if (!data->ops || !data->ops->set_emul_temp) + return -EINVAL; + + return data->ops->set_emul_temp(data->sensor_data, temp); } static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip, @@ -120,10 +190,10 @@ static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip, long dev_trend; int r; - if (!data->get_trend) + if (!data->ops->get_trend) return -EINVAL; - r = data->get_trend(data->sensor_data, &dev_trend); + r = data->ops->get_trend(data->sensor_data, &dev_trend); if (r) return r; @@ -324,8 +394,7 @@ static struct thermal_zone_device_ops of_thermal_ops = { static struct thermal_zone_device * thermal_zone_of_add_sensor(struct device_node *zone, struct device_node *sensor, void *data, - int (*get_temp)(void *, long *), - int (*get_trend)(void *, long *)) + const struct thermal_zone_of_device_ops *ops) { struct thermal_zone_device *tzd; struct __thermal_zone *tz; @@ -336,13 +405,16 @@ thermal_zone_of_add_sensor(struct device_node *zone, tz = tzd->devdata; + if (!ops) + return ERR_PTR(-EINVAL); + mutex_lock(&tzd->lock); - tz->get_temp = get_temp; - tz->get_trend = get_trend; + tz->ops = ops; tz->sensor_data = data; tzd->ops->get_temp = of_thermal_get_temp; tzd->ops->get_trend = of_thermal_get_trend; + tzd->ops->set_emul_temp = of_thermal_set_emul_temp; mutex_unlock(&tzd->lock); return tzd; @@ -356,8 +428,7 @@ thermal_zone_of_add_sensor(struct device_node *zone, * than one sensors * @data: a private pointer (owned by the caller) that will be passed * back, when a temperature reading is needed. 
- * @get_temp: a pointer to a function that reads the sensor temperature. - * @get_trend: a pointer to a function that reads the sensor temperature trend. + * @ops: struct thermal_zone_of_device_ops *. Must contain at least .get_temp. * * This function will search the list of thermal zones described in device * tree and look for the zone that refers to the sensor device pointed by * @@ -382,9 +453,8 @@ thermal_zone_of_add_sensor * check the return value with help of IS_ERR() helper. */ struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int sensor_id, - void *data, int (*get_temp)(void *, long *), - int (*get_trend)(void *, long *)) +thermal_zone_of_sensor_register(struct device *dev, int sensor_id, void *data, + const struct thermal_zone_of_device_ops *ops) { struct device_node *np, *child, *sensor_np; struct thermal_zone_device *tzd = ERR_PTR(-ENODEV); @@ -426,9 +496,7 @@ thermal_zone_of_sensor_register(struct device *dev, int sensor_id, if (sensor_specs.np == sensor_np && id == sensor_id) { tzd = thermal_zone_of_add_sensor(child, sensor_np, - data, - get_temp, - get_trend); + data, ops); of_node_put(sensor_specs.np); of_node_put(child); goto exit; @@ -475,9 +543,9 @@ void thermal_zone_of_sensor_unregister(struct device *dev, mutex_lock(&tzd->lock); tzd->ops->get_temp = NULL; tzd->ops->get_trend = NULL; + tzd->ops->set_emul_temp = NULL; - tz->get_temp = NULL; - tz->get_trend = NULL; + tz->ops = NULL; tz->sensor_data = NULL; mutex_unlock(&tzd->lock); } @@ -501,7 +569,7 @@ EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_unregister); */ static int thermal_of_populate_bind_params(struct device_node *np, struct __thermal_bind_params *__tbp, - struct __thermal_trip *trips, + struct thermal_trip *trips, int ntrips) { struct of_phandle_args cooling_spec; @@ -604,7 +672,7 @@ static int thermal_of_get_trip_type(struct device_node *np, * Return: 0 on success, proper error code otherwise */ static int thermal_of_populate_trip(struct device_node *np, - struct __thermal_trip *trip) + struct thermal_trip *trip) { int prop; int ret; diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c new file mode 100644 index 00000000000..1bcddfc60e9 --- /dev/null +++ b/drivers/thermal/rockchip_thermal.c @@ -0,0 +1,693 @@ +/* + * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/platform_device.h> +#include <linux/reset.h> +#include <linux/thermal.h> + +/** + * If the temperature stays high over a period of time, the resulting TSHUT + * signal either goes to the CRU module, letting it reset the entire chip, + * or to the PMIC via a GPIO. + */ +enum tshut_mode { + TSHUT_MODE_CRU = 0, + TSHUT_MODE_GPIO, +}; + +/** + * The polarity of the system's Temperature Sensor thermal shutdown (tshut) + * signal; bit 8 selects the tshut polarity.
+ * 0: low active, 1: high active + */ +enum tshut_polarity { + TSHUT_LOW_ACTIVE = 0, + TSHUT_HIGH_ACTIVE, +}; + +/** + * The system has three Temperature Sensors. channel 0 is reserved, + * channel 1 is for CPU, and channel 2 is for GPU. + */ +enum sensor_id { + SENSOR_CPU = 1, + SENSOR_GPU, +}; + +struct rockchip_tsadc_chip { + /* The hardware-controlled tshut property */ + long tshut_temp; + enum tshut_mode tshut_mode; + enum tshut_polarity tshut_polarity; + + /* Chip-wide methods */ + void (*initialize)(void __iomem *reg, enum tshut_polarity p); + void (*irq_ack)(void __iomem *reg); + void (*control)(void __iomem *reg, bool on); + + /* Per-sensor methods */ + int (*get_temp)(int chn, void __iomem *reg, long *temp); + void (*set_tshut_temp)(int chn, void __iomem *reg, long temp); + void (*set_tshut_mode)(int chn, void __iomem *reg, enum tshut_mode m); +}; + +struct rockchip_thermal_sensor { + struct rockchip_thermal_data *thermal; + struct thermal_zone_device *tzd; + enum sensor_id id; +}; + +#define NUM_SENSORS 2 /* Ignore unused sensor 0 */ + +struct rockchip_thermal_data { + const struct rockchip_tsadc_chip *chip; + struct platform_device *pdev; + struct reset_control *reset; + + struct rockchip_thermal_sensor sensors[NUM_SENSORS]; + + struct clk *clk; + struct clk *pclk; + + void __iomem *regs; + + long tshut_temp; + enum tshut_mode tshut_mode; + enum tshut_polarity tshut_polarity; +}; + +/* TSADC V2 Sensor info define: */ +#define TSADCV2_AUTO_CON 0x04 +#define TSADCV2_INT_EN 0x08 +#define TSADCV2_INT_PD 0x0c +#define TSADCV2_DATA(chn) (0x20 + (chn) * 0x04) +#define TSADCV2_COMP_SHUT(chn) (0x40 + (chn) * 0x04) +#define TSADCV2_HIGHT_INT_DEBOUNCE 0x60 +#define TSADCV2_HIGHT_TSHUT_DEBOUNCE 0x64 +#define TSADCV2_AUTO_PERIOD 0x68 +#define TSADCV2_AUTO_PERIOD_HT 0x6c + +#define TSADCV2_AUTO_EN BIT(0) +#define TSADCV2_AUTO_DISABLE ~BIT(0) +#define TSADCV2_AUTO_SRC_EN(chn) BIT(4 + (chn)) +#define TSADCV2_AUTO_TSHUT_POLARITY_HIGH BIT(8) +#define TSADCV2_AUTO_TSHUT_POLARITY_LOW ~BIT(8) + +#define TSADCV2_INT_SRC_EN(chn) BIT(chn) +#define TSADCV2_SHUT_2GPIO_SRC_EN(chn) BIT(4 + (chn)) +#define TSADCV2_SHUT_2CRU_SRC_EN(chn) BIT(8 + (chn)) + +#define TSADCV2_INT_PD_CLEAR ~BIT(8) + +#define TSADCV2_DATA_MASK 0xfff +#define TSADCV2_HIGHT_INT_DEBOUNCE_COUNT 4 +#define TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT 4 +#define TSADCV2_AUTO_PERIOD_TIME 250 /* msec */ +#define TSADCV2_AUTO_PERIOD_HT_TIME 50 /* msec */ + +struct tsadc_table { + unsigned long code; + long temp; +}; + +static const struct tsadc_table v2_code_table[] = { + {TSADCV2_DATA_MASK, -40000}, + {3800, -40000}, + {3792, -35000}, + {3783, -30000}, + {3774, -25000}, + {3765, -20000}, + {3756, -15000}, + {3747, -10000}, + {3737, -5000}, + {3728, 0}, + {3718, 5000}, + {3708, 10000}, + {3698, 15000}, + {3688, 20000}, + {3678, 25000}, + {3667, 30000}, + {3656, 35000}, + {3645, 40000}, + {3634, 45000}, + {3623, 50000}, + {3611, 55000}, + {3600, 60000}, + {3588, 65000}, + {3575, 70000}, + {3563, 75000}, + {3550, 80000}, + {3537, 85000}, + {3524, 90000}, + {3510, 95000}, + {3496, 100000}, + {3482, 105000}, + {3467, 110000}, + {3452, 115000}, + {3437, 120000}, + {3421, 125000}, + {0, 125000}, +}; + +static u32 rk_tsadcv2_temp_to_code(long temp) +{ + int high, low, mid; + + low = 0; + high = ARRAY_SIZE(v2_code_table) - 1; + mid = (high + low) / 2; + + if (temp < v2_code_table[low].temp || temp > v2_code_table[high].temp) + return 0; + + while (low <= high) { + if (temp == v2_code_table[mid].temp) + return v2_code_table[mid].code; + else if (temp < 
v2_code_table[mid].temp) + high = mid - 1; + else + low = mid + 1; + mid = (low + high) / 2; + } + + return 0; +} + +static long rk_tsadcv2_code_to_temp(u32 code) +{ + int high, low, mid; + + low = 0; + high = ARRAY_SIZE(v2_code_table) - 1; + mid = (high + low) / 2; + + if (code > v2_code_table[low].code || code < v2_code_table[high].code) + return 125000; /* No code available, return max temperature */ + + while (low <= high) { + if (code >= v2_code_table[mid].code && code < + v2_code_table[mid - 1].code) + return v2_code_table[mid].temp; + else if (code < v2_code_table[mid].code) + low = mid + 1; + else + high = mid - 1; + mid = (low + high) / 2; + } + + return 125000; +} + +/** + * rk_tsadcv2_initialize - initialize TSADC Controller + * (1) Set TSADCV2_AUTO_PERIOD: configure the interval between two + * consecutive TSADC accesses in normal operation. + * (2) Set TSADCV2_AUTO_PERIOD_HT: configure the interval between two + * consecutive TSADC accesses after the temperature is higher + * than COMP_SHUT or COMP_INT. + * (3) Set TSADCV2_HIGHT_INT_DEBOUNCE and TSADCV2_HIGHT_TSHUT_DEBOUNCE: + * if the temperature is higher than COMP_INT or COMP_SHUT for + * "debounce" consecutive samples, the TSADC controller will generate + * an interrupt or a TSHUT event. + */ +static void rk_tsadcv2_initialize(void __iomem *regs, + enum tshut_polarity tshut_polarity) +{ + if (tshut_polarity == TSHUT_HIGH_ACTIVE) + writel_relaxed(0 | (TSADCV2_AUTO_TSHUT_POLARITY_HIGH), + regs + TSADCV2_AUTO_CON); + else + writel_relaxed(0 | (TSADCV2_AUTO_TSHUT_POLARITY_LOW), + regs + TSADCV2_AUTO_CON); + + writel_relaxed(TSADCV2_AUTO_PERIOD_TIME, regs + TSADCV2_AUTO_PERIOD); + writel_relaxed(TSADCV2_HIGHT_INT_DEBOUNCE_COUNT, + regs + TSADCV2_HIGHT_INT_DEBOUNCE); + writel_relaxed(TSADCV2_AUTO_PERIOD_HT_TIME, + regs + TSADCV2_AUTO_PERIOD_HT); + writel_relaxed(TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT, + regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE); +} + +static void rk_tsadcv2_irq_ack(void __iomem *regs) +{ + u32 val; + + val = readl_relaxed(regs + TSADCV2_INT_PD); + writel_relaxed(val & TSADCV2_INT_PD_CLEAR, regs + TSADCV2_INT_PD); +} + +static void rk_tsadcv2_control(void __iomem *regs, bool enable) +{ + u32 val; + + val = readl_relaxed(regs + TSADCV2_AUTO_CON); + if (enable) + val |= TSADCV2_AUTO_EN; + else + val &= ~TSADCV2_AUTO_EN; + + writel_relaxed(val, regs + TSADCV2_AUTO_CON); +} + +static int rk_tsadcv2_get_temp(int chn, void __iomem *regs, long *temp) +{ + u32 val; + + /* the A/D value of the channel's last conversion needs some time */ + val = readl_relaxed(regs + TSADCV2_DATA(chn)); + if (val == 0) + return -EAGAIN; + + *temp = rk_tsadcv2_code_to_temp(val); + + return 0; +} + +static void rk_tsadcv2_tshut_temp(int chn, void __iomem *regs, long temp) +{ + u32 tshut_value, val; + + tshut_value = rk_tsadcv2_temp_to_code(temp); + writel_relaxed(tshut_value, regs + TSADCV2_COMP_SHUT(chn)); + + /* enable the channel source so that TSHUT becomes valid */ + val = readl_relaxed(regs + TSADCV2_AUTO_CON); + writel_relaxed(val | TSADCV2_AUTO_SRC_EN(chn), regs + TSADCV2_AUTO_CON); +} + +static void rk_tsadcv2_tshut_mode(int chn, void __iomem *regs, + enum tshut_mode mode) +{ + u32 val; + + val = readl_relaxed(regs + TSADCV2_INT_EN); + if (mode == TSHUT_MODE_GPIO) { + val &= ~TSADCV2_SHUT_2CRU_SRC_EN(chn); + val |= TSADCV2_SHUT_2GPIO_SRC_EN(chn); + } else { + val &= ~TSADCV2_SHUT_2GPIO_SRC_EN(chn); + val |= TSADCV2_SHUT_2CRU_SRC_EN(chn); + } + + writel_relaxed(val, regs + TSADCV2_INT_EN); +} + +static const struct rockchip_tsadc_chip rk3288_tsadc_data = { + .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO 
give PMIC */ + .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */ + .tshut_temp = 95000, + + .initialize = rk_tsadcv2_initialize, + .irq_ack = rk_tsadcv2_irq_ack, + .control = rk_tsadcv2_control, + .get_temp = rk_tsadcv2_get_temp, + .set_tshut_temp = rk_tsadcv2_tshut_temp, + .set_tshut_mode = rk_tsadcv2_tshut_mode, +}; + +static const struct of_device_id of_rockchip_thermal_match[] = { + { + .compatible = "rockchip,rk3288-tsadc", + .data = (void *)&rk3288_tsadc_data, + }, + { /* end */ }, +}; +MODULE_DEVICE_TABLE(of, of_rockchip_thermal_match); + +static void +rockchip_thermal_toggle_sensor(struct rockchip_thermal_sensor *sensor, bool on) +{ + struct thermal_zone_device *tzd = sensor->tzd; + + tzd->ops->set_mode(tzd, + on ? THERMAL_DEVICE_ENABLED : THERMAL_DEVICE_DISABLED); +} + +static irqreturn_t rockchip_thermal_alarm_irq_thread(int irq, void *dev) +{ + struct rockchip_thermal_data *thermal = dev; + int i; + + dev_dbg(&thermal->pdev->dev, "thermal alarm\n"); + + thermal->chip->irq_ack(thermal->regs); + + for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) + thermal_zone_device_update(thermal->sensors[i].tzd); + + return IRQ_HANDLED; +} + +static int rockchip_thermal_get_temp(void *_sensor, long *out_temp) +{ + struct rockchip_thermal_sensor *sensor = _sensor; + struct rockchip_thermal_data *thermal = sensor->thermal; + const struct rockchip_tsadc_chip *tsadc = sensor->thermal->chip; + int retval; + + retval = tsadc->get_temp(sensor->id, thermal->regs, out_temp); + dev_dbg(&thermal->pdev->dev, "sensor %d - temp: %ld, retval: %d\n", + sensor->id, *out_temp, retval); + + return retval; +} + +static const struct thermal_zone_of_device_ops rockchip_of_thermal_ops = { + .get_temp = rockchip_thermal_get_temp, +}; + +static int rockchip_configure_from_dt(struct device *dev, + struct device_node *np, + struct rockchip_thermal_data *thermal) +{ + u32 shut_temp, tshut_mode, tshut_polarity; + + if (of_property_read_u32(np, "rockchip,hw-tshut-temp", &shut_temp)) { + dev_warn(dev, + "Missing tshut temp property, using default %ld\n", + thermal->chip->tshut_temp); + thermal->tshut_temp = thermal->chip->tshut_temp; + } else { + thermal->tshut_temp = shut_temp; + } + + if (thermal->tshut_temp > INT_MAX) { + dev_err(dev, "Invalid tshut temperature specified: %ld\n", + thermal->tshut_temp); + return -ERANGE; + } + + if (of_property_read_u32(np, "rockchip,hw-tshut-mode", &tshut_mode)) { + dev_warn(dev, + "Missing tshut mode property, using default (%s)\n", + thermal->chip->tshut_mode == TSHUT_MODE_GPIO ? + "gpio" : "cru"); + thermal->tshut_mode = thermal->chip->tshut_mode; + } else { + thermal->tshut_mode = tshut_mode; + } + + if (thermal->tshut_mode > 1) { + dev_err(dev, "Invalid tshut mode specified: %d\n", + thermal->tshut_mode); + return -EINVAL; + } + + if (of_property_read_u32(np, "rockchip,hw-tshut-polarity", + &tshut_polarity)) { + dev_warn(dev, + "Missing tshut-polarity property, using default (%s)\n", + thermal->chip->tshut_polarity == TSHUT_LOW_ACTIVE ? 
+ "low" : "high"); + thermal->tshut_polarity = thermal->chip->tshut_polarity; + } else { + thermal->tshut_polarity = tshut_polarity; + } + + if (thermal->tshut_polarity > 1) { + dev_err(dev, "Invalid tshut-polarity specified: %d\n", + thermal->tshut_polarity); + return -EINVAL; + } + + return 0; +} + +static int +rockchip_thermal_register_sensor(struct platform_device *pdev, + struct rockchip_thermal_data *thermal, + struct rockchip_thermal_sensor *sensor, + enum sensor_id id) +{ + const struct rockchip_tsadc_chip *tsadc = thermal->chip; + int error; + + tsadc->set_tshut_mode(id, thermal->regs, thermal->tshut_mode); + tsadc->set_tshut_temp(id, thermal->regs, thermal->tshut_temp); + + sensor->thermal = thermal; + sensor->id = id; + sensor->tzd = thermal_zone_of_sensor_register(&pdev->dev, id, sensor, + &rockchip_of_thermal_ops); + if (IS_ERR(sensor->tzd)) { + error = PTR_ERR(sensor->tzd); + dev_err(&pdev->dev, "failed to register sensor %d: %d\n", + id, error); + return error; + } + + return 0; +} + +/* + * Reset TSADC Controller, reset all tsadc registers. + */ +static void rockchip_thermal_reset_controller(struct reset_control *reset) +{ + reset_control_assert(reset); + usleep_range(10, 20); + reset_control_deassert(reset); +} + +static int rockchip_thermal_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct rockchip_thermal_data *thermal; + const struct of_device_id *match; + struct resource *res; + int irq; + int i; + int error; + + match = of_match_node(of_rockchip_thermal_match, np); + if (!match) + return -ENXIO; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "no irq resource?\n"); + return -EINVAL; + } + + thermal = devm_kzalloc(&pdev->dev, sizeof(struct rockchip_thermal_data), + GFP_KERNEL); + if (!thermal) + return -ENOMEM; + + thermal->pdev = pdev; + + thermal->chip = (const struct rockchip_tsadc_chip *)match->data; + if (!thermal->chip) + return -EINVAL; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + thermal->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(thermal->regs)) + return PTR_ERR(thermal->regs); + + thermal->reset = devm_reset_control_get(&pdev->dev, "tsadc-apb"); + if (IS_ERR(thermal->reset)) { + error = PTR_ERR(thermal->reset); + dev_err(&pdev->dev, "failed to get tsadc reset: %d\n", error); + return error; + } + + thermal->clk = devm_clk_get(&pdev->dev, "tsadc"); + if (IS_ERR(thermal->clk)) { + error = PTR_ERR(thermal->clk); + dev_err(&pdev->dev, "failed to get tsadc clock: %d\n", error); + return error; + } + + thermal->pclk = devm_clk_get(&pdev->dev, "apb_pclk"); + if (IS_ERR(thermal->pclk)) { + error = PTR_ERR(thermal->clk); + dev_err(&pdev->dev, "failed to get apb_pclk clock: %d\n", + error); + return error; + } + + error = clk_prepare_enable(thermal->clk); + if (error) { + dev_err(&pdev->dev, "failed to enable converter clock: %d\n", + error); + return error; + } + + error = clk_prepare_enable(thermal->pclk); + if (error) { + dev_err(&pdev->dev, "failed to enable pclk: %d\n", error); + goto err_disable_clk; + } + + rockchip_thermal_reset_controller(thermal->reset); + + error = rockchip_configure_from_dt(&pdev->dev, np, thermal); + if (error) { + dev_err(&pdev->dev, "failed to parse device tree data: %d\n", + error); + goto err_disable_pclk; + } + + thermal->chip->initialize(thermal->regs, thermal->tshut_polarity); + + error = rockchip_thermal_register_sensor(pdev, thermal, + &thermal->sensors[0], + SENSOR_CPU); + if (error) { + dev_err(&pdev->dev, + "failed to 
register CPU thermal sensor: %d\n", error); + goto err_disable_pclk; + } + + error = rockchip_thermal_register_sensor(pdev, thermal, + &thermal->sensors[1], + SENSOR_GPU); + if (error) { + dev_err(&pdev->dev, + "failed to register GPU thermal sensor: %d\n", error); + goto err_unregister_cpu_sensor; + } + + error = devm_request_threaded_irq(&pdev->dev, irq, NULL, + &rockchip_thermal_alarm_irq_thread, + IRQF_ONESHOT, + "rockchip_thermal", thermal); + if (error) { + dev_err(&pdev->dev, + "failed to request tsadc irq: %d\n", error); + goto err_unregister_gpu_sensor; + } + + thermal->chip->control(thermal->regs, true); + + for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) + rockchip_thermal_toggle_sensor(&thermal->sensors[i], true); + + platform_set_drvdata(pdev, thermal); + + return 0; + +err_unregister_gpu_sensor: + thermal_zone_of_sensor_unregister(&pdev->dev, thermal->sensors[1].tzd); +err_unregister_cpu_sensor: + thermal_zone_of_sensor_unregister(&pdev->dev, thermal->sensors[0].tzd); +err_disable_pclk: + clk_disable_unprepare(thermal->pclk); +err_disable_clk: + clk_disable_unprepare(thermal->clk); + + return error; +} + +static int rockchip_thermal_remove(struct platform_device *pdev) +{ + struct rockchip_thermal_data *thermal = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) { + struct rockchip_thermal_sensor *sensor = &thermal->sensors[i]; + + rockchip_thermal_toggle_sensor(sensor, false); + thermal_zone_of_sensor_unregister(&pdev->dev, sensor->tzd); + } + + thermal->chip->control(thermal->regs, false); + + clk_disable_unprepare(thermal->pclk); + clk_disable_unprepare(thermal->clk); + + return 0; +} + +static int __maybe_unused rockchip_thermal_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct rockchip_thermal_data *thermal = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) + rockchip_thermal_toggle_sensor(&thermal->sensors[i], false); + + thermal->chip->control(thermal->regs, false); + + clk_disable(thermal->pclk); + clk_disable(thermal->clk); + + return 0; +} + +static int __maybe_unused rockchip_thermal_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct rockchip_thermal_data *thermal = platform_get_drvdata(pdev); + int i; + int error; + + error = clk_enable(thermal->clk); + if (error) + return error; + + error = clk_enable(thermal->pclk); + if (error) + return error; + + rockchip_thermal_reset_controller(thermal->reset); + + thermal->chip->initialize(thermal->regs, thermal->tshut_polarity); + + for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) { + enum sensor_id id = thermal->sensors[i].id; + + thermal->chip->set_tshut_mode(id, thermal->regs, + thermal->tshut_mode); + thermal->chip->set_tshut_temp(id, thermal->regs, + thermal->tshut_temp); + } + + thermal->chip->control(thermal->regs, true); + + for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) + rockchip_thermal_toggle_sensor(&thermal->sensors[i], true); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(rockchip_thermal_pm_ops, + rockchip_thermal_suspend, rockchip_thermal_resume); + +static struct platform_driver rockchip_thermal_driver = { + .driver = { + .name = "rockchip-thermal", + .owner = THIS_MODULE, + .pm = &rockchip_thermal_pm_ops, + .of_match_table = of_rockchip_thermal_match, + }, + .probe = rockchip_thermal_probe, + .remove = rockchip_thermal_remove, +}; + +module_platform_driver(rockchip_thermal_driver); + +MODULE_DESCRIPTION("ROCKCHIP THERMAL 
Driver"); +MODULE_AUTHOR("Rockchip, Inc."); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:rockchip-thermal"); diff --git a/drivers/thermal/samsung/exynos_thermal_common.h b/drivers/thermal/samsung/exynos_thermal_common.h index 158f5aa8dc5..cd4471925cd 100644 --- a/drivers/thermal/samsung/exynos_thermal_common.h +++ b/drivers/thermal/samsung/exynos_thermal_common.h @@ -27,7 +27,6 @@ #define SENSOR_NAME_LEN 16 #define MAX_TRIP_COUNT 8 #define MAX_COOLING_DEVICE 4 -#define MAX_TRIMINFO_CTRL_REG 2 #define ACTIVE_INTERVAL 500 #define IDLE_INTERVAL 10000 diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index 1e7d0736e86..d44d91d681d 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -33,7 +33,87 @@ #include "exynos_thermal_common.h" #include "exynos_tmu.h" -#include "exynos_tmu_data.h" + +/* Exynos generic registers */ +#define EXYNOS_TMU_REG_TRIMINFO 0x0 +#define EXYNOS_TMU_REG_CONTROL 0x20 +#define EXYNOS_TMU_REG_STATUS 0x28 +#define EXYNOS_TMU_REG_CURRENT_TEMP 0x40 +#define EXYNOS_TMU_REG_INTEN 0x70 +#define EXYNOS_TMU_REG_INTSTAT 0x74 +#define EXYNOS_TMU_REG_INTCLEAR 0x78 + +#define EXYNOS_TMU_TEMP_MASK 0xff +#define EXYNOS_TMU_REF_VOLTAGE_SHIFT 24 +#define EXYNOS_TMU_REF_VOLTAGE_MASK 0x1f +#define EXYNOS_TMU_BUF_SLOPE_SEL_MASK 0xf +#define EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT 8 +#define EXYNOS_TMU_CORE_EN_SHIFT 0 + +/* Exynos3250 specific registers */ +#define EXYNOS_TMU_TRIMINFO_CON1 0x10 + +/* Exynos4210 specific registers */ +#define EXYNOS4210_TMU_REG_THRESHOLD_TEMP 0x44 +#define EXYNOS4210_TMU_REG_TRIG_LEVEL0 0x50 + +/* Exynos5250, Exynos4412, Exynos3250 specific registers */ +#define EXYNOS_TMU_TRIMINFO_CON2 0x14 +#define EXYNOS_THD_TEMP_RISE 0x50 +#define EXYNOS_THD_TEMP_FALL 0x54 +#define EXYNOS_EMUL_CON 0x80 + +#define EXYNOS_TRIMINFO_RELOAD_ENABLE 1 +#define EXYNOS_TRIMINFO_25_SHIFT 0 +#define EXYNOS_TRIMINFO_85_SHIFT 8 +#define EXYNOS_TMU_TRIP_MODE_SHIFT 13 +#define EXYNOS_TMU_TRIP_MODE_MASK 0x7 +#define EXYNOS_TMU_THERM_TRIP_EN_SHIFT 12 + +#define EXYNOS_TMU_INTEN_RISE0_SHIFT 0 +#define EXYNOS_TMU_INTEN_RISE1_SHIFT 4 +#define EXYNOS_TMU_INTEN_RISE2_SHIFT 8 +#define EXYNOS_TMU_INTEN_RISE3_SHIFT 12 +#define EXYNOS_TMU_INTEN_FALL0_SHIFT 16 + +#define EXYNOS_EMUL_TIME 0x57F0 +#define EXYNOS_EMUL_TIME_MASK 0xffff +#define EXYNOS_EMUL_TIME_SHIFT 16 +#define EXYNOS_EMUL_DATA_SHIFT 8 +#define EXYNOS_EMUL_DATA_MASK 0xFF +#define EXYNOS_EMUL_ENABLE 0x1 + +/* Exynos5260 specific */ +#define EXYNOS5260_TMU_REG_INTEN 0xC0 +#define EXYNOS5260_TMU_REG_INTSTAT 0xC4 +#define EXYNOS5260_TMU_REG_INTCLEAR 0xC8 +#define EXYNOS5260_EMUL_CON 0x100 + +/* Exynos4412 specific */ +#define EXYNOS4412_MUX_ADDR_VALUE 6 +#define EXYNOS4412_MUX_ADDR_SHIFT 20 + +/*exynos5440 specific registers*/ +#define EXYNOS5440_TMU_S0_7_TRIM 0x000 +#define EXYNOS5440_TMU_S0_7_CTRL 0x020 +#define EXYNOS5440_TMU_S0_7_DEBUG 0x040 +#define EXYNOS5440_TMU_S0_7_TEMP 0x0f0 +#define EXYNOS5440_TMU_S0_7_TH0 0x110 +#define EXYNOS5440_TMU_S0_7_TH1 0x130 +#define EXYNOS5440_TMU_S0_7_TH2 0x150 +#define EXYNOS5440_TMU_S0_7_IRQEN 0x210 +#define EXYNOS5440_TMU_S0_7_IRQ 0x230 +/* exynos5440 common registers */ +#define EXYNOS5440_TMU_IRQ_STATUS 0x000 +#define EXYNOS5440_TMU_PMIN 0x004 + +#define EXYNOS5440_TMU_INTEN_RISE0_SHIFT 0 +#define EXYNOS5440_TMU_INTEN_RISE1_SHIFT 1 +#define EXYNOS5440_TMU_INTEN_RISE2_SHIFT 2 +#define EXYNOS5440_TMU_INTEN_RISE3_SHIFT 3 +#define EXYNOS5440_TMU_INTEN_FALL0_SHIFT 4 +#define EXYNOS5440_TMU_TH_RISE4_SHIFT 24 +#define 
EXYNOS5440_EFUSE_SWAP_OFFSET 8 /** * struct exynos_tmu_data : A structure to hold the private data of the TMU @@ -52,6 +132,11 @@ * @temp_error2: fused value of the second point trim. * @regulator: pointer to the TMU regulator structure. * @reg_conf: pointer to structure to register with core thermal. + * @tmu_initialize: SoC specific TMU initialization method + * @tmu_control: SoC specific TMU control method + * @tmu_read: SoC specific TMU temperature read method + * @tmu_set_emulation: SoC specific TMU emulation setting method + * @tmu_clear_irqs: SoC specific TMU interrupts clearing method */ struct exynos_tmu_data { int id; @@ -66,6 +151,12 @@ struct exynos_tmu_data { u8 temp_error1, temp_error2; struct regulator *regulator; struct thermal_sensor_conf *reg_conf; + int (*tmu_initialize)(struct platform_device *pdev); + void (*tmu_control)(struct platform_device *pdev, bool on); + int (*tmu_read)(struct exynos_tmu_data *data); + void (*tmu_set_emulation)(struct exynos_tmu_data *data, + unsigned long temp); + void (*tmu_clear_irqs)(struct exynos_tmu_data *data); }; /* @@ -122,83 +213,10 @@ static int code_to_temp(struct exynos_tmu_data *data, u8 temp_code) return temp; } -static void exynos_tmu_clear_irqs(struct exynos_tmu_data *data) -{ - const struct exynos_tmu_registers *reg = data->pdata->registers; - unsigned int val_irq; - - val_irq = readl(data->base + reg->tmu_intstat); - /* - * Clear the interrupts. Please note that the documentation for - * Exynos3250, Exynos4412, Exynos5250 and Exynos5260 incorrectly - * states that INTCLEAR register has a different placing of bits - * responsible for FALL IRQs than INTSTAT register. Exynos5420 - * and Exynos5440 documentation is correct (Exynos4210 doesn't - * support FALL IRQs at all). - */ - writel(val_irq, data->base + reg->tmu_intclear); -} - -static int exynos_tmu_initialize(struct platform_device *pdev) +static void sanitize_temp_error(struct exynos_tmu_data *data, u32 trim_info) { - struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct exynos_tmu_platform_data *pdata = data->pdata; - const struct exynos_tmu_registers *reg = pdata->registers; - unsigned int status, trim_info = 0, con, ctrl; - unsigned int rising_threshold = 0, falling_threshold = 0; - int ret = 0, threshold_code, i; - - mutex_lock(&data->lock); - clk_enable(data->clk); - if (!IS_ERR(data->clk_sec)) - clk_enable(data->clk_sec); - if (TMU_SUPPORTS(pdata, READY_STATUS)) { - status = readb(data->base + reg->tmu_status); - if (!status) { - ret = -EBUSY; - goto out; - } - } - - if (TMU_SUPPORTS(pdata, TRIM_RELOAD)) { - for (i = 0; i < reg->triminfo_ctrl_count; i++) { - if (pdata->triminfo_reload[i]) { - ctrl = readl(data->base + - reg->triminfo_ctrl[i]); - ctrl |= pdata->triminfo_reload[i]; - writel(ctrl, data->base + - reg->triminfo_ctrl[i]); - } - } - } - - /* Save trimming info in order to perform calibration */ - if (data->soc == SOC_ARCH_EXYNOS5440) { - /* - * For exynos5440 soc triminfo value is swapped between TMU0 and - * TMU2, so the below logic is needed. 
- */ - switch (data->id) { - case 0: - trim_info = readl(data->base + - EXYNOS5440_EFUSE_SWAP_OFFSET + reg->triminfo_data); - break; - case 1: - trim_info = readl(data->base + reg->triminfo_data); - break; - case 2: - trim_info = readl(data->base - - EXYNOS5440_EFUSE_SWAP_OFFSET + reg->triminfo_data); - } - } else { - /* On exynos5420 the triminfo register is in the shared space */ - if (data->soc == SOC_ARCH_EXYNOS5420_TRIMINFO) - trim_info = readl(data->base_second + - reg->triminfo_data); - else - trim_info = readl(data->base + reg->triminfo_data); - } data->temp_error1 = trim_info & EXYNOS_TMU_TEMP_MASK; data->temp_error2 = ((trim_info >> EXYNOS_TRIMINFO_85_SHIFT) & EXYNOS_TMU_TEMP_MASK); @@ -212,69 +230,37 @@ static int exynos_tmu_initialize(struct platform_device *pdev) data->temp_error2 = (pdata->efuse_value >> EXYNOS_TRIMINFO_85_SHIFT) & EXYNOS_TMU_TEMP_MASK; +} - rising_threshold = readl(data->base + reg->threshold_th0); +static u32 get_th_reg(struct exynos_tmu_data *data, u32 threshold, bool falling) +{ + struct exynos_tmu_platform_data *pdata = data->pdata; + int i; - if (data->soc == SOC_ARCH_EXYNOS4210) { - /* Write temperature code for threshold */ - threshold_code = temp_to_code(data, pdata->threshold); - writeb(threshold_code, - data->base + reg->threshold_temp); - for (i = 0; i < pdata->non_hw_trigger_levels; i++) - writeb(pdata->trigger_levels[i], data->base + - reg->threshold_th0 + i * sizeof(reg->threshold_th0)); + for (i = 0; i < pdata->non_hw_trigger_levels; i++) { + u8 temp = pdata->trigger_levels[i]; - exynos_tmu_clear_irqs(data); - } else { - /* Write temperature code for rising and falling threshold */ - for (i = 0; i < pdata->non_hw_trigger_levels; i++) { - threshold_code = temp_to_code(data, - pdata->trigger_levels[i]); - rising_threshold &= ~(0xff << 8 * i); - rising_threshold |= threshold_code << 8 * i; - if (pdata->threshold_falling) { - threshold_code = temp_to_code(data, - pdata->trigger_levels[i] - - pdata->threshold_falling); - falling_threshold |= threshold_code << 8 * i; - } - } + if (falling) + temp -= pdata->threshold_falling; + else + threshold &= ~(0xff << 8 * i); - writel(rising_threshold, - data->base + reg->threshold_th0); - writel(falling_threshold, - data->base + reg->threshold_th1); - - exynos_tmu_clear_irqs(data); - - /* if last threshold limit is also present */ - i = pdata->max_trigger_level - 1; - if (pdata->trigger_levels[i] && - (pdata->trigger_type[i] == HW_TRIP)) { - threshold_code = temp_to_code(data, - pdata->trigger_levels[i]); - if (i == EXYNOS_MAX_TRIGGER_PER_REG - 1) { - /* 1-4 level to be assigned in th0 reg */ - rising_threshold &= ~(0xff << 8 * i); - rising_threshold |= threshold_code << 8 * i; - writel(rising_threshold, - data->base + reg->threshold_th0); - } else if (i == EXYNOS_MAX_TRIGGER_PER_REG) { - /* 5th level to be assigned in th2 reg */ - rising_threshold = - threshold_code << reg->threshold_th3_l0_shift; - writel(rising_threshold, - data->base + reg->threshold_th2); - } - con = readl(data->base + reg->tmu_ctrl); - con |= (1 << reg->therm_trip_en_shift); - writel(con, data->base + reg->tmu_ctrl); - } + threshold |= temp_to_code(data, temp) << 8 * i; } - /*Clear the PMIN in the common TMU register*/ - if (reg->tmu_pmin && !data->id) - writel(0, data->base_second + reg->tmu_pmin); -out: + + return threshold; +} + +static int exynos_tmu_initialize(struct platform_device *pdev) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + int ret; + + mutex_lock(&data->lock); + clk_enable(data->clk); + if 
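+	/*
+	 * Editorial sketch, not part of the original patch: the 25C and 85C
+	 * fuse codes cached by sanitize_temp_error() feed the existing
+	 * two-point trimming helpers, which interpolate roughly as
+	 *
+	 *	temp = (temp_code - data->temp_error1) *
+	 *		(pdata->second_point_trim - pdata->first_point_trim) /
+	 *		(data->temp_error2 - data->temp_error1) +
+	 *		pdata->first_point_trim;
+	 *
+	 * in code_to_temp(), with temp_to_code() doing the inverse.
+	 */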
(!IS_ERR(data->clk_sec)) + clk_enable(data->clk_sec); + ret = data->tmu_initialize(pdev); clk_disable(data->clk); mutex_unlock(&data->lock); if (!IS_ERR(data->clk_sec)) @@ -283,20 +269,13 @@ out: return ret; } -static void exynos_tmu_control(struct platform_device *pdev, bool on) +static u32 get_con_reg(struct exynos_tmu_data *data, u32 con) { - struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct exynos_tmu_platform_data *pdata = data->pdata; - const struct exynos_tmu_registers *reg = pdata->registers; - unsigned int con, interrupt_en; - mutex_lock(&data->lock); - clk_enable(data->clk); - - con = readl(data->base + reg->tmu_ctrl); - - if (pdata->test_mux) - con |= (pdata->test_mux << reg->test_mux_addr_shift); + if (data->soc == SOC_ARCH_EXYNOS4412 || + data->soc == SOC_ARCH_EXYNOS3250) + con |= (EXYNOS4412_MUX_ADDR_VALUE << EXYNOS4412_MUX_ADDR_SHIFT); con &= ~(EXYNOS_TMU_REF_VOLTAGE_MASK << EXYNOS_TMU_REF_VOLTAGE_SHIFT); con |= pdata->reference_voltage << EXYNOS_TMU_REF_VOLTAGE_SHIFT; @@ -305,95 +284,287 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on) con |= (pdata->gain << EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT); if (pdata->noise_cancel_mode) { - con &= ~(reg->therm_trip_mode_mask << - reg->therm_trip_mode_shift); - con |= (pdata->noise_cancel_mode << reg->therm_trip_mode_shift); + con &= ~(EXYNOS_TMU_TRIP_MODE_MASK << EXYNOS_TMU_TRIP_MODE_SHIFT); + con |= (pdata->noise_cancel_mode << EXYNOS_TMU_TRIP_MODE_SHIFT); } - if (on) { - con |= (1 << EXYNOS_TMU_CORE_EN_SHIFT); - interrupt_en = - pdata->trigger_enable[3] << reg->inten_rise3_shift | - pdata->trigger_enable[2] << reg->inten_rise2_shift | - pdata->trigger_enable[1] << reg->inten_rise1_shift | - pdata->trigger_enable[0] << reg->inten_rise0_shift; - if (TMU_SUPPORTS(pdata, FALLING_TRIP)) - interrupt_en |= - interrupt_en << reg->inten_fall0_shift; - } else { - con &= ~(1 << EXYNOS_TMU_CORE_EN_SHIFT); - interrupt_en = 0; /* Disable all interrupts */ - } - writel(interrupt_en, data->base + reg->tmu_inten); - writel(con, data->base + reg->tmu_ctrl); + return con; +} + +static void exynos_tmu_control(struct platform_device *pdev, bool on) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + mutex_lock(&data->lock); + clk_enable(data->clk); + data->tmu_control(pdev, on); clk_disable(data->clk); mutex_unlock(&data->lock); } -static int exynos_tmu_read(struct exynos_tmu_data *data) +static int exynos4210_tmu_initialize(struct platform_device *pdev) { + struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct exynos_tmu_platform_data *pdata = data->pdata; - const struct exynos_tmu_registers *reg = pdata->registers; - u8 temp_code; - int temp; + unsigned int status; + int ret = 0, threshold_code, i; - mutex_lock(&data->lock); - clk_enable(data->clk); + status = readb(data->base + EXYNOS_TMU_REG_STATUS); + if (!status) { + ret = -EBUSY; + goto out; + } - temp_code = readb(data->base + reg->tmu_cur_temp); + sanitize_temp_error(data, readl(data->base + EXYNOS_TMU_REG_TRIMINFO)); - if (data->soc == SOC_ARCH_EXYNOS4210) - /* temp_code should range between 75 and 175 */ - if (temp_code < 75 || temp_code > 175) { - temp = -ENODATA; - goto out; + /* Write temperature code for threshold */ + threshold_code = temp_to_code(data, pdata->threshold); + writeb(threshold_code, data->base + EXYNOS4210_TMU_REG_THRESHOLD_TEMP); + + for (i = 0; i < pdata->non_hw_trigger_levels; i++) + writeb(pdata->trigger_levels[i], data->base + + EXYNOS4210_TMU_REG_TRIG_LEVEL0 + i * 4); + + data->tmu_clear_irqs(data); +out: + 
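+	/*
+	 * Editorial note: the trigger_levels[] bytes written above are not
+	 * absolute codes on exynos4210; the hardware adds them to the base
+	 * code programmed into EXYNOS4210_TMU_REG_THRESHOLD_TEMP (e.g. a
+	 * threshold of 80 with offsets {5, 20, 30} trips near 85/100/110
+	 * degrees C). Later SoCs instead write absolute codes into
+	 * EXYNOS_THD_TEMP_RISE/FALL, see exynos4412_tmu_initialize() below.
+	 */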
return ret; +} + +static int exynos4412_tmu_initialize(struct platform_device *pdev) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct exynos_tmu_platform_data *pdata = data->pdata; + unsigned int status, trim_info, con, ctrl, rising_threshold; + int ret = 0, threshold_code, i; + + status = readb(data->base + EXYNOS_TMU_REG_STATUS); + if (!status) { + ret = -EBUSY; + goto out; + } + + if (data->soc == SOC_ARCH_EXYNOS3250 || + data->soc == SOC_ARCH_EXYNOS4412 || + data->soc == SOC_ARCH_EXYNOS5250) { + if (data->soc == SOC_ARCH_EXYNOS3250) { + ctrl = readl(data->base + EXYNOS_TMU_TRIMINFO_CON1); + ctrl |= EXYNOS_TRIMINFO_RELOAD_ENABLE; + writel(ctrl, data->base + EXYNOS_TMU_TRIMINFO_CON1); } + ctrl = readl(data->base + EXYNOS_TMU_TRIMINFO_CON2); + ctrl |= EXYNOS_TRIMINFO_RELOAD_ENABLE; + writel(ctrl, data->base + EXYNOS_TMU_TRIMINFO_CON2); + } - temp = code_to_temp(data, temp_code); + /* On exynos5420 the triminfo register is in the shared space */ + if (data->soc == SOC_ARCH_EXYNOS5420_TRIMINFO) + trim_info = readl(data->base_second + EXYNOS_TMU_REG_TRIMINFO); + else + trim_info = readl(data->base + EXYNOS_TMU_REG_TRIMINFO); + + sanitize_temp_error(data, trim_info); + + /* Write temperature code for rising and falling threshold */ + rising_threshold = readl(data->base + EXYNOS_THD_TEMP_RISE); + rising_threshold = get_th_reg(data, rising_threshold, false); + writel(rising_threshold, data->base + EXYNOS_THD_TEMP_RISE); + writel(get_th_reg(data, 0, true), data->base + EXYNOS_THD_TEMP_FALL); + + data->tmu_clear_irqs(data); + + /* if last threshold limit is also present */ + i = pdata->max_trigger_level - 1; + if (pdata->trigger_levels[i] && pdata->trigger_type[i] == HW_TRIP) { + threshold_code = temp_to_code(data, pdata->trigger_levels[i]); + /* 1-4 level to be assigned in th0 reg */ + rising_threshold &= ~(0xff << 8 * i); + rising_threshold |= threshold_code << 8 * i; + writel(rising_threshold, data->base + EXYNOS_THD_TEMP_RISE); + con = readl(data->base + EXYNOS_TMU_REG_CONTROL); + con |= (1 << EXYNOS_TMU_THERM_TRIP_EN_SHIFT); + writel(con, data->base + EXYNOS_TMU_REG_CONTROL); + } out: - clk_disable(data->clk); - mutex_unlock(&data->lock); + return ret; +} - return temp; +static int exynos5440_tmu_initialize(struct platform_device *pdev) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct exynos_tmu_platform_data *pdata = data->pdata; + unsigned int trim_info = 0, con, rising_threshold; + int ret = 0, threshold_code, i; + + /* + * For exynos5440 soc triminfo value is swapped between TMU0 and + * TMU2, so the below logic is needed. 
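+ * As an illustration, with EXYNOS5440_EFUSE_SWAP_OFFSET == 8 the reads
+ * below resolve to: id 0 -> TRIM + 0x8, id 1 -> TRIM + 0x0, and
+ * id 2 -> TRIM - 0x8, so TMU0 and TMU2 fetch each other's fuse words.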
+ */ + switch (data->id) { + case 0: + trim_info = readl(data->base + EXYNOS5440_EFUSE_SWAP_OFFSET + + EXYNOS5440_TMU_S0_7_TRIM); + break; + case 1: + trim_info = readl(data->base + EXYNOS5440_TMU_S0_7_TRIM); + break; + case 2: + trim_info = readl(data->base - EXYNOS5440_EFUSE_SWAP_OFFSET + + EXYNOS5440_TMU_S0_7_TRIM); + } + sanitize_temp_error(data, trim_info); + + /* Write temperature code for rising and falling threshold */ + rising_threshold = readl(data->base + EXYNOS5440_TMU_S0_7_TH0); + rising_threshold = get_th_reg(data, rising_threshold, false); + writel(rising_threshold, data->base + EXYNOS5440_TMU_S0_7_TH0); + writel(0, data->base + EXYNOS5440_TMU_S0_7_TH1); + + data->tmu_clear_irqs(data); + + /* if last threshold limit is also present */ + i = pdata->max_trigger_level - 1; + if (pdata->trigger_levels[i] && pdata->trigger_type[i] == HW_TRIP) { + threshold_code = temp_to_code(data, pdata->trigger_levels[i]); + /* 5th level to be assigned in th2 reg */ + rising_threshold = + threshold_code << EXYNOS5440_TMU_TH_RISE4_SHIFT; + writel(rising_threshold, data->base + EXYNOS5440_TMU_S0_7_TH2); + con = readl(data->base + EXYNOS5440_TMU_S0_7_CTRL); + con |= (1 << EXYNOS_TMU_THERM_TRIP_EN_SHIFT); + writel(con, data->base + EXYNOS5440_TMU_S0_7_CTRL); + } + /* Clear the PMIN in the common TMU register */ + if (!data->id) + writel(0, data->base_second + EXYNOS5440_TMU_PMIN); + return ret; } -#ifdef CONFIG_THERMAL_EMULATION -static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) +static void exynos4210_tmu_control(struct platform_device *pdev, bool on) { - struct exynos_tmu_data *data = drv_data; + struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct exynos_tmu_platform_data *pdata = data->pdata; - const struct exynos_tmu_registers *reg = pdata->registers; - unsigned int val; - int ret = -EINVAL; + unsigned int con, interrupt_en; - if (!TMU_SUPPORTS(pdata, EMULATION)) - goto out; + con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL)); - if (temp && temp < MCELSIUS) - goto out; + if (on) { + con |= (1 << EXYNOS_TMU_CORE_EN_SHIFT); + interrupt_en = + pdata->trigger_enable[3] << EXYNOS_TMU_INTEN_RISE3_SHIFT | + pdata->trigger_enable[2] << EXYNOS_TMU_INTEN_RISE2_SHIFT | + pdata->trigger_enable[1] << EXYNOS_TMU_INTEN_RISE1_SHIFT | + pdata->trigger_enable[0] << EXYNOS_TMU_INTEN_RISE0_SHIFT; + if (data->soc != SOC_ARCH_EXYNOS4210) + interrupt_en |= + interrupt_en << EXYNOS_TMU_INTEN_FALL0_SHIFT; + } else { + con &= ~(1 << EXYNOS_TMU_CORE_EN_SHIFT); + interrupt_en = 0; /* Disable all interrupts */ + } + writel(interrupt_en, data->base + EXYNOS_TMU_REG_INTEN); + writel(con, data->base + EXYNOS_TMU_REG_CONTROL); +} + +static void exynos5440_tmu_control(struct platform_device *pdev, bool on) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct exynos_tmu_platform_data *pdata = data->pdata; + unsigned int con, interrupt_en; + + con = get_con_reg(data, readl(data->base + EXYNOS5440_TMU_S0_7_CTRL)); + + if (on) { + con |= (1 << EXYNOS_TMU_CORE_EN_SHIFT); + interrupt_en = + pdata->trigger_enable[3] << EXYNOS5440_TMU_INTEN_RISE3_SHIFT | + pdata->trigger_enable[2] << EXYNOS5440_TMU_INTEN_RISE2_SHIFT | + pdata->trigger_enable[1] << EXYNOS5440_TMU_INTEN_RISE1_SHIFT | + pdata->trigger_enable[0] << EXYNOS5440_TMU_INTEN_RISE0_SHIFT; + interrupt_en |= interrupt_en << EXYNOS5440_TMU_INTEN_FALL0_SHIFT; + } else { + con &= ~(1 << EXYNOS_TMU_CORE_EN_SHIFT); + interrupt_en = 0; /* Disable all interrupts */ + } + writel(interrupt_en, data->base + 
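+		/* on exynos5440 the rise0..3 enables sit at bits 0..3 and
+		 * fall0 at bit 4, unlike the generic EXYNOS_TMU_INTEN
+		 * layout whose enables are spaced 4 bits apart with fall0
+		 * at bit 16. */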
EXYNOS5440_TMU_S0_7_IRQEN); + writel(con, data->base + EXYNOS5440_TMU_S0_7_CTRL); +} + +static int exynos_tmu_read(struct exynos_tmu_data *data) +{ + int ret; mutex_lock(&data->lock); clk_enable(data->clk); + ret = data->tmu_read(data); + if (ret >= 0) + ret = code_to_temp(data, ret); + clk_disable(data->clk); + mutex_unlock(&data->lock); - val = readl(data->base + reg->emul_con); + return ret; +} +#ifdef CONFIG_THERMAL_EMULATION +static u32 get_emul_con_reg(struct exynos_tmu_data *data, unsigned int val, + unsigned long temp) +{ if (temp) { temp /= MCELSIUS; - if (TMU_SUPPORTS(pdata, EMUL_TIME)) { - val &= ~(EXYNOS_EMUL_TIME_MASK << reg->emul_time_shift); - val |= (EXYNOS_EMUL_TIME << reg->emul_time_shift); + if (data->soc != SOC_ARCH_EXYNOS5440) { + val &= ~(EXYNOS_EMUL_TIME_MASK << EXYNOS_EMUL_TIME_SHIFT); + val |= (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT); } - val &= ~(EXYNOS_EMUL_DATA_MASK << reg->emul_temp_shift); - val |= (temp_to_code(data, temp) << reg->emul_temp_shift) | + val &= ~(EXYNOS_EMUL_DATA_MASK << EXYNOS_EMUL_DATA_SHIFT); + val |= (temp_to_code(data, temp) << EXYNOS_EMUL_DATA_SHIFT) | EXYNOS_EMUL_ENABLE; } else { val &= ~EXYNOS_EMUL_ENABLE; } - writel(val, data->base + reg->emul_con); + return val; +} + +static void exynos4412_tmu_set_emulation(struct exynos_tmu_data *data, + unsigned long temp) +{ + unsigned int val; + u32 emul_con; + + if (data->soc == SOC_ARCH_EXYNOS5260) + emul_con = EXYNOS5260_EMUL_CON; + else + emul_con = EXYNOS_EMUL_CON; + + val = readl(data->base + emul_con); + val = get_emul_con_reg(data, val, temp); + writel(val, data->base + emul_con); +} + +static void exynos5440_tmu_set_emulation(struct exynos_tmu_data *data, + unsigned long temp) +{ + unsigned int val; + + val = readl(data->base + EXYNOS5440_TMU_S0_7_DEBUG); + val = get_emul_con_reg(data, val, temp); + writel(val, data->base + EXYNOS5440_TMU_S0_7_DEBUG); +} + +static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) +{ + struct exynos_tmu_data *data = drv_data; + int ret = -EINVAL; + + if (data->soc == SOC_ARCH_EXYNOS4210) + goto out; + if (temp && temp < MCELSIUS) + goto out; + + mutex_lock(&data->lock); + clk_enable(data->clk); + data->tmu_set_emulation(data, temp); clk_disable(data->clk); mutex_unlock(&data->lock); return 0; @@ -401,23 +572,41 @@ out: return ret; } #else +#define exynos4412_tmu_set_emulation NULL +#define exynos5440_tmu_set_emulation NULL static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) { return -EINVAL; } #endif/*CONFIG_THERMAL_EMULATION*/ +static int exynos4210_tmu_read(struct exynos_tmu_data *data) +{ + int ret = readb(data->base + EXYNOS_TMU_REG_CURRENT_TEMP); + + /* "temp_code" should range between 75 and 175 */ + return (ret < 75 || ret > 175) ? 
-ENODATA : ret; +} + +static int exynos4412_tmu_read(struct exynos_tmu_data *data) +{ + return readb(data->base + EXYNOS_TMU_REG_CURRENT_TEMP); +} + +static int exynos5440_tmu_read(struct exynos_tmu_data *data) +{ + return readb(data->base + EXYNOS5440_TMU_S0_7_TEMP); +} + static void exynos_tmu_work(struct work_struct *work) { struct exynos_tmu_data *data = container_of(work, struct exynos_tmu_data, irq_work); - struct exynos_tmu_platform_data *pdata = data->pdata; - const struct exynos_tmu_registers *reg = pdata->registers; unsigned int val_type; if (!IS_ERR(data->clk_sec)) clk_enable(data->clk_sec); /* Find which sensor generated this interrupt */ - if (reg->tmu_irqstatus) { - val_type = readl(data->base_second + reg->tmu_irqstatus); + if (data->soc == SOC_ARCH_EXYNOS5440) { + val_type = readl(data->base_second + EXYNOS5440_TMU_IRQ_STATUS); if (!((val_type >> data->id) & 0x1)) goto out; } @@ -429,7 +618,7 @@ static void exynos_tmu_work(struct work_struct *work) clk_enable(data->clk); /* TODO: take action based on particular interrupt */ - exynos_tmu_clear_irqs(data); + data->tmu_clear_irqs(data); clk_disable(data->clk); mutex_unlock(&data->lock); @@ -437,6 +626,40 @@ out: enable_irq(data->irq); } +static void exynos4210_tmu_clear_irqs(struct exynos_tmu_data *data) +{ + unsigned int val_irq; + u32 tmu_intstat, tmu_intclear; + + if (data->soc == SOC_ARCH_EXYNOS5260) { + tmu_intstat = EXYNOS5260_TMU_REG_INTSTAT; + tmu_intclear = EXYNOS5260_TMU_REG_INTCLEAR; + } else { + tmu_intstat = EXYNOS_TMU_REG_INTSTAT; + tmu_intclear = EXYNOS_TMU_REG_INTCLEAR; + } + + val_irq = readl(data->base + tmu_intstat); + /* + * Clear the interrupts. Please note that the documentation for + * Exynos3250, Exynos4412, Exynos5250 and Exynos5260 incorrectly + * states that INTCLEAR register has a different placing of bits + * responsible for FALL IRQs than INTSTAT register. Exynos5420 + * and Exynos5440 documentation is correct (Exynos4210 doesn't + * support FALL IRQs at all). 
+ */ + writel(val_irq, data->base + tmu_intclear); +} + +static void exynos5440_tmu_clear_irqs(struct exynos_tmu_data *data) +{ + unsigned int val_irq; + + val_irq = readl(data->base + EXYNOS5440_TMU_S0_7_IRQ); + /* clear the interrupts */ + writel(val_irq, data->base + EXYNOS5440_TMU_S0_7_IRQ); +} + static irqreturn_t exynos_tmu_irq(int irq, void *id) { struct exynos_tmu_data *data = id; @@ -450,35 +673,35 @@ static irqreturn_t exynos_tmu_irq(int irq, void *id) static const struct of_device_id exynos_tmu_match[] = { { .compatible = "samsung,exynos3250-tmu", - .data = (void *)EXYNOS3250_TMU_DRV_DATA, + .data = &exynos3250_default_tmu_data, }, { .compatible = "samsung,exynos4210-tmu", - .data = (void *)EXYNOS4210_TMU_DRV_DATA, + .data = &exynos4210_default_tmu_data, }, { .compatible = "samsung,exynos4412-tmu", - .data = (void *)EXYNOS4412_TMU_DRV_DATA, + .data = &exynos4412_default_tmu_data, }, { .compatible = "samsung,exynos5250-tmu", - .data = (void *)EXYNOS5250_TMU_DRV_DATA, + .data = &exynos5250_default_tmu_data, }, { .compatible = "samsung,exynos5260-tmu", - .data = (void *)EXYNOS5260_TMU_DRV_DATA, + .data = &exynos5260_default_tmu_data, }, { .compatible = "samsung,exynos5420-tmu", - .data = (void *)EXYNOS5420_TMU_DRV_DATA, + .data = &exynos5420_default_tmu_data, }, { .compatible = "samsung,exynos5420-tmu-ext-triminfo", - .data = (void *)EXYNOS5420_TMU_DRV_DATA, + .data = &exynos5420_default_tmu_data, }, { .compatible = "samsung,exynos5440-tmu", - .data = (void *)EXYNOS5440_TMU_DRV_DATA, + .data = &exynos5440_default_tmu_data, }, {}, }; @@ -553,12 +776,47 @@ static int exynos_map_dt_data(struct platform_device *pdev) dev_err(&pdev->dev, "No platform init data supplied.\n"); return -ENODEV; } + data->pdata = pdata; + data->soc = pdata->type; + + switch (data->soc) { + case SOC_ARCH_EXYNOS4210: + data->tmu_initialize = exynos4210_tmu_initialize; + data->tmu_control = exynos4210_tmu_control; + data->tmu_read = exynos4210_tmu_read; + data->tmu_clear_irqs = exynos4210_tmu_clear_irqs; + break; + case SOC_ARCH_EXYNOS3250: + case SOC_ARCH_EXYNOS4412: + case SOC_ARCH_EXYNOS5250: + case SOC_ARCH_EXYNOS5260: + case SOC_ARCH_EXYNOS5420: + case SOC_ARCH_EXYNOS5420_TRIMINFO: + data->tmu_initialize = exynos4412_tmu_initialize; + data->tmu_control = exynos4210_tmu_control; + data->tmu_read = exynos4412_tmu_read; + data->tmu_set_emulation = exynos4412_tmu_set_emulation; + data->tmu_clear_irqs = exynos4210_tmu_clear_irqs; + break; + case SOC_ARCH_EXYNOS5440: + data->tmu_initialize = exynos5440_tmu_initialize; + data->tmu_control = exynos5440_tmu_control; + data->tmu_read = exynos5440_tmu_read; + data->tmu_set_emulation = exynos5440_tmu_set_emulation; + data->tmu_clear_irqs = exynos5440_tmu_clear_irqs; + break; + default: + dev_err(&pdev->dev, "Platform not supported\n"); + return -EINVAL; + } + /* * Check if the TMU shares some registers and then try to map the * memory of common registers. 
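 * (The second region holds the shared TRIMINFO block for
 * exynos5420-tmu-ext-triminfo and the common IRQ status/PMIN block
 * for exynos5440, as used by the initialize and work paths above.)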
*/ - if (!TMU_SUPPORTS(pdata, ADDRESS_MULTIPLE)) + if (data->soc != SOC_ARCH_EXYNOS5420_TRIMINFO && + data->soc != SOC_ARCH_EXYNOS5440) return 0; if (of_address_to_resource(pdev->dev.of_node, 1, &res)) { @@ -625,20 +883,6 @@ static int exynos_tmu_probe(struct platform_device *pdev) goto err_clk_sec; } - if (pdata->type == SOC_ARCH_EXYNOS3250 || - pdata->type == SOC_ARCH_EXYNOS4210 || - pdata->type == SOC_ARCH_EXYNOS4412 || - pdata->type == SOC_ARCH_EXYNOS5250 || - pdata->type == SOC_ARCH_EXYNOS5260 || - pdata->type == SOC_ARCH_EXYNOS5420_TRIMINFO || - pdata->type == SOC_ARCH_EXYNOS5440) - data->soc = pdata->type; - else { - ret = -EINVAL; - dev_err(&pdev->dev, "Platform not supported\n"); - goto err_clk; - } - ret = exynos_tmu_initialize(pdev); if (ret) { dev_err(&pdev->dev, "Failed to initialize TMU\n"); diff --git a/drivers/thermal/samsung/exynos_tmu.h b/drivers/thermal/samsung/exynos_tmu.h index c58c7663a3f..da3009bff6c 100644 --- a/drivers/thermal/samsung/exynos_tmu.h +++ b/drivers/thermal/samsung/exynos_tmu.h @@ -40,115 +40,12 @@ enum soc_type { SOC_ARCH_EXYNOS4412, SOC_ARCH_EXYNOS5250, SOC_ARCH_EXYNOS5260, + SOC_ARCH_EXYNOS5420, SOC_ARCH_EXYNOS5420_TRIMINFO, SOC_ARCH_EXYNOS5440, }; /** - * EXYNOS TMU supported features. - * TMU_SUPPORT_EMULATION - This features is used to set user defined - * temperature to the TMU controller. - * TMU_SUPPORT_MULTI_INST - This features denotes that the soc - * has many instances of TMU. - * TMU_SUPPORT_TRIM_RELOAD - This features shows that trimming can - * be reloaded. - * TMU_SUPPORT_FALLING_TRIP - This features shows that interrupt can - * be registered for falling trips also. - * TMU_SUPPORT_READY_STATUS - This feature tells that the TMU current - * state(active/idle) can be checked. - * TMU_SUPPORT_EMUL_TIME - This features allows to set next temp emulation - * sample time. - * TMU_SUPPORT_ADDRESS_MULTIPLE - This feature tells that the different TMU - * sensors shares some common registers. - * TMU_SUPPORT - macro to compare the above features with the supplied. - */ -#define TMU_SUPPORT_EMULATION BIT(0) -#define TMU_SUPPORT_MULTI_INST BIT(1) -#define TMU_SUPPORT_TRIM_RELOAD BIT(2) -#define TMU_SUPPORT_FALLING_TRIP BIT(3) -#define TMU_SUPPORT_READY_STATUS BIT(4) -#define TMU_SUPPORT_EMUL_TIME BIT(5) -#define TMU_SUPPORT_ADDRESS_MULTIPLE BIT(6) - -#define TMU_SUPPORTS(a, b) (a->features & TMU_SUPPORT_ ## b) - -/** - * struct exynos_tmu_register - register descriptors to access registers and - * bitfields. The register validity, offsets and bitfield values may vary - * slightly across different exynos SOC's. - * @triminfo_data: register containing 2 pont trimming data - * @triminfo_ctrl: trim info controller register. - * @triminfo_ctrl_count: the number of trim info controller register. - * @tmu_ctrl: TMU main controller register. - * @test_mux_addr_shift: shift bits of test mux address. - * @therm_trip_mode_shift: shift bits of tripping mode in tmu_ctrl register. - * @therm_trip_mode_mask: mask bits of tripping mode in tmu_ctrl register. - * @therm_trip_en_shift: shift bits of tripping enable in tmu_ctrl register. - * @tmu_status: register drescribing the TMU status. - * @tmu_cur_temp: register containing the current temperature of the TMU. - * @threshold_temp: register containing the base threshold level. - * @threshold_th0: Register containing first set of rising levels. - * @threshold_th1: Register containing second set of rising levels. - * @threshold_th2: Register containing third set of rising levels. 
- * @threshold_th3_l0_shift: shift bits of level0 threshold temperature. - * @tmu_inten: register containing the different threshold interrupt - enable bits. - * @inten_rise0_shift: shift bits of rising 0 interrupt bits. - * @inten_rise1_shift: shift bits of rising 1 interrupt bits. - * @inten_rise2_shift: shift bits of rising 2 interrupt bits. - * @inten_rise3_shift: shift bits of rising 3 interrupt bits. - * @inten_fall0_shift: shift bits of falling 0 interrupt bits. - * @tmu_intstat: Register containing the interrupt status values. - * @tmu_intclear: Register for clearing the raised interrupt status. - * @emul_con: TMU emulation controller register. - * @emul_temp_shift: shift bits of emulation temperature. - * @emul_time_shift: shift bits of emulation time. - * @tmu_irqstatus: register to find which TMU generated interrupts. - * @tmu_pmin: register to get/set the Pmin value. - */ -struct exynos_tmu_registers { - u32 triminfo_data; - - u32 triminfo_ctrl[MAX_TRIMINFO_CTRL_REG]; - u32 triminfo_ctrl_count; - - u32 tmu_ctrl; - u32 test_mux_addr_shift; - u32 therm_trip_mode_shift; - u32 therm_trip_mode_mask; - u32 therm_trip_en_shift; - - u32 tmu_status; - - u32 tmu_cur_temp; - - u32 threshold_temp; - - u32 threshold_th0; - u32 threshold_th1; - u32 threshold_th2; - u32 threshold_th3_l0_shift; - - u32 tmu_inten; - u32 inten_rise0_shift; - u32 inten_rise1_shift; - u32 inten_rise2_shift; - u32 inten_rise3_shift; - u32 inten_fall0_shift; - - u32 tmu_intstat; - - u32 tmu_intclear; - - u32 emul_con; - u32 emul_temp_shift; - u32 emul_time_shift; - - u32 tmu_irqstatus; - u32 tmu_pmin; -}; - -/** * struct exynos_tmu_platform_data * @threshold: basic temperature for generating interrupt * 25 <= threshold <= 125 [unit: degree Celsius] @@ -192,16 +89,10 @@ struct exynos_tmu_registers { * @first_point_trim: temp value of the first point trimming * @second_point_trim: temp value of the second point trimming * @default_temp_offset: default temperature offset in case of no trimming - * @test_mux; information if SoC supports test MUX - * @triminfo_reload: reload value to read TRIMINFO register * @cal_type: calibration type for temperature * @freq_clip_table: Table representing frequency reduction percentage. * @freq_tab_count: Count of the above table as frequency reduction may * applicable to only some of the trigger levels. - * @registers: Pointer to structure containing all the TMU controller registers - * and bitfields shifts and masks. - * @features: a bitfield value indicating the features supported in SOC like - * emulation, multi instance etc * * This structure is required for configuration of exynos_tmu driver. 
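 * For concrete instances of this structure see the per-SoC
 * exynos*_default_tmu_data tables in exynos_tmu_data.c later in
 * this patch.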
*/ @@ -223,15 +114,11 @@ struct exynos_tmu_platform_data { u8 first_point_trim; u8 second_point_trim; u8 default_temp_offset; - u8 test_mux; - u8 triminfo_reload[MAX_TRIMINFO_CTRL_REG]; enum calibration_type cal_type; enum soc_type type; struct freq_clip_table freq_tab[4]; unsigned int freq_tab_count; - const struct exynos_tmu_registers *registers; - unsigned int features; }; /** @@ -246,4 +133,12 @@ struct exynos_tmu_init_data { struct exynos_tmu_platform_data tmu_data[]; }; +extern struct exynos_tmu_init_data const exynos3250_default_tmu_data; +extern struct exynos_tmu_init_data const exynos4210_default_tmu_data; +extern struct exynos_tmu_init_data const exynos4412_default_tmu_data; +extern struct exynos_tmu_init_data const exynos5250_default_tmu_data; +extern struct exynos_tmu_init_data const exynos5260_default_tmu_data; +extern struct exynos_tmu_init_data const exynos5420_default_tmu_data; +extern struct exynos_tmu_init_data const exynos5440_default_tmu_data; + #endif /* _EXYNOS_TMU_H */ diff --git a/drivers/thermal/samsung/exynos_tmu_data.c b/drivers/thermal/samsung/exynos_tmu_data.c index 1724f6cdaef..b23910069f6 100644 --- a/drivers/thermal/samsung/exynos_tmu_data.c +++ b/drivers/thermal/samsung/exynos_tmu_data.c @@ -22,24 +22,6 @@ #include "exynos_thermal_common.h" #include "exynos_tmu.h" -#include "exynos_tmu_data.h" - -#if defined(CONFIG_CPU_EXYNOS4210) -static const struct exynos_tmu_registers exynos4210_tmu_registers = { - .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, - .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, - .tmu_status = EXYNOS_TMU_REG_STATUS, - .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, - .threshold_temp = EXYNOS4210_TMU_REG_THRESHOLD_TEMP, - .threshold_th0 = EXYNOS4210_TMU_REG_TRIG_LEVEL0, - .tmu_inten = EXYNOS_TMU_REG_INTEN, - .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, - .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, - .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, - .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT, - .tmu_intstat = EXYNOS_TMU_REG_INTSTAT, - .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR, -}; struct exynos_tmu_init_data const exynos4210_default_tmu_data = { .tmu_data = { @@ -75,40 +57,10 @@ struct exynos_tmu_init_data const exynos4210_default_tmu_data = { }, .freq_tab_count = 2, .type = SOC_ARCH_EXYNOS4210, - .registers = &exynos4210_tmu_registers, - .features = TMU_SUPPORT_READY_STATUS, }, }, .tmu_count = 1, }; -#endif - -#if defined(CONFIG_SOC_EXYNOS3250) -static const struct exynos_tmu_registers exynos3250_tmu_registers = { - .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, - .triminfo_ctrl[0] = EXYNOS_TMU_TRIMINFO_CON1, - .triminfo_ctrl[1] = EXYNOS_TMU_TRIMINFO_CON2, - .triminfo_ctrl_count = 2, - .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, - .test_mux_addr_shift = EXYNOS4412_MUX_ADDR_SHIFT, - .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, - .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, - .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, - .tmu_status = EXYNOS_TMU_REG_STATUS, - .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, - .threshold_th0 = EXYNOS_THD_TEMP_RISE, - .threshold_th1 = EXYNOS_THD_TEMP_FALL, - .tmu_inten = EXYNOS_TMU_REG_INTEN, - .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, - .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, - .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, - .inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT, - .tmu_intstat = EXYNOS_TMU_REG_INTSTAT, - .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR, - .emul_con = EXYNOS_EMUL_CON, - .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, - .emul_time_shift = 
EXYNOS_EMUL_TIME_SHIFT, -}; #define EXYNOS3250_TMU_DATA \ .threshold_falling = 10, \ @@ -144,54 +96,17 @@ static const struct exynos_tmu_registers exynos3250_tmu_registers = { .freq_clip_max = 400 * 1000, \ .temp_level = 95, \ }, \ - .freq_tab_count = 2, \ - .triminfo_reload[0] = EXYNOS_TRIMINFO_RELOAD_ENABLE, \ - .triminfo_reload[1] = EXYNOS_TRIMINFO_RELOAD_ENABLE, \ - .registers = &exynos3250_tmu_registers, \ - .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_TRIM_RELOAD | \ - TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \ - TMU_SUPPORT_EMUL_TIME) -#endif + .freq_tab_count = 2 -#if defined(CONFIG_SOC_EXYNOS3250) struct exynos_tmu_init_data const exynos3250_default_tmu_data = { .tmu_data = { { EXYNOS3250_TMU_DATA, .type = SOC_ARCH_EXYNOS3250, - .test_mux = EXYNOS4412_MUX_ADDR_VALUE, }, }, .tmu_count = 1, }; -#endif - -#if defined(CONFIG_SOC_EXYNOS4412) || defined(CONFIG_SOC_EXYNOS5250) -static const struct exynos_tmu_registers exynos4412_tmu_registers = { - .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, - .triminfo_ctrl[0] = EXYNOS_TMU_TRIMINFO_CON2, - .triminfo_ctrl_count = 1, - .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, - .test_mux_addr_shift = EXYNOS4412_MUX_ADDR_SHIFT, - .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, - .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, - .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, - .tmu_status = EXYNOS_TMU_REG_STATUS, - .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, - .threshold_th0 = EXYNOS_THD_TEMP_RISE, - .threshold_th1 = EXYNOS_THD_TEMP_FALL, - .tmu_inten = EXYNOS_TMU_REG_INTEN, - .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, - .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, - .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, - .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT, - .inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT, - .tmu_intstat = EXYNOS_TMU_REG_INTSTAT, - .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR, - .emul_con = EXYNOS_EMUL_CON, - .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, - .emul_time_shift = EXYNOS_EMUL_TIME_SHIFT, -}; #define EXYNOS4412_TMU_DATA \ .threshold_falling = 10, \ @@ -227,28 +142,18 @@ static const struct exynos_tmu_registers exynos4412_tmu_registers = { .freq_clip_max = 400 * 1000, \ .temp_level = 95, \ }, \ - .freq_tab_count = 2, \ - .triminfo_reload[0] = EXYNOS_TRIMINFO_RELOAD_ENABLE, \ - .registers = &exynos4412_tmu_registers, \ - .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_TRIM_RELOAD | \ - TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \ - TMU_SUPPORT_EMUL_TIME) -#endif + .freq_tab_count = 2 -#if defined(CONFIG_SOC_EXYNOS4412) struct exynos_tmu_init_data const exynos4412_default_tmu_data = { .tmu_data = { { EXYNOS4412_TMU_DATA, .type = SOC_ARCH_EXYNOS4412, - .test_mux = EXYNOS4412_MUX_ADDR_VALUE, }, }, .tmu_count = 1, }; -#endif -#if defined(CONFIG_SOC_EXYNOS5250) struct exynos_tmu_init_data const exynos5250_default_tmu_data = { .tmu_data = { { @@ -258,31 +163,6 @@ struct exynos_tmu_init_data const exynos5250_default_tmu_data = { }, .tmu_count = 1, }; -#endif - -#if defined(CONFIG_SOC_EXYNOS5260) -static const struct exynos_tmu_registers exynos5260_tmu_registers = { - .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, - .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, - .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, - .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, - .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, - .tmu_status = EXYNOS_TMU_REG_STATUS, - .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, - .threshold_th0 = EXYNOS_THD_TEMP_RISE, - .threshold_th1 = EXYNOS_THD_TEMP_FALL, - 
.tmu_inten = EXYNOS5260_TMU_REG_INTEN, - .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, - .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, - .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, - .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT, - .inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT, - .tmu_intstat = EXYNOS5260_TMU_REG_INTSTAT, - .tmu_intclear = EXYNOS5260_TMU_REG_INTCLEAR, - .emul_con = EXYNOS5260_EMUL_CON, - .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, - .emul_time_shift = EXYNOS_EMUL_TIME_SHIFT, -}; #define __EXYNOS5260_TMU_DATA \ .threshold_falling = 10, \ @@ -319,13 +199,10 @@ static const struct exynos_tmu_registers exynos5260_tmu_registers = { .temp_level = 103, \ }, \ .freq_tab_count = 2, \ - .registers = &exynos5260_tmu_registers, \ #define EXYNOS5260_TMU_DATA \ __EXYNOS5260_TMU_DATA \ - .type = SOC_ARCH_EXYNOS5260, \ - .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \ - TMU_SUPPORT_READY_STATUS | TMU_SUPPORT_EMUL_TIME) + .type = SOC_ARCH_EXYNOS5260 struct exynos_tmu_init_data const exynos5260_default_tmu_data = { .tmu_data = { @@ -337,82 +214,14 @@ struct exynos_tmu_init_data const exynos5260_default_tmu_data = { }, .tmu_count = 5, }; -#endif - -#if defined(CONFIG_SOC_EXYNOS5420) -static const struct exynos_tmu_registers exynos5420_tmu_registers = { - .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, - .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, - .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, - .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, - .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, - .tmu_status = EXYNOS_TMU_REG_STATUS, - .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, - .threshold_th0 = EXYNOS_THD_TEMP_RISE, - .threshold_th1 = EXYNOS_THD_TEMP_FALL, - .tmu_inten = EXYNOS_TMU_REG_INTEN, - .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, - .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, - .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, - /* INTEN_RISE3 Not availble in exynos5420 */ - .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT, - .inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT, - .tmu_intstat = EXYNOS_TMU_REG_INTSTAT, - .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR, - .emul_con = EXYNOS_EMUL_CON, - .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, - .emul_time_shift = EXYNOS_EMUL_TIME_SHIFT, -}; - -#define __EXYNOS5420_TMU_DATA \ - .threshold_falling = 10, \ - .trigger_levels[0] = 85, \ - .trigger_levels[1] = 103, \ - .trigger_levels[2] = 110, \ - .trigger_levels[3] = 120, \ - .trigger_enable[0] = true, \ - .trigger_enable[1] = true, \ - .trigger_enable[2] = true, \ - .trigger_enable[3] = false, \ - .trigger_type[0] = THROTTLE_ACTIVE, \ - .trigger_type[1] = THROTTLE_ACTIVE, \ - .trigger_type[2] = SW_TRIP, \ - .trigger_type[3] = HW_TRIP, \ - .max_trigger_level = 4, \ - .non_hw_trigger_levels = 3, \ - .gain = 8, \ - .reference_voltage = 16, \ - .noise_cancel_mode = 4, \ - .cal_type = TYPE_ONE_POINT_TRIMMING, \ - .efuse_value = 55, \ - .min_efuse_value = 40, \ - .max_efuse_value = 100, \ - .first_point_trim = 25, \ - .second_point_trim = 85, \ - .default_temp_offset = 50, \ - .freq_tab[0] = { \ - .freq_clip_max = 800 * 1000, \ - .temp_level = 85, \ - }, \ - .freq_tab[1] = { \ - .freq_clip_max = 200 * 1000, \ - .temp_level = 103, \ - }, \ - .freq_tab_count = 2, \ - .registers = &exynos5420_tmu_registers, \ #define EXYNOS5420_TMU_DATA \ - __EXYNOS5420_TMU_DATA \ - .type = SOC_ARCH_EXYNOS5250, \ - .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \ - TMU_SUPPORT_READY_STATUS | TMU_SUPPORT_EMUL_TIME) + __EXYNOS5260_TMU_DATA \ + 
.type = SOC_ARCH_EXYNOS5420 #define EXYNOS5420_TMU_DATA_SHARED \ - __EXYNOS5420_TMU_DATA \ - .type = SOC_ARCH_EXYNOS5420_TRIMINFO, \ - .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \ - TMU_SUPPORT_READY_STATUS | TMU_SUPPORT_EMUL_TIME | \ - TMU_SUPPORT_ADDRESS_MULTIPLE) + __EXYNOS5260_TMU_DATA \ + .type = SOC_ARCH_EXYNOS5420_TRIMINFO struct exynos_tmu_init_data const exynos5420_default_tmu_data = { .tmu_data = { @@ -424,34 +233,6 @@ struct exynos_tmu_init_data const exynos5420_default_tmu_data = { }, .tmu_count = 5, }; -#endif - -#if defined(CONFIG_SOC_EXYNOS5440) -static const struct exynos_tmu_registers exynos5440_tmu_registers = { - .triminfo_data = EXYNOS5440_TMU_S0_7_TRIM, - .tmu_ctrl = EXYNOS5440_TMU_S0_7_CTRL, - .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, - .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, - .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, - .tmu_status = EXYNOS5440_TMU_S0_7_STATUS, - .tmu_cur_temp = EXYNOS5440_TMU_S0_7_TEMP, - .threshold_th0 = EXYNOS5440_TMU_S0_7_TH0, - .threshold_th1 = EXYNOS5440_TMU_S0_7_TH1, - .threshold_th2 = EXYNOS5440_TMU_S0_7_TH2, - .threshold_th3_l0_shift = EXYNOS5440_TMU_TH_RISE4_SHIFT, - .tmu_inten = EXYNOS5440_TMU_S0_7_IRQEN, - .inten_rise0_shift = EXYNOS5440_TMU_INTEN_RISE0_SHIFT, - .inten_rise1_shift = EXYNOS5440_TMU_INTEN_RISE1_SHIFT, - .inten_rise2_shift = EXYNOS5440_TMU_INTEN_RISE2_SHIFT, - .inten_rise3_shift = EXYNOS5440_TMU_INTEN_RISE3_SHIFT, - .inten_fall0_shift = EXYNOS5440_TMU_INTEN_FALL0_SHIFT, - .tmu_intstat = EXYNOS5440_TMU_S0_7_IRQ, - .tmu_intclear = EXYNOS5440_TMU_S0_7_IRQ, - .tmu_irqstatus = EXYNOS5440_TMU_IRQ_STATUS, - .emul_con = EXYNOS5440_TMU_S0_7_DEBUG, - .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, - .tmu_pmin = EXYNOS5440_TMU_PMIN, -}; #define EXYNOS5440_TMU_DATA \ .trigger_levels[0] = 100, \ @@ -471,10 +252,7 @@ static const struct exynos_tmu_registers exynos5440_tmu_registers = { .first_point_trim = 25, \ .second_point_trim = 70, \ .default_temp_offset = 25, \ - .type = SOC_ARCH_EXYNOS5440, \ - .registers = &exynos5440_tmu_registers, \ - .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \ - TMU_SUPPORT_MULTI_INST | TMU_SUPPORT_ADDRESS_MULTIPLE), + .type = SOC_ARCH_EXYNOS5440 struct exynos_tmu_init_data const exynos5440_default_tmu_data = { .tmu_data = { @@ -484,4 +262,3 @@ struct exynos_tmu_init_data const exynos5440_default_tmu_data = { }, .tmu_count = 3, }; -#endif diff --git a/drivers/thermal/samsung/exynos_tmu_data.h b/drivers/thermal/samsung/exynos_tmu_data.h deleted file mode 100644 index 63de598c9c2..00000000000 --- a/drivers/thermal/samsung/exynos_tmu_data.h +++ /dev/null @@ -1,159 +0,0 @@ -/* - * exynos_tmu_data.h - Samsung EXYNOS tmu data header file - * - * Copyright (C) 2013 Samsung Electronics - * Amit Daniel Kachhap <amit.daniel@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef _EXYNOS_TMU_DATA_H -#define _EXYNOS_TMU_DATA_H - -/* Exynos generic registers */ -#define EXYNOS_TMU_REG_TRIMINFO 0x0 -#define EXYNOS_TMU_REG_CONTROL 0x20 -#define EXYNOS_TMU_REG_STATUS 0x28 -#define EXYNOS_TMU_REG_CURRENT_TEMP 0x40 -#define EXYNOS_TMU_REG_INTEN 0x70 -#define EXYNOS_TMU_REG_INTSTAT 0x74 -#define EXYNOS_TMU_REG_INTCLEAR 0x78 - -#define EXYNOS_TMU_TEMP_MASK 0xff -#define EXYNOS_TMU_REF_VOLTAGE_SHIFT 24 -#define EXYNOS_TMU_REF_VOLTAGE_MASK 0x1f -#define EXYNOS_TMU_BUF_SLOPE_SEL_MASK 0xf -#define EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT 8 -#define EXYNOS_TMU_CORE_EN_SHIFT 0 - -/* Exynos3250 specific registers */ -#define EXYNOS_TMU_TRIMINFO_CON1 0x10 - -/* Exynos4210 specific registers */ -#define EXYNOS4210_TMU_REG_THRESHOLD_TEMP 0x44 -#define EXYNOS4210_TMU_REG_TRIG_LEVEL0 0x50 - -/* Exynos5250, Exynos4412, Exynos3250 specific registers */ -#define EXYNOS_TMU_TRIMINFO_CON2 0x14 -#define EXYNOS_THD_TEMP_RISE 0x50 -#define EXYNOS_THD_TEMP_FALL 0x54 -#define EXYNOS_EMUL_CON 0x80 - -#define EXYNOS_TRIMINFO_RELOAD_ENABLE 1 -#define EXYNOS_TRIMINFO_25_SHIFT 0 -#define EXYNOS_TRIMINFO_85_SHIFT 8 -#define EXYNOS_TMU_TRIP_MODE_SHIFT 13 -#define EXYNOS_TMU_TRIP_MODE_MASK 0x7 -#define EXYNOS_TMU_THERM_TRIP_EN_SHIFT 12 - -#define EXYNOS_TMU_INTEN_RISE0_SHIFT 0 -#define EXYNOS_TMU_INTEN_RISE1_SHIFT 4 -#define EXYNOS_TMU_INTEN_RISE2_SHIFT 8 -#define EXYNOS_TMU_INTEN_RISE3_SHIFT 12 -#define EXYNOS_TMU_INTEN_FALL0_SHIFT 16 - -#define EXYNOS_EMUL_TIME 0x57F0 -#define EXYNOS_EMUL_TIME_MASK 0xffff -#define EXYNOS_EMUL_TIME_SHIFT 16 -#define EXYNOS_EMUL_DATA_SHIFT 8 -#define EXYNOS_EMUL_DATA_MASK 0xFF -#define EXYNOS_EMUL_ENABLE 0x1 - -#define EXYNOS_MAX_TRIGGER_PER_REG 4 - -/* Exynos5260 specific */ -#define EXYNOS5260_TMU_REG_INTEN 0xC0 -#define EXYNOS5260_TMU_REG_INTSTAT 0xC4 -#define EXYNOS5260_TMU_REG_INTCLEAR 0xC8 -#define EXYNOS5260_EMUL_CON 0x100 - -/* Exynos4412 specific */ -#define EXYNOS4412_MUX_ADDR_VALUE 6 -#define EXYNOS4412_MUX_ADDR_SHIFT 20 - -/*exynos5440 specific registers*/ -#define EXYNOS5440_TMU_S0_7_TRIM 0x000 -#define EXYNOS5440_TMU_S0_7_CTRL 0x020 -#define EXYNOS5440_TMU_S0_7_DEBUG 0x040 -#define EXYNOS5440_TMU_S0_7_STATUS 0x060 -#define EXYNOS5440_TMU_S0_7_TEMP 0x0f0 -#define EXYNOS5440_TMU_S0_7_TH0 0x110 -#define EXYNOS5440_TMU_S0_7_TH1 0x130 -#define EXYNOS5440_TMU_S0_7_TH2 0x150 -#define EXYNOS5440_TMU_S0_7_IRQEN 0x210 -#define EXYNOS5440_TMU_S0_7_IRQ 0x230 -/* exynos5440 common registers */ -#define EXYNOS5440_TMU_IRQ_STATUS 0x000 -#define EXYNOS5440_TMU_PMIN 0x004 - -#define EXYNOS5440_TMU_INTEN_RISE0_SHIFT 0 -#define EXYNOS5440_TMU_INTEN_RISE1_SHIFT 1 -#define EXYNOS5440_TMU_INTEN_RISE2_SHIFT 2 -#define EXYNOS5440_TMU_INTEN_RISE3_SHIFT 3 -#define EXYNOS5440_TMU_INTEN_FALL0_SHIFT 4 -#define EXYNOS5440_TMU_TH_RISE4_SHIFT 24 -#define EXYNOS5440_EFUSE_SWAP_OFFSET 8 - -#if defined(CONFIG_SOC_EXYNOS3250) -extern struct exynos_tmu_init_data const exynos3250_default_tmu_data; -#define EXYNOS3250_TMU_DRV_DATA (&exynos3250_default_tmu_data) -#else -#define EXYNOS3250_TMU_DRV_DATA (NULL) -#endif - -#if defined(CONFIG_CPU_EXYNOS4210) -extern struct exynos_tmu_init_data const exynos4210_default_tmu_data; -#define EXYNOS4210_TMU_DRV_DATA (&exynos4210_default_tmu_data) -#else -#define EXYNOS4210_TMU_DRV_DATA (NULL) -#endif - -#if 
defined(CONFIG_SOC_EXYNOS4412) -extern struct exynos_tmu_init_data const exynos4412_default_tmu_data; -#define EXYNOS4412_TMU_DRV_DATA (&exynos4412_default_tmu_data) -#else -#define EXYNOS4412_TMU_DRV_DATA (NULL) -#endif - -#if defined(CONFIG_SOC_EXYNOS5250) -extern struct exynos_tmu_init_data const exynos5250_default_tmu_data; -#define EXYNOS5250_TMU_DRV_DATA (&exynos5250_default_tmu_data) -#else -#define EXYNOS5250_TMU_DRV_DATA (NULL) -#endif - -#if defined(CONFIG_SOC_EXYNOS5260) -extern struct exynos_tmu_init_data const exynos5260_default_tmu_data; -#define EXYNOS5260_TMU_DRV_DATA (&exynos5260_default_tmu_data) -#else -#define EXYNOS5260_TMU_DRV_DATA (NULL) -#endif - -#if defined(CONFIG_SOC_EXYNOS5420) -extern struct exynos_tmu_init_data const exynos5420_default_tmu_data; -#define EXYNOS5420_TMU_DRV_DATA (&exynos5420_default_tmu_data) -#else -#define EXYNOS5420_TMU_DRV_DATA (NULL) -#endif - -#if defined(CONFIG_SOC_EXYNOS5440) -extern struct exynos_tmu_init_data const exynos5440_default_tmu_data; -#define EXYNOS5440_TMU_DRV_DATA (&exynos5440_default_tmu_data) -#else -#define EXYNOS5440_TMU_DRV_DATA (NULL) -#endif - -#endif /*_EXYNOS_TMU_DATA_H*/ diff --git a/drivers/thermal/tegra_soctherm.c b/drivers/thermal/tegra_soctherm.c new file mode 100644 index 00000000000..9197fc05c5c --- /dev/null +++ b/drivers/thermal/tegra_soctherm.c @@ -0,0 +1,476 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * Author: + * Mikko Perttunen <mperttunen@nvidia.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/reset.h> +#include <linux/thermal.h> + +#include <soc/tegra/fuse.h> + +#define SENSOR_CONFIG0 0 +#define SENSOR_CONFIG0_STOP BIT(0) +#define SENSOR_CONFIG0_TALL_SHIFT 8 +#define SENSOR_CONFIG0_TCALC_OVER BIT(4) +#define SENSOR_CONFIG0_OVER BIT(3) +#define SENSOR_CONFIG0_CPTR_OVER BIT(2) + +#define SENSOR_CONFIG1 4 +#define SENSOR_CONFIG1_TSAMPLE_SHIFT 0 +#define SENSOR_CONFIG1_TIDDQ_EN_SHIFT 15 +#define SENSOR_CONFIG1_TEN_COUNT_SHIFT 24 +#define SENSOR_CONFIG1_TEMP_ENABLE BIT(31) + +#define SENSOR_CONFIG2 8 +#define SENSOR_CONFIG2_THERMA_SHIFT 16 +#define SENSOR_CONFIG2_THERMB_SHIFT 0 + +#define SENSOR_PDIV 0x1c0 +#define SENSOR_PDIV_T124 0x8888 +#define SENSOR_HOTSPOT_OFF 0x1c4 +#define SENSOR_HOTSPOT_OFF_T124 0x00060600 +#define SENSOR_TEMP1 0x1c8 +#define SENSOR_TEMP2 0x1cc + +#define SENSOR_TEMP_MASK 0xffff +#define READBACK_VALUE_MASK 0xff00 +#define READBACK_VALUE_SHIFT 8 +#define READBACK_ADD_HALF BIT(7) +#define READBACK_NEGATE BIT(1) + +#define FUSE_TSENSOR8_CALIB 0x180 +#define FUSE_SPARE_REALIGNMENT_REG_0 0x1fc + +#define FUSE_TSENSOR_CALIB_CP_TS_BASE_MASK 0x1fff +#define FUSE_TSENSOR_CALIB_FT_TS_BASE_MASK (0x1fff << 13) +#define FUSE_TSENSOR_CALIB_FT_TS_BASE_SHIFT 13 + +#define FUSE_TSENSOR8_CALIB_CP_TS_BASE_MASK 0x3ff +#define FUSE_TSENSOR8_CALIB_FT_TS_BASE_MASK (0x7ff << 10) +#define FUSE_TSENSOR8_CALIB_FT_TS_BASE_SHIFT 10 + +#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_CP_MASK 0x3f +#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_MASK (0x1f << 21) +#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_SHIFT 21 + +#define NOMINAL_CALIB_FT_T124 105 +#define NOMINAL_CALIB_CP_T124 25 + +struct tegra_tsensor_configuration { + u32 tall, tsample, tiddq_en, ten_count, pdiv, tsample_ate, pdiv_ate; +}; + +struct tegra_tsensor { + const struct tegra_tsensor_configuration *config; + u32 base, calib_fuse_offset; + /* Correction values used to modify values read from calibration fuses */ + s32 fuse_corr_alpha, fuse_corr_beta; +}; + +struct tegra_thermctl_zone { + void __iomem *reg; + unsigned int shift; +}; + +static const struct tegra_tsensor_configuration t124_tsensor_config = { + .tall = 16300, + .tsample = 120, + .tiddq_en = 1, + .ten_count = 1, + .pdiv = 8, + .tsample_ate = 480, + .pdiv_ate = 8 +}; + +static const struct tegra_tsensor t124_tsensors[] = { + { + .config = &t124_tsensor_config, + .base = 0xc0, + .calib_fuse_offset = 0x098, + .fuse_corr_alpha = 1135400, + .fuse_corr_beta = -6266900, + }, + { + .config = &t124_tsensor_config, + .base = 0xe0, + .calib_fuse_offset = 0x084, + .fuse_corr_alpha = 1122220, + .fuse_corr_beta = -5700700, + }, + { + .config = &t124_tsensor_config, + .base = 0x100, + .calib_fuse_offset = 0x088, + .fuse_corr_alpha = 1127000, + .fuse_corr_beta = -6768200, + }, + { + .config = &t124_tsensor_config, + .base = 0x120, + .calib_fuse_offset = 0x12c, + .fuse_corr_alpha = 1110900, + .fuse_corr_beta = -6232000, + }, + { + .config = &t124_tsensor_config, + .base = 0x140, + .calib_fuse_offset = 0x158, + .fuse_corr_alpha = 1122300, + .fuse_corr_beta = -5936400, + }, + { + .config = &t124_tsensor_config, + .base = 0x160, + .calib_fuse_offset = 0x15c, + .fuse_corr_alpha = 1145700, + .fuse_corr_beta = -7124600, + }, + { + .config = &t124_tsensor_config, + .base = 0x180, + .calib_fuse_offset = 0x154, + .fuse_corr_alpha = 
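+		/* ppm-scale linear correction applied in
+		 * calculate_tsensor_calibration() below:
+		 * therma' = therma * alpha / 1000000 and
+		 * thermb' = (thermb * alpha + beta) / 1000000. */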
1120100, + .fuse_corr_beta = -6000500, + }, + { + .config = &t124_tsensor_config, + .base = 0x1a0, + .calib_fuse_offset = 0x160, + .fuse_corr_alpha = 1106500, + .fuse_corr_beta = -6729300, + }, +}; + +struct tegra_soctherm { + struct reset_control *reset; + struct clk *clock_tsensor; + struct clk *clock_soctherm; + void __iomem *regs; + + struct thermal_zone_device *thermctl_tzs[4]; +}; + +struct tsensor_shared_calibration { + u32 base_cp, base_ft; + u32 actual_temp_cp, actual_temp_ft; +}; + +static int calculate_shared_calibration(struct tsensor_shared_calibration *r) +{ + u32 val, shifted_cp, shifted_ft; + int err; + + err = tegra_fuse_readl(FUSE_TSENSOR8_CALIB, &val); + if (err) + return err; + r->base_cp = val & FUSE_TSENSOR8_CALIB_CP_TS_BASE_MASK; + r->base_ft = (val & FUSE_TSENSOR8_CALIB_FT_TS_BASE_MASK) + >> FUSE_TSENSOR8_CALIB_FT_TS_BASE_SHIFT; + val = ((val & FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_MASK) + >> FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_SHIFT); + shifted_ft = sign_extend32(val, 4); + + err = tegra_fuse_readl(FUSE_SPARE_REALIGNMENT_REG_0, &val); + if (err) + return err; + shifted_cp = sign_extend32(val, 5); + + r->actual_temp_cp = 2 * NOMINAL_CALIB_CP_T124 + shifted_cp; + r->actual_temp_ft = 2 * NOMINAL_CALIB_FT_T124 + shifted_ft; + + return 0; +} + +static s64 div64_s64_precise(s64 a, s64 b) +{ + s64 r, al; + + /* Scale up for increased precision division */ + al = a << 16; + + r = div64_s64(al * 2 + 1, 2 * b); + return r >> 16; +} + +static int +calculate_tsensor_calibration(const struct tegra_tsensor *sensor, + const struct tsensor_shared_calibration *shared, + u32 *calib) +{ + u32 val; + s32 actual_tsensor_ft, actual_tsensor_cp, delta_sens, delta_temp, + mult, div; + s16 therma, thermb; + s64 tmp; + int err; + + err = tegra_fuse_readl(sensor->calib_fuse_offset, &val); + if (err) + return err; + + actual_tsensor_cp = (shared->base_cp * 64) + sign_extend32(val, 12); + val = (val & FUSE_TSENSOR_CALIB_FT_TS_BASE_MASK) + >> FUSE_TSENSOR_CALIB_FT_TS_BASE_SHIFT; + actual_tsensor_ft = (shared->base_ft * 32) + sign_extend32(val, 12); + + delta_sens = actual_tsensor_ft - actual_tsensor_cp; + delta_temp = shared->actual_temp_ft - shared->actual_temp_cp; + + mult = sensor->config->pdiv * sensor->config->tsample_ate; + div = sensor->config->tsample * sensor->config->pdiv_ate; + + therma = div64_s64_precise((s64) delta_temp * (1LL << 13) * mult, + (s64) delta_sens * div); + + tmp = (s64)actual_tsensor_ft * shared->actual_temp_cp - + (s64)actual_tsensor_cp * shared->actual_temp_ft; + thermb = div64_s64_precise(tmp, (s64)delta_sens); + + therma = div64_s64_precise((s64)therma * sensor->fuse_corr_alpha, + (s64)1000000LL); + thermb = div64_s64_precise((s64)thermb * sensor->fuse_corr_alpha + + sensor->fuse_corr_beta, (s64)1000000LL); + + *calib = ((u16)therma << SENSOR_CONFIG2_THERMA_SHIFT) | + ((u16)thermb << SENSOR_CONFIG2_THERMB_SHIFT); + + return 0; +} + +static int enable_tsensor(struct tegra_soctherm *tegra, + const struct tegra_tsensor *sensor, + const struct tsensor_shared_calibration *shared) +{ + void __iomem *base = tegra->regs + sensor->base; + unsigned int val; + u32 calib; + int err; + + err = calculate_tsensor_calibration(sensor, shared, &calib); + if (err) + return err; + + val = sensor->config->tall << SENSOR_CONFIG0_TALL_SHIFT; + writel(val, base + SENSOR_CONFIG0); + + val = (sensor->config->tsample - 1) << SENSOR_CONFIG1_TSAMPLE_SHIFT; + val |= sensor->config->tiddq_en << SENSOR_CONFIG1_TIDDQ_EN_SHIFT; + val |= sensor->config->ten_count << SENSOR_CONFIG1_TEN_COUNT_SHIFT; + 
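	/*
	 * Aside on the calibration math above (a worked example, not
	 * driver code): the 2 * NOMINAL_CALIB_* scaling suggests the
	 * calibration temperatures are tracked in half-degree units, and
	 * the fuse fields are sign-extended before use. With a
	 * hypothetical 6-bit CP realignment fuse value of 0x3d:
	 *
	 *   shifted_cp     = sign_extend32(0x3d, 5) = -3
	 *   actual_temp_cp = 2 * NOMINAL_CALIB_CP_T124 + shifted_cp
	 *                  = 2 * 25 - 3 = 47, i.e. 23.5 degrees C
	 */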
val |= SENSOR_CONFIG1_TEMP_ENABLE; + writel(val, base + SENSOR_CONFIG1); + + writel(calib, base + SENSOR_CONFIG2); + + return 0; +} + +/* + * Translate from soctherm readback format to millicelsius. + * The soctherm readback format in bits is as follows: + * TTTTTTTT H______N + * where T's contain the temperature in Celsius, + * H denotes an addition of 0.5 Celsius and N denotes negation + * of the final value. + */ +static long translate_temp(u16 val) +{ + long t; + + t = ((val & READBACK_VALUE_MASK) >> READBACK_VALUE_SHIFT) * 1000; + if (val & READBACK_ADD_HALF) + t += 500; + if (val & READBACK_NEGATE) + t *= -1; + + return t; +} + +static int tegra_thermctl_get_temp(void *data, long *out_temp) +{ + struct tegra_thermctl_zone *zone = data; + u32 val; + + val = (readl(zone->reg) >> zone->shift) & SENSOR_TEMP_MASK; + *out_temp = translate_temp(val); + + return 0; +} + +static const struct thermal_zone_of_device_ops tegra_of_thermal_ops = { + .get_temp = tegra_thermctl_get_temp, +}; + +static const struct of_device_id tegra_soctherm_of_match[] = { + { .compatible = "nvidia,tegra124-soctherm" }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_soctherm_of_match); + +struct thermctl_zone_desc { + unsigned int offset; + unsigned int shift; +}; + +static const struct thermctl_zone_desc t124_thermctl_temp_zones[] = { + { SENSOR_TEMP1, 16 }, + { SENSOR_TEMP2, 16 }, + { SENSOR_TEMP1, 0 }, + { SENSOR_TEMP2, 0 } +}; + +static int tegra_soctherm_probe(struct platform_device *pdev) +{ + struct tegra_soctherm *tegra; + struct thermal_zone_device *tz; + struct tsensor_shared_calibration shared_calib; + struct resource *res; + unsigned int i; + int err; + + const struct tegra_tsensor *tsensors = t124_tsensors; + + tegra = devm_kzalloc(&pdev->dev, sizeof(*tegra), GFP_KERNEL); + if (!tegra) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + tegra->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(tegra->regs)) + return PTR_ERR(tegra->regs); + + tegra->reset = devm_reset_control_get(&pdev->dev, "soctherm"); + if (IS_ERR(tegra->reset)) { + dev_err(&pdev->dev, "can't get soctherm reset\n"); + return PTR_ERR(tegra->reset); + } + + tegra->clock_tsensor = devm_clk_get(&pdev->dev, "tsensor"); + if (IS_ERR(tegra->clock_tsensor)) { + dev_err(&pdev->dev, "can't get tsensor clock\n"); + return PTR_ERR(tegra->clock_tsensor); + } + + tegra->clock_soctherm = devm_clk_get(&pdev->dev, "soctherm"); + if (IS_ERR(tegra->clock_soctherm)) { + dev_err(&pdev->dev, "can't get soctherm clock\n"); + return PTR_ERR(tegra->clock_soctherm); + } + + reset_control_assert(tegra->reset); + + err = clk_prepare_enable(tegra->clock_soctherm); + if (err) + return err; + + err = clk_prepare_enable(tegra->clock_tsensor); + if (err) { + clk_disable_unprepare(tegra->clock_soctherm); + return err; + } + + reset_control_deassert(tegra->reset); + + /* Initialize raw sensors */ + + err = calculate_shared_calibration(&shared_calib); + if (err) + goto disable_clocks; + + for (i = 0; i < ARRAY_SIZE(t124_tsensors); ++i) { + err = enable_tsensor(tegra, tsensors + i, &shared_calib); + if (err) + goto disable_clocks; + } + + writel(SENSOR_PDIV_T124, tegra->regs + SENSOR_PDIV); + writel(SENSOR_HOTSPOT_OFF_T124, tegra->regs + SENSOR_HOTSPOT_OFF); + + /* Initialize thermctl sensors */ + + for (i = 0; i < ARRAY_SIZE(tegra->thermctl_tzs); ++i) { + struct tegra_thermctl_zone *zone = + devm_kzalloc(&pdev->dev, sizeof(*zone), GFP_KERNEL); + if (!zone) { + err = -ENOMEM; + goto unregister_tzs; + } + + zone->reg = tegra->regs + 
t124_thermctl_temp_zones[i].offset; + zone->shift = t124_thermctl_temp_zones[i].shift; + + tz = thermal_zone_of_sensor_register(&pdev->dev, i, zone, + &tegra_of_thermal_ops); + if (IS_ERR(tz)) { + err = PTR_ERR(tz); + dev_err(&pdev->dev, "failed to register sensor: %d\n", + err); + goto unregister_tzs; + } + + tegra->thermctl_tzs[i] = tz; + } + + return 0; + +unregister_tzs: + while (i--) + thermal_zone_of_sensor_unregister(&pdev->dev, + tegra->thermctl_tzs[i]); + +disable_clocks: + clk_disable_unprepare(tegra->clock_tsensor); + clk_disable_unprepare(tegra->clock_soctherm); + + return err; +} + +static int tegra_soctherm_remove(struct platform_device *pdev) +{ + struct tegra_soctherm *tegra = platform_get_drvdata(pdev); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(tegra->thermctl_tzs); ++i) { + thermal_zone_of_sensor_unregister(&pdev->dev, + tegra->thermctl_tzs[i]); + } + + clk_disable_unprepare(tegra->clock_tsensor); + clk_disable_unprepare(tegra->clock_soctherm); + + return 0; +} + +static struct platform_driver tegra_soctherm_driver = { + .probe = tegra_soctherm_probe, + .remove = tegra_soctherm_remove, + .driver = { + .name = "tegra-soctherm", + .of_match_table = tegra_soctherm_of_match, + }, +}; +module_platform_driver(tegra_soctherm_driver); + +MODULE_AUTHOR("Mikko Perttunen <mperttunen@nvidia.com>"); +MODULE_DESCRIPTION("NVIDIA Tegra SOCTHERM thermal management driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 43b90709585..84fdf0792e2 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -368,7 +368,7 @@ static void handle_critical_trips(struct thermal_zone_device *tz, tz->ops->get_trip_temp(tz, trip, &trip_temp); /* If we have not crossed the trip_temp, we do not care. 
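Before leaving the soctherm driver: the readback conversion it defines in translate_temp() can be exercised in isolation. A minimal userspace sketch, assuming a hypothetical raw sensor value; bits 15:8 carry whole degrees Celsius, bit 7 adds half a degree, bit 1 negates, and the result is millicelsius:

#include <stdint.h>
#include <stdio.h>

/* Userspace sketch of the soctherm readback decoding; not driver code. */
static long translate_temp(uint16_t val)
{
	long t = ((val & 0xff00) >> 8) * 1000;	/* READBACK_VALUE_MASK/SHIFT */

	if (val & (1 << 7))			/* READBACK_ADD_HALF */
		t += 500;
	if (val & (1 << 1))			/* READBACK_NEGATE */
		t *= -1;
	return t;
}

int main(void)
{
	printf("%ld\n", translate_temp(0x1980));	/* 25.5 C -> 25500 */
	return 0;
}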
*/ - if (tz->temperature < trip_temp) + if (trip_temp <= 0 || tz->temperature < trip_temp) return; trace_thermal_zone_trip(tz, trip, trip_type); @@ -757,6 +757,7 @@ policy_store(struct device *dev, struct device_attribute *attr, snprintf(name, sizeof(name), "%s", buf); mutex_lock(&thermal_governor_lock); + mutex_lock(&tz->lock); gov = __find_governor(strim(name)); if (!gov) @@ -766,6 +767,7 @@ policy_store(struct device *dev, struct device_attribute *attr, ret = count; exit: + mutex_unlock(&tz->lock); mutex_unlock(&thermal_governor_lock); return ret; } @@ -1835,10 +1837,10 @@ static int __init thermal_init(void) exit_netlink: genetlink_exit(); -unregister_governors: - thermal_unregister_governors(); unregister_class: class_unregister(&thermal_class); +unregister_governors: + thermal_unregister_governors(); error: idr_destroy(&thermal_tz_idr); idr_destroy(&thermal_cdev_idr); diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index d15d243de27..9083e752062 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -89,9 +89,27 @@ static inline void thermal_gov_user_space_unregister(void) {} #ifdef CONFIG_THERMAL_OF int of_parse_thermal_zones(void); void of_thermal_destroy_zones(void); +int of_thermal_get_ntrips(struct thermal_zone_device *); +bool of_thermal_is_trip_valid(struct thermal_zone_device *, int); +const struct thermal_trip * const +of_thermal_get_trip_points(struct thermal_zone_device *); #else static inline int of_parse_thermal_zones(void) { return 0; } static inline void of_thermal_destroy_zones(void) { } +static inline int of_thermal_get_ntrips(struct thermal_zone_device *tz) +{ + return 0; +} +static inline bool of_thermal_is_trip_valid(struct thermal_zone_device *tz, + int trip) +{ + return 0; +} +static inline const struct thermal_trip * const +of_thermal_get_trip_points(struct thermal_zone_device *tz) +{ + return NULL; +} #endif #endif /* __THERMAL_CORE_H__ */ diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c index 9eec26dc044..5fd03865e39 100644 --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -286,6 +286,11 @@ static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal, return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp); } +static const struct thermal_zone_of_device_ops ti_of_thermal_ops = { + .get_temp = __ti_thermal_get_temp, + .get_trend = __ti_thermal_get_trend, +}; + static struct thermal_zone_device_ops ti_thermal_ops = { .get_temp = ti_thermal_get_temp, .get_trend = ti_thermal_get_trend, @@ -333,8 +338,7 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id, /* in case this is specified by DT */ data->ti_thermal = thermal_zone_of_sensor_register(bgp->dev, id, - data, __ti_thermal_get_temp, - __ti_thermal_get_trend); + data, &ti_of_thermal_ops); if (IS_ERR(data->ti_thermal)) { /* Create thermal zone */ data->ti_thermal = thermal_zone_device_register(domain, diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index d8c57636b9c..14e27ab3245 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -16,7 +16,7 @@ config VFIO_SPAPR_EEH menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API - select VFIO_IOMMU_TYPE1 if X86 + select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU) select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES) select 
ANON_INODES diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index c41b01e2b69..c6bb5da2d2a 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -16,3 +16,11 @@ config VFIO_PCI_VGA BIOS and generic video drivers. If you don't know what to do here, say N. + +config VFIO_PCI_MMAP + depends on VFIO_PCI + def_bool y if !S390 + +config VFIO_PCI_INTX + depends on VFIO_PCI + def_bool y if !S390 diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 9558da3f06a..255201f2212 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -215,7 +215,7 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) { u8 pin; pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin); - if (pin) + if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && pin) return 1; } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) { @@ -406,7 +406,8 @@ static long vfio_pci_ioctl(void *device_data, info.flags = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE; - if (pci_resource_flags(pdev, info.index) & + if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) && + pci_resource_flags(pdev, info.index) & IORESOURCE_MEM && info.size >= PAGE_SIZE) info.flags |= VFIO_REGION_INFO_FLAG_MMAP; break; diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 1de3f94aa7d..ff75ca31a19 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -609,6 +609,10 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm) /* Sometimes used by sw, just virtualize */ p_setb(perm, PCI_INTERRUPT_LINE, (u8)ALL_VIRT, (u8)ALL_WRITE); + + /* Virtualize interrupt pin to allow hiding INTx */ + p_setb(perm, PCI_INTERRUPT_PIN, (u8)ALL_VIRT, (u8)NO_WRITE); + return 0; } @@ -1445,6 +1449,9 @@ int vfio_config_init(struct vfio_pci_device *vdev) *(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device); } + if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX)) + vconfig[PCI_INTERRUPT_PIN] = 0; + ret = vfio_cap_init(vdev); if (ret) goto out; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index c9703d4d6f6..50c5f42d7a9 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -28,6 +28,7 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/balloon_compaction.h> +#include <linux/oom.h> /* * Balloon device works in 4K page units. 
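(In numbers, the 4K-unit bookkeeping this comment introduces, as a quick sketch; it assumes VIRTIO_BALLOON_PFN_SHIFT is 12, i.e. a 4K balloon page:)

#include <stdio.h>

int main(void)
{
	/* one kernel page expands to PAGE_SIZE >> 12 balloon pages */
	unsigned long sizes[] = { 4096, 16384, 65536 };
	unsigned i;

	for (i = 0; i < 3; i++)
		printf("PAGE_SIZE %lu -> %lu balloon page(s)\n",
		       sizes[i], sizes[i] >> 12);
	return 0;
}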
So each page is pointed to by @@ -36,6 +37,12 @@ */ #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 +#define OOM_VBALLOON_DEFAULT_PAGES 256 +#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 + +static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES; +module_param(oom_pages, int, S_IRUSR | S_IWUSR); +MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); struct virtio_balloon { @@ -71,6 +78,9 @@ struct virtio_balloon /* Memory statistics */ int need_stats_update; struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; + + /* To register callback in oom notifier call chain */ + struct notifier_block nb; }; static struct virtio_device_id id_table[] = { @@ -168,8 +178,9 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num) } } -static void leak_balloon(struct virtio_balloon *vb, size_t num) +static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) { + unsigned num_freed_pages; struct page *page; struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; @@ -186,6 +197,7 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE; } + num_freed_pages = vb->num_pfns; /* * Note that if * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); @@ -195,6 +207,7 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) tell_host(vb, vb->deflate_vq); mutex_unlock(&vb->balloon_lock); release_pages_by_pfn(vb->pfns, vb->num_pfns); + return num_freed_pages; } static inline void update_stat(struct virtio_balloon *vb, int idx, @@ -287,6 +300,38 @@ static void update_balloon_size(struct virtio_balloon *vb) &actual); } +/* + * virtballoon_oom_notify - release pages when system is under severe + * memory pressure (called from out_of_memory()) + * @self : notifier block struct + * @dummy: not used + * @parm : returned - number of freed pages + * + * The balancing of memory by use of the virtio balloon should not cause + * the termination of processes while there are pages in the balloon. + * If virtio balloon manages to release some memory, it will make the + * system return and retry the allocation that forced the OOM killer + * to run. 
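The notifier contract this kernel-doc describes, reduced to a minimal sketch: the OOM chain hands every callback a pointer to a running total of freed pages through parm, and each notifiee adds its own contribution. The callback below is hypothetical and shows only the API shape used here:

#include <linux/notifier.h>
#include <linux/oom.h>

static int demo_oom_notify(struct notifier_block *self,
			   unsigned long dummy, void *parm)
{
	unsigned long *freed = parm;

	*freed += 0;		/* add however many pages were released */
	return NOTIFY_OK;
}

static struct notifier_block demo_nb = {
	.notifier_call	= demo_oom_notify,
	.priority	= 1,	/* higher priority runs earlier in the chain */
};

/* pair register_oom_notifier(&demo_nb) with unregister_oom_notifier(). */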
+ */ +static int virtballoon_oom_notify(struct notifier_block *self, + unsigned long dummy, void *parm) +{ + struct virtio_balloon *vb; + unsigned long *freed; + unsigned num_freed_pages; + + vb = container_of(self, struct virtio_balloon, nb); + if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) + return NOTIFY_OK; + + freed = parm; + num_freed_pages = leak_balloon(vb, oom_pages); + update_balloon_size(vb); + *freed += num_freed_pages; + + return NOTIFY_OK; +} + static int balloon(void *_vballoon) { struct virtio_balloon *vb = _vballoon; @@ -443,6 +488,12 @@ static int virtballoon_probe(struct virtio_device *vdev) if (err) goto out_free_vb; + vb->nb.notifier_call = virtballoon_oom_notify; + vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; + err = register_oom_notifier(&vb->nb); + if (err < 0) + goto out_oom_notify; + vb->thread = kthread_run(balloon, vb, "vballoon"); if (IS_ERR(vb->thread)) { err = PTR_ERR(vb->thread); @@ -452,6 +503,8 @@ static int virtballoon_probe(struct virtio_device *vdev) return 0; out_del_vqs: + unregister_oom_notifier(&vb->nb); +out_oom_notify: vdev->config->del_vqs(vdev); out_free_vb: kfree(vb); @@ -476,6 +529,7 @@ static void virtballoon_remove(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; + unregister_oom_notifier(&vb->nb); kthread_stop(vb->thread); remove_common(vb); kfree(vb); @@ -515,6 +569,7 @@ static int virtballoon_restore(struct virtio_device *vdev) static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ, + VIRTIO_BALLOON_F_DEFLATE_ON_OOM, }; static struct virtio_driver virtio_balloon_driver = { diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 18c06bbaf13..f5013d92a7e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -192,17 +192,30 @@ static int readpage_nounlock(struct file *filp, struct page *page) struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->client->osdc; int err = 0; + u64 off = page_offset(page); u64 len = PAGE_CACHE_SIZE; - err = ceph_readpage_from_fscache(inode, page); + if (off >= i_size_read(inode)) { + zero_user_segment(page, err, PAGE_CACHE_SIZE); + SetPageUptodate(page); + return 0; + } + /* + * Uptodate inline data should have been added into page cache + * while getting Fcr caps. 
+ */ + if (ci->i_inline_version != CEPH_INLINE_NONE) + return -EINVAL; + + err = ceph_readpage_from_fscache(inode, page); if (err == 0) goto out; dout("readpage inode %p file %p page %p index %lu\n", inode, filp, page, page->index); err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, - (u64) page_offset(page), &len, + off, &len, ci->i_truncate_seq, ci->i_truncate_size, &page, 1, 0); if (err == -ENOENT) @@ -319,7 +332,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) off, len); vino = ceph_vino(inode); req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, - 1, CEPH_OSD_OP_READ, + 0, 1, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, ci->i_truncate_seq, ci->i_truncate_size, false); @@ -384,6 +397,9 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, int rc = 0; int max = 0; + if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE) + return -EINVAL; + rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list, &nr_pages); @@ -673,7 +689,7 @@ static int ceph_writepages_start(struct address_space *mapping, int rc = 0; unsigned wsize = 1 << inode->i_blkbits; struct ceph_osd_request *req = NULL; - int do_sync; + int do_sync = 0; u64 truncate_size, snap_size; u32 truncate_seq; @@ -750,7 +766,6 @@ retry: last_snapc = snapc; while (!done && index <= end) { - int num_ops = do_sync ? 2 : 1; unsigned i; int first; pgoff_t next; @@ -850,7 +865,8 @@ get_more_pages: len = wsize; req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, - offset, &len, num_ops, + offset, &len, 0, + do_sync ? 2 : 1, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, @@ -862,6 +878,9 @@ get_more_pages: break; } + if (do_sync) + osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); + req->r_callback = writepages_finish; req->r_inode = inode; @@ -1204,6 +1223,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; + struct page *pinned_page = NULL; loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT; int want, got, ret; @@ -1215,7 +1235,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) want = CEPH_CAP_FILE_CACHE; while (1) { got = 0; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); + ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, + -1, &got, &pinned_page); if (ret == 0) break; if (ret != -ERESTARTSYS) { @@ -1226,12 +1247,54 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) dout("filemap_fault %p %llu~%zd got cap refs on %s\n", inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got)); - ret = filemap_fault(vma, vmf); + if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || + ci->i_inline_version == CEPH_INLINE_NONE) + ret = filemap_fault(vma, vmf); + else + ret = -EAGAIN; dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret); + if (pinned_page) + page_cache_release(pinned_page); ceph_put_cap_refs(ci, got); + if (ret != -EAGAIN) + return ret; + + /* read inline data */ + if (off >= PAGE_CACHE_SIZE) { + /* does not support inline data > PAGE_SIZE */ + ret = VM_FAULT_SIGBUS; + } else { + int ret1; + struct address_space *mapping = inode->i_mapping; + struct page *page = find_or_create_page(mapping, 0, + mapping_gfp_mask(mapping) & + ~__GFP_FS); + if (!page) { + ret = VM_FAULT_OOM; + goto 
out; + } + ret1 = __ceph_do_getattr(inode, page, + CEPH_STAT_CAP_INLINE_DATA, true); + if (ret1 < 0 || off >= i_size_read(inode)) { + unlock_page(page); + page_cache_release(page); + ret = VM_FAULT_SIGBUS; + goto out; + } + if (ret1 < PAGE_CACHE_SIZE) + zero_user_segment(page, ret1, PAGE_CACHE_SIZE); + else + flush_dcache_page(page); + SetPageUptodate(page); + vmf->page = page; + ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED; + } +out: + dout("filemap_fault %p %llu~%zd read inline data ret %d\n", + inode, off, (size_t)PAGE_CACHE_SIZE, ret); return ret; } @@ -1250,6 +1313,19 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) size_t len; int want, got, ret; + if (ci->i_inline_version != CEPH_INLINE_NONE) { + struct page *locked_page = NULL; + if (off == 0) { + lock_page(page); + locked_page = page; + } + ret = ceph_uninline_data(vma->vm_file, locked_page); + if (locked_page) + unlock_page(locked_page); + if (ret < 0) + return VM_FAULT_SIGBUS; + } + if (off + PAGE_CACHE_SIZE <= size) len = PAGE_CACHE_SIZE; else @@ -1263,7 +1339,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) want = CEPH_CAP_FILE_BUFFER; while (1) { got = 0; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len); + ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len, + &got, NULL); if (ret == 0) break; if (ret != -ERESTARTSYS) { @@ -1297,11 +1374,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = VM_FAULT_SIGBUS; } out: - if (ret != VM_FAULT_LOCKED) { + if (ret != VM_FAULT_LOCKED) unlock_page(page); - } else { + if (ret == VM_FAULT_LOCKED || + ci->i_inline_version != CEPH_INLINE_NONE) { int dirty; spin_lock(&ci->i_ceph_lock); + ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); spin_unlock(&ci->i_ceph_lock); if (dirty) @@ -1315,6 +1394,178 @@ out: return ret; } +void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, + char *data, size_t len) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page; + + if (locked_page) { + page = locked_page; + } else { + if (i_size_read(inode) == 0) + return; + page = find_or_create_page(mapping, 0, + mapping_gfp_mask(mapping) & ~__GFP_FS); + if (!page) + return; + if (PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + return; + } + } + + dout("fill_inline_data %p %llx.%llx len %lu locked_page %p\n", + inode, ceph_vinop(inode), len, locked_page); + + if (len > 0) { + void *kaddr = kmap_atomic(page); + memcpy(kaddr, data, len); + kunmap_atomic(kaddr); + } + + if (page != locked_page) { + if (len < PAGE_CACHE_SIZE) + zero_user_segment(page, len, PAGE_CACHE_SIZE); + else + flush_dcache_page(page); + + SetPageUptodate(page); + unlock_page(page); + page_cache_release(page); + } +} + +int ceph_uninline_data(struct file *filp, struct page *locked_page) +{ + struct inode *inode = file_inode(filp); + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_osd_request *req; + struct page *page = NULL; + u64 len, inline_version; + int err = 0; + bool from_pagecache = false; + + spin_lock(&ci->i_ceph_lock); + inline_version = ci->i_inline_version; + spin_unlock(&ci->i_ceph_lock); + + dout("uninline_data %p %llx.%llx inline_version %llu\n", + inode, ceph_vinop(inode), inline_version); + + if (inline_version == 1 || /* initial version, no data */ + inline_version == CEPH_INLINE_NONE) + goto out; + + if (locked_page) { + page 
= locked_page; + WARN_ON(!PageUptodate(page)); + } else if (ceph_caps_issued(ci) & + (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) { + page = find_get_page(inode->i_mapping, 0); + if (page) { + if (PageUptodate(page)) { + from_pagecache = true; + lock_page(page); + } else { + page_cache_release(page); + page = NULL; + } + } + } + + if (page) { + len = i_size_read(inode); + if (len > PAGE_CACHE_SIZE) + len = PAGE_CACHE_SIZE; + } else { + page = __page_cache_alloc(GFP_NOFS); + if (!page) { + err = -ENOMEM; + goto out; + } + err = __ceph_do_getattr(inode, page, + CEPH_STAT_CAP_INLINE_DATA, true); + if (err < 0) { + /* no inline data */ + if (err == -ENODATA) + err = 0; + goto out; + } + len = err; + } + + req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, + ceph_vino(inode), 0, &len, 0, 1, + CEPH_OSD_OP_CREATE, + CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + ci->i_snap_realm->cached_context, + 0, 0, false); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out; + } + + ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); + err = ceph_osdc_start_request(&fsc->client->osdc, req, false); + if (!err) + err = ceph_osdc_wait_request(&fsc->client->osdc, req); + ceph_osdc_put_request(req); + if (err < 0) + goto out; + + req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, + ceph_vino(inode), 0, &len, 1, 3, + CEPH_OSD_OP_WRITE, + CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + ci->i_snap_realm->cached_context, + ci->i_truncate_seq, ci->i_truncate_size, + false); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out; + } + + osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); + + err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, + "inline_version", &inline_version, + sizeof(inline_version), + CEPH_OSD_CMPXATTR_OP_GT, + CEPH_OSD_CMPXATTR_MODE_U64); + if (err) + goto out_put; + + err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, + "inline_version", &inline_version, + sizeof(inline_version), 0, 0); + if (err) + goto out_put; + + ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); + err = ceph_osdc_start_request(&fsc->client->osdc, req, false); + if (!err) + err = ceph_osdc_wait_request(&fsc->client->osdc, req); +out_put: + ceph_osdc_put_request(req); + if (err == -ECANCELED) + err = 0; +out: + if (page && page != locked_page) { + if (from_pagecache) { + unlock_page(page); + page_cache_release(page); + } else + __free_pages(page, 0); + } + + dout("uninline_data %p %llx.%llx inline_version %llu = %d\n", + inode, ceph_vinop(inode), inline_version, err); + return err; +} + static struct vm_operations_struct ceph_vmops = { .fault = ceph_filemap_fault, .page_mkwrite = ceph_page_mkwrite, diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index cefca661464..b93c631c6c8 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -975,10 +975,12 @@ static int send_cap_msg(struct ceph_mds_session *session, kuid_t uid, kgid_t gid, umode_t mode, u64 xattr_version, struct ceph_buffer *xattrs_buf, - u64 follows) + u64 follows, bool inline_data) { struct ceph_mds_caps *fc; struct ceph_msg *msg; + void *p; + size_t extra_len; dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" " seq %u/%u mseq %u follows %lld size %llu/%llu" @@ -988,7 +990,10 @@ static int send_cap_msg(struct ceph_mds_session *session, seq, issue_seq, mseq, follows, size, max_size, xattr_version, xattrs_buf ? 
(int)xattrs_buf->vec.iov_len : 0); - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false); + /* flock buffer size + inline version + inline data size */ + extra_len = 4 + 8 + 4; + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len, + GFP_NOFS, false); if (!msg) return -ENOMEM; @@ -1020,6 +1025,14 @@ static int send_cap_msg(struct ceph_mds_session *session, fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid)); fc->mode = cpu_to_le32(mode); + p = fc + 1; + /* flock buffer size */ + ceph_encode_32(&p, 0); + /* inline version */ + ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE); + /* inline data size */ + ceph_encode_32(&p, 0); + fc->xattr_version = cpu_to_le64(xattr_version); if (xattrs_buf) { msg->middle = ceph_buffer_get(xattrs_buf); @@ -1126,6 +1139,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, u64 flush_tid = 0; int i; int ret; + bool inline_data; held = cap->issued | cap->implemented; revoking = cap->implemented & ~cap->issued; @@ -1209,13 +1223,15 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, xattr_version = ci->i_xattrs.version; } + inline_data = ci->i_inline_version != CEPH_INLINE_NONE; + spin_unlock(&ci->i_ceph_lock); ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, size, max_size, &mtime, &atime, time_warp_seq, uid, gid, mode, xattr_version, xattr_blob, - follows); + follows, inline_data); if (ret < 0) { dout("error sending cap msg, must requeue %p\n", inode); delayed = 1; @@ -1336,7 +1352,7 @@ retry: capsnap->time_warp_seq, capsnap->uid, capsnap->gid, capsnap->mode, capsnap->xattr_version, capsnap->xattr_blob, - capsnap->follows); + capsnap->follows, capsnap->inline_data); next_follows = capsnap->follows + 1; ceph_put_cap_snap(capsnap); @@ -2057,15 +2073,17 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) * requested from the MDS. 
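Stepping back to the send_cap_msg() change at the top of this hunk: the 16 extra bytes appended to the caps message are a u32 flock buffer length, a u64 inline version, and a u32 inline data length, all little-endian. A standalone sketch of that tail, with hypothetical packing helpers and assuming CEPH_INLINE_NONE is the all-ones u64:

#include <stdint.h>
#include <stdio.h>

static uint8_t *put_le32(uint8_t *p, uint32_t v)
{
	int i;

	for (i = 0; i < 4; i++)
		*p++ = v >> (8 * i);
	return p;
}

static uint8_t *put_le64(uint8_t *p, uint64_t v)
{
	int i;

	for (i = 0; i < 8; i++)
		*p++ = v >> (8 * i);
	return p;
}

int main(void)
{
	uint8_t tail[16], *p = tail;

	p = put_le32(p, 0);		/* flock buffer size */
	p = put_le64(p, (uint64_t)-1);	/* inline version: CEPH_INLINE_NONE */
	p = put_le32(p, 0);		/* inline data size */
	printf("tail is %d bytes\n", (int)(p - tail));	/* 4 + 8 + 4 = 16 */
	return 0;
}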
*/ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, - int *got, loff_t endoff, int *check_max, int *err) + loff_t endoff, int *got, struct page **pinned_page, + int *check_max, int *err) { struct inode *inode = &ci->vfs_inode; int ret = 0; - int have, implemented; + int have, implemented, _got = 0; int file_wanted; dout("get_cap_refs %p need %s want %s\n", inode, ceph_cap_string(need), ceph_cap_string(want)); +again: spin_lock(&ci->i_ceph_lock); /* make sure file is actually open */ @@ -2075,7 +2093,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, ceph_cap_string(need), ceph_cap_string(file_wanted)); *err = -EBADF; ret = 1; - goto out; + goto out_unlock; } /* finish pending truncate */ @@ -2095,7 +2113,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, *check_max = 1; ret = 1; } - goto out; + goto out_unlock; } /* * If a sync write is in progress, we must wait, so that we @@ -2103,7 +2121,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, */ if (__ceph_have_pending_cap_snap(ci)) { dout("get_cap_refs %p cap_snap_pending\n", inode); - goto out; + goto out_unlock; } } @@ -2120,18 +2138,50 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, inode, ceph_cap_string(have), ceph_cap_string(not), ceph_cap_string(revoking)); if ((revoking & not) == 0) { - *got = need | (have & want); - __take_cap_refs(ci, *got); + _got = need | (have & want); + __take_cap_refs(ci, _got); ret = 1; } } else { dout("get_cap_refs %p have %s needed %s\n", inode, ceph_cap_string(have), ceph_cap_string(need)); } -out: +out_unlock: spin_unlock(&ci->i_ceph_lock); + + if (ci->i_inline_version != CEPH_INLINE_NONE && + (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && + i_size_read(inode) > 0) { + int ret1; + struct page *page = find_get_page(inode->i_mapping, 0); + if (page) { + if (PageUptodate(page)) { + *pinned_page = page; + goto out; + } + page_cache_release(page); + } + /* + * drop cap refs first because getattr while holding + * caps refs can cause deadlock. + */ + ceph_put_cap_refs(ci, _got); + _got = 0; + + /* getattr request will bring inline data into page cache */ + ret1 = __ceph_do_getattr(inode, NULL, + CEPH_STAT_CAP_INLINE_DATA, true); + if (ret1 >= 0) { + ret = 0; + goto again; + } + *err = ret1; + ret = 1; + } +out: dout("get_cap_refs %p ret %d got %s\n", inode, - ret, ceph_cap_string(*got)); + ret, ceph_cap_string(_got)); + *got = _got; return ret; } @@ -2168,8 +2218,8 @@ static void check_max_size(struct inode *inode, loff_t endoff) * due to a small max_size, make sure we check_max_size (and possibly * ask the mds) so we don't get hung up indefinitely. 
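The retry just added to try_get_cap_refs(), reduced to its shape: the getattr that pulls inline data into the page cache may block, so the cap references are dropped first and the whole acquisition restarts. A self-contained sketch with hypothetical stubs standing in for the cap and getattr machinery:

#include <stdbool.h>
#include <stdio.h>

static bool have_inline;

static int take_refs(void)	{ return 0; }
static void drop_refs(void)	{ }
static int fetch_inline(void)	{ have_inline = true; return 0; }

static int get_caps_with_inline(void)
{
again:
	if (take_refs())
		return -1;
	if (!have_inline) {
		drop_refs();		/* never block while holding refs */
		if (fetch_inline())
			return -1;
		goto again;		/* inline data now in page cache */
	}
	return 0;
}

int main(void)
{
	printf("%d\n", get_caps_with_inline());
	return 0;
}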
*/ -int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got, - loff_t endoff) +int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, + loff_t endoff, int *got, struct page **pinned_page) { int check_max, ret, err; @@ -2179,8 +2229,8 @@ retry: check_max = 0; err = 0; ret = wait_event_interruptible(ci->i_cap_wq, - try_get_cap_refs(ci, need, want, - got, endoff, + try_get_cap_refs(ci, need, want, endoff, + got, pinned_page, &check_max, &err)); if (err) ret = err; @@ -2383,6 +2433,8 @@ static void invalidate_aliases(struct inode *inode) static void handle_cap_grant(struct ceph_mds_client *mdsc, struct inode *inode, struct ceph_mds_caps *grant, void *snaptrace, int snaptrace_len, + u64 inline_version, + void *inline_data, int inline_len, struct ceph_buffer *xattr_buf, struct ceph_mds_session *session, struct ceph_cap *cap, int issued) @@ -2403,6 +2455,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, bool queue_invalidate = false; bool queue_revalidate = false; bool deleted_inode = false; + bool fill_inline = false; dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", inode, cap, mds, seq, ceph_cap_string(newcaps)); @@ -2576,6 +2629,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, } BUG_ON(cap->issued & ~cap->implemented); + if (inline_version > 0 && inline_version >= ci->i_inline_version) { + ci->i_inline_version = inline_version; + if (ci->i_inline_version != CEPH_INLINE_NONE && + (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO))) + fill_inline = true; + } + spin_unlock(&ci->i_ceph_lock); if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { @@ -2589,6 +2649,9 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, wake = true; } + if (fill_inline) + ceph_fill_inline_data(inode, NULL, inline_data, inline_len); + if (queue_trunc) { ceph_queue_vmtruncate(inode); ceph_queue_revalidate(inode); @@ -2996,11 +3059,12 @@ void ceph_handle_caps(struct ceph_mds_session *session, u64 cap_id; u64 size, max_size; u64 tid; + u64 inline_version = 0; + void *inline_data = NULL; + u32 inline_len = 0; void *snaptrace; size_t snaptrace_len; - void *flock; - void *end; - u32 flock_len; + void *p, *end; dout("handle_caps from mds%d\n", mds); @@ -3021,30 +3085,37 @@ void ceph_handle_caps(struct ceph_mds_session *session, snaptrace = h + 1; snaptrace_len = le32_to_cpu(h->snap_trace_len); + p = snaptrace + snaptrace_len; if (le16_to_cpu(msg->hdr.version) >= 2) { - void *p = snaptrace + snaptrace_len; + u32 flock_len; ceph_decode_32_safe(&p, end, flock_len, bad); if (p + flock_len > end) goto bad; - flock = p; - } else { - flock = NULL; - flock_len = 0; + p += flock_len; } if (le16_to_cpu(msg->hdr.version) >= 3) { if (op == CEPH_CAP_OP_IMPORT) { - void *p = flock + flock_len; if (p + sizeof(*peer) > end) goto bad; peer = p; + p += sizeof(*peer); } else if (op == CEPH_CAP_OP_EXPORT) { /* recorded in unused fields */ peer = (void *)&h->size; } } + if (le16_to_cpu(msg->hdr.version) >= 4) { + ceph_decode_64_safe(&p, end, inline_version, bad); + ceph_decode_32_safe(&p, end, inline_len, bad); + if (p + inline_len > end) + goto bad; + inline_data = p; + p += inline_len; + } + /* lookup ino */ inode = ceph_find_inode(sb, vino); ci = ceph_inode(inode); @@ -3085,6 +3156,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, handle_cap_import(mdsc, inode, h, peer, session, &cap, &issued); handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len, + inline_version, inline_data, inline_len, msg->middle, session, cap, issued); goto done_unlocked; } @@ 
-3105,8 +3177,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_GRANT: __ceph_caps_issued(ci, &issued); issued |= __ceph_caps_dirty(ci); - handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle, - session, cap, issued); + handle_cap_grant(mdsc, inode, h, NULL, 0, + inline_version, inline_data, inline_len, + msg->middle, session, cap, issued); goto done_unlocked; case CEPH_CAP_OP_FLUSH_ACK: @@ -3137,8 +3210,7 @@ flush_cap_releases: done: mutex_unlock(&session->s_mutex); done_unlocked: - if (inode) - iput(inode); + iput(inode); return; bad: diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 681a8537b64..c241603764f 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -183,7 +183,7 @@ more: spin_unlock(&parent->d_lock); /* make sure a dentry wasn't dropped while we didn't have parent lock */ - if (!ceph_dir_is_complete(dir)) { + if (!ceph_dir_is_complete_ordered(dir)) { dout(" lost dir complete on %p; falling back to mds\n", dir); dput(dentry); err = -EAGAIN; @@ -261,10 +261,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) /* always start with . and .. */ if (ctx->pos == 0) { - /* note dir version at start of readdir so we can tell - * if any dentries get dropped */ - fi->dir_release_count = atomic_read(&ci->i_release_count); - dout("readdir off 0 -> '.'\n"); if (!dir_emit(ctx, ".", 1, ceph_translate_ino(inode->i_sb, inode->i_ino), @@ -289,7 +285,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) if ((ctx->pos == 2 || fi->dentry) && !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && ceph_snap(inode) != CEPH_SNAPDIR && - __ceph_dir_is_complete(ci) && + __ceph_dir_is_complete_ordered(ci) && __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { u32 shared_gen = ci->i_shared_gen; spin_unlock(&ci->i_ceph_lock); @@ -312,6 +308,13 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) /* proceed with a normal readdir */ + if (ctx->pos == 2) { + /* note dir version at start of readdir so we can tell + * if any dentries get dropped */ + fi->dir_release_count = atomic_read(&ci->i_release_count); + fi->dir_ordered_count = ci->i_ordered_count; + } + more: /* do we have the correct frag content buffered? */ if (fi->frag != frag || fi->last_readdir == NULL) { @@ -446,8 +449,12 @@ more: */ spin_lock(&ci->i_ceph_lock); if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { - dout(" marking %p complete\n", inode); - __ceph_dir_set_complete(ci, fi->dir_release_count); + if (ci->i_ordered_count == fi->dir_ordered_count) + dout(" marking %p complete and ordered\n", inode); + else + dout(" marking %p complete\n", inode); + __ceph_dir_set_complete(ci, fi->dir_release_count, + fi->dir_ordered_count); } spin_unlock(&ci->i_ceph_lock); @@ -805,7 +812,9 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) acls.pagelist = NULL; } err = ceph_mdsc_do_request(mdsc, dir, req); - if (!err && !req->r_reply_info.head->is_dentry) + if (!err && + !req->r_reply_info.head->is_target && + !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); ceph_mdsc_put_request(req); out: diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 9f8e3572040..ce74b394b49 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -333,6 +333,11 @@ int ceph_release(struct inode *inode, struct file *file) return 0; } +enum { + CHECK_EOF = 1, + READ_INLINE = 2, +}; + /* * Read a range of bytes striped over one or more objects. Iterate over * objects we stripe over. (That's not atomic, but good enough for now.) 
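For intuition, the object-walking arithmetic behind striped reads in miniature: a toy with a single-stripe layout and a hypothetical 4 MB object size (real ceph layouts add stripe_unit/stripe_count on top of this):

#include <stdio.h>

int main(void)
{
	unsigned long long obj_size = 4ULL << 20;
	unsigned long long pos = (6ULL << 20) + 1234;	/* file offset */
	unsigned long long len = 3ULL << 20;		/* bytes to read */

	while (len) {
		unsigned long long objno = pos / obj_size;
		unsigned long long off  = pos % obj_size;
		unsigned long long n    = obj_size - off;

		if (n > len)
			n = len;
		printf("object %llu: off %llu len %llu\n", objno, off, n);
		pos += n;
		len -= n;
	}
	return 0;
}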
@@ -412,7 +417,7 @@ more: ret = read; /* did we bounce off eof? */ if (pos + left > inode->i_size) - *checkeof = 1; + *checkeof = CHECK_EOF; } dout("striped_read returns %d\n", ret); @@ -598,7 +603,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) snapc = ci->i_snap_realm->cached_context; vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, - vino, pos, &len, + vino, pos, &len, 0, 2,/*include a 'startsync' command*/ CEPH_OSD_OP_WRITE, flags, snapc, ci->i_truncate_seq, @@ -609,6 +614,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) break; } + osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); + n = iov_iter_get_pages_alloc(from, &pages, len, &start); if (unlikely(n < 0)) { ret = n; @@ -713,7 +720,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) snapc = ci->i_snap_realm->cached_context; vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, - vino, pos, &len, 1, + vino, pos, &len, 0, 1, CEPH_OSD_OP_WRITE, flags, snapc, ci->i_truncate_seq, ci->i_truncate_size, @@ -803,9 +810,10 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) size_t len = iocb->ki_nbytes; struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); + struct page *pinned_page = NULL; ssize_t ret; int want, got = 0; - int checkeof = 0, read = 0; + int retry_op = 0, read = 0; again: dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", @@ -815,7 +823,7 @@ again: want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; else want = CEPH_CAP_FILE_CACHE; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); + ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page); if (ret < 0) return ret; @@ -827,8 +835,12 @@ again: inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, ceph_cap_string(got)); - /* hmm, this isn't really async... */ - ret = ceph_sync_read(iocb, to, &checkeof); + if (ci->i_inline_version == CEPH_INLINE_NONE) { + /* hmm, this isn't really async... 
*/ + ret = ceph_sync_read(iocb, to, &retry_op); + } else { + retry_op = READ_INLINE; + } } else { dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, @@ -838,13 +850,55 @@ again: } dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); + if (pinned_page) { + page_cache_release(pinned_page); + pinned_page = NULL; + } ceph_put_cap_refs(ci, got); + if (retry_op && ret >= 0) { + int statret; + struct page *page = NULL; + loff_t i_size; + if (retry_op == READ_INLINE) { + page = __page_cache_alloc(GFP_NOFS); + if (!page) + return -ENOMEM; + } - if (checkeof && ret >= 0) { - int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); + statret = __ceph_do_getattr(inode, page, + CEPH_STAT_CAP_INLINE_DATA, !!page); + if (statret < 0) { + __free_page(page); + if (statret == -ENODATA) { + BUG_ON(retry_op != READ_INLINE); + goto again; + } + return statret; + } + + i_size = i_size_read(inode); + if (retry_op == READ_INLINE) { + /* does not support inline data > PAGE_SIZE */ + if (i_size > PAGE_CACHE_SIZE) { + ret = -EIO; + } else if (iocb->ki_pos < i_size) { + loff_t end = min_t(loff_t, i_size, + iocb->ki_pos + len); + if (statret < end) + zero_user_segment(page, statret, end); + ret = copy_page_to_iter(page, + iocb->ki_pos & ~PAGE_MASK, + end - iocb->ki_pos, to); + iocb->ki_pos += ret; + } else { + ret = 0; + } + __free_pages(page, 0); + return ret; + } /* hit EOF or hole? */ - if (statret == 0 && iocb->ki_pos < inode->i_size && + if (retry_op == CHECK_EOF && iocb->ki_pos < i_size && ret < len) { dout("sync_read hit hole, ppos %lld < size %lld" ", reading more\n", iocb->ki_pos, @@ -852,7 +906,7 @@ again: read += ret; len -= ret; - checkeof = 0; + retry_op = 0; goto again; } } @@ -909,6 +963,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; + if (ci->i_inline_version != CEPH_INLINE_NONE) { + err = ceph_uninline_data(file, NULL); + if (err < 0) + goto out; + } + retry_snap: if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { err = -ENOSPC; @@ -922,7 +982,8 @@ retry_snap: else want = CEPH_CAP_FILE_BUFFER; got = 0; - err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count); + err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count, + &got, NULL); if (err < 0) goto out; @@ -969,6 +1030,7 @@ retry_snap: if (written >= 0) { int dirty; spin_lock(&ci->i_ceph_lock); + ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); spin_unlock(&ci->i_ceph_lock); if (dirty) @@ -1111,7 +1173,7 @@ static int ceph_zero_partial_object(struct inode *inode, req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), offset, length, - 1, op, + 0, 1, op, CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, NULL, 0, 0, false); @@ -1214,6 +1276,12 @@ static long ceph_fallocate(struct file *file, int mode, goto unlock; } + if (ci->i_inline_version != CEPH_INLINE_NONE) { + ret = ceph_uninline_data(file, NULL); + if (ret < 0) + goto unlock; + } + size = i_size_read(inode); if (!(mode & FALLOC_FL_KEEP_SIZE)) endoff = offset + length; @@ -1223,7 +1291,7 @@ static long ceph_fallocate(struct file *file, int mode, else want = CEPH_CAP_FILE_BUFFER; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); + ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL); if (ret < 0) goto unlock; @@ -1240,6 +1308,7 @@ static long ceph_fallocate(struct file *file, int mode, if (!ret) 
{ spin_lock(&ci->i_ceph_lock); + ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); spin_unlock(&ci->i_ceph_lock); if (dirty) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index a5593d51d03..f61a74115be 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -387,8 +387,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb) spin_lock_init(&ci->i_ceph_lock); ci->i_version = 0; + ci->i_inline_version = 0; ci->i_time_warp_seq = 0; ci->i_ceph_flags = 0; + ci->i_ordered_count = 0; atomic_set(&ci->i_release_count, 1); atomic_set(&ci->i_complete_count, 0); ci->i_symlink = NULL; @@ -657,7 +659,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, * Populate an inode based on info from mds. May be called on new or * existing inodes. */ -static int fill_inode(struct inode *inode, +static int fill_inode(struct inode *inode, struct page *locked_page, struct ceph_mds_reply_info_in *iinfo, struct ceph_mds_reply_dirfrag *dirinfo, struct ceph_mds_session *session, @@ -675,6 +677,7 @@ static int fill_inode(struct inode *inode, bool wake = false; bool queue_trunc = false; bool new_version = false; + bool fill_inline = false; dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", inode, ceph_vinop(inode), le64_to_cpu(info->version), @@ -845,7 +848,8 @@ static int fill_inode(struct inode *inode, (issued & CEPH_CAP_FILE_EXCL) == 0 && !__ceph_dir_is_complete(ci)) { dout(" marking %p complete (empty)\n", inode); - __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); + __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count), + ci->i_ordered_count); } /* were we issued a capability? */ @@ -873,8 +877,23 @@ static int fill_inode(struct inode *inode, ceph_vinop(inode)); __ceph_get_fmode(ci, cap_fmode); } + + if (iinfo->inline_version > 0 && + iinfo->inline_version >= ci->i_inline_version) { + int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; + ci->i_inline_version = iinfo->inline_version; + if (ci->i_inline_version != CEPH_INLINE_NONE && + (locked_page || + (le32_to_cpu(info->cap.caps) & cache_caps))) + fill_inline = true; + } + spin_unlock(&ci->i_ceph_lock); + if (fill_inline) + ceph_fill_inline_data(inode, locked_page, + iinfo->inline_data, iinfo->inline_len); + if (wake) wake_up_all(&ci->i_cap_wq); @@ -1062,7 +1081,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, struct inode *dir = req->r_locked_dir; if (dir) { - err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, + err = fill_inode(dir, NULL, + &rinfo->diri, rinfo->dirfrag, session, req->r_request_started, -1, &req->r_caps_reservation); if (err < 0) @@ -1132,7 +1152,7 @@ retry_lookup: } req->r_target_inode = in; - err = fill_inode(in, &rinfo->targeti, NULL, + err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, session, req->r_request_started, (!req->r_aborted && rinfo->head->result == 0) ? 
req->r_fmode : -1, @@ -1204,8 +1224,8 @@ retry_lookup: ceph_invalidate_dentry_lease(dn); /* d_move screws up sibling dentries' offsets */ - ceph_dir_clear_complete(dir); - ceph_dir_clear_complete(olddir); + ceph_dir_clear_ordered(dir); + ceph_dir_clear_ordered(olddir); dout("dn %p gets new offset %lld\n", req->r_old_dentry, ceph_dentry(req->r_old_dentry)->offset); @@ -1217,6 +1237,7 @@ retry_lookup: if (!rinfo->head->is_target) { dout("fill_trace null dentry\n"); if (dn->d_inode) { + ceph_dir_clear_ordered(dir); dout("d_delete %p\n", dn); d_delete(dn); } else { @@ -1233,7 +1254,7 @@ retry_lookup: /* attach proper inode */ if (!dn->d_inode) { - ceph_dir_clear_complete(dir); + ceph_dir_clear_ordered(dir); ihold(in); dn = splice_dentry(dn, in, &have_lease); if (IS_ERR(dn)) { @@ -1263,7 +1284,7 @@ retry_lookup: BUG_ON(!dir); BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR); dout(" linking snapped dir %p to dn %p\n", in, dn); - ceph_dir_clear_complete(dir); + ceph_dir_clear_ordered(dir); ihold(in); dn = splice_dentry(dn, in, NULL); if (IS_ERR(dn)) { @@ -1300,7 +1321,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, dout("new_inode badness got %d\n", err); continue; } - rc = fill_inode(in, &rinfo->dir_in[i], NULL, session, + rc = fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session, req->r_request_started, -1, &req->r_caps_reservation); if (rc < 0) { @@ -1416,7 +1437,7 @@ retry_lookup: } } - if (fill_inode(in, &rinfo->dir_in[i], NULL, session, + if (fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session, req->r_request_started, -1, &req->r_caps_reservation) < 0) { pr_err("fill_inode badness on %p\n", in); @@ -1899,7 +1920,8 @@ out_put: * Verify that we have a lease on the given mask. If not, * do a getattr against an mds. */ -int ceph_do_getattr(struct inode *inode, int mask, bool force) +int __ceph_do_getattr(struct inode *inode, struct page *locked_page, + int mask, bool force) { struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -1911,7 +1933,8 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force) return 0; } - dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode); + dout("do_getattr inode %p mask %s mode 0%o\n", + inode, ceph_cap_string(mask), inode->i_mode); if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) return 0; @@ -1922,7 +1945,19 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force) ihold(inode); req->r_num_caps = 1; req->r_args.getattr.mask = cpu_to_le32(mask); + req->r_locked_page = locked_page; err = ceph_mdsc_do_request(mdsc, NULL, req); + if (locked_page && err == 0) { + u64 inline_version = req->r_reply_info.targeti.inline_version; + if (inline_version == 0) { + /* the reply is supposed to contain inline data */ + err = -EINVAL; + } else if (inline_version == CEPH_INLINE_NONE) { + err = -ENODATA; + } else { + err = req->r_reply_info.targeti.inline_len; + } + } ceph_mdsc_put_request(req); dout("do_getattr result=%d\n", err); return err; diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index fbc39c47bac..c35c5c614e3 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -9,6 +9,8 @@ #include <linux/ceph/pagelist.h> static u64 lock_secret; +static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, + struct ceph_mds_request *req); static inline u64 secure_addr(void *addr) { @@ -40,6 +42,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, u64 length = 0; u64 owner; + if (operation != 
CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK) + wait = 0; + req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); @@ -68,6 +73,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, req->r_args.filelock_change.length = cpu_to_le64(length); req->r_args.filelock_change.wait = wait; + if (wait) + req->r_wait_for_completion = ceph_lock_wait_for_completion; + err = ceph_mdsc_do_request(mdsc, inode, req); if (operation == CEPH_MDS_OP_GETFILELOCK) { @@ -96,6 +104,52 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, return err; } +static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, + struct ceph_mds_request *req) +{ + struct ceph_mds_request *intr_req; + struct inode *inode = req->r_inode; + int err, lock_type; + + BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK); + if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL) + lock_type = CEPH_LOCK_FCNTL_INTR; + else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK) + lock_type = CEPH_LOCK_FLOCK_INTR; + else + BUG_ON(1); + BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK); + + err = wait_for_completion_interruptible(&req->r_completion); + if (!err) + return 0; + + dout("ceph_lock_wait_for_completion: request %llu was interrupted\n", + req->r_tid); + + intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK, + USE_AUTH_MDS); + if (IS_ERR(intr_req)) + return PTR_ERR(intr_req); + + intr_req->r_inode = inode; + ihold(inode); + intr_req->r_num_caps = 1; + + intr_req->r_args.filelock_change = req->r_args.filelock_change; + intr_req->r_args.filelock_change.rule = lock_type; + intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK; + + err = ceph_mdsc_do_request(mdsc, inode, intr_req); + ceph_mdsc_put_request(intr_req); + + if (err && err != -ERESTARTSYS) + return err; + + wait_for_completion(&req->r_completion); + return 0; +} + /** * Attempt to set an fcntl lock. * For now, this just goes away to the server. Later it may be more awesome. 
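The interruptible-wait flow added in ceph_lock_wait_for_completion() above, boiled down: wait interruptibly; if a signal arrives, fire a FCNTL_INTR/FLOCK_INTR counter-request that makes the MDS abort the blocked lock, then wait (no longer interruptibly) for the original request's reply, which is now guaranteed to come. A sketch with a hypothetical stand-in for the counter-request:

#include <linux/completion.h>
#include <linux/errno.h>

/* Hypothetical stand-in for sending the *_INTR unlock request. */
static int send_interrupt_request(void)
{
	return 0;
}

static int wait_for_lock_reply(struct completion *done)
{
	int err = wait_for_completion_interruptible(done);

	if (!err)
		return 0;			/* completed normally */

	err = send_interrupt_request();		/* abort the server-side wait */
	if (err && err != -ERESTARTSYS)
		return err;

	wait_for_completion(done);		/* reply now guaranteed */
	return 0;
}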
@@ -143,11 +197,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) err); } } - - } else if (err == -ERESTARTSYS) { - dout("undoing lock\n"); - ceph_lock_message(CEPH_LOCK_FCNTL, op, file, - CEPH_LOCK_UNLOCK, 0, fl); } return err; } @@ -186,11 +235,6 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) file, CEPH_LOCK_UNLOCK, 0, fl); dout("got %d on flock_lock_file_wait, undid lock", err); } - } else if (err == -ERESTARTSYS) { - dout("undoing lock\n"); - ceph_lock_message(CEPH_LOCK_FLOCK, - CEPH_MDS_OP_SETFILELOCK, - file, CEPH_LOCK_UNLOCK, 0, fl); } return err; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a92d3f5c6c1..d2171f4a698 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -89,6 +89,16 @@ static int parse_reply_info_in(void **p, void *end, ceph_decode_need(p, end, info->xattr_len, bad); info->xattr_data = *p; *p += info->xattr_len; + + if (features & CEPH_FEATURE_MDS_INLINE_DATA) { + ceph_decode_64_safe(p, end, info->inline_version, bad); + ceph_decode_32_safe(p, end, info->inline_len, bad); + ceph_decode_need(p, end, info->inline_len, bad); + info->inline_data = *p; + *p += info->inline_len; + } else + info->inline_version = CEPH_INLINE_NONE; + return 0; bad: return err; @@ -524,8 +534,7 @@ void ceph_mdsc_release_request(struct kref *kref) } if (req->r_locked_dir) ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); - if (req->r_target_inode) - iput(req->r_target_inode); + iput(req->r_target_inode); if (req->r_dentry) dput(req->r_dentry); if (req->r_old_dentry) @@ -861,8 +870,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 /* * Serialize client metadata into waiting buffer space, using * the format that userspace expects for map<string, string> + * + * ClientSession messages with metadata are v2 */ - msg->hdr.version = 2; /* ClientSession messages with metadata are v2 */ + msg->hdr.version = cpu_to_le16(2); + msg->hdr.compat_version = cpu_to_le16(1); /* The write pointer, following the session_head structure */ p = msg->front.iov_base + sizeof(*h); @@ -1066,8 +1078,7 @@ out: session->s_cap_iterator = NULL; spin_unlock(&session->s_cap_lock); - if (last_inode) - iput(last_inode); + iput(last_inode); if (old_cap) ceph_put_cap(session->s_mdsc, old_cap); @@ -1874,7 +1885,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, goto out_free2; } - msg->hdr.version = 2; + msg->hdr.version = cpu_to_le16(2); msg->hdr.tid = cpu_to_le64(req->r_tid); head = msg->front.iov_base; @@ -2208,6 +2219,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, &req->r_completion, req->r_timeout); if (err == 0) err = -EIO; + } else if (req->r_wait_for_completion) { + err = req->r_wait_for_completion(mdsc, req); } else { err = wait_for_completion_killable(&req->r_completion); } @@ -3744,6 +3757,20 @@ static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con, return msg; } +static int sign_message(struct ceph_connection *con, struct ceph_msg *msg) +{ + struct ceph_mds_session *s = con->private; + struct ceph_auth_handshake *auth = &s->s_auth; + return ceph_auth_sign_message(auth, msg); +} + +static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg) +{ + struct ceph_mds_session *s = con->private; + struct ceph_auth_handshake *auth = &s->s_auth; + return ceph_auth_check_message_signature(auth, msg); +} + static const struct ceph_connection_operations mds_con_ops = { .get = con_get, .put = con_put, @@ -3753,6 +3780,8 @@ static const 
struct ceph_connection_operations mds_con_ops = { .invalidate_authorizer = invalidate_authorizer, .peer_reset = peer_reset, .alloc_msg = mds_alloc_msg, + .sign_message = sign_message, + .check_message_signature = check_message_signature, }; /* eof */ diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 3288359353e..e2817d00f7d 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -41,6 +41,9 @@ struct ceph_mds_reply_info_in { char *symlink; u32 xattr_len; char *xattr_data; + u64 inline_version; + u32 inline_len; + char *inline_data; }; /* @@ -166,6 +169,11 @@ struct ceph_mds_client; */ typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc, struct ceph_mds_request *req); +/* + * wait for request completion callback + */ +typedef int (*ceph_mds_request_wait_callback_t) (struct ceph_mds_client *mdsc, + struct ceph_mds_request *req); /* * an in-flight mds request @@ -215,6 +223,7 @@ struct ceph_mds_request { int r_request_release_offset; struct ceph_msg *r_reply; struct ceph_mds_reply_info_parsed r_reply_info; + struct page *r_locked_page; int r_err; bool r_aborted; @@ -239,6 +248,7 @@ struct ceph_mds_request { struct completion r_completion; struct completion r_safe_completion; ceph_mds_request_callback_t r_callback; + ceph_mds_request_wait_callback_t r_wait_for_completion; struct list_head r_unsafe_item; /* per-session unsafe list item */ bool r_got_unsafe, r_got_safe, r_got_result; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index f01645a2775..ce35fbd4ba5 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -288,6 +288,9 @@ static int cmpu64_rev(const void *a, const void *b) return 0; } + +static struct ceph_snap_context *empty_snapc; + /* * build the snap context for a given realm. */ @@ -328,6 +331,12 @@ static int build_snap_context(struct ceph_snap_realm *realm) return 0; } + if (num == 0 && realm->seq == empty_snapc->seq) { + ceph_get_snap_context(empty_snapc); + snapc = empty_snapc; + goto done; + } + /* alloc new snap context */ err = -ENOMEM; if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) @@ -365,8 +374,8 @@ static int build_snap_context(struct ceph_snap_realm *realm) realm->ino, realm, snapc, snapc->seq, (unsigned int) snapc->num_snaps); - if (realm->cached_context) - ceph_put_snap_context(realm->cached_context); +done: + ceph_put_snap_context(realm->cached_context); realm->cached_context = snapc; return 0; @@ -466,6 +475,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) cap_snap. lucky us. */ dout("queue_cap_snap %p already pending\n", inode); kfree(capsnap); + } else if (ci->i_snap_realm->cached_context == empty_snapc) { + dout("queue_cap_snap %p empty snapc\n", inode); + kfree(capsnap); } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) { struct ceph_snap_context *snapc = ci->i_head_snapc; @@ -504,6 +516,8 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) capsnap->xattr_version = 0; } + capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE; + /* dirty page count moved from _head to this cap_snap; all subsequent writes page dirties occur _after_ this snapshot. 
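Not part of the patch, for context: the empty_snapc hunks above rely on the shared snap-context refcounting helpers from net/ceph/snapshot.c. A minimal sketch of that contract, with hypothetical caller names:

        static struct ceph_snap_context *shared_snapc;

        static int __init example_snapc_init(void)
        {
                /* ceph_create_snap_context() returns the object with one ref */
                shared_snapc = ceph_create_snap_context(0, GFP_NOFS);
                if (!shared_snapc)
                        return -ENOMEM;
                shared_snapc->seq = 1;
                return 0;
        }

        static struct ceph_snap_context *example_snapc_borrow(void)
        {
                /* each holder takes its own reference before stashing it */
                return ceph_get_snap_context(shared_snapc);
        }

        static void example_snapc_release(struct ceph_snap_context *snapc)
        {
                /* NULL-safe; frees the context on the last put */
                ceph_put_snap_context(snapc);
        }

The same NULL-safety is what lets the done: label above call ceph_put_snap_context(realm->cached_context) without the old NULL check.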
*/ @@ -590,15 +604,13 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) if (!inode) continue; spin_unlock(&realm->inodes_with_caps_lock); - if (lastinode) - iput(lastinode); + iput(lastinode); lastinode = inode; ceph_queue_cap_snap(ci); spin_lock(&realm->inodes_with_caps_lock); } spin_unlock(&realm->inodes_with_caps_lock); - if (lastinode) - iput(lastinode); + iput(lastinode); list_for_each_entry(child, &realm->children, child_item) { dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n", @@ -928,5 +940,16 @@ out: return; } +int __init ceph_snap_init(void) +{ + empty_snapc = ceph_create_snap_context(0, GFP_NOFS); + if (!empty_snapc) + return -ENOMEM; + empty_snapc->seq = 1; + return 0; +} - +void ceph_snap_exit(void) +{ + ceph_put_snap_context(empty_snapc); +} diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f6e12377335..50f06cddc94 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -515,7 +515,8 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, struct ceph_fs_client *fsc; const u64 supported_features = CEPH_FEATURE_FLOCK | - CEPH_FEATURE_DIRLAYOUTHASH; + CEPH_FEATURE_DIRLAYOUTHASH | + CEPH_FEATURE_MDS_INLINE_DATA; const u64 required_features = 0; int page_count; size_t size; @@ -1017,9 +1018,6 @@ static struct file_system_type ceph_fs_type = { }; MODULE_ALIAS_FS("ceph"); -#define _STRINGIFY(x) #x -#define STRINGIFY(x) _STRINGIFY(x) - static int __init init_ceph(void) { int ret = init_caches(); @@ -1028,15 +1026,20 @@ static int __init init_ceph(void) ceph_flock_init(); ceph_xattr_init(); + ret = ceph_snap_init(); + if (ret) + goto out_xattr; ret = register_filesystem(&ceph_fs_type); if (ret) - goto out_icache; + goto out_snap; pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); return 0; -out_icache: +out_snap: + ceph_snap_exit(); +out_xattr: ceph_xattr_exit(); destroy_caches(); out: @@ -1047,6 +1050,7 @@ static void __exit exit_ceph(void) { dout("exit_ceph\n"); unregister_filesystem(&ceph_fs_type); + ceph_snap_exit(); ceph_xattr_exit(); destroy_caches(); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b82f507979b..e1aa32d0759 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -161,6 +161,7 @@ struct ceph_cap_snap { u64 time_warp_seq; int writing; /* a sync write is still in progress */ int dirty_pages; /* dirty pages awaiting writeback */ + bool inline_data; }; static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) @@ -253,9 +254,11 @@ struct ceph_inode_info { spinlock_t i_ceph_lock; u64 i_version; + u64 i_inline_version; u32 i_time_warp_seq; unsigned i_ceph_flags; + int i_ordered_count; atomic_t i_release_count; atomic_t i_complete_count; @@ -434,14 +437,19 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, /* * Ceph inode. 
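Not in the patch, for context: "complete" and "ordered" are generation checks. Bumping i_release_count invalidates a cached directory listing outright, while the new i_ordered_count only invalidates its ordering, so dcache readdir can still trust contents whose order may have changed. A sketch of the readdir-side handshake, assuming the dir_release_count/dir_ordered_count fields added to ceph_file_info below:

        /* before walking the directory */
        fi->dir_release_count = atomic_read(&ci->i_release_count);
        fi->dir_ordered_count = ci->i_ordered_count;

        /* ... emit every entry ... */

        /*
         * afterwards: "complete" sticks only if no release happened in
         * between, and CEPH_I_DIR_ORDERED only if nothing bumped
         * i_ordered_count (e.g. a create or rename) meanwhile
         */
        __ceph_dir_set_complete(ci, fi->dir_release_count,
                                fi->dir_ordered_count);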
*/ -#define CEPH_I_NODELAY 4 /* do not delay cap release */ -#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ -#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ +#define CEPH_I_DIR_ORDERED 1 /* dentries in dir are ordered */ +#define CEPH_I_NODELAY 4 /* do not delay cap release */ +#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ +#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, - int release_count) + int release_count, int ordered_count) { atomic_set(&ci->i_complete_count, release_count); + if (ci->i_ordered_count == ordered_count) + ci->i_ceph_flags |= CEPH_I_DIR_ORDERED; + else + ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED; } static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci) @@ -455,16 +463,35 @@ static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci) atomic_read(&ci->i_release_count); } +static inline bool __ceph_dir_is_complete_ordered(struct ceph_inode_info *ci) +{ + return __ceph_dir_is_complete(ci) && + (ci->i_ceph_flags & CEPH_I_DIR_ORDERED); +} + static inline void ceph_dir_clear_complete(struct inode *inode) { __ceph_dir_clear_complete(ceph_inode(inode)); } -static inline bool ceph_dir_is_complete(struct inode *inode) +static inline void ceph_dir_clear_ordered(struct inode *inode) { - return __ceph_dir_is_complete(ceph_inode(inode)); + struct ceph_inode_info *ci = ceph_inode(inode); + spin_lock(&ci->i_ceph_lock); + ci->i_ordered_count++; + ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED; + spin_unlock(&ci->i_ceph_lock); } +static inline bool ceph_dir_is_complete_ordered(struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + bool ret; + spin_lock(&ci->i_ceph_lock); + ret = __ceph_dir_is_complete_ordered(ci); + spin_unlock(&ci->i_ceph_lock); + return ret; +} /* find a specific frag @f */ extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, @@ -580,6 +607,7 @@ struct ceph_file_info { char *last_name; /* last entry in previous chunk */ struct dentry *dentry; /* next dentry (for dcache readdir) */ int dir_release_count; + int dir_ordered_count; /* used for -o dirstat read() on directory thing */ char *dir_info; @@ -673,6 +701,8 @@ extern void ceph_queue_cap_snap(struct ceph_inode_info *ci); extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap); extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc); +extern int ceph_snap_init(void); +extern void ceph_snap_exit(void); /* * a cap_snap is "pending" if it is still awaiting an in-progress @@ -715,7 +745,12 @@ extern void ceph_queue_vmtruncate(struct inode *inode); extern void ceph_queue_invalidate(struct inode *inode); extern void ceph_queue_writeback(struct inode *inode); -extern int ceph_do_getattr(struct inode *inode, int mask, bool force); +extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page, + int mask, bool force); +static inline int ceph_do_getattr(struct inode *inode, int mask, bool force) +{ + return __ceph_do_getattr(inode, NULL, mask, force); +} extern int ceph_permission(struct inode *inode, int mask); extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, @@ -830,7 +865,7 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn, int mds, int drop, int unless); extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, - int *got, loff_t endoff); + loff_t endoff, 
int *got, struct page **pinned_page); /* for counting open files by mode */ static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode) @@ -852,7 +887,9 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode, int *opened); extern int ceph_release(struct inode *inode, struct file *filp); - +extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, + char *data, size_t len); +int ceph_uninline_data(struct file *filp, struct page *locked_page); /* dir.c */ extern const struct file_operations ceph_dir_fops; extern const struct inode_operations ceph_dir_iops; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 678b0d2bbbc..5a492caf34c 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -854,7 +854,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, struct ceph_pagelist *pagelist = NULL; int err; - if (value) { + if (size > 0) { /* copy value into pagelist */ pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); if (!pagelist) @@ -864,7 +864,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, err = ceph_pagelist_append(pagelist, value, size); if (err) goto out; - } else { + } else if (!value) { flags |= CEPH_XATTR_REMOVE; } @@ -1001,6 +1001,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name, if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) return generic_setxattr(dentry, name, value, size, flags); + if (size == 0) + value = ""; /* empty EA, do not remove */ + return __ceph_setxattr(dentry, name, value, size, flags); } diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 966ace8b243..28d0c7abba1 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -415,7 +415,7 @@ err_unlock: err_region: unregister_chrdev_region(devt, 1); err: - fuse_conn_kill(fc); + fuse_abort_conn(fc); goto out; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ca887314aba..ba1107977f2 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -511,6 +511,35 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) } EXPORT_SYMBOL_GPL(fuse_request_send); +ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) +{ + struct fuse_req *req; + ssize_t ret; + + req = fuse_get_req(fc, 0); + if (IS_ERR(req)) + return PTR_ERR(req); + + req->in.h.opcode = args->in.h.opcode; + req->in.h.nodeid = args->in.h.nodeid; + req->in.numargs = args->in.numargs; + memcpy(req->in.args, args->in.args, + args->in.numargs * sizeof(struct fuse_in_arg)); + req->out.argvar = args->out.argvar; + req->out.numargs = args->out.numargs; + memcpy(req->out.args, args->out.args, + args->out.numargs * sizeof(struct fuse_arg)); + fuse_request_send(fc, req); + ret = req->out.h.error; + if (!ret && args->out.argvar) { + BUG_ON(args->out.numargs != 1); + ret = req->out.args[0].size; + } + fuse_put_request(fc, req); + + return ret; +} + static void fuse_request_send_nowait_locked(struct fuse_conn *fc, struct fuse_req *req) { diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index df562cc8776..252b8a5de8b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -145,22 +145,22 @@ static void fuse_invalidate_entry(struct dentry *entry) fuse_invalidate_entry_cache(entry); } -static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req, +static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, u64 nodeid, struct qstr *name, struct fuse_entry_out *outarg) { memset(outarg, 0, sizeof(struct fuse_entry_out)); - req->in.h.opcode = FUSE_LOOKUP; - req->in.h.nodeid = 
nodeid; - req->in.numargs = 1; - req->in.args[0].size = name->len + 1; - req->in.args[0].value = name->name; - req->out.numargs = 1; + args->in.h.opcode = FUSE_LOOKUP; + args->in.h.nodeid = nodeid; + args->in.numargs = 1; + args->in.args[0].size = name->len + 1; + args->in.args[0].value = name->name; + args->out.numargs = 1; if (fc->minor < 9) - req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; else - req->out.args[0].size = sizeof(struct fuse_entry_out); - req->out.args[0].value = outarg; + args->out.args[0].size = sizeof(struct fuse_entry_out); + args->out.args[0].value = outarg; } u64 fuse_get_attr_version(struct fuse_conn *fc) @@ -200,9 +200,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) goto invalid; else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || (flags & LOOKUP_REVAL)) { - int err; struct fuse_entry_out outarg; - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_forget_link *forget; u64 attr_version; @@ -215,31 +214,23 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) goto out; fc = get_fuse_conn(inode); - req = fuse_get_req_nopages(fc); - ret = PTR_ERR(req); - if (IS_ERR(req)) - goto out; forget = fuse_alloc_forget(); - if (!forget) { - fuse_put_request(fc, req); - ret = -ENOMEM; + ret = -ENOMEM; + if (!forget) goto out; - } attr_version = fuse_get_attr_version(fc); parent = dget_parent(entry); - fuse_lookup_init(fc, req, get_node_id(parent->d_inode), + fuse_lookup_init(fc, &args, get_node_id(parent->d_inode), &entry->d_name, &outarg); - fuse_request_send(fc, req); + ret = fuse_simple_request(fc, &args); dput(parent); - err = req->out.h.error; - fuse_put_request(fc, req); /* Zero nodeid is same as -ENOENT */ - if (!err && !outarg.nodeid) - err = -ENOENT; - if (!err) { + if (!ret && !outarg.nodeid) + ret = -ENOENT; + if (!ret) { fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode)) { fuse_queue_forget(fc, forget, outarg.nodeid, 1); @@ -250,7 +241,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) spin_unlock(&fc->lock); } kfree(forget); - if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) + if (ret == -ENOMEM) + goto out; + if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) goto invalid; fuse_change_attributes(inode, &outarg.attr, @@ -296,7 +289,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, struct fuse_entry_out *outarg, struct inode **inode) { struct fuse_conn *fc = get_fuse_conn_super(sb); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_forget_link *forget; u64 attr_version; int err; @@ -306,24 +299,16 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, if (name->len > FUSE_NAME_MAX) goto out; - req = fuse_get_req_nopages(fc); - err = PTR_ERR(req); - if (IS_ERR(req)) - goto out; forget = fuse_alloc_forget(); err = -ENOMEM; - if (!forget) { - fuse_put_request(fc, req); + if (!forget) goto out; - } attr_version = fuse_get_attr_version(fc); - fuse_lookup_init(fc, req, nodeid, name, outarg); - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + fuse_lookup_init(fc, &args, nodeid, name, outarg); + err = fuse_simple_request(fc, &args); /* Zero nodeid is same as -ENOENT, but with valid timeout */ if (err || !outarg->nodeid) goto out_put_forget; @@ -405,7 +390,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int err; struct inode *inode; struct fuse_conn *fc = 
get_fuse_conn(dir); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_forget_link *forget; struct fuse_create_in inarg; struct fuse_open_out outopen; @@ -420,15 +405,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, if (!forget) goto out_err; - req = fuse_get_req_nopages(fc); - err = PTR_ERR(req); - if (IS_ERR(req)) - goto out_put_forget_req; - err = -ENOMEM; ff = fuse_file_alloc(fc); if (!ff) - goto out_put_request; + goto out_put_forget_req; if (!fc->dont_mask) mode &= ~current_umask(); @@ -439,24 +419,23 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, inarg.flags = flags; inarg.mode = mode; inarg.umask = current_umask(); - req->in.h.opcode = FUSE_CREATE; - req->in.h.nodeid = get_node_id(dir); - req->in.numargs = 2; - req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) : + args.in.h.opcode = FUSE_CREATE; + args.in.h.nodeid = get_node_id(dir); + args.in.numargs = 2; + args.in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) : sizeof(inarg); - req->in.args[0].value = &inarg; - req->in.args[1].size = entry->d_name.len + 1; - req->in.args[1].value = entry->d_name.name; - req->out.numargs = 2; + args.in.args[0].value = &inarg; + args.in.args[1].size = entry->d_name.len + 1; + args.in.args[1].value = entry->d_name.name; + args.out.numargs = 2; if (fc->minor < 9) - req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + args.out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; else - req->out.args[0].size = sizeof(outentry); - req->out.args[0].value = &outentry; - req->out.args[1].size = sizeof(outopen); - req->out.args[1].value = &outopen; - fuse_request_send(fc, req); - err = req->out.h.error; + args.out.args[0].size = sizeof(outentry); + args.out.args[0].value = &outentry; + args.out.args[1].size = sizeof(outopen); + args.out.args[1].value = &outopen; + err = fuse_simple_request(fc, &args); if (err) goto out_free_ff; @@ -464,7 +443,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid)) goto out_free_ff; - fuse_put_request(fc, req); ff->fh = outopen.fh; ff->nodeid = outentry.nodeid; ff->open_flags = outopen.open_flags; @@ -492,8 +470,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, out_free_ff: fuse_file_free(ff); -out_put_request: - fuse_put_request(fc, req); out_put_forget_req: kfree(forget); out_err: @@ -547,7 +523,7 @@ no_open: /* * Code shared between mknod, mkdir, symlink and link */ -static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, +static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, struct inode *dir, struct dentry *entry, umode_t mode) { @@ -557,22 +533,18 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, struct fuse_forget_link *forget; forget = fuse_alloc_forget(); - if (!forget) { - fuse_put_request(fc, req); + if (!forget) return -ENOMEM; - } memset(&outarg, 0, sizeof(outarg)); - req->in.h.nodeid = get_node_id(dir); - req->out.numargs = 1; + args->in.h.nodeid = get_node_id(dir); + args->out.numargs = 1; if (fc->minor < 9) - req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; else - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args->out.args[0].size = sizeof(outarg); + args->out.args[0].value = &outarg; + err = fuse_simple_request(fc, args); if (err) goto 
out_put_forget_req; @@ -609,9 +581,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, { struct fuse_mknod_in inarg; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + FUSE_ARGS(args); if (!fc->dont_mask) mode &= ~current_umask(); @@ -620,14 +590,14 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, inarg.mode = mode; inarg.rdev = new_encode_dev(rdev); inarg.umask = current_umask(); - req->in.h.opcode = FUSE_MKNOD; - req->in.numargs = 2; - req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE : + args.in.h.opcode = FUSE_MKNOD; + args.in.numargs = 2; + args.in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE : sizeof(inarg); - req->in.args[0].value = &inarg; - req->in.args[1].size = entry->d_name.len + 1; - req->in.args[1].value = entry->d_name.name; - return create_new_entry(fc, req, dir, entry, mode); + args.in.args[0].value = &inarg; + args.in.args[1].size = entry->d_name.len + 1; + args.in.args[1].value = entry->d_name.name; + return create_new_entry(fc, &args, dir, entry, mode); } static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, @@ -640,9 +610,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) { struct fuse_mkdir_in inarg; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + FUSE_ARGS(args); if (!fc->dont_mask) mode &= ~current_umask(); @@ -650,13 +618,13 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; inarg.umask = current_umask(); - req->in.h.opcode = FUSE_MKDIR; - req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->in.args[1].size = entry->d_name.len + 1; - req->in.args[1].value = entry->d_name.name; - return create_new_entry(fc, req, dir, entry, S_IFDIR); + args.in.h.opcode = FUSE_MKDIR; + args.in.numargs = 2; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.in.args[1].size = entry->d_name.len + 1; + args.in.args[1].value = entry->d_name.name; + return create_new_entry(fc, &args, dir, entry, S_IFDIR); } static int fuse_symlink(struct inode *dir, struct dentry *entry, @@ -664,17 +632,15 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, { struct fuse_conn *fc = get_fuse_conn(dir); unsigned len = strlen(link) + 1; - struct fuse_req *req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + FUSE_ARGS(args); - req->in.h.opcode = FUSE_SYMLINK; - req->in.numargs = 2; - req->in.args[0].size = entry->d_name.len + 1; - req->in.args[0].value = entry->d_name.name; - req->in.args[1].size = len; - req->in.args[1].value = link; - return create_new_entry(fc, req, dir, entry, S_IFLNK); + args.in.h.opcode = FUSE_SYMLINK; + args.in.numargs = 2; + args.in.args[0].size = entry->d_name.len + 1; + args.in.args[0].value = entry->d_name.name; + args.in.args[1].size = len; + args.in.args[1].value = link; + return create_new_entry(fc, &args, dir, entry, S_IFLNK); } static inline void fuse_update_ctime(struct inode *inode) @@ -689,18 +655,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) { int err; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - - req->in.h.opcode = FUSE_UNLINK; - 
req->in.h.nodeid = get_node_id(dir); - req->in.numargs = 1; - req->in.args[0].size = entry->d_name.len + 1; - req->in.args[0].value = entry->d_name.name; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + FUSE_ARGS(args); + + args.in.h.opcode = FUSE_UNLINK; + args.in.h.nodeid = get_node_id(dir); + args.in.numargs = 1; + args.in.args[0].size = entry->d_name.len + 1; + args.in.args[0].value = entry->d_name.name; + err = fuse_simple_request(fc, &args); if (!err) { struct inode *inode = entry->d_inode; struct fuse_inode *fi = get_fuse_inode(inode); @@ -729,18 +691,14 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) { int err; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - - req->in.h.opcode = FUSE_RMDIR; - req->in.h.nodeid = get_node_id(dir); - req->in.numargs = 1; - req->in.args[0].size = entry->d_name.len + 1; - req->in.args[0].value = entry->d_name.name; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + FUSE_ARGS(args); + + args.in.h.opcode = FUSE_RMDIR; + args.in.h.nodeid = get_node_id(dir); + args.in.numargs = 1; + args.in.args[0].size = entry->d_name.len + 1; + args.in.args[0].value = entry->d_name.name; + err = fuse_simple_request(fc, &args); if (!err) { clear_nlink(entry->d_inode); fuse_invalidate_attr(dir); @@ -757,27 +715,21 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, int err; struct fuse_rename2_in inarg; struct fuse_conn *fc = get_fuse_conn(olddir); - struct fuse_req *req; - - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + FUSE_ARGS(args); memset(&inarg, 0, argsize); inarg.newdir = get_node_id(newdir); inarg.flags = flags; - req->in.h.opcode = opcode; - req->in.h.nodeid = get_node_id(olddir); - req->in.numargs = 3; - req->in.args[0].size = argsize; - req->in.args[0].value = &inarg; - req->in.args[1].size = oldent->d_name.len + 1; - req->in.args[1].value = oldent->d_name.name; - req->in.args[2].size = newent->d_name.len + 1; - req->in.args[2].value = newent->d_name.name; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = opcode; + args.in.h.nodeid = get_node_id(olddir); + args.in.numargs = 3; + args.in.args[0].size = argsize; + args.in.args[0].value = &inarg; + args.in.args[1].size = oldent->d_name.len + 1; + args.in.args[1].value = oldent->d_name.name; + args.in.args[2].size = newent->d_name.len + 1; + args.in.args[2].value = newent->d_name.name; + err = fuse_simple_request(fc, &args); if (!err) { /* ctime changes */ fuse_invalidate_attr(oldent->d_inode); @@ -849,19 +801,17 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, struct fuse_link_in inarg; struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + FUSE_ARGS(args); memset(&inarg, 0, sizeof(inarg)); inarg.oldnodeid = get_node_id(inode); - req->in.h.opcode = FUSE_LINK; - req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->in.args[1].size = newent->d_name.len + 1; - req->in.args[1].value = newent->d_name.name; - err = create_new_entry(fc, req, newdir, newent, inode->i_mode); + args.in.h.opcode = FUSE_LINK; + args.in.numargs = 2; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.in.args[1].size = newent->d_name.len + 1; 
+ args.in.args[1].value = newent->d_name.name; + err = create_new_entry(fc, &args, newdir, newent, inode->i_mode); /* Contrary to "normal" filesystems it can happen that link makes two "logical" inodes point to the same "physical" inode. We invalidate the attributes of the old one, so it @@ -929,13 +879,9 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, struct fuse_getattr_in inarg; struct fuse_attr_out outarg; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); u64 attr_version; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - attr_version = fuse_get_attr_version(fc); memset(&inarg, 0, sizeof(inarg)); @@ -947,20 +893,18 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, inarg.getattr_flags |= FUSE_GETATTR_FH; inarg.fh = ff->fh; } - req->in.h.opcode = FUSE_GETATTR; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->out.numargs = 1; + args.in.h.opcode = FUSE_GETATTR; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.out.numargs = 1; if (fc->minor < 9) - req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; + args.out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; else - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.out.args[0].size = sizeof(outarg); + args.out.args[0].value = &outarg; + err = fuse_simple_request(fc, &args); if (!err) { if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); @@ -1102,7 +1046,7 @@ int fuse_allow_current_process(struct fuse_conn *fc) static int fuse_access(struct inode *inode, int mask) { struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_access_in inarg; int err; @@ -1111,20 +1055,14 @@ static int fuse_access(struct inode *inode, int mask) if (fc->no_access) return 0; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - memset(&inarg, 0, sizeof(inarg)); inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); - req->in.h.opcode = FUSE_ACCESS; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = FUSE_ACCESS; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_access = 1; err = 0; @@ -1445,31 +1383,27 @@ static char *read_link(struct dentry *dentry) { struct inode *inode = dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_req_nopages(fc); + FUSE_ARGS(args); char *link; - - if (IS_ERR(req)) - return ERR_CAST(req); + ssize_t ret; link = (char *) __get_free_page(GFP_KERNEL); - if (!link) { - link = ERR_PTR(-ENOMEM); - goto out; - } - req->in.h.opcode = FUSE_READLINK; - req->in.h.nodeid = get_node_id(inode); - req->out.argvar = 1; - req->out.numargs = 1; - req->out.args[0].size = PAGE_SIZE - 1; - req->out.args[0].value = link; - fuse_request_send(fc, req); - if (req->out.h.error) { + if (!link) + return ERR_PTR(-ENOMEM); + + args.in.h.opcode = FUSE_READLINK; + args.in.h.nodeid = 
get_node_id(inode); + args.out.argvar = 1; + args.out.numargs = 1; + args.out.args[0].size = PAGE_SIZE - 1; + args.out.args[0].value = link; + ret = fuse_simple_request(fc, &args); + if (ret < 0) { free_page((unsigned long) link); - link = ERR_PTR(req->out.h.error); - } else - link[req->out.args[0].size] = '\0'; - out: - fuse_put_request(fc, req); + link = ERR_PTR(ret); + } else { + link[ret] = '\0'; + } fuse_invalidate_atime(inode); return link; } @@ -1629,22 +1563,22 @@ void fuse_release_nowrite(struct inode *inode) spin_unlock(&fc->lock); } -static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req, +static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args, struct inode *inode, struct fuse_setattr_in *inarg_p, struct fuse_attr_out *outarg_p) { - req->in.h.opcode = FUSE_SETATTR; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(*inarg_p); - req->in.args[0].value = inarg_p; - req->out.numargs = 1; + args->in.h.opcode = FUSE_SETATTR; + args->in.h.nodeid = get_node_id(inode); + args->in.numargs = 1; + args->in.args[0].size = sizeof(*inarg_p); + args->in.args[0].value = inarg_p; + args->out.numargs = 1; if (fc->minor < 9) - req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; + args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; else - req->out.args[0].size = sizeof(*outarg_p); - req->out.args[0].value = outarg_p; + args->out.args[0].size = sizeof(*outarg_p); + args->out.args[0].value = outarg_p; } /* @@ -1653,14 +1587,9 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req, int fuse_flush_times(struct inode *inode, struct fuse_file *ff) { struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_setattr_in inarg; struct fuse_attr_out outarg; - int err; - - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); @@ -1677,12 +1606,9 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff) inarg.valid |= FATTR_FH; inarg.fh = ff->fh; } - fuse_setattr_fill(fc, req, inode, &inarg, &outarg); - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); - return err; + return fuse_simple_request(fc, &args); } /* @@ -1698,7 +1624,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_setattr_in inarg; struct fuse_attr_out outarg; bool is_truncate = false; @@ -1723,10 +1649,6 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, if (attr->ia_valid & ATTR_SIZE) is_truncate = true; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - if (is_truncate) { fuse_set_nowrite(inode); set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); @@ -1747,10 +1669,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, inarg.valid |= FATTR_LOCKOWNER; inarg.lock_owner = fuse_lock_owner_id(fc, current->files); } - fuse_setattr_fill(fc, req, inode, &inarg, &outarg); - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); + err = fuse_simple_request(fc, &args); if (err) { if (err == -EINTR) fuse_invalidate_attr(inode); @@ -1837,32 +1757,26 @@ static int fuse_setxattr(struct dentry *entry, const char *name, { struct inode *inode = 
entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_setxattr_in inarg; int err; if (fc->no_setxattr) return -EOPNOTSUPP; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - memset(&inarg, 0, sizeof(inarg)); inarg.size = size; inarg.flags = flags; - req->in.h.opcode = FUSE_SETXATTR; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 3; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->in.args[1].size = strlen(name) + 1; - req->in.args[1].value = name; - req->in.args[2].size = size; - req->in.args[2].value = value; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = FUSE_SETXATTR; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 3; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.in.args[1].size = strlen(name) + 1; + args.in.args[1].value = name; + args.in.args[2].size = size; + args.in.args[2].value = value; + err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_setxattr = 1; err = -EOPNOTSUPP; @@ -1879,7 +1793,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name, { struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_getxattr_in inarg; struct fuse_getxattr_out outarg; ssize_t ret; @@ -1887,40 +1801,32 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name, if (fc->no_getxattr) return -EOPNOTSUPP; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - memset(&inarg, 0, sizeof(inarg)); inarg.size = size; - req->in.h.opcode = FUSE_GETXATTR; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->in.args[1].size = strlen(name) + 1; - req->in.args[1].value = name; + args.in.h.opcode = FUSE_GETXATTR; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 2; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.in.args[1].size = strlen(name) + 1; + args.in.args[1].value = name; /* This is really two different operations rolled into one */ - req->out.numargs = 1; + args.out.numargs = 1; if (size) { - req->out.argvar = 1; - req->out.args[0].size = size; - req->out.args[0].value = value; + args.out.argvar = 1; + args.out.args[0].size = size; + args.out.args[0].value = value; } else { - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; + args.out.args[0].size = sizeof(outarg); + args.out.args[0].value = &outarg; } - fuse_request_send(fc, req); - ret = req->out.h.error; - if (!ret) - ret = size ? 
req->out.args[0].size : outarg.size; - else { - if (ret == -ENOSYS) { - fc->no_getxattr = 1; - ret = -EOPNOTSUPP; - } + ret = fuse_simple_request(fc, &args); + if (!ret && !size) + ret = outarg.size; + if (ret == -ENOSYS) { + fc->no_getxattr = 1; + ret = -EOPNOTSUPP; } - fuse_put_request(fc, req); return ret; } @@ -1928,7 +1834,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) { struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_getxattr_in inarg; struct fuse_getxattr_out outarg; ssize_t ret; @@ -1939,38 +1845,30 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) if (fc->no_listxattr) return -EOPNOTSUPP; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - memset(&inarg, 0, sizeof(inarg)); inarg.size = size; - req->in.h.opcode = FUSE_LISTXATTR; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; + args.in.h.opcode = FUSE_LISTXATTR; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; /* This is really two different operations rolled into one */ - req->out.numargs = 1; + args.out.numargs = 1; if (size) { - req->out.argvar = 1; - req->out.args[0].size = size; - req->out.args[0].value = list; + args.out.argvar = 1; + args.out.args[0].size = size; + args.out.args[0].value = list; } else { - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; + args.out.args[0].size = sizeof(outarg); + args.out.args[0].value = &outarg; } - fuse_request_send(fc, req); - ret = req->out.h.error; - if (!ret) - ret = size ? 
req->out.args[0].size : outarg.size; - else { - if (ret == -ENOSYS) { - fc->no_listxattr = 1; - ret = -EOPNOTSUPP; - } + ret = fuse_simple_request(fc, &args); + if (!ret && !size) + ret = outarg.size; + if (ret == -ENOSYS) { + fc->no_listxattr = 1; + ret = -EOPNOTSUPP; } - fuse_put_request(fc, req); return ret; } @@ -1978,24 +1876,18 @@ static int fuse_removexattr(struct dentry *entry, const char *name) { struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); int err; if (fc->no_removexattr) return -EOPNOTSUPP; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - - req->in.h.opcode = FUSE_REMOVEXATTR; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = strlen(name) + 1; - req->in.args[0].value = name; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = FUSE_REMOVEXATTR; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 1; + args.in.args[0].size = strlen(name) + 1; + args.in.args[0].value = name; + err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_removexattr = 1; err = -EOPNOTSUPP; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index bf50259012a..760b2c55219 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -24,30 +24,22 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, int opcode, struct fuse_open_out *outargp) { struct fuse_open_in inarg; - struct fuse_req *req; - int err; - - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + FUSE_ARGS(args); memset(&inarg, 0, sizeof(inarg)); inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); if (!fc->atomic_o_trunc) inarg.flags &= ~O_TRUNC; - req->in.h.opcode = opcode; - req->in.h.nodeid = nodeid; - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->out.numargs = 1; - req->out.args[0].size = sizeof(*outargp); - req->out.args[0].value = outargp; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = opcode; + args.in.h.nodeid = nodeid; + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.out.numargs = 1; + args.out.args[0].size = sizeof(*outargp); + args.out.args[0].value = outargp; - return err; + return fuse_simple_request(fc, &args); } struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) @@ -89,37 +81,9 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff) return ff; } -static void fuse_release_async(struct work_struct *work) -{ - struct fuse_req *req; - struct fuse_conn *fc; - struct path path; - - req = container_of(work, struct fuse_req, misc.release.work); - path = req->misc.release.path; - fc = get_fuse_conn(path.dentry->d_inode); - - fuse_put_request(fc, req); - path_put(&path); -} - static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) { - if (fc->destroy_req) { - /* - * If this is a fuseblk mount, then it's possible that - * releasing the path will result in releasing the - * super block and sending the DESTROY request. If - * the server is single threaded, this would hang. - * For this reason do the path_put() in a separate - * thread. 
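For context (condensing the hunks below, not new behavior): the rewrite pins a bare inode reference instead of a path, so completing a RELEASE can no longer drop the last reference to the vfsmount, tear down the super block, and send DESTROY from the server's own context; the worker-thread detour described above therefore becomes unnecessary:

        /* when the release request is set up */
        req->misc.release.inode = igrab(file_inode(file)); /* may be NULL */

        /* when the request completes, from any context */
        static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
        {
                iput(req->misc.release.inode);  /* iput(NULL) is a no-op */
        }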
- */ - atomic_inc(&req->count); - INIT_WORK(&req->misc.release.work, fuse_release_async); - schedule_work(&req->misc.release.work); - } else { - path_put(&req->misc.release.path); - } + iput(req->misc.release.inode); } static void fuse_file_put(struct fuse_file *ff, bool sync) @@ -133,12 +97,12 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) * implement 'open' */ req->background = 0; - path_put(&req->misc.release.path); + iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else if (sync) { req->background = 0; fuse_request_send(ff->fc, req); - path_put(&req->misc.release.path); + iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else { req->end = fuse_release_end; @@ -297,9 +261,8 @@ void fuse_release_common(struct file *file, int opcode) inarg->lock_owner = fuse_lock_owner_id(ff->fc, (fl_owner_t) file); } - /* Hold vfsmount and dentry until release is finished */ - path_get(&file->f_path); - req->misc.release.path = file->f_path; + /* Hold inode until release is finished */ + req->misc.release.inode = igrab(file_inode(file)); /* * Normally this will send the RELEASE request, however if @@ -480,7 +443,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, struct inode *inode = file->f_mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_file *ff = file->private_data; - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_fsync_in inarg; int err; @@ -506,23 +469,15 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) goto out; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) { - err = PTR_ERR(req); - goto out; - } - memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; inarg.fsync_flags = datasync ? 1 : 0; - req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = isdir ? 
FUSE_FSYNCDIR : FUSE_FSYNC; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { if (isdir) fc->no_fsyncdir = 1; @@ -2156,49 +2111,44 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl, return 0; } -static void fuse_lk_fill(struct fuse_req *req, struct file *file, +static void fuse_lk_fill(struct fuse_args *args, struct file *file, const struct file_lock *fl, int opcode, pid_t pid, - int flock) + int flock, struct fuse_lk_in *inarg) { struct inode *inode = file_inode(file); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_file *ff = file->private_data; - struct fuse_lk_in *arg = &req->misc.lk_in; - - arg->fh = ff->fh; - arg->owner = fuse_lock_owner_id(fc, fl->fl_owner); - arg->lk.start = fl->fl_start; - arg->lk.end = fl->fl_end; - arg->lk.type = fl->fl_type; - arg->lk.pid = pid; + + memset(inarg, 0, sizeof(*inarg)); + inarg->fh = ff->fh; + inarg->owner = fuse_lock_owner_id(fc, fl->fl_owner); + inarg->lk.start = fl->fl_start; + inarg->lk.end = fl->fl_end; + inarg->lk.type = fl->fl_type; + inarg->lk.pid = pid; if (flock) - arg->lk_flags |= FUSE_LK_FLOCK; - req->in.h.opcode = opcode; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(*arg); - req->in.args[0].value = arg; + inarg->lk_flags |= FUSE_LK_FLOCK; + args->in.h.opcode = opcode; + args->in.h.nodeid = get_node_id(inode); + args->in.numargs = 1; + args->in.args[0].size = sizeof(*inarg); + args->in.args[0].value = inarg; } static int fuse_getlk(struct file *file, struct file_lock *fl) { struct inode *inode = file_inode(file); struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); + struct fuse_lk_in inarg; struct fuse_lk_out outarg; int err; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - - fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0); - req->out.numargs = 1; - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg); + args.out.numargs = 1; + args.out.args[0].size = sizeof(outarg); + args.out.args[0].value = &outarg; + err = fuse_simple_request(fc, &args); if (!err) err = convert_fuse_file_lock(&outarg.lk, fl); @@ -2209,7 +2159,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) { struct inode *inode = file_inode(file); struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); + struct fuse_lk_in inarg; int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK; pid_t pid = fl->fl_type != F_UNLCK ? 
current->tgid : 0; int err; @@ -2223,17 +2174,13 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) if (fl->fl_flags & FL_CLOSE) return 0; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + fuse_lk_fill(&args, file, fl, opcode, pid, flock, &inarg); + err = fuse_simple_request(fc, &args); - fuse_lk_fill(req, file, fl, opcode, pid, flock); - fuse_request_send(fc, req); - err = req->out.h.error; /* locking is restartable */ if (err == -EINTR) err = -ERESTARTSYS; - fuse_put_request(fc, req); + return err; } @@ -2283,7 +2230,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_bmap_in inarg; struct fuse_bmap_out outarg; int err; @@ -2291,24 +2238,18 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) if (!inode->i_sb->s_bdev || fc->no_bmap) return 0; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return 0; - memset(&inarg, 0, sizeof(inarg)); inarg.block = block; inarg.blocksize = inode->i_sb->s_blocksize; - req->in.h.opcode = FUSE_BMAP; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->out.numargs = 1; - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = FUSE_BMAP; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.out.numargs = 1; + args.out.args[0].size = sizeof(outarg); + args.out.args[0].value = &outarg; + err = fuse_simple_request(fc, &args); if (err == -ENOSYS) fc->no_bmap = 1; @@ -2776,7 +2717,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) struct fuse_conn *fc = ff->fc; struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; struct fuse_poll_out outarg; - struct fuse_req *req; + FUSE_ARGS(args); int err; if (fc->no_poll) @@ -2794,21 +2735,15 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) fuse_register_polled_file(fc, ff); } - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return POLLERR; - - req->in.h.opcode = FUSE_POLL; - req->in.h.nodeid = ff->nodeid; - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->out.numargs = 1; - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = FUSE_POLL; + args.in.h.nodeid = ff->nodeid; + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.out.numargs = 1; + args.out.args[0].size = sizeof(outarg); + args.out.args[0].value = &outarg; + err = fuse_simple_request(fc, &args); if (!err) return outarg.revents; @@ -2949,10 +2884,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, loff_t length) { struct fuse_file *ff = file->private_data; - struct inode *inode = file->f_inode; + struct inode *inode = file_inode(file); struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = ff->fc; - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_fallocate_in inarg = { .fh = ff->fh, .offset = offset, @@ -2985,25 +2920,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if 
(!(mode & FALLOC_FL_KEEP_SIZE)) set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) { - err = PTR_ERR(req); - goto out; - } - - req->in.h.opcode = FUSE_FALLOCATE; - req->in.h.nodeid = ff->nodeid; - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - fuse_request_send(fc, req); - err = req->out.h.error; + args.in.h.opcode = FUSE_FALLOCATE; + args.in.h.nodeid = ff->nodeid; + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_fallocate = 1; err = -EOPNOTSUPP; } - fuse_put_request(fc, req); - if (err) goto out; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e8e47a6ab51..e0fc6725d1d 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -213,7 +213,7 @@ struct fuse_out { unsigned numargs; /** Array of arguments */ - struct fuse_arg args[3]; + struct fuse_arg args[2]; }; /** FUSE page descriptor */ @@ -222,6 +222,25 @@ struct fuse_page_desc { unsigned int offset; }; +struct fuse_args { + struct { + struct { + uint32_t opcode; + uint64_t nodeid; + } h; + unsigned numargs; + struct fuse_in_arg args[3]; + + } in; + struct { + unsigned argvar:1; + unsigned numargs; + struct fuse_arg args[2]; + } out; +}; + +#define FUSE_ARGS(args) struct fuse_args args = {} + /** The request state */ enum fuse_req_state { FUSE_REQ_INIT = 0, @@ -305,11 +324,8 @@ struct fuse_req { /** Data for asynchronous requests */ union { struct { - union { - struct fuse_release_in in; - struct work_struct work; - }; - struct path path; + struct fuse_release_in in; + struct inode *inode; } release; struct fuse_init_in init_in; struct fuse_init_out init_out; @@ -324,7 +340,6 @@ struct fuse_req { struct fuse_req *next; } write; struct fuse_notify_retrieve_in retrieve_in; - struct fuse_lk_in lk_in; } misc; /** page vector */ @@ -754,15 +769,6 @@ struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc, void __fuse_get_request(struct fuse_req *req); /** - * Get a request, may fail with -ENOMEM, - * useful for callers who doesn't use req->pages[] - */ -static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc) -{ - return fuse_get_req(fc, 0); -} - -/** * Gets a requests for a file operation, always succeeds */ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, @@ -780,6 +786,11 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req); /** + * Simple request sending that does request allocation and freeing + */ +ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args); + +/** * Send a request in the background */ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req); @@ -804,8 +815,6 @@ void fuse_invalidate_atime(struct inode *inode); */ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); -void fuse_conn_kill(struct fuse_conn *fc); - /** * Initialize fuse_conn */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 03246cd9d47..6749109f255 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -376,28 +376,13 @@ static void fuse_bdi_destroy(struct fuse_conn *fc) bdi_destroy(&fc->bdi); } -void fuse_conn_kill(struct fuse_conn *fc) -{ - spin_lock(&fc->lock); - fc->connected = 0; - fc->blocked = 0; - fc->initialized = 1; - spin_unlock(&fc->lock); - /* Flush all readers on this fs */ - kill_fasync(&fc->fasync, SIGIO, POLL_IN); - wake_up_all(&fc->waitq); - 
wake_up_all(&fc->blocked_waitq); - wake_up_all(&fc->reserved_req_waitq); -} -EXPORT_SYMBOL_GPL(fuse_conn_kill); - static void fuse_put_super(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); fuse_send_destroy(fc); - fuse_conn_kill(fc); + fuse_abort_conn(fc); mutex_lock(&fuse_mutex); list_del(&fc->entry); fuse_ctl_remove_conn(fc); @@ -425,7 +410,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct fuse_conn *fc = get_fuse_conn_super(sb); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_statfs_out outarg; int err; @@ -434,23 +419,17 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - memset(&outarg, 0, sizeof(outarg)); - req->in.numargs = 0; - req->in.h.opcode = FUSE_STATFS; - req->in.h.nodeid = get_node_id(dentry->d_inode); - req->out.numargs = 1; - req->out.args[0].size = + args.in.numargs = 0; + args.in.h.opcode = FUSE_STATFS; + args.in.h.nodeid = get_node_id(dentry->d_inode); + args.out.numargs = 1; + args.out.args[0].size = fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg); - req->out.args[0].value = &outarg; - fuse_request_send(fc, req); - err = req->out.h.error; + args.out.args[0].value = &outarg; + err = fuse_simple_request(fc, &args); if (!err) convert_fuse_statfs(buf, &outarg.st); - fuse_put_request(fc, req); return err; } diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 386303dca38..dddbde4f56f 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -224,7 +224,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw)); - /* If a node has zero dsize, we only have to keep if it if it might be the + /* If a node has zero dsize, we only have to keep it if it might be the node with highest version -- i.e. the one which will end up as f->metadata. Note that such nodes won't be REF_UNCHECKED since there are no data to check anyway. 
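That rule, roughly in code (kill_tn, keep_tn and highest_known_version are hypothetical stand-ins for the real tree logic further down in jffs2_add_tn_to_tree()):

        if (tn->fn->size == 0 && tn->version < highest_known_version)
                kill_tn(c, tn); /* can never become f->metadata: drop it */
        else
                keep_tn(c, tn); /* might still win as metadata: insert it */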
*/ diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c index c522d098bb4..bc5385471a6 100644 --- a/fs/jffs2/summary.c +++ b/fs/jffs2/summary.c @@ -844,6 +844,7 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock /* Write out summary information - called from jffs2_do_reserve_space */ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c) + __must_hold(&c->erase_completion_block) { int datasize, infosize, padsize; struct jffs2_eraseblock *jeb; diff --git a/fs/namespace.c b/fs/namespace.c index 30df6e7dd80..cd1e9681a0c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -963,7 +963,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, } /* Don't allow unprivileged users to reveal what is under a mount */ - if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) + if ((flag & CL_UNPRIVILEGED) && + (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire))) mnt->mnt.mnt_flags |= MNT_LOCKED; atomic_inc(&sb->s_active); @@ -1369,6 +1370,8 @@ void umount_tree(struct mount *mnt, int how) } if (last) { last->mnt_hash.next = unmounted.first; + if (unmounted.first) + unmounted.first->pprev = &last->mnt_hash.next; unmounted.first = tmp_list.first; unmounted.first->pprev = &unmounted.first; } @@ -1544,6 +1547,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) goto dput_and_out; if (mnt->mnt.mnt_flags & MNT_LOCKED) goto dput_and_out; + retval = -EPERM; + if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) + goto dput_and_out; retval = do_umount(mnt, flags); dput_and_out: @@ -1606,7 +1612,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, if (IS_ERR(q)) return q; - q->mnt.mnt_flags &= ~MNT_LOCKED; q->mnt_mountpoint = mnt->mnt_mountpoint; p = mnt; @@ -2097,7 +2102,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags, } if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && !(mnt_flags & MNT_NODEV)) { - return -EPERM; + /* Was the nodev implicitly added in mount? 
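Illustration of the effect (not in the patch): in a user namespace, mounting a filesystem without FS_USERNS_DEV_MOUNT makes the kernel force MNT_NODEV, so a later remount that did not spell out "nodev" used to trip the MNT_LOCK_NODEV check even though the user never cleared anything. Roughly, from userspace:

        #include <sys/mount.h>

        /* inside a new user+mount namespace; MNT_NODEV is added implicitly */
        mount("none", "/tmp/m", "tmpfs", 0, "");

        /*
         * previously failed with EPERM; with this hunk the implicit nodev
         * is quietly re-added and the remount succeeds
         */
        mount("none", "/tmp/m", "tmpfs", MS_REMOUNT, "");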
*/ + if ((mnt->mnt_ns->user_ns != &init_user_ns) && + !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) { + mnt_flags |= MNT_NODEV; + } else { + return -EPERM; + } } if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && !(mnt_flags & MNT_NOSUID)) { @@ -2958,6 +2969,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); touch_mnt_namespace(current->nsproxy->mnt_ns); + /* A moved mount should not expire automatically */ + list_del_init(&new_mnt->mnt_expire); unlock_mount_hash(); chroot_fs_refs(&root, &new); put_mountpoint(root_mp); @@ -3002,6 +3015,7 @@ static void __init init_mount_tree(void) root.mnt = mnt; root.dentry = mnt->mnt_root; + mnt->mnt_flags |= MNT_LOCKED; set_fs_pwd(current->fs, &root); set_fs_root(current->fs, &root); diff --git a/fs/pnode.c b/fs/pnode.c index aae331a5d03..260ac8f898a 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -242,6 +242,7 @@ static int propagate_one(struct mount *m) child = copy_tree(last_source, last_source->mnt.mnt_root, type); if (IS_ERR(child)) return PTR_ERR(child); + child->mnt.mnt_flags &= ~MNT_LOCKED; mnt_set_mountpoint(m, mp, child); last_dest = m; last_source = child; diff --git a/fs/proc/base.c b/fs/proc/base.c index 590aeda5af1..3f3d7aeb071 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2464,6 +2464,57 @@ static const struct file_operations proc_projid_map_operations = { .llseek = seq_lseek, .release = proc_id_map_release, }; + +static int proc_setgroups_open(struct inode *inode, struct file *file) +{ + struct user_namespace *ns = NULL; + struct task_struct *task; + int ret; + + ret = -ESRCH; + task = get_proc_task(inode); + if (task) { + rcu_read_lock(); + ns = get_user_ns(task_cred_xxx(task, user_ns)); + rcu_read_unlock(); + put_task_struct(task); + } + if (!ns) + goto err; + + if (file->f_mode & FMODE_WRITE) { + ret = -EACCES; + if (!ns_capable(ns, CAP_SYS_ADMIN)) + goto err_put_ns; + } + + ret = single_open(file, &proc_setgroups_show, ns); + if (ret) + goto err_put_ns; + + return 0; +err_put_ns: + put_user_ns(ns); +err: + return ret; +} + +static int proc_setgroups_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + int ret = single_release(inode, file); + put_user_ns(ns); + return ret; +} + +static const struct file_operations proc_setgroups_operations = { + .open = proc_setgroups_open, + .write = proc_setgroups_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_setgroups_release, +}; #endif /* CONFIG_USER_NS */ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, @@ -2572,6 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), + REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif #ifdef CONFIG_CHECKPOINT_RESTORE REG("timers", S_IRUGO, proc_timers_operations), @@ -2916,6 +2968,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), + REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif }; diff --git a/include/dt-bindings/thermal/tegra124-soctherm.h b/include/dt-bindings/thermal/tegra124-soctherm.h new file mode 100644 index 
00000000000..85aaf66690f --- /dev/null +++ b/include/dt-bindings/thermal/tegra124-soctherm.h @@ -0,0 +1,13 @@ +/* + * This header provides constants for binding nvidia,tegra124-soctherm. + */ + +#ifndef _DT_BINDINGS_THERMAL_TEGRA124_SOCTHERM_H +#define _DT_BINDINGS_THERMAL_TEGRA124_SOCTHERM_H + +#define TEGRA124_SOCTHERM_SENSOR_CPU 0 +#define TEGRA124_SOCTHERM_SENSOR_MEM 1 +#define TEGRA124_SOCTHERM_SENSOR_GPU 2 +#define TEGRA124_SOCTHERM_SENSOR_PLLX 3 + +#endif diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index ad9db6045b2..b3f45a57834 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -60,7 +60,8 @@ struct arch_timer_cpu { #ifdef CONFIG_KVM_ARM_TIMER int kvm_timer_hyp_init(void); -int kvm_timer_init(struct kvm *kvm); +void kvm_timer_enable(struct kvm *kvm); +void kvm_timer_init(struct kvm *kvm); void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, const struct kvm_irq_level *irq); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); @@ -77,11 +78,8 @@ static inline int kvm_timer_hyp_init(void) return 0; }; -static inline int kvm_timer_init(struct kvm *kvm) -{ - return 0; -} - +static inline void kvm_timer_enable(struct kvm *kvm) {} +static inline void kvm_timer_init(struct kvm *kvm) {} static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, const struct kvm_irq_level *irq) {} static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 206dcc3b3f7..ac4888dc86b 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -274,7 +274,7 @@ struct kvm_exit_mmio; #ifdef CONFIG_KVM_ARM_VGIC int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); -int kvm_vgic_init(struct kvm *kvm); +int kvm_vgic_map_resources(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); void kvm_vgic_destroy(struct kvm *kvm); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); @@ -287,7 +287,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) -#define vgic_initialized(k) ((k)->arch.vgic.ready) +#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) +#define vgic_ready(k) ((k)->arch.vgic.ready) int vgic_v2_probe(struct device_node *vgic_node, const struct vgic_ops **ops, @@ -321,7 +322,7 @@ static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, return -ENXIO; } -static inline int kvm_vgic_init(struct kvm *kvm) +static inline int kvm_vgic_map_resources(struct kvm *kvm) { return 0; } @@ -373,6 +374,11 @@ static inline bool vgic_initialized(struct kvm *kvm) { return true; } + +static inline bool vgic_ready(struct kvm *kvm) +{ + return true; +} #endif #endif diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 5f338684413..260d78b587c 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -13,6 +13,7 @@ struct ceph_auth_client; struct ceph_authorizer; +struct ceph_msg; struct ceph_auth_handshake { struct ceph_authorizer *authorizer; @@ -20,6 +21,10 @@ struct ceph_auth_handshake { size_t authorizer_buf_len; void *authorizer_reply_buf; size_t authorizer_reply_buf_len; + int (*sign_message)(struct ceph_auth_handshake *auth, + struct ceph_msg *msg); + int (*check_message_signature)(struct ceph_auth_handshake *auth, + struct ceph_msg *msg); }; struct ceph_auth_client_ops { @@ -66,6 +71,11 @@ struct ceph_auth_client_ops { void (*reset)(struct ceph_auth_client *ac); void 
(*destroy)(struct ceph_auth_client *ac); + + int (*sign_message)(struct ceph_auth_handshake *auth, + struct ceph_msg *msg); + int (*check_message_signature)(struct ceph_auth_handshake *auth, + struct ceph_msg *msg); }; struct ceph_auth_client { @@ -113,4 +123,20 @@ extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type); +static inline int ceph_auth_sign_message(struct ceph_auth_handshake *auth, + struct ceph_msg *msg) +{ + if (auth->sign_message) + return auth->sign_message(auth, msg); + return 0; +} + +static inline +int ceph_auth_check_message_signature(struct ceph_auth_handshake *auth, + struct ceph_msg *msg) +{ + if (auth->check_message_signature) + return auth->check_message_signature(auth, msg); + return 0; +} #endif diff --git a/include/linux/ceph/buffer.h b/include/linux/ceph/buffer.h index 07ad423cc37..07ca15e7610 100644 --- a/include/linux/ceph/buffer.h +++ b/include/linux/ceph/buffer.h @@ -10,8 +10,7 @@ /* * a simple reference counted buffer. * - * use kmalloc for small sizes (<= one page), vmalloc for larger - * sizes. + * use kmalloc for smaller sizes, vmalloc for larger sizes. */ struct ceph_buffer { struct kref kref; diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index d12659ce550..71e05bbf8ce 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -84,6 +84,7 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_PGPOOL3 | \ CEPH_FEATURE_OSDENC | \ CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_MSG_AUTH | \ CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_REPLY_CREATE_INODE | \ CEPH_FEATURE_OSDHASHPSPOOL | \ diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 3c97d5e9b95..c0dadaac26e 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -522,8 +522,11 @@ struct ceph_mds_reply_dirfrag { __le32 dist[]; } __attribute__ ((packed)); -#define CEPH_LOCK_FCNTL 1 -#define CEPH_LOCK_FLOCK 2 +#define CEPH_LOCK_FCNTL 1 +#define CEPH_LOCK_FLOCK 2 +#define CEPH_LOCK_FCNTL_INTR 3 +#define CEPH_LOCK_FLOCK_INTR 4 + #define CEPH_LOCK_SHARED 1 #define CEPH_LOCK_EXCL 2 @@ -549,6 +552,7 @@ struct ceph_filelock { int ceph_flags_to_mode(int flags); +#define CEPH_INLINE_NONE ((__u64)-1) /* capability bits */ #define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */ @@ -613,6 +617,8 @@ int ceph_flags_to_mode(int flags); CEPH_CAP_LINK_SHARED | \ CEPH_CAP_FILE_SHARED | \ CEPH_CAP_XATTR_SHARED) +#define CEPH_STAT_CAP_INLINE_DATA (CEPH_CAP_FILE_SHARED | \ + CEPH_CAP_FILE_RD) #define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \ CEPH_CAP_LINK_SHARED | \ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 07bc359b88a..8b11a79ca1c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -29,6 +29,7 @@ #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ #define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ +#define CEPH_OPT_NOMSGAUTH (1<<4) /* not require cephx message signature */ #define CEPH_OPT_DEFAULT (0) @@ -184,7 +185,6 @@ extern bool libceph_compatible(void *data); extern const char *ceph_msg_type_name(int type); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); extern void *ceph_kvmalloc(size_t size, gfp_t flags); -extern void ceph_kvfree(const void *ptr); extern struct ceph_options 
*ceph_parse_options(char *options, const char *dev_name, const char *dev_name_end, diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 40ae58e3e9d..d9d396c1650 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -42,6 +42,10 @@ struct ceph_connection_operations { struct ceph_msg * (*alloc_msg) (struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); + int (*sign_message) (struct ceph_connection *con, struct ceph_msg *msg); + + int (*check_message_signature) (struct ceph_connection *con, + struct ceph_msg *msg); }; /* use format string %s%d */ @@ -142,7 +146,10 @@ struct ceph_msg_data_cursor { */ struct ceph_msg { struct ceph_msg_header hdr; /* header */ - struct ceph_msg_footer footer; /* footer */ + union { + struct ceph_msg_footer footer; /* footer */ + struct ceph_msg_footer_old old_footer; /* old format footer */ + }; struct kvec front; /* unaligned blobs of message */ struct ceph_buffer *middle; diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 3d94a73b5f3..1c1887206ff 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -152,7 +152,8 @@ struct ceph_msg_header { receiver: mask against ~PAGE_MASK */ struct ceph_entity_name src; - __le32 reserved; + __le16 compat_version; + __le16 reserved; __le32 crc; /* header crc32c */ } __attribute__ ((packed)); @@ -164,13 +165,21 @@ struct ceph_msg_header { /* * follows data payload */ +struct ceph_msg_footer_old { + __le32 front_crc, middle_crc, data_crc; + __u8 flags; +} __attribute__ ((packed)); + struct ceph_msg_footer { __le32 front_crc, middle_crc, data_crc; + // sig holds the 64 bits of the digital signature for the message PLR + __le64 sig; __u8 flags; } __attribute__ ((packed)); #define CEPH_MSG_FOOTER_COMPLETE (1<<0) /* msg wasn't aborted */ #define CEPH_MSG_FOOTER_NOCRC (1<<1) /* no data crc */ +#define CEPH_MSG_FOOTER_SIGNED (1<<2) /* msg was signed */ #endif diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 03aeb27fcc6..5d86416d35f 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -87,6 +87,13 @@ struct ceph_osd_req_op { struct ceph_osd_data osd_data; } extent; struct { + __le32 name_len; + __le32 value_len; + __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ + __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ + struct ceph_osd_data osd_data; + } xattr; + struct { const char *class_name; const char *method_name; struct ceph_osd_data request_info; @@ -295,6 +302,9 @@ extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *class, const char *method); +extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, + u16 opcode, const char *name, const void *value, + size_t size, u8 cmp_op, u8 cmp_mode); extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u64 cookie, u64 version, int flag); @@ -318,7 +328,8 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, struct ceph_file_layout *layout, struct ceph_vino vino, u64 offset, u64 *len, - int num_ops, int opcode, int flags, + unsigned int which, int num_ops, + int opcode, int flags, struct ceph_snap_context *snapc, u32 truncate_seq, u64 truncate_size, bool use_mempool); diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h index 5f871d84ddc..13d71fe18b0 100644 --- 
a/include/linux/ceph/pagelist.h +++ b/include/linux/ceph/pagelist.h @@ -1,8 +1,10 @@ #ifndef __FS_CEPH_PAGELIST_H #define __FS_CEPH_PAGELIST_H -#include <linux/list.h> +#include <asm/byteorder.h> #include <linux/atomic.h> +#include <linux/list.h> +#include <linux/types.h> struct ceph_pagelist { struct list_head head; diff --git a/include/linux/clock_cooling.h b/include/linux/clock_cooling.h new file mode 100644 index 00000000000..4d1019d56f7 --- /dev/null +++ b/include/linux/clock_cooling.h @@ -0,0 +1,65 @@ +/* + * linux/include/linux/clock_cooling.h + * + * Copyright (C) 2014 Eduardo Valentin <edubezval@gmail.com> + * + * Copyright (C) 2013 Texas Instruments Inc. + * Contact: Eduardo Valentin <eduardo.valentin@ti.com> + * + * Highly based on cpu_cooling.c. + * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com) + * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef __CPU_COOLING_H__ +#define __CPU_COOLING_H__ + +#include <linux/of.h> +#include <linux/thermal.h> +#include <linux/cpumask.h> + +#ifdef CONFIG_CLOCK_THERMAL +/** + * clock_cooling_register - function to create clock cooling device. + * @dev: struct device pointer to the device used as clock cooling device. + * @clock_name: string containing the clock used as cooling mechanism. + */ +struct thermal_cooling_device * +clock_cooling_register(struct device *dev, const char *clock_name); + +/** + * clock_cooling_unregister - function to remove clock cooling device. + * @cdev: thermal cooling device pointer. + */ +void clock_cooling_unregister(struct thermal_cooling_device *cdev); + +unsigned long clock_cooling_get_level(struct thermal_cooling_device *cdev, + unsigned long freq); +#else /* !CONFIG_CLOCK_THERMAL */ +static inline struct thermal_cooling_device * +clock_cooling_register(struct device *dev, const char *clock_name) +{ + return NULL; +} +static inline +void clock_cooling_unregister(struct thermal_cooling_device *cdev) +{ +} +static inline +unsigned long clock_cooling_get_level(struct thermal_cooling_device *cdev, + unsigned long freq) +{ + return THERMAL_CSTATE_INVALID; +} +#endif /* CONFIG_CLOCK_THERMAL */ + +#endif /* __CPU_COOLING_H__ */ diff --git a/include/linux/cred.h b/include/linux/cred.h index b2d0820837c..2fb2ca2127e 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -68,6 +68,7 @@ extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern void set_groups(struct cred *, struct group_info *); extern int groups_search(const struct group_info *, kgid_t); +extern bool may_setgroups(void); /* access the groups "array" with this macro */ #define GROUP_AT(gi, i) \ diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index 84d60cb841b..bf0321eabbd 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -29,7 +29,16 @@ #include <linux/of_platform.h> #include <linux/interrupt.h> -#define FSL_IFC_BANK_COUNT 4 +/* + * The actual number of banks implemented depends on the IFC version + * - IFC version 1.0 implements 4 banks. 
+ * - IFC version 1.1 onward implements 8 banks. + */ +#define FSL_IFC_BANK_COUNT 8 + +#define FSL_IFC_VERSION_MASK 0x0F0F0000 +#define FSL_IFC_VERSION_1_0_0 0x01000000 +#define FSL_IFC_VERSION_1_1_0 0x01010000 /* * CSPR - Chip Select Property Register @@ -776,23 +785,23 @@ struct fsl_ifc_regs { __be32 cspr; u32 res2; } cspr_cs[FSL_IFC_BANK_COUNT]; - u32 res3[0x19]; + u32 res3[0xd]; struct { __be32 amask; u32 res4[0x2]; } amask_cs[FSL_IFC_BANK_COUNT]; - u32 res5[0x18]; + u32 res5[0xc]; struct { __be32 csor; __be32 csor_ext; u32 res6; } csor_cs[FSL_IFC_BANK_COUNT]; - u32 res7[0x18]; + u32 res7[0xc]; struct { __be32 ftim[4]; u32 res8[0x8]; } ftim_cs[FSL_IFC_BANK_COUNT]; - u32 res9[0x60]; + u32 res9[0x30]; __be32 rb_stat; u32 res10[0x2]; __be32 ifc_gcr; @@ -827,6 +836,8 @@ struct fsl_ifc_ctrl { int nand_irq; spinlock_t lock; void *nand; + int version; + int banks; u32 nand_stat; wait_queue_head_t nand_wait; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a6059bdf7b0..26f106022c8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -43,6 +43,7 @@ * include/linux/kvm_h. */ #define KVM_MEMSLOT_INVALID (1UL << 16) +#define KVM_MEMSLOT_INCOHERENT (1UL << 17) /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 @@ -353,6 +354,8 @@ struct kvm_memslots { struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM]; /* The mapping table from slot id to the index in memslots[]. */ short id_to_index[KVM_MEM_SLOTS_NUM]; + atomic_t lru_slot; + int used_slots; }; struct kvm { @@ -395,7 +398,6 @@ struct kvm { * Update side is protected by irq_lock. */ struct kvm_irq_routing_table __rcu *irq_routing; - struct hlist_head mask_notifier_list; #endif #ifdef CONFIG_HAVE_KVM_IRQFD struct hlist_head irq_ack_notifier_list; @@ -447,6 +449,14 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); +#ifdef __KVM_HAVE_IOAPIC +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); +#else +static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) +{ +} +#endif + #ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd_init(void); void kvm_irqfd_exit(void); @@ -711,44 +721,6 @@ struct kvm_irq_ack_notifier { void (*irq_acked)(struct kvm_irq_ack_notifier *kian); }; -struct kvm_assigned_dev_kernel { - struct kvm_irq_ack_notifier ack_notifier; - struct list_head list; - int assigned_dev_id; - int host_segnr; - int host_busnr; - int host_devfn; - unsigned int entries_nr; - int host_irq; - bool host_irq_disabled; - bool pci_2_3; - struct msix_entry *host_msix_entries; - int guest_irq; - struct msix_entry *guest_msix_entries; - unsigned long irq_requested_type; - int irq_source_id; - int flags; - struct pci_dev *dev; - struct kvm *kvm; - spinlock_t intx_lock; - spinlock_t intx_mask_lock; - char irq_name[32]; - struct pci_saved_state *pci_saved_state; -}; - -struct kvm_irq_mask_notifier { - void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); - int irq; - struct hlist_node link; -}; - -void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn); -void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn); -void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, - bool mask); - int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *entries, int gsi); int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); @@ -770,12 +742,6 @@ void 
kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); -int kvm_iommu_map_guest(struct kvm *kvm); -int kvm_iommu_unmap_guest(struct kvm *kvm); -int kvm_assign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev); -int kvm_deassign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev); #else static inline int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) @@ -787,11 +753,6 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot) { } - -static inline int kvm_iommu_unmap_guest(struct kvm *kvm) -{ - return 0; -} #endif static inline void kvm_guest_enter(void) @@ -832,12 +793,28 @@ static inline void kvm_guest_exit(void) static inline struct kvm_memory_slot * search_memslots(struct kvm_memslots *slots, gfn_t gfn) { - struct kvm_memory_slot *memslot; + int start = 0, end = slots->used_slots; + int slot = atomic_read(&slots->lru_slot); + struct kvm_memory_slot *memslots = slots->memslots; + + if (gfn >= memslots[slot].base_gfn && + gfn < memslots[slot].base_gfn + memslots[slot].npages) + return &memslots[slot]; - kvm_for_each_memslot(memslot, slots) - if (gfn >= memslot->base_gfn && - gfn < memslot->base_gfn + memslot->npages) - return memslot; + while (start < end) { + slot = start + (end - start) / 2; + + if (gfn >= memslots[slot].base_gfn) + end = slot; + else + start = slot + 1; + } + + if (gfn >= memslots[start].base_gfn && + gfn < memslots[start].base_gfn + memslots[start].npages) { + atomic_set(&slots->lru_slot, start); + return &memslots[start]; + } return NULL; } @@ -1011,25 +988,6 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; } #endif -#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT - -long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, - unsigned long arg); - -void kvm_free_all_assigned_devices(struct kvm *kvm); - -#else - -static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, - unsigned long arg) -{ - return -ENOTTY; -} - -static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {} - -#endif - static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) { set_bit(req, &vcpu->requests); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b606bb689a3..931da7e917c 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -54,33 +54,6 @@ typedef u64 hfn_t; typedef hfn_t pfn_t; -union kvm_ioapic_redirect_entry { - u64 bits; - struct { - u8 vector; - u8 delivery_mode:3; - u8 dest_mode:1; - u8 delivery_status:1; - u8 polarity:1; - u8 remote_irr:1; - u8 trig_mode:1; - u8 mask:1; - u8 reserve:7; - u8 reserved[4]; - u8 dest_id; - } fields; -}; - -struct kvm_lapic_irq { - u32 vector; - u32 delivery_mode; - u32 dest_mode; - u32 level; - u32 trig_mode; - u32 shorthand; - u32 dest_id; -}; - struct gfn_to_hva_cache { u64 generation; gpa_t gpa; diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index e4d451e4600..3d4ea7eb2b6 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -455,8 +455,21 @@ struct nand_hw_control { * be provided if an hardware ECC is available * @calculate: function for ECC calculation or readback from ECC hardware * @correct: function for ECC correction, matching to ECC generator (sw/hw) - * @read_page_raw: function to read a raw page without ECC - * 
@write_page_raw: function to write a raw page without ECC + * @read_page_raw: function to read a raw page without ECC. This function + * should hide the specific layout used by the ECC + * controller and always return contiguous in-band and + * out-of-band data even if they're not stored + * contiguously on the NAND chip (e.g. + * NAND_ECC_HW_SYNDROME interleaves in-band and + * out-of-band data). + * @write_page_raw: function to write a raw page without ECC. This function + * should hide the specific layout used by the ECC + * controller and consider the passed data as contiguous + * in-band and out-of-band data. ECC controller is + * responsible for doing the appropriate transformations + * to adapt to its specific layout (e.g. + * NAND_ECC_HW_SYNDROME interleaves in-band and + * out-of-band data). * @read_page: function to read a page according to the ECC generator * requirements; returns maximum number of bitflips corrected in * any single ECC step, 0 if bitflips uncorrectable, -EIO hw error @@ -723,6 +736,7 @@ struct nand_chip { #define NAND_MFR_EON 0x92 #define NAND_MFR_SANDISK 0x45 #define NAND_MFR_INTEL 0x89 +#define NAND_MFR_ATO 0x9b /* The maximum expected count of bytes in the NAND ID sequence */ #define NAND_MAX_ID_LEN 8 diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 046a0a2e4c4..63aeccf9ddc 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -116,6 +116,10 @@ enum spi_nor_ops { SPI_NOR_OPS_UNLOCK, }; +enum spi_nor_option_flags { + SNOR_F_USE_FSR = BIT(0), +}; + /** * struct spi_nor - Structure for defining a the SPI NOR layer * @mtd: point to a mtd_info structure @@ -129,6 +133,7 @@ enum spi_nor_ops { * @program_opcode: the program opcode * @flash_read: the mode of the read * @sst_write_second: used by the SST write operation + * @flags: flag options for the current SPI-NOR (SNOR_F_*) * @cfg: used by the read_xfer/write_xfer * @cmd_buf: used by the write_reg * @prepare: [OPTIONAL] do some preparations for the @@ -139,9 +144,6 @@ enum spi_nor_ops { * @write_xfer: [OPTIONAL] the writefundamental primitive * @read_reg: [DRIVER-SPECIFIC] read out the register * @write_reg: [DRIVER-SPECIFIC] write data to the register - * @read_id: [REPLACEABLE] read out the ID data, and find - * the proper spi_device_id - * @wait_till_ready: [REPLACEABLE] wait till the NOR becomes ready * @read: [DRIVER-SPECIFIC] read data from the SPI NOR * @write: [DRIVER-SPECIFIC] write data to the SPI NOR * @erase: [DRIVER-SPECIFIC] erase a sector of the SPI NOR @@ -160,6 +162,7 @@ struct spi_nor { u8 program_opcode; enum read_mode flash_read; bool sst_write_second; + u32 flags; struct spi_nor_xfer_cfg cfg; u8 cmd_buf[SPI_NOR_MAX_CMD_SIZE]; @@ -172,8 +175,6 @@ struct spi_nor { int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len); int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len, int write_enable); - const struct spi_device_id *(*read_id)(struct spi_nor *nor); - int (*wait_till_ready)(struct spi_nor *nor); int (*read)(struct spi_nor *nor, loff_t from, size_t len, size_t *retlen, u_char *read_buf); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index ef90838b36a..c611a02fbc5 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -29,10 +29,10 @@ #include <linux/idr.h> #include <linux/device.h> #include <linux/workqueue.h> +#include <uapi/linux/thermal.h> #define THERMAL_TRIPS_NONE -1 #define THERMAL_MAX_TRIPS 12 -#define THERMAL_NAME_LENGTH 20 /* invalid cooling state */ #define 
THERMAL_CSTATE_INVALID -1UL @@ -49,11 +49,6 @@ #define MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, off) (((t) / 100) + (off)) #define MILLICELSIUS_TO_DECI_KELVIN(t) MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, 2732) -/* Adding event notification support elements */ -#define THERMAL_GENL_FAMILY_NAME "thermal_event" -#define THERMAL_GENL_VERSION 0x01 -#define THERMAL_GENL_MCAST_GROUP_NAME "thermal_mc_grp" - /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) #define DEFAULT_THERMAL_GOVERNOR "step_wise" @@ -86,30 +81,6 @@ enum thermal_trend { THERMAL_TREND_DROP_FULL, /* apply lowest cooling action */ }; -/* Events supported by Thermal Netlink */ -enum events { - THERMAL_AUX0, - THERMAL_AUX1, - THERMAL_CRITICAL, - THERMAL_DEV_FAULT, -}; - -/* attributes of thermal_genl_family */ -enum { - THERMAL_GENL_ATTR_UNSPEC, - THERMAL_GENL_ATTR_EVENT, - __THERMAL_GENL_ATTR_MAX, -}; -#define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) - -/* commands supported by the thermal_genl_family */ -enum { - THERMAL_GENL_CMD_UNSPEC, - THERMAL_GENL_CMD_EVENT, - __THERMAL_GENL_CMD_MAX, -}; -#define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1) - struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); @@ -289,19 +260,49 @@ struct thermal_genl_event { enum events event; }; +/** + * struct thermal_zone_of_device_ops - callbacks for handling DT based zones + * + * Mandatory: + * @get_temp: a pointer to a function that reads the sensor temperature. + * + * Optional: + * @get_trend: a pointer to a function that reads the sensor temperature trend. + * @set_emul_temp: a pointer to a function that sets sensor emulated + * temperature. + */ +struct thermal_zone_of_device_ops { + int (*get_temp)(void *, long *); + int (*get_trend)(void *, long *); + int (*set_emul_temp)(void *, unsigned long); +}; + +/** + * struct thermal_trip - representation of a point in temperature domain + * @np: pointer to struct device_node that this trip point was created from + * @temperature: temperature value in millicelsius + * @hysteresis: relative hysteresis in millicelsius + * @type: trip point type + */ + +struct thermal_trip { + struct device_node *np; + unsigned long int temperature; + unsigned long int hysteresis; + enum thermal_trip_type type; +}; + /* Function declarations */ #ifdef CONFIG_THERMAL_OF struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int id, - void *data, int (*get_temp)(void *, long *), - int (*get_trend)(void *, long *)); +thermal_zone_of_sensor_register(struct device *dev, int id, void *data, + const struct thermal_zone_of_device_ops *ops); void thermal_zone_of_sensor_unregister(struct device *dev, struct thermal_zone_device *tz); #else static inline struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int id, - void *data, int (*get_temp)(void *, long *), - int (*get_trend)(void *, long *)) +thermal_zone_of_sensor_register(struct device *dev, int id, void *data, + const struct thermal_zone_of_device_ops *ops) { return NULL; } diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4cf06c140e2..8297e5b341d 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -18,6 +18,10 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */ } extent[UID_GID_MAP_MAX_EXTENTS]; }; +#define USERNS_SETGROUPS_ALLOWED 1UL + +#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED + struct user_namespace { struct uid_gid_map uid_map; struct 
uid_gid_map gid_map; @@ -28,6 +32,7 @@ struct user_namespace { kuid_t owner; kgid_t group; struct ns_common ns; + unsigned long flags; /* Register of per-UID persistent keyrings for this namespace */ #ifdef CONFIG_PERSISTENT_KEYRINGS @@ -64,6 +69,9 @@ extern const struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); +extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); +extern int proc_setgroups_show(struct seq_file *m, void *v); +extern bool userns_may_setgroups(const struct user_namespace *ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) @@ -88,6 +96,10 @@ static inline void put_user_ns(struct user_namespace *ns) { } +static inline bool userns_may_setgroups(const struct user_namespace *ns) +{ + return true; +} #endif #endif /* _LINUX_USER_H */ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index d09e0938fd6..28f0e65b9a1 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -81,7 +81,7 @@ void *virtqueue_get_used(struct virtqueue *vq); /** * virtio_device - representation of a device using virtio * @index: unique position on the virtio bus - * @failed: saved value for CONFIG_S_FAILED bit (for restore) + * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore) * @config_enabled: configuration change reporting enabled * @config_change_pending: configuration change reported while disabled * @config_lock: protects configuration change reporting diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 259d31fc907..00b100023c4 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -387,6 +387,7 @@ header-y += tcp.h header-y += tcp_metrics.h header-y += telephony.h header-y += termios.h +header-y += thermal.h header-y += time.h header-y += times.h header-y += timex.h diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 60768822b14..a37fd1224f3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -647,11 +647,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ -#define KVM_CAP_DEVICE_ASSIGNMENT 17 #define KVM_CAP_IOMMU 18 -#ifdef __KVM_HAVE_MSI -#define KVM_CAP_DEVICE_MSI 20 -#endif /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 #define KVM_CAP_USER_NMI 22 @@ -663,10 +659,6 @@ struct kvm_ppc_smmu_info { #endif #define KVM_CAP_IRQ_ROUTING 25 #define KVM_CAP_IRQ_INJECT_STATUS 26 -#define KVM_CAP_DEVICE_DEASSIGNMENT 27 -#ifdef __KVM_HAVE_MSIX -#define KVM_CAP_DEVICE_MSIX 28 -#endif #define KVM_CAP_ASSIGN_DEV_IRQ 29 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 @@ -1107,9 +1099,6 @@ struct kvm_s390_ucas_mapping { #define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64) #define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64) #define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce) -/* IA64 stack access */ -#define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *) -#define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *) /* Available with KVM_CAP_VCPU_EVENTS */ #define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) 
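These command numbers follow the usual _IOR/_IOW encoding against the KVMIO magic, which is why the removed IA64 stack ioctls simply leave 0x9a/0x9b unused rather than renumbering later commands. A rough userspace sketch of the events pair (assumes a vcpu_fd obtained via KVM_CREATE_VCPU and that KVM_CAP_VCPU_EVENTS was checked first):

    struct kvm_vcpu_events events;

    if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)  /* 0x9f, read */
            err(1, "KVM_GET_VCPU_EVENTS");
    /* ... inspect or adjust pending exception/NMI state ... */
    if (ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events) < 0)  /* 0xa0, write */
            err(1, "KVM_SET_VCPU_EVENTS");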
#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h new file mode 100644 index 00000000000..ac553585598 --- /dev/null +++ b/include/uapi/linux/thermal.h @@ -0,0 +1,35 @@ +#ifndef _UAPI_LINUX_THERMAL_H +#define _UAPI_LINUX_THERMAL_H + +#define THERMAL_NAME_LENGTH 20 + +/* Adding event notification support elements */ +#define THERMAL_GENL_FAMILY_NAME "thermal_event" +#define THERMAL_GENL_VERSION 0x01 +#define THERMAL_GENL_MCAST_GROUP_NAME "thermal_mc_grp" + +/* Events supported by Thermal Netlink */ +enum events { + THERMAL_AUX0, + THERMAL_AUX1, + THERMAL_CRITICAL, + THERMAL_DEV_FAULT, +}; + +/* attributes of thermal_genl_family */ +enum { + THERMAL_GENL_ATTR_UNSPEC, + THERMAL_GENL_ATTR_EVENT, + __THERMAL_GENL_ATTR_MAX, +}; +#define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) + +/* commands supported by the thermal_genl_family */ +enum { + THERMAL_GENL_CMD_UNSPEC, + THERMAL_GENL_CMD_EVENT, + __THERMAL_GENL_CMD_MAX, +}; +#define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1) + +#endif /* _UAPI_LINUX_THERMAL_H */ diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 5e26f61b5df..be40f7059e9 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -31,6 +31,7 @@ /* The feature bitmap for virtio balloon */ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ +#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 diff --git a/kernel/groups.c b/kernel/groups.c index 451698f86cf..664411f171b 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -6,6 +6,7 @@ #include <linux/slab.h> #include <linux/security.h> #include <linux/syscalls.h> +#include <linux/user_namespace.h> #include <asm/uaccess.h> /* init to 2 - one for init_task, one to ensure it is never freed */ @@ -213,6 +214,14 @@ out: return i; } +bool may_setgroups(void) +{ + struct user_namespace *user_ns = current_user_ns(); + + return ns_capable(user_ns, CAP_SETGID) && + userns_may_setgroups(user_ns); +} + /* * SMP: Our groups are copy-on-write. We can set them safely * without another task interfering. 
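Together with the /proc/<pid>/setgroups file added in fs/proc/base.c above, may_setgroups() changes the userspace recipe for unprivileged user namespaces: gid_map may only be written once setgroups has been disabled. A self-contained sketch of the expected sequence (error handling trimmed; the single "0 <id> 1" extent matches what new_idmap_permitted() still allows):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sched.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    static void write_file(const char *path, const char *buf)
    {
            int fd = open(path, O_WRONLY);

            if (fd < 0 || write(fd, buf, strlen(buf)) < 0)
                    perror(path);
            if (fd >= 0)
                    close(fd);
    }

    int main(void)
    {
            char map[64];

            if (unshare(CLONE_NEWUSER) < 0) {
                    perror("unshare");
                    return 1;
            }
            snprintf(map, sizeof(map), "0 %d 1", (int)getuid());
            write_file("/proc/self/uid_map", map);

            /* Without this, the unprivileged gid_map write below
             * now fails with EPERM. */
            write_file("/proc/self/setgroups", "deny");

            snprintf(map, sizeof(map), "0 %d 1", (int)getgid());
            write_file("/proc/self/gid_map", map);
            return 0;
    }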
@@ -223,7 +232,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!ns_capable(current_user_ns(), CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/kernel/uid16.c b/kernel/uid16.c index 602e5bbbcef..d58cc4d8f0d 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!ns_capable(current_user_ns(), CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/kernel/user.c b/kernel/user.c index 69b800aebf1..b069ccbfb0b 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -54,6 +54,7 @@ struct user_namespace init_user_ns = { #ifdef CONFIG_USER_NS .ns.ops = &userns_operations, #endif + .flags = USERNS_INIT_FLAGS, #ifdef CONFIG_PERSISTENT_KEYRINGS .persistent_keyring_register_sem = __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem), diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 1491ad00388..4109f832068 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -24,6 +24,7 @@ #include <linux/fs_struct.h> static struct kmem_cache *user_ns_cachep __read_mostly; +static DEFINE_MUTEX(userns_state_mutex); static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, @@ -100,6 +101,11 @@ int create_user_ns(struct cred *new) ns->owner = owner; ns->group = group; + /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ + mutex_lock(&userns_state_mutex); + ns->flags = parent_ns->flags; + mutex_unlock(&userns_state_mutex); + set_cred_user_ns(new, ns); #ifdef CONFIG_PERSISTENT_KEYRINGS @@ -584,9 +590,6 @@ static bool mappings_overlap(struct uid_gid_map *new_map, return false; } - -static DEFINE_MUTEX(id_map_mutex); - static ssize_t map_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int cap_setid, @@ -603,7 +606,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ssize_t ret = -EINVAL; /* - * The id_map_mutex serializes all writes to any given map. + * The userns_state_mutex serializes all writes to any given map. * * Any map is only ever written once. * @@ -621,7 +624,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, * order and smp_rmb() is guaranteed that we don't have crazy * architectures returning stale data. 
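The renamed mutex only serializes writers; readers of an id map take no lock and rely on exactly the publication order this comment describes. Schematically (not the literal kernel code):

    /* writer, under userns_state_mutex: fill the extents, then publish */
    map->extent[0].first       = first;
    map->extent[0].lower_first = lower_first;
    map->extent[0].count       = count;
    smp_wmb();                 /* extents become visible before the count */
    map->nr_extents = nr;

    /* lockless reader: */
    nr = map->nr_extents;
    smp_rmb();                 /* pairs with the writer's smp_wmb() */
    /* extent[0..nr-1] may now be read safely */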
*/ - mutex_lock(&id_map_mutex); + mutex_lock(&userns_state_mutex); ret = -EPERM; /* Only allow one successful write to the map */ @@ -641,7 +644,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, if (!page) goto out; - /* Only allow <= page size writes at the beginning of the file */ + /* Only allow < page size writes at the beginning of the file */ ret = -EINVAL; if ((*ppos != 0) || (count >= PAGE_SIZE)) goto out; @@ -751,7 +754,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, *ppos = count; ret = count; out: - mutex_unlock(&id_map_mutex); + mutex_unlock(&userns_state_mutex); if (page) free_page(page); return ret; @@ -813,16 +816,21 @@ static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { - /* Allow mapping to your own filesystem ids */ - if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { + const struct cred *cred = file->f_cred; + /* Don't allow mappings that would allow anything that wouldn't + * be allowed without the establishment of unprivileged mappings. + */ + if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) && + uid_eq(ns->owner, cred->euid)) { u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { kuid_t uid = make_kuid(ns->parent, id); - if (uid_eq(uid, file->f_cred->fsuid)) + if (uid_eq(uid, cred->euid)) return true; } else if (cap_setid == CAP_SETGID) { kgid_t gid = make_kgid(ns->parent, id); - if (gid_eq(gid, file->f_cred->fsgid)) + if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) && + gid_eq(gid, cred->egid)) return true; } } @@ -842,6 +850,100 @@ static bool new_idmap_permitted(const struct file *file, return false; } +int proc_setgroups_show(struct seq_file *seq, void *v) +{ + struct user_namespace *ns = seq->private; + unsigned long userns_flags = ACCESS_ONCE(ns->flags); + + seq_printf(seq, "%s\n", + (userns_flags & USERNS_SETGROUPS_ALLOWED) ? + "allow" : "deny"); + return 0; +} + +ssize_t proc_setgroups_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + char kbuf[8], *pos; + bool setgroups_allowed; + ssize_t ret; + + /* Only allow a very narrow range of strings to be written */ + ret = -EINVAL; + if ((*ppos != 0) || (count >= sizeof(kbuf))) + goto out; + + /* What was written? */ + ret = -EFAULT; + if (copy_from_user(kbuf, buf, count)) + goto out; + kbuf[count] = '\0'; + pos = kbuf; + + /* What is being requested? */ + ret = -EINVAL; + if (strncmp(pos, "allow", 5) == 0) { + pos += 5; + setgroups_allowed = true; + } + else if (strncmp(pos, "deny", 4) == 0) { + pos += 4; + setgroups_allowed = false; + } + else + goto out; + + /* Verify there is not trailing junk on the line */ + pos = skip_spaces(pos); + if (*pos != '\0') + goto out; + + ret = -EPERM; + mutex_lock(&userns_state_mutex); + if (setgroups_allowed) { + /* Enabling setgroups after setgroups has been disabled + * is not allowed. + */ + if (!(ns->flags & USERNS_SETGROUPS_ALLOWED)) + goto out_unlock; + } else { + /* Permanently disabling setgroups after setgroups has + * been enabled by writing the gid_map is not allowed. 
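The two branches make the switch one-way: "deny" succeeds in a fresh namespace, but re-enabling, or denying once a gid mapping exists, is rejected. A quick userspace check of the first property (fresh user namespace assumed; pwrite() keeps the zero offset the handler insists on):

    int fd = open("/proc/self/setgroups", O_WRONLY);

    if (pwrite(fd, "deny", 4, 0) < 0)     /* fine in a fresh namespace */
            perror("deny");
    if (pwrite(fd, "allow", 5, 0) < 0)    /* now fails with EPERM */
            perror("allow");
    close(fd);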
+ */ + if (ns->gid_map.nr_extents != 0) + goto out_unlock; + ns->flags &= ~USERNS_SETGROUPS_ALLOWED; + } + mutex_unlock(&userns_state_mutex); + + /* Report a successful write */ + *ppos = count; + ret = count; +out: + return ret; +out_unlock: + mutex_unlock(&userns_state_mutex); + goto out; +} + +bool userns_may_setgroups(const struct user_namespace *ns) +{ + bool allowed; + + mutex_lock(&userns_state_mutex); + /* It is not safe to use setgroups until a gid mapping in + * the user namespace has been established. + */ + allowed = ns->gid_map.nr_extents != 0; + /* Is setgroups allowed? */ + allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED); + mutex_unlock(&userns_state_mutex); + + return allowed; +} + static inline struct user_namespace *to_user_ns(struct ns_common *ns) { return container_of(ns, struct user_namespace, ns); diff --git a/mm/memory.c b/mm/memory.c index c3b9097251c..6efe36a998b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -235,9 +235,6 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { - if (!tlb->end) - return; - tlb_flush(tlb); mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end); #ifdef CONFIG_HAVE_RCU_TABLE_FREE @@ -259,6 +256,9 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb) void tlb_flush_mmu(struct mmu_gather *tlb) { + if (!tlb->end) + return; + tlb_flush_mmu_tlbonly(tlb); tlb_flush_mmu_free(tlb); } diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 7e38b729696..15845814a0f 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -8,6 +8,7 @@ #include <linux/ceph/decode.h> #include <linux/ceph/auth.h> +#include <linux/ceph/messenger.h> #include "crypto.h" #include "auth_x.h" @@ -293,6 +294,11 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, dout("build_authorizer for %s %p\n", ceph_entity_type_name(th->service), au); + ceph_crypto_key_destroy(&au->session_key); + ret = ceph_crypto_key_clone(&au->session_key, &th->session_key); + if (ret) + return ret; + maxlen = sizeof(*msg_a) + sizeof(msg_b) + ceph_x_encrypt_buflen(ticket_blob_len); dout(" need len %d\n", maxlen); @@ -302,8 +308,10 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, } if (!au->buf) { au->buf = ceph_buffer_new(maxlen, GFP_NOFS); - if (!au->buf) + if (!au->buf) { + ceph_crypto_key_destroy(&au->session_key); return -ENOMEM; + } } au->service = th->service; au->secret_id = th->secret_id; @@ -329,7 +337,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, get_random_bytes(&au->nonce, sizeof(au->nonce)); msg_b.struct_v = 1; msg_b.nonce = cpu_to_le64(au->nonce); - ret = ceph_x_encrypt(&th->session_key, &msg_b, sizeof(msg_b), + ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b), p, end - p); if (ret < 0) goto out_buf; @@ -560,6 +568,8 @@ static int ceph_x_create_authorizer( auth->authorizer_buf_len = au->buf->vec.iov_len; auth->authorizer_reply_buf = au->reply_buf; auth->authorizer_reply_buf_len = sizeof (au->reply_buf); + auth->sign_message = ac->ops->sign_message; + auth->check_message_signature = ac->ops->check_message_signature; return 0; } @@ -588,17 +598,13 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a, size_t len) { struct ceph_x_authorizer *au = (void *)a; - struct ceph_x_ticket_handler *th; int ret = 0; struct ceph_x_authorize_reply reply; void *preply = &reply; void *p = au->reply_buf; void *end = p + sizeof(au->reply_buf); - th = get_ticket_handler(ac, 
au->service); - if (IS_ERR(th)) - return PTR_ERR(th); - ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply)); + ret = ceph_x_decrypt(&au->session_key, &p, end, &preply, sizeof(reply)); if (ret < 0) return ret; if (ret != sizeof(reply)) @@ -618,6 +624,7 @@ static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac, { struct ceph_x_authorizer *au = (void *)a; + ceph_crypto_key_destroy(&au->session_key); ceph_buffer_put(au->buf); kfree(au); } @@ -663,6 +670,59 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, memset(&th->validity, 0, sizeof(th->validity)); } +static int calcu_signature(struct ceph_x_authorizer *au, + struct ceph_msg *msg, __le64 *sig) +{ + int ret; + char tmp_enc[40]; + __le32 tmp[5] = { + 16u, msg->hdr.crc, msg->footer.front_crc, + msg->footer.middle_crc, msg->footer.data_crc, + }; + ret = ceph_x_encrypt(&au->session_key, &tmp, sizeof(tmp), + tmp_enc, sizeof(tmp_enc)); + if (ret < 0) + return ret; + *sig = *(__le64*)(tmp_enc + 4); + return 0; +} + +static int ceph_x_sign_message(struct ceph_auth_handshake *auth, + struct ceph_msg *msg) +{ + int ret; + if (!auth->authorizer) + return 0; + ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, + msg, &msg->footer.sig); + if (ret < 0) + return ret; + msg->footer.flags |= CEPH_MSG_FOOTER_SIGNED; + return 0; +} + +static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth, + struct ceph_msg *msg) +{ + __le64 sig_check; + int ret; + + if (!auth->authorizer) + return 0; + ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, + msg, &sig_check); + if (ret < 0) + return ret; + if (sig_check == msg->footer.sig) + return 0; + if (msg->footer.flags & CEPH_MSG_FOOTER_SIGNED) + dout("ceph_x_check_message_signature %p has signature %llx " + "expect %llx\n", msg, msg->footer.sig, sig_check); + else + dout("ceph_x_check_message_signature %p sender did not set " + "CEPH_MSG_FOOTER_SIGNED\n", msg); + return -EBADMSG; +} static const struct ceph_auth_client_ops ceph_x_ops = { .name = "x", @@ -677,6 +737,8 @@ static const struct ceph_auth_client_ops ceph_x_ops = { .invalidate_authorizer = ceph_x_invalidate_authorizer, .reset = ceph_x_reset, .destroy = ceph_x_destroy, + .sign_message = ceph_x_sign_message, + .check_message_signature = ceph_x_check_message_signature, }; diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index 65ee72082d9..e8b7c6917d4 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -26,6 +26,7 @@ struct ceph_x_ticket_handler { struct ceph_x_authorizer { + struct ceph_crypto_key session_key; struct ceph_buffer *buf; unsigned int service; u64 nonce; diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c index 621b5f65407..add5f921a0f 100644 --- a/net/ceph/buffer.c +++ b/net/ceph/buffer.c @@ -6,7 +6,7 @@ #include <linux/ceph/buffer.h> #include <linux/ceph/decode.h> -#include <linux/ceph/libceph.h> /* for ceph_kv{malloc,free} */ +#include <linux/ceph/libceph.h> /* for ceph_kvmalloc */ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) { @@ -35,7 +35,7 @@ void ceph_buffer_release(struct kref *kref) struct ceph_buffer *b = container_of(kref, struct ceph_buffer, kref); dout("buffer_release %p\n", b); - ceph_kvfree(b->vec.iov_base); + kvfree(b->vec.iov_base); kfree(b); } EXPORT_SYMBOL(ceph_buffer_release); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 58fbfe134f9..5d5ab67f516 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -184,14 +184,6 @@ void *ceph_kvmalloc(size_t size, gfp_t flags) return 
__vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL); } -void ceph_kvfree(const void *ptr) -{ - if (is_vmalloc_addr(ptr)) - vfree(ptr); - else - kfree(ptr); -} - static int parse_fsid(const char *str, struct ceph_fsid *fsid) { @@ -245,6 +237,8 @@ enum { Opt_noshare, Opt_crc, Opt_nocrc, + Opt_cephx_require_signatures, + Opt_nocephx_require_signatures, }; static match_table_t opt_tokens = { @@ -263,6 +257,8 @@ static match_table_t opt_tokens = { {Opt_noshare, "noshare"}, {Opt_crc, "crc"}, {Opt_nocrc, "nocrc"}, + {Opt_cephx_require_signatures, "cephx_require_signatures"}, + {Opt_nocephx_require_signatures, "nocephx_require_signatures"}, {-1, NULL} }; @@ -461,6 +457,12 @@ ceph_parse_options(char *options, const char *dev_name, case Opt_nocrc: opt->flags |= CEPH_OPT_NOCRC; break; + case Opt_cephx_require_signatures: + opt->flags &= ~CEPH_OPT_NOMSGAUTH; + break; + case Opt_nocephx_require_signatures: + opt->flags |= CEPH_OPT_NOMSGAUTH; + break; default: BUG_ON(token); @@ -504,6 +506,9 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, init_waitqueue_head(&client->auth_wq); client->auth_err = 0; + if (!ceph_test_opt(client, NOMSGAUTH)) + required_features |= CEPH_FEATURE_MSG_AUTH; + client->extra_mon_dispatch = NULL; client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT | supported_features; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 8d1653caffd..33a2f201e46 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1196,8 +1196,18 @@ static void prepare_write_message_footer(struct ceph_connection *con) dout("prepare_write_message_footer %p\n", con); con->out_kvec_is_msg = true; con->out_kvec[v].iov_base = &m->footer; - con->out_kvec[v].iov_len = sizeof(m->footer); - con->out_kvec_bytes += sizeof(m->footer); + if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { + if (con->ops->sign_message) + con->ops->sign_message(con, m); + else + m->footer.sig = 0; + con->out_kvec[v].iov_len = sizeof(m->footer); + con->out_kvec_bytes += sizeof(m->footer); + } else { + m->old_footer.flags = m->footer.flags; + con->out_kvec[v].iov_len = sizeof(m->old_footer); + con->out_kvec_bytes += sizeof(m->old_footer); + } con->out_kvec_left++; con->out_more = m->more_to_follow; con->out_msg_done = true; @@ -2249,6 +2259,7 @@ static int read_partial_message(struct ceph_connection *con) int ret; unsigned int front_len, middle_len, data_len; bool do_datacrc = !con->msgr->nocrc; + bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH); u64 seq; u32 crc; @@ -2361,12 +2372,21 @@ static int read_partial_message(struct ceph_connection *con) } /* footer */ - size = sizeof (m->footer); + if (need_sign) + size = sizeof(m->footer); + else + size = sizeof(m->old_footer); + end += size; ret = read_partial(con, end, size, &m->footer); if (ret <= 0) return ret; + if (!need_sign) { + m->footer.flags = m->old_footer.flags; + m->footer.sig = 0; + } + dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n", m, front_len, m->footer.front_crc, middle_len, m->footer.middle_crc, data_len, m->footer.data_crc); @@ -2390,6 +2410,12 @@ static int read_partial_message(struct ceph_connection *con) return -EBADMSG; } + if (need_sign && con->ops->check_message_signature && + con->ops->check_message_signature(con, m)) { + pr_err("read_partial_message %p signature check failed\n", m); + return -EBADMSG; + } + return 1; /* done! 
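The send and receive paths above gate on the same negotiated feature bit, so the wire rule reduces to a single predicate. A condensed sketch (helper name invented for illustration):

    /* Peers that negotiated CEPH_FEATURE_MSG_AUTH exchange the new
     * footer carrying the 64-bit signature; older peers keep the short
     * footer and the sig field is treated as zero. */
    static size_t wire_footer_size(const struct ceph_connection *con)
    {
            if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
                    return sizeof(struct ceph_msg_footer);     /* crcs, sig, flags */
            return sizeof(struct ceph_msg_footer_old);         /* crcs, flags */
    }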
*/ } @@ -3288,7 +3314,7 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) static void ceph_msg_free(struct ceph_msg *m) { dout("%s %p\n", __func__, m); - ceph_kvfree(m->front.iov_base); + kvfree(m->front.iov_base); kmem_cache_free(ceph_msg_cache, m); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6f164289bde..53299c7b0ca 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -292,6 +292,10 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, ceph_osd_data_release(&op->cls.request_data); ceph_osd_data_release(&op->cls.response_data); break; + case CEPH_OSD_OP_SETXATTR: + case CEPH_OSD_OP_CMPXATTR: + ceph_osd_data_release(&op->xattr.osd_data); + break; default: break; } @@ -476,8 +480,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, size_t payload_len = 0; BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && - opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO && - opcode != CEPH_OSD_OP_TRUNCATE); + opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE); op->extent.offset = offset; op->extent.length = length; @@ -545,6 +548,39 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, } EXPORT_SYMBOL(osd_req_op_cls_init); +int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, + u16 opcode, const char *name, const void *value, + size_t size, u8 cmp_op, u8 cmp_mode) +{ + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_pagelist *pagelist; + size_t payload_len; + + BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR); + + pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); + if (!pagelist) + return -ENOMEM; + + ceph_pagelist_init(pagelist); + + payload_len = strlen(name); + op->xattr.name_len = payload_len; + ceph_pagelist_append(pagelist, name, payload_len); + + op->xattr.value_len = size; + ceph_pagelist_append(pagelist, value, size); + payload_len += size; + + op->xattr.cmp_op = cmp_op; + op->xattr.cmp_mode = cmp_mode; + + ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist); + op->payload_len = payload_len; + return 0; +} +EXPORT_SYMBOL(osd_req_op_xattr_init); + void osd_req_op_watch_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u64 cookie, u64 version, int flag) @@ -626,7 +662,6 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, case CEPH_OSD_OP_READ: case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_ZERO: - case CEPH_OSD_OP_DELETE: case CEPH_OSD_OP_TRUNCATE: if (src->op == CEPH_OSD_OP_WRITE) request_data_len = src->extent.length; @@ -676,6 +711,19 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, dst->alloc_hint.expected_write_size = cpu_to_le64(src->alloc_hint.expected_write_size); break; + case CEPH_OSD_OP_SETXATTR: + case CEPH_OSD_OP_CMPXATTR: + dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); + dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); + dst->xattr.cmp_op = src->xattr.cmp_op; + dst->xattr.cmp_mode = src->xattr.cmp_mode; + osd_data = &src->xattr.osd_data; + ceph_osdc_msg_data_add(req->r_request, osd_data); + request_data_len = osd_data->pagelist->length; + break; + case CEPH_OSD_OP_CREATE: + case CEPH_OSD_OP_DELETE: + break; default: pr_err("unsupported osd opcode %s\n", ceph_osd_op_name(src->op)); @@ -705,7 +753,8 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, struct 
@@ -705,7 +753,8 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
 struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 					       struct ceph_file_layout *layout,
 					       struct ceph_vino vino,
-					       u64 off, u64 *plen, int num_ops,
+					       u64 off, u64 *plen,
+					       unsigned int which, int num_ops,
 					       int opcode, int flags,
 					       struct ceph_snap_context *snapc,
 					       u32 truncate_seq,
@@ -716,13 +765,11 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 	u64 objnum = 0;
 	u64 objoff = 0;
 	u64 objlen = 0;
-	u32 object_size;
-	u64 object_base;
 	int r;
 
 	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
-	       opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
-	       opcode != CEPH_OSD_OP_TRUNCATE);
+	       opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE &&
+	       opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE);
 
 	req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
 				      GFP_NOFS);
@@ -738,29 +785,24 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 		return ERR_PTR(r);
 	}
 
-	object_size = le32_to_cpu(layout->fl_object_size);
-	object_base = off - objoff;
-	if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
-		if (truncate_size <= object_base) {
-			truncate_size = 0;
-		} else {
-			truncate_size -= object_base;
-			if (truncate_size > object_size)
-				truncate_size = object_size;
+	if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) {
+		osd_req_op_init(req, which, opcode);
+	} else {
+		u32 object_size = le32_to_cpu(layout->fl_object_size);
+		u32 object_base = off - objoff;
+		if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
+			if (truncate_size <= object_base) {
+				truncate_size = 0;
+			} else {
+				truncate_size -= object_base;
+				if (truncate_size > object_size)
+					truncate_size = object_size;
+			}
 		}
+		osd_req_op_extent_init(req, which, opcode, objoff, objlen,
+				       truncate_size, truncate_seq);
 	}
 
-	osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
-			       truncate_size, truncate_seq);
-
-	/*
-	 * A second op in the ops array means the caller wants to
-	 * also issue a include a 'startsync' command so that the
-	 * osd will flush data quickly.
-	 */
-	if (num_ops > 1)
-		osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
-
 	req->r_base_oloc.pool = ceph_file_layout_pg_pool(*layout);
 
 	snprintf(req->r_base_oid.name, sizeof(req->r_base_oid.name),
@@ -2626,7 +2668,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
 	dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
 	     vino.snap, off, *plen);
-	req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1,
+	req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1,
 				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
 				    NULL, truncate_seq, truncate_size,
 				    false);
@@ -2669,7 +2711,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 	int page_align = off & ~PAGE_MASK;
 
 	BUG_ON(vino.snap != CEPH_NOSNAP);	/* snapshots aren't writeable */
-	req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
+	req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1,
 				    CEPH_OSD_OP_WRITE,
 				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
 				    snapc, truncate_seq, truncate_size,
@@ -2920,6 +2962,20 @@ static int invalidate_authorizer(struct ceph_connection *con)
 	return ceph_monc_validate_auth(&osdc->client->monc);
 }
 
+static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	struct ceph_osd *o = con->private;
+	struct ceph_auth_handshake *auth = &o->o_auth;
+	return ceph_auth_sign_message(auth, msg);
+}
+
+static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	struct ceph_osd *o = con->private;
+	struct ceph_auth_handshake *auth = &o->o_auth;
+	return ceph_auth_check_message_signature(auth, msg);
+}
+
 static const struct ceph_connection_operations osd_con_ops = {
 	.get = get_osd_con,
 	.put = put_osd_con,
@@ -2928,5 +2984,7 @@ static const struct ceph_connection_operations osd_con_ops = {
 	.verify_authorizer_reply = verify_authorizer_reply,
 	.invalidate_authorizer = invalidate_authorizer,
 	.alloc_msg = alloc_msg,
+	.sign_message = sign_message,
+	.check_message_signature = check_message_signature,
 	.fault = osd_reset,
 };
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index b80a93ec1cc..57515bc915c 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -10,7 +10,7 @@ config IMA
 	select CRYPTO_HASH_INFO
 	select TCG_TPM if HAS_IOMEM && !UML
 	select TCG_TIS if TCG_TPM && X86
-	select TCG_IBMVTPM if TCG_TPM && PPC64
+	select TCG_IBMVTPM if TCG_TPM && PPC_PSERIES
 	help
 	  The Trusted Computing Group(TCG) runtime Integrity
 	  Measurement Architecture(IMA) maintains a list of hash
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index db9675db102..7bed4ad7cd7 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -1017,10 +1017,13 @@ static int __init init_encrypted(void)
 	ret = encrypted_shash_alloc();
 	if (ret < 0)
 		return ret;
+	ret = aes_get_sizes();
+	if (ret < 0)
+		goto out;
 	ret = register_key_type(&key_type_encrypted);
 	if (ret < 0)
 		goto out;
-	return aes_get_sizes();
+	return 0;
 out:
 	encrypted_shash_release();
 	return ret;
diff --git a/security/keys/key.c b/security/keys/key.c
index e17ba6aefdc..aee2ec5a18f 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -276,12 +276,10 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 	if (!key)
 		goto no_memory_2;
 
-	if (desc) {
-		key->index_key.desc_len = desclen;
-		key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
-		if (!key->description)
-			goto no_memory_3;
-	}
+	key->index_key.desc_len = desclen;
+	key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
+	if (!key->description)
+		goto no_memory_3;
 
 	atomic_set(&key->usage, 1);
 	init_rwsem(&key->sem);
diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
index 1b3ff2fda4d..517785052f1 100644
--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@@ -6,6 +6,8 @@
 #include <sys/types.h>
 #include <sys/mount.h>
 #include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <fcntl.h>
@@ -32,11 +34,14 @@
 # define CLONE_NEWPID 0x20000000
 #endif
 
+#ifndef MS_REC
+# define MS_REC 16384
+#endif
 #ifndef MS_RELATIME
-#define MS_RELATIME (1 << 21)
+# define MS_RELATIME (1 << 21)
 #endif
 #ifndef MS_STRICTATIME
-#define MS_STRICTATIME (1 << 24)
+# define MS_STRICTATIME (1 << 24)
 #endif
 
 static void die(char *fmt, ...)
@@ -48,17 +53,14 @@ static void die(char *fmt, ...)
 	exit(EXIT_FAILURE);
 }
 
-static void write_file(char *filename, char *fmt, ...)
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
 {
 	char buf[4096];
 	int fd;
 	ssize_t written;
 	int buf_len;
-	va_list ap;
 
-	va_start(ap, fmt);
 	buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
-	va_end(ap);
 	if (buf_len < 0) {
 		die("vsnprintf failed: %s\n", strerror(errno));
 	}
@@ -69,6 +71,8 @@
 	fd = open(filename, O_WRONLY);
 	if (fd < 0) {
+		if ((errno == ENOENT) && enoent_ok)
+			return;
 		die("open of %s failed: %s\n",
 		    filename, strerror(errno));
 	}
@@ -87,6 +91,65 @@
 	}
 }
 
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vmaybe_write_file(true, filename, fmt, ap);
+	va_end(ap);
+
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vmaybe_write_file(false, filename, fmt, ap);
+	va_end(ap);
+
+}
+
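The refactor above is the standard v-variant pattern: each public varargs function owns only va_start()/va_end() and delegates the real work to a single va_list worker. A minimal standalone illustration of the same shape (function names here are made up for the demo):

    #include <stdarg.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* One worker takes the va_list plus the behavioural knob... */
    static void vlog(bool verbose, const char *fmt, va_list ap)
    {
        if (verbose)
            fputs("debug: ", stdout);
        vprintf(fmt, ap);
    }

    /* ...and each thin varargs wrapper owns va_start()/va_end(). */
    static void log_quiet(const char *fmt, ...)
    {
        va_list ap;

        va_start(ap, fmt);
        vlog(false, fmt, ap);
        va_end(ap);
    }

    static void log_verbose(const char *fmt, ...)
    {
        va_list ap;

        va_start(ap, fmt);
        vlog(true, fmt, ap);
        va_end(ap);
    }

    int main(void)
    {
        log_quiet("hello %d\n", 1);
        log_verbose("hello %d\n", 2);
        return 0;
    }

This keeps the formatting logic in one place, which is exactly what lets the patch add maybe_write_file() without duplicating write_file().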
+static int read_mnt_flags(const char *path)
+{
+	int ret;
+	struct statvfs stat;
+	int mnt_flags;
+
+	ret = statvfs(path, &stat);
+	if (ret != 0) {
+		die("statvfs of %s failed: %s\n",
+		    path, strerror(errno));
+	}
+	if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \
+			ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \
+			ST_SYNCHRONOUS | ST_MANDLOCK)) {
+		die("Unrecognized mount flags\n");
+	}
+	mnt_flags = 0;
+	if (stat.f_flag & ST_RDONLY)
+		mnt_flags |= MS_RDONLY;
+	if (stat.f_flag & ST_NOSUID)
+		mnt_flags |= MS_NOSUID;
+	if (stat.f_flag & ST_NODEV)
+		mnt_flags |= MS_NODEV;
+	if (stat.f_flag & ST_NOEXEC)
+		mnt_flags |= MS_NOEXEC;
+	if (stat.f_flag & ST_NOATIME)
+		mnt_flags |= MS_NOATIME;
+	if (stat.f_flag & ST_NODIRATIME)
+		mnt_flags |= MS_NODIRATIME;
+	if (stat.f_flag & ST_RELATIME)
+		mnt_flags |= MS_RELATIME;
+	if (stat.f_flag & ST_SYNCHRONOUS)
+		mnt_flags |= MS_SYNCHRONOUS;
+	if (stat.f_flag & ST_MANDLOCK)
+		mnt_flags |= MS_MANDLOCK;
+
+	return mnt_flags;
+}
+
 static void create_and_enter_userns(void)
 {
 	uid_t uid;
@@ -100,13 +163,10 @@ static void create_and_enter_userns(void)
 		    strerror(errno));
 	}
 
+	maybe_write_file("/proc/self/setgroups", "deny");
 	write_file("/proc/self/uid_map", "0 %d 1", uid);
 	write_file("/proc/self/gid_map", "0 %d 1", gid);
 
-	if (setgroups(0, NULL) != 0) {
-		die("setgroups failed: %s\n",
-		    strerror(errno));
-	}
 	if (setgid(0) != 0) {
 		die ("setgid(0) failed %s\n", strerror(errno));
@@ -118,7 +178,8 @@
 }
 
 static
-bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
+bool test_unpriv_remount(const char *fstype, const char *mount_options,
+			 int mount_flags, int remount_flags, int invalid_flags)
 {
 	pid_t child;
 
@@ -151,9 +212,11 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
 		    strerror(errno));
 	}
 
-	if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) {
-		die("mount of /tmp failed: %s\n",
-		    strerror(errno));
+	if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
+		die("mount of %s with options '%s' on /tmp failed: %s\n",
+		    fstype,
+		    mount_options? mount_options : "",
+		    strerror(errno));
 	}
 
 	create_and_enter_userns();
@@ -181,62 +244,127 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
 static bool test_unpriv_remount_simple(int mount_flags)
 {
-	return test_unpriv_remount(mount_flags, mount_flags, 0);
+	return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0);
 }
 
 static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
 {
-	return test_unpriv_remount(mount_flags, mount_flags, invalid_flags);
+	return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags,
+				   invalid_flags);
+}
+
+static bool test_priv_mount_unpriv_remount(void)
+{
+	pid_t child;
+	int ret;
+	const char *orig_path = "/dev";
+	const char *dest_path = "/tmp";
+	int orig_mnt_flags, remount_mnt_flags;
+
+	child = fork();
+	if (child == -1) {
+		die("fork failed: %s\n",
+		    strerror(errno));
+	}
+	if (child != 0) { /* parent */
+		pid_t pid;
+		int status;
+		pid = waitpid(child, &status, 0);
+		if (pid == -1) {
+			die("waitpid failed: %s\n",
+			    strerror(errno));
+		}
+		if (pid != child) {
+			die("waited for %d got %d\n",
+			    child, pid);
+		}
+		if (!WIFEXITED(status)) {
+			die("child did not terminate cleanly\n");
+		}
+		return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+	}
+
+	orig_mnt_flags = read_mnt_flags(orig_path);
+
+	create_and_enter_userns();
+	ret = unshare(CLONE_NEWNS);
+	if (ret != 0) {
+		die("unshare(CLONE_NEWNS) failed: %s\n",
+		    strerror(errno));
+	}
+
+	ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL);
+	if (ret != 0) {
+		die("recursive bind mount of %s onto %s failed: %s\n",
+		    orig_path, dest_path, strerror(errno));
+	}
+
+	ret = mount(dest_path, dest_path, "none",
+		    MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL);
+	if (ret != 0) {
+		/* system("cat /proc/self/mounts"); */
+		die("remount of /tmp failed: %s\n",
+		    strerror(errno));
+	}
+
+	remount_mnt_flags = read_mnt_flags(dest_path);
+	if (orig_mnt_flags != remount_mnt_flags) {
+		die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
+		    dest_path, orig_path);
+	}
+	exit(EXIT_SUCCESS);
+}
 
 int main(int argc, char **argv)
 {
-	if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) {
+	if (!test_unpriv_remount_simple(MS_RDONLY)) {
 		die("MS_RDONLY malfunctions\n");
 	}
-	if (!test_unpriv_remount_simple(MS_NODEV)) {
+	if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) {
 		die("MS_NODEV malfunctions\n");
 	}
-	if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) {
+	if (!test_unpriv_remount_simple(MS_NOSUID)) {
 		die("MS_NOSUID malfunctions\n");
 	}
-	if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) {
+	if (!test_unpriv_remount_simple(MS_NOEXEC)) {
 		die("MS_NOEXEC malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV,
-				       MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_RELATIME,
+				       MS_NOATIME))
 	{
 		die("MS_RELATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV,
-				       MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_STRICTATIME,
+				       MS_NOATIME))
 	{
 		die("MS_STRICTATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV,
-				       MS_STRICTATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_NOATIME,
+				       MS_STRICTATIME))
 	{
-		die("MS_RELATIME malfunctions\n");
+		die("MS_NOATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV,
-				       MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME,
+				       MS_NOATIME))
 	{
-		die("MS_RELATIME malfunctions\n");
+		die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV,
-				       MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME,
+				       MS_NOATIME))
 	{
-		die("MS_RELATIME malfunctions\n");
+		die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV,
-				       MS_STRICTATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME,
+				       MS_STRICTATIME))
 	{
-		die("MS_RELATIME malfunctions\n");
+		die("MS_NOATIME|MS_NODIRATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV,
-				 MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME))
 	{
 		die("Default atime malfunctions\n");
 	}
+	if (!test_priv_mount_unpriv_remount()) {
+		die("Mount flags unexpectedly changed after remount\n");
+	}
 	return EXIT_SUCCESS;
 }
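The statvfs()-based probe that read_mnt_flags() relies on can be exercised on its own; a minimal sketch (the path queried is arbitrary):

    #include <stdio.h>
    #include <sys/statvfs.h>

    int main(void)
    {
        struct statvfs st;

        /* f_flag reports the ST_* mount flags of the containing mount */
        if (statvfs("/proc", &st) != 0) {
            perror("statvfs");
            return 1;
        }
        printf("nosuid=%d nodev=%d noexec=%d rdonly=%d\n",
               !!(st.f_flag & ST_NOSUID), !!(st.f_flag & ST_NODEV),
               !!(st.f_flag & ST_NOEXEC), !!(st.f_flag & ST_RDONLY));
        return 0;
    }

The test converts these ST_* bits back into MS_* flags so it can compare what a remount produced against what was originally mounted.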
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 22fa819a9b6..1c0772b340d 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -61,12 +61,14 @@ static void timer_disarm(struct arch_timer_cpu *timer)
 
 static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
 {
+	int ret;
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
 	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
-	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-			    timer->irq->irq,
-			    timer->irq->level);
+	ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+				  timer->irq->irq,
+				  timer->irq->level);
+	WARN_ON(ret);
 }
 
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
@@ -307,12 +309,24 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
 	timer_disarm(timer);
 }
 
-int kvm_timer_init(struct kvm *kvm)
+void kvm_timer_enable(struct kvm *kvm)
 {
-	if (timecounter && wqueue) {
-		kvm->arch.timer.cntvoff = kvm_phys_timer_read();
+	if (kvm->arch.timer.enabled)
+		return;
+
+	/*
+	 * There is a potential race here between VCPUs starting for the first
+	 * time, which may be enabling the timer multiple times.  That doesn't
+	 * hurt though, because we're just setting a variable to the same
+	 * value it already had.  The important thing is that all VCPUs have
+	 * the enabled variable set, before entering the guest, if the arch
+	 * timers are enabled.
+	 */
+	if (timecounter && wqueue)
 		kvm->arch.timer.enabled = 1;
-	}
+}
 
-	return 0;
+void kvm_timer_init(struct kvm *kvm)
+{
+	kvm->arch.timer.cntvoff = kvm_phys_timer_read();
 }
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index aacdb59f30d..03affc7bf45 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -91,6 +91,7 @@
 #define ACCESS_WRITE_VALUE	(3 << 1)
 #define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))
 
+static int vgic_init(struct kvm *kvm);
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
@@ -1607,7 +1608,7 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 	}
 }
 
-static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
+static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 				    unsigned int irq_num, bool level)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
@@ -1643,9 +1644,10 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 			vgic_dist_irq_clear_level(vcpu, irq_num);
 			if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
 				vgic_dist_irq_clear_pending(vcpu, irq_num);
-		} else {
-			vgic_dist_irq_clear_pending(vcpu, irq_num);
 		}
+
+		ret = false;
+		goto out;
 	}
 
 	enabled = vgic_irq_is_enabled(vcpu, irq_num);
@@ -1672,7 +1674,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 out:
 	spin_unlock(&dist->lock);
 
-	return ret;
+	return ret ? cpuid : -EINVAL;
 }
 
 /**
@@ -1692,11 +1694,26 @@ out:
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level)
 {
-	if (likely(vgic_initialized(kvm)) &&
-	    vgic_update_irq_pending(kvm, cpuid, irq_num, level))
-		vgic_kick_vcpus(kvm);
+	int ret = 0;
+	int vcpu_id;
 
-	return 0;
+	if (unlikely(!vgic_initialized(kvm))) {
+		mutex_lock(&kvm->lock);
+		ret = vgic_init(kvm);
+		mutex_unlock(&kvm->lock);
+
+		if (ret)
+			goto out;
+	}
+
+	vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
+	if (vcpu_id >= 0) {
+		/* kick the specified vcpu */
+		kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
+	}
+
+out:
+	return ret;
 }
 
 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1726,39 +1743,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 	int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
 
 	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
-	vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
+	vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
 
 	if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
 		kvm_vgic_vcpu_destroy(vcpu);
 		return -ENOMEM;
 	}
 
-	return 0;
-}
-
-/**
- * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
- * @vcpu: pointer to the vcpu struct
- *
- * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
- * this vcpu and enable the VGIC for this VCPU
- */
-static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int i;
-
-	for (i = 0; i < dist->nr_irqs; i++) {
-		if (i < VGIC_NR_PPIS)
-			vgic_bitmap_set_irq_val(&dist->irq_enabled,
-						vcpu->vcpu_id, i, 1);
-		if (i < VGIC_NR_PRIVATE_IRQS)
-			vgic_bitmap_set_irq_val(&dist->irq_cfg,
-						vcpu->vcpu_id, i, VGIC_CFG_EDGE);
-
-		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
-	}
+	memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
 
 	/*
 	 * Store the number of LRs per vcpu, so we don't have to go
@@ -1767,7 +1759,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	 */
 	vgic_cpu->nr_lr = vgic->nr_lr;
 
-	vgic_enable(vcpu);
+	return 0;
 }
 
 void kvm_vgic_destroy(struct kvm *kvm)
@@ -1798,20 +1790,21 @@ void kvm_vgic_destroy(struct kvm *kvm)
 	dist->irq_spi_cpu = NULL;
 	dist->irq_spi_target = NULL;
 	dist->irq_pending_on_cpu = NULL;
+	dist->nr_cpus = 0;
 }
 
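kvm_vgic_inject_irq() above now initializes the distributor lazily, under kvm->lock, before the first injection. The shape of that check-then-init-under-a-lock pattern, reduced to a standalone sketch (the pthread mutex and init function are stand-ins, not KVM API; the real code re-checks inside vgic_init() itself):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static bool initialized;

    static int do_init(void)
    {
        /* allocate per-device state here; return 0 on success */
        return 0;
    }

    static int inject(void)
    {
        int ret = 0;

        if (!initialized) {             /* unlocked fast-path check */
            pthread_mutex_lock(&lock);
            if (!initialized) {         /* re-check under the lock */
                ret = do_init();
                if (ret == 0)
                    initialized = true;
            }
            pthread_mutex_unlock(&lock);
            if (ret)
                return ret;
        }
        puts("IRQ state updated");
        return 0;
    }

    int main(void)
    {
        return inject();
    }

The benign race the timer comment mentions is the same idea: several VCPUs may attempt the initialization, but only the first one does the work.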
 /*
  * Allocate and initialize the various data structures. Must be called
  * with kvm->lock held!
  */
-static int vgic_init_maps(struct kvm *kvm)
+static int vgic_init(struct kvm *kvm)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct kvm_vcpu *vcpu;
 	int nr_cpus, nr_irqs;
-	int ret, i;
+	int ret, i, vcpu_id;
 
-	if (dist->nr_cpus)	/* Already allocated */
+	if (vgic_initialized(kvm))
 		return 0;
 
 	nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
@@ -1859,16 +1852,28 @@ static int vgic_init_maps(struct kvm *kvm)
 	if (ret)
 		goto out;
 
-	kvm_for_each_vcpu(i, vcpu, kvm) {
+	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
+		vgic_set_target_reg(kvm, 0, i);
+
+	kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
 		ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
 		if (ret) {
 			kvm_err("VGIC: Failed to allocate vcpu memory\n");
 			break;
 		}
-	}
 
-	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
-		vgic_set_target_reg(kvm, 0, i);
+		for (i = 0; i < dist->nr_irqs; i++) {
+			if (i < VGIC_NR_PPIS)
+				vgic_bitmap_set_irq_val(&dist->irq_enabled,
+							vcpu->vcpu_id, i, 1);
+			if (i < VGIC_NR_PRIVATE_IRQS)
+				vgic_bitmap_set_irq_val(&dist->irq_cfg,
+							vcpu->vcpu_id, i,
+							VGIC_CFG_EDGE);
+		}
+
+		vgic_enable(vcpu);
+	}
 
 out:
 	if (ret)
@@ -1878,25 +1883,23 @@ out:
 }
 
 /**
- * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
+ * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs
  * @kvm: pointer to the kvm struct
 *
 * Map the virtual CPU interface into the VM before running any VCPUs. We
 * can't do this at creation time, because user space must first set the
- * virtual CPU interface address in the guest physical address space. Also
- * initialize the ITARGETSRn regs to 0 on the emulated distributor.
+ * virtual CPU interface address in the guest physical address space.
 */
-int kvm_vgic_init(struct kvm *kvm)
+int kvm_vgic_map_resources(struct kvm *kvm)
 {
-	struct kvm_vcpu *vcpu;
-	int ret = 0, i;
+	int ret = 0;
 
 	if (!irqchip_in_kernel(kvm))
 		return 0;
 
 	mutex_lock(&kvm->lock);
 
-	if (vgic_initialized(kvm))
+	if (vgic_ready(kvm))
 		goto out;
 
 	if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
@@ -1906,7 +1909,11 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
-	ret = vgic_init_maps(kvm);
+	/*
+	 * Initialize the vgic if this hasn't already been done on demand by
+	 * accessing the vgic state from userspace.
+	 */
+	ret = vgic_init(kvm);
 	if (ret) {
 		kvm_err("Unable to allocate maps\n");
 		goto out;
 	}
@@ -1920,9 +1927,6 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		kvm_vgic_vcpu_init(vcpu);
-
 	kvm->arch.vgic.ready = true;
 out:
 	if (ret)
@@ -2167,7 +2171,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
 
 	mutex_lock(&dev->kvm->lock);
 
-	ret = vgic_init_maps(dev->kvm);
+	ret = vgic_init(dev->kvm);
 	if (ret)
 		goto out;
 
@@ -2289,7 +2293,7 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
 		mutex_lock(&dev->kvm->lock);
 
-		if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
+		if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
 			ret = -EBUSY;
 		else
 			dev->kvm->arch.vgic.nr_irqs = val;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b0fb390943c..148b2392c76 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,9 +36,6 @@
 #include <linux/seqlock.h>
 #include <trace/events/kvm.h>
 
-#ifdef __KVM_HAVE_IOAPIC
-#include "ioapic.h"
-#endif
 #include "iodev.h"
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
@@ -492,9 +489,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
 	mutex_lock(&kvm->irq_lock);
 	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
 	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
 }
 
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@@ -504,9 +499,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 	hlist_del_init_rcu(&kian->link);
 	mutex_unlock(&kvm->irq_lock);
 	synchronize_srcu(&kvm->irq_srcu);
-#ifdef __KVM_HAVE_IOAPIC
 	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
 }
 #endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3cee7b16705..f5283438ee0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -124,15 +124,6 @@ int vcpu_load(struct kvm_vcpu *vcpu)
 
 	if (mutex_lock_killable(&vcpu->mutex))
 		return -EINTR;
-	if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
-		/* The thread running this VCPU changed. */
-		struct pid *oldpid = vcpu->pid;
-		struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
-		rcu_assign_pointer(vcpu->pid, newpid);
-		if (oldpid)
-			synchronize_rcu();
-		put_pid(oldpid);
-	}
 	cpu = get_cpu();
 	preempt_notifier_register(&vcpu->preempt_notifier);
 	kvm_arch_vcpu_load(vcpu, cpu);
@@ -468,9 +459,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	if (r)
 		goto out_err_no_disable;
 
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
-	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
-#endif
 #ifdef CONFIG_HAVE_KVM_IRQFD
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
@@ -668,48 +656,46 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 	return 0;
 }
 
-static int cmp_memslot(const void *slot1, const void *slot2)
-{
-	struct kvm_memory_slot *s1, *s2;
-
-	s1 = (struct kvm_memory_slot *)slot1;
-	s2 = (struct kvm_memory_slot *)slot2;
-
-	if (s1->npages < s2->npages)
-		return 1;
-	if (s1->npages > s2->npages)
-		return -1;
-
-	return 0;
-}
-
 /*
- * Sort the memslots base on its size, so the larger slots
- * will get better fit.
+ * Insert the memslot and re-sort the memslots array based on GFN,
+ * so that a binary search can be used to look up a GFN.
+ * The sorting algorithm takes advantage of the array being initially
+ * sorted and of the changed memslot's position being known.
 */
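The new update_memslots() below keeps the array sorted by base_gfn in descending order, and since only one slot changed, it restores order with a single pass instead of a full sort. The core move on a plain array, as a self-contained sketch (simplified: no slot ids, no empty-slot handling):

    #include <stdio.h>

    /* Re-sort a descending array after gfn[i] changed, by bubbling the
     * changed entry toward its new position; this mirrors the two while
     * loops in update_memslots() below. */
    static void resort(unsigned long *gfn, int n, int i)
    {
        unsigned long v = gfn[i];

        while (i < n - 1 && v <= gfn[i + 1]) {  /* value shrank: shift left */
            gfn[i] = gfn[i + 1];
            i++;
        }
        while (i > 0 && v > gfn[i - 1]) {       /* value grew: shift right */
            gfn[i] = gfn[i - 1];
            i--;
        }
        gfn[i] = v;
    }

    int main(void)
    {
        unsigned long gfn[] = { 900, 500, 100 };

        gfn[2] = 1000;                  /* the last slot moved past the others */
        resort(gfn, 3, 2);
        printf("%lu %lu %lu\n", gfn[0], gfn[1], gfn[2]);  /* 1000 900 500 */
        return 0;
    }

The kernel version additionally maintains the id_to_index[] back-map and the used_slots count on each shift, but the traversal is the same.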
-static void sort_memslots(struct kvm_memslots *slots)
-{
-	int i;
-
-	sort(slots->memslots, KVM_MEM_SLOTS_NUM,
-	     sizeof(struct kvm_memory_slot), cmp_memslot, NULL);
-
-	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-		slots->id_to_index[slots->memslots[i].id] = i;
-}
-
 static void update_memslots(struct kvm_memslots *slots,
 			    struct kvm_memory_slot *new)
 {
-	if (new) {
-		int id = new->id;
-		struct kvm_memory_slot *old = id_to_memslot(slots, id);
-		unsigned long npages = old->npages;
+	int id = new->id;
+	int i = slots->id_to_index[id];
+	struct kvm_memory_slot *mslots = slots->memslots;
 
-		*old = *new;
-		if (new->npages != npages)
-			sort_memslots(slots);
+	WARN_ON(mslots[i].id != id);
+	if (!new->npages) {
+		new->base_gfn = 0;
+		if (mslots[i].npages)
+			slots->used_slots--;
+	} else {
+		if (!mslots[i].npages)
+			slots->used_slots++;
 	}
+
+	while (i < KVM_MEM_SLOTS_NUM - 1 &&
+	       new->base_gfn <= mslots[i + 1].base_gfn) {
+		if (!mslots[i + 1].npages)
+			break;
+		mslots[i] = mslots[i + 1];
+		slots->id_to_index[mslots[i].id] = i;
+		i++;
+	}
+	while (i > 0 &&
+	       new->base_gfn > mslots[i - 1].base_gfn) {
+		mslots[i] = mslots[i - 1];
+		slots->id_to_index[mslots[i].id] = i;
+		i--;
+	}
+
+	mslots[i] = *new;
+	slots->id_to_index[mslots[i].id] = i;
 }
 
 static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
@@ -727,7 +713,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
 }
 
 static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
-		struct kvm_memslots *slots, struct kvm_memory_slot *new)
+		struct kvm_memslots *slots)
 {
 	struct kvm_memslots *old_memslots = kvm->memslots;
 
@@ -738,7 +724,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	WARN_ON(old_memslots->generation & 1);
 	slots->generation = old_memslots->generation + 1;
 
-	update_memslots(slots, new);
 	rcu_assign_pointer(kvm->memslots, slots);
 	synchronize_srcu_expedited(&kvm->srcu);
 
@@ -760,7 +745,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 *
 * Discontiguous memory is allowed, mostly for framebuffers.
 *
- * Must be called holding mmap_sem for write.
+ * Must be called holding kvm->slots_lock for write.
 */
 int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem)
@@ -866,15 +851,16 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		goto out_free;
 	}
 
+	slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
+			GFP_KERNEL);
+	if (!slots)
+		goto out_free;
+
 	if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
-		slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
-				GFP_KERNEL);
-		if (!slots)
-			goto out_free;
 		slot = id_to_memslot(slots, mem->slot);
 		slot->flags |= KVM_MEMSLOT_INVALID;
 
-		old_memslots = install_new_memslots(kvm, slots, NULL);
+		old_memslots = install_new_memslots(kvm, slots);
 
 		/* slot was deleted or moved, clear iommu mapping */
 		kvm_iommu_unmap_pages(kvm, &old);
@@ -886,6 +872,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		 *	- kvm_is_visible_gfn (mmu_check_roots)
 		 */
 		kvm_arch_flush_shadow_memslot(kvm, slot);
+
+		/*
+		 * We can re-use the old_memslots from above, the only difference
+		 * from the currently installed memslots is the invalid flag.  This
+		 * will get overwritten by update_memslots anyway.
+		 */
+		slots = old_memslots;
 	}
 
@@ -893,26 +885,14 @@
 	if (r)
 		goto out_slots;
 
-	r = -ENOMEM;
-	/*
-	 * We can re-use the old_memslots from above, the only difference
-	 * from the currently installed memslots is the invalid flag.  This
This - * will get overwritten by update_memslots anyway. - */ - if (!slots) { - slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), - GFP_KERNEL); - if (!slots) - goto out_free; - } - /* actual memory is freed via old in kvm_free_physmem_slot below */ if (change == KVM_MR_DELETE) { new.dirty_bitmap = NULL; memset(&new.arch, 0, sizeof(new.arch)); } - old_memslots = install_new_memslots(kvm, slots, &new); + update_memslots(slots, &new); + old_memslots = install_new_memslots(kvm, slots); kvm_arch_commit_memory_region(kvm, mem, &old, change); @@ -1799,10 +1779,6 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target) rcu_read_unlock(); if (!task) return ret; - if (task->flags & PF_VCPU) { - put_task_struct(task); - return ret; - } ret = yield_to(task, 1); put_task_struct(task); @@ -2065,6 +2041,15 @@ static long kvm_vcpu_ioctl(struct file *filp, r = -EINVAL; if (arg) goto out; + if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { + /* The thread running this VCPU changed. */ + struct pid *oldpid = vcpu->pid; + struct pid *newpid = get_task_pid(current, PIDTYPE_PID); + rcu_assign_pointer(vcpu->pid, newpid); + if (oldpid) + synchronize_rcu(); + put_pid(oldpid); + } r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); trace_kvm_userspace_exit(vcpu->run->exit_reason, r); break; @@ -2599,8 +2584,6 @@ static long kvm_vm_ioctl(struct file *filp, break; default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); - if (r == -ENOTTY) - r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); } out: return r; |