332 files changed, 16203 insertions, 19134 deletions
diff --git a/Documentation/devicetree/bindings/input/cap1106.txt b/Documentation/devicetree/bindings/input/cap11xx.txt index 4b463904cba..7d0a3009771 100644 --- a/Documentation/devicetree/bindings/input/cap1106.txt +++ b/Documentation/devicetree/bindings/input/cap11xx.txt @@ -1,14 +1,16 @@ -Device tree bindings for Microchip CAP1106, 6 channel capacitive touch sensor +Device tree bindings for Microchip CAP11xx based capacitive touch sensors -The node for this driver must be a child of a I2C controller node, as the +The node for this device must be a child of an I2C controller node, as the device communicates via I2C only. Required properties: - compatible: Must be "microchip,cap1106" + compatible: Must contain one of: + "microchip,cap1106" + "microchip,cap1126" + "microchip,cap1188" reg: The I2C slave address of the device. - Only 0x28 is valid. interrupts: Property describing the interrupt line the device's ALERT#/CM_IRQ# pin is connected to. @@ -26,6 +28,10 @@ Optional properties: Valid values are 1, 2, 4, and 8. By default, a gain of 1 is set. + microchip,irq-active-high: By default the interrupt pin is active low + open drain. This property allows using the active + high push-pull output. + linux,keycodes: Specifies an array of numeric keycode values to be used for the channels. If this property is omitted, KEY_A, KEY_B, etc are used as @@ -43,11 +49,11 @@ i2c_controller { autorepeat; microchip,sensor-gain = <2>; - linux,keycodes = <103 /* KEY_UP */ - 106 /* KEY_RIGHT */ - 108 /* KEY_DOWN */ - 105 /* KEY_LEFT */ - 109 /* KEY_PAGEDOWN */ - 104>; /* KEY_PAGEUP */ + linux,keycodes = <103>, /* KEY_UP */ + <106>, /* KEY_RIGHT */ + <108>, /* KEY_DOWN */ + <105>, /* KEY_LEFT */ + <109>, /* KEY_PAGEDOWN */ + <104>; /* KEY_PAGEUP */ }; } diff --git a/Documentation/devicetree/bindings/input/elan_i2c.txt b/Documentation/devicetree/bindings/input/elan_i2c.txt new file mode 100644 index 00000000000..ee3242c4ba6 --- /dev/null +++ b/Documentation/devicetree/bindings/input/elan_i2c.txt @@ -0,0 +1,34 @@ +Elantech I2C Touchpad + +Required properties: +- compatible: must be "elan,ekth3000". +- reg: I2C address of the chip. +- interrupt-parent: a phandle for the interrupt controller (see interrupt + binding[0]). +- interrupts: interrupt to which the chip is connected (see interrupt + binding[0]). + +Optional properties: +- wakeup-source: touchpad can be used as a wakeup source. +- pinctrl-names: should be "default" (see pinctrl binding [1]). +- pinctrl-0: a phandle pointing to the pin settings for the device (see + pinctrl binding [1]). +- vcc-supply: a phandle for the regulator supplying 3.3V power. + +[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt +[1]: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt + +Example: + &i2c1 { + /* ... */ + + touchpad@15 { + compatible = "elan,ekth3000"; + reg = <0x15>; + interrupt-parent = <&gpio4>; + interrupts = <0x0 IRQ_TYPE_EDGE_FALLING>; + wakeup-source; + }; + + /* ... */ + }; diff --git a/Documentation/devicetree/bindings/input/elants_i2c.txt b/Documentation/devicetree/bindings/input/elants_i2c.txt new file mode 100644 index 00000000000..a765232e644 --- /dev/null +++ b/Documentation/devicetree/bindings/input/elants_i2c.txt @@ -0,0 +1,33 @@ +Elantech I2C Touchscreen + +Required properties: +- compatible: must be "elan,ekth3500". +- reg: I2C address of the chip. +- interrupt-parent: a phandle for the interrupt controller (see interrupt + binding[0]).
+- interrupts: interrupt to which the chip is connected (see interrupt + binding[0]). + +Optional properties: +- wakeup-source: touchscreen can be used as a wakeup source. +- pinctrl-names: should be "default" (see pinctrl binding [1]). +- pinctrl-0: a phandle pointing to the pin settings for the device (see + pinctrl binding [1]). + +[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt +[1]: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt + +Example: + &i2c1 { + /* ... */ + + touchscreen@10 { + compatible = "elan,ekth3500"; + reg = <0x10>; + interrupt-parent = <&gpio4>; + interrupts = <0x0 IRQ_TYPE_EDGE_FALLING>; + wakeup-source; + }; + + /* ... */ + }; diff --git a/Documentation/devicetree/bindings/input/gpio-keys.txt b/Documentation/devicetree/bindings/input/gpio-keys.txt index 5c2c02140a6..a4a38fcf2ed 100644 --- a/Documentation/devicetree/bindings/input/gpio-keys.txt +++ b/Documentation/devicetree/bindings/input/gpio-keys.txt @@ -10,10 +10,13 @@ Optional properties: Each button (key) is represented as a sub-node of "gpio-keys": Subnode properties: - - gpios: OF device-tree gpio specification. - label: Descriptive name of the key. - linux,code: Keycode to emit. +Required mutually exclusive subnode-properties: + - gpios: OF device-tree gpio specification. + - interrupts: the interrupt line for that input + Optional subnode-properties: - linux,input-type: Specify event type this button/key generates. If not specified defaults to <1> == EV_KEY. @@ -33,4 +36,9 @@ Example nodes: linux,code = <103>; gpios = <&gpio1 0 1>; }; + button@22 { + label = "GPIO Key DOWN"; + linux,code = <108>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; + }; ... diff --git a/Documentation/devicetree/bindings/mtd/atmel-nand.txt b/Documentation/devicetree/bindings/mtd/atmel-nand.txt index 6edc3b616e9..1fe6dde9849 100644 --- a/Documentation/devicetree/bindings/mtd/atmel-nand.txt +++ b/Documentation/devicetree/bindings/mtd/atmel-nand.txt @@ -5,7 +5,9 @@ Required properties: - reg : should specify localbus address and size used for the chip, and hardware ECC controller if available. If the hardware ECC is PMECC, it should contain address and size for - PMECC, PMECC Error Location controller and ROM which has lookup tables. + PMECC and PMECC Error Location controller. + The PMECC lookup table address and size in ROM are optional. If not + specified, the driver will build it at runtime. - atmel,nand-addr-offset : offset for the address latch. - atmel,nand-cmd-offset : offset for the command latch. - #address-cells, #size-cells : Must be present if the device has sub-nodes @@ -27,7 +29,7 @@ Optional properties: are: 512, 1024. - atmel,pmecc-lookup-table-offset : includes two offsets of lookup table in ROM for different sector size. First one is for sector size 512, the next is for - sector size 1024. + sector size 1024. If not specified, the driver will build the table at runtime. - nand-bus-width : 8 or 16 bus width if not present 8 - nand-on-flash-bbt: boolean to enable on flash bbt option if not present false - Nand Flash Controller(NFC) is a slave driver under Atmel nand flash diff --git a/Documentation/devicetree/bindings/mtd/diskonchip.txt b/Documentation/devicetree/bindings/mtd/diskonchip.txt new file mode 100644 index 00000000000..3e13bfdbea5 --- /dev/null +++ b/Documentation/devicetree/bindings/mtd/diskonchip.txt @@ -0,0 +1,15 @@ +M-Systems and Sandisk DiskOnChip devices + +M-System DiskOnChip G3 +====================== +The Sandisk (formerly M-Systems) docg3 is a NAND device of 64MB to 256MB.
+ +Required properties: + - compatible: should be "m-systems,diskonchip-g3" + - reg: register base and size + +Example: + docg3: flash@0 { + compatible = "m-systems,diskonchip-g3"; + reg = <0x0 0x2000>; + }; diff --git a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt index 36ef07d3c90..af8915b41cc 100644 --- a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt +++ b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt @@ -11,8 +11,8 @@ Required properties: are made in native endianness. - #address-cells, #size-cells : Must be present if the device has sub-nodes representing partitions. -- gpios : specifies the gpio pins to control the NAND device. nwp is an - optional gpio and may be set to 0 if not present. +- gpios : Specifies the GPIO pins to control the NAND device. The order of + GPIO references is: RDY, nCE, ALE, CLE, and an optional nWP. Optional properties: - bank-width : Width (in bytes) of the device. If not present, the width @@ -35,11 +35,11 @@ gpio-nand@1,0 { reg = <1 0x0000 0x2>; #address-cells = <1>; #size-cells = <1>; - gpios = <&banka 1 0 /* rdy */ - &banka 2 0 /* nce */ - &banka 3 0 /* ale */ - &banka 4 0 /* cle */ - 0 /* nwp */>; + gpios = <&banka 1 0>, /* RDY */ + <&banka 2 0>, /* nCE */ + <&banka 3 0>, /* ALE */ + <&banka 4 0>, /* CLE */ + <0>; /* nWP */ partition@0 { ... diff --git a/Documentation/devicetree/bindings/mtd/sunxi-nand.txt b/Documentation/devicetree/bindings/mtd/sunxi-nand.txt new file mode 100644 index 00000000000..0273adb8638 --- /dev/null +++ b/Documentation/devicetree/bindings/mtd/sunxi-nand.txt @@ -0,0 +1,45 @@ +Allwinner NAND Flash Controller (NFC) + +Required properties: +- compatible : "allwinner,sun4i-a10-nand". +- reg : shall contain registers location and length for data and reg. +- interrupts : shall define the nand controller interrupt. +- #address-cells: shall be set to 1. Encode the nand CS. +- #size-cells : shall be set to 0. +- clocks : shall reference nand controller clocks. +- clock-names : nand controller internal clock names. Shall contain : + * "ahb" : AHB gating clock + * "mod" : nand controller clock + +Optional child nodes: +Child nodes represent the available nand chips. + +Optional properties: +- allwinner,rb : shall contain the native Ready/Busy ids. + or +- rb-gpios : shall contain the gpios used as R/B pins. +- nand-ecc-mode : one of the supported ECC modes ("hw", "hw_syndrome", "soft", + "soft_bch" or "none") + +see Documentation/devicetree/bindings/mtd/nand.txt for generic bindings. + + +Examples: +nfc: nand@01c03000 { + compatible = "allwinner,sun4i-a10-nand"; + reg = <0x01c03000 0x1000>; + interrupts = <0 37 1>; + clocks = <&ahb_gates 13>, <&nand_clk>; + clock-names = "ahb", "mod"; + #address-cells = <1>; + #size-cells = <0>; + pinctrl-names = "default"; + pinctrl-0 = <&nand_pins_a &nand_cs0_pins_a &nand_rb0_pins_a>; + status = "okay"; + + nand@0 { + reg = <0>; + allwinner,rb = <0>; + nand-ecc-mode = "soft_bch"; + }; +}; diff --git a/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt b/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt new file mode 100644 index 00000000000..cfda0d57d30 --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt @@ -0,0 +1,29 @@ +Device-Tree bindings for Atmel's HLCDC (High-end LCD Controller) PWM driver + +The Atmel HLCDC PWM is a subdevice of the HLCDC MFD device. +See ../mfd/atmel-hlcdc.txt for more details.
+ +Required properties: + - compatible: value should be one of the following: + "atmel,hlcdc-pwm" + - pinctrl-names: the pin control state names. Should contain "default". + - pinctrl-0: should contain the pinctrl states described by pinctrl + default. + - #pwm-cells: should be set to 3. This PWM chip uses the default 3-cell + binding defined in pwm.txt in this directory. + +Example: + + hlcdc: hlcdc@f0030000 { + compatible = "atmel,sama5d3-hlcdc"; + reg = <0xf0030000 0x2000>; + clocks = <&lcdc_clk>, <&lcdck>, <&clk32k>; + clock-names = "periph_clk", "sys_clk", "slow_clk"; + + hlcdc_pwm: hlcdc-pwm { + compatible = "atmel,hlcdc-pwm"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_lcd_pwm>; + #pwm-cells = <3>; + }; + }; diff --git a/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt b/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt new file mode 100644 index 00000000000..fb6fb31bc4c --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt @@ -0,0 +1,30 @@ +BCM2835 PWM controller (Raspberry Pi controller) + +Required properties: +- compatible: should be "brcm,bcm2835-pwm" +- reg: physical base address and length of the controller's registers +- clocks: This clock defines the base clock frequency of the PWM hardware + system; the period and the duty_cycle of the PWM signal are multiples of + the base period. +- #pwm-cells: Should be 2. See pwm.txt in this directory for a description of + the cells format. + +Examples: + +pwm@2020c000 { + compatible = "brcm,bcm2835-pwm"; + reg = <0x2020c000 0x28>; + clocks = <&clk_pwm>; + #pwm-cells = <2>; +}; + +clocks { + .... + clk_pwm: pwm { + compatible = "fixed-clock"; + reg = <3>; + #clock-cells = <0>; + clock-frequency = <9200000>; + }; + .... +}; diff --git a/Documentation/devicetree/bindings/thermal/armada-thermal.txt b/Documentation/devicetree/bindings/thermal/armada-thermal.txt index 4cf024929a3..4698e0edc20 100644 --- a/Documentation/devicetree/bindings/thermal/armada-thermal.txt +++ b/Documentation/devicetree/bindings/thermal/armada-thermal.txt @@ -5,17 +5,9 @@ Required properties: - compatible: Should be set to one of the following: marvell,armada370-thermal marvell,armada375-thermal - marvell,armada375-z1-thermal marvell,armada380-thermal marvell,armadaxp-thermal - Note: As the name suggests, "marvell,armada375-z1-thermal" - applies for the SoC Z1 stepping only. On such stepping - some quirks need to be done and the register offset differs - from the one in the A0 stepping. - The operating system may auto-detect the SoC stepping and - update the compatible and register offsets at runtime. - - reg: Device's register space. Two entries are expected, see the examples below. The first one is required for the sensor register; diff --git a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt new file mode 100644 index 00000000000..ef802de4957 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt @@ -0,0 +1,68 @@ +* Temperature Sensor ADC (TSADC) on rockchip SoCs + +Required properties: +- compatible : "rockchip,rk3288-tsadc" +- reg : physical base address of the controller and length of memory mapped + region. +- interrupts : The interrupt number to the cpu. The interrupt specifier format + depends on the interrupt controller. +- clocks : Must contain an entry for each entry in clock-names. +- clock-names : Shall be "tsadc" for the converter-clock, and "apb_pclk" for + the peripheral clock.
+- resets : Must contain an entry for each entry in reset-names. + See ../reset/reset.txt for details. +- reset-names : Must include the name "tsadc-apb". +- #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description. +- rockchip,hw-tshut-temp : The hardware-controlled shutdown temperature value. +- rockchip,hw-tshut-mode : The hardware-controlled shutdown mode 0:CRU 1:GPIO. +- rockchip,hw-tshut-polarity : The hardware-controlled active polarity 0:LOW + 1:HIGH. + +Example: +tsadc: tsadc@ff280000 { + compatible = "rockchip,rk3288-tsadc"; + reg = <0xff280000 0x100>; + interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>; + clock-names = "tsadc", "apb_pclk"; + resets = <&cru SRST_TSADC>; + reset-names = "tsadc-apb"; + pinctrl-names = "default"; + pinctrl-0 = <&otp_out>; + #thermal-sensor-cells = <1>; + rockchip,hw-tshut-temp = <95000>; + rockchip,hw-tshut-mode = <0>; + rockchip,hw-tshut-polarity = <0>; +}; + +Example: referring to thermal sensors: +thermal-zones { + cpu_thermal: cpu_thermal { + polling-delay-passive = <1000>; /* milliseconds */ + polling-delay = <5000>; /* milliseconds */ + + /* sensor ID */ + thermal-sensors = <&tsadc 1>; + + trips { + cpu_alert0: cpu_alert { + temperature = <70000>; /* millicelsius */ + hysteresis = <2000>; /* millicelsius */ + type = "passive"; + }; + cpu_crit: cpu_crit { + temperature = <90000>; /* millicelsius */ + hysteresis = <2000>; /* millicelsius */ + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&cpu_alert0>; + cooling-device = + <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + }; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt b/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt new file mode 100644 index 00000000000..ecf3ed76cd4 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt @@ -0,0 +1,53 @@ +Tegra124 SOCTHERM thermal management system + +The SOCTHERM IP block contains thermal sensors, support for polled +or interrupt-based thermal monitoring, CPU and GPU throttling based +on temperature trip points, and handling of external overcurrent +notifications. It is also used to manage emergency shutdown in an +overheating situation. + +Required properties : +- compatible : "nvidia,tegra124-soctherm". +- reg : Should contain 1 entry: + - SOCTHERM register set +- interrupts : Defines the interrupt used by SOCTHERM +- clocks : Must contain an entry for each entry in clock-names. + See ../clocks/clock-bindings.txt for details. +- clock-names : Must include the following entries: + - tsensor + - soctherm +- resets : Must contain an entry for each entry in reset-names. + See ../reset/reset.txt for details. +- reset-names : Must include the following entries: + - soctherm +- #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description + of this property. See <dt-bindings/thermal/tegra124-soctherm.h> for a + list of valid values when referring to thermal sensors.
+ + +Example : + + soctherm@0,700e2000 { + compatible = "nvidia,tegra124-soctherm"; + reg = <0x0 0x700e2000 0x0 0x1000>; + interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&tegra_car TEGRA124_CLK_TSENSOR>, + <&tegra_car TEGRA124_CLK_SOC_THERM>; + clock-names = "tsensor", "soctherm"; + resets = <&tegra_car 78>; + reset-names = "soctherm"; + + #thermal-sensor-cells = <1>; + }; + +Example: referring to thermal sensors : + + thermal-zones { + cpu { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>; + }; + }; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 423d47418e7..b1df0ad1306 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -47,6 +47,7 @@ dlink D-Link Corporation dmo Data Modul AG ebv EBV Elektronik edt Emerging Display Technologies +elan Elan Microelectronic Corp. emmicro EM Microelectronic energymicro Silicon Laboratories (formerly Energy Micro AS) epcos EPCOS AG diff --git a/Documentation/ia64/kvm.txt b/Documentation/ia64/kvm.txt deleted file mode 100644 index ffb5c80bec3..00000000000 --- a/Documentation/ia64/kvm.txt +++ /dev/null @@ -1,83 +0,0 @@ -Currently, kvm module is in EXPERIMENTAL stage on IA64. This means that -interfaces are not stable enough to use. So, please don't run critical -applications in virtual machine. -We will try our best to improve it in future versions! - - Guide: How to boot up guests on kvm/ia64 - -This guide is to describe how to enable kvm support for IA-64 systems. - -1. Get the kvm source from git.kernel.org. - Userspace source: - git clone git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git - Kernel Source: - git clone git://git.kernel.org/pub/scm/linux/kernel/git/xiantao/kvm-ia64.git - -2. Compile the source code. - 2.1 Compile userspace code: - (1)cd ./kvm-userspace - (2)./configure - (3)cd kernel - (4)make sync LINUX= $kernel_dir (kernel_dir is the directory of kernel source.) - (5)cd .. - (6)make qemu - (7)cd qemu; make install - - 2.2 Compile kernel source code: - (1) cd ./$kernel_dir - (2) Make menuconfig - (3) Enter into virtualization option, and choose kvm. - (4) make - (5) Once (4) done, make modules_install - (6) Make initrd, and use new kernel to reboot up host machine. - (7) Once (6) done, cd $kernel_dir/arch/ia64/kvm - (8) insmod kvm.ko; insmod kvm-intel.ko - -Note: For step 2, please make sure that host page size == TARGET_PAGE_SIZE of qemu, otherwise, may fail. - -3. Get Guest Firmware named as Flash.fd, and put it under right place: - (1) If you have the guest firmware (binary) released by Intel Corp for Xen, use it directly. - - (2) If you have no firmware at hand, Please download its source from - hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg - you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries. - - (3) Rename the firmware you owned to Flash.fd, and copy it to /usr/local/share/qemu - -4. Boot up Linux or Windows guests: - 4.1 Create or install a image for guest boot. If you have xen experience, it should be easy. - - 4.2 Boot up guests use the following command. - /usr/local/bin/qemu-system-ia64 -smp xx -m 512 -hda $your_image - (xx is the number of virtual processors for the guest, now the maximum value is 4) - -5. Known possible issue on some platforms with old Firmware. 
- -In the event of strange host crash issues, try to solve it through either of the following ways: - -(1): Upgrade your Firmware to the latest one. - -(2): Applying the below patch to kernel source. -diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S -index 0b53344..f02b0f7 100644 ---- a/arch/ia64/kernel/pal.S -+++ b/arch/ia64/kernel/pal.S -@@ -84,7 +84,8 @@ GLOBAL_ENTRY(ia64_pal_call_static) - mov ar.pfs = loc1 - mov rp = loc0 - ;; -- srlz.d // serialize restoration of psr.l -+ srlz.i // serialize restoration of psr.l -+ ;; - br.ret.sptk.many b0 - END(ia64_pal_call_static) - -6. Bug report: - If you found any issues when use kvm/ia64, Please post the bug info to kvm-ia64-devel mailing list. - https://lists.sourceforge.net/lists/listinfo/kvm-ia64-devel/ - -Thanks for your interest! Let's work together, and make kvm/ia64 stronger and stronger! - - - Xiantao Zhang <xiantao.zhang@intel.com> - 2008.3.10 diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 7610eaa4d49..0007fef4ed8 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -68,9 +68,12 @@ description: Capability: which KVM extension provides this ioctl. Can be 'basic', which means that is will be provided by any kernel that supports - API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which + API version 12 (see section 4.1), a KVM_CAP_xyz constant, which means availability needs to be checked with KVM_CHECK_EXTENSION - (see section 4.4). + (see section 4.4), or 'none' which means that while not all kernels + support this ioctl, there's no capability bit to check its + availability: for kernels that don't support the ioctl, + the ioctl returns -ENOTTY. Architectures: which instruction set architectures provide this ioctl. x86 includes both i386 and x86_64. @@ -604,7 +607,7 @@ struct kvm_fpu { 4.24 KVM_CREATE_IRQCHIP Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390) -Architectures: x86, ia64, ARM, arm64, s390 +Architectures: x86, ARM, arm64, s390 Type: vm ioctl Parameters: none Returns: 0 on success, -1 on error @@ -612,7 +615,7 @@ Returns: 0 on success, -1 on error Creates an interrupt controller model in the kernel. On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 -only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is +only go to the IOAPIC. On ARM/arm64, a GIC is created. On s390, a dummy irq routing table is created. Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled @@ -622,7 +625,7 @@ before KVM_CREATE_IRQCHIP can be used. 4.25 KVM_IRQ_LINE Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64, arm, arm64 +Architectures: x86, arm, arm64 Type: vm ioctl Parameters: struct kvm_irq_level Returns: 0 on success, -1 on error @@ -676,7 +679,7 @@ struct kvm_irq_level { 4.26 KVM_GET_IRQCHIP Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 +Architectures: x86 Type: vm ioctl Parameters: struct kvm_irqchip (in/out) Returns: 0 on success, -1 on error @@ -698,7 +701,7 @@ struct kvm_irqchip { 4.27 KVM_SET_IRQCHIP Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 +Architectures: x86 Type: vm ioctl Parameters: struct kvm_irqchip (in) Returns: 0 on success, -1 on error @@ -991,7 +994,7 @@ for vm-wide capabilities. 
4.38 KVM_GET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, ia64, s390 +Architectures: x86, s390 Type: vcpu ioctl Parameters: struct kvm_mp_state (out) Returns: 0 on success; -1 on error @@ -1005,16 +1008,15 @@ uniprocessor guests). Possible values are: - - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86, ia64] + - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86] - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) - which has not yet received an INIT signal [x86, - ia64] + which has not yet received an INIT signal [x86] - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is - now ready for a SIPI [x86, ia64] + now ready for a SIPI [x86] - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and - is waiting for an interrupt [x86, ia64] + is waiting for an interrupt [x86] - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector - accessible via KVM_GET_VCPU_EVENTS) [x86, ia64] + accessible via KVM_GET_VCPU_EVENTS) [x86] - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390] - KVM_MP_STATE_CHECK_STOP: the vcpu is in a special error state [s390] - KVM_MP_STATE_OPERATING: the vcpu is operating (running or halted) @@ -1022,7 +1024,7 @@ Possible values are: - KVM_MP_STATE_LOAD: the vcpu is in a special load/startup state [s390] -On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an +On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on these architectures. @@ -1030,7 +1032,7 @@ these architectures. 4.39 KVM_SET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, ia64, s390 +Architectures: x86, s390 Type: vcpu ioctl Parameters: struct kvm_mp_state (in) Returns: 0 on success; -1 on error @@ -1038,7 +1040,7 @@ Returns: 0 on success; -1 on error Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for arguments. -On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an +On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on these architectures. @@ -1065,7 +1067,7 @@ documentation when it pops into existence). 4.41 KVM_SET_BOOT_CPU_ID Capability: KVM_CAP_SET_BOOT_CPU_ID -Architectures: x86, ia64 +Architectures: x86 Type: vm ioctl Parameters: unsigned long vcpu_id Returns: 0 on success, -1 on error @@ -1257,8 +1259,8 @@ The flags bitmap is defined as: 4.48 KVM_ASSIGN_PCI_DEVICE -Capability: KVM_CAP_DEVICE_ASSIGNMENT -Architectures: x86 ia64 +Capability: none +Architectures: x86 Type: vm ioctl Parameters: struct kvm_assigned_pci_dev (in) Returns: 0 on success, -1 on error @@ -1298,25 +1300,36 @@ Only PCI header type 0 devices with PCI BAR resources are supported by device assignment. The user requesting this ioctl must have read/write access to the PCI sysfs resource files associated with the device. +Errors: + ENOTTY: kernel does not support this ioctl + + Other error conditions may be defined by individual device types or + have their standard meanings. + 4.49 KVM_DEASSIGN_PCI_DEVICE -Capability: KVM_CAP_DEVICE_DEASSIGNMENT -Architectures: x86 ia64 +Capability: none +Architectures: x86 Type: vm ioctl Parameters: struct kvm_assigned_pci_dev (in) Returns: 0 on success, -1 on error Ends PCI device assignment, releasing all associated resources. -See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. 
Only assigned_dev_id is +See KVM_ASSIGN_PCI_DEVICE for the data structure. Only assigned_dev_id is used in kvm_assigned_pci_dev to identify the device. +Errors: + ENOTTY: kernel does not support this ioctl + + Other error conditions may be defined by individual device types or + have their standard meanings. 4.50 KVM_ASSIGN_DEV_IRQ Capability: KVM_CAP_ASSIGN_DEV_IRQ -Architectures: x86 ia64 +Architectures: x86 Type: vm ioctl Parameters: struct kvm_assigned_irq (in) Returns: 0 on success, -1 on error @@ -1346,11 +1359,17 @@ The following flags are defined: It is not valid to specify multiple types per host or guest IRQ. However, the IRQ type of host and guest can differ or can even be null. +Errors: + ENOTTY: kernel does not support this ioctl + + Other error conditions may be defined by individual device types or + have their standard meanings. + 4.51 KVM_DEASSIGN_DEV_IRQ Capability: KVM_CAP_ASSIGN_DEV_IRQ -Architectures: x86 ia64 +Architectures: x86 Type: vm ioctl Parameters: struct kvm_assigned_irq (in) Returns: 0 on success, -1 on error @@ -1365,7 +1384,7 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. 4.52 KVM_SET_GSI_ROUTING Capability: KVM_CAP_IRQ_ROUTING -Architectures: x86 ia64 s390 +Architectures: x86 s390 Type: vm ioctl Parameters: struct kvm_irq_routing (in) Returns: 0 on success, -1 on error @@ -1423,8 +1442,8 @@ struct kvm_irq_routing_s390_adapter { 4.53 KVM_ASSIGN_SET_MSIX_NR -Capability: KVM_CAP_DEVICE_MSIX -Architectures: x86 ia64 +Capability: none +Architectures: x86 Type: vm ioctl Parameters: struct kvm_assigned_msix_nr (in) Returns: 0 on success, -1 on error @@ -1445,8 +1464,8 @@ struct kvm_assigned_msix_nr { 4.54 KVM_ASSIGN_SET_MSIX_ENTRY -Capability: KVM_CAP_DEVICE_MSIX -Architectures: x86 ia64 +Capability: none +Architectures: x86 Type: vm ioctl Parameters: struct kvm_assigned_msix_entry (in) Returns: 0 on success, -1 on error @@ -1461,6 +1480,12 @@ struct kvm_assigned_msix_entry { __u16 padding[3]; }; +Errors: + ENOTTY: kernel does not support this ioctl + + Other error conditions may be defined by individual device types or + have their standard meanings. + 4.55 KVM_SET_TSC_KHZ @@ -2453,9 +2478,15 @@ return ENOEXEC for that vcpu. Note that because some registers reflect machine topology, all vcpus should be created before this ioctl is invoked. +Userspace can call this function multiple times for a given vcpu, including +after the vcpu has been run. This will reset the vcpu to its initial +state. All calls to this function after the initial call must use the same +target and same set of feature flags, otherwise EINVAL will be returned. + Possible features: - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state. - Depends on KVM_CAP_ARM_PSCI. + Depends on KVM_CAP_ARM_PSCI. If not set, the CPU will be powered on + and execute guest code when KVM_RUN is called. - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode. Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU. @@ -2951,6 +2982,15 @@ HVC instruction based PSCI call from the vcpu. The 'type' field describes the system-level event type. The 'flags' field describes architecture specific flags for the system-level event. +Valid values for 'type' are: + KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the + VM. Userspace is not obliged to honour this, and if it does honour + it, it does not need to destroy the VM synchronously (ie it may call + KVM_RUN again before shutdown finally occurs).
+ KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM. + As with SHUTDOWN, userspace can choose to ignore the request, or + to schedule the reset to occur in the future and may call KVM_RUN again. + /* Fix the size of the union. */ char padding[256]; }; diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 0d16f96c0ea..d426fc87fe9 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -12,14 +12,14 @@ specific. 1. GROUP: KVM_S390_VM_MEM_CTRL Architectures: s390 -1.1. ATTRIBUTE: KVM_S390_VM_MEM_CTRL +1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA Parameters: none -Returns: -EBUSY if already a vcpus is defined, otherwise 0 +Returns: -EBUSY if a vcpu is already defined, otherwise 0 -Enables CMMA for the virtual machine +Enables Collaborative Memory Management Assist (CMMA) for the virtual machine. -1.2. ATTRIBUTE: KVM_S390_VM_CLR_CMMA -Parameteres: none +1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA +Parameters: none Returns: 0 Clear the CMMA status for all guest pages, so any pages the guest marked diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt index 6d470ae7b07..2a71c8f29f6 100644 --- a/Documentation/virtual/kvm/msr.txt +++ b/Documentation/virtual/kvm/msr.txt @@ -168,7 +168,7 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02 64 byte memory area which must be in guest RAM and must be zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1 when asynchronous page faults are enabled on the vcpu 0 when - disabled. Bit 2 is 1 if asynchronous page faults can be injected + disabled. Bit 1 is 1 if asynchronous page faults can be injected when vcpu is in cpl == 0. First 4 byte of 64 byte memory location will be written to by diff --git a/MAINTAINERS b/MAINTAINERS index 4aac6c8dce7..7605833aabc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4255,6 +4255,12 @@ L: linux-media@vger.kernel.org S: Maintained F: drivers/media/usb/go7007/ +GOODIX TOUCHSCREEN +M: Bastien Nocera <hadess@hadess.net> +L: linux-input@vger.kernel.org +S: Maintained +F: drivers/input/touchscreen/goodix.c + GPIO SUBSYSTEM M: Linus Walleij <linus.walleij@linaro.org> M: Alexandre Courbot <gnurou@gmail.com> @@ -5489,15 +5495,6 @@ S: Supported F: arch/powerpc/include/asm/kvm* F: arch/powerpc/kvm/ -KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64) -M: Xiantao Zhang <xiantao.zhang@intel.com> -L: kvm-ia64@vger.kernel.org -W: http://kvm.qumranet.com -S: Supported -F: Documentation/ia64/kvm.txt -F: arch/ia64/include/asm/kvm* -F: arch/ia64/kvm/ - KERNEL VIRTUAL MACHINE for s390 (KVM/s390) M: Christian Borntraeger <borntraeger@de.ibm.com> M: Cornelia Huck <cornelia.huck@de.ibm.com> @@ -9510,6 +9507,7 @@ Q: https://patchwork.kernel.org/project/linux-pm/list/ S: Supported F: drivers/thermal/ F: include/linux/thermal.h +F: include/uapi/linux/thermal.h F: include/linux/cpu_cooling.h F: Documentation/devicetree/bindings/thermal/ diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts index 51b373ff106..4eb540be368 100644 --- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts +++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts @@ -1942,4 +1942,48 @@ <&tegra_car TEGRA124_CLK_EXTERN1>; clock-names = "pll_a", "pll_a_out0", "mclk"; }; + + thermal-zones { + cpu { + trips { + trip@0 { + temperature = <101000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + /* There are currently no cooling maps because there are no cooling devices */ + }; + }; + + mem { + trips { + trip@0 { 
+ temperature = <101000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + /* There are currently no cooling maps because there are no cooling devices */ + }; + }; + + gpu { + trips { + trip@0 { + temperature = <101000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + /* There are currently no cooling maps because there are no cooling devices */ + }; + }; + }; }; diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 3ad2e3cf299..4be06c6ea0c 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -4,6 +4,7 @@ #include <dt-bindings/pinctrl/pinctrl-tegra.h> #include <dt-bindings/pinctrl/pinctrl-tegra-xusb.h> #include <dt-bindings/interrupt-controller/arm-gic.h> +#include <dt-bindings/thermal/tegra124-soctherm.h> #include "skeleton.dtsi" @@ -657,6 +658,18 @@ status = "disabled"; }; + soctherm: thermal-sensor@0,700e2000 { + compatible = "nvidia,tegra124-soctherm"; + reg = <0x0 0x700e2000 0x0 0x1000>; + interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&tegra_car TEGRA124_CLK_TSENSOR>, + <&tegra_car TEGRA124_CLK_SOC_THERM>; + clock-names = "tsensor", "soctherm"; + resets = <&tegra_car 78>; + reset-names = "soctherm"; + #thermal-sensor-cells = <1>; + }; + ahub@0,70300000 { compatible = "nvidia,tegra124-ahub"; reg = <0x0 0x70300000 0x0 0x200>, @@ -898,6 +911,40 @@ }; }; + thermal-zones { + cpu { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>; + }; + + mem { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_MEM>; + }; + + gpu { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_GPU>; + }; + + pllx { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_PLLX>; + }; + }; + timer { compatible = "arm,armv7-timer"; interrupts = <GIC_PPI 13 diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index b9db269c6e6..66ce17655bb 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr = HCR_GUEST_MASK; +} + static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 53036e21756..254e0650e48 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -150,8 +150,6 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index acb0d571271..63e0ecc0490 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -52,6 +52,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_boot_hyp_pgd(void); void free_hyp_pgds(void); +void stage2_unmap_vm(struct kvm *kvm); int 
kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, @@ -161,9 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) } static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, - unsigned long size) + unsigned long size, + bool ipa_uncached) { - if (!vcpu_has_cache_enabled(vcpu)) + if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) kvm_flush_dcache_to_poc((void *)hva, size); /* diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9e193c8a959..2d6d9100106 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) int err; struct kvm_vcpu *vcpu; + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) { + err = -EBUSY; + goto out; + } + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu) { err = -ENOMEM; @@ -263,6 +268,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); /* Set up the timer */ kvm_timer_vcpu_init(vcpu); @@ -419,6 +425,7 @@ static void update_vttbr(struct kvm *kvm) static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; int ret; if (likely(vcpu->arch.has_run_once)) @@ -427,15 +434,23 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) vcpu->arch.has_run_once = true; /* - * Initialize the VGIC before running a vcpu the first time on - * this VM. + * Map the VGIC hardware resources before running a vcpu the first + * time on this VM. */ - if (unlikely(!vgic_initialized(vcpu->kvm))) { - ret = kvm_vgic_init(vcpu->kvm); + if (unlikely(!vgic_ready(kvm))) { + ret = kvm_vgic_map_resources(kvm); if (ret) return ret; } + /* + * Enable the arch timers only if we have an in-kernel VGIC + * and it has been properly initialized, since we cannot handle + * interrupts from the virtual timer with a userspace gic. + */ + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) + kvm_timer_enable(kvm); + return 0; } @@ -649,6 +664,48 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, return -EINVAL; } +static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i; + int phys_target = kvm_target_cpu(); + + if (init->target != phys_target) + return -EINVAL; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same target. + */ + if (vcpu->arch.target != -1 && vcpu->arch.target != init->target) + return -EINVAL; + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ + for (i = 0; i < sizeof(init->features) * 8; i++) { + bool set = (init->features[i / 32] & (1 << (i % 32))); + + if (set && i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same feature set. + */ + if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES && + test_bit(i, vcpu->arch.features) != set) + return -EINVAL; + + if (set) + set_bit(i, vcpu->arch.features); + } + + vcpu->arch.target = phys_target; + + /* Now we know what it is, we can reset it. 
*/ + return kvm_reset_vcpu(vcpu); +} + + static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { @@ -659,10 +716,21 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, return ret; /* + * Ensure a rebooted VM will fault in RAM pages and detect if the + * guest MMU is turned off and flush the caches as needed. + */ + if (vcpu->arch.has_run_once) + stage2_unmap_vm(vcpu->kvm); + + vcpu_reset_hcr(vcpu); + + /* * Handle the "start in power-off" case by marking the VCPU as paused. */ - if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) + if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) vcpu->arch.pause = true; + else + vcpu->arch.pause = false; return 0; } diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index cc0b78769bd..384bab67c46 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr = HCR_GUEST_MASK; return 0; } @@ -274,31 +273,6 @@ int __attribute_const__ kvm_target_cpu(void) } } -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init) -{ - unsigned int i; - - /* We can only cope with guest==host and only on A15/A7 (for now). */ - if (init->target != kvm_target_cpu()) - return -EINVAL; - - vcpu->arch.target = init->target; - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - - /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ - for (i = 0; i < sizeof(init->features) * 8; i++) { - if (test_bit(i, (void *)init->features)) { - if (i >= KVM_VCPU_MAX_FEATURES) - return -ENOENT; - set_bit(i, vcpu->arch.features); - } - } - - /* Now we know what it is, we can reset it. */ - return kvm_reset_vcpu(vcpu); -} - int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { int target = kvm_target_cpu(); diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 4cb5a93182e..5d3bfc0eb3f 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -187,15 +187,18 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, } rt = vcpu->arch.mmio_decode.rt; - data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len); - trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE : - KVM_TRACE_MMIO_READ_UNSATISFIED, - mmio.len, fault_ipa, - (mmio.is_write) ? data : 0); + if (mmio.is_write) { + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), + mmio.len); - if (mmio.is_write) + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len, + fault_ipa, data); mmio_write_buf(mmio.data, mmio.len, data); + } else { + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len, + fault_ipa, 0); + } if (vgic_handle_mmio(vcpu, run, &mmio)) return 1; diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 8664ff17cbb..1dc9778a00a 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -612,6 +612,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) unmap_range(kvm, kvm->arch.pgd, start, size); } +static void stage2_unmap_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + hva_t hva = memslot->userspace_addr; + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t size = PAGE_SIZE * memslot->npages; + hva_t reg_end = hva + size; + + /* + * A memory region could potentially cover multiple VMAs, and any holes + * between them, so iterate over all of them to find out if we should + * unmap any of them. 
+ * + * +--------------------------------------------+ + * +---------------+----------------+ +----------------+ + * | : VMA 1 | VMA 2 | | VMA 3 : | + * +---------------+----------------+ +----------------+ + * | memory region | + * +--------------------------------------------+ + */ + do { + struct vm_area_struct *vma = find_vma(current->mm, hva); + hva_t vm_start, vm_end; + + if (!vma || vma->vm_start >= reg_end) + break; + + /* + * Take the intersection of this VMA with the memory region + */ + vm_start = max(hva, vma->vm_start); + vm_end = min(reg_end, vma->vm_end); + + if (!(vma->vm_flags & VM_PFNMAP)) { + gpa_t gpa = addr + (vm_start - memslot->userspace_addr); + unmap_stage2_range(kvm, gpa, vm_end - vm_start); + } + hva = vm_end; + } while (hva < reg_end); +} + +/** + * stage2_unmap_vm - Unmap Stage-2 RAM mappings + * @kvm: The struct kvm pointer + * + * Go through the memregions and unmap any regular RAM + * backing memory already mapped to the VM. + */ +void stage2_unmap_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_unmap_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); +} + /** * kvm_free_stage2_pgd - free all stage-2 tables * @kvm: The KVM struct pointer for the VM. @@ -853,6 +918,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct vm_area_struct *vma; pfn_t pfn; pgprot_t mem_type = PAGE_S2; + bool fault_ipa_uncached; write_fault = kvm_is_write_fault(vcpu); if (fault_status == FSC_PERM && !write_fault) { @@ -919,6 +985,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (!hugetlb && !force_pte) hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); + fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT; + if (hugetlb) { pmd_t new_pmd = pfn_pmd(pfn, mem_type); new_pmd = pmd_mkhuge(new_pmd); @@ -926,7 +994,8 @@ kvm_set_s2pmd_writable(&new_pmd); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); + coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE, + fault_ipa_uncached); ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = pfn_pte(pfn, mem_type); @@ -934,7 +1003,8 @@ kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); + coherent_cache_guest_page(vcpu, hva, PAGE_SIZE, + fault_ipa_uncached); ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); } @@ -1294,11 +1364,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, hva = vm_end; } while (hva < reg_end); - if (ret) { - spin_lock(&kvm->mmu_lock); + spin_lock(&kvm->mmu_lock); + if (ret) unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); - spin_unlock(&kvm->mmu_lock); - } + else + stage2_flush_memslot(kvm, memslot); + spin_unlock(&kvm->mmu_lock); return ret; } @@ -1310,6 +1381,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { + /* + * Readonly memslots are not incoherent with the caches by definition, + * but in practice, they are used mostly to emulate
ROMs or NOR flashes + * that the guest may consider devices and hence map as uncached. + * To prevent incoherency issues in these cases, tag all readonly + * regions as incoherent. + */ + if (slot->flags & KVM_MEM_READONLY) + slot->flags |= KVM_MEMSLOT_INCOHERENT; return 0; } diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 09cf37737ee..58cb3248d27 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -15,6 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/preempt.h> #include <linux/kvm_host.h> #include <linux/wait.h> @@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) { + int i; + struct kvm_vcpu *tmp; + + /* + * The KVM ABI specifies that a system event exit may call KVM_RUN + * again and may perform shutdown/reboot at a later time than when the + * actual request is made. Since we are implementing PSCI and a + * caller of PSCI reboot and shutdown expects that the system shuts + * down or reboots immediately, let's make sure that VCPUs are not run + * after this call is handled and before the VCPUs have been + * re-initialized. + */ + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + tmp->arch.pause = true; + kvm_vcpu_kick(tmp); + } + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); vcpu->run->system_event.type = type; vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5674a55b551..8127e45e263 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -38,6 +38,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; +} + static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2012c4ba8d6..0b7dfdb931d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -165,8 +165,6 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); @@ -200,6 +198,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); u64 kvm_call_hyp(void *hypfn, ...); +void force_vm_exit(const cpumask_t *mask); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 0caf7a59f6a..14a74f13627 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -83,6 +83,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_boot_hyp_pgd(void); void free_hyp_pgds(void); +void stage2_unmap_vm(struct kvm *kvm); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, @@ -243,9 +244,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu
*vcpu) } static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, - unsigned long size) + unsigned long size, + bool ipa_uncached) { - if (!vcpu_has_cache_enabled(vcpu)) + if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) kvm_flush_dcache_to_poc((void *)hva, size); if (!icache_is_aliasing()) { /* PIPT */ diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 76794692c20..9535bd555d1 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; return 0; } @@ -297,31 +296,6 @@ int __attribute_const__ kvm_target_cpu(void) return -EINVAL; } -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init) -{ - unsigned int i; - int phys_target = kvm_target_cpu(); - - if (init->target != phys_target) - return -EINVAL; - - vcpu->arch.target = phys_target; - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - - /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ - for (i = 0; i < sizeof(init->features) * 8; i++) { - if (init->features[i / 32] & (1 << (i % 32))) { - if (i >= KVM_VCPU_MAX_FEATURES) - return -ENOENT; - set_bit(i, vcpu->arch.features); - } - } - - /* Now we know what it is, we can reset it. */ - return kvm_reset_vcpu(vcpu); -} - int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { int target = kvm_target_cpu(); diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 536d13b0bea..371b55bc5a6 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -20,7 +20,6 @@ config IA64 select HAVE_DYNAMIC_FTRACE if (!ITANIUM) select HAVE_FUNCTION_TRACER select HAVE_DMA_ATTRS - select HAVE_KVM select TTY select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG @@ -640,8 +639,6 @@ source "security/Kconfig" source "crypto/Kconfig" -source "arch/ia64/kvm/Kconfig" - source "lib/Kconfig" config IOMMU_HELPER diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 5441b14994f..970d0bd9962 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -53,7 +53,6 @@ core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/ -core-$(CONFIG_KVM) += arch/ia64/kvm/ drivers-$(CONFIG_PCI) += arch/ia64/pci/ drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h deleted file mode 100644 index 4729752b725..00000000000 --- a/arch/ia64/include/asm/kvm_host.h +++ /dev/null @@ -1,609 +0,0 @@ -/* - * kvm_host.h: used for kvm module, and hold ia64-specific sections. - * - * Copyright (C) 2007, Intel Corporation. - * - * Xiantao Zhang <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - -#ifndef __ASM_KVM_HOST_H -#define __ASM_KVM_HOST_H - -#define KVM_USER_MEM_SLOTS 32 - -#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 -#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS - -/* define exit reasons from vmm to kvm*/ -#define EXIT_REASON_VM_PANIC 0 -#define EXIT_REASON_MMIO_INSTRUCTION 1 -#define EXIT_REASON_PAL_CALL 2 -#define EXIT_REASON_SAL_CALL 3 -#define EXIT_REASON_SWITCH_RR6 4 -#define EXIT_REASON_VM_DESTROY 5 -#define EXIT_REASON_EXTERNAL_INTERRUPT 6 -#define EXIT_REASON_IPI 7 -#define EXIT_REASON_PTC_G 8 -#define EXIT_REASON_DEBUG 20 - -/*Define vmm address space and vm data space.*/ -#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20) -#define KVM_VMM_SHIFT 24 -#define KVM_VMM_BASE 0xD000000000000000 -#define VMM_SIZE (__IA64_UL_CONST(8)<<20) - -/* - * Define vm_buffer, used by PAL Services, base address. - * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M - */ -#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE) -#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20) - -/* - * kvm guest's data area looks as follow: - * - * +----------------------+ ------- KVM_VM_DATA_SIZE - * | vcpu[n]'s data | | ___________________KVM_STK_OFFSET - * | | | / | - * | .......... | | /vcpu's struct&stack | - * | .......... | | /---------------------|---- 0 - * | vcpu[5]'s data | | / vpd | - * | vcpu[4]'s data | |/-----------------------| - * | vcpu[3]'s data | / vtlb | - * | vcpu[2]'s data | /|------------------------| - * | vcpu[1]'s data |/ | vhpt | - * | vcpu[0]'s data |____________________________| - * +----------------------+ | - * | memory dirty log | | - * +----------------------+ | - * | vm's data struct | | - * +----------------------+ | - * | | | - * | | | - * | | | - * | | | - * | | | - * | | | - * | | | - * | vm's p2m table | | - * | | | - * | | | - * | | | | - * vm's data->| | | | - * +----------------------+ ------- 0 - * To support large memory, needs to increase the size of p2m. - * To support more vcpus, needs to ensure it has enough space to - * hold vcpus' data. 
- */ - -#define KVM_VM_DATA_SHIFT 26 -#define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT) -#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SIZE) - -#define KVM_P2M_BASE KVM_VM_DATA_BASE -#define KVM_P2M_SIZE (__IA64_UL_CONST(24) << 20) - -#define VHPT_SHIFT 16 -#define VHPT_SIZE (__IA64_UL_CONST(1) << VHPT_SHIFT) -#define VHPT_NUM_ENTRIES (__IA64_UL_CONST(1) << (VHPT_SHIFT-5)) - -#define VTLB_SHIFT 16 -#define VTLB_SIZE (__IA64_UL_CONST(1) << VTLB_SHIFT) -#define VTLB_NUM_ENTRIES (1UL << (VHPT_SHIFT-5)) - -#define VPD_SHIFT 16 -#define VPD_SIZE (__IA64_UL_CONST(1) << VPD_SHIFT) - -#define VCPU_STRUCT_SHIFT 16 -#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT) - -/* - * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h - */ -#define KVM_STK_SHIFT 16 -#define KVM_STK_OFFSET (__IA64_UL_CONST(1)<< KVM_STK_SHIFT) - -#define KVM_VM_STRUCT_SHIFT 19 -#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT) - -#define KVM_MEM_DIRY_LOG_SHIFT 19 -#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT) - -#ifndef __ASSEMBLY__ - -/*Define the max vcpus and memory for Guests.*/ -#define KVM_MAX_VCPUS (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\ - KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data) -#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT) - -#define VMM_LOG_LEN 256 - -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/kvm.h> -#include <linux/kvm_para.h> -#include <linux/kvm_types.h> - -#include <asm/pal.h> -#include <asm/sal.h> -#include <asm/page.h> - -struct kvm_vcpu_data { - char vcpu_vhpt[VHPT_SIZE]; - char vcpu_vtlb[VTLB_SIZE]; - char vcpu_vpd[VPD_SIZE]; - char vcpu_struct[VCPU_STRUCT_SIZE]; -}; - -struct kvm_vm_data { - char kvm_p2m[KVM_P2M_SIZE]; - char kvm_vm_struct[KVM_VM_STRUCT_SIZE]; - char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE]; - struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS]; -}; - -#define VCPU_BASE(n) (KVM_VM_DATA_BASE + \ - offsetof(struct kvm_vm_data, vcpu_data[n])) -#define KVM_VM_BASE (KVM_VM_DATA_BASE + \ - offsetof(struct kvm_vm_data, kvm_vm_struct)) -#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \ - offsetof(struct kvm_vm_data, kvm_mem_dirty_log) - -#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt)) -#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb)) -#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd)) -#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \ - offsetof(struct kvm_vcpu_data, vcpu_struct)) - -/*IO section definitions*/ -#define IOREQ_READ 1 -#define IOREQ_WRITE 0 - -#define STATE_IOREQ_NONE 0 -#define STATE_IOREQ_READY 1 -#define STATE_IOREQ_INPROCESS 2 -#define STATE_IORESP_READY 3 - -/*Guest Physical address layout.*/ -#define GPFN_MEM (0UL << 60) /* Guest pfn is normal mem */ -#define GPFN_FRAME_BUFFER (1UL << 60) /* VGA framebuffer */ -#define GPFN_LOW_MMIO (2UL << 60) /* Low MMIO range */ -#define GPFN_PIB (3UL << 60) /* PIB base */ -#define GPFN_IOSAPIC (4UL << 60) /* IOSAPIC base */ -#define GPFN_LEGACY_IO (5UL << 60) /* Legacy I/O base */ -#define GPFN_GFW (6UL << 60) /* Guest Firmware */ -#define GPFN_PHYS_MMIO (7UL << 60) /* Directed MMIO Range */ - -#define GPFN_IO_MASK (7UL << 60) /* Guest pfn is I/O type */ -#define GPFN_INV_MASK (1UL << 63) /* Guest pfn is invalid */ -#define INVALID_MFN (~0UL) -#define MEM_G (1UL << 30) -#define MEM_M (1UL << 20) -#define MMIO_START (3 * MEM_G) -#define MMIO_SIZE (512 * MEM_M) -#define 
VGA_IO_START 0xA0000UL -#define VGA_IO_SIZE 0x20000 -#define LEGACY_IO_START (MMIO_START + MMIO_SIZE) -#define LEGACY_IO_SIZE (64 * MEM_M) -#define IO_SAPIC_START 0xfec00000UL -#define IO_SAPIC_SIZE 0x100000 -#define PIB_START 0xfee00000UL -#define PIB_SIZE 0x200000 -#define GFW_START (4 * MEM_G - 16 * MEM_M) -#define GFW_SIZE (16 * MEM_M) - -/*Deliver mode, defined for ioapic.c*/ -#define dest_Fixed IOSAPIC_FIXED -#define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY - -#define NMI_VECTOR 2 -#define ExtINT_VECTOR 0 -#define NULL_VECTOR (-1) -#define IA64_SPURIOUS_INT_VECTOR 0x0f - -#define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24) - -/* - *Delivery mode - */ -#define SAPIC_DELIV_SHIFT 8 -#define SAPIC_FIXED 0x0 -#define SAPIC_LOWEST_PRIORITY 0x1 -#define SAPIC_PMI 0x2 -#define SAPIC_NMI 0x4 -#define SAPIC_INIT 0x5 -#define SAPIC_EXTINT 0x7 - -/* - * vcpu->requests bit members for arch - */ -#define KVM_REQ_PTC_G 32 -#define KVM_REQ_RESUME 33 - -struct kvm_mmio_req { - uint64_t addr; /* physical address */ - uint64_t size; /* size in bytes */ - uint64_t data; /* data (or paddr of data) */ - uint8_t state:4; - uint8_t dir:1; /* 1=read, 0=write */ -}; - -/*Pal data struct */ -struct kvm_pal_call{ - /*In area*/ - uint64_t gr28; - uint64_t gr29; - uint64_t gr30; - uint64_t gr31; - /*Out area*/ - struct ia64_pal_retval ret; -}; - -/* Sal data structure */ -struct kvm_sal_call{ - /*In area*/ - uint64_t in0; - uint64_t in1; - uint64_t in2; - uint64_t in3; - uint64_t in4; - uint64_t in5; - uint64_t in6; - uint64_t in7; - struct sal_ret_values ret; -}; - -/*Guest change rr6*/ -struct kvm_switch_rr6 { - uint64_t old_rr; - uint64_t new_rr; -}; - -union ia64_ipi_a{ - unsigned long val; - struct { - unsigned long rv : 3; - unsigned long ir : 1; - unsigned long eid : 8; - unsigned long id : 8; - unsigned long ib_base : 44; - }; -}; - -union ia64_ipi_d { - unsigned long val; - struct { - unsigned long vector : 8; - unsigned long dm : 3; - unsigned long ig : 53; - }; -}; - -/*ipi check exit data*/ -struct kvm_ipi_data{ - union ia64_ipi_a addr; - union ia64_ipi_d data; -}; - -/*global purge data*/ -struct kvm_ptc_g { - unsigned long vaddr; - unsigned long rr; - unsigned long ps; - struct kvm_vcpu *vcpu; -}; - -/*Exit control data */ -struct exit_ctl_data{ - uint32_t exit_reason; - uint32_t vm_status; - union { - struct kvm_mmio_req ioreq; - struct kvm_pal_call pal_data; - struct kvm_sal_call sal_data; - struct kvm_switch_rr6 rr_data; - struct kvm_ipi_data ipi_data; - struct kvm_ptc_g ptc_g_data; - } u; -}; - -union pte_flags { - unsigned long val; - struct { - unsigned long p : 1; /*0 */ - unsigned long : 1; /* 1 */ - unsigned long ma : 3; /* 2-4 */ - unsigned long a : 1; /* 5 */ - unsigned long d : 1; /* 6 */ - unsigned long pl : 2; /* 7-8 */ - unsigned long ar : 3; /* 9-11 */ - unsigned long ppn : 38; /* 12-49 */ - unsigned long : 2; /* 50-51 */ - unsigned long ed : 1; /* 52 */ - }; -}; - -union ia64_pta { - unsigned long val; - struct { - unsigned long ve : 1; - unsigned long reserved0 : 1; - unsigned long size : 6; - unsigned long vf : 1; - unsigned long reserved1 : 6; - unsigned long base : 49; - }; -}; - -struct thash_cb { - /* THASH base information */ - struct thash_data *hash; /* hash table pointer */ - union ia64_pta pta; - int num; -}; - -struct kvm_vcpu_stat { - u32 halt_wakeup; -}; - -struct kvm_vcpu_arch { - int launched; - int last_exit; - int last_run_cpu; - int vmm_tr_slot; - int vm_tr_slot; - int sn_rtc_tr_slot; - -#define KVM_MP_STATE_RUNNABLE 0 -#define KVM_MP_STATE_UNINITIALIZED 1 -#define 
KVM_MP_STATE_INIT_RECEIVED 2 -#define KVM_MP_STATE_HALTED 3 - int mp_state; - -#define MAX_PTC_G_NUM 3 - int ptc_g_count; - struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM]; - - /*halt timer to wake up sleepy vcpus*/ - struct hrtimer hlt_timer; - long ht_active; - - struct kvm_lapic *apic; /* kernel irqchip context */ - struct vpd *vpd; - - /* Exit data for vmm_transition*/ - struct exit_ctl_data exit_data; - - cpumask_t cache_coherent_map; - - unsigned long vmm_rr; - unsigned long host_rr6; - unsigned long psbits[8]; - unsigned long cr_iipa; - unsigned long cr_isr; - unsigned long vsa_base; - unsigned long dirty_log_lock_pa; - unsigned long __gp; - /* TR and TC. */ - struct thash_data itrs[NITRS]; - struct thash_data dtrs[NDTRS]; - /* Bit is set if there is a tr/tc for the region. */ - unsigned char itr_regions; - unsigned char dtr_regions; - unsigned char tc_regions; - /* purge all */ - unsigned long ptce_base; - unsigned long ptce_count[2]; - unsigned long ptce_stride[2]; - /* itc/itm */ - unsigned long last_itc; - long itc_offset; - unsigned long itc_check; - unsigned long timer_check; - unsigned int timer_pending; - unsigned int timer_fired; - - unsigned long vrr[8]; - unsigned long ibr[8]; - unsigned long dbr[8]; - unsigned long insvc[4]; /* Interrupt in service. */ - unsigned long xtp; - - unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */ - unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */ - unsigned long metaphysical_saved_rr0; /* from kvm_arch */ - unsigned long metaphysical_saved_rr4; /* from kvm_arch */ - unsigned long fp_psr; /*used for lazy float register */ - unsigned long saved_gp; - /*for phycial emulation */ - int mode_flags; - struct thash_cb vtlb; - struct thash_cb vhpt; - char irq_check; - char irq_new_pending; - - unsigned long opcode; - unsigned long cause; - char log_buf[VMM_LOG_LEN]; - union context host; - union context guest; - - char mmio_data[8]; -}; - -struct kvm_vm_stat { - u64 remote_tlb_flush; -}; - -struct kvm_sal_data { - unsigned long boot_ip; - unsigned long boot_gp; -}; - -struct kvm_arch_memory_slot { -}; - -struct kvm_arch { - spinlock_t dirty_log_lock; - - unsigned long vm_base; - unsigned long metaphysical_rr0; - unsigned long metaphysical_rr4; - unsigned long vmm_init_rr; - - int is_sn2; - - struct kvm_ioapic *vioapic; - struct kvm_vm_stat stat; - struct kvm_sal_data rdv_sal_data; - - struct list_head assigned_dev_head; - struct iommu_domain *iommu_domain; - bool iommu_noncoherent; - - unsigned long irq_sources_bitmap; - unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; -}; - -union cpuid3_t { - u64 value; - struct { - u64 number : 8; - u64 revision : 8; - u64 model : 8; - u64 family : 8; - u64 archrev : 8; - u64 rv : 24; - }; -}; - -struct kvm_pt_regs { - /* The following registers are saved by SAVE_MIN: */ - unsigned long b6; /* scratch */ - unsigned long b7; /* scratch */ - - unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */ - unsigned long ar_ssd; /* reserved for future use (scratch) */ - - unsigned long r8; /* scratch (return value register 0) */ - unsigned long r9; /* scratch (return value register 1) */ - unsigned long r10; /* scratch (return value register 2) */ - unsigned long r11; /* scratch (return value register 3) */ - - unsigned long cr_ipsr; /* interrupted task's psr */ - unsigned long cr_iip; /* interrupted task's instruction pointer */ - unsigned long cr_ifs; /* interrupted task's function state */ - - unsigned long ar_unat; /* interrupted task's NaT register (preserved) */ - unsigned long ar_pfs; /* 
prev function state */ - unsigned long ar_rsc; /* RSE configuration */ - /* The following two are valid only if cr_ipsr.cpl > 0: */ - unsigned long ar_rnat; /* RSE NaT */ - unsigned long ar_bspstore; /* RSE bspstore */ - - unsigned long pr; /* 64 predicate registers (1 bit each) */ - unsigned long b0; /* return pointer (bp) */ - unsigned long loadrs; /* size of dirty partition << 16 */ - - unsigned long r1; /* the gp pointer */ - unsigned long r12; /* interrupted task's memory stack pointer */ - unsigned long r13; /* thread pointer */ - - unsigned long ar_fpsr; /* floating point status (preserved) */ - unsigned long r15; /* scratch */ - - /* The remaining registers are NOT saved for system calls. */ - unsigned long r14; /* scratch */ - unsigned long r2; /* scratch */ - unsigned long r3; /* scratch */ - unsigned long r16; /* scratch */ - unsigned long r17; /* scratch */ - unsigned long r18; /* scratch */ - unsigned long r19; /* scratch */ - unsigned long r20; /* scratch */ - unsigned long r21; /* scratch */ - unsigned long r22; /* scratch */ - unsigned long r23; /* scratch */ - unsigned long r24; /* scratch */ - unsigned long r25; /* scratch */ - unsigned long r26; /* scratch */ - unsigned long r27; /* scratch */ - unsigned long r28; /* scratch */ - unsigned long r29; /* scratch */ - unsigned long r30; /* scratch */ - unsigned long r31; /* scratch */ - unsigned long ar_ccv; /* compare/exchange value (scratch) */ - - /* - * Floating point registers that the kernel considers scratch: - */ - struct ia64_fpreg f6; /* scratch */ - struct ia64_fpreg f7; /* scratch */ - struct ia64_fpreg f8; /* scratch */ - struct ia64_fpreg f9; /* scratch */ - struct ia64_fpreg f10; /* scratch */ - struct ia64_fpreg f11; /* scratch */ - - unsigned long r4; /* preserved */ - unsigned long r5; /* preserved */ - unsigned long r6; /* preserved */ - unsigned long r7; /* preserved */ - unsigned long eml_unat; /* used for emulating instruction */ - unsigned long pad0; /* alignment pad */ -}; - -static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v) -{ - return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1; -} - -typedef int kvm_vmm_entry(void); -typedef void kvm_tramp_entry(union context *host, union context *guest); - -struct kvm_vmm_info{ - struct module *module; - kvm_vmm_entry *vmm_entry; - kvm_tramp_entry *tramp_entry; - unsigned long vmm_ivt; - unsigned long patch_mov_ar; - unsigned long patch_mov_ar_sn2; -}; - -int kvm_highest_pending_irq(struct kvm_vcpu *vcpu); -int kvm_emulate_halt(struct kvm_vcpu *vcpu); -int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); -void kvm_sal_emul(struct kvm_vcpu *vcpu); - -#define __KVM_HAVE_ARCH_VM_ALLOC 1 -struct kvm *kvm_arch_alloc_vm(void); -void kvm_arch_free_vm(struct kvm *kvm); - -static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_free_memslot(struct kvm *kvm, - struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} -static inline void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) {} -static inline void kvm_arch_hardware_unsetup(void) {} - -#endif /* __ASSEMBLY__*/ - -#endif diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h deleted file mode 100644 index 
42b233bedeb..00000000000 --- a/arch/ia64/include/asm/pvclock-abi.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * same structure to x86's - * Hopefully asm-x86/pvclock-abi.h would be moved to somewhere more generic. - * For now, define same duplicated definitions. - */ - -#ifndef _ASM_IA64__PVCLOCK_ABI_H -#define _ASM_IA64__PVCLOCK_ABI_H -#ifndef __ASSEMBLY__ - -/* - * These structs MUST NOT be changed. - * They are the ABI between hypervisor and guest OS. - * KVM is using this. - * - * pvclock_vcpu_time_info holds the system time and the tsc timestamp - * of the last update. So the guest can use the tsc delta to get a - * more precise system time. There is one per virtual cpu. - * - * pvclock_wall_clock references the point in time when the system - * time was zero (usually boot time), thus the guest calculates the - * current wall clock by adding the system time. - * - * Protocol for the "version" fields is: hypervisor raises it (making - * it uneven) before it starts updating the fields and raises it again - * (making it even) when it is done. Thus the guest can make sure the - * time values it got are consistent by checking the version before - * and after reading them. - */ - -struct pvclock_vcpu_time_info { - u32 version; - u32 pad0; - u64 tsc_timestamp; - u64 system_time; - u32 tsc_to_system_mul; - s8 tsc_shift; - u8 pad[3]; -} __attribute__((__packed__)); /* 32 bytes */ - -struct pvclock_wall_clock { - u32 version; - u32 sec; - u32 nsec; -} __attribute__((__packed__)); - -#endif /* __ASSEMBLY__ */ -#endif /* _ASM_IA64__PVCLOCK_ABI_H */ diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h deleted file mode 100644 index 99503c28440..00000000000 --- a/arch/ia64/include/uapi/asm/kvm.h +++ /dev/null @@ -1,268 +0,0 @@ -#ifndef __ASM_IA64_KVM_H -#define __ASM_IA64_KVM_H - -/* - * kvm structure definitions for ia64 - * - * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include <linux/types.h> -#include <linux/ioctl.h> - -/* Select x86 specific features in <linux/kvm.h> */ -#define __KVM_HAVE_IOAPIC -#define __KVM_HAVE_IRQ_LINE - -/* Architectural interrupt line count. 
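The version-field protocol spelled out in the pvclock comment above is a lock-free seqcount: the hypervisor makes version odd before touching the record and even again when it is done, so a reader retries whenever it saw an odd value or the version changed between its two reads. A guest-side read following that description might look like the sketch below (illustrative only, not part of this patch; the struct layout is the one removed above, and a real reader would also place compiler barriers between the loads):

    #include <stdint.h>

    struct pvclock_vcpu_time_info {
            uint32_t version;
            uint32_t pad0;
            uint64_t tsc_timestamp;
            uint64_t system_time;
            uint32_t tsc_to_system_mul;
            int8_t   tsc_shift;
            uint8_t  pad[3];
    } __attribute__((__packed__));

    static uint64_t pvclock_read_system_time(
                    const volatile struct pvclock_vcpu_time_info *ti)
    {
            uint32_t version;
            uint64_t system_time;

            do {
                    version = ti->version;        /* odd: update in flight */
                    system_time = ti->system_time;
            } while ((version & 1) || version != ti->version);

            return system_time;
    }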
*/ -#define KVM_NR_INTERRUPTS 256 - -#define KVM_IOAPIC_NUM_PINS 48 - -struct kvm_ioapic_state { - __u64 base_address; - __u32 ioregsel; - __u32 id; - __u32 irr; - __u32 pad; - union { - __u64 bits; - struct { - __u8 vector; - __u8 delivery_mode:3; - __u8 dest_mode:1; - __u8 delivery_status:1; - __u8 polarity:1; - __u8 remote_irr:1; - __u8 trig_mode:1; - __u8 mask:1; - __u8 reserve:7; - __u8 reserved[4]; - __u8 dest_id; - } fields; - } redirtbl[KVM_IOAPIC_NUM_PINS]; -}; - -#define KVM_IRQCHIP_PIC_MASTER 0 -#define KVM_IRQCHIP_PIC_SLAVE 1 -#define KVM_IRQCHIP_IOAPIC 2 -#define KVM_NR_IRQCHIPS 3 - -#define KVM_CONTEXT_SIZE 8*1024 - -struct kvm_fpreg { - union { - unsigned long bits[2]; - long double __dummy; /* force 16-byte alignment */ - } u; -}; - -union context { - /* 8K size */ - char dummy[KVM_CONTEXT_SIZE]; - struct { - unsigned long psr; - unsigned long pr; - unsigned long caller_unat; - unsigned long pad; - unsigned long gr[32]; - unsigned long ar[128]; - unsigned long br[8]; - unsigned long cr[128]; - unsigned long rr[8]; - unsigned long ibr[8]; - unsigned long dbr[8]; - unsigned long pkr[8]; - struct kvm_fpreg fr[128]; - }; -}; - -struct thash_data { - union { - struct { - unsigned long p : 1; /* 0 */ - unsigned long rv1 : 1; /* 1 */ - unsigned long ma : 3; /* 2-4 */ - unsigned long a : 1; /* 5 */ - unsigned long d : 1; /* 6 */ - unsigned long pl : 2; /* 7-8 */ - unsigned long ar : 3; /* 9-11 */ - unsigned long ppn : 38; /* 12-49 */ - unsigned long rv2 : 2; /* 50-51 */ - unsigned long ed : 1; /* 52 */ - unsigned long ig1 : 11; /* 53-63 */ - }; - struct { - unsigned long __rv1 : 53; /* 0-52 */ - unsigned long contiguous : 1; /*53 */ - unsigned long tc : 1; /* 54 TR or TC */ - unsigned long cl : 1; - /* 55 I side or D side cache line */ - unsigned long len : 4; /* 56-59 */ - unsigned long io : 1; /* 60 entry is for io or not */ - unsigned long nomap : 1; - /* 61 entry cann't be inserted into machine TLB.*/ - unsigned long checked : 1; - /* 62 for VTLB/VHPT sanity check */ - unsigned long invalid : 1; - /* 63 invalid entry */ - }; - unsigned long page_flags; - }; /* same for VHPT and TLB */ - - union { - struct { - unsigned long rv3 : 2; - unsigned long ps : 6; - unsigned long key : 24; - unsigned long rv4 : 32; - }; - unsigned long itir; - }; - union { - struct { - unsigned long ig2 : 12; - unsigned long vpn : 49; - unsigned long vrn : 3; - }; - unsigned long ifa; - unsigned long vadr; - struct { - unsigned long tag : 63; - unsigned long ti : 1; - }; - unsigned long etag; - }; - union { - struct thash_data *next; - unsigned long rid; - unsigned long gpaddr; - }; -}; - -#define NITRS 8 -#define NDTRS 8 - -struct saved_vpd { - unsigned long vhpi; - unsigned long vgr[16]; - unsigned long vbgr[16]; - unsigned long vnat; - unsigned long vbnat; - unsigned long vcpuid[5]; - unsigned long vpsr; - unsigned long vpr; - union { - unsigned long vcr[128]; - struct { - unsigned long dcr; - unsigned long itm; - unsigned long iva; - unsigned long rsv1[5]; - unsigned long pta; - unsigned long rsv2[7]; - unsigned long ipsr; - unsigned long isr; - unsigned long rsv3; - unsigned long iip; - unsigned long ifa; - unsigned long itir; - unsigned long iipa; - unsigned long ifs; - unsigned long iim; - unsigned long iha; - unsigned long rsv4[38]; - unsigned long lid; - unsigned long ivr; - unsigned long tpr; - unsigned long eoi; - unsigned long irr[4]; - unsigned long itv; - unsigned long pmv; - unsigned long cmcv; - unsigned long rsv5[5]; - unsigned long lrr0; - unsigned long lrr1; - unsigned long rsv6[46]; 
- }; - }; -}; - -struct kvm_regs { - struct saved_vpd vpd; - /*Arch-regs*/ - int mp_state; - unsigned long vmm_rr; - /* TR and TC. */ - struct thash_data itrs[NITRS]; - struct thash_data dtrs[NDTRS]; - /* Bit is set if there is a tr/tc for the region. */ - unsigned char itr_regions; - unsigned char dtr_regions; - unsigned char tc_regions; - - char irq_check; - unsigned long saved_itc; - unsigned long itc_check; - unsigned long timer_check; - unsigned long timer_pending; - unsigned long last_itc; - - unsigned long vrr[8]; - unsigned long ibr[8]; - unsigned long dbr[8]; - unsigned long insvc[4]; /* Interrupt in service. */ - unsigned long xtp; - - unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */ - unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */ - unsigned long metaphysical_saved_rr0; /* from kvm_arch */ - unsigned long metaphysical_saved_rr4; /* from kvm_arch */ - unsigned long fp_psr; /*used for lazy float register */ - unsigned long saved_gp; - /*for phycial emulation */ - - union context saved_guest; - - unsigned long reserved[64]; /* for future use */ -}; - -struct kvm_sregs { -}; - -struct kvm_fpu { -}; - -#define KVM_IA64_VCPU_STACK_SHIFT 16 -#define KVM_IA64_VCPU_STACK_SIZE (1UL << KVM_IA64_VCPU_STACK_SHIFT) - -struct kvm_ia64_vcpu_stack { - unsigned char stack[KVM_IA64_VCPU_STACK_SIZE]; -}; - -struct kvm_debug_exit_arch { -}; - -/* for KVM_SET_GUEST_DEBUG */ -struct kvm_guest_debug_arch { -}; - -/* definition of registers in kvm_run */ -struct kvm_sync_regs { -}; - -#endif diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig deleted file mode 100644 index 3d50ea955c4..00000000000 --- a/arch/ia64/kvm/Kconfig +++ /dev/null @@ -1,66 +0,0 @@ -# -# KVM configuration -# - -source "virt/kvm/Kconfig" - -menuconfig VIRTUALIZATION - bool "Virtualization" - depends on HAVE_KVM || IA64 - default y - ---help--- - Say Y here to get to see options for using your Linux host to run other - operating systems inside virtual machines (guests). - This option alone does not add any kernel code. - - If you say N, all options in this submenu will be skipped and disabled. - -if VIRTUALIZATION - -config KVM - tristate "Kernel-based Virtual Machine (KVM) support" - depends on BROKEN - depends on HAVE_KVM && MODULES - depends on BROKEN - select PREEMPT_NOTIFIERS - select ANON_INODES - select HAVE_KVM_IRQCHIP - select HAVE_KVM_IRQFD - select HAVE_KVM_IRQ_ROUTING - select KVM_APIC_ARCHITECTURE - select KVM_MMIO - ---help--- - Support hosting fully virtualized guest machines using hardware - virtualization extensions. You will need a fairly recent - processor equipped with virtualization extensions. You will also - need to select one or more of the processor modules below. - - This module provides access to the hardware capabilities through - a character device node named /dev/kvm. - - To compile this as a module, choose M here: the module - will be called kvm. - - If unsure, say N. - -config KVM_INTEL - tristate "KVM for Intel Itanium 2 processors support" - depends on KVM && m - ---help--- - Provides support for KVM on Itanium 2 processors equipped with the VT - extensions. - -config KVM_DEVICE_ASSIGNMENT - bool "KVM legacy PCI device assignment support" - depends on KVM && PCI && IOMMU_API - default y - ---help--- - Provide support for legacy PCI device assignment through KVM. The - kernel now also supports a full featured userspace device driver - framework through VFIO, which supersedes much of this support. - - If unsure, say Y. 
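For orientation: the struct kvm_regs removed above, with its embedded saved_vpd, TR/TC arrays and saved guest context, was simply the ia64 payload of the architecture-neutral KVM_GET_REGS/KVM_SET_REGS vcpu ioctls, so userspace saved and restored all of this state with the same two calls used on every other architecture. A minimal (hypothetical) round trip:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Fetch the vcpu's register block, let the caller edit it, then
     * write it back. vcpu_fd is a descriptor from KVM_CREATE_VCPU. */
    static int kvm_regs_roundtrip(int vcpu_fd, struct kvm_regs *regs)
    {
            if (ioctl(vcpu_fd, KVM_GET_REGS, regs) < 0)
                    return -1;
            /* ... inspect or patch *regs here ... */
            return ioctl(vcpu_fd, KVM_SET_REGS, regs);
    }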
- -source drivers/vhost/Kconfig - -endif # VIRTUALIZATION diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile deleted file mode 100644 index 18e45ec49bb..00000000000 --- a/arch/ia64/kvm/Makefile +++ /dev/null @@ -1,67 +0,0 @@ -#This Make file is to generate asm-offsets.h and build source. -# - -#Generate asm-offsets.h for vmm module build -offsets-file := asm-offsets.h - -always := $(offsets-file) -targets := $(offsets-file) -targets += arch/ia64/kvm/asm-offsets.s - -# Default sed regexp - multiline due to syntax constraints -define sed-y - "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}" -endef - -quiet_cmd_offsets = GEN $@ -define cmd_offsets - (set -e; \ - echo "#ifndef __ASM_KVM_OFFSETS_H__"; \ - echo "#define __ASM_KVM_OFFSETS_H__"; \ - echo "/*"; \ - echo " * DO NOT MODIFY."; \ - echo " *"; \ - echo " * This file was generated by Makefile"; \ - echo " *"; \ - echo " */"; \ - echo ""; \ - sed -ne $(sed-y) $<; \ - echo ""; \ - echo "#endif" ) > $@ -endef - -# We use internal rules to avoid the "is up to date" message from make -arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c \ - $(wildcard $(srctree)/arch/ia64/include/asm/*.h)\ - $(wildcard $(srctree)/include/linux/*.h) - $(call if_changed_dep,cc_s_c) - -$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s - $(call cmd,offsets) - -FORCE : $(obj)/$(offsets-file) - -# -# Makefile for Kernel-based Virtual Machine module -# - -ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ -asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ -KVM := ../../../virt/kvm - -common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \ - $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o - -ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y) -common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o -endif - -kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o -obj-$(CONFIG_KVM) += kvm.o - -CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 -kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ - vtlb.o process.o kvm_lib.o -#Add link memcpy and memset to avoid possible structure assignment error -kvm-intel-objs += memcpy.o memset.o -obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c deleted file mode 100644 index 9324c875caf..00000000000 --- a/arch/ia64/kvm/asm-offsets.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * asm-offsets.c Generate definitions needed by assembly language modules. - * This code generates raw asm output which is post-processed - * to extract and format the required data. - * - * Anthony Xu <anthony.xu@intel.com> - * Xiantao Zhang <xiantao.zhang@intel.com> - * Copyright (c) 2007 Intel Corporation KVM support. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - -#include <linux/kvm_host.h> -#include <linux/kbuild.h> - -#include "vcpu.h" - -void foo(void) -{ - DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu)); - DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs)); - - BLANK(); - - DEFINE(VMM_VCPU_META_RR0_OFFSET, - offsetof(struct kvm_vcpu, arch.metaphysical_rr0)); - DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, - offsetof(struct kvm_vcpu, - arch.metaphysical_saved_rr0)); - DEFINE(VMM_VCPU_VRR0_OFFSET, - offsetof(struct kvm_vcpu, arch.vrr[0])); - DEFINE(VMM_VPD_IRR0_OFFSET, - offsetof(struct vpd, irr[0])); - DEFINE(VMM_VCPU_ITC_CHECK_OFFSET, - offsetof(struct kvm_vcpu, arch.itc_check)); - DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET, - offsetof(struct kvm_vcpu, arch.irq_check)); - DEFINE(VMM_VPD_VHPI_OFFSET, - offsetof(struct vpd, vhpi)); - DEFINE(VMM_VCPU_VSA_BASE_OFFSET, - offsetof(struct kvm_vcpu, arch.vsa_base)); - DEFINE(VMM_VCPU_VPD_OFFSET, - offsetof(struct kvm_vcpu, arch.vpd)); - DEFINE(VMM_VCPU_IRQ_CHECK, - offsetof(struct kvm_vcpu, arch.irq_check)); - DEFINE(VMM_VCPU_TIMER_PENDING, - offsetof(struct kvm_vcpu, arch.timer_pending)); - DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, - offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0)); - DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, - offsetof(struct kvm_vcpu, arch.mode_flags)); - DEFINE(VMM_VCPU_ITC_OFS_OFFSET, - offsetof(struct kvm_vcpu, arch.itc_offset)); - DEFINE(VMM_VCPU_LAST_ITC_OFFSET, - offsetof(struct kvm_vcpu, arch.last_itc)); - DEFINE(VMM_VCPU_SAVED_GP_OFFSET, - offsetof(struct kvm_vcpu, arch.saved_gp)); - - BLANK(); - - DEFINE(VMM_PT_REGS_B6_OFFSET, - offsetof(struct kvm_pt_regs, b6)); - DEFINE(VMM_PT_REGS_B7_OFFSET, - offsetof(struct kvm_pt_regs, b7)); - DEFINE(VMM_PT_REGS_AR_CSD_OFFSET, - offsetof(struct kvm_pt_regs, ar_csd)); - DEFINE(VMM_PT_REGS_AR_SSD_OFFSET, - offsetof(struct kvm_pt_regs, ar_ssd)); - DEFINE(VMM_PT_REGS_R8_OFFSET, - offsetof(struct kvm_pt_regs, r8)); - DEFINE(VMM_PT_REGS_R9_OFFSET, - offsetof(struct kvm_pt_regs, r9)); - DEFINE(VMM_PT_REGS_R10_OFFSET, - offsetof(struct kvm_pt_regs, r10)); - DEFINE(VMM_PT_REGS_R11_OFFSET, - offsetof(struct kvm_pt_regs, r11)); - DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET, - offsetof(struct kvm_pt_regs, cr_ipsr)); - DEFINE(VMM_PT_REGS_CR_IIP_OFFSET, - offsetof(struct kvm_pt_regs, cr_iip)); - DEFINE(VMM_PT_REGS_CR_IFS_OFFSET, - offsetof(struct kvm_pt_regs, cr_ifs)); - DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET, - offsetof(struct kvm_pt_regs, ar_unat)); - DEFINE(VMM_PT_REGS_AR_PFS_OFFSET, - offsetof(struct kvm_pt_regs, ar_pfs)); - DEFINE(VMM_PT_REGS_AR_RSC_OFFSET, - offsetof(struct kvm_pt_regs, ar_rsc)); - DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET, - offsetof(struct kvm_pt_regs, ar_rnat)); - - DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET, - offsetof(struct kvm_pt_regs, ar_bspstore)); - DEFINE(VMM_PT_REGS_PR_OFFSET, - offsetof(struct kvm_pt_regs, pr)); - DEFINE(VMM_PT_REGS_B0_OFFSET, - offsetof(struct kvm_pt_regs, b0)); - DEFINE(VMM_PT_REGS_LOADRS_OFFSET, - offsetof(struct kvm_pt_regs, loadrs)); - DEFINE(VMM_PT_REGS_R1_OFFSET, - offsetof(struct kvm_pt_regs, r1)); - DEFINE(VMM_PT_REGS_R12_OFFSET, - offsetof(struct kvm_pt_regs, r12)); - DEFINE(VMM_PT_REGS_R13_OFFSET, - offsetof(struct kvm_pt_regs, r13)); - DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET, - offsetof(struct kvm_pt_regs, ar_fpsr)); - DEFINE(VMM_PT_REGS_R15_OFFSET, - offsetof(struct kvm_pt_regs, r15)); - DEFINE(VMM_PT_REGS_R14_OFFSET, - offsetof(struct kvm_pt_regs, r14)); - DEFINE(VMM_PT_REGS_R2_OFFSET, - offsetof(struct kvm_pt_regs, r2)); - DEFINE(VMM_PT_REGS_R3_OFFSET, - offsetof(struct kvm_pt_regs, r3)); - DEFINE(VMM_PT_REGS_R16_OFFSET, 
- offsetof(struct kvm_pt_regs, r16)); - DEFINE(VMM_PT_REGS_R17_OFFSET, - offsetof(struct kvm_pt_regs, r17)); - DEFINE(VMM_PT_REGS_R18_OFFSET, - offsetof(struct kvm_pt_regs, r18)); - DEFINE(VMM_PT_REGS_R19_OFFSET, - offsetof(struct kvm_pt_regs, r19)); - DEFINE(VMM_PT_REGS_R20_OFFSET, - offsetof(struct kvm_pt_regs, r20)); - DEFINE(VMM_PT_REGS_R21_OFFSET, - offsetof(struct kvm_pt_regs, r21)); - DEFINE(VMM_PT_REGS_R22_OFFSET, - offsetof(struct kvm_pt_regs, r22)); - DEFINE(VMM_PT_REGS_R23_OFFSET, - offsetof(struct kvm_pt_regs, r23)); - DEFINE(VMM_PT_REGS_R24_OFFSET, - offsetof(struct kvm_pt_regs, r24)); - DEFINE(VMM_PT_REGS_R25_OFFSET, - offsetof(struct kvm_pt_regs, r25)); - DEFINE(VMM_PT_REGS_R26_OFFSET, - offsetof(struct kvm_pt_regs, r26)); - DEFINE(VMM_PT_REGS_R27_OFFSET, - offsetof(struct kvm_pt_regs, r27)); - DEFINE(VMM_PT_REGS_R28_OFFSET, - offsetof(struct kvm_pt_regs, r28)); - DEFINE(VMM_PT_REGS_R29_OFFSET, - offsetof(struct kvm_pt_regs, r29)); - DEFINE(VMM_PT_REGS_R30_OFFSET, - offsetof(struct kvm_pt_regs, r30)); - DEFINE(VMM_PT_REGS_R31_OFFSET, - offsetof(struct kvm_pt_regs, r31)); - DEFINE(VMM_PT_REGS_AR_CCV_OFFSET, - offsetof(struct kvm_pt_regs, ar_ccv)); - DEFINE(VMM_PT_REGS_F6_OFFSET, - offsetof(struct kvm_pt_regs, f6)); - DEFINE(VMM_PT_REGS_F7_OFFSET, - offsetof(struct kvm_pt_regs, f7)); - DEFINE(VMM_PT_REGS_F8_OFFSET, - offsetof(struct kvm_pt_regs, f8)); - DEFINE(VMM_PT_REGS_F9_OFFSET, - offsetof(struct kvm_pt_regs, f9)); - DEFINE(VMM_PT_REGS_F10_OFFSET, - offsetof(struct kvm_pt_regs, f10)); - DEFINE(VMM_PT_REGS_F11_OFFSET, - offsetof(struct kvm_pt_regs, f11)); - DEFINE(VMM_PT_REGS_R4_OFFSET, - offsetof(struct kvm_pt_regs, r4)); - DEFINE(VMM_PT_REGS_R5_OFFSET, - offsetof(struct kvm_pt_regs, r5)); - DEFINE(VMM_PT_REGS_R6_OFFSET, - offsetof(struct kvm_pt_regs, r6)); - DEFINE(VMM_PT_REGS_R7_OFFSET, - offsetof(struct kvm_pt_regs, r7)); - DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET, - offsetof(struct kvm_pt_regs, eml_unat)); - DEFINE(VMM_VCPU_IIPA_OFFSET, - offsetof(struct kvm_vcpu, arch.cr_iipa)); - DEFINE(VMM_VCPU_OPCODE_OFFSET, - offsetof(struct kvm_vcpu, arch.opcode)); - DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause)); - DEFINE(VMM_VCPU_ISR_OFFSET, - offsetof(struct kvm_vcpu, arch.cr_isr)); - DEFINE(VMM_PT_REGS_R16_SLOT, - (((offsetof(struct kvm_pt_regs, r16) - - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f)); - DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, - offsetof(struct kvm_vcpu, arch.mode_flags)); - DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp)); - BLANK(); - - DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd)); - DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs)); - DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET, - offsetof(struct kvm_vcpu, arch.insvc[0])); - DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta)); - DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr)); - - DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4])); - DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5])); - DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12])); - DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13])); - DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0])); - DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1])); - DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0])); - DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1])); - DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2])); - DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0])); - DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16])); - 
DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18])); - DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19])); - DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21])); - DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24])); - DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27])); - DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28])); - DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29])); - DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30])); - DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36])); - DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40])); - DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64])); - DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65])); - DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0])); - DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2])); - DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8])); - DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0])); - DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0])); - DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2])); - DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3])); - DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32])); - DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33])); - DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0])); - DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr)); - BLANK(); -} diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h deleted file mode 100644 index c0785a72827..00000000000 --- a/arch/ia64/kvm/irq.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * irq.h: In-kernel interrupt controller related definitions - * Copyright (c) 2008, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Authors: - * Xiantao Zhang <xiantao.zhang@intel.com> - * - */ - -#ifndef __IRQ_H -#define __IRQ_H - -#include "lapic.h" - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 1; -} - -#endif diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c deleted file mode 100644 index dbe46f43884..00000000000 --- a/arch/ia64/kvm/kvm-ia64.c +++ /dev/null @@ -1,1942 +0,0 @@ -/* - * kvm_ia64.c: Basic KVM support On Itanium series processors - * - * - * Copyright (C) 2007, Intel Corporation. - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/percpu.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include <linux/smp.h> -#include <linux/kvm_host.h> -#include <linux/kvm.h> -#include <linux/bitops.h> -#include <linux/hrtimer.h> -#include <linux/uaccess.h> -#include <linux/iommu.h> -#include <linux/intel-iommu.h> -#include <linux/pci.h> - -#include <asm/pgtable.h> -#include <asm/gcc_intrin.h> -#include <asm/pal.h> -#include <asm/cacheflush.h> -#include <asm/div64.h> -#include <asm/tlb.h> -#include <asm/elf.h> -#include <asm/sn/addrs.h> -#include <asm/sn/clksupport.h> -#include <asm/sn/shub_mmr.h> - -#include "misc.h" -#include "vti.h" -#include "iodev.h" -#include "ioapic.h" -#include "lapic.h" -#include "irq.h" - -static unsigned long kvm_vmm_base; -static unsigned long kvm_vsa_base; -static unsigned long kvm_vm_buffer; -static unsigned long kvm_vm_buffer_size; -unsigned long kvm_vmm_gp; - -static long vp_env_info; - -static struct kvm_vmm_info *kvm_vmm_info; - -static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu); - -struct kvm_stats_debugfs_item debugfs_entries[] = { - { NULL } -}; - -static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu) -{ -#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) - if (vcpu->kvm->arch.is_sn2) - return rtc_time(); - else -#endif - return ia64_getreg(_IA64_REG_AR_ITC); -} - -static void kvm_flush_icache(unsigned long start, unsigned long len) -{ - int l; - - for (l = 0; l < (len + 32); l += 32) - ia64_fc((void *)(start + l)); - - ia64_sync_i(); - ia64_srlz_i(); -} - -static void kvm_flush_tlb_all(void) -{ - unsigned long i, j, count0, count1, stride0, stride1, addr; - long flags; - - addr = local_cpu_data->ptce_base; - count0 = local_cpu_data->ptce_count[0]; - count1 = local_cpu_data->ptce_count[1]; - stride0 = local_cpu_data->ptce_stride[0]; - stride1 = local_cpu_data->ptce_stride[1]; - - local_irq_save(flags); - for (i = 0; i < count0; ++i) { - for (j = 0; j < count1; ++j) { - ia64_ptce(addr); - addr += stride1; - } - addr += stride0; - } - local_irq_restore(flags); - ia64_srlz_i(); /* srlz.i implies srlz.d */ -} - -long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva, - (u64)opt_handler); - - return iprv.status; -} - -static DEFINE_SPINLOCK(vp_lock); - -int kvm_arch_hardware_enable(void) -{ - long status; - long tmp_base; - unsigned long pte; - unsigned long saved_psr; - int slot; - - pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); - local_irq_save(saved_psr); - slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); - local_irq_restore(saved_psr); - if (slot < 0) - return -EINVAL; - - spin_lock(&vp_lock); - status = ia64_pal_vp_init_env(kvm_vsa_base ? 
- VP_INIT_ENV : VP_INIT_ENV_INITALIZE, - __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); - if (status != 0) { - spin_unlock(&vp_lock); - printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); - return -EINVAL; - } - - if (!kvm_vsa_base) { - kvm_vsa_base = tmp_base; - printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base); - } - spin_unlock(&vp_lock); - ia64_ptr_entry(0x3, slot); - - return 0; -} - -void kvm_arch_hardware_disable(void) -{ - - long status; - int slot; - unsigned long pte; - unsigned long saved_psr; - unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA); - - pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), - PAGE_KERNEL)); - - local_irq_save(saved_psr); - slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); - local_irq_restore(saved_psr); - if (slot < 0) - return; - - status = ia64_pal_vp_exit_env(host_iva); - if (status) - printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n", - status); - ia64_ptr_entry(0x3, slot); -} - -void kvm_arch_check_processor_compat(void *rtn) -{ - *(int *)rtn = 0; -} - -int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) -{ - - int r; - - switch (ext) { - case KVM_CAP_IRQCHIP: - case KVM_CAP_MP_STATE: - case KVM_CAP_IRQ_INJECT_STATUS: - case KVM_CAP_IOAPIC_POLARITY_IGNORED: - r = 1; - break; - case KVM_CAP_COALESCED_MMIO: - r = KVM_COALESCED_MMIO_PAGE_OFFSET; - break; -#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT - case KVM_CAP_IOMMU: - r = iommu_present(&pci_bus_type); - break; -#endif - default: - r = 0; - } - return r; - -} - -static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = 1; - return 0; -} - -static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct kvm_mmio_req *p; - struct kvm_io_device *mmio_dev; - int r; - - p = kvm_get_vcpu_ioreq(vcpu); - - if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) - goto mmio; - vcpu->mmio_needed = 1; - vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr; - vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size; - vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; - - if (vcpu->mmio_is_write) - memcpy(vcpu->arch.mmio_data, &p->data, p->size); - memcpy(kvm_run->mmio.data, &p->data, p->size); - kvm_run->exit_reason = KVM_EXIT_MMIO; - return 0; -mmio: - if (p->dir) - r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr, - p->size, &p->data); - else - r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr, - p->size, &p->data); - if (r) - printk(KERN_ERR"kvm: No iodevice found! 
addr:%lx\n", p->addr); - p->state = STATE_IORESP_READY; - - return 1; -} - -static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - - if (p->exit_reason == EXIT_REASON_PAL_CALL) - return kvm_pal_emul(vcpu, kvm_run); - else { - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = 2; - return 0; - } -} - -static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - - if (p->exit_reason == EXIT_REASON_SAL_CALL) { - kvm_sal_emul(vcpu); - return 1; - } else { - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = 3; - return 0; - } - -} - -static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector) -{ - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (!test_and_set_bit(vector, &vpd->irr[0])) { - vcpu->arch.irq_new_pending = 1; - kvm_vcpu_kick(vcpu); - return 1; - } - return 0; -} - -/* - * offset: address offset to IPI space. - * value: deliver value. - */ -static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, - uint64_t vector) -{ - switch (dm) { - case SAPIC_FIXED: - break; - case SAPIC_NMI: - vector = 2; - break; - case SAPIC_EXTINT: - vector = 0; - break; - case SAPIC_INIT: - case SAPIC_PMI: - default: - printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); - return; - } - __apic_accept_irq(vcpu, vector); -} - -static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, - unsigned long eid) -{ - union ia64_lid lid; - int i; - struct kvm_vcpu *vcpu; - - kvm_for_each_vcpu(i, vcpu, kvm) { - lid.val = VCPU_LID(vcpu); - if (lid.id == id && lid.eid == eid) - return vcpu; - } - - return NULL; -} - -static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct exit_ctl_data *p = kvm_get_exit_data(vcpu); - struct kvm_vcpu *target_vcpu; - struct kvm_pt_regs *regs; - union ia64_ipi_a addr = p->u.ipi_data.addr; - union ia64_ipi_d data = p->u.ipi_data.data; - - target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); - if (!target_vcpu) - return handle_vm_error(vcpu, kvm_run); - - if (!target_vcpu->arch.launched) { - regs = vcpu_regs(target_vcpu); - - regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; - regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; - - target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - if (waitqueue_active(&target_vcpu->wq)) - wake_up_interruptible(&target_vcpu->wq); - } else { - vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); - if (target_vcpu != vcpu) - kvm_vcpu_kick(target_vcpu); - } - - return 1; -} - -struct call_data { - struct kvm_ptc_g ptc_g_data; - struct kvm_vcpu *vcpu; -}; - -static void vcpu_global_purge(void *info) -{ - struct call_data *p = (struct call_data *)info; - struct kvm_vcpu *vcpu = p->vcpu; - - if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) - return; - - set_bit(KVM_REQ_PTC_G, &vcpu->requests); - if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) { - vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] = - p->ptc_g_data; - } else { - clear_bit(KVM_REQ_PTC_G, &vcpu->requests); - vcpu->arch.ptc_g_count = 0; - set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); - } -} - -static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct exit_ctl_data *p = kvm_get_exit_data(vcpu); - struct kvm *kvm = vcpu->kvm; - struct call_data call_data; - int i; - struct kvm_vcpu *vcpui; - - call_data.ptc_g_data = p->u.ptc_g_data; - - kvm_for_each_vcpu(i, vcpui, kvm) { - if 
(vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED || - vcpu == vcpui) - continue; - - if (waitqueue_active(&vcpui->wq)) - wake_up_interruptible(&vcpui->wq); - - if (vcpui->cpu != -1) { - call_data.vcpu = vcpui; - smp_call_function_single(vcpui->cpu, - vcpu_global_purge, &call_data, 1); - } else - printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); - - } - return 1; -} - -static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - return 1; -} - -static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu) -{ - unsigned long pte, rtc_phys_addr, map_addr; - int slot; - - map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT); - rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC; - pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC)); - slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT); - vcpu->arch.sn_rtc_tr_slot = slot; - if (slot < 0) { - printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n"); - slot = 0; - } - return slot; -} - -int kvm_emulate_halt(struct kvm_vcpu *vcpu) -{ - - ktime_t kt; - long itc_diff; - unsigned long vcpu_now_itc; - unsigned long expires; - struct hrtimer *p_ht = &vcpu->arch.hlt_timer; - unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec; - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (irqchip_in_kernel(vcpu->kvm)) { - - vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset; - - if (time_after(vcpu_now_itc, vpd->itm)) { - vcpu->arch.timer_check = 1; - return 1; - } - itc_diff = vpd->itm - vcpu_now_itc; - if (itc_diff < 0) - itc_diff = -itc_diff; - - expires = div64_u64(itc_diff, cyc_per_usec); - kt = ktime_set(0, 1000 * expires); - - vcpu->arch.ht_active = 1; - hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); - - vcpu->arch.mp_state = KVM_MP_STATE_HALTED; - kvm_vcpu_block(vcpu); - hrtimer_cancel(p_ht); - vcpu->arch.ht_active = 0; - - if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) || - kvm_cpu_has_pending_timer(vcpu)) - if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - - if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) - return -EINTR; - return 1; - } else { - printk(KERN_ERR"kvm: Unsupported userspace halt!"); - return 0; - } -} - -static int handle_vm_shutdown(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; - return 0; -} - -static int handle_external_interrupt(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - return 1; -} - -static int handle_vcpu_debug(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - printk("VMM: %s", vcpu->arch.log_buf); - return 1; -} - -static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) = { - [EXIT_REASON_VM_PANIC] = handle_vm_error, - [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio, - [EXIT_REASON_PAL_CALL] = handle_pal_call, - [EXIT_REASON_SAL_CALL] = handle_sal_call, - [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6, - [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown, - [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, - [EXIT_REASON_IPI] = handle_ipi, - [EXIT_REASON_PTC_G] = handle_global_purge, - [EXIT_REASON_DEBUG] = handle_vcpu_debug, - -}; - -static const int kvm_vti_max_exit_handlers = - sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers); - -static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p_exit_data; - - p_exit_data = kvm_get_exit_data(vcpu); - return p_exit_data->exit_reason; -} - -/* - * The guest has exited. See if we can fix it or if we need userspace - * assistance. 
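kvm_vti_exit_handlers[] above, together with kvm_handle_exit() just below, is the standard KVM exit-dispatch idiom: the VMM records an exit reason, the host indexes a handler table with it, and the handler's return value decides whether the vcpu loop re-enters the guest (positive) or surfaces the exit to userspace (zero, after filling in kvm_run). Reduced to its skeleton (an illustrative sketch with hypothetical names, not this patch's code):

    #define MAX_EXIT_REASONS 32            /* hypothetical bound */

    typedef int (*exit_handler_t)(void *vcpu, void *run);

    static exit_handler_t handlers[MAX_EXIT_REASONS];

    static int dispatch_exit(unsigned int reason, void *vcpu, void *run)
    {
            if (reason < MAX_EXIT_REASONS && handlers[reason])
                    return handlers[reason](vcpu, run); /* >0: resume guest */
            return 0;                                   /* 0: to userspace */
    }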
- */ -static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) -{ - u32 exit_reason = kvm_get_exit_reason(vcpu); - vcpu->arch.last_exit = exit_reason; - - if (exit_reason < kvm_vti_max_exit_handlers - && kvm_vti_exit_handlers[exit_reason]) - return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run); - else { - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = exit_reason; - } - return 0; -} - -static inline void vti_set_rr6(unsigned long rr6) -{ - ia64_set_rr(RR6, rr6); - ia64_srlz_i(); -} - -static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu) -{ - unsigned long pte; - struct kvm *kvm = vcpu->kvm; - int r; - - /*Insert a pair of tr to map vmm*/ - pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); - r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); - if (r < 0) - goto out; - vcpu->arch.vmm_tr_slot = r; - /*Insert a pairt of tr to map data of vm*/ - pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL)); - r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE, - pte, KVM_VM_DATA_SHIFT); - if (r < 0) - goto out; - vcpu->arch.vm_tr_slot = r; - -#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) - if (kvm->arch.is_sn2) { - r = kvm_sn2_setup_mappings(vcpu); - if (r < 0) - goto out; - } -#endif - - r = 0; -out: - return r; -} - -static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = vcpu->kvm; - ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); - ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); -#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) - if (kvm->arch.is_sn2) - ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot); -#endif -} - -static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) -{ - unsigned long psr; - int r; - int cpu = smp_processor_id(); - - if (vcpu->arch.last_run_cpu != cpu || - per_cpu(last_vcpu, cpu) != vcpu) { - per_cpu(last_vcpu, cpu) = vcpu; - vcpu->arch.last_run_cpu = cpu; - kvm_flush_tlb_all(); - } - - vcpu->arch.host_rr6 = ia64_get_rr(RR6); - vti_set_rr6(vcpu->arch.vmm_rr); - local_irq_save(psr); - r = kvm_insert_vmm_mapping(vcpu); - local_irq_restore(psr); - return r; -} - -static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) -{ - kvm_purge_vmm_mapping(vcpu); - vti_set_rr6(vcpu->arch.host_rr6); -} - -static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - union context *host_ctx, *guest_ctx; - int r, idx; - - idx = srcu_read_lock(&vcpu->kvm->srcu); - -again: - if (signal_pending(current)) { - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - goto out; - } - - preempt_disable(); - local_irq_disable(); - - /*Get host and guest context with guest address space.*/ - host_ctx = kvm_get_host_context(vcpu); - guest_ctx = kvm_get_guest_context(vcpu); - - clear_bit(KVM_REQ_KICK, &vcpu->requests); - - r = kvm_vcpu_pre_transition(vcpu); - if (r < 0) - goto vcpu_run_fail; - - srcu_read_unlock(&vcpu->kvm->srcu, idx); - vcpu->mode = IN_GUEST_MODE; - kvm_guest_enter(); - - /* - * Transition to the guest - */ - kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); - - kvm_vcpu_post_transition(vcpu); - - vcpu->arch.launched = 1; - set_bit(KVM_REQ_KICK, &vcpu->requests); - local_irq_enable(); - - /* - * We must have an instruction between local_irq_enable() and - * kvm_guest_exit(), so the timer interrupt isn't delayed by - * the interrupt shadow. The stat.exits increment will do nicely. 
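One detail of kvm_vcpu_pre_transition() above deserves a note: the last_run_cpu / per-cpu last_vcpu comparison is a two-sided staleness check, flushing host TLB state both when this vcpu has migrated to a new cpu and when this cpu most recently ran a different vcpu, since either case can leave translations for the wrong guest behind. In isolation the check is (a sketch with simplified bookkeeping in place of the kernel's per-cpu variables):

    #define NR_CPUS 64                     /* stand-in */

    struct vcpu { int last_run_cpu; /* ... */ };

    static struct vcpu *last_vcpu[NR_CPUS]; /* last vcpu run on each cpu */

    static int need_tlb_flush(struct vcpu *v, int cpu)
    {
            if (v->last_run_cpu != cpu || last_vcpu[cpu] != v) {
                    last_vcpu[cpu] = v;
                    v->last_run_cpu = cpu;
                    return 1;              /* flush before entering guest */
            }
            return 0;
    }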
- * But we need to prevent reordering, hence this barrier(): - */ - barrier(); - kvm_guest_exit(); - vcpu->mode = OUTSIDE_GUEST_MODE; - preempt_enable(); - - idx = srcu_read_lock(&vcpu->kvm->srcu); - - r = kvm_handle_exit(kvm_run, vcpu); - - if (r > 0) { - if (!need_resched()) - goto again; - } - -out: - srcu_read_unlock(&vcpu->kvm->srcu, idx); - if (r > 0) { - cond_resched(); - idx = srcu_read_lock(&vcpu->kvm->srcu); - goto again; - } - - return r; - -vcpu_run_fail: - local_irq_enable(); - preempt_enable(); - kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; - goto out; -} - -static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) -{ - struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); - - if (!vcpu->mmio_is_write) - memcpy(&p->data, vcpu->arch.mmio_data, 8); - p->state = STATE_IORESP_READY; -} - -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - int r; - sigset_t sigsaved; - - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - - if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { - kvm_vcpu_block(vcpu); - clear_bit(KVM_REQ_UNHALT, &vcpu->requests); - r = -EAGAIN; - goto out; - } - - if (vcpu->mmio_needed) { - memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8); - kvm_set_mmio_data(vcpu); - vcpu->mmio_read_completed = 1; - vcpu->mmio_needed = 0; - } - r = __vcpu_run(vcpu, kvm_run); -out: - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - - return r; -} - -struct kvm *kvm_arch_alloc_vm(void) -{ - - struct kvm *kvm; - uint64_t vm_base; - - BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE); - - vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); - - if (!vm_base) - return NULL; - - memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); - kvm = (struct kvm *)(vm_base + - offsetof(struct kvm_vm_data, kvm_vm_struct)); - kvm->arch.vm_base = vm_base; - printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base); - - return kvm; -} - -struct kvm_ia64_io_range { - unsigned long start; - unsigned long size; - unsigned long type; -}; - -static const struct kvm_ia64_io_range io_ranges[] = { - {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER}, - {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO}, - {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO}, - {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC}, - {PIB_START, PIB_SIZE, GPFN_PIB}, -}; - -static void kvm_build_io_pmt(struct kvm *kvm) -{ - unsigned long i, j; - - /* Mark I/O ranges */ - for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range)); - i++) { - for (j = io_ranges[i].start; - j < io_ranges[i].start + io_ranges[i].size; - j += PAGE_SIZE) - kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT, - io_ranges[i].type, 0); - } - -} - -/*Use unused rids to virtualize guest rid.*/ -#define GUEST_PHYSICAL_RR0 0x1739 -#define GUEST_PHYSICAL_RR4 0x2739 -#define VMM_INIT_RR 0x1660 - -int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) -{ - BUG_ON(!kvm); - - if (type) - return -EINVAL; - - kvm->arch.is_sn2 = ia64_platform_is("sn2"); - - kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; - kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; - kvm->arch.vmm_init_rr = VMM_INIT_RR; - - /* - *Fill P2M entries for MMIO/IO ranges - */ - kvm_build_io_pmt(kvm); - - INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); - - /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ - set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); - - return 0; -} - -static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, - struct kvm_irqchip *chip) -{ - int r; - - r = 0; - switch (chip->chip_id) 
{ - case KVM_IRQCHIP_IOAPIC: - r = kvm_get_ioapic(kvm, &chip->chip.ioapic); - break; - default: - r = -EINVAL; - break; - } - return r; -} - -static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) -{ - int r; - - r = 0; - switch (chip->chip_id) { - case KVM_IRQCHIP_IOAPIC: - r = kvm_set_ioapic(kvm, &chip->chip.ioapic); - break; - default: - r = -EINVAL; - break; - } - return r; -} - -#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x - -int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -{ - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - int i; - - for (i = 0; i < 16; i++) { - vpd->vgr[i] = regs->vpd.vgr[i]; - vpd->vbgr[i] = regs->vpd.vbgr[i]; - } - for (i = 0; i < 128; i++) - vpd->vcr[i] = regs->vpd.vcr[i]; - vpd->vhpi = regs->vpd.vhpi; - vpd->vnat = regs->vpd.vnat; - vpd->vbnat = regs->vpd.vbnat; - vpd->vpsr = regs->vpd.vpsr; - - vpd->vpr = regs->vpd.vpr; - - memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context)); - - RESTORE_REGS(mp_state); - RESTORE_REGS(vmm_rr); - memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS); - memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS); - RESTORE_REGS(itr_regions); - RESTORE_REGS(dtr_regions); - RESTORE_REGS(tc_regions); - RESTORE_REGS(irq_check); - RESTORE_REGS(itc_check); - RESTORE_REGS(timer_check); - RESTORE_REGS(timer_pending); - RESTORE_REGS(last_itc); - for (i = 0; i < 8; i++) { - vcpu->arch.vrr[i] = regs->vrr[i]; - vcpu->arch.ibr[i] = regs->ibr[i]; - vcpu->arch.dbr[i] = regs->dbr[i]; - } - for (i = 0; i < 4; i++) - vcpu->arch.insvc[i] = regs->insvc[i]; - RESTORE_REGS(xtp); - RESTORE_REGS(metaphysical_rr0); - RESTORE_REGS(metaphysical_rr4); - RESTORE_REGS(metaphysical_saved_rr0); - RESTORE_REGS(metaphysical_saved_rr4); - RESTORE_REGS(fp_psr); - RESTORE_REGS(saved_gp); - - vcpu->arch.irq_new_pending = 1; - vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu); - set_bit(KVM_REQ_RESUME, &vcpu->requests); - - return 0; -} - -int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, - bool line_status) -{ - if (!irqchip_in_kernel(kvm)) - return -ENXIO; - - irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event->irq, irq_event->level, - line_status); - return 0; -} - -long kvm_arch_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) -{ - struct kvm *kvm = filp->private_data; - void __user *argp = (void __user *)arg; - int r = -ENOTTY; - - switch (ioctl) { - case KVM_CREATE_IRQCHIP: - r = -EFAULT; - r = kvm_ioapic_init(kvm); - if (r) - goto out; - r = kvm_setup_default_irq_routing(kvm); - if (r) { - mutex_lock(&kvm->slots_lock); - kvm_ioapic_destroy(kvm); - mutex_unlock(&kvm->slots_lock); - goto out; - } - break; - case KVM_GET_IRQCHIP: { - /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ - struct kvm_irqchip chip; - - r = -EFAULT; - if (copy_from_user(&chip, argp, sizeof chip)) - goto out; - r = -ENXIO; - if (!irqchip_in_kernel(kvm)) - goto out; - r = kvm_vm_ioctl_get_irqchip(kvm, &chip); - if (r) - goto out; - r = -EFAULT; - if (copy_to_user(argp, &chip, sizeof chip)) - goto out; - r = 0; - break; - } - case KVM_SET_IRQCHIP: { - /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ - struct kvm_irqchip chip; - - r = -EFAULT; - if (copy_from_user(&chip, argp, sizeof chip)) - goto out; - r = -ENXIO; - if (!irqchip_in_kernel(kvm)) - goto out; - r = kvm_vm_ioctl_set_irqchip(kvm, &chip); - if (r) - goto out; - r = 0; - break; - } - default: - ; - } -out: - return r; -} - -int 
kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return -EINVAL; -} - -int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return -EINVAL; - -} -int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr) -{ - - return -EINVAL; -} - -static int kvm_alloc_vmm_area(void) -{ - if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) { - kvm_vmm_base = __get_free_pages(GFP_KERNEL, - get_order(KVM_VMM_SIZE)); - if (!kvm_vmm_base) - return -ENOMEM; - - memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); - kvm_vm_buffer = kvm_vmm_base + VMM_SIZE; - - printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n", - kvm_vmm_base, kvm_vm_buffer); - } - - return 0; -} - -static void kvm_free_vmm_area(void) -{ - if (kvm_vmm_base) { - /*Zero this area before free to avoid bits leak!!*/ - memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); - free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE)); - kvm_vmm_base = 0; - kvm_vm_buffer = 0; - kvm_vsa_base = 0; - } -} - -static int vti_init_vpd(struct kvm_vcpu *vcpu) -{ - int i; - union cpuid3_t cpuid3; - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (IS_ERR(vpd)) - return PTR_ERR(vpd); - - /* CPUID init */ - for (i = 0; i < 5; i++) - vpd->vcpuid[i] = ia64_get_cpuid(i); - - /* Limit the CPUID number to 5 */ - cpuid3.value = vpd->vcpuid[3]; - cpuid3.number = 4; /* 5 - 1 */ - vpd->vcpuid[3] = cpuid3.value; - - /*Set vac and vdc fields*/ - vpd->vac.a_from_int_cr = 1; - vpd->vac.a_to_int_cr = 1; - vpd->vac.a_from_psr = 1; - vpd->vac.a_from_cpuid = 1; - vpd->vac.a_cover = 1; - vpd->vac.a_bsw = 1; - vpd->vac.a_int = 1; - vpd->vdc.d_vmsw = 1; - - /*Set virtual buffer*/ - vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE; - - return 0; -} - -static int vti_create_vp(struct kvm_vcpu *vcpu) -{ - long ret; - struct vpd *vpd = vcpu->arch.vpd; - unsigned long vmm_ivt; - - vmm_ivt = kvm_vmm_info->vmm_ivt; - - printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt); - - ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0); - - if (ret) { - printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n"); - return -EINVAL; - } - return 0; -} - -static void init_ptce_info(struct kvm_vcpu *vcpu) -{ - ia64_ptce_info_t ptce = {0}; - - ia64_get_ptce(&ptce); - vcpu->arch.ptce_base = ptce.base; - vcpu->arch.ptce_count[0] = ptce.count[0]; - vcpu->arch.ptce_count[1] = ptce.count[1]; - vcpu->arch.ptce_stride[0] = ptce.stride[0]; - vcpu->arch.ptce_stride[1] = ptce.stride[1]; -} - -static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu) -{ - struct hrtimer *p_ht = &vcpu->arch.hlt_timer; - - if (hrtimer_cancel(p_ht)) - hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS); -} - -static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data) -{ - struct kvm_vcpu *vcpu; - wait_queue_head_t *q; - - vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer); - q = &vcpu->wq; - - if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED) - goto out; - - if (waitqueue_active(q)) - wake_up_interruptible(q); - -out: - vcpu->arch.timer_fired = 1; - vcpu->arch.timer_check = 1; - return HRTIMER_NORESTART; -} - -#define PALE_RESET_ENTRY 0x80000000ffffffb0UL - -bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) -{ - return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); -} - -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu *v; - int r; - int i; - long itc_offset; - struct kvm *kvm = vcpu->kvm; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - union context *p_ctx = &vcpu->arch.guest; - struct 
kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu); - - /*Init vcpu context for first run.*/ - if (IS_ERR(vmm_vcpu)) - return PTR_ERR(vmm_vcpu); - - if (kvm_vcpu_is_bsp(vcpu)) { - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - - /*Set entry address for first run.*/ - regs->cr_iip = PALE_RESET_ENTRY; - - /*Initialize itc offset for vcpus*/ - itc_offset = 0UL - kvm_get_itc(vcpu); - for (i = 0; i < KVM_MAX_VCPUS; i++) { - v = (struct kvm_vcpu *)((char *)vcpu + - sizeof(struct kvm_vcpu_data) * i); - v->arch.itc_offset = itc_offset; - v->arch.last_itc = 0; - } - } else - vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; - - r = -ENOMEM; - vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); - if (!vcpu->arch.apic) - goto out; - vcpu->arch.apic->vcpu = vcpu; - - p_ctx->gr[1] = 0; - p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET); - p_ctx->gr[13] = (unsigned long)vmm_vcpu; - p_ctx->psr = 0x1008522000UL; - p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ - p_ctx->caller_unat = 0; - p_ctx->pr = 0x0; - p_ctx->ar[36] = 0x0; /*unat*/ - p_ctx->ar[19] = 0x0; /*rnat*/ - p_ctx->ar[18] = (unsigned long)vmm_vcpu + - ((sizeof(struct kvm_vcpu)+15) & ~15); - p_ctx->ar[64] = 0x0; /*pfs*/ - p_ctx->cr[0] = 0x7e04UL; - p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt; - p_ctx->cr[8] = 0x3c; - - /*Initialize region register*/ - p_ctx->rr[0] = 0x30; - p_ctx->rr[1] = 0x30; - p_ctx->rr[2] = 0x30; - p_ctx->rr[3] = 0x30; - p_ctx->rr[4] = 0x30; - p_ctx->rr[5] = 0x30; - p_ctx->rr[7] = 0x30; - - /*Initialize branch register 0*/ - p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry; - - vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr; - vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0; - vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4; - - hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - vcpu->arch.hlt_timer.function = hlt_timer_fn; - - vcpu->arch.last_run_cpu = -1; - vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id); - vcpu->arch.vsa_base = kvm_vsa_base; - vcpu->arch.__gp = kvm_vmm_gp; - vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); - vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id); - vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id); - init_ptce_info(vcpu); - - r = 0; -out: - return r; -} - -static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) -{ - unsigned long psr; - int r; - - local_irq_save(psr); - r = kvm_insert_vmm_mapping(vcpu); - local_irq_restore(psr); - if (r) - goto fail; - r = kvm_vcpu_init(vcpu, vcpu->kvm, id); - if (r) - goto fail; - - r = vti_init_vpd(vcpu); - if (r) { - printk(KERN_DEBUG"kvm: vpd init error!!\n"); - goto uninit; - } - - r = vti_create_vp(vcpu); - if (r) - goto uninit; - - kvm_purge_vmm_mapping(vcpu); - - return 0; -uninit: - kvm_vcpu_uninit(vcpu); -fail: - return r; -} - -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, - unsigned int id) -{ - struct kvm_vcpu *vcpu; - unsigned long vm_base = kvm->arch.vm_base; - int r; - int cpu; - - BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2); - - r = -EINVAL; - if (id >= KVM_MAX_VCPUS) { - printk(KERN_ERR"kvm: Can't configure vcpus > %ld", - KVM_MAX_VCPUS); - goto fail; - } - - r = -ENOMEM; - if (!vm_base) { - printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); - goto fail; - } - vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data, - vcpu_data[id].vcpu_struct)); - vcpu->kvm = kvm; - - cpu = get_cpu(); - r = vti_vcpu_setup(vcpu, id); - put_cpu(); - - if (r) { - printk(KERN_DEBUG"kvm: vcpu_setup 
error!!\n"); - goto fail; - } - - return vcpu; -fail: - return ERR_PTR(r); -} - -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return 0; -} - -int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) -{ - return 0; -} - -int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) -{ - return -EINVAL; -} - -int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) -{ - return -EINVAL; -} - -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg) -{ - return -EINVAL; -} - -void kvm_arch_free_vm(struct kvm *kvm) -{ - unsigned long vm_base = kvm->arch.vm_base; - - if (vm_base) { - memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); - free_pages(vm_base, get_order(KVM_VM_DATA_SIZE)); - } - -} - -static void kvm_release_vm_pages(struct kvm *kvm) -{ - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - int j; - - slots = kvm_memslots(kvm); - kvm_for_each_memslot(memslot, slots) { - for (j = 0; j < memslot->npages; j++) { - if (memslot->rmap[j]) - put_page((struct page *)memslot->rmap[j]); - } - } -} - -void kvm_arch_destroy_vm(struct kvm *kvm) -{ - kvm_iommu_unmap_guest(kvm); - kvm_free_all_assigned_devices(kvm); - kfree(kvm->arch.vioapic); - kvm_release_vm_pages(kvm); -} - -void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -{ - if (cpu != vcpu->cpu) { - vcpu->cpu = cpu; - if (vcpu->arch.ht_active) - kvm_migrate_hlt_timer(vcpu); - } -} - -#define SAVE_REGS(_x) regs->_x = vcpu->arch._x - -int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -{ - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - int i; - - vcpu_load(vcpu); - - for (i = 0; i < 16; i++) { - regs->vpd.vgr[i] = vpd->vgr[i]; - regs->vpd.vbgr[i] = vpd->vbgr[i]; - } - for (i = 0; i < 128; i++) - regs->vpd.vcr[i] = vpd->vcr[i]; - regs->vpd.vhpi = vpd->vhpi; - regs->vpd.vnat = vpd->vnat; - regs->vpd.vbnat = vpd->vbnat; - regs->vpd.vpsr = vpd->vpsr; - regs->vpd.vpr = vpd->vpr; - - memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context)); - - SAVE_REGS(mp_state); - SAVE_REGS(vmm_rr); - memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); - memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS); - SAVE_REGS(itr_regions); - SAVE_REGS(dtr_regions); - SAVE_REGS(tc_regions); - SAVE_REGS(irq_check); - SAVE_REGS(itc_check); - SAVE_REGS(timer_check); - SAVE_REGS(timer_pending); - SAVE_REGS(last_itc); - for (i = 0; i < 8; i++) { - regs->vrr[i] = vcpu->arch.vrr[i]; - regs->ibr[i] = vcpu->arch.ibr[i]; - regs->dbr[i] = vcpu->arch.dbr[i]; - } - for (i = 0; i < 4; i++) - regs->insvc[i] = vcpu->arch.insvc[i]; - regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu); - SAVE_REGS(xtp); - SAVE_REGS(metaphysical_rr0); - SAVE_REGS(metaphysical_rr4); - SAVE_REGS(metaphysical_saved_rr0); - SAVE_REGS(metaphysical_saved_rr4); - SAVE_REGS(fp_psr); - SAVE_REGS(saved_gp); - - vcpu_put(vcpu); - return 0; -} - -int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu, - struct kvm_ia64_vcpu_stack *stack) -{ - memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack)); - return 0; -} - -int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu, - struct kvm_ia64_vcpu_stack *stack) -{ - memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu), - sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu)); - - vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data; - return 0; -} - -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - - hrtimer_cancel(&vcpu->arch.hlt_timer); - 
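
SAVE_REGS above and RESTORE_REGS earlier in this file are mirror-image token-pasting macros: because the vcpu->arch and struct kvm_regs fields share names, one macro per direction covers every register and the two copy lists stay mechanically in step. A toy sketch of the idiom; the two structures and field names here are invented:

    #include <assert.h>

    struct arch_state { unsigned long mp_state, vmm_rr, xtp; };
    struct regs_image { unsigned long mp_state, vmm_rr, xtp; };

    /* One macro per direction; the shared field name is the only argument. */
    #define SAVE_REGS(_x)    (regs->_x = arch->_x)
    #define RESTORE_REGS(_x) (arch->_x = regs->_x)

    static void save(struct regs_image *regs, const struct arch_state *arch)
    {
        SAVE_REGS(mp_state);
        SAVE_REGS(vmm_rr);
        SAVE_REGS(xtp);
    }

    static void restore(struct arch_state *arch, const struct regs_image *regs)
    {
        RESTORE_REGS(mp_state);
        RESTORE_REGS(vmm_rr);
        RESTORE_REGS(xtp);
    }

    int main(void)
    {
        struct arch_state a = { 1, 2, 3 }, b = { 0, 0, 0 };
        struct regs_image r;

        save(&r, &a);       /* arch -> user-visible register image */
        restore(&b, &r);    /* register image -> another arch state */
        assert(b.mp_state == 1 && b.vmm_rr == 2 && b.xtp == 3);
        return 0;
    }
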
kfree(vcpu->arch.apic); -} - -long kvm_arch_vcpu_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) -{ - struct kvm_vcpu *vcpu = filp->private_data; - void __user *argp = (void __user *)arg; - struct kvm_ia64_vcpu_stack *stack = NULL; - long r; - - switch (ioctl) { - case KVM_IA64_VCPU_GET_STACK: { - struct kvm_ia64_vcpu_stack __user *user_stack; - void __user *first_p = argp; - - r = -EFAULT; - if (copy_from_user(&user_stack, first_p, sizeof(void *))) - goto out; - - if (!access_ok(VERIFY_WRITE, user_stack, - sizeof(struct kvm_ia64_vcpu_stack))) { - printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: " - "Illegal user destination address for stack\n"); - goto out; - } - stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL); - if (!stack) { - r = -ENOMEM; - goto out; - } - - r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack); - if (r) - goto out; - - if (copy_to_user(user_stack, stack, - sizeof(struct kvm_ia64_vcpu_stack))) { - r = -EFAULT; - goto out; - } - - break; - } - case KVM_IA64_VCPU_SET_STACK: { - struct kvm_ia64_vcpu_stack __user *user_stack; - void __user *first_p = argp; - - r = -EFAULT; - if (copy_from_user(&user_stack, first_p, sizeof(void *))) - goto out; - - if (!access_ok(VERIFY_READ, user_stack, - sizeof(struct kvm_ia64_vcpu_stack))) { - printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: " - "Illegal user address for stack\n"); - goto out; - } - stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL); - if (!stack) { - r = -ENOMEM; - goto out; - } - if (copy_from_user(stack, user_stack, - sizeof(struct kvm_ia64_vcpu_stack))) - goto out; - - r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack); - break; - } - - default: - r = -EINVAL; - } - -out: - kfree(stack); - return r; -} - -int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) -{ - return VM_FAULT_SIGBUS; -} - -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} - -int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, - enum kvm_mr_change change) -{ - unsigned long i; - unsigned long pfn; - int npages = memslot->npages; - unsigned long base_gfn = memslot->base_gfn; - - if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT)) - return -ENOMEM; - - for (i = 0; i < npages; i++) { - pfn = gfn_to_pfn(kvm, base_gfn + i); - if (!kvm_is_reserved_pfn(pfn)) { - kvm_set_pmt_entry(kvm, base_gfn + i, - pfn << PAGE_SHIFT, - _PAGE_AR_RWX | _PAGE_MA_WB); - memslot->rmap[i] = (unsigned long)pfn_to_page(pfn); - } else { - kvm_set_pmt_entry(kvm, base_gfn + i, - GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT), - _PAGE_MA_UC); - memslot->rmap[i] = 0; - } - } - - return 0; -} - -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ - kvm_flush_remote_tlbs(kvm); -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ - kvm_arch_flush_shadow_all(); -} - -long kvm_arch_dev_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) -{ - return -EINVAL; -} - -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - kvm_vcpu_uninit(vcpu); -} - -static int vti_cpu_has_kvm_support(void) -{ - long avail = 1, status = 1, control = 1; - long ret; - - ret = ia64_pal_proc_get_features(&avail, &status, &control, 0); - if (ret) - goto out; - - if (!(avail & PAL_PROC_VM_BIT)) - goto out; - - printk(KERN_DEBUG"kvm: Hardware Supports VT\n"); - - ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info); - if (ret) - goto out; - printk(KERN_DEBUG"kvm: VM Buffer 
Size:0x%lx\n", kvm_vm_buffer_size); - - if (!(vp_env_info & VP_OPCODE)) { - printk(KERN_WARNING"kvm: No opcode ability on hardware, " - "vm_env_info:0x%lx\n", vp_env_info); - } - - return 1; -out: - return 0; -} - - -/* - * On SN2, the ITC isn't stable, so copy in fast path code to use the - * SN2 RTC, replacing the ITC based default version. - */ -static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info, - struct module *module) -{ - unsigned long new_ar, new_ar_sn2; - unsigned long module_base; - - if (!ia64_platform_is("sn2")) - return; - - module_base = (unsigned long)module->module_core; - - new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base; - new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base; - - printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC " - "as source\n"); - - /* - * Copy the SN2 version of mov_ar into place. They are both - * the same size, so 6 bundles is sufficient (6 * 0x10). - */ - memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60); -} - -static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, - struct module *module) -{ - unsigned long module_base; - unsigned long vmm_size; - - unsigned long vmm_offset, func_offset, fdesc_offset; - struct fdesc *p_fdesc; - - BUG_ON(!module); - - if (!kvm_vmm_base) { - printk("kvm: kvm area hasn't been initialized yet!!\n"); - return -EFAULT; - } - - /*Calculate new position of relocated vmm module.*/ - module_base = (unsigned long)module->module_core; - vmm_size = module->core_size; - if (unlikely(vmm_size > KVM_VMM_SIZE)) - return -EFAULT; - - memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); - kvm_patch_vmm(vmm_info, module); - kvm_flush_icache(kvm_vmm_base, vmm_size); - - /*Recalculate kvm_vmm_info based on new VMM*/ - vmm_offset = vmm_info->vmm_ivt - module_base; - kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset; - printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n", - kvm_vmm_info->vmm_ivt); - - fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base; - kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE + - fdesc_offset); - func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base; - p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); - p_fdesc->ip = KVM_VMM_BASE + func_offset; - p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base); - - printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n", - KVM_VMM_BASE+func_offset); - - fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base; - kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE + - fdesc_offset); - func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base; - p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); - p_fdesc->ip = KVM_VMM_BASE + func_offset; - p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base); - - kvm_vmm_gp = p_fdesc->gp; - - printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n", - kvm_vmm_info->vmm_entry); - printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n", - KVM_VMM_BASE + func_offset); - - return 0; -} - -int kvm_arch_init(void *opaque) -{ - int r; - struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque; - - if (!vti_cpu_has_kvm_support()) { - printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n"); - r = -EOPNOTSUPP; - goto out; - } - - if (kvm_vmm_info) { - printk(KERN_ERR "kvm: Already loaded VMM module!\n"); - r = -EEXIST; - goto out; - } - - r = -ENOMEM; - kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL); - if (!kvm_vmm_info) - goto out; - - if (kvm_alloc_vmm_area()) - 
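
A note on kvm_relocate_vmm() above: after the module image is memcpy()ed to the reserved area, every ia64 function descriptor has to be re-based by hand, computing new = new_base + (old - old_base) for both the entry point and the global pointer. A standalone sketch of that fix-up; the addresses and the descriptor layout here are illustrative:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* ia64 function descriptor: entry address plus global pointer. */
    struct fdesc { uint64_t ip, gp; };

    static uint64_t rebase(uint64_t addr, uint64_t old_base, uint64_t new_base)
    {
        return new_base + (addr - old_base);
    }

    int main(void)
    {
        const uint64_t old_base = 0xa000000000000000ULL;  /* made-up module base */
        const uint64_t new_base = 0xc000000000000000ULL;  /* made-up VMM area */
        struct fdesc f = { old_base + 0x1230, old_base + 0x100000 };

        /* Rewrite both words of the descriptor, as the relocation does. */
        f.ip = rebase(f.ip, old_base, new_base);
        f.gp = rebase(f.gp, old_base, new_base);
        printf("ip=0x%" PRIx64 " gp=0x%" PRIx64 "\n", f.ip, f.gp);
        return 0;
    }
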
goto out_free0; - - r = kvm_relocate_vmm(vmm_info, vmm_info->module); - if (r) - goto out_free1; - - return 0; - -out_free1: - kvm_free_vmm_area(); -out_free0: - kfree(kvm_vmm_info); -out: - return r; -} - -void kvm_arch_exit(void) -{ - kvm_free_vmm_area(); - kfree(kvm_vmm_info); - kvm_vmm_info = NULL; -} - -static void kvm_ia64_sync_dirty_log(struct kvm *kvm, - struct kvm_memory_slot *memslot) -{ - int i; - long base; - unsigned long n; - unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + - offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); - - n = kvm_dirty_bitmap_bytes(memslot); - base = memslot->base_gfn / BITS_PER_LONG; - - spin_lock(&kvm->arch.dirty_log_lock); - for (i = 0; i < n/sizeof(long); ++i) { - memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; - dirty_bitmap[base + i] = 0; - } - spin_unlock(&kvm->arch.dirty_log_lock); -} - -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, - struct kvm_dirty_log *log) -{ - int r; - unsigned long n; - struct kvm_memory_slot *memslot; - int is_dirty = 0; - - mutex_lock(&kvm->slots_lock); - - r = -EINVAL; - if (log->slot >= KVM_USER_MEM_SLOTS) - goto out; - - memslot = id_to_memslot(kvm->memslots, log->slot); - r = -ENOENT; - if (!memslot->dirty_bitmap) - goto out; - - kvm_ia64_sync_dirty_log(kvm, memslot); - r = kvm_get_dirty_log(kvm, log, &is_dirty); - if (r) - goto out; - - /* If nothing is dirty, don't bother messing with page tables. */ - if (is_dirty) { - kvm_flush_remote_tlbs(kvm); - n = kvm_dirty_bitmap_bytes(memslot); - memset(memslot->dirty_bitmap, 0, n); - } - r = 0; -out: - mutex_unlock(&kvm->slots_lock); - return r; -} - -int kvm_arch_hardware_setup(void) -{ - return 0; -} - -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) -{ - return __apic_accept_irq(vcpu, irq->vector); -} - -int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) -{ - return apic->vcpu->vcpu_id == dest; -} - -int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) -{ - return 0; -} - -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) -{ - return vcpu1->arch.xtp - vcpu2->arch.xtp; -} - -int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode) -{ - struct kvm_lapic *target = vcpu->arch.apic; - return (dest_mode == 0) ? 
- kvm_apic_match_physical_addr(target, dest) : - kvm_apic_match_logical_addr(target, dest); -} - -static int find_highest_bits(int *dat) -{ - u32 bits, bitnum; - int i; - - /* loop for all 256 bits */ - for (i = 7; i >= 0 ; i--) { - bits = dat[i]; - if (bits) { - bitnum = fls(bits); - return i * 32 + bitnum - 1; - } - } - - return -1; -} - -int kvm_highest_pending_irq(struct kvm_vcpu *vcpu) -{ - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (vpd->irr[0] & (1UL << NMI_VECTOR)) - return NMI_VECTOR; - if (vpd->irr[0] & (1UL << ExtINT_VECTOR)) - return ExtINT_VECTOR; - - return find_highest_bits((int *)&vpd->irr[0]); -} - -int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.timer_fired; -} - -int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) || - (kvm_highest_pending_irq(vcpu) != -1); -} - -int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) -{ - return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)); -} - -int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) -{ - mp_state->mp_state = vcpu->arch.mp_state; - return 0; -} - -static int vcpu_reset(struct kvm_vcpu *vcpu) -{ - int r; - long psr; - local_irq_save(psr); - r = kvm_insert_vmm_mapping(vcpu); - local_irq_restore(psr); - if (r) - goto fail; - - vcpu->arch.launched = 0; - kvm_arch_vcpu_uninit(vcpu); - r = kvm_arch_vcpu_init(vcpu); - if (r) - goto fail; - - kvm_purge_vmm_mapping(vcpu); - r = 0; -fail: - return r; -} - -int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) -{ - int r = 0; - - vcpu->arch.mp_state = mp_state->mp_state; - if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) - r = vcpu_reset(vcpu); - return r; -} diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c deleted file mode 100644 index cb548ee9fca..00000000000 --- a/arch/ia64/kvm/kvm_fw.c +++ /dev/null @@ -1,674 +0,0 @@ -/* - * PAL/SAL call delegation - * - * Copyright (c) 2004 Li Susie <susie.li@intel.com> - * Copyright (c) 2005 Yu Ke <ke.yu@intel.com> - * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - */ - -#include <linux/kvm_host.h> -#include <linux/smp.h> -#include <asm/sn/addrs.h> -#include <asm/sn/clksupport.h> -#include <asm/sn/shub_mmr.h> - -#include "vti.h" -#include "misc.h" - -#include <asm/pal.h> -#include <asm/sal.h> -#include <asm/tlb.h> - -/* - * Handy macros to make sure that the PAL return values start out - * as something meaningful. 
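
Stepping back to find_highest_bits() just above: it treats the 256-bit IRR as eight 32-bit words and scans from the top word down, using fls() to locate the most significant set bit. A standalone version with a portable fls() substitute (the kernel's fls() has the same 1-based, 0-for-empty contract):

    #include <assert.h>
    #include <stdint.h>

    /* Portable stand-in for the kernel's fls(): 1-based index of the
     * highest set bit, or 0 when no bit is set. */
    static int fls32(uint32_t x)
    {
        int n = 0;

        while (x) {
            n++;
            x >>= 1;
        }
        return n;
    }

    /* Highest pending vector in a 256-bit bitmap, or -1 when none is set. */
    static int find_highest_bits(const uint32_t *dat)
    {
        int i;

        for (i = 7; i >= 0; i--)
            if (dat[i])
                return i * 32 + fls32(dat[i]) - 1;
        return -1;
    }

    int main(void)
    {
        uint32_t irr[8] = { 0 };

        assert(find_highest_bits(irr) == -1);
        irr[2] = 1u << 5;              /* vector 2*32 + 5 = 69 pending */
        assert(find_highest_bits(irr) == 69);
        irr[7] = 0x80000000u;          /* vector 255 outranks it */
        assert(find_highest_bits(irr) == 255);
        return 0;
    }
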
- */ -#define INIT_PAL_STATUS_UNIMPLEMENTED(x) \ - { \ - x.status = PAL_STATUS_UNIMPLEMENTED; \ - x.v0 = 0; \ - x.v1 = 0; \ - x.v2 = 0; \ - } - -#define INIT_PAL_STATUS_SUCCESS(x) \ - { \ - x.status = PAL_STATUS_SUCCESS; \ - x.v0 = 0; \ - x.v1 = 0; \ - x.v2 = 0; \ - } - -static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu, - u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) { - struct exit_ctl_data *p; - - if (vcpu) { - p = &vcpu->arch.exit_data; - if (p->exit_reason == EXIT_REASON_PAL_CALL) { - *gr28 = p->u.pal_data.gr28; - *gr29 = p->u.pal_data.gr29; - *gr30 = p->u.pal_data.gr30; - *gr31 = p->u.pal_data.gr31; - return ; - } - } - printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n"); -} - -static void set_pal_result(struct kvm_vcpu *vcpu, - struct ia64_pal_retval result) { - - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - if (p->exit_reason == EXIT_REASON_PAL_CALL) { - p->u.pal_data.ret = result; - return ; - } - INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret); -} - -static void set_sal_result(struct kvm_vcpu *vcpu, - struct sal_ret_values result) { - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - if (p->exit_reason == EXIT_REASON_SAL_CALL) { - p->u.sal_data.ret = result; - return ; - } - printk(KERN_WARNING"Failed to set sal result!!\n"); -} - -struct cache_flush_args { - u64 cache_type; - u64 operation; - u64 progress; - long status; -}; - -cpumask_t cpu_cache_coherent_map; - -static void remote_pal_cache_flush(void *data) -{ - struct cache_flush_args *args = data; - long status; - u64 progress = args->progress; - - status = ia64_pal_cache_flush(args->cache_type, args->operation, - &progress, NULL); - if (status != 0) - args->status = status; -} - -static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu) -{ - u64 gr28, gr29, gr30, gr31; - struct ia64_pal_retval result = {0, 0, 0, 0}; - struct cache_flush_args args = {0, 0, 0, 0}; - long psr; - - gr28 = gr29 = gr30 = gr31 = 0; - kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31); - - if (gr31 != 0) - printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu); - - /* Always call Host Pal in int=1 */ - gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS; - args.cache_type = gr29; - args.operation = gr30; - smp_call_function(remote_pal_cache_flush, - (void *)&args, 1); - if (args.status != 0) - printk(KERN_ERR"pal_cache_flush error!," - "status:0x%lx\n", args.status); - /* - * Call Host PAL cache flush - * Clear psr.ic when call PAL_CACHE_FLUSH - */ - local_irq_save(psr); - result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1, - &result.v0); - local_irq_restore(psr); - if (result.status != 0) - printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld" - "in1:%lx,in2:%lx\n", - vcpu, result.status, gr29, gr30); - -#if 0 - if (gr29 == PAL_CACHE_TYPE_COHERENT) { - cpus_setall(vcpu->arch.cache_coherent_map); - cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map); - cpus_setall(cpu_cache_coherent_map); - cpu_clear(vcpu->cpu, cpu_cache_coherent_map); - } -#endif - return result; -} - -struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result; - - PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0); - return result; -} - -static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result; - - PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0); - - /* - * PAL_FREQ_BASE may not be implemented in some platforms, - * call SAL instead. 
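
The fallback spelled out in this comment, and implemented just below, is a recurring firmware-interface shape: try the processor-level (PAL) query first, and only when it yields nothing usable ask the platform layer (SAL). A sketch of that shape, with invented stubs standing in for the two firmware calls:

    #include <stdio.h>

    struct retval { long status; unsigned long v0, v1, v2; };

    /* Stub: a PAL query that is unimplemented on this imaginary platform. */
    static struct retval pal_freq_base(void)
    {
        return (struct retval){ 0, 0, 0, 0 };
    }

    /* Stub: the SAL platform query used as the fallback. */
    static long sal_freq_base(unsigned long *ticks, unsigned long *drift)
    {
        *ticks = 200000000UL;          /* pretend 200 MHz base frequency */
        *drift = 0;
        return 0;
    }

    int main(void)
    {
        struct retval r = pal_freq_base();

        if (r.v0 == 0) {               /* PAL gave no answer: ask SAL */
            r.status = sal_freq_base(&r.v0, &r.v1);
            r.v2 = 0;
        }
        printf("status=%ld freq=%lu\n", r.status, r.v0);
        return 0;
    }
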
- */ - if (result.v0 == 0) { - result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, - &result.v0, - &result.v1); - result.v2 = 0; - } - - return result; -} - -/* - * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2 - * RTC is used instead. This function patches the ratios from SAL - * to match the RTC before providing them to the guest. - */ -static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result) -{ - struct pal_freq_ratio *ratio; - unsigned long sal_freq, sal_drift, factor; - - result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, - &sal_freq, &sal_drift); - ratio = (struct pal_freq_ratio *)&result->v2; - factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) / - sn_rtc_cycles_per_second; - - ratio->num = 3; - ratio->den = factor; -} - -static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result; - - PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0); - - if (vcpu->kvm->arch.is_sn2) - sn2_patch_itc_freq_ratios(&result); - - return result; -} - -static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result; - - INIT_PAL_STATUS_UNIMPLEMENTED(result); - return result; -} - -static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result; - - INIT_PAL_STATUS_SUCCESS(result); - return result; -} - -static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result = {0, 0, 0, 0}; - long in0, in1, in2, in3; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - result.status = ia64_pal_proc_get_features(&result.v0, &result.v1, - &result.v2, in2); - - return result; -} - -static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result = {0, 0, 0, 0}; - long in0, in1, in2, in3; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - result.status = ia64_pal_register_info(in1, &result.v1, &result.v2); - - return result; -} - -static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu) -{ - - pal_cache_config_info_t ci; - long status; - unsigned long in0, in1, in2, in3, r9, r10; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - status = ia64_pal_cache_config_info(in1, in2, &ci); - r9 = ci.pcci_info_1.pcci1_data; - r10 = ci.pcci_info_2.pcci2_data; - return ((struct ia64_pal_retval){status, r9, r10, 0}); -} - -#define GUEST_IMPL_VA_MSB 59 -#define GUEST_RID_BITS 18 - -static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu) -{ - - pal_vm_info_1_u_t vminfo1; - pal_vm_info_2_u_t vminfo2; - struct ia64_pal_retval result; - - PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0); - if (!result.status) { - vminfo1.pvi1_val = result.v0; - vminfo1.pal_vm_info_1_s.max_itr_entry = 8; - vminfo1.pal_vm_info_1_s.max_dtr_entry = 8; - result.v0 = vminfo1.pvi1_val; - vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB; - vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS; - result.v1 = vminfo2.pvi2_val; - } - - return result; -} - -static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result; - unsigned long in0, in1, in2, in3; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - - result.status = ia64_pal_vm_info(in1, in2, - (pal_tc_info_u_t *)&result.v1, &result.v2); - - return result; -} - -static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu) -{ - u64 index = 0; - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - if (p->exit_reason == EXIT_REASON_PAL_CALL) - index = 
p->u.pal_data.gr28; - - return index; -} - -static void prepare_for_halt(struct kvm_vcpu *vcpu) -{ - vcpu->arch.timer_pending = 1; - vcpu->arch.timer_fired = 0; -} - -static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu) -{ - long status; - unsigned long in0, in1, in2, in3, r9; - unsigned long pm_buffer[16]; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - status = ia64_pal_perf_mon_info(pm_buffer, - (pal_perf_mon_info_u_t *) &r9); - if (status != 0) { - printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status); - } else { - if (in1) - memcpy((void *)in1, pm_buffer, sizeof(pm_buffer)); - else { - status = PAL_STATUS_EINVAL; - printk(KERN_WARNING"Invalid parameters " - "for PAL call:0x%lx!\n", in0); - } - } - return (struct ia64_pal_retval){status, r9, 0, 0}; -} - -static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu) -{ - unsigned long in0, in1, in2, in3; - long status; - unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32) - | (1UL << 61) | (1UL << 60); - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - if (in1) { - memcpy((void *)in1, &res, sizeof(res)); - status = 0; - } else{ - status = PAL_STATUS_EINVAL; - printk(KERN_WARNING"Invalid parameters " - "for PAL call:0x%lx!\n", in0); - } - - return (struct ia64_pal_retval){status, 0, 0, 0}; -} - -static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu) -{ - unsigned long r9; - long status; - - status = ia64_pal_mem_attrib(&r9); - - return (struct ia64_pal_retval){status, r9, 0, 0}; -} - -static void remote_pal_prefetch_visibility(void *v) -{ - s64 trans_type = (s64)v; - ia64_pal_prefetch_visibility(trans_type); -} - -static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result = {0, 0, 0, 0}; - unsigned long in0, in1, in2, in3; - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - result.status = ia64_pal_prefetch_visibility(in1); - if (result.status == 0) { - /* Must be performed on all remote processors - in the coherence domain. 
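
Both pal_cache_flush() earlier and this prefetch-visibility path use the same broadcast shape: bundle the inputs (and a shared status word) into one object, hand it to smp_call_function() as a void * payload, and let every remote CPU run a short helper. A loose user-space analogy with threads (not the kernel primitive) to show the payload pattern; the names and values are made up:

    #include <pthread.h>
    #include <stdio.h>

    /* Argument block shared with every "remote" handler, like
     * struct cache_flush_args in the hunk above. */
    struct flush_args {
        unsigned long cache_type;
        unsigned long operation;
        long status;                   /* a failing handler records it */
    };

    static void *remote_flush(void *data)
    {
        struct flush_args *args = data;
        long rc = 0;                   /* pretend the per-CPU op succeeded */

        if (rc != 0)
            args->status = rc;         /* mirrors remote_pal_cache_flush() */
        return NULL;
    }

    int main(void)
    {
        struct flush_args args = { 1, 0, 0 };
        pthread_t t[4];
        int i;

        for (i = 0; i < 4; i++)        /* one worker per "remote CPU" */
            pthread_create(&t[i], NULL, remote_flush, &args);
        for (i = 0; i < 4; i++)
            pthread_join(&t[i], NULL);

        printf("status=%ld\n", args.status);
        return 0;
    }
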
*/ - smp_call_function(remote_pal_prefetch_visibility, - (void *)in1, 1); - /* Unnecessary on remote processor for other vcpus!*/ - result.status = 1; - } - return result; -} - -static void remote_pal_mc_drain(void *v) -{ - ia64_pal_mc_drain(); -} - -static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result = {0, 0, 0, 0}; - unsigned long in0, in1, in2, in3; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - - if (in1 == 0 && in2) { - char brand_info[128]; - result.status = ia64_pal_get_brand_info(brand_info); - if (result.status == PAL_STATUS_SUCCESS) - memcpy((void *)in2, brand_info, 128); - } else { - result.status = PAL_STATUS_REQUIRES_MEMORY; - printk(KERN_WARNING"Invalid parameters for " - "PAL call:0x%lx!\n", in0); - } - - return result; -} - -int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - - u64 gr28; - struct ia64_pal_retval result; - int ret = 1; - - gr28 = kvm_get_pal_call_index(vcpu); - switch (gr28) { - case PAL_CACHE_FLUSH: - result = pal_cache_flush(vcpu); - break; - case PAL_MEM_ATTRIB: - result = pal_mem_attrib(vcpu); - break; - case PAL_CACHE_SUMMARY: - result = pal_cache_summary(vcpu); - break; - case PAL_PERF_MON_INFO: - result = pal_perf_mon_info(vcpu); - break; - case PAL_HALT_INFO: - result = pal_halt_info(vcpu); - break; - case PAL_HALT_LIGHT: - { - INIT_PAL_STATUS_SUCCESS(result); - prepare_for_halt(vcpu); - if (kvm_highest_pending_irq(vcpu) == -1) - ret = kvm_emulate_halt(vcpu); - } - break; - - case PAL_PREFETCH_VISIBILITY: - result = pal_prefetch_visibility(vcpu); - break; - case PAL_MC_DRAIN: - result.status = ia64_pal_mc_drain(); - /* FIXME: All vcpus likely call PAL_MC_DRAIN. - That causes the congestion. */ - smp_call_function(remote_pal_mc_drain, NULL, 1); - break; - - case PAL_FREQ_RATIOS: - result = pal_freq_ratios(vcpu); - break; - - case PAL_FREQ_BASE: - result = pal_freq_base(vcpu); - break; - - case PAL_LOGICAL_TO_PHYSICAL : - result = pal_logical_to_physica(vcpu); - break; - - case PAL_VM_SUMMARY : - result = pal_vm_summary(vcpu); - break; - - case PAL_VM_INFO : - result = pal_vm_info(vcpu); - break; - case PAL_PLATFORM_ADDR : - result = pal_platform_addr(vcpu); - break; - case PAL_CACHE_INFO: - result = pal_cache_info(vcpu); - break; - case PAL_PTCE_INFO: - INIT_PAL_STATUS_SUCCESS(result); - result.v1 = (1L << 32) | 1L; - break; - case PAL_REGISTER_INFO: - result = pal_register_info(vcpu); - break; - case PAL_VM_PAGE_SIZE: - result.status = ia64_pal_vm_page_size(&result.v0, - &result.v1); - break; - case PAL_RSE_INFO: - result.status = ia64_pal_rse_info(&result.v0, - (pal_hints_u_t *)&result.v1); - break; - case PAL_PROC_GET_FEATURES: - result = pal_proc_get_features(vcpu); - break; - case PAL_DEBUG_INFO: - result.status = ia64_pal_debug_info(&result.v0, - &result.v1); - break; - case PAL_VERSION: - result.status = ia64_pal_version( - (pal_version_u_t *)&result.v0, - (pal_version_u_t *)&result.v1); - break; - case PAL_FIXED_ADDR: - result.status = PAL_STATUS_SUCCESS; - result.v0 = vcpu->vcpu_id; - break; - case PAL_BRAND_INFO: - result = pal_get_brand_info(vcpu); - break; - case PAL_GET_PSTATE: - case PAL_CACHE_SHARED_INFO: - INIT_PAL_STATUS_UNIMPLEMENTED(result); - break; - default: - INIT_PAL_STATUS_UNIMPLEMENTED(result); - printk(KERN_WARNING"kvm: Unsupported pal call," - " index:0x%lx\n", gr28); - } - set_pal_result(vcpu, result); - return ret; -} - -static struct sal_ret_values sal_emulator(struct kvm *kvm, - long index, unsigned long in1, - unsigned long in2, unsigned 
long in3, - unsigned long in4, unsigned long in5, - unsigned long in6, unsigned long in7) -{ - unsigned long r9 = 0; - unsigned long r10 = 0; - long r11 = 0; - long status; - - status = 0; - switch (index) { - case SAL_FREQ_BASE: - status = ia64_sal_freq_base(in1, &r9, &r10); - break; - case SAL_PCI_CONFIG_READ: - printk(KERN_WARNING"kvm: Not allowed to call here!" - " SAL_PCI_CONFIG_READ\n"); - break; - case SAL_PCI_CONFIG_WRITE: - printk(KERN_WARNING"kvm: Not allowed to call here!" - " SAL_PCI_CONFIG_WRITE\n"); - break; - case SAL_SET_VECTORS: - if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) { - if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) { - status = -2; - } else { - kvm->arch.rdv_sal_data.boot_ip = in2; - kvm->arch.rdv_sal_data.boot_gp = in3; - } - printk("Rendezvous called! iip:%lx\n\n", in2); - } else - printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu." - "ignored...\n", in1); - break; - case SAL_GET_STATE_INFO: - /* No more info. */ - status = -5; - r9 = 0; - break; - case SAL_GET_STATE_INFO_SIZE: - /* Return a dummy size. */ - status = 0; - r9 = 128; - break; - case SAL_CLEAR_STATE_INFO: - /* Noop. */ - break; - case SAL_MC_RENDEZ: - printk(KERN_WARNING - "kvm: called SAL_MC_RENDEZ. ignored...\n"); - break; - case SAL_MC_SET_PARAMS: - printk(KERN_WARNING - "kvm: called SAL_MC_SET_PARAMS.ignored!\n"); - break; - case SAL_CACHE_FLUSH: - if (1) { - /*Flush using SAL. - This method is faster but has a side - effect on other vcpu running on - this cpu. */ - status = ia64_sal_cache_flush(in1); - } else { - /*Maybe need to implement the method - without side effect!*/ - status = 0; - } - break; - case SAL_CACHE_INIT: - printk(KERN_WARNING - "kvm: called SAL_CACHE_INIT. ignored...\n"); - break; - case SAL_UPDATE_PAL: - printk(KERN_WARNING - "kvm: CALLED SAL_UPDATE_PAL. ignored...\n"); - break; - default: - printk(KERN_WARNING"kvm: called SAL_CALL with unknown index." - " index:%ld\n", index); - status = -1; - break; - } - return ((struct sal_ret_values) {status, r9, r10, r11}); -} - -static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1, - u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){ - - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - - if (p->exit_reason == EXIT_REASON_SAL_CALL) { - *in0 = p->u.sal_data.in0; - *in1 = p->u.sal_data.in1; - *in2 = p->u.sal_data.in2; - *in3 = p->u.sal_data.in3; - *in4 = p->u.sal_data.in4; - *in5 = p->u.sal_data.in5; - *in6 = p->u.sal_data.in6; - *in7 = p->u.sal_data.in7; - return ; - } - *in0 = 0; -} - -void kvm_sal_emul(struct kvm_vcpu *vcpu) -{ - - struct sal_ret_values result; - u64 index, in1, in2, in3, in4, in5, in6, in7; - - kvm_get_sal_call_data(vcpu, &index, &in1, &in2, - &in3, &in4, &in5, &in6, &in7); - result = sal_emulator(vcpu->kvm, index, in1, in2, in3, - in4, in5, in6, in7); - set_sal_result(vcpu, result); -} diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c deleted file mode 100644 index f1268b8e6f9..00000000000 --- a/arch/ia64/kvm/kvm_lib.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * kvm_lib.c: Compile some libraries for kvm-intel module. - * - * Just include kernel's library, and disable symbols export. - * Copyright (C) 2008, Intel Corporation. - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - */ -#undef CONFIG_MODULES -#include <linux/module.h> -#undef CONFIG_KALLSYMS -#undef EXPORT_SYMBOL -#undef EXPORT_SYMBOL_GPL -#define EXPORT_SYMBOL(sym) -#define EXPORT_SYMBOL_GPL(sym) -#include "../../../lib/vsprintf.c" -#include "../../../lib/ctype.c" diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h deleted file mode 100644 index b2bcaa2787a..00000000000 --- a/arch/ia64/kvm/kvm_minstate.h +++ /dev/null @@ -1,266 +0,0 @@ -/* - * kvm_minstate.h: min save macros - * Copyright (c) 2007, Intel Corporation. - * - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - - -#include <asm/asmmacro.h> -#include <asm/types.h> -#include <asm/kregs.h> -#include <asm/kvm_host.h> - -#include "asm-offsets.h" - -#define KVM_MINSTATE_START_SAVE_MIN \ - mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\ - ;; \ - mov.m r28 = ar.rnat; \ - addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \ - ;; \ - lfetch.fault.excl.nt1 [r22]; \ - addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1; \ - mov r23 = ar.bspstore; /* save ar.bspstore */ \ - ;; \ - mov ar.bspstore = r22; /* switch to kernel RBS */\ - ;; \ - mov r18 = ar.bsp; \ - mov ar.rsc = 0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ - - - -#define KVM_MINSTATE_END_SAVE_MIN \ - bsw.1; /* switch back to bank 1 (must be last in insn group) */\ - ;; - - -#define PAL_VSA_SYNC_READ \ - /* begin to call pal vps sync_read */ \ -{.mii; \ - add r25 = VMM_VPD_BASE_OFFSET, r21; \ - nop 0x0; \ - mov r24=ip; \ - ;; \ -} \ -{.mmb \ - add r24=0x20, r24; \ - ld8 r25 = [r25]; /* read vpd base */ \ - br.cond.sptk kvm_vps_sync_read; /*call the service*/ \ - ;; \ -}; \ - - -#define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21 - -/* - * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves - * the minimum state necessary that allows us to turn psr.ic back - * on. - * - * Assumed state upon entry: - * psr.ic: off - * r31: contains saved predicates (pr) - * - * Upon exit, the state is as follows: - * psr.ic: off - * r2 = points to &pt_regs.r16 - * r8 = contents of ar.ccv - * r9 = contents of ar.csd - * r10 = contents of ar.ssd - * r11 = FPSR_DEFAULT - * r12 = kernel sp (kernel virtual address) - * r13 = points to current task_struct (kernel virtual address) - * p15 = TRUE if psr.i is set in cr.ipsr - * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: - * preserved - * - * Note that psr.ic is NOT turned on by this macro. This is so that - * we can pass interruption state as arguments to a handler. 
- */ - - -#define PT(f) (VMM_PT_REGS_##f##_OFFSET) - -#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ - KVM_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ - mov r27 = ar.rsc; /* M */ \ - mov r20 = r1; /* A */ \ - mov r25 = ar.unat; /* M */ \ - mov r29 = cr.ipsr; /* M */ \ - mov r26 = ar.pfs; /* I */ \ - mov r18 = cr.isr; \ - COVER; /* B;; (or nothing) */ \ - ;; \ - tbit.z p0,p15 = r29,IA64_PSR_I_BIT; \ - mov r1 = r16; \ -/* mov r21=r16; */ \ - /* switch from user to kernel RBS: */ \ - ;; \ - invala; /* M */ \ - SAVE_IFS; \ - ;; \ - KVM_MINSTATE_START_SAVE_MIN \ - adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */ \ - adds r16 = PT(CR_IPSR),r1; \ - ;; \ - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ - st8 [r16] = r29; /* save cr.ipsr */ \ - ;; \ - lfetch.fault.excl.nt1 [r17]; \ - tbit.nz p15,p0 = r29,IA64_PSR_I_BIT; \ - mov r29 = b0 \ - ;; \ - adds r16 = PT(R8),r1; /* initialize first base pointer */\ - adds r17 = PT(R9),r1; /* initialize second base pointer */\ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r8,16; \ -.mem.offset 8,0; st8.spill [r17] = r9,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r10,24; \ -.mem.offset 8,0; st8.spill [r17] = r11,24; \ - ;; \ - mov r9 = cr.iip; /* M */ \ - mov r10 = ar.fpsr; /* M */ \ - ;; \ - st8 [r16] = r9,16; /* save cr.iip */ \ - st8 [r17] = r30,16; /* save cr.ifs */ \ - sub r18 = r18,r22; /* r18=RSE.ndirty*8 */ \ - ;; \ - st8 [r16] = r25,16; /* save ar.unat */ \ - st8 [r17] = r26,16; /* save ar.pfs */ \ - shl r18 = r18,16; /* calu ar.rsc used for "loadrs" */\ - ;; \ - st8 [r16] = r27,16; /* save ar.rsc */ \ - st8 [r17] = r28,16; /* save ar.rnat */ \ - ;; /* avoid RAW on r16 & r17 */ \ - st8 [r16] = r23,16; /* save ar.bspstore */ \ - st8 [r17] = r31,16; /* save predicates */ \ - ;; \ - st8 [r16] = r29,16; /* save b0 */ \ - st8 [r17] = r18,16; /* save ar.rsc value for "loadrs" */\ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */ \ -.mem.offset 8,0; st8.spill [r17] = r12,16; \ - adds r12 = -16,r1; /* switch to kernel memory stack */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r13,16; \ -.mem.offset 8,0; st8.spill [r17] = r10,16; /* save ar.fpsr */\ - mov r13 = r21; /* establish `current' */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r15,16; \ -.mem.offset 8,0; st8.spill [r17] = r14,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r2,16; \ -.mem.offset 8,0; st8.spill [r17] = r3,16; \ - adds r2 = VMM_PT_REGS_R16_OFFSET,r1; \ - ;; \ - adds r16 = VMM_VCPU_IIPA_OFFSET,r13; \ - adds r17 = VMM_VCPU_ISR_OFFSET,r13; \ - mov r26 = cr.iipa; \ - mov r27 = cr.isr; \ - ;; \ - st8 [r16] = r26; \ - st8 [r17] = r27; \ - ;; \ - EXTRA; \ - mov r8 = ar.ccv; \ - mov r9 = ar.csd; \ - mov r10 = ar.ssd; \ - movl r11 = FPSR_DEFAULT; /* L-unit */ \ - adds r17 = VMM_VCPU_GP_OFFSET,r13; \ - ;; \ - ld8 r1 = [r17];/* establish kernel global pointer */ \ - ;; \ - PAL_VSA_SYNC_READ \ - KVM_MINSTATE_END_SAVE_MIN - -/* - * SAVE_REST saves the remainder of pt_regs (with psr.ic on). - * - * Assumed state upon entry: - * psr.ic: on - * r2: points to &pt_regs.f6 - * r3: points to &pt_regs.f7 - * r8: contents of ar.ccv - * r9: contents of ar.csd - * r10: contents of ar.ssd - * r11: FPSR_DEFAULT - * - * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. 
- */ -#define KVM_SAVE_REST \ -.mem.offset 0,0; st8.spill [r2] = r16,16; \ -.mem.offset 8,0; st8.spill [r3] = r17,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r18,16; \ -.mem.offset 8,0; st8.spill [r3] = r19,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r20,16; \ -.mem.offset 8,0; st8.spill [r3] = r21,16; \ - mov r18=b6; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r22,16; \ -.mem.offset 8,0; st8.spill [r3] = r23,16; \ - mov r19 = b7; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r24,16; \ -.mem.offset 8,0; st8.spill [r3] = r25,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r26,16; \ -.mem.offset 8,0; st8.spill [r3] = r27,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r28,16; \ -.mem.offset 8,0; st8.spill [r3] = r29,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r30,16; \ -.mem.offset 8,0; st8.spill [r3] = r31,32; \ - ;; \ - mov ar.fpsr = r11; \ - st8 [r2] = r8,8; \ - adds r24 = PT(B6)-PT(F7),r3; \ - adds r25 = PT(B7)-PT(F7),r3; \ - ;; \ - st8 [r24] = r18,16; /* b6 */ \ - st8 [r25] = r19,16; /* b7 */ \ - adds r2 = PT(R4)-PT(F6),r2; \ - adds r3 = PT(R5)-PT(F7),r3; \ - ;; \ - st8 [r24] = r9; /* ar.csd */ \ - st8 [r25] = r10; /* ar.ssd */ \ - ;; \ - mov r18 = ar.unat; \ - adds r19 = PT(EML_UNAT)-PT(R4),r2; \ - ;; \ - st8 [r19] = r18; /* eml_unat */ \ - - -#define KVM_SAVE_EXTRA \ -.mem.offset 0,0; st8.spill [r2] = r4,16; \ -.mem.offset 8,0; st8.spill [r3] = r5,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r6,16; \ -.mem.offset 8,0; st8.spill [r3] = r7; \ - ;; \ - mov r26 = ar.unat; \ - ;; \ - st8 [r2] = r26;/* eml_unat */ \ - -#define KVM_SAVE_MIN_WITH_COVER KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,) -#define KVM_SAVE_MIN_WITH_COVER_R19 KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19) -#define KVM_SAVE_MIN KVM_DO_SAVE_MIN( , mov r30 = r0, ) diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h deleted file mode 100644 index c5f92a926a9..00000000000 --- a/arch/ia64/kvm/lapic.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef __KVM_IA64_LAPIC_H -#define __KVM_IA64_LAPIC_H - -#include <linux/kvm_host.h> - -/* - * vlsapic - */ -struct kvm_lapic{ - struct kvm_vcpu *vcpu; - uint64_t insvc[4]; - uint64_t vhpi; - uint8_t xtp; - uint8_t pal_init_pending; - uint8_t pad[2]; -}; - -int kvm_create_lapic(struct kvm_vcpu *vcpu); -void kvm_free_lapic(struct kvm_vcpu *vcpu); - -int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); -int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); -int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode); -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); -#define kvm_apic_present(x) (true) -#define kvm_lapic_enabled(x) (true) - -#endif diff --git a/arch/ia64/kvm/memcpy.S b/arch/ia64/kvm/memcpy.S deleted file mode 100644 index c04cdbe9f80..00000000000 --- a/arch/ia64/kvm/memcpy.S +++ /dev/null @@ -1 +0,0 @@ -#include "../lib/memcpy.S" diff --git a/arch/ia64/kvm/memset.S b/arch/ia64/kvm/memset.S deleted file mode 100644 index 83c3066d844..00000000000 --- a/arch/ia64/kvm/memset.S +++ /dev/null @@ -1 +0,0 @@ -#include "../lib/memset.S" diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h deleted file mode 100644 index dd979e00b57..00000000000 --- a/arch/ia64/kvm/misc.h +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef __KVM_IA64_MISC_H -#define __KVM_IA64_MISC_H - -#include <linux/kvm_host.h> -/* - * misc.h - * Copyright (C) 2007, Intel Corporation. 
- * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -/* - *Return p2m base address at host side! - */ -static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm) -{ - return (uint64_t *)(kvm->arch.vm_base + - offsetof(struct kvm_vm_data, kvm_p2m)); -} - -static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn, - u64 paddr, u64 mem_flags) -{ - uint64_t *pmt_base = kvm_host_get_pmt(kvm); - unsigned long pte; - - pte = PAGE_ALIGN(paddr) | mem_flags; - pmt_base[gfn] = pte; -} - -/*Function for translating host address to guest address*/ - -static inline void *to_guest(struct kvm *kvm, void *addr) -{ - return (void *)((unsigned long)(addr) - kvm->arch.vm_base + - KVM_VM_DATA_BASE); -} - -/*Function for translating guest address to host address*/ - -static inline void *to_host(struct kvm *kvm, void *addr) -{ - return (void *)((unsigned long)addr - KVM_VM_DATA_BASE - + kvm->arch.vm_base); -} - -/* Get host context of the vcpu */ -static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu) -{ - union context *ctx = &vcpu->arch.host; - return to_guest(vcpu->kvm, ctx); -} - -/* Get guest context of the vcpu */ -static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu) -{ - union context *ctx = &vcpu->arch.guest; - return to_guest(vcpu->kvm, ctx); -} - -/* kvm get exit data from gvmm! */ -static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu) -{ - return &vcpu->arch.exit_data; -} - -/*kvm get vcpu ioreq for kvm module!*/ -static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p_ctl_data; - - if (vcpu) { - p_ctl_data = kvm_get_exit_data(vcpu); - if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION) - return &p_ctl_data->u.ioreq; - } - - return NULL; -} - -#endif diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c deleted file mode 100644 index f1e17d3d6cd..00000000000 --- a/arch/ia64/kvm/mmio.c +++ /dev/null @@ -1,336 +0,0 @@ -/* - * mmio.c: MMIO emulation components. - * Copyright (c) 2004, Intel Corporation. - * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) - * Kun Tian (Kevin Tian) (Kevin.tian@intel.com) - * - * Copyright (c) 2007 Intel Corporation KVM support. - * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
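
to_guest() and to_host() in the misc.h hunk above are a pair of affine re-basings between the host mapping of the per-VM data area (at vm_base) and its fixed in-VMM mapping (at KVM_VM_DATA_BASE); by construction each undoes the other. A sketch that checks the round trip; both base addresses here are made up:

    #include <assert.h>
    #include <stdint.h>

    #define KVM_VM_DATA_BASE 0x6000000000000000ULL   /* illustrative constant */

    static uint64_t to_guest(uint64_t vm_base, uint64_t host_addr)
    {
        return host_addr - vm_base + KVM_VM_DATA_BASE;
    }

    static uint64_t to_host(uint64_t vm_base, uint64_t guest_addr)
    {
        return guest_addr - KVM_VM_DATA_BASE + vm_base;
    }

    int main(void)
    {
        const uint64_t vm_base = 0x0000000812340000ULL;  /* made-up allocation */
        const uint64_t host = vm_base + 0x1000;

        /* The two translations are exact inverses. */
        assert(to_host(vm_base, to_guest(vm_base, host)) == host);
        return 0;
    }
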
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include <linux/kvm_host.h> - -#include "vcpu.h" - -static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val) -{ - VLSAPIC_XTP(v) = val; -} - -/* - * LSAPIC OFFSET - */ -#define PIB_LOW_HALF(ofst) !(ofst & (1 << 20)) -#define PIB_OFST_INTA 0x1E0000 -#define PIB_OFST_XTP 0x1E0008 - -/* - * execute write IPI op. - */ -static void vlsapic_write_ipi(struct kvm_vcpu *vcpu, - uint64_t addr, uint64_t data) -{ - struct exit_ctl_data *p = &current_vcpu->arch.exit_data; - unsigned long psr; - - local_irq_save(psr); - - p->exit_reason = EXIT_REASON_IPI; - p->u.ipi_data.addr.val = addr; - p->u.ipi_data.data.val = data; - vmm_transition(current_vcpu); - - local_irq_restore(psr); - -} - -void lsapic_write(struct kvm_vcpu *v, unsigned long addr, - unsigned long length, unsigned long val) -{ - addr &= (PIB_SIZE - 1); - - switch (addr) { - case PIB_OFST_INTA: - panic_vm(v, "Undefined write on PIB INTA\n"); - break; - case PIB_OFST_XTP: - if (length == 1) { - vlsapic_write_xtp(v, val); - } else { - panic_vm(v, "Undefined write on PIB XTP\n"); - } - break; - default: - if (PIB_LOW_HALF(addr)) { - /*Lower half */ - if (length != 8) - panic_vm(v, "Can't LHF write with size %ld!\n", - length); - else - vlsapic_write_ipi(v, addr, val); - } else { /*Upper half */ - panic_vm(v, "IPI-UHF write %lx\n", addr); - } - break; - } -} - -unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr, - unsigned long length) -{ - uint64_t result = 0; - - addr &= (PIB_SIZE - 1); - - switch (addr) { - case PIB_OFST_INTA: - if (length == 1) /* 1 byte load */ - ; /* There is no i8259, there is no INTA access*/ - else - panic_vm(v, "Undefined read on PIB INTA\n"); - - break; - case PIB_OFST_XTP: - if (length == 1) { - result = VLSAPIC_XTP(v); - } else { - panic_vm(v, "Undefined read on PIB XTP\n"); - } - break; - default: - panic_vm(v, "Undefined addr access for lsapic!\n"); - break; - } - return result; -} - -static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest, - u16 s, int ma, int dir) -{ - unsigned long iot; - struct exit_ctl_data *p = &vcpu->arch.exit_data; - unsigned long psr; - - iot = __gpfn_is_io(src_pa >> PAGE_SHIFT); - - local_irq_save(psr); - - /*Intercept the access for PIB range*/ - if (iot == GPFN_PIB) { - if (!dir) - lsapic_write(vcpu, src_pa, s, *dest); - else - *dest = lsapic_read(vcpu, src_pa, s); - goto out; - } - p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION; - p->u.ioreq.addr = src_pa; - p->u.ioreq.size = s; - p->u.ioreq.dir = dir; - if (dir == IOREQ_WRITE) - p->u.ioreq.data = *dest; - p->u.ioreq.state = STATE_IOREQ_READY; - vmm_transition(vcpu); - - if (p->u.ioreq.state == STATE_IORESP_READY) { - if (dir == IOREQ_READ) - /* it's necessary to ensure zero extending */ - *dest = p->u.ioreq.data & (~0UL >> (64-(s*8))); - } else - panic_vm(vcpu, "Unhandled mmio access returned!\n"); -out: - local_irq_restore(psr); - return ; -} - -/* - dir 1: read 0:write - inst_type 0:integer 1:floating point - */ -#define SL_INTEGER 0 /* store/load integer*/ -#define SL_FLOATING 1 /* store/load floating*/ - -void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) -{ - struct kvm_pt_regs *regs; - IA64_BUNDLE bundle; - int slot, dir = 0; - int inst_type = -1; - u16 size = 0; - u64 data, slot1a, slot1b, temp, update_reg; - s32 imm; - INST64 inst; - - regs = vcpu_regs(vcpu); - - if 
(fetch_code(vcpu, regs->cr_iip, &bundle)) { - /* if fetch code fail, return and try again */ - return; - } - slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; - if (!slot) - inst.inst = bundle.slot0; - else if (slot == 1) { - slot1a = bundle.slot1a; - slot1b = bundle.slot1b; - inst.inst = slot1a + (slot1b << 18); - } else if (slot == 2) - inst.inst = bundle.slot2; - - /* Integer Load/Store */ - if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) { - inst_type = SL_INTEGER; - size = (inst.M1.x6 & 0x3); - if ((inst.M1.x6 >> 2) > 0xb) { - /*write*/ - dir = IOREQ_WRITE; - data = vcpu_get_gr(vcpu, inst.M4.r2); - } else if ((inst.M1.x6 >> 2) < 0xb) { - /*read*/ - dir = IOREQ_READ; - } - } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) { - /* Integer Load + Reg update */ - inst_type = SL_INTEGER; - dir = IOREQ_READ; - size = (inst.M2.x6 & 0x3); - temp = vcpu_get_gr(vcpu, inst.M2.r3); - update_reg = vcpu_get_gr(vcpu, inst.M2.r2); - temp += update_reg; - vcpu_set_gr(vcpu, inst.M2.r3, temp, 0); - } else if (inst.M3.major == 5) { - /*Integer Load/Store + Imm update*/ - inst_type = SL_INTEGER; - size = (inst.M3.x6&0x3); - if ((inst.M5.x6 >> 2) > 0xb) { - /*write*/ - dir = IOREQ_WRITE; - data = vcpu_get_gr(vcpu, inst.M5.r2); - temp = vcpu_get_gr(vcpu, inst.M5.r3); - imm = (inst.M5.s << 31) | (inst.M5.i << 30) | - (inst.M5.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M5.r3, temp, 0); - - } else if ((inst.M3.x6 >> 2) < 0xb) { - /*read*/ - dir = IOREQ_READ; - temp = vcpu_get_gr(vcpu, inst.M3.r3); - imm = (inst.M3.s << 31) | (inst.M3.i << 30) | - (inst.M3.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M3.r3, temp, 0); - - } - } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B - && inst.M9.m == 0 && inst.M9.x == 0) { - /* Floating-point spill*/ - struct ia64_fpreg v; - - inst_type = SL_FLOATING; - dir = IOREQ_WRITE; - vcpu_get_fpreg(vcpu, inst.M9.f2, &v); - /* Write high word. FIXME: this is a kludge! */ - v.u.bits[1] &= 0x3ffff; - mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8, - ma, IOREQ_WRITE); - data = v.u.bits[0]; - size = 3; - } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) { - /* Floating-point spill + Imm update */ - struct ia64_fpreg v; - - inst_type = SL_FLOATING; - dir = IOREQ_WRITE; - vcpu_get_fpreg(vcpu, inst.M10.f2, &v); - temp = vcpu_get_gr(vcpu, inst.M10.r3); - imm = (inst.M10.s << 31) | (inst.M10.i << 30) | - (inst.M10.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); - - /* Write high word.FIXME: this is a kludge! */ - v.u.bits[1] &= 0x3ffff; - mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], - 8, ma, IOREQ_WRITE); - data = v.u.bits[0]; - size = 3; - } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) { - /* Floating-point stf8 + Imm update */ - struct ia64_fpreg v; - inst_type = SL_FLOATING; - dir = IOREQ_WRITE; - size = 3; - vcpu_get_fpreg(vcpu, inst.M10.f2, &v); - data = v.u.bits[0]; /* Significand. 
*/ - temp = vcpu_get_gr(vcpu, inst.M10.r3); - imm = (inst.M10.s << 31) | (inst.M10.i << 30) | - (inst.M10.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); - } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c - && inst.M15.x6 <= 0x2f) { - temp = vcpu_get_gr(vcpu, inst.M15.r3); - imm = (inst.M15.s << 31) | (inst.M15.i << 30) | - (inst.M15.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M15.r3, temp, 0); - - vcpu_increment_iip(vcpu); - return; - } else if (inst.M12.major == 6 && inst.M12.m == 1 - && inst.M12.x == 1 && inst.M12.x6 == 1) { - /* Floating-point Load Pair + Imm ldfp8 M12*/ - struct ia64_fpreg v; - - inst_type = SL_FLOATING; - dir = IOREQ_READ; - size = 8; /*ldfd*/ - mmio_access(vcpu, padr, &data, size, ma, dir); - v.u.bits[0] = data; - v.u.bits[1] = 0x1003E; - vcpu_set_fpreg(vcpu, inst.M12.f1, &v); - padr += 8; - mmio_access(vcpu, padr, &data, size, ma, dir); - v.u.bits[0] = data; - v.u.bits[1] = 0x1003E; - vcpu_set_fpreg(vcpu, inst.M12.f2, &v); - padr += 8; - vcpu_set_gr(vcpu, inst.M12.r3, padr, 0); - vcpu_increment_iip(vcpu); - return; - } else { - inst_type = -1; - panic_vm(vcpu, "Unsupported MMIO access instruction! " - "Bunld[0]=0x%lx, Bundle[1]=0x%lx\n", - bundle.i64[0], bundle.i64[1]); - } - - size = 1 << size; - if (dir == IOREQ_WRITE) { - mmio_access(vcpu, padr, &data, size, ma, dir); - } else { - mmio_access(vcpu, padr, &data, size, ma, dir); - if (inst_type == SL_INTEGER) - vcpu_set_gr(vcpu, inst.M1.r1, data, 0); - else - panic_vm(vcpu, "Unsupported instruction type!\n"); - - } - vcpu_increment_iip(vcpu); -} diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S deleted file mode 100644 index f793be3efff..00000000000 --- a/arch/ia64/kvm/optvfault.S +++ /dev/null @@ -1,1090 +0,0 @@ -/* - * arch/ia64/kvm/optvfault.S - * optimize virtualization fault handler - * - * Copyright (C) 2006 Intel Co - * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> - * Copyright (C) 2008 Intel Co - * Add the support for Tukwila processors. 
- * Xiantao Zhang <xiantao.zhang@intel.com> - */ - -#include <asm/asmmacro.h> -#include <asm/processor.h> -#include <asm/kvm_host.h> - -#include "vti.h" -#include "asm-offsets.h" - -#define ACCE_MOV_FROM_AR -#define ACCE_MOV_FROM_RR -#define ACCE_MOV_TO_RR -#define ACCE_RSM -#define ACCE_SSM -#define ACCE_MOV_TO_PSR -#define ACCE_THASH - -#define VMX_VPS_SYNC_READ \ - add r16=VMM_VPD_BASE_OFFSET,r21; \ - mov r17 = b0; \ - mov r18 = r24; \ - mov r19 = r25; \ - mov r20 = r31; \ - ;; \ -{.mii; \ - ld8 r16 = [r16]; \ - nop 0x0; \ - mov r24 = ip; \ - ;; \ -}; \ -{.mmb; \ - add r24=0x20, r24; \ - mov r25 =r16; \ - br.sptk.many kvm_vps_sync_read; \ -}; \ - mov b0 = r17; \ - mov r24 = r18; \ - mov r25 = r19; \ - mov r31 = r20 - -ENTRY(kvm_vps_entry) - adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21 - ;; - ld8 r29 = [r29] - ;; - add r29 = r29, r30 - ;; - mov b0 = r29 - br.sptk.many b0 -END(kvm_vps_entry) - -/* - * Inputs: - * r24 : return address - * r25 : vpd - * r29 : scratch - * - */ -GLOBAL_ENTRY(kvm_vps_sync_read) - movl r30 = PAL_VPS_SYNC_READ - ;; - br.sptk.many kvm_vps_entry -END(kvm_vps_sync_read) - -/* - * Inputs: - * r24 : return address - * r25 : vpd - * r29 : scratch - * - */ -GLOBAL_ENTRY(kvm_vps_sync_write) - movl r30 = PAL_VPS_SYNC_WRITE - ;; - br.sptk.many kvm_vps_entry -END(kvm_vps_sync_write) - -/* - * Inputs: - * r23 : pr - * r24 : guest b0 - * r25 : vpd - * - */ -GLOBAL_ENTRY(kvm_vps_resume_normal) - movl r30 = PAL_VPS_RESUME_NORMAL - ;; - mov pr=r23,-2 - br.sptk.many kvm_vps_entry -END(kvm_vps_resume_normal) - -/* - * Inputs: - * r23 : pr - * r24 : guest b0 - * r25 : vpd - * r17 : isr - */ -GLOBAL_ENTRY(kvm_vps_resume_handler) - movl r30 = PAL_VPS_RESUME_HANDLER - ;; - ld8 r26=[r25] - shr r17=r17,IA64_ISR_IR_BIT - ;; - dep r26=r17,r26,63,1 // bit 63 of r26 indicate whether enable CFLE - mov pr=r23,-2 - br.sptk.many kvm_vps_entry -END(kvm_vps_resume_handler) - -//mov r1=ar3 -GLOBAL_ENTRY(kvm_asm_mov_from_ar) -#ifndef ACCE_MOV_FROM_AR - br.many kvm_virtualization_fault_back -#endif - add r18=VMM_VCPU_ITC_OFS_OFFSET, r21 - add r16=VMM_VCPU_LAST_ITC_OFFSET,r21 - extr.u r17=r25,6,7 - ;; - ld8 r18=[r18] - mov r19=ar.itc - mov r24=b0 - ;; - add r19=r19,r18 - addl r20=@gprel(asm_mov_to_reg),gp - ;; - st8 [r16] = r19 - adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20 - shladd r17=r17,4,r20 - ;; - mov b0=r17 - br.sptk.few b0 - ;; -END(kvm_asm_mov_from_ar) - -/* - * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC - * clock as it's source for emulating the ITC. This version will be - * copied on top of the original version if the host is determined to - * be an SN2. 
- */ -GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2) - add r18=VMM_VCPU_ITC_OFS_OFFSET, r21 - movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT)) - - add r16=VMM_VCPU_LAST_ITC_OFFSET,r21 - extr.u r17=r25,6,7 - mov r24=b0 - ;; - ld8 r18=[r18] - ld8 r19=[r19] - addl r20=@gprel(asm_mov_to_reg),gp - ;; - add r19=r19,r18 - shladd r17=r17,4,r20 - ;; - adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20 - st8 [r16] = r19 - mov b0=r17 - br.sptk.few b0 - ;; -END(kvm_asm_mov_from_ar_sn2) - - - -// mov r1=rr[r3] -GLOBAL_ENTRY(kvm_asm_mov_from_rr) -#ifndef ACCE_MOV_FROM_RR - br.many kvm_virtualization_fault_back -#endif - extr.u r16=r25,20,7 - extr.u r17=r25,6,7 - addl r20=@gprel(asm_mov_from_reg),gp - ;; - adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20 - shladd r16=r16,4,r20 - mov r24=b0 - ;; - add r27=VMM_VCPU_VRR0_OFFSET,r21 - mov b0=r16 - br.many b0 - ;; -kvm_asm_mov_from_rr_back_1: - adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 - adds r22=asm_mov_to_reg-asm_mov_from_reg,r20 - shr.u r26=r19,61 - ;; - shladd r17=r17,4,r22 - shladd r27=r26,3,r27 - ;; - ld8 r19=[r27] - mov b0=r17 - br.many b0 -END(kvm_asm_mov_from_rr) - - -// mov rr[r3]=r2 -GLOBAL_ENTRY(kvm_asm_mov_to_rr) -#ifndef ACCE_MOV_TO_RR - br.many kvm_virtualization_fault_back -#endif - extr.u r16=r25,20,7 - extr.u r17=r25,13,7 - addl r20=@gprel(asm_mov_from_reg),gp - ;; - adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20 - shladd r16=r16,4,r20 - mov r22=b0 - ;; - add r27=VMM_VCPU_VRR0_OFFSET,r21 - mov b0=r16 - br.many b0 - ;; -kvm_asm_mov_to_rr_back_1: - adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20 - shr.u r23=r19,61 - shladd r17=r17,4,r20 - ;; - //if rr6, go back - cmp.eq p6,p0=6,r23 - mov b0=r22 - (p6) br.cond.dpnt.many kvm_virtualization_fault_back - ;; - mov r28=r19 - mov b0=r17 - br.many b0 -kvm_asm_mov_to_rr_back_2: - adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 - shladd r27=r23,3,r27 - ;; // vrr.rid<<4 |0xe - st8 [r27]=r19 - mov b0=r30 - ;; - extr.u r16=r19,8,26 - extr.u r18 =r19,2,6 - mov r17 =0xe - ;; - shladd r16 = r16, 4, r17 - extr.u r19 =r19,0,8 - ;; - shl r16 = r16,8 - ;; - add r19 = r19, r16 - ;; //set ve 1 - dep r19=-1,r19,0,1 - cmp.lt p6,p0=14,r18 - ;; - (p6) mov r18=14 - ;; - (p6) dep r19=r18,r19,2,6 - ;; - cmp.eq p6,p0=0,r23 - ;; - cmp.eq.or p6,p0=4,r23 - ;; - adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21 - (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 - ;; - ld4 r16=[r16] - cmp.eq p7,p0=r0,r0 - (p6) shladd r17=r23,1,r17 - ;; - (p6) st8 [r17]=r19 - (p6) tbit.nz p6,p7=r16,0 - ;; - (p7) mov rr[r28]=r19 - mov r24=r22 - br.many b0 -END(kvm_asm_mov_to_rr) - - -//rsm -GLOBAL_ENTRY(kvm_asm_rsm) -#ifndef ACCE_RSM - br.many kvm_virtualization_fault_back -#endif - VMX_VPS_SYNC_READ - ;; - extr.u r26=r25,6,21 - extr.u r27=r25,31,2 - ;; - extr.u r28=r25,36,1 - dep r26=r27,r26,21,2 - ;; - add r17=VPD_VPSR_START_OFFSET,r16 - add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 - //r26 is imm24 - dep r26=r28,r26,23,1 - ;; - ld8 r18=[r17] - movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI - ld4 r23=[r22] - sub r27=-1,r26 - mov r24=b0 - ;; - mov r20=cr.ipsr - or r28=r27,r28 - and r19=r18,r27 - ;; - st8 [r17]=r19 - and r20=r20,r28 - /* Comment it out due to short of fp lazy alorgithm support - adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 - ;; - ld8 r27=[r27] - ;; - tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT - ;; - (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 - */ - ;; - mov cr.ipsr=r20 - tbit.nz p6,p0=r23,0 - ;; - tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT - (p6) br.dptk kvm_resume_to_guest_with_sync - ;; - add r26=VMM_VCPU_META_RR0_OFFSET,r21 - add 
r27=VMM_VCPU_META_RR0_OFFSET+8,r21 - dep r23=-1,r23,0,1 - ;; - ld8 r26=[r26] - ld8 r27=[r27] - st4 [r22]=r23 - dep.z r28=4,61,3 - ;; - mov rr[r0]=r26 - ;; - mov rr[r28]=r27 - ;; - srlz.d - br.many kvm_resume_to_guest_with_sync -END(kvm_asm_rsm) - - -//ssm -GLOBAL_ENTRY(kvm_asm_ssm) -#ifndef ACCE_SSM - br.many kvm_virtualization_fault_back -#endif - VMX_VPS_SYNC_READ - ;; - extr.u r26=r25,6,21 - extr.u r27=r25,31,2 - ;; - extr.u r28=r25,36,1 - dep r26=r27,r26,21,2 - ;; //r26 is imm24 - add r27=VPD_VPSR_START_OFFSET,r16 - dep r26=r28,r26,23,1 - ;; //r19 vpsr - ld8 r29=[r27] - mov r24=b0 - ;; - add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 - mov r20=cr.ipsr - or r19=r29,r26 - ;; - ld4 r23=[r22] - st8 [r27]=r19 - or r20=r20,r26 - ;; - mov cr.ipsr=r20 - movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT - ;; - and r19=r28,r19 - tbit.z p6,p0=r23,0 - ;; - cmp.ne.or p6,p0=r28,r19 - (p6) br.dptk kvm_asm_ssm_1 - ;; - add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 - add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 - dep r23=0,r23,0,1 - ;; - ld8 r26=[r26] - ld8 r27=[r27] - st4 [r22]=r23 - dep.z r28=4,61,3 - ;; - mov rr[r0]=r26 - ;; - mov rr[r28]=r27 - ;; - srlz.d - ;; -kvm_asm_ssm_1: - tbit.nz p6,p0=r29,IA64_PSR_I_BIT - ;; - tbit.z.or p6,p0=r19,IA64_PSR_I_BIT - (p6) br.dptk kvm_resume_to_guest_with_sync - ;; - add r29=VPD_VTPR_START_OFFSET,r16 - add r30=VPD_VHPI_START_OFFSET,r16 - ;; - ld8 r29=[r29] - ld8 r30=[r30] - ;; - extr.u r17=r29,4,4 - extr.u r18=r29,16,1 - ;; - dep r17=r18,r17,4,1 - ;; - cmp.gt p6,p0=r30,r17 - (p6) br.dpnt.few kvm_asm_dispatch_vexirq - br.many kvm_resume_to_guest_with_sync -END(kvm_asm_ssm) - - -//mov psr.l=r2 -GLOBAL_ENTRY(kvm_asm_mov_to_psr) -#ifndef ACCE_MOV_TO_PSR - br.many kvm_virtualization_fault_back -#endif - VMX_VPS_SYNC_READ - ;; - extr.u r26=r25,13,7 //r2 - addl r20=@gprel(asm_mov_from_reg),gp - ;; - adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20 - shladd r26=r26,4,r20 - mov r24=b0 - ;; - add r27=VPD_VPSR_START_OFFSET,r16 - mov b0=r26 - br.many b0 - ;; -kvm_asm_mov_to_psr_back: - ld8 r17=[r27] - add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 - dep r19=0,r19,32,32 - ;; - ld4 r23=[r22] - dep r18=0,r17,0,32 - ;; - add r30=r18,r19 - movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT - ;; - st8 [r27]=r30 - and r27=r28,r30 - and r29=r28,r17 - ;; - cmp.eq p5,p0=r29,r27 - cmp.eq p6,p7=r28,r27 - (p5) br.many kvm_asm_mov_to_psr_1 - ;; - //virtual to physical - (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21 - (p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 - (p7) dep r23=-1,r23,0,1 - ;; - //physical to virtual - (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 - (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 - (p6) dep r23=0,r23,0,1 - ;; - ld8 r26=[r26] - ld8 r27=[r27] - st4 [r22]=r23 - dep.z r28=4,61,3 - ;; - mov rr[r0]=r26 - ;; - mov rr[r28]=r27 - ;; - srlz.d - ;; -kvm_asm_mov_to_psr_1: - mov r20=cr.ipsr - movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT - ;; - or r19=r19,r28 - dep r20=0,r20,0,32 - ;; - add r20=r19,r20 - mov b0=r24 - ;; - /* Comment it out due to short of fp lazy algorithm support - adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 - ;; - ld8 r27=[r27] - ;; - tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT - ;; - (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 - ;; - */ - mov cr.ipsr=r20 - cmp.ne p6,p0=r0,r0 - ;; - tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT - tbit.z.or p6,p0=r30,IA64_PSR_I_BIT - (p6) br.dpnt.few kvm_resume_to_guest_with_sync - ;; - add r29=VPD_VTPR_START_OFFSET,r16 - add r30=VPD_VHPI_START_OFFSET,r16 - ;; - ld8 r29=[r29] - ld8 r30=[r30] - ;; - extr.u r17=r29,4,4 - extr.u r18=r29,16,1 - ;; - dep 
r17=r18,r17,4,1 - ;; - cmp.gt p6,p0=r30,r17 - (p6) br.dpnt.few kvm_asm_dispatch_vexirq - br.many kvm_resume_to_guest_with_sync -END(kvm_asm_mov_to_psr) - - -ENTRY(kvm_asm_dispatch_vexirq) -//increment iip - mov r17 = b0 - mov r18 = r31 -{.mii - add r25=VMM_VPD_BASE_OFFSET,r21 - nop 0x0 - mov r24 = ip - ;; -} -{.mmb - add r24 = 0x20, r24 - ld8 r25 = [r25] - br.sptk.many kvm_vps_sync_write -} - mov b0 =r17 - mov r16=cr.ipsr - mov r31 = r18 - mov r19 = 37 - ;; - extr.u r17=r16,IA64_PSR_RI_BIT,2 - tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 - ;; - (p6) mov r18=cr.iip - (p6) mov r17=r0 - (p7) add r17=1,r17 - ;; - (p6) add r18=0x10,r18 - dep r16=r17,r16,IA64_PSR_RI_BIT,2 - ;; - (p6) mov cr.iip=r18 - mov cr.ipsr=r16 - mov r30 =1 - br.many kvm_dispatch_vexirq -END(kvm_asm_dispatch_vexirq) - -// thash -// TODO: add support when pta.vf = 1 -GLOBAL_ENTRY(kvm_asm_thash) -#ifndef ACCE_THASH - br.many kvm_virtualization_fault_back -#endif - extr.u r17=r25,20,7 // get r3 from opcode in r25 - extr.u r18=r25,6,7 // get r1 from opcode in r25 - addl r20=@gprel(asm_mov_from_reg),gp - ;; - adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20 - shladd r17=r17,4,r20 // get addr of MOVE_FROM_REG(r17) - adds r16=VMM_VPD_BASE_OFFSET,r21 // get vcpu.arch.priveregs - ;; - mov r24=b0 - ;; - ld8 r16=[r16] // get VPD addr - mov b0=r17 - br.many b0 // r19 return value - ;; -kvm_asm_thash_back1: - shr.u r23=r19,61 // get RR number - adds r28=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr - adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta - ;; - shladd r27=r23,3,r28 // get vcpu->arch.vrr[r23]'s addr - ld8 r17=[r16] // get PTA - mov r26=1 - ;; - extr.u r29=r17,2,6 // get pta.size - ld8 r28=[r27] // get vcpu->arch.vrr[r23]'s value - ;; - mov b0=r24 - //Fallback to C if pta.vf is set - tbit.nz p6,p0=r17, 8 - ;; - (p6) mov r24=EVENT_THASH - (p6) br.cond.dpnt.many kvm_virtualization_fault_back - extr.u r28=r28,2,6 // get rr.ps - shl r22=r26,r29 // 1UL << pta.size - ;; - shr.u r23=r19,r28 // vaddr >> rr.ps - adds r26=3,r29 // pta.size + 3 - shl r27=r17,3 // pta << 3 - ;; - shl r23=r23,3 // (vaddr >> rr.ps) << 3 - shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3) - movl r16=7<<61 - ;; - adds r22=-1,r22 // (1UL << pta.size) - 1 - shl r27=r27,r29 // ((pta<<3)>>(pta.size+3))<<pta.size - and r19=r19,r16 // vaddr & VRN_MASK - ;; - and r22=r22,r23 // vhpt_offset - or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size) - adds r26=asm_mov_to_reg-asm_mov_from_reg,r20 - ;; - or r19=r19,r22 // calc pval - shladd r17=r18,4,r26 - adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 - ;; - mov b0=r17 - br.many b0 -END(kvm_asm_thash) - -#define MOV_TO_REG0 \ -{; \ - nop.b 0x0; \ - nop.b 0x0; \ - nop.b 0x0; \ - ;; \ -}; - - -#define MOV_TO_REG(n) \ -{; \ - mov r##n##=r19; \ - mov b0=r30; \ - br.sptk.many b0; \ - ;; \ -}; - - -#define MOV_FROM_REG(n) \ -{; \ - mov r19=r##n##; \ - mov b0=r30; \ - br.sptk.many b0; \ - ;; \ -}; - - -#define MOV_TO_BANK0_REG(n) \ -ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \ -{; \ - mov r26=r2; \ - mov r2=r19; \ - bsw.1; \ - ;; \ -}; \ -{; \ - mov r##n##=r2; \ - nop.b 0x0; \ - bsw.0; \ - ;; \ -}; \ -{; \ - mov r2=r26; \ - mov b0=r30; \ - br.sptk.many b0; \ - ;; \ -}; \ -END(asm_mov_to_bank0_reg##n##) - - -#define MOV_FROM_BANK0_REG(n) \ -ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##); \ -{; \ - mov r26=r2; \ - nop.b 0x0; \ - bsw.1; \ - ;; \ -}; \ -{; \ - mov r2=r##n##; \ - nop.b 0x0; \ - bsw.0; \ - ;; \ -}; \ -{; \ - mov r19=r2; \ - mov r2=r26; \ - mov b0=r30; \ -}; \ -{; \ - nop.b 0x0; \ - nop.b 0x0; \ - 
br.sptk.many b0; \ - ;; \ -}; \ -END(asm_mov_from_bank0_reg##n##) - - -#define JMP_TO_MOV_TO_BANK0_REG(n) \ -{; \ - nop.b 0x0; \ - nop.b 0x0; \ - br.sptk.many asm_mov_to_bank0_reg##n##; \ - ;; \ -} - - -#define JMP_TO_MOV_FROM_BANK0_REG(n) \ -{; \ - nop.b 0x0; \ - nop.b 0x0; \ - br.sptk.many asm_mov_from_bank0_reg##n##; \ - ;; \ -} - - -MOV_FROM_BANK0_REG(16) -MOV_FROM_BANK0_REG(17) -MOV_FROM_BANK0_REG(18) -MOV_FROM_BANK0_REG(19) -MOV_FROM_BANK0_REG(20) -MOV_FROM_BANK0_REG(21) -MOV_FROM_BANK0_REG(22) -MOV_FROM_BANK0_REG(23) -MOV_FROM_BANK0_REG(24) -MOV_FROM_BANK0_REG(25) -MOV_FROM_BANK0_REG(26) -MOV_FROM_BANK0_REG(27) -MOV_FROM_BANK0_REG(28) -MOV_FROM_BANK0_REG(29) -MOV_FROM_BANK0_REG(30) -MOV_FROM_BANK0_REG(31) - - -// mov from reg table -ENTRY(asm_mov_from_reg) - MOV_FROM_REG(0) - MOV_FROM_REG(1) - MOV_FROM_REG(2) - MOV_FROM_REG(3) - MOV_FROM_REG(4) - MOV_FROM_REG(5) - MOV_FROM_REG(6) - MOV_FROM_REG(7) - MOV_FROM_REG(8) - MOV_FROM_REG(9) - MOV_FROM_REG(10) - MOV_FROM_REG(11) - MOV_FROM_REG(12) - MOV_FROM_REG(13) - MOV_FROM_REG(14) - MOV_FROM_REG(15) - JMP_TO_MOV_FROM_BANK0_REG(16) - JMP_TO_MOV_FROM_BANK0_REG(17) - JMP_TO_MOV_FROM_BANK0_REG(18) - JMP_TO_MOV_FROM_BANK0_REG(19) - JMP_TO_MOV_FROM_BANK0_REG(20) - JMP_TO_MOV_FROM_BANK0_REG(21) - JMP_TO_MOV_FROM_BANK0_REG(22) - JMP_TO_MOV_FROM_BANK0_REG(23) - JMP_TO_MOV_FROM_BANK0_REG(24) - JMP_TO_MOV_FROM_BANK0_REG(25) - JMP_TO_MOV_FROM_BANK0_REG(26) - JMP_TO_MOV_FROM_BANK0_REG(27) - JMP_TO_MOV_FROM_BANK0_REG(28) - JMP_TO_MOV_FROM_BANK0_REG(29) - JMP_TO_MOV_FROM_BANK0_REG(30) - JMP_TO_MOV_FROM_BANK0_REG(31) - MOV_FROM_REG(32) - MOV_FROM_REG(33) - MOV_FROM_REG(34) - MOV_FROM_REG(35) - MOV_FROM_REG(36) - MOV_FROM_REG(37) - MOV_FROM_REG(38) - MOV_FROM_REG(39) - MOV_FROM_REG(40) - MOV_FROM_REG(41) - MOV_FROM_REG(42) - MOV_FROM_REG(43) - MOV_FROM_REG(44) - MOV_FROM_REG(45) - MOV_FROM_REG(46) - MOV_FROM_REG(47) - MOV_FROM_REG(48) - MOV_FROM_REG(49) - MOV_FROM_REG(50) - MOV_FROM_REG(51) - MOV_FROM_REG(52) - MOV_FROM_REG(53) - MOV_FROM_REG(54) - MOV_FROM_REG(55) - MOV_FROM_REG(56) - MOV_FROM_REG(57) - MOV_FROM_REG(58) - MOV_FROM_REG(59) - MOV_FROM_REG(60) - MOV_FROM_REG(61) - MOV_FROM_REG(62) - MOV_FROM_REG(63) - MOV_FROM_REG(64) - MOV_FROM_REG(65) - MOV_FROM_REG(66) - MOV_FROM_REG(67) - MOV_FROM_REG(68) - MOV_FROM_REG(69) - MOV_FROM_REG(70) - MOV_FROM_REG(71) - MOV_FROM_REG(72) - MOV_FROM_REG(73) - MOV_FROM_REG(74) - MOV_FROM_REG(75) - MOV_FROM_REG(76) - MOV_FROM_REG(77) - MOV_FROM_REG(78) - MOV_FROM_REG(79) - MOV_FROM_REG(80) - MOV_FROM_REG(81) - MOV_FROM_REG(82) - MOV_FROM_REG(83) - MOV_FROM_REG(84) - MOV_FROM_REG(85) - MOV_FROM_REG(86) - MOV_FROM_REG(87) - MOV_FROM_REG(88) - MOV_FROM_REG(89) - MOV_FROM_REG(90) - MOV_FROM_REG(91) - MOV_FROM_REG(92) - MOV_FROM_REG(93) - MOV_FROM_REG(94) - MOV_FROM_REG(95) - MOV_FROM_REG(96) - MOV_FROM_REG(97) - MOV_FROM_REG(98) - MOV_FROM_REG(99) - MOV_FROM_REG(100) - MOV_FROM_REG(101) - MOV_FROM_REG(102) - MOV_FROM_REG(103) - MOV_FROM_REG(104) - MOV_FROM_REG(105) - MOV_FROM_REG(106) - MOV_FROM_REG(107) - MOV_FROM_REG(108) - MOV_FROM_REG(109) - MOV_FROM_REG(110) - MOV_FROM_REG(111) - MOV_FROM_REG(112) - MOV_FROM_REG(113) - MOV_FROM_REG(114) - MOV_FROM_REG(115) - MOV_FROM_REG(116) - MOV_FROM_REG(117) - MOV_FROM_REG(118) - MOV_FROM_REG(119) - MOV_FROM_REG(120) - MOV_FROM_REG(121) - MOV_FROM_REG(122) - MOV_FROM_REG(123) - MOV_FROM_REG(124) - MOV_FROM_REG(125) - MOV_FROM_REG(126) - MOV_FROM_REG(127) -END(asm_mov_from_reg) - - -/* must be in bank 0 - * parameter: - * r31: pr - * r24: b0 - */ 
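
The resume entries that follow are the tail of every fast path above: control returns to the guest through one of two PAL virtualization services, chosen by the guest's interruption-collection bit (vpsr.ic), and when the handler variant is taken, cr.isr.ir is folded into bit 63 of the first VPD word so PAL knows whether the register frame is incomplete. A minimal C sketch of that decision, assuming hypothetical wrappers pal_vps_resume_normal()/pal_vps_resume_handler() in place of the hand-scheduled branch-register setup, with bit positions per the IA-64 SDM:

	/* Sketch only; mirrors the tail of kvm_resume_to_guest below. */
	#define PSR_IC_BIT	13	/* psr.ic: interruption collection */
	#define ISR_IR_BIT	38	/* isr.ir: incomplete register frame */

	/* hypothetical wrappers around PAL_VPS_RESUME_{NORMAL,HANDLER} */
	void pal_vps_resume_normal(unsigned long *vpd);
	void pal_vps_resume_handler(unsigned long *vpd);

	static void resume_to_guest_sketch(unsigned long vpsr, unsigned long isr,
					   unsigned long *vpd)
	{
		if (vpsr & (1UL << PSR_IC_BIT)) {
			/* guest is collecting interruptions: plain resume */
			pal_vps_resume_normal(vpd);
		} else {
			/* fold isr.ir into bit 63 of vpd[0], as the dep
			 * instruction in kvm_resume_to_guest does */
			unsigned long ir = (isr >> ISR_IR_BIT) & 1;

			vpd[0] = (vpd[0] & ~(1UL << 63)) | (ir << 63);
			pal_vps_resume_handler(vpd);
		}
	}
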
-ENTRY(kvm_resume_to_guest_with_sync) - adds r19=VMM_VPD_BASE_OFFSET,r21 - mov r16 = r31 - mov r17 = r24 - ;; -{.mii - ld8 r25 =[r19] - nop 0x0 - mov r24 = ip - ;; -} -{.mmb - add r24 =0x20, r24 - nop 0x0 - br.sptk.many kvm_vps_sync_write -} - - mov r31 = r16 - mov r24 =r17 - ;; - br.sptk.many kvm_resume_to_guest -END(kvm_resume_to_guest_with_sync) - -ENTRY(kvm_resume_to_guest) - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - ld8 r1 =[r16] - adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21 - ;; - mov r16=cr.ipsr - ;; - ld8 r20 = [r20] - adds r19=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r25=[r19] - extr.u r17=r16,IA64_PSR_RI_BIT,2 - tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 - ;; - (p6) mov r18=cr.iip - (p6) mov r17=r0 - ;; - (p6) add r18=0x10,r18 - (p7) add r17=1,r17 - ;; - (p6) mov cr.iip=r18 - dep r16=r17,r16,IA64_PSR_RI_BIT,2 - ;; - mov cr.ipsr=r16 - adds r19= VPD_VPSR_START_OFFSET,r25 - add r28=PAL_VPS_RESUME_NORMAL,r20 - add r29=PAL_VPS_RESUME_HANDLER,r20 - ;; - ld8 r19=[r19] - mov b0=r29 - mov r27=cr.isr - ;; - tbit.z p6,p7 = r19,IA64_PSR_IC_BIT // p7=vpsr.ic - shr r27=r27,IA64_ISR_IR_BIT - ;; - (p6) ld8 r26=[r25] - (p7) mov b0=r28 - ;; - (p6) dep r26=r27,r26,63,1 - mov pr=r31,-2 - br.sptk.many b0 // call pal service - ;; -END(kvm_resume_to_guest) - - -MOV_TO_BANK0_REG(16) -MOV_TO_BANK0_REG(17) -MOV_TO_BANK0_REG(18) -MOV_TO_BANK0_REG(19) -MOV_TO_BANK0_REG(20) -MOV_TO_BANK0_REG(21) -MOV_TO_BANK0_REG(22) -MOV_TO_BANK0_REG(23) -MOV_TO_BANK0_REG(24) -MOV_TO_BANK0_REG(25) -MOV_TO_BANK0_REG(26) -MOV_TO_BANK0_REG(27) -MOV_TO_BANK0_REG(28) -MOV_TO_BANK0_REG(29) -MOV_TO_BANK0_REG(30) -MOV_TO_BANK0_REG(31) - - -// mov to reg table -ENTRY(asm_mov_to_reg) - MOV_TO_REG0 - MOV_TO_REG(1) - MOV_TO_REG(2) - MOV_TO_REG(3) - MOV_TO_REG(4) - MOV_TO_REG(5) - MOV_TO_REG(6) - MOV_TO_REG(7) - MOV_TO_REG(8) - MOV_TO_REG(9) - MOV_TO_REG(10) - MOV_TO_REG(11) - MOV_TO_REG(12) - MOV_TO_REG(13) - MOV_TO_REG(14) - MOV_TO_REG(15) - JMP_TO_MOV_TO_BANK0_REG(16) - JMP_TO_MOV_TO_BANK0_REG(17) - JMP_TO_MOV_TO_BANK0_REG(18) - JMP_TO_MOV_TO_BANK0_REG(19) - JMP_TO_MOV_TO_BANK0_REG(20) - JMP_TO_MOV_TO_BANK0_REG(21) - JMP_TO_MOV_TO_BANK0_REG(22) - JMP_TO_MOV_TO_BANK0_REG(23) - JMP_TO_MOV_TO_BANK0_REG(24) - JMP_TO_MOV_TO_BANK0_REG(25) - JMP_TO_MOV_TO_BANK0_REG(26) - JMP_TO_MOV_TO_BANK0_REG(27) - JMP_TO_MOV_TO_BANK0_REG(28) - JMP_TO_MOV_TO_BANK0_REG(29) - JMP_TO_MOV_TO_BANK0_REG(30) - JMP_TO_MOV_TO_BANK0_REG(31) - MOV_TO_REG(32) - MOV_TO_REG(33) - MOV_TO_REG(34) - MOV_TO_REG(35) - MOV_TO_REG(36) - MOV_TO_REG(37) - MOV_TO_REG(38) - MOV_TO_REG(39) - MOV_TO_REG(40) - MOV_TO_REG(41) - MOV_TO_REG(42) - MOV_TO_REG(43) - MOV_TO_REG(44) - MOV_TO_REG(45) - MOV_TO_REG(46) - MOV_TO_REG(47) - MOV_TO_REG(48) - MOV_TO_REG(49) - MOV_TO_REG(50) - MOV_TO_REG(51) - MOV_TO_REG(52) - MOV_TO_REG(53) - MOV_TO_REG(54) - MOV_TO_REG(55) - MOV_TO_REG(56) - MOV_TO_REG(57) - MOV_TO_REG(58) - MOV_TO_REG(59) - MOV_TO_REG(60) - MOV_TO_REG(61) - MOV_TO_REG(62) - MOV_TO_REG(63) - MOV_TO_REG(64) - MOV_TO_REG(65) - MOV_TO_REG(66) - MOV_TO_REG(67) - MOV_TO_REG(68) - MOV_TO_REG(69) - MOV_TO_REG(70) - MOV_TO_REG(71) - MOV_TO_REG(72) - MOV_TO_REG(73) - MOV_TO_REG(74) - MOV_TO_REG(75) - MOV_TO_REG(76) - MOV_TO_REG(77) - MOV_TO_REG(78) - MOV_TO_REG(79) - MOV_TO_REG(80) - MOV_TO_REG(81) - MOV_TO_REG(82) - MOV_TO_REG(83) - MOV_TO_REG(84) - MOV_TO_REG(85) - MOV_TO_REG(86) - MOV_TO_REG(87) - MOV_TO_REG(88) - MOV_TO_REG(89) - MOV_TO_REG(90) - MOV_TO_REG(91) - MOV_TO_REG(92) - MOV_TO_REG(93) - MOV_TO_REG(94) - MOV_TO_REG(95) - MOV_TO_REG(96) - MOV_TO_REG(97) - MOV_TO_REG(98) - MOV_TO_REG(99) - 
MOV_TO_REG(100) - MOV_TO_REG(101) - MOV_TO_REG(102) - MOV_TO_REG(103) - MOV_TO_REG(104) - MOV_TO_REG(105) - MOV_TO_REG(106) - MOV_TO_REG(107) - MOV_TO_REG(108) - MOV_TO_REG(109) - MOV_TO_REG(110) - MOV_TO_REG(111) - MOV_TO_REG(112) - MOV_TO_REG(113) - MOV_TO_REG(114) - MOV_TO_REG(115) - MOV_TO_REG(116) - MOV_TO_REG(117) - MOV_TO_REG(118) - MOV_TO_REG(119) - MOV_TO_REG(120) - MOV_TO_REG(121) - MOV_TO_REG(122) - MOV_TO_REG(123) - MOV_TO_REG(124) - MOV_TO_REG(125) - MOV_TO_REG(126) - MOV_TO_REG(127) -END(asm_mov_to_reg) diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c deleted file mode 100644 index b0398740b48..00000000000 --- a/arch/ia64/kvm/process.c +++ /dev/null @@ -1,1024 +0,0 @@ -/* - * process.c: handle interruption inject for guests. - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Shaofan Li (Susue Li) <susie.li@intel.com> - * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com> - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Xiantao Zhang (xiantao.zhang@intel.com) - */ -#include "vcpu.h" - -#include <asm/pal.h> -#include <asm/sal.h> -#include <asm/fpswa.h> -#include <asm/kregs.h> -#include <asm/tlb.h> - -fpswa_interface_t *vmm_fpswa_interface; - -#define IA64_VHPT_TRANS_VECTOR 0x0000 -#define IA64_INST_TLB_VECTOR 0x0400 -#define IA64_DATA_TLB_VECTOR 0x0800 -#define IA64_ALT_INST_TLB_VECTOR 0x0c00 -#define IA64_ALT_DATA_TLB_VECTOR 0x1000 -#define IA64_DATA_NESTED_TLB_VECTOR 0x1400 -#define IA64_INST_KEY_MISS_VECTOR 0x1800 -#define IA64_DATA_KEY_MISS_VECTOR 0x1c00 -#define IA64_DIRTY_BIT_VECTOR 0x2000 -#define IA64_INST_ACCESS_BIT_VECTOR 0x2400 -#define IA64_DATA_ACCESS_BIT_VECTOR 0x2800 -#define IA64_BREAK_VECTOR 0x2c00 -#define IA64_EXTINT_VECTOR 0x3000 -#define IA64_PAGE_NOT_PRESENT_VECTOR 0x5000 -#define IA64_KEY_PERMISSION_VECTOR 0x5100 -#define IA64_INST_ACCESS_RIGHTS_VECTOR 0x5200 -#define IA64_DATA_ACCESS_RIGHTS_VECTOR 0x5300 -#define IA64_GENEX_VECTOR 0x5400 -#define IA64_DISABLED_FPREG_VECTOR 0x5500 -#define IA64_NAT_CONSUMPTION_VECTOR 0x5600 -#define IA64_SPECULATION_VECTOR 0x5700 /* UNUSED */ -#define IA64_DEBUG_VECTOR 0x5900 -#define IA64_UNALIGNED_REF_VECTOR 0x5a00 -#define IA64_UNSUPPORTED_DATA_REF_VECTOR 0x5b00 -#define IA64_FP_FAULT_VECTOR 0x5c00 -#define IA64_FP_TRAP_VECTOR 0x5d00 -#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 0x5e00 -#define IA64_TAKEN_BRANCH_TRAP_VECTOR 0x5f00 -#define IA64_SINGLE_STEP_TRAP_VECTOR 0x6000 - -/* SDM vol2 5.5 - IVA based interruption handling */ -#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\ - IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT | \ - IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT) - -#define DOMN_PAL_REQUEST 0x110000 -#define DOMN_SAL_REQUEST 0x110001 - -static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800, - 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, - 0x4000, 0x4400, 0x4800, 
0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400, - 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00, - 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600, - 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00, - 0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, - 0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00 -}; - -static void collect_interruption(struct kvm_vcpu *vcpu) -{ - u64 ipsr; - u64 vdcr; - u64 vifs; - unsigned long vpsr; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - vpsr = vcpu_get_psr(vcpu); - vcpu_bsw0(vcpu); - if (vpsr & IA64_PSR_IC) { - - /* Sync mpsr id/da/dd/ss/ed bits to vipsr - * since after guest do rfi, we still want these bits on in - * mpsr - */ - - ipsr = regs->cr_ipsr; - vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA - | IA64_PSR_DD | IA64_PSR_SS - | IA64_PSR_ED)); - vcpu_set_ipsr(vcpu, vpsr); - - /* Currently, for trap, we do not advance IIP to next - * instruction. That's because we assume caller already - * set up IIP correctly - */ - - vcpu_set_iip(vcpu , regs->cr_iip); - - /* set vifs.v to zero */ - vifs = VCPU(vcpu, ifs); - vifs &= ~IA64_IFS_V; - vcpu_set_ifs(vcpu, vifs); - - vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa)); - } - - vdcr = VCPU(vcpu, dcr); - - /* Set guest psr - * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged - * be: set to the value of dcr.be - * pp: set to the value of dcr.pp - */ - vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION; - vpsr |= (vdcr & IA64_DCR_BE); - - /* VDCR pp bit position is different from VPSR pp bit */ - if (vdcr & IA64_DCR_PP) { - vpsr |= IA64_PSR_PP; - } else { - vpsr &= ~IA64_PSR_PP; - } - - vcpu_set_psr(vcpu, vpsr); - -} - -void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec) -{ - u64 viva; - struct kvm_pt_regs *regs; - union ia64_isr pt_isr; - - regs = vcpu_regs(vcpu); - - /* clear cr.isr.ir (incomplete register frame)*/ - pt_isr.val = VMX(vcpu, cr_isr); - pt_isr.ir = 0; - VMX(vcpu, cr_isr) = pt_isr.val; - - collect_interruption(vcpu); - - viva = vcpu_get_iva(vcpu); - regs->cr_iip = viva + vec; -} - -static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa) -{ - union ia64_rr rr, rr1; - - rr.val = vcpu_get_rr(vcpu, ifa); - rr1.val = 0; - rr1.ps = rr.ps; - rr1.rid = rr.rid; - return (rr1.val); -} - -/* - * Set vIFA & vITIR & vIHA, when vPSR.ic =1 - * Parameter: - * set_ifa: if true, set vIFA - * set_itir: if true, set vITIR - * set_iha: if true, set vIHA - */ -void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr, - int set_ifa, int set_itir, int set_iha) -{ - long vpsr; - u64 value; - - vpsr = VCPU(vcpu, vpsr); - /* Vol2, Table 8-1 */ - if (vpsr & IA64_PSR_IC) { - if (set_ifa) - vcpu_set_ifa(vcpu, vadr); - if (set_itir) { - value = vcpu_get_itir_on_fault(vcpu, vadr); - vcpu_set_itir(vcpu, value); - } - - if (set_iha) { - value = vcpu_thash(vcpu, vadr); - vcpu_set_iha(vcpu, value); - } - } -} - -/* - * Data TLB Fault - * @ Data TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR, IHA */ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); - inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR); -} - -/* - * Instruction TLB Fault - * @ Instruction TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR, IHA */ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); - inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR); -} - -/* - * Data Nested TLB Fault - * @ Data Nested TLB Vector - * Refer 
to SDM Vol2 Table 5-6 & 8-1 - */ -void nested_dtlb(struct kvm_vcpu *vcpu) -{ - inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR); -} - -/* - * Alternate Data TLB Fault - * @ Alternate Data TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr) -{ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR); -} - -/* - * Data TLB Fault - * @ Data TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr) -{ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR); -} - -/* Deal with: - * VHPT Translation Vector - */ -static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR, IHA*/ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); - inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR); -} - -/* - * VHPT Instruction Fault - * @ VHPT Translation vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - _vhpt_fault(vcpu, vadr); -} - -/* - * VHPT Data Fault - * @ VHPT Translation vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - _vhpt_fault(vcpu, vadr); -} - -/* - * Deal with: - * General Exception vector - */ -void _general_exception(struct kvm_vcpu *vcpu) -{ - inject_guest_interruption(vcpu, IA64_GENEX_VECTOR); -} - -/* - * Illegal Operation Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void illegal_op(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Illegal Dependency Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void illegal_dep(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Reserved Register/Field Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void rsv_reg_field(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} -/* - * Privileged Operation Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ - -void privilege_op(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Unimplement Data Address Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void unimpl_daddr(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Privileged Register Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void privilege_reg(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* Deal with - * Nat consumption vector - * Parameter: - * vaddr: Optional, if t == REGISTER - */ -static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr, - enum tlb_miss_type t) -{ - /* If vPSR.ic && t == DATA/INST, IFA */ - if (t == DATA || t == INSTRUCTION) { - /* IFA */ - set_ifa_itir_iha(vcpu, vadr, 1, 0, 0); - } - - inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR); -} - -/* - * Instruction Nat Page Consumption Fault - * @ Nat Consumption Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) -{ - _nat_consumption_fault(vcpu, vadr, INSTRUCTION); -} - -/* - * Register Nat Consumption Fault - * @ Nat Consumption Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void rnat_consumption(struct kvm_vcpu *vcpu) -{ - _nat_consumption_fault(vcpu, 0, REGISTER); -} - -/* - * Data Nat Page Consumption Fault - * @ Nat Consumption Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void 
dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) -{ - _nat_consumption_fault(vcpu, vadr, DATA); -} - -/* Deal with - * Page not present vector - */ -static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR */ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); -} - -void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) -{ - __page_not_present(vcpu, vadr); -} - -void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) -{ - __page_not_present(vcpu, vadr); -} - -/* Deal with - * Data access rights vector - */ -void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR */ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR); -} - -fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr, - unsigned long *fpsr, unsigned long *isr, unsigned long *pr, - unsigned long *ifs, struct kvm_pt_regs *regs) -{ - fp_state_t fp_state; - fpswa_ret_t ret; - struct kvm_vcpu *vcpu = current_vcpu; - - uint64_t old_rr7 = ia64_get_rr(7UL<<61); - - if (!vmm_fpswa_interface) - return (fpswa_ret_t) {-1, 0, 0, 0}; - - memset(&fp_state, 0, sizeof(fp_state_t)); - - /* - * compute fp_state. only FP registers f6 - f11 are used by the - * vmm, so set those bits in the mask and set the low volatile - * pointer to point to these registers. - */ - fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */ - - fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6; - - /* - * unsigned long (*EFI_FPSWA) ( - * unsigned long trap_type, - * void *Bundle, - * unsigned long *pipsr, - * unsigned long *pfsr, - * unsigned long *pisr, - * unsigned long *ppreds, - * unsigned long *pifs, - * void *fp_state); - */ - /*Call host fpswa interface directly to virtualize - *guest fpswa request! - */ - ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]); - ia64_srlz_d(); - - ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle, - ipsr, fpsr, isr, pr, ifs, &fp_state); - ia64_set_rr(7UL << 61, old_rr7); - ia64_srlz_d(); - return ret; -} - -/* - * Handle floating-point assist faults and traps for domain. - */ -unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs, - unsigned long isr) -{ - struct kvm_vcpu *v = current_vcpu; - IA64_BUNDLE bundle; - unsigned long fault_ip; - fpswa_ret_t ret; - - fault_ip = regs->cr_iip; - /* - * When the FP trap occurs, the trapping instruction is completed. - * If ipsr.ri == 0, there is the trapping instruction in previous - * bundle.
- */ - if (!fp_fault && (ia64_psr(regs)->ri == 0)) - fault_ip -= 16; - - if (fetch_code(v, fault_ip, &bundle)) - return -EAGAIN; - - if (!bundle.i64[0] && !bundle.i64[1]) - return -EACCES; - - ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr, - &isr, &regs->pr, &regs->cr_ifs, regs); - return ret.status; -} - -void reflect_interruption(u64 ifa, u64 isr, u64 iim, - u64 vec, struct kvm_pt_regs *regs) -{ - u64 vector; - int status ; - struct kvm_vcpu *vcpu = current_vcpu; - u64 vpsr = VCPU(vcpu, vpsr); - - vector = vec2off[vec]; - - if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) { - panic_vm(vcpu, "Interruption with vector :0x%lx occurs " - "with psr.ic = 0\n", vector); - return; - } - - switch (vec) { - case 32: /*IA64_FP_FAULT_VECTOR*/ - status = vmm_handle_fpu_swa(1, regs, isr); - if (!status) { - vcpu_increment_iip(vcpu); - return; - } else if (-EAGAIN == status) - return; - break; - case 33: /*IA64_FP_TRAP_VECTOR*/ - status = vmm_handle_fpu_swa(0, regs, isr); - if (!status) - return ; - break; - } - - VCPU(vcpu, isr) = isr; - VCPU(vcpu, iipa) = regs->cr_iip; - if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) - VCPU(vcpu, iim) = iim; - else - set_ifa_itir_iha(vcpu, ifa, 1, 1, 1); - - inject_guest_interruption(vcpu, vector); -} - -static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu, - unsigned long arg) -{ - struct thash_data *data; - unsigned long gpa, poff; - - if (!is_physical_mode(vcpu)) { - /* Depends on caller to provide the DTR or DTC mapping.*/ - data = vtlb_lookup(vcpu, arg, D_TLB); - if (data) - gpa = data->page_flags & _PAGE_PPN_MASK; - else { - data = vhpt_lookup(arg); - if (!data) - return 0; - gpa = data->gpaddr & _PAGE_PPN_MASK; - } - - poff = arg & (PSIZE(data->ps) - 1); - arg = PAGEALIGN(gpa, data->ps) | poff; - } - arg = kvm_gpa_to_mpa(arg << 1 >> 1); - - return (unsigned long)__va(arg); -} - -static void set_pal_call_data(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - unsigned long gr28 = vcpu_get_gr(vcpu, 28); - unsigned long gr29 = vcpu_get_gr(vcpu, 29); - unsigned long gr30 = vcpu_get_gr(vcpu, 30); - - /*FIXME:For static and stacked convention, firmware - * has put the parameters in gr28-gr31 before - * break to vmm !!*/ - - switch (gr28) { - case PAL_PERF_MON_INFO: - case PAL_HALT_INFO: - p->u.pal_data.gr29 = kvm_trans_pal_call_args(vcpu, gr29); - p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); - break; - case PAL_BRAND_INFO: - p->u.pal_data.gr29 = gr29; - p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30); - break; - default: - p->u.pal_data.gr29 = gr29; - p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); - } - p->u.pal_data.gr28 = gr28; - p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31); - - p->exit_reason = EXIT_REASON_PAL_CALL; -} - -static void get_pal_call_result(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - - if (p->exit_reason == EXIT_REASON_PAL_CALL) { - vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0); - vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0); - vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0); - vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0); - } else - panic_vm(vcpu, "Mis-set for exit reason!\n"); -} - -static void set_sal_call_data(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - - p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32); - p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33); - p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34); - p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35); - p->u.sal_data.in4 = vcpu_get_gr(vcpu,
36); - p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37); - p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38); - p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39); - p->exit_reason = EXIT_REASON_SAL_CALL; -} - -static void get_sal_call_result(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - - if (p->exit_reason == EXIT_REASON_SAL_CALL) { - vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0); - vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0); - vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0); - vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0); - } else - panic_vm(vcpu, "Mis-set for exit reason!\n"); -} - -void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, - unsigned long isr, unsigned long iim) -{ - struct kvm_vcpu *v = current_vcpu; - long psr; - - if (ia64_psr(regs)->cpl == 0) { - /* Allow hypercalls only when cpl = 0. */ - if (iim == DOMN_PAL_REQUEST) { - local_irq_save(psr); - set_pal_call_data(v); - vmm_transition(v); - get_pal_call_result(v); - vcpu_increment_iip(v); - local_irq_restore(psr); - return; - } else if (iim == DOMN_SAL_REQUEST) { - local_irq_save(psr); - set_sal_call_data(v); - vmm_transition(v); - get_sal_call_result(v); - vcpu_increment_iip(v); - local_irq_restore(psr); - return; - } - } - reflect_interruption(ifa, isr, iim, 11, regs); -} - -void check_pending_irq(struct kvm_vcpu *vcpu) -{ - int mask, h_pending, h_inservice; - u64 isr; - unsigned long vpsr; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - h_pending = highest_pending_irq(vcpu); - if (h_pending == NULL_VECTOR) { - update_vhpi(vcpu, NULL_VECTOR); - return; - } - h_inservice = highest_inservice_irq(vcpu); - - vpsr = VCPU(vcpu, vpsr); - mask = irq_masked(vcpu, h_pending, h_inservice); - if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) { - isr = vpsr & IA64_PSR_RI; - update_vhpi(vcpu, h_pending); - reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ - } else if (mask == IRQ_MASKED_BY_INSVC) { - if (VCPU(vcpu, vhpi)) - update_vhpi(vcpu, NULL_VECTOR); - } else { - /* masked by vpsr.i or vtpr.*/ - update_vhpi(vcpu, h_pending); - } -} - -static void generate_exirq(struct kvm_vcpu *vcpu) -{ - unsigned vpsr; - uint64_t isr; - - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - vpsr = VCPU(vcpu, vpsr); - isr = vpsr & IA64_PSR_RI; - if (!(vpsr & IA64_PSR_IC)) - panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n"); - reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ -} - -void vhpi_detection(struct kvm_vcpu *vcpu) -{ - uint64_t threshold, vhpi; - union ia64_tpr vtpr; - struct ia64_psr vpsr; - - vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - vtpr.val = VCPU(vcpu, tpr); - - threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic; - vhpi = VCPU(vcpu, vhpi); - if (vhpi > threshold) { - /* interrupt actived*/ - generate_exirq(vcpu); - } -} - -void leave_hypervisor_tail(void) -{ - struct kvm_vcpu *v = current_vcpu; - - if (VMX(v, timer_check)) { - VMX(v, timer_check) = 0; - if (VMX(v, itc_check)) { - if (vcpu_get_itc(v) > VCPU(v, itm)) { - if (!(VCPU(v, itv) & (1 << 16))) { - vcpu_pend_interrupt(v, VCPU(v, itv) - & 0xff); - VMX(v, itc_check) = 0; - } else { - v->arch.timer_pending = 1; - } - VMX(v, last_itc) = VCPU(v, itm) + 1; - } - } - } - - rmb(); - if (v->arch.irq_new_pending) { - v->arch.irq_new_pending = 0; - VMX(v, irq_check) = 0; - check_pending_irq(v); - return; - } - if (VMX(v, irq_check)) { - VMX(v, irq_check) = 0; - vhpi_detection(v); - } -} - -static inline void handle_lds(struct kvm_pt_regs *regs) -{ - regs->cr_ipsr |= IA64_PSR_ED; -} - -void physical_tlb_miss(struct 
kvm_vcpu *vcpu, unsigned long vadr, int type) -{ - unsigned long pte; - union ia64_rr rr; - - rr.val = ia64_get_rr(vadr); - pte = vadr & _PAGE_PPN_MASK; - pte = pte | PHY_PAGE_WB; - thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type); - return; -} - -void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs) -{ - unsigned long vpsr; - int type; - - u64 vhpt_adr, gppa, pteval, rr, itir; - union ia64_isr misr; - union ia64_pta vpta; - struct thash_data *data; - struct kvm_vcpu *v = current_vcpu; - - vpsr = VCPU(v, vpsr); - misr.val = VMX(v, cr_isr); - - type = vec; - - if (is_physical_mode(v) && (!(vadr << 1 >> 62))) { - if (vec == 2) { - if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) { - emulate_io_inst(v, ((vadr << 1) >> 1), 4); - return; - } - } - physical_tlb_miss(v, vadr, type); - return; - } - data = vtlb_lookup(v, vadr, type); - if (data != 0) { - if (type == D_TLB) { - gppa = (vadr & ((1UL << data->ps) - 1)) - + (data->ppn >> (data->ps - 12) << data->ps); - if (__gpfn_is_io(gppa >> PAGE_SHIFT)) { - if (data->pl >= ((regs->cr_ipsr >> - IA64_PSR_CPL0_BIT) & 3)) - emulate_io_inst(v, gppa, data->ma); - else { - vcpu_set_isr(v, misr.val); - data_access_rights(v, vadr); - } - return ; - } - } - thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type); - - } else if (type == D_TLB) { - if (misr.sp) { - handle_lds(regs); - return; - } - - rr = vcpu_get_rr(v, vadr); - itir = rr & (RR_RID_MASK | RR_PS_MASK); - - if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) { - if (vpsr & IA64_PSR_IC) { - vcpu_set_isr(v, misr.val); - alt_dtlb(v, vadr); - } else { - nested_dtlb(v); - } - return ; - } - - vpta.val = vcpu_get_pta(v); - /* avoid recursively walking (short format) VHPT */ - - vhpt_adr = vcpu_thash(v, vadr); - if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { - /* VHPT successfully read. */ - if (!(pteval & _PAGE_P)) { - if (vpsr & IA64_PSR_IC) { - vcpu_set_isr(v, misr.val); - dtlb_fault(v, vadr); - } else { - nested_dtlb(v); - } - } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) { - thash_purge_and_insert(v, pteval, itir, - vadr, D_TLB); - } else if (vpsr & IA64_PSR_IC) { - vcpu_set_isr(v, misr.val); - dtlb_fault(v, vadr); - } else { - nested_dtlb(v); - } - } else { - /* Can't read VHPT. */ - if (vpsr & IA64_PSR_IC) { - vcpu_set_isr(v, misr.val); - dvhpt_fault(v, vadr); - } else { - nested_dtlb(v); - } - } - } else if (type == I_TLB) { - if (!(vpsr & IA64_PSR_IC)) - misr.ni = 1; - if (!vhpt_enabled(v, vadr, INST_REF)) { - vcpu_set_isr(v, misr.val); - alt_itlb(v, vadr); - return; - } - - vpta.val = vcpu_get_pta(v); - - vhpt_adr = vcpu_thash(v, vadr); - if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { - /* VHPT successfully read. 
*/ - if (pteval & _PAGE_P) { - if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) { - vcpu_set_isr(v, misr.val); - itlb_fault(v, vadr); - return ; - } - rr = vcpu_get_rr(v, vadr); - itir = rr & (RR_RID_MASK | RR_PS_MASK); - thash_purge_and_insert(v, pteval, itir, - vadr, I_TLB); - } else { - vcpu_set_isr(v, misr.val); - inst_page_not_present(v, vadr); - } - } else { - vcpu_set_isr(v, misr.val); - ivhpt_fault(v, vadr); - } - } -} - -void kvm_vexirq(struct kvm_vcpu *vcpu) -{ - u64 vpsr, isr; - struct kvm_pt_regs *regs; - - regs = vcpu_regs(vcpu); - vpsr = VCPU(vcpu, vpsr); - isr = vpsr & IA64_PSR_RI; - reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/ -} - -void kvm_ia64_handle_irq(struct kvm_vcpu *v) -{ - struct exit_ctl_data *p = &v->arch.exit_data; - long psr; - - local_irq_save(psr); - p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; - vmm_transition(v); - local_irq_restore(psr); - - VMX(v, timer_check) = 1; - -} - -static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos) -{ - u64 oldrid, moldrid, oldpsbits, vaddr; - struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos]; - vaddr = p->vaddr; - - oldrid = VMX(v, vrr[0]); - VMX(v, vrr[0]) = p->rr; - oldpsbits = VMX(v, psbits[0]); - VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]); - moldrid = ia64_get_rr(0x0); - ia64_set_rr(0x0, vrrtomrr(p->rr)); - ia64_srlz_d(); - - vaddr = PAGEALIGN(vaddr, p->ps); - thash_purge_entries_remote(v, vaddr, p->ps); - - VMX(v, vrr[0]) = oldrid; - VMX(v, psbits[0]) = oldpsbits; - ia64_set_rr(0x0, moldrid); - ia64_dv_serialize_data(); -} - -static void vcpu_do_resume(struct kvm_vcpu *vcpu) -{ - /*Re-init VHPT and VTLB once from resume*/ - vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES; - thash_init(&vcpu->arch.vhpt, VHPT_SHIFT); - vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES; - thash_init(&vcpu->arch.vtlb, VTLB_SHIFT); - - ia64_set_pta(vcpu->arch.vhpt.pta.val); -} - -static void vmm_sanity_check(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - - if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) { - panic_vm(vcpu, "Failed to do vmm sanity check," - "it maybe caused by crashed vmm!!\n\n"); - } -} - -static void kvm_do_resume_op(struct kvm_vcpu *vcpu) -{ - vmm_sanity_check(vcpu); /*Guarantee vcpu running on healthy vmm!*/ - - if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) { - vcpu_do_resume(vcpu); - return; - } - - if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) { - thash_purge_all(vcpu); - return; - } - - if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) { - while (vcpu->arch.ptc_g_count > 0) - ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count); - } -} - -void vmm_transition(struct kvm_vcpu *vcpu) -{ - ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd, - 1, 0, 0, 0, 0, 0); - vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host); - ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd, - 1, 0, 0, 0, 0, 0); - kvm_do_resume_op(vcpu); -} - -void vmm_panic_handler(u64 vec) -{ - struct kvm_vcpu *vcpu = current_vcpu; - vmm_sanity = 0; - panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n", - vec2off[vec]); -} diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S deleted file mode 100644 index 30897d44d61..00000000000 --- a/arch/ia64/kvm/trampoline.S +++ /dev/null @@ -1,1038 +0,0 @@ -/* Save all processor states - * - * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com> - * Copyright (c) 2007 Anthony Xu <anthony.xu@intel.com> - */ - -#include <asm/asmmacro.h> -#include "asm-offsets.h" - - -#define CTX(name) 
VMM_CTX_##name##_OFFSET - - /* - * r32: context_t base address - */ -#define SAVE_BRANCH_REGS \ - add r2 = CTX(B0),r32; \ - add r3 = CTX(B1),r32; \ - mov r16 = b0; \ - mov r17 = b1; \ - ;; \ - st8 [r2]=r16,16; \ - st8 [r3]=r17,16; \ - ;; \ - mov r16 = b2; \ - mov r17 = b3; \ - ;; \ - st8 [r2]=r16,16; \ - st8 [r3]=r17,16; \ - ;; \ - mov r16 = b4; \ - mov r17 = b5; \ - ;; \ - st8 [r2]=r16; \ - st8 [r3]=r17; \ - ;; - - /* - * r33: context_t base address - */ -#define RESTORE_BRANCH_REGS \ - add r2 = CTX(B0),r33; \ - add r3 = CTX(B1),r33; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov b0 = r16; \ - mov b1 = r17; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov b2 = r16; \ - mov b3 = r17; \ - ;; \ - ld8 r16=[r2]; \ - ld8 r17=[r3]; \ - ;; \ - mov b4=r16; \ - mov b5=r17; \ - ;; - - - /* - * r32: context_t base address - * bsw == 1 - * Save all bank1 general registers, r4 ~ r7 - */ -#define SAVE_GENERAL_REGS \ - add r2=CTX(R4),r32; \ - add r3=CTX(R5),r32; \ - ;; \ -.mem.offset 0,0; \ - st8.spill [r2]=r4,16; \ -.mem.offset 8,0; \ - st8.spill [r3]=r5,16; \ - ;; \ -.mem.offset 0,0; \ - st8.spill [r2]=r6,48; \ -.mem.offset 8,0; \ - st8.spill [r3]=r7,48; \ - ;; \ -.mem.offset 0,0; \ - st8.spill [r2]=r12; \ -.mem.offset 8,0; \ - st8.spill [r3]=r13; \ - ;; - - /* - * r33: context_t base address - * bsw == 1 - */ -#define RESTORE_GENERAL_REGS \ - add r2=CTX(R4),r33; \ - add r3=CTX(R5),r33; \ - ;; \ - ld8.fill r4=[r2],16; \ - ld8.fill r5=[r3],16; \ - ;; \ - ld8.fill r6=[r2],48; \ - ld8.fill r7=[r3],48; \ - ;; \ - ld8.fill r12=[r2]; \ - ld8.fill r13 =[r3]; \ - ;; - - - - - /* - * r32: context_t base address - */ -#define SAVE_KERNEL_REGS \ - add r2 = CTX(KR0),r32; \ - add r3 = CTX(KR1),r32; \ - mov r16 = ar.k0; \ - mov r17 = ar.k1; \ - ;; \ - st8 [r2] = r16,16; \ - st8 [r3] = r17,16; \ - ;; \ - mov r16 = ar.k2; \ - mov r17 = ar.k3; \ - ;; \ - st8 [r2] = r16,16; \ - st8 [r3] = r17,16; \ - ;; \ - mov r16 = ar.k4; \ - mov r17 = ar.k5; \ - ;; \ - st8 [r2] = r16,16; \ - st8 [r3] = r17,16; \ - ;; \ - mov r16 = ar.k6; \ - mov r17 = ar.k7; \ - ;; \ - st8 [r2] = r16; \ - st8 [r3] = r17; \ - ;; - - - - /* - * r33: context_t base address - */ -#define RESTORE_KERNEL_REGS \ - add r2 = CTX(KR0),r33; \ - add r3 = CTX(KR1),r33; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov ar.k0=r16; \ - mov ar.k1=r17; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov ar.k2=r16; \ - mov ar.k3=r17; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov ar.k4=r16; \ - mov ar.k5=r17; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov ar.k6=r16; \ - mov ar.k7=r17; \ - ;; - - - - /* - * r32: context_t base address - */ -#define SAVE_APP_REGS \ - add r2 = CTX(BSPSTORE),r32; \ - mov r16 = ar.bspstore; \ - ;; \ - st8 [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\ - mov r16 = ar.rnat; \ - ;; \ - st8 [r2] = r16,CTX(FCR)-CTX(RNAT); \ - mov r16 = ar.fcr; \ - ;; \ - st8 [r2] = r16,CTX(EFLAG)-CTX(FCR); \ - mov r16 = ar.eflag; \ - ;; \ - st8 [r2] = r16,CTX(CFLG)-CTX(EFLAG); \ - mov r16 = ar.cflg; \ - ;; \ - st8 [r2] = r16,CTX(FSR)-CTX(CFLG); \ - mov r16 = ar.fsr; \ - ;; \ - st8 [r2] = r16,CTX(FIR)-CTX(FSR); \ - mov r16 = ar.fir; \ - ;; \ - st8 [r2] = r16,CTX(FDR)-CTX(FIR); \ - mov r16 = ar.fdr; \ - ;; \ - st8 [r2] = r16,CTX(UNAT)-CTX(FDR); \ - mov r16 = ar.unat; \ - ;; \ - st8 [r2] = r16,CTX(FPSR)-CTX(UNAT); \ - mov r16 = ar.fpsr; \ - ;; \ - st8 [r2] = r16,CTX(PFS)-CTX(FPSR); \ - mov r16 = ar.pfs; \ - ;; \ - st8 [r2] = r16,CTX(LC)-CTX(PFS); \ - mov r16 = ar.lc; \ - ;; \ - st8 
[r2] = r16; \ - ;; - - /* - * r33: context_t base address - */ -#define RESTORE_APP_REGS \ - add r2=CTX(BSPSTORE),r33; \ - ;; \ - ld8 r16=[r2],CTX(RNAT)-CTX(BSPSTORE); \ - ;; \ - mov ar.bspstore=r16; \ - ld8 r16=[r2],CTX(FCR)-CTX(RNAT); \ - ;; \ - mov ar.rnat=r16; \ - ld8 r16=[r2],CTX(EFLAG)-CTX(FCR); \ - ;; \ - mov ar.fcr=r16; \ - ld8 r16=[r2],CTX(CFLG)-CTX(EFLAG); \ - ;; \ - mov ar.eflag=r16; \ - ld8 r16=[r2],CTX(FSR)-CTX(CFLG); \ - ;; \ - mov ar.cflg=r16; \ - ld8 r16=[r2],CTX(FIR)-CTX(FSR); \ - ;; \ - mov ar.fsr=r16; \ - ld8 r16=[r2],CTX(FDR)-CTX(FIR); \ - ;; \ - mov ar.fir=r16; \ - ld8 r16=[r2],CTX(UNAT)-CTX(FDR); \ - ;; \ - mov ar.fdr=r16; \ - ld8 r16=[r2],CTX(FPSR)-CTX(UNAT); \ - ;; \ - mov ar.unat=r16; \ - ld8 r16=[r2],CTX(PFS)-CTX(FPSR); \ - ;; \ - mov ar.fpsr=r16; \ - ld8 r16=[r2],CTX(LC)-CTX(PFS); \ - ;; \ - mov ar.pfs=r16; \ - ld8 r16=[r2]; \ - ;; \ - mov ar.lc=r16; \ - ;; - - /* - * r32: context_t base address - */ -#define SAVE_CTL_REGS \ - add r2 = CTX(DCR),r32; \ - mov r16 = cr.dcr; \ - ;; \ - st8 [r2] = r16,CTX(IVA)-CTX(DCR); \ - ;; \ - mov r16 = cr.iva; \ - ;; \ - st8 [r2] = r16,CTX(PTA)-CTX(IVA); \ - ;; \ - mov r16 = cr.pta; \ - ;; \ - st8 [r2] = r16 ; \ - ;; - - /* - * r33: context_t base address - */ -#define RESTORE_CTL_REGS \ - add r2 = CTX(DCR),r33; \ - ;; \ - ld8 r16 = [r2],CTX(IVA)-CTX(DCR); \ - ;; \ - mov cr.dcr = r16; \ - dv_serialize_data; \ - ;; \ - ld8 r16 = [r2],CTX(PTA)-CTX(IVA); \ - ;; \ - mov cr.iva = r16; \ - dv_serialize_data; \ - ;; \ - ld8 r16 = [r2]; \ - ;; \ - mov cr.pta = r16; \ - dv_serialize_data; \ - ;; - - - /* - * r32: context_t base address - */ -#define SAVE_REGION_REGS \ - add r2=CTX(RR0),r32; \ - mov r16=rr[r0]; \ - dep.z r18=1,61,3; \ - ;; \ - st8 [r2]=r16,8; \ - mov r17=rr[r18]; \ - dep.z r18=2,61,3; \ - ;; \ - st8 [r2]=r17,8; \ - mov r16=rr[r18]; \ - dep.z r18=3,61,3; \ - ;; \ - st8 [r2]=r16,8; \ - mov r17=rr[r18]; \ - dep.z r18=4,61,3; \ - ;; \ - st8 [r2]=r17,8; \ - mov r16=rr[r18]; \ - dep.z r18=5,61,3; \ - ;; \ - st8 [r2]=r16,8; \ - mov r17=rr[r18]; \ - dep.z r18=7,61,3; \ - ;; \ - st8 [r2]=r17,16; \ - mov r16=rr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - ;; - - /* - * r33:context_t base address - */ -#define RESTORE_REGION_REGS \ - add r2=CTX(RR0),r33;\ - mov r18=r0; \ - ;; \ - ld8 r20=[r2],8; \ - ;; /* rr0 */ \ - ld8 r21=[r2],8; \ - ;; /* rr1 */ \ - ld8 r22=[r2],8; \ - ;; /* rr2 */ \ - ld8 r23=[r2],8; \ - ;; /* rr3 */ \ - ld8 r24=[r2],8; \ - ;; /* rr4 */ \ - ld8 r25=[r2],16; \ - ;; /* rr5 */ \ - ld8 r27=[r2]; \ - ;; /* rr7 */ \ - mov rr[r18]=r20; \ - dep.z r18=1,61,3; \ - ;; /* rr1 */ \ - mov rr[r18]=r21; \ - dep.z r18=2,61,3; \ - ;; /* rr2 */ \ - mov rr[r18]=r22; \ - dep.z r18=3,61,3; \ - ;; /* rr3 */ \ - mov rr[r18]=r23; \ - dep.z r18=4,61,3; \ - ;; /* rr4 */ \ - mov rr[r18]=r24; \ - dep.z r18=5,61,3; \ - ;; /* rr5 */ \ - mov rr[r18]=r25; \ - dep.z r18=7,61,3; \ - ;; /* rr7 */ \ - mov rr[r18]=r27; \ - ;; \ - srlz.i; \ - ;; - - - - /* - * r32: context_t base address - * r36~r39:scratch registers - */ -#define SAVE_DEBUG_REGS \ - add r2=CTX(IBR0),r32; \ - add r3=CTX(DBR0),r32; \ - mov r16=ibr[r0]; \ - mov r17=dbr[r0]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=1,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=2,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=2,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=3,r0; \ - ;; \ - mov 
r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=4,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=5,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=6,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=7,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - ;; - - -/* - * r33: point to context_t structure - * ar.lc are corrupted. - */ -#define RESTORE_DEBUG_REGS \ - add r2=CTX(IBR0),r33; \ - add r3=CTX(DBR0),r33; \ - mov r16=7; \ - mov r17=r0; \ - ;; \ - mov ar.lc = r16; \ - ;; \ -1: \ - ld8 r18=[r2],8; \ - ld8 r19=[r3],8; \ - ;; \ - mov ibr[r17]=r18; \ - mov dbr[r17]=r19; \ - ;; \ - srlz.i; \ - ;; \ - add r17=1,r17; \ - br.cloop.sptk 1b; \ - ;; - - - /* - * r32: context_t base address - */ -#define SAVE_FPU_LOW \ - add r2=CTX(F2),r32; \ - add r3=CTX(F3),r32; \ - ;; \ - stf.spill.nta [r2]=f2,32; \ - stf.spill.nta [r3]=f3,32; \ - ;; \ - stf.spill.nta [r2]=f4,32; \ - stf.spill.nta [r3]=f5,32; \ - ;; \ - stf.spill.nta [r2]=f6,32; \ - stf.spill.nta [r3]=f7,32; \ - ;; \ - stf.spill.nta [r2]=f8,32; \ - stf.spill.nta [r3]=f9,32; \ - ;; \ - stf.spill.nta [r2]=f10,32; \ - stf.spill.nta [r3]=f11,32; \ - ;; \ - stf.spill.nta [r2]=f12,32; \ - stf.spill.nta [r3]=f13,32; \ - ;; \ - stf.spill.nta [r2]=f14,32; \ - stf.spill.nta [r3]=f15,32; \ - ;; \ - stf.spill.nta [r2]=f16,32; \ - stf.spill.nta [r3]=f17,32; \ - ;; \ - stf.spill.nta [r2]=f18,32; \ - stf.spill.nta [r3]=f19,32; \ - ;; \ - stf.spill.nta [r2]=f20,32; \ - stf.spill.nta [r3]=f21,32; \ - ;; \ - stf.spill.nta [r2]=f22,32; \ - stf.spill.nta [r3]=f23,32; \ - ;; \ - stf.spill.nta [r2]=f24,32; \ - stf.spill.nta [r3]=f25,32; \ - ;; \ - stf.spill.nta [r2]=f26,32; \ - stf.spill.nta [r3]=f27,32; \ - ;; \ - stf.spill.nta [r2]=f28,32; \ - stf.spill.nta [r3]=f29,32; \ - ;; \ - stf.spill.nta [r2]=f30; \ - stf.spill.nta [r3]=f31; \ - ;; - - /* - * r32: context_t base address - */ -#define SAVE_FPU_HIGH \ - add r2=CTX(F32),r32; \ - add r3=CTX(F33),r32; \ - ;; \ - stf.spill.nta [r2]=f32,32; \ - stf.spill.nta [r3]=f33,32; \ - ;; \ - stf.spill.nta [r2]=f34,32; \ - stf.spill.nta [r3]=f35,32; \ - ;; \ - stf.spill.nta [r2]=f36,32; \ - stf.spill.nta [r3]=f37,32; \ - ;; \ - stf.spill.nta [r2]=f38,32; \ - stf.spill.nta [r3]=f39,32; \ - ;; \ - stf.spill.nta [r2]=f40,32; \ - stf.spill.nta [r3]=f41,32; \ - ;; \ - stf.spill.nta [r2]=f42,32; \ - stf.spill.nta [r3]=f43,32; \ - ;; \ - stf.spill.nta [r2]=f44,32; \ - stf.spill.nta [r3]=f45,32; \ - ;; \ - stf.spill.nta [r2]=f46,32; \ - stf.spill.nta [r3]=f47,32; \ - ;; \ - stf.spill.nta [r2]=f48,32; \ - stf.spill.nta [r3]=f49,32; \ - ;; \ - stf.spill.nta [r2]=f50,32; \ - stf.spill.nta [r3]=f51,32; \ - ;; \ - stf.spill.nta [r2]=f52,32; \ - stf.spill.nta [r3]=f53,32; \ - ;; \ - stf.spill.nta [r2]=f54,32; \ - stf.spill.nta [r3]=f55,32; \ - ;; \ - stf.spill.nta [r2]=f56,32; \ - stf.spill.nta [r3]=f57,32; \ - ;; \ - stf.spill.nta [r2]=f58,32; \ - stf.spill.nta [r3]=f59,32; \ - ;; \ - stf.spill.nta [r2]=f60,32; \ - stf.spill.nta [r3]=f61,32; \ - ;; \ - stf.spill.nta [r2]=f62,32; \ - stf.spill.nta [r3]=f63,32; \ - ;; \ - stf.spill.nta [r2]=f64,32; \ - stf.spill.nta [r3]=f65,32; \ - ;; \ - stf.spill.nta [r2]=f66,32; \ - stf.spill.nta [r3]=f67,32; \ - ;; \ - stf.spill.nta [r2]=f68,32; \ - stf.spill.nta [r3]=f69,32; \ - ;; \ - stf.spill.nta 
[r2]=f70,32; \ - stf.spill.nta [r3]=f71,32; \ - ;; \ - stf.spill.nta [r2]=f72,32; \ - stf.spill.nta [r3]=f73,32; \ - ;; \ - stf.spill.nta [r2]=f74,32; \ - stf.spill.nta [r3]=f75,32; \ - ;; \ - stf.spill.nta [r2]=f76,32; \ - stf.spill.nta [r3]=f77,32; \ - ;; \ - stf.spill.nta [r2]=f78,32; \ - stf.spill.nta [r3]=f79,32; \ - ;; \ - stf.spill.nta [r2]=f80,32; \ - stf.spill.nta [r3]=f81,32; \ - ;; \ - stf.spill.nta [r2]=f82,32; \ - stf.spill.nta [r3]=f83,32; \ - ;; \ - stf.spill.nta [r2]=f84,32; \ - stf.spill.nta [r3]=f85,32; \ - ;; \ - stf.spill.nta [r2]=f86,32; \ - stf.spill.nta [r3]=f87,32; \ - ;; \ - stf.spill.nta [r2]=f88,32; \ - stf.spill.nta [r3]=f89,32; \ - ;; \ - stf.spill.nta [r2]=f90,32; \ - stf.spill.nta [r3]=f91,32; \ - ;; \ - stf.spill.nta [r2]=f92,32; \ - stf.spill.nta [r3]=f93,32; \ - ;; \ - stf.spill.nta [r2]=f94,32; \ - stf.spill.nta [r3]=f95,32; \ - ;; \ - stf.spill.nta [r2]=f96,32; \ - stf.spill.nta [r3]=f97,32; \ - ;; \ - stf.spill.nta [r2]=f98,32; \ - stf.spill.nta [r3]=f99,32; \ - ;; \ - stf.spill.nta [r2]=f100,32; \ - stf.spill.nta [r3]=f101,32; \ - ;; \ - stf.spill.nta [r2]=f102,32; \ - stf.spill.nta [r3]=f103,32; \ - ;; \ - stf.spill.nta [r2]=f104,32; \ - stf.spill.nta [r3]=f105,32; \ - ;; \ - stf.spill.nta [r2]=f106,32; \ - stf.spill.nta [r3]=f107,32; \ - ;; \ - stf.spill.nta [r2]=f108,32; \ - stf.spill.nta [r3]=f109,32; \ - ;; \ - stf.spill.nta [r2]=f110,32; \ - stf.spill.nta [r3]=f111,32; \ - ;; \ - stf.spill.nta [r2]=f112,32; \ - stf.spill.nta [r3]=f113,32; \ - ;; \ - stf.spill.nta [r2]=f114,32; \ - stf.spill.nta [r3]=f115,32; \ - ;; \ - stf.spill.nta [r2]=f116,32; \ - stf.spill.nta [r3]=f117,32; \ - ;; \ - stf.spill.nta [r2]=f118,32; \ - stf.spill.nta [r3]=f119,32; \ - ;; \ - stf.spill.nta [r2]=f120,32; \ - stf.spill.nta [r3]=f121,32; \ - ;; \ - stf.spill.nta [r2]=f122,32; \ - stf.spill.nta [r3]=f123,32; \ - ;; \ - stf.spill.nta [r2]=f124,32; \ - stf.spill.nta [r3]=f125,32; \ - ;; \ - stf.spill.nta [r2]=f126; \ - stf.spill.nta [r3]=f127; \ - ;; - - /* - * r33: point to context_t structure - */ -#define RESTORE_FPU_LOW \ - add r2 = CTX(F2), r33; \ - add r3 = CTX(F3), r33; \ - ;; \ - ldf.fill.nta f2 = [r2], 32; \ - ldf.fill.nta f3 = [r3], 32; \ - ;; \ - ldf.fill.nta f4 = [r2], 32; \ - ldf.fill.nta f5 = [r3], 32; \ - ;; \ - ldf.fill.nta f6 = [r2], 32; \ - ldf.fill.nta f7 = [r3], 32; \ - ;; \ - ldf.fill.nta f8 = [r2], 32; \ - ldf.fill.nta f9 = [r3], 32; \ - ;; \ - ldf.fill.nta f10 = [r2], 32; \ - ldf.fill.nta f11 = [r3], 32; \ - ;; \ - ldf.fill.nta f12 = [r2], 32; \ - ldf.fill.nta f13 = [r3], 32; \ - ;; \ - ldf.fill.nta f14 = [r2], 32; \ - ldf.fill.nta f15 = [r3], 32; \ - ;; \ - ldf.fill.nta f16 = [r2], 32; \ - ldf.fill.nta f17 = [r3], 32; \ - ;; \ - ldf.fill.nta f18 = [r2], 32; \ - ldf.fill.nta f19 = [r3], 32; \ - ;; \ - ldf.fill.nta f20 = [r2], 32; \ - ldf.fill.nta f21 = [r3], 32; \ - ;; \ - ldf.fill.nta f22 = [r2], 32; \ - ldf.fill.nta f23 = [r3], 32; \ - ;; \ - ldf.fill.nta f24 = [r2], 32; \ - ldf.fill.nta f25 = [r3], 32; \ - ;; \ - ldf.fill.nta f26 = [r2], 32; \ - ldf.fill.nta f27 = [r3], 32; \ - ;; \ - ldf.fill.nta f28 = [r2], 32; \ - ldf.fill.nta f29 = [r3], 32; \ - ;; \ - ldf.fill.nta f30 = [r2], 32; \ - ldf.fill.nta f31 = [r3], 32; \ - ;; - - - - /* - * r33: point to context_t structure - */ -#define RESTORE_FPU_HIGH \ - add r2 = CTX(F32), r33; \ - add r3 = CTX(F33), r33; \ - ;; \ - ldf.fill.nta f32 = [r2], 32; \ - ldf.fill.nta f33 = [r3], 32; \ - ;; \ - ldf.fill.nta f34 = [r2], 32; \ - ldf.fill.nta f35 = [r3], 32; \ - ;; \ - ldf.fill.nta f36 = [r2], 32; \ - 
ldf.fill.nta f37 = [r3], 32; \ - ;; \ - ldf.fill.nta f38 = [r2], 32; \ - ldf.fill.nta f39 = [r3], 32; \ - ;; \ - ldf.fill.nta f40 = [r2], 32; \ - ldf.fill.nta f41 = [r3], 32; \ - ;; \ - ldf.fill.nta f42 = [r2], 32; \ - ldf.fill.nta f43 = [r3], 32; \ - ;; \ - ldf.fill.nta f44 = [r2], 32; \ - ldf.fill.nta f45 = [r3], 32; \ - ;; \ - ldf.fill.nta f46 = [r2], 32; \ - ldf.fill.nta f47 = [r3], 32; \ - ;; \ - ldf.fill.nta f48 = [r2], 32; \ - ldf.fill.nta f49 = [r3], 32; \ - ;; \ - ldf.fill.nta f50 = [r2], 32; \ - ldf.fill.nta f51 = [r3], 32; \ - ;; \ - ldf.fill.nta f52 = [r2], 32; \ - ldf.fill.nta f53 = [r3], 32; \ - ;; \ - ldf.fill.nta f54 = [r2], 32; \ - ldf.fill.nta f55 = [r3], 32; \ - ;; \ - ldf.fill.nta f56 = [r2], 32; \ - ldf.fill.nta f57 = [r3], 32; \ - ;; \ - ldf.fill.nta f58 = [r2], 32; \ - ldf.fill.nta f59 = [r3], 32; \ - ;; \ - ldf.fill.nta f60 = [r2], 32; \ - ldf.fill.nta f61 = [r3], 32; \ - ;; \ - ldf.fill.nta f62 = [r2], 32; \ - ldf.fill.nta f63 = [r3], 32; \ - ;; \ - ldf.fill.nta f64 = [r2], 32; \ - ldf.fill.nta f65 = [r3], 32; \ - ;; \ - ldf.fill.nta f66 = [r2], 32; \ - ldf.fill.nta f67 = [r3], 32; \ - ;; \ - ldf.fill.nta f68 = [r2], 32; \ - ldf.fill.nta f69 = [r3], 32; \ - ;; \ - ldf.fill.nta f70 = [r2], 32; \ - ldf.fill.nta f71 = [r3], 32; \ - ;; \ - ldf.fill.nta f72 = [r2], 32; \ - ldf.fill.nta f73 = [r3], 32; \ - ;; \ - ldf.fill.nta f74 = [r2], 32; \ - ldf.fill.nta f75 = [r3], 32; \ - ;; \ - ldf.fill.nta f76 = [r2], 32; \ - ldf.fill.nta f77 = [r3], 32; \ - ;; \ - ldf.fill.nta f78 = [r2], 32; \ - ldf.fill.nta f79 = [r3], 32; \ - ;; \ - ldf.fill.nta f80 = [r2], 32; \ - ldf.fill.nta f81 = [r3], 32; \ - ;; \ - ldf.fill.nta f82 = [r2], 32; \ - ldf.fill.nta f83 = [r3], 32; \ - ;; \ - ldf.fill.nta f84 = [r2], 32; \ - ldf.fill.nta f85 = [r3], 32; \ - ;; \ - ldf.fill.nta f86 = [r2], 32; \ - ldf.fill.nta f87 = [r3], 32; \ - ;; \ - ldf.fill.nta f88 = [r2], 32; \ - ldf.fill.nta f89 = [r3], 32; \ - ;; \ - ldf.fill.nta f90 = [r2], 32; \ - ldf.fill.nta f91 = [r3], 32; \ - ;; \ - ldf.fill.nta f92 = [r2], 32; \ - ldf.fill.nta f93 = [r3], 32; \ - ;; \ - ldf.fill.nta f94 = [r2], 32; \ - ldf.fill.nta f95 = [r3], 32; \ - ;; \ - ldf.fill.nta f96 = [r2], 32; \ - ldf.fill.nta f97 = [r3], 32; \ - ;; \ - ldf.fill.nta f98 = [r2], 32; \ - ldf.fill.nta f99 = [r3], 32; \ - ;; \ - ldf.fill.nta f100 = [r2], 32; \ - ldf.fill.nta f101 = [r3], 32; \ - ;; \ - ldf.fill.nta f102 = [r2], 32; \ - ldf.fill.nta f103 = [r3], 32; \ - ;; \ - ldf.fill.nta f104 = [r2], 32; \ - ldf.fill.nta f105 = [r3], 32; \ - ;; \ - ldf.fill.nta f106 = [r2], 32; \ - ldf.fill.nta f107 = [r3], 32; \ - ;; \ - ldf.fill.nta f108 = [r2], 32; \ - ldf.fill.nta f109 = [r3], 32; \ - ;; \ - ldf.fill.nta f110 = [r2], 32; \ - ldf.fill.nta f111 = [r3], 32; \ - ;; \ - ldf.fill.nta f112 = [r2], 32; \ - ldf.fill.nta f113 = [r3], 32; \ - ;; \ - ldf.fill.nta f114 = [r2], 32; \ - ldf.fill.nta f115 = [r3], 32; \ - ;; \ - ldf.fill.nta f116 = [r2], 32; \ - ldf.fill.nta f117 = [r3], 32; \ - ;; \ - ldf.fill.nta f118 = [r2], 32; \ - ldf.fill.nta f119 = [r3], 32; \ - ;; \ - ldf.fill.nta f120 = [r2], 32; \ - ldf.fill.nta f121 = [r3], 32; \ - ;; \ - ldf.fill.nta f122 = [r2], 32; \ - ldf.fill.nta f123 = [r3], 32; \ - ;; \ - ldf.fill.nta f124 = [r2], 32; \ - ldf.fill.nta f125 = [r3], 32; \ - ;; \ - ldf.fill.nta f126 = [r2], 32; \ - ldf.fill.nta f127 = [r3], 32; \ - ;; - - /* - * r32: context_t base address - */ -#define SAVE_PTK_REGS \ - add r2=CTX(PKR0), r32; \ - mov r16=7; \ - ;; \ - mov ar.lc=r16; \ - mov r17=r0; \ - ;; \ -1: \ - mov r18=pkr[r17]; \ - ;; \ - srlz.i; 
\ - ;; \ - st8 [r2]=r18, 8; \ - ;; \ - add r17 =1,r17; \ - ;; \ - br.cloop.sptk 1b; \ - ;; - -/* - * r33: point to context_t structure - * ar.lc are corrupted. - */ -#define RESTORE_PTK_REGS \ - add r2=CTX(PKR0), r33; \ - mov r16=7; \ - ;; \ - mov ar.lc=r16; \ - mov r17=r0; \ - ;; \ -1: \ - ld8 r18=[r2], 8; \ - ;; \ - mov pkr[r17]=r18; \ - ;; \ - srlz.i; \ - ;; \ - add r17 =1,r17; \ - ;; \ - br.cloop.sptk 1b; \ - ;; - - -/* - * void vmm_trampoline( context_t * from, - * context_t * to) - * - * from: r32 - * to: r33 - * note: interrupt disabled before call this function. - */ -GLOBAL_ENTRY(vmm_trampoline) - mov r16 = psr - adds r2 = CTX(PSR), r32 - ;; - st8 [r2] = r16, 8 // psr - mov r17 = pr - ;; - st8 [r2] = r17, 8 // pr - mov r18 = ar.unat - ;; - st8 [r2] = r18 - mov r17 = ar.rsc - ;; - adds r2 = CTX(RSC),r32 - ;; - st8 [r2]= r17 - mov ar.rsc =0 - flushrs - ;; - SAVE_GENERAL_REGS - ;; - SAVE_KERNEL_REGS - ;; - SAVE_APP_REGS - ;; - SAVE_BRANCH_REGS - ;; - SAVE_CTL_REGS - ;; - SAVE_REGION_REGS - ;; - //SAVE_DEBUG_REGS - ;; - rsm psr.dfl - ;; - srlz.d - ;; - SAVE_FPU_LOW - ;; - rsm psr.dfh - ;; - srlz.d - ;; - SAVE_FPU_HIGH - ;; - SAVE_PTK_REGS - ;; - RESTORE_PTK_REGS - ;; - RESTORE_FPU_HIGH - ;; - RESTORE_FPU_LOW - ;; - //RESTORE_DEBUG_REGS - ;; - RESTORE_REGION_REGS - ;; - RESTORE_CTL_REGS - ;; - RESTORE_BRANCH_REGS - ;; - RESTORE_APP_REGS - ;; - RESTORE_KERNEL_REGS - ;; - RESTORE_GENERAL_REGS - ;; - adds r2=CTX(PSR), r33 - ;; - ld8 r16=[r2], 8 // psr - ;; - mov psr.l=r16 - ;; - srlz.d - ;; - ld8 r16=[r2], 8 // pr - ;; - mov pr =r16,-1 - ld8 r16=[r2] // unat - ;; - mov ar.unat=r16 - ;; - adds r2=CTX(RSC),r33 - ;; - ld8 r16 =[r2] - ;; - mov ar.rsc = r16 - ;; - br.ret.sptk.few b0 -END(vmm_trampoline) diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c deleted file mode 100644 index 958815c9787..00000000000 --- a/arch/ia64/kvm/vcpu.c +++ /dev/null @@ -1,2209 +0,0 @@ -/* - * kvm_vcpu.c: handling all virtual cpu related thing. - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Shaofan Li (Susue Li) <susie.li@intel.com> - * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Xiantao Zhang <xiantao.zhang@intel.com> - */ - -#include <linux/kvm_host.h> -#include <linux/types.h> - -#include <asm/processor.h> -#include <asm/ia64regs.h> -#include <asm/gcc_intrin.h> -#include <asm/kregs.h> -#include <asm/pgtable.h> -#include <asm/tlb.h> - -#include "asm-offsets.h" -#include "vcpu.h" - -/* - * Special notes: - * - Index by it/dt/rt sequence - * - Only existing mode transitions are allowed in this table - * - RSE is placed at lazy mode when emulating guest partial mode - * - If gva happens to be rr0 and rr4, only allowed case is identity - * mapping (gva=gpa), or panic! (How?) 
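The table that follows is indexed by the guest's (it, dt, rt) PSR bits packed into a 3-bit row/column number, so row 0 is full physical mode (0,0,0) and row 7 is full virtual mode (1,1,1). A minimal sketch of that packing, assuming the MODE_IND() helper used by mm_switch_action() further down folds the bits in this order:

    /* hypothetical stand-in for the MODE_IND() macro from vcpu.h */
    static inline int mode_ind(struct ia64_psr psr)
    {
            return (psr.it << 2) | (psr.dt << 1) | psr.rt;
    }

With that encoding, a (1,0,1) -> (1,1,1) transition reads mm_switch_table[5][7], which is SW_P2V, matching the "return from the low level TLB miss handlers" case described in the table's comments.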
- */ -int mm_switch_table[8][8] = { - /* 2004/09/12(Kevin): Allow switch to self */ - /* - * (it,dt,rt): (0,0,0) -> (1,1,1) - * This kind of transition usually occurs in the very early - * stage of Linux boot up procedure. Another case is in efi - * and pal calls. (see "arch/ia64/kernel/head.S") - * - * (it,dt,rt): (0,0,0) -> (0,1,1) - * This kind of transition is found when OSYa exits efi boot - * service. Due to gva = gpa in this case (Same region), - * data access can be satisfied though itlb entry for physical - * emulation is hit. - */ - {SW_SELF, 0, 0, SW_NOP, 0, 0, 0, SW_P2V}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - /* - * (it,dt,rt): (0,1,1) -> (1,1,1) - * This kind of transition is found in OSYa. - * - * (it,dt,rt): (0,1,1) -> (0,0,0) - * This kind of transition is found in OSYa - */ - {SW_NOP, 0, 0, SW_SELF, 0, 0, 0, SW_P2V}, - /* (1,0,0)->(1,1,1) */ - {0, 0, 0, 0, 0, 0, 0, SW_P2V}, - /* - * (it,dt,rt): (1,0,1) -> (1,1,1) - * This kind of transition usually occurs when Linux returns - * from the low level TLB miss handlers. - * (see "arch/ia64/kernel/ivt.S") - */ - {0, 0, 0, 0, 0, SW_SELF, 0, SW_P2V}, - {0, 0, 0, 0, 0, 0, 0, 0}, - /* - * (it,dt,rt): (1,1,1) -> (1,0,1) - * This kind of transition usually occurs in Linux low level - * TLB miss handler. (see "arch/ia64/kernel/ivt.S") - * - * (it,dt,rt): (1,1,1) -> (0,0,0) - * This kind of transition usually occurs in pal and efi calls, - * which requires running in physical mode. - * (see "arch/ia64/kernel/head.S") - * (1,1,1)->(1,0,0) - */ - - {SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF}, -}; - -void physical_mode_init(struct kvm_vcpu *vcpu) -{ - vcpu->arch.mode_flags = GUEST_IN_PHY; -} - -void switch_to_physical_rid(struct kvm_vcpu *vcpu) -{ - unsigned long psr; - - /* Save original virtual mode rr[0] and rr[4] */ - psr = ia64_clear_ic(); - ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0); - ia64_srlz_d(); - ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4); - ia64_srlz_d(); - - ia64_set_psr(psr); - return; -} - -void switch_to_virtual_rid(struct kvm_vcpu *vcpu) -{ - unsigned long psr; - - psr = ia64_clear_ic(); - ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0); - ia64_srlz_d(); - ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4); - ia64_srlz_d(); - ia64_set_psr(psr); - return; -} - -static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr) -{ - return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)]; -} - -void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, - struct ia64_psr new_psr) -{ - int act; - act = mm_switch_action(old_psr, new_psr); - switch (act) { - case SW_V2P: - /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n", - old_psr.val, new_psr.val);*/ - switch_to_physical_rid(vcpu); - /* - * Set rse to enforced lazy, to prevent active rse - *save/restor when guest physical mode. - */ - vcpu->arch.mode_flags |= GUEST_IN_PHY; - break; - case SW_P2V: - switch_to_virtual_rid(vcpu); - /* - * recover old mode which is saved when entering - * guest physical mode - */ - vcpu->arch.mode_flags &= ~GUEST_IN_PHY; - break; - case SW_SELF: - break; - case SW_NOP: - break; - default: - /* Sanity check */ - break; - } - return; -} - -/* - * In physical mode, insert tc/tr for region 0 and 4 uses - * RID[0] and RID[4] which is for physical mode emulation. - * However what those inserted tc/tr wants is rid for - * virtual mode. So original virtual rid needs to be restored - * before insert. 
- * - * Operations which required such switch include: - * - insertions (itc.*, itr.*) - * - purges (ptc.* and ptr.*) - * - tpa - * - tak - * - thash?, ttag? - * All above needs actual virtual rid for destination entry. - */ - -void check_mm_mode_switch(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, - struct ia64_psr new_psr) -{ - - if ((old_psr.dt != new_psr.dt) - || (old_psr.it != new_psr.it) - || (old_psr.rt != new_psr.rt)) - switch_mm_mode(vcpu, old_psr, new_psr); - - return; -} - - -/* - * In physical mode, insert tc/tr for region 0 and 4 uses - * RID[0] and RID[4] which is for physical mode emulation. - * However what those inserted tc/tr wants is rid for - * virtual mode. So original virtual rid needs to be restored - * before insert. - * - * Operations which required such switch include: - * - insertions (itc.*, itr.*) - * - purges (ptc.* and ptr.*) - * - tpa - * - tak - * - thash?, ttag? - * All above needs actual virtual rid for destination entry. - */ - -void prepare_if_physical_mode(struct kvm_vcpu *vcpu) -{ - if (is_physical_mode(vcpu)) { - vcpu->arch.mode_flags |= GUEST_PHY_EMUL; - switch_to_virtual_rid(vcpu); - } - return; -} - -/* Recover always follows prepare */ -void recover_if_physical_mode(struct kvm_vcpu *vcpu) -{ - if (is_physical_mode(vcpu)) - switch_to_physical_rid(vcpu); - vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL; - return; -} - -#define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x) - -static u16 gr_info[32] = { - 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ - RPT(r1), RPT(r2), RPT(r3), - RPT(r4), RPT(r5), RPT(r6), RPT(r7), - RPT(r8), RPT(r9), RPT(r10), RPT(r11), - RPT(r12), RPT(r13), RPT(r14), RPT(r15), - RPT(r16), RPT(r17), RPT(r18), RPT(r19), - RPT(r20), RPT(r21), RPT(r22), RPT(r23), - RPT(r24), RPT(r25), RPT(r26), RPT(r27), - RPT(r28), RPT(r29), RPT(r30), RPT(r31) -}; - -#define IA64_FIRST_STACKED_GR 32 -#define IA64_FIRST_ROTATING_FR 32 - -static inline unsigned long -rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg) -{ - reg += rrb; - if (reg >= sor) - reg -= sor; - return reg; -} - -/* - * Return the (rotated) index for floating point register - * be in the REGNUM (REGNUM must range from 32-127, - * result is in the range from 0-95. - */ -static inline unsigned long fph_index(struct kvm_pt_regs *regs, - long regnum) -{ - unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f; - return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); -} - -/* - * The inverse of the above: given bspstore and the number of - * registers, calculate ar.bsp. 
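Every 64th slot of the RSE backing store (slot number 63 within each 0x200-byte group) holds an RNAT collection word instead of a register, so advancing by num_regs registers must also step over any interleaved RNAT slots; that is what the 0x3f arithmetic in kvm_rse_skip_regs() below does. A hand-worked trace, assuming ia64_rse_slot_num() yields bits 3..8 of the address as in asm/rse.h:

    /*
     * Illustrative trace only: starting at slot 60 and skipping
     * 5 registers crosses one RNAT slot, so the pointer moves by
     * 6 slots rather than 5:
     *   delta = 60 + 5 = 65;  65 >= 0x3f  ->  i = 1, delta = 2
     *   return addr + 5 + 1;
     */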
- */ -static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr, - long num_regs) -{ - long delta = ia64_rse_slot_num(addr) + num_regs; - int i = 0; - - if (num_regs < 0) - delta -= 0x3e; - if (delta < 0) { - while (delta <= -0x3f) { - i--; - delta += 0x3f; - } - } else { - while (delta >= 0x3f) { - i++; - delta -= 0x3f; - } - } - - return addr + num_regs + i; -} - -static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, - unsigned long *val, int *nat) -{ - unsigned long *bsp, *addr, *rnat_addr, *bspstore; - unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; - unsigned long nat_mask; - unsigned long old_rsc, new_rsc; - long sof = (regs->cr_ifs) & 0x7f; - long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); - long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; - long ridx = r1 - 32; - - if (ridx < sor) - ridx = rotate_reg(sor, rrb_gr, ridx); - - old_rsc = ia64_getreg(_IA64_REG_AR_RSC); - new_rsc = old_rsc&(~(0x3)); - ia64_setreg(_IA64_REG_AR_RSC, new_rsc); - - bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); - bsp = kbs + (regs->loadrs >> 19); - - addr = kvm_rse_skip_regs(bsp, -sof + ridx); - nat_mask = 1UL << ia64_rse_slot_num(addr); - rnat_addr = ia64_rse_rnat_addr(addr); - - if (addr >= bspstore) { - ia64_flushrs(); - ia64_mf(); - bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); - } - *val = *addr; - if (nat) { - if (bspstore < rnat_addr) - *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT) - & nat_mask); - else - *nat = (int)!!((*rnat_addr) & nat_mask); - ia64_setreg(_IA64_REG_AR_RSC, old_rsc); - } -} - -void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, - unsigned long val, unsigned long nat) -{ - unsigned long *bsp, *bspstore, *addr, *rnat_addr; - unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; - unsigned long nat_mask; - unsigned long old_rsc, new_rsc, psr; - unsigned long rnat; - long sof = (regs->cr_ifs) & 0x7f; - long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); - long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; - long ridx = r1 - 32; - - if (ridx < sor) - ridx = rotate_reg(sor, rrb_gr, ridx); - - old_rsc = ia64_getreg(_IA64_REG_AR_RSC); - /* put RSC to lazy mode, and set loadrs 0 */ - new_rsc = old_rsc & (~0x3fff0003); - ia64_setreg(_IA64_REG_AR_RSC, new_rsc); - bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */ - - addr = kvm_rse_skip_regs(bsp, -sof + ridx); - nat_mask = 1UL << ia64_rse_slot_num(addr); - rnat_addr = ia64_rse_rnat_addr(addr); - - local_irq_save(psr); - bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); - if (addr >= bspstore) { - - ia64_flushrs(); - ia64_mf(); - *addr = val; - bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); - rnat = ia64_getreg(_IA64_REG_AR_RNAT); - if (bspstore < rnat_addr) - rnat = rnat & (~nat_mask); - else - *rnat_addr = (*rnat_addr)&(~nat_mask); - - ia64_mf(); - ia64_loadrs(); - ia64_setreg(_IA64_REG_AR_RNAT, rnat); - } else { - rnat = ia64_getreg(_IA64_REG_AR_RNAT); - *addr = val; - if (bspstore < rnat_addr) - rnat = rnat&(~nat_mask); - else - *rnat_addr = (*rnat_addr) & (~nat_mask); - - ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore); - ia64_setreg(_IA64_REG_AR_RNAT, rnat); - } - local_irq_restore(psr); - ia64_setreg(_IA64_REG_AR_RSC, old_rsc); -} - -void getreg(unsigned long regnum, unsigned long *val, - int *nat, struct kvm_pt_regs *regs) -{ - unsigned long addr, *unat; - if (regnum >= IA64_FIRST_STACKED_GR) { - get_rse_reg(regs, regnum, val, nat); - return; - } - - /* - * Now look at registers in [0-31] range and init correct UNAT - */ - addr = 
(unsigned long)regs; - unat = ®s->eml_unat; - - addr += gr_info[regnum]; - - *val = *(unsigned long *)addr; - /* - * do it only when requested - */ - if (nat) - *nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL; -} - -void setreg(unsigned long regnum, unsigned long val, - int nat, struct kvm_pt_regs *regs) -{ - unsigned long addr; - unsigned long bitmask; - unsigned long *unat; - - /* - * First takes care of stacked registers - */ - if (regnum >= IA64_FIRST_STACKED_GR) { - set_rse_reg(regs, regnum, val, nat); - return; - } - - /* - * Now look at registers in [0-31] range and init correct UNAT - */ - addr = (unsigned long)regs; - unat = ®s->eml_unat; - /* - * add offset from base of struct - * and do it ! - */ - addr += gr_info[regnum]; - - *(unsigned long *)addr = val; - - /* - * We need to clear the corresponding UNAT bit to fully emulate the load - * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4 - */ - bitmask = 1UL << ((addr >> 3) & 0x3f); - if (nat) - *unat |= bitmask; - else - *unat &= ~bitmask; - -} - -u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - unsigned long val; - - if (!reg) - return 0; - getreg(reg, &val, 0, regs); - return val; -} - -void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - long sof = (regs->cr_ifs) & 0x7f; - - if (!reg) - return; - if (reg >= sof + 32) - return; - setreg(reg, value, nat, regs); /* FIXME: handle NATs later*/ -} - -void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval, - struct kvm_pt_regs *regs) -{ - /* Take floating register rotation into consideration*/ - if (regnum >= IA64_FIRST_ROTATING_FR) - regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); -#define CASE_FIXED_FP(reg) \ - case (reg) : \ - ia64_stf_spill(fpval, reg); \ - break - - switch (regnum) { - CASE_FIXED_FP(0); - CASE_FIXED_FP(1); - CASE_FIXED_FP(2); - CASE_FIXED_FP(3); - CASE_FIXED_FP(4); - CASE_FIXED_FP(5); - - CASE_FIXED_FP(6); - CASE_FIXED_FP(7); - CASE_FIXED_FP(8); - CASE_FIXED_FP(9); - CASE_FIXED_FP(10); - CASE_FIXED_FP(11); - - CASE_FIXED_FP(12); - CASE_FIXED_FP(13); - CASE_FIXED_FP(14); - CASE_FIXED_FP(15); - CASE_FIXED_FP(16); - CASE_FIXED_FP(17); - CASE_FIXED_FP(18); - CASE_FIXED_FP(19); - CASE_FIXED_FP(20); - CASE_FIXED_FP(21); - CASE_FIXED_FP(22); - CASE_FIXED_FP(23); - CASE_FIXED_FP(24); - CASE_FIXED_FP(25); - CASE_FIXED_FP(26); - CASE_FIXED_FP(27); - CASE_FIXED_FP(28); - CASE_FIXED_FP(29); - CASE_FIXED_FP(30); - CASE_FIXED_FP(31); - CASE_FIXED_FP(32); - CASE_FIXED_FP(33); - CASE_FIXED_FP(34); - CASE_FIXED_FP(35); - CASE_FIXED_FP(36); - CASE_FIXED_FP(37); - CASE_FIXED_FP(38); - CASE_FIXED_FP(39); - CASE_FIXED_FP(40); - CASE_FIXED_FP(41); - CASE_FIXED_FP(42); - CASE_FIXED_FP(43); - CASE_FIXED_FP(44); - CASE_FIXED_FP(45); - CASE_FIXED_FP(46); - CASE_FIXED_FP(47); - CASE_FIXED_FP(48); - CASE_FIXED_FP(49); - CASE_FIXED_FP(50); - CASE_FIXED_FP(51); - CASE_FIXED_FP(52); - CASE_FIXED_FP(53); - CASE_FIXED_FP(54); - CASE_FIXED_FP(55); - CASE_FIXED_FP(56); - CASE_FIXED_FP(57); - CASE_FIXED_FP(58); - CASE_FIXED_FP(59); - CASE_FIXED_FP(60); - CASE_FIXED_FP(61); - CASE_FIXED_FP(62); - CASE_FIXED_FP(63); - CASE_FIXED_FP(64); - CASE_FIXED_FP(65); - CASE_FIXED_FP(66); - CASE_FIXED_FP(67); - CASE_FIXED_FP(68); - CASE_FIXED_FP(69); - CASE_FIXED_FP(70); - CASE_FIXED_FP(71); - CASE_FIXED_FP(72); - CASE_FIXED_FP(73); - CASE_FIXED_FP(74); - CASE_FIXED_FP(75); - CASE_FIXED_FP(76); - CASE_FIXED_FP(77); - CASE_FIXED_FP(78); - CASE_FIXED_FP(79); - 
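    /*
     * stf.spill encodes the fp register number in the instruction
     * itself, so ia64_stf_spill() cannot be driven by a runtime
     * index; hence one case per register, f0 through f127.
     */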
CASE_FIXED_FP(80); - CASE_FIXED_FP(81); - CASE_FIXED_FP(82); - CASE_FIXED_FP(83); - CASE_FIXED_FP(84); - CASE_FIXED_FP(85); - CASE_FIXED_FP(86); - CASE_FIXED_FP(87); - CASE_FIXED_FP(88); - CASE_FIXED_FP(89); - CASE_FIXED_FP(90); - CASE_FIXED_FP(91); - CASE_FIXED_FP(92); - CASE_FIXED_FP(93); - CASE_FIXED_FP(94); - CASE_FIXED_FP(95); - CASE_FIXED_FP(96); - CASE_FIXED_FP(97); - CASE_FIXED_FP(98); - CASE_FIXED_FP(99); - CASE_FIXED_FP(100); - CASE_FIXED_FP(101); - CASE_FIXED_FP(102); - CASE_FIXED_FP(103); - CASE_FIXED_FP(104); - CASE_FIXED_FP(105); - CASE_FIXED_FP(106); - CASE_FIXED_FP(107); - CASE_FIXED_FP(108); - CASE_FIXED_FP(109); - CASE_FIXED_FP(110); - CASE_FIXED_FP(111); - CASE_FIXED_FP(112); - CASE_FIXED_FP(113); - CASE_FIXED_FP(114); - CASE_FIXED_FP(115); - CASE_FIXED_FP(116); - CASE_FIXED_FP(117); - CASE_FIXED_FP(118); - CASE_FIXED_FP(119); - CASE_FIXED_FP(120); - CASE_FIXED_FP(121); - CASE_FIXED_FP(122); - CASE_FIXED_FP(123); - CASE_FIXED_FP(124); - CASE_FIXED_FP(125); - CASE_FIXED_FP(126); - CASE_FIXED_FP(127); - } -#undef CASE_FIXED_FP -} - -void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval, - struct kvm_pt_regs *regs) -{ - /* Take floating register rotation into consideration*/ - if (regnum >= IA64_FIRST_ROTATING_FR) - regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); - -#define CASE_FIXED_FP(reg) \ - case (reg) : \ - ia64_ldf_fill(reg, fpval); \ - break - - switch (regnum) { - CASE_FIXED_FP(2); - CASE_FIXED_FP(3); - CASE_FIXED_FP(4); - CASE_FIXED_FP(5); - - CASE_FIXED_FP(6); - CASE_FIXED_FP(7); - CASE_FIXED_FP(8); - CASE_FIXED_FP(9); - CASE_FIXED_FP(10); - CASE_FIXED_FP(11); - - CASE_FIXED_FP(12); - CASE_FIXED_FP(13); - CASE_FIXED_FP(14); - CASE_FIXED_FP(15); - CASE_FIXED_FP(16); - CASE_FIXED_FP(17); - CASE_FIXED_FP(18); - CASE_FIXED_FP(19); - CASE_FIXED_FP(20); - CASE_FIXED_FP(21); - CASE_FIXED_FP(22); - CASE_FIXED_FP(23); - CASE_FIXED_FP(24); - CASE_FIXED_FP(25); - CASE_FIXED_FP(26); - CASE_FIXED_FP(27); - CASE_FIXED_FP(28); - CASE_FIXED_FP(29); - CASE_FIXED_FP(30); - CASE_FIXED_FP(31); - CASE_FIXED_FP(32); - CASE_FIXED_FP(33); - CASE_FIXED_FP(34); - CASE_FIXED_FP(35); - CASE_FIXED_FP(36); - CASE_FIXED_FP(37); - CASE_FIXED_FP(38); - CASE_FIXED_FP(39); - CASE_FIXED_FP(40); - CASE_FIXED_FP(41); - CASE_FIXED_FP(42); - CASE_FIXED_FP(43); - CASE_FIXED_FP(44); - CASE_FIXED_FP(45); - CASE_FIXED_FP(46); - CASE_FIXED_FP(47); - CASE_FIXED_FP(48); - CASE_FIXED_FP(49); - CASE_FIXED_FP(50); - CASE_FIXED_FP(51); - CASE_FIXED_FP(52); - CASE_FIXED_FP(53); - CASE_FIXED_FP(54); - CASE_FIXED_FP(55); - CASE_FIXED_FP(56); - CASE_FIXED_FP(57); - CASE_FIXED_FP(58); - CASE_FIXED_FP(59); - CASE_FIXED_FP(60); - CASE_FIXED_FP(61); - CASE_FIXED_FP(62); - CASE_FIXED_FP(63); - CASE_FIXED_FP(64); - CASE_FIXED_FP(65); - CASE_FIXED_FP(66); - CASE_FIXED_FP(67); - CASE_FIXED_FP(68); - CASE_FIXED_FP(69); - CASE_FIXED_FP(70); - CASE_FIXED_FP(71); - CASE_FIXED_FP(72); - CASE_FIXED_FP(73); - CASE_FIXED_FP(74); - CASE_FIXED_FP(75); - CASE_FIXED_FP(76); - CASE_FIXED_FP(77); - CASE_FIXED_FP(78); - CASE_FIXED_FP(79); - CASE_FIXED_FP(80); - CASE_FIXED_FP(81); - CASE_FIXED_FP(82); - CASE_FIXED_FP(83); - CASE_FIXED_FP(84); - CASE_FIXED_FP(85); - CASE_FIXED_FP(86); - CASE_FIXED_FP(87); - CASE_FIXED_FP(88); - CASE_FIXED_FP(89); - CASE_FIXED_FP(90); - CASE_FIXED_FP(91); - CASE_FIXED_FP(92); - CASE_FIXED_FP(93); - CASE_FIXED_FP(94); - CASE_FIXED_FP(95); - CASE_FIXED_FP(96); - CASE_FIXED_FP(97); - CASE_FIXED_FP(98); - CASE_FIXED_FP(99); - CASE_FIXED_FP(100); - CASE_FIXED_FP(101); - CASE_FIXED_FP(102); - 
CASE_FIXED_FP(103); - CASE_FIXED_FP(104); - CASE_FIXED_FP(105); - CASE_FIXED_FP(106); - CASE_FIXED_FP(107); - CASE_FIXED_FP(108); - CASE_FIXED_FP(109); - CASE_FIXED_FP(110); - CASE_FIXED_FP(111); - CASE_FIXED_FP(112); - CASE_FIXED_FP(113); - CASE_FIXED_FP(114); - CASE_FIXED_FP(115); - CASE_FIXED_FP(116); - CASE_FIXED_FP(117); - CASE_FIXED_FP(118); - CASE_FIXED_FP(119); - CASE_FIXED_FP(120); - CASE_FIXED_FP(121); - CASE_FIXED_FP(122); - CASE_FIXED_FP(123); - CASE_FIXED_FP(124); - CASE_FIXED_FP(125); - CASE_FIXED_FP(126); - CASE_FIXED_FP(127); - } -} - -void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, - struct ia64_fpreg *val) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - getfpreg(reg, val, regs); /* FIXME: handle NATs later*/ -} - -void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, - struct ia64_fpreg *val) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - if (reg > 1) - setfpreg(reg, val, regs); /* FIXME: handle NATs later*/ -} - -/* - * The Altix RTC is mapped specially here for the vmm module - */ -#define SN_RTC_BASE (u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT)) -static long kvm_get_itc(struct kvm_vcpu *vcpu) -{ -#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) - struct kvm *kvm = (struct kvm *)KVM_VM_BASE; - - if (kvm->arch.is_sn2) - return (*SN_RTC_BASE); - else -#endif - return ia64_getreg(_IA64_REG_AR_ITC); -} - -/************************************************************************ - * lsapic timer - ***********************************************************************/ -u64 vcpu_get_itc(struct kvm_vcpu *vcpu) -{ - unsigned long guest_itc; - guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu); - - if (guest_itc >= VMX(vcpu, last_itc)) { - VMX(vcpu, last_itc) = guest_itc; - return guest_itc; - } else - return VMX(vcpu, last_itc); -} - -static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val); -static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) -{ - struct kvm_vcpu *v; - struct kvm *kvm; - int i; - long itc_offset = val - kvm_get_itc(vcpu); - unsigned long vitv = VCPU(vcpu, itv); - - kvm = (struct kvm *)KVM_VM_BASE; - - if (kvm_vcpu_is_bsp(vcpu)) { - for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) { - v = (struct kvm_vcpu *)((char *)vcpu + - sizeof(struct kvm_vcpu_data) * i); - VMX(v, itc_offset) = itc_offset; - VMX(v, last_itc) = 0; - } - } - VMX(vcpu, last_itc) = 0; - if (VCPU(vcpu, itm) <= val) { - VMX(vcpu, itc_check) = 0; - vcpu_unpend_interrupt(vcpu, vitv); - } else { - VMX(vcpu, itc_check) = 1; - vcpu_set_itm(vcpu, VCPU(vcpu, itm)); - } - -} - -static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, itm)); -} - -static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val) -{ - unsigned long vitv = VCPU(vcpu, itv); - VCPU(vcpu, itm) = val; - - if (val > vcpu_get_itc(vcpu)) { - VMX(vcpu, itc_check) = 1; - vcpu_unpend_interrupt(vcpu, vitv); - VMX(vcpu, timer_pending) = 0; - } else - VMX(vcpu, itc_check) = 0; -} - -#define ITV_VECTOR(itv) (itv&0xff) -#define ITV_IRQ_MASK(itv) (itv&(1<<16)) - -static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, itv) = val; - if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) { - vcpu_pend_interrupt(vcpu, ITV_VECTOR(val)); - vcpu->arch.timer_pending = 0; - } -} - -static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val) -{ - int vec; - - vec = highest_inservice_irq(vcpu); - if (vec == NULL_VECTOR) - return; - VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63)); - VCPU(vcpu, eoi) = 0; - vcpu->arch.irq_new_pending = 
1; - -} - -/* See Table 5-8 in SDM vol2 for the definition */ -int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice) -{ - union ia64_tpr vtpr; - - vtpr.val = VCPU(vcpu, tpr); - - if (h_inservice == NMI_VECTOR) - return IRQ_MASKED_BY_INSVC; - - if (h_pending == NMI_VECTOR) { - /* Non Maskable Interrupt */ - return IRQ_NO_MASKED; - } - - if (h_inservice == ExtINT_VECTOR) - return IRQ_MASKED_BY_INSVC; - - if (h_pending == ExtINT_VECTOR) { - if (vtpr.mmi) { - /* mask all external IRQ */ - return IRQ_MASKED_BY_VTPR; - } else - return IRQ_NO_MASKED; - } - - if (is_higher_irq(h_pending, h_inservice)) { - if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4))) - return IRQ_NO_MASKED; - else - return IRQ_MASKED_BY_VTPR; - } else { - return IRQ_MASKED_BY_INSVC; - } -} - -void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec) -{ - long spsr; - int ret; - - local_irq_save(spsr); - ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0])); - local_irq_restore(spsr); - - vcpu->arch.irq_new_pending = 1; -} - -void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec) -{ - long spsr; - int ret; - - local_irq_save(spsr); - ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0])); - local_irq_restore(spsr); - if (ret) { - vcpu->arch.irq_new_pending = 1; - wmb(); - } -} - -void update_vhpi(struct kvm_vcpu *vcpu, int vec) -{ - u64 vhpi; - - if (vec == NULL_VECTOR) - vhpi = 0; - else if (vec == NMI_VECTOR) - vhpi = 32; - else if (vec == ExtINT_VECTOR) - vhpi = 16; - else - vhpi = vec >> 4; - - VCPU(vcpu, vhpi) = vhpi; - if (VCPU(vcpu, vac).a_int) - ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT, - (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0); -} - -u64 vcpu_get_ivr(struct kvm_vcpu *vcpu) -{ - int vec, h_inservice, mask; - - vec = highest_pending_irq(vcpu); - h_inservice = highest_inservice_irq(vcpu); - mask = irq_masked(vcpu, vec, h_inservice); - if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) { - if (VCPU(vcpu, vhpi)) - update_vhpi(vcpu, NULL_VECTOR); - return IA64_SPURIOUS_INT_VECTOR; - } - if (mask == IRQ_MASKED_BY_VTPR) { - update_vhpi(vcpu, vec); - return IA64_SPURIOUS_INT_VECTOR; - } - VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63)); - vcpu_unpend_interrupt(vcpu, vec); - return (u64)vec; -} - -/************************************************************************** - Privileged operation emulation routines - **************************************************************************/ -u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr) -{ - union ia64_pta vpta; - union ia64_rr vrr; - u64 pval; - u64 vhpt_offset; - - vpta.val = vcpu_get_pta(vcpu); - vrr.val = vcpu_get_rr(vcpu, vadr); - vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1); - if (vpta.vf) { - pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val, - vpta.val, 0, 0, 0, 0); - } else { - pval = (vadr & VRN_MASK) | vhpt_offset | - (vpta.val << 3 >> (vpta.size + 3) << (vpta.size)); - } - return pval; -} - -u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr) -{ - union ia64_rr vrr; - union ia64_pta vpta; - u64 pval; - - vpta.val = vcpu_get_pta(vcpu); - vrr.val = vcpu_get_rr(vcpu, vadr); - if (vpta.vf) { - pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val, - 0, 0, 0, 0, 0); - } else - pval = 1; - - return pval; -} - -u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr) -{ - struct thash_data *data; - union ia64_pta vpta; - u64 key; - - vpta.val = vcpu_get_pta(vcpu); - if (vpta.vf == 0) { - key = 1; - return key; - } - data = vtlb_lookup(vcpu, vadr, D_TLB); - if (!data || !data->p) - key = 1; - else - key = data->key; - - return key; -} - -void 
kvm_thash(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long thash, vadr; - - vadr = vcpu_get_gr(vcpu, inst.M46.r3); - thash = vcpu_thash(vcpu, vadr); - vcpu_set_gr(vcpu, inst.M46.r1, thash, 0); -} - -void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long tag, vadr; - - vadr = vcpu_get_gr(vcpu, inst.M46.r3); - tag = vcpu_ttag(vcpu, vadr); - vcpu_set_gr(vcpu, inst.M46.r1, tag, 0); -} - -int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr) -{ - struct thash_data *data; - union ia64_isr visr, pt_isr; - struct kvm_pt_regs *regs; - struct ia64_psr vpsr; - - regs = vcpu_regs(vcpu); - pt_isr.val = VMX(vcpu, cr_isr); - visr.val = 0; - visr.ei = pt_isr.ei; - visr.ir = pt_isr.ir; - vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - visr.na = 1; - - data = vhpt_lookup(vadr); - if (data) { - if (data->p == 0) { - vcpu_set_isr(vcpu, visr.val); - data_page_not_present(vcpu, vadr); - return IA64_FAULT; - } else if (data->ma == VA_MATTR_NATPAGE) { - vcpu_set_isr(vcpu, visr.val); - dnat_page_consumption(vcpu, vadr); - return IA64_FAULT; - } else { - *padr = (data->gpaddr >> data->ps << data->ps) | - (vadr & (PSIZE(data->ps) - 1)); - return IA64_NO_FAULT; - } - } - - data = vtlb_lookup(vcpu, vadr, D_TLB); - if (data) { - if (data->p == 0) { - vcpu_set_isr(vcpu, visr.val); - data_page_not_present(vcpu, vadr); - return IA64_FAULT; - } else if (data->ma == VA_MATTR_NATPAGE) { - vcpu_set_isr(vcpu, visr.val); - dnat_page_consumption(vcpu, vadr); - return IA64_FAULT; - } else{ - *padr = ((data->ppn >> (data->ps - 12)) << data->ps) - | (vadr & (PSIZE(data->ps) - 1)); - return IA64_NO_FAULT; - } - } - if (!vhpt_enabled(vcpu, vadr, NA_REF)) { - if (vpsr.ic) { - vcpu_set_isr(vcpu, visr.val); - alt_dtlb(vcpu, vadr); - return IA64_FAULT; - } else { - nested_dtlb(vcpu); - return IA64_FAULT; - } - } else { - if (vpsr.ic) { - vcpu_set_isr(vcpu, visr.val); - dvhpt_fault(vcpu, vadr); - return IA64_FAULT; - } else{ - nested_dtlb(vcpu); - return IA64_FAULT; - } - } - - return IA64_NO_FAULT; -} - -int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r1, r3; - - r3 = vcpu_get_gr(vcpu, inst.M46.r3); - - if (vcpu_tpa(vcpu, r3, &r1)) - return IA64_FAULT; - - vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); - return(IA64_NO_FAULT); -} - -void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r1, r3; - - r3 = vcpu_get_gr(vcpu, inst.M46.r3); - r1 = vcpu_tak(vcpu, r3); - vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); -} - -/************************************ - * Insert/Purge translation register/cache - ************************************/ -void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) -{ - thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB); -} - -void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) -{ - thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB); -} - -void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) -{ - u64 ps, va, rid; - struct thash_data *p_itr; - - ps = itir_ps(itir); - va = PAGEALIGN(ifa, ps); - pte &= ~PAGE_FLAGS_RV_MASK; - rid = vcpu_get_rr(vcpu, ifa); - rid = rid & RR_RID_MASK; - p_itr = (struct thash_data *)&vcpu->arch.itrs[slot]; - vcpu_set_tr(p_itr, pte, itir, va, rid); - vcpu_quick_region_set(VMX(vcpu, itr_regions), va); -} - - -void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) -{ - u64 gpfn; - u64 ps, va, rid; - struct thash_data *p_dtr; - - ps = itir_ps(itir); - va = PAGEALIGN(ifa, ps); - pte &= ~PAGE_FLAGS_RV_MASK; - - if (ps != _PAGE_SIZE_16M) - thash_purge_entries(vcpu, va, 
ps); - gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT; - if (__gpfn_is_io(gpfn)) - pte |= VTLB_PTE_IO; - rid = vcpu_get_rr(vcpu, va); - rid = rid & RR_RID_MASK; - p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot]; - vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot], - pte, itir, va, rid); - vcpu_quick_region_set(VMX(vcpu, dtr_regions), va); -} - -void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) -{ - int index; - u64 va; - - va = PAGEALIGN(ifa, ps); - while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0) - vcpu->arch.dtrs[index].page_flags = 0; - - thash_purge_entries(vcpu, va, ps); -} - -void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) -{ - int index; - u64 va; - - va = PAGEALIGN(ifa, ps); - while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0) - vcpu->arch.itrs[index].page_flags = 0; - - thash_purge_entries(vcpu, va, ps); -} - -void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps) -{ - va = PAGEALIGN(va, ps); - thash_purge_entries(vcpu, va, ps); -} - -void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va) -{ - thash_purge_all(vcpu); -} - -void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - long psr; - local_irq_save(psr); - p->exit_reason = EXIT_REASON_PTC_G; - - p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va); - p->u.ptc_g_data.vaddr = va; - p->u.ptc_g_data.ps = ps; - vmm_transition(vcpu); - /* Do Local Purge Here*/ - vcpu_ptc_l(vcpu, va, ps); - local_irq_restore(psr); -} - - -void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps) -{ - vcpu_ptc_ga(vcpu, va, ps); -} - -void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - vcpu_ptc_e(vcpu, ifa); -} - -void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptc_g(vcpu, ifa, itir_ps(itir)); -} - -void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptc_ga(vcpu, ifa, itir_ps(itir)); -} - -void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptc_l(vcpu, ifa, itir_ps(itir)); -} - -void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptr_d(vcpu, ifa, itir_ps(itir)); -} - -void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptr_i(vcpu, ifa, itir_ps(itir)); -} - -void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long itir, ifa, pte, slot; - - slot = vcpu_get_gr(vcpu, inst.M45.r3); - pte = vcpu_get_gr(vcpu, inst.M45.r2); - itir = vcpu_get_itir(vcpu); - ifa = vcpu_get_ifa(vcpu); - vcpu_itr_d(vcpu, slot, pte, itir, ifa); -} - - - -void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long itir, ifa, pte, slot; - - slot = vcpu_get_gr(vcpu, inst.M45.r3); - pte = vcpu_get_gr(vcpu, inst.M45.r2); - itir = vcpu_get_itir(vcpu); - ifa = vcpu_get_ifa(vcpu); - vcpu_itr_i(vcpu, slot, pte, itir, ifa); -} - -void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long itir, ifa, pte; - - itir = vcpu_get_itir(vcpu); - ifa = vcpu_get_ifa(vcpu); - pte = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_itc_d(vcpu, pte, 
itir, ifa); -} - -void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long itir, ifa, pte; - - itir = vcpu_get_itir(vcpu); - ifa = vcpu_get_ifa(vcpu); - pte = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_itc_i(vcpu, pte, itir, ifa); -} - -/************************************* - * Moves to semi-privileged registers - *************************************/ - -void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long imm; - - if (inst.M30.s) - imm = -inst.M30.imm; - else - imm = inst.M30.imm; - - vcpu_set_itc(vcpu, imm); -} - -void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r2; - - r2 = vcpu_get_gr(vcpu, inst.M29.r2); - vcpu_set_itc(vcpu, r2); -} - -void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r1; - - r1 = vcpu_get_itc(vcpu); - vcpu_set_gr(vcpu, inst.M31.r1, r1, 0); -} - -/************************************************************************** - struct kvm_vcpu protection key register access routines - **************************************************************************/ - -unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg) -{ - return ((unsigned long)ia64_get_pkr(reg)); -} - -void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val) -{ - ia64_set_pkr(reg, val); -} - -/******************************** - * Moves to privileged registers - ********************************/ -unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg, - unsigned long val) -{ - union ia64_rr oldrr, newrr; - unsigned long rrval; - struct exit_ctl_data *p = &vcpu->arch.exit_data; - unsigned long psr; - - oldrr.val = vcpu_get_rr(vcpu, reg); - newrr.val = val; - vcpu->arch.vrr[reg >> VRN_SHIFT] = val; - - switch ((unsigned long)(reg >> VRN_SHIFT)) { - case VRN6: - vcpu->arch.vmm_rr = vrrtomrr(val); - local_irq_save(psr); - p->exit_reason = EXIT_REASON_SWITCH_RR6; - vmm_transition(vcpu); - local_irq_restore(psr); - break; - case VRN4: - rrval = vrrtomrr(val); - vcpu->arch.metaphysical_saved_rr4 = rrval; - if (!is_physical_mode(vcpu)) - ia64_set_rr(reg, rrval); - break; - case VRN0: - rrval = vrrtomrr(val); - vcpu->arch.metaphysical_saved_rr0 = rrval; - if (!is_physical_mode(vcpu)) - ia64_set_rr(reg, rrval); - break; - default: - ia64_set_rr(reg, vrrtomrr(val)); - break; - } - - return (IA64_NO_FAULT); -} - -void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r2; - - r3 = vcpu_get_gr(vcpu, inst.M42.r3); - r2 = vcpu_get_gr(vcpu, inst.M42.r2); - vcpu_set_rr(vcpu, r3, r2); -} - -void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst) -{ -} - -void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst) -{ -} - -void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r2; - - r3 = vcpu_get_gr(vcpu, inst.M42.r3); - r2 = vcpu_get_gr(vcpu, inst.M42.r2); - vcpu_set_pmc(vcpu, r3, r2); -} - -void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r2; - - r3 = vcpu_get_gr(vcpu, inst.M42.r3); - r2 = vcpu_get_gr(vcpu, inst.M42.r2); - vcpu_set_pmd(vcpu, r3, r2); -} - -void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst) -{ - u64 r3, r2; - - r3 = vcpu_get_gr(vcpu, inst.M42.r3); - r2 = vcpu_get_gr(vcpu, inst.M42.r2); - vcpu_set_pkr(vcpu, r3, r2); -} - -void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_rr(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned 
long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_pkr(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_dbr(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_ibr(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_pmc(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg) -{ - /* FIXME: This could get called as a result of a rsvd-reg fault */ - if (reg > (ia64_get_cpuid(3) & 0xff)) - return 0; - else - return ia64_get_cpuid(reg); -} - -void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_cpuid(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val) -{ - VCPU(vcpu, tpr) = val; - vcpu->arch.irq_check = 1; -} - -unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r2; - - r2 = vcpu_get_gr(vcpu, inst.M32.r2); - VCPU(vcpu, vcr[inst.M32.cr3]) = r2; - - switch (inst.M32.cr3) { - case 0: - vcpu_set_dcr(vcpu, r2); - break; - case 1: - vcpu_set_itm(vcpu, r2); - break; - case 66: - vcpu_set_tpr(vcpu, r2); - break; - case 67: - vcpu_set_eoi(vcpu, r2); - break; - default: - break; - } - - return 0; -} - -unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long tgt = inst.M33.r1; - unsigned long val; - - switch (inst.M33.cr3) { - case 65: - val = vcpu_get_ivr(vcpu); - vcpu_set_gr(vcpu, tgt, val, 0); - break; - - case 67: - vcpu_set_gr(vcpu, tgt, 0L, 0); - break; - default: - val = VCPU(vcpu, vcr[inst.M33.cr3]); - vcpu_set_gr(vcpu, tgt, val, 0); - break; - } - - return 0; -} - -void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val) -{ - - unsigned long mask; - struct kvm_pt_regs *regs; - struct ia64_psr old_psr, new_psr; - - old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - - regs = vcpu_regs(vcpu); - /* We only support guest as: - * vpsr.pk = 0 - * vpsr.is = 0 - * Otherwise panic - */ - if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) - panic_vm(vcpu, "Only support guests with vpsr.pk =0 " - "& vpsr.is=0\n"); - - /* - * For those IA64_PSR bits: id/da/dd/ss/ed/ia - * Since these bits will become 0, after success execution of each - * instruction, we will change set them to mIA64_PSR - */ - VCPU(vcpu, vpsr) = val - & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | - IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA)); - - if (!old_psr.i && (val & IA64_PSR_I)) { - /* vpsr.i 0->1 */ - vcpu->arch.irq_check = 1; - } - new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - - /* - * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr) - * , except for the following bits: - * ic/i/dt/si/rt/mc/it/bn/vm - */ - mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI + - IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN + - IA64_PSR_VM; - - regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask)); - - check_mm_mode_switch(vcpu, old_psr, new_psr); - - return ; -} - -unsigned long vcpu_cover(struct kvm_vcpu *vcpu) -{ - struct ia64_psr vpsr; - - struct kvm_pt_regs *regs = 
vcpu_regs(vcpu); - vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - - if (!vpsr.ic) - VCPU(vcpu, ifs) = regs->cr_ifs; - regs->cr_ifs = IA64_IFS_V; - return (IA64_NO_FAULT); -} - - - -/************************************************************************** - VCPU banked general register access routines - **************************************************************************/ -#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ - do { \ - __asm__ __volatile__ ( \ - ";;extr.u %0 = %3,%6,16;;\n" \ - "dep %1 = %0, %1, 0, 16;;\n" \ - "st8 [%4] = %1\n" \ - "extr.u %0 = %2, 16, 16;;\n" \ - "dep %3 = %0, %3, %6, 16;;\n" \ - "st8 [%5] = %3\n" \ - ::"r"(i), "r"(*b1unat), "r"(*b0unat), \ - "r"(*runat), "r"(b1unat), "r"(runat), \ - "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ - } while (0) - -void vcpu_bsw0(struct kvm_vcpu *vcpu) -{ - unsigned long i; - - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - unsigned long *r = ®s->r16; - unsigned long *b0 = &VCPU(vcpu, vbgr[0]); - unsigned long *b1 = &VCPU(vcpu, vgr[0]); - unsigned long *runat = ®s->eml_unat; - unsigned long *b0unat = &VCPU(vcpu, vbnat); - unsigned long *b1unat = &VCPU(vcpu, vnat); - - - if (VCPU(vcpu, vpsr) & IA64_PSR_BN) { - for (i = 0; i < 16; i++) { - *b1++ = *r; - *r++ = *b0++; - } - vcpu_bsw0_unat(i, b0unat, b1unat, runat, - VMM_PT_REGS_R16_SLOT); - VCPU(vcpu, vpsr) &= ~IA64_PSR_BN; - } -} - -#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ - do { \ - __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n" \ - "dep %1 = %0, %1, 16, 16;;\n" \ - "st8 [%4] = %1\n" \ - "extr.u %0 = %2, 0, 16;;\n" \ - "dep %3 = %0, %3, %6, 16;;\n" \ - "st8 [%5] = %3\n" \ - ::"r"(i), "r"(*b0unat), "r"(*b1unat), \ - "r"(*runat), "r"(b0unat), "r"(runat), \ - "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ - } while (0) - -void vcpu_bsw1(struct kvm_vcpu *vcpu) -{ - unsigned long i; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - unsigned long *r = ®s->r16; - unsigned long *b0 = &VCPU(vcpu, vbgr[0]); - unsigned long *b1 = &VCPU(vcpu, vgr[0]); - unsigned long *runat = ®s->eml_unat; - unsigned long *b0unat = &VCPU(vcpu, vbnat); - unsigned long *b1unat = &VCPU(vcpu, vnat); - - if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) { - for (i = 0; i < 16; i++) { - *b0++ = *r; - *r++ = *b1++; - } - vcpu_bsw1_unat(i, b0unat, b1unat, runat, - VMM_PT_REGS_R16_SLOT); - VCPU(vcpu, vpsr) |= IA64_PSR_BN; - } -} - -void vcpu_rfi(struct kvm_vcpu *vcpu) -{ - unsigned long ifs, psr; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - psr = VCPU(vcpu, ipsr); - if (psr & IA64_PSR_BN) - vcpu_bsw1(vcpu); - else - vcpu_bsw0(vcpu); - vcpu_set_psr(vcpu, psr); - ifs = VCPU(vcpu, ifs); - if (ifs >> 63) - regs->cr_ifs = ifs; - regs->cr_iip = VCPU(vcpu, iip); -} - -/* - VPSR can't keep track of below bits of guest PSR - This function gets guest PSR - */ - -unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu) -{ - unsigned long mask; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | - IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI; - return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask); -} - -void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long vpsr; - unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21) - | inst.M44.imm; - - vpsr = vcpu_get_psr(vcpu); - vpsr &= (~imm24); - vcpu_set_psr(vcpu, vpsr); -} - -void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long vpsr; - unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21) - | inst.M44.imm; - - vpsr = vcpu_get_psr(vcpu); - 
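    /* ssm sets the PSR bits named by imm24; kvm_rsm() above clears them */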
vpsr |= imm24; - vcpu_set_psr(vcpu, vpsr); -} - -/* Generate Mask - * Parameter: - * bit -- starting bit - * len -- how many bits - */ -#define MASK(bit,len) \ -({ \ - __u64 ret; \ - \ - __asm __volatile("dep %0=-1, r0, %1, %2"\ - : "=r" (ret): \ - "M" (bit), \ - "M" (len)); \ - ret; \ -}) - -void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val) -{ - val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32)); - vcpu_set_psr(vcpu, val); -} - -void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long val; - - val = vcpu_get_gr(vcpu, inst.M35.r2); - vcpu_set_psr_l(vcpu, val); -} - -void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long val; - - val = vcpu_get_psr(vcpu); - val = (val & MASK(0, 32)) | (val & MASK(35, 2)); - vcpu_set_gr(vcpu, inst.M33.r1, val, 0); -} - -void vcpu_increment_iip(struct kvm_vcpu *vcpu) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; - if (ipsr->ri == 2) { - ipsr->ri = 0; - regs->cr_iip += 16; - } else - ipsr->ri++; -} - -void vcpu_decrement_iip(struct kvm_vcpu *vcpu) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; - - if (ipsr->ri == 0) { - ipsr->ri = 2; - regs->cr_iip -= 16; - } else - ipsr->ri--; -} - -/** Emulate a privileged operation. - * - * - * @param vcpu virtual cpu - * @cause the reason cause virtualization fault - * @opcode the instruction code which cause virtualization fault - */ - -void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs) -{ - unsigned long status, cause, opcode ; - INST64 inst; - - status = IA64_NO_FAULT; - cause = VMX(vcpu, cause); - opcode = VMX(vcpu, opcode); - inst.inst = opcode; - /* - * Switch to actual virtual rid in rr0 and rr4, - * which is required by some tlb related instructions. 
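The dispatcher below always brackets handler dispatch with the rid-switch pair described here. A condensed sketch of its shape (dispatch elided; not a drop-in replacement for the real body):

    /* illustrative outline of kvm_emulate(), using the same names */
    prepare_if_physical_mode(vcpu);   /* borrow virtual RIDs if physical */
    switch (cause) {
    /* ... one kvm_*()/vcpu_*() handler per EVENT_* cause ... */
    }
    if (status == IA64_NO_FAULT && cause != EVENT_RFI)
            vcpu_increment_iip(vcpu); /* advance to the next bundle slot */
    recover_if_physical_mode(vcpu);   /* back to metaphysical RIDs */

EVENT_RFI is excluded from the iip increment because vcpu_rfi() has already loaded cr.iip itself.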
- */ - prepare_if_physical_mode(vcpu); - - switch (cause) { - case EVENT_RSM: - kvm_rsm(vcpu, inst); - break; - case EVENT_SSM: - kvm_ssm(vcpu, inst); - break; - case EVENT_MOV_TO_PSR: - kvm_mov_to_psr(vcpu, inst); - break; - case EVENT_MOV_FROM_PSR: - kvm_mov_from_psr(vcpu, inst); - break; - case EVENT_MOV_FROM_CR: - kvm_mov_from_cr(vcpu, inst); - break; - case EVENT_MOV_TO_CR: - kvm_mov_to_cr(vcpu, inst); - break; - case EVENT_BSW_0: - vcpu_bsw0(vcpu); - break; - case EVENT_BSW_1: - vcpu_bsw1(vcpu); - break; - case EVENT_COVER: - vcpu_cover(vcpu); - break; - case EVENT_RFI: - vcpu_rfi(vcpu); - break; - case EVENT_ITR_D: - kvm_itr_d(vcpu, inst); - break; - case EVENT_ITR_I: - kvm_itr_i(vcpu, inst); - break; - case EVENT_PTR_D: - kvm_ptr_d(vcpu, inst); - break; - case EVENT_PTR_I: - kvm_ptr_i(vcpu, inst); - break; - case EVENT_ITC_D: - kvm_itc_d(vcpu, inst); - break; - case EVENT_ITC_I: - kvm_itc_i(vcpu, inst); - break; - case EVENT_PTC_L: - kvm_ptc_l(vcpu, inst); - break; - case EVENT_PTC_G: - kvm_ptc_g(vcpu, inst); - break; - case EVENT_PTC_GA: - kvm_ptc_ga(vcpu, inst); - break; - case EVENT_PTC_E: - kvm_ptc_e(vcpu, inst); - break; - case EVENT_MOV_TO_RR: - kvm_mov_to_rr(vcpu, inst); - break; - case EVENT_MOV_FROM_RR: - kvm_mov_from_rr(vcpu, inst); - break; - case EVENT_THASH: - kvm_thash(vcpu, inst); - break; - case EVENT_TTAG: - kvm_ttag(vcpu, inst); - break; - case EVENT_TPA: - status = kvm_tpa(vcpu, inst); - break; - case EVENT_TAK: - kvm_tak(vcpu, inst); - break; - case EVENT_MOV_TO_AR_IMM: - kvm_mov_to_ar_imm(vcpu, inst); - break; - case EVENT_MOV_TO_AR: - kvm_mov_to_ar_reg(vcpu, inst); - break; - case EVENT_MOV_FROM_AR: - kvm_mov_from_ar_reg(vcpu, inst); - break; - case EVENT_MOV_TO_DBR: - kvm_mov_to_dbr(vcpu, inst); - break; - case EVENT_MOV_TO_IBR: - kvm_mov_to_ibr(vcpu, inst); - break; - case EVENT_MOV_TO_PMC: - kvm_mov_to_pmc(vcpu, inst); - break; - case EVENT_MOV_TO_PMD: - kvm_mov_to_pmd(vcpu, inst); - break; - case EVENT_MOV_TO_PKR: - kvm_mov_to_pkr(vcpu, inst); - break; - case EVENT_MOV_FROM_DBR: - kvm_mov_from_dbr(vcpu, inst); - break; - case EVENT_MOV_FROM_IBR: - kvm_mov_from_ibr(vcpu, inst); - break; - case EVENT_MOV_FROM_PMC: - kvm_mov_from_pmc(vcpu, inst); - break; - case EVENT_MOV_FROM_PKR: - kvm_mov_from_pkr(vcpu, inst); - break; - case EVENT_MOV_FROM_CPUID: - kvm_mov_from_cpuid(vcpu, inst); - break; - case EVENT_VMSW: - status = IA64_FAULT; - break; - default: - break; - }; - /*Assume all status is NO_FAULT ?*/ - if (status == IA64_NO_FAULT && cause != EVENT_RFI) - vcpu_increment_iip(vcpu); - - recover_if_physical_mode(vcpu); -} - -void init_vcpu(struct kvm_vcpu *vcpu) -{ - int i; - - vcpu->arch.mode_flags = GUEST_IN_PHY; - VMX(vcpu, vrr[0]) = 0x38; - VMX(vcpu, vrr[1]) = 0x38; - VMX(vcpu, vrr[2]) = 0x38; - VMX(vcpu, vrr[3]) = 0x38; - VMX(vcpu, vrr[4]) = 0x38; - VMX(vcpu, vrr[5]) = 0x38; - VMX(vcpu, vrr[6]) = 0x38; - VMX(vcpu, vrr[7]) = 0x38; - VCPU(vcpu, vpsr) = IA64_PSR_BN; - VCPU(vcpu, dcr) = 0; - /* pta.size must not be 0. The minimum is 15 (32k) */ - VCPU(vcpu, pta) = 15 << 2; - VCPU(vcpu, itv) = 0x10000; - VCPU(vcpu, itm) = 0; - VMX(vcpu, last_itc) = 0; - - VCPU(vcpu, lid) = VCPU_LID(vcpu); - VCPU(vcpu, ivr) = 0; - VCPU(vcpu, tpr) = 0x10000; - VCPU(vcpu, eoi) = 0; - VCPU(vcpu, irr[0]) = 0; - VCPU(vcpu, irr[1]) = 0; - VCPU(vcpu, irr[2]) = 0; - VCPU(vcpu, irr[3]) = 0; - VCPU(vcpu, pmv) = 0x10000; - VCPU(vcpu, cmcv) = 0x10000; - VCPU(vcpu, lrr0) = 0x10000; /* default reset value? */ - VCPU(vcpu, lrr1) = 0x10000; /* default reset value? 
*/ - update_vhpi(vcpu, NULL_VECTOR); - VLSAPIC_XTP(vcpu) = 0x80; /* disabled */ - - for (i = 0; i < 4; i++) - VLSAPIC_INSVC(vcpu, i) = 0; -} - -void kvm_init_all_rr(struct kvm_vcpu *vcpu) -{ - unsigned long psr; - - local_irq_save(psr); - - /* WARNING: not allow co-exist of both virtual mode and physical - * mode in same region - */ - - vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0])); - vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4])); - - if (is_physical_mode(vcpu)) { - if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) - panic_vm(vcpu, "Machine Status conflicts!\n"); - - ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); - ia64_dv_serialize_data(); - ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4); - ia64_dv_serialize_data(); - } else { - ia64_set_rr((VRN0 << VRN_SHIFT), - vcpu->arch.metaphysical_saved_rr0); - ia64_dv_serialize_data(); - ia64_set_rr((VRN4 << VRN_SHIFT), - vcpu->arch.metaphysical_saved_rr4); - ia64_dv_serialize_data(); - } - ia64_set_rr((VRN1 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN1]))); - ia64_dv_serialize_data(); - ia64_set_rr((VRN2 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN2]))); - ia64_dv_serialize_data(); - ia64_set_rr((VRN3 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN3]))); - ia64_dv_serialize_data(); - ia64_set_rr((VRN5 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN5]))); - ia64_dv_serialize_data(); - ia64_set_rr((VRN7 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN7]))); - ia64_dv_serialize_data(); - ia64_srlz_d(); - ia64_set_psr(psr); -} - -int vmm_entry(void) -{ - struct kvm_vcpu *v; - v = current_vcpu; - - ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd, - 0, 0, 0, 0, 0, 0); - kvm_init_vtlb(v); - kvm_init_vhpt(v); - init_vcpu(v); - kvm_init_all_rr(v); - vmm_reset_entry(); - - return 0; -} - -static void kvm_show_registers(struct kvm_pt_regs *regs) -{ - unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; - - struct kvm_vcpu *vcpu = current_vcpu; - if (vcpu != NULL) - printk("vcpu 0x%p vcpu %d\n", - vcpu, vcpu->vcpu_id); - - printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n", - regs->cr_ipsr, regs->cr_ifs, ip); - - printk("unat: %016lx pfs : %016lx rsc : %016lx\n", - regs->ar_unat, regs->ar_pfs, regs->ar_rsc); - printk("rnat: %016lx bspstore: %016lx pr : %016lx\n", - regs->ar_rnat, regs->ar_bspstore, regs->pr); - printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", - regs->loadrs, regs->ar_ccv, regs->ar_fpsr); - printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd); - printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, - regs->b6, regs->b7); - printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", - regs->f6.u.bits[1], regs->f6.u.bits[0], - regs->f7.u.bits[1], regs->f7.u.bits[0]); - printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", - regs->f8.u.bits[1], regs->f8.u.bits[0], - regs->f9.u.bits[1], regs->f9.u.bits[0]); - printk("f10 : %05lx%016lx f11 : %05lx%016lx\n", - regs->f10.u.bits[1], regs->f10.u.bits[0], - regs->f11.u.bits[1], regs->f11.u.bits[0]); - - printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, - regs->r2, regs->r3); - printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, - regs->r9, regs->r10); - printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, - regs->r12, regs->r13); - printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, - regs->r15, regs->r16); - printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, - regs->r18, regs->r19); - printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, - regs->r21, regs->r22); - printk("r23 : %016lx r24 
: %016lx r25 : %016lx\n", regs->r23, - regs->r24, regs->r25); - printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, - regs->r27, regs->r28); - printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, - regs->r30, regs->r31); - -} - -void panic_vm(struct kvm_vcpu *v, const char *fmt, ...) -{ - va_list args; - char buf[256]; - - struct kvm_pt_regs *regs = vcpu_regs(v); - struct exit_ctl_data *p = &v->arch.exit_data; - va_start(args, fmt); - vsnprintf(buf, sizeof(buf), fmt, args); - va_end(args); - printk(buf); - kvm_show_registers(regs); - p->exit_reason = EXIT_REASON_VM_PANIC; - vmm_transition(v); - /*Never to return*/ - while (1); -} diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h deleted file mode 100644 index 988911b4cc7..00000000000 --- a/arch/ia64/kvm/vcpu.h +++ /dev/null @@ -1,752 +0,0 @@ -/* - * vcpu.h: vcpu routines - * Copyright (c) 2005, Intel Corporation. - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) - * - * Copyright (c) 2007, Intel Corporation. - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - - -#ifndef __KVM_VCPU_H__ -#define __KVM_VCPU_H__ - -#include <asm/types.h> -#include <asm/fpu.h> -#include <asm/processor.h> - -#ifndef __ASSEMBLY__ -#include "vti.h" - -#include <linux/kvm_host.h> -#include <linux/spinlock.h> - -typedef unsigned long IA64_INST; - -typedef union U_IA64_BUNDLE { - unsigned long i64[2]; - struct { unsigned long template:5, slot0:41, slot1a:18, - slot1b:23, slot2:41; }; - /* NOTE: following doesn't work because bitfields can't cross natural - size boundaries - struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */ -} IA64_BUNDLE; - -typedef union U_INST64_A5 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5, - imm9d:9, s:1, major:4; }; -} INST64_A5; - -typedef union U_INST64_B4 { - IA64_INST inst; - struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6, - wh:2, d:1, un1:1, major:4; }; -} INST64_B4; - -typedef union U_INST64_B8 { - IA64_INST inst; - struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; }; -} INST64_B8; - -typedef union U_INST64_B9 { - IA64_INST inst; - struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; }; -} INST64_B9; - -typedef union U_INST64_I19 { - IA64_INST inst; - struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; }; -} INST64_I19; - -typedef union U_INST64_I26 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; -} INST64_I26; - -typedef union U_INST64_I27 { - IA64_INST inst; - struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; }; -} INST64_I27; - -typedef union U_INST64_I28 { /* not privileged (mov from AR) */ - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; -} INST64_I28; - -typedef union U_INST64_M28 { - IA64_INST inst; - struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M28; - -typedef union U_INST64_M29 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M29; - -typedef union U_INST64_M30 { - IA64_INST inst; - struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2, - x3:3, s:1, major:4; }; -} INST64_M30; - -typedef union U_INST64_M31 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M31; - -typedef union U_INST64_M32 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M32; - -typedef union U_INST64_M33 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M33; - -typedef union U_INST64_M35 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; - -} INST64_M35; - -typedef union U_INST64_M36 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; }; -} INST64_M36; - -typedef union U_INST64_M37 { - IA64_INST inst; - struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3, - i:1, major:4; }; -} INST64_M37; - -typedef union U_INST64_M41 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; -} INST64_M41; - -typedef union U_INST64_M42 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M42; - -typedef union U_INST64_M43 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M43; - -typedef union U_INST64_M44 { - IA64_INST inst; - struct { unsigned long qp:6, 
imm:21, x4:4, i2:2, x3:3, i:1, major:4; }; -} INST64_M44; - -typedef union U_INST64_M45 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M45; - -typedef union U_INST64_M46 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6, - x3:3, un1:1, major:4; }; -} INST64_M46; - -typedef union U_INST64_M47 { - IA64_INST inst; - struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; }; -} INST64_M47; - -typedef union U_INST64_M1{ - IA64_INST inst; - struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M1; - -typedef union U_INST64_M2{ - IA64_INST inst; - struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M2; - -typedef union U_INST64_M3{ - IA64_INST inst; - struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2, - x6:6, s:1, major:4; }; -} INST64_M3; - -typedef union U_INST64_M4 { - IA64_INST inst; - struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M4; - -typedef union U_INST64_M5 { - IA64_INST inst; - struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2, - x6:6, s:1, major:4; }; -} INST64_M5; - -typedef union U_INST64_M6 { - IA64_INST inst; - struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M6; - -typedef union U_INST64_M9 { - IA64_INST inst; - struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M9; - -typedef union U_INST64_M10 { - IA64_INST inst; - struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2, - x6:6, s:1, major:4; }; -} INST64_M10; - -typedef union U_INST64_M12 { - IA64_INST inst; - struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M12; - -typedef union U_INST64_M15 { - IA64_INST inst; - struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2, - x6:6, s:1, major:4; }; -} INST64_M15; - -typedef union U_INST64 { - IA64_INST inst; - struct { unsigned long :37, major:4; } generic; - INST64_A5 A5; /* used in build_hypercall_bundle only */ - INST64_B4 B4; /* used in build_hypercall_bundle only */ - INST64_B8 B8; /* rfi, bsw.[01] */ - INST64_B9 B9; /* break.b */ - INST64_I19 I19; /* used in build_hypercall_bundle only */ - INST64_I26 I26; /* mov register to ar (I unit) */ - INST64_I27 I27; /* mov immediate to ar (I unit) */ - INST64_I28 I28; /* mov from ar (I unit) */ - INST64_M1 M1; /* ld integer */ - INST64_M2 M2; - INST64_M3 M3; - INST64_M4 M4; /* st integer */ - INST64_M5 M5; - INST64_M6 M6; /* ldfd floating pointer */ - INST64_M9 M9; /* stfd floating pointer */ - INST64_M10 M10; /* stfd floating pointer */ - INST64_M12 M12; /* ldfd pair floating pointer */ - INST64_M15 M15; /* lfetch + imm update */ - INST64_M28 M28; /* purge translation cache entry */ - INST64_M29 M29; /* mov register to ar (M unit) */ - INST64_M30 M30; /* mov immediate to ar (M unit) */ - INST64_M31 M31; /* mov from ar (M unit) */ - INST64_M32 M32; /* mov reg to cr */ - INST64_M33 M33; /* mov from cr */ - INST64_M35 M35; /* mov to psr */ - INST64_M36 M36; /* mov from psr */ - INST64_M37 M37; /* break.m */ - INST64_M41 M41; /* translation cache insert */ - INST64_M42 M42; /* mov to indirect reg/translation reg insert*/ - INST64_M43 M43; /* mov from indirect reg */ - INST64_M44 M44; /* set/reset system mask */ - INST64_M45 M45; /* translation purge */ - INST64_M46 M46; /* translation access (tpa,tak) */ - INST64_M47 M47; /* purge translation 
entry */ -} INST64; - -#define MASK_41 ((unsigned long)0x1ffffffffff) - -/* Virtual address memory attributes encoding */ -#define VA_MATTR_WB 0x0 -#define VA_MATTR_UC 0x4 -#define VA_MATTR_UCE 0x5 -#define VA_MATTR_WC 0x6 -#define VA_MATTR_NATPAGE 0x7 - -#define PMASK(size) (~((size) - 1)) -#define PSIZE(size) (1UL<<(size)) -#define CLEARLSB(ppn, nbits) (((ppn) >> (nbits)) << (nbits)) -#define PAGEALIGN(va, ps) CLEARLSB(va, ps) -#define PAGE_FLAGS_RV_MASK (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53)) -#define _PAGE_MA_ST (0x1 << 2) /* is reserved for software use */ - -#define ARCH_PAGE_SHIFT 12 - -#define INVALID_TI_TAG (1UL << 63) - -#define VTLB_PTE_P_BIT 0 -#define VTLB_PTE_IO_BIT 60 -#define VTLB_PTE_IO (1UL<<VTLB_PTE_IO_BIT) -#define VTLB_PTE_P (1UL<<VTLB_PTE_P_BIT) - -#define vcpu_quick_region_check(_tr_regions,_ifa) \ - (_tr_regions & (1 << ((unsigned long)_ifa >> 61))) - -#define vcpu_quick_region_set(_tr_regions,_ifa) \ - do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0) - -static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir, - u64 va, u64 rid) -{ - trp->page_flags = pte; - trp->itir = itir; - trp->vadr = va; - trp->rid = rid; -} - -extern u64 kvm_get_mpt_entry(u64 gpfn); - -/* Return I/ */ -static inline u64 __gpfn_is_io(u64 gpfn) -{ - u64 pte; - pte = kvm_get_mpt_entry(gpfn); - if (!(pte & GPFN_INV_MASK)) { - pte = pte & GPFN_IO_MASK; - if (pte != GPFN_PHYS_MMIO) - return pte; - } - return 0; -} -#endif -#define IA64_NO_FAULT 0 -#define IA64_FAULT 1 - -#define VMM_RBS_OFFSET ((VMM_TASK_SIZE + 15) & ~15) - -#define SW_BAD 0 /* Bad mode transitition */ -#define SW_V2P 1 /* Physical emulatino is activated */ -#define SW_P2V 2 /* Exit physical mode emulation */ -#define SW_SELF 3 /* No mode transition */ -#define SW_NOP 4 /* Mode transition, but without action required */ - -#define GUEST_IN_PHY 0x1 -#define GUEST_PHY_EMUL 0x2 - -#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP)) - -#define VRN_SHIFT 61 -#define VRN_MASK 0xe000000000000000 -#define VRN0 0x0UL -#define VRN1 0x1UL -#define VRN2 0x2UL -#define VRN3 0x3UL -#define VRN4 0x4UL -#define VRN5 0x5UL -#define VRN6 0x6UL -#define VRN7 0x7UL - -#define IRQ_NO_MASKED 0 -#define IRQ_MASKED_BY_VTPR 1 -#define IRQ_MASKED_BY_INSVC 2 /* masked by inservice IRQ */ - -#define PTA_BASE_SHIFT 15 - -#define IA64_PSR_VM_BIT 46 -#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT) - -/* Interruption Function State */ -#define IA64_IFS_V_BIT 63 -#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT) - -#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX) -#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX) - -#ifndef __ASSEMBLY__ - -#include <asm/gcc_intrin.h> - -#define is_physical_mode(v) \ - ((v->arch.mode_flags) & GUEST_IN_PHY) - -#define is_virtual_mode(v) \ - (!is_physical_mode(v)) - -#define MODE_IND(psr) \ - (((psr).it << 2) + ((psr).dt << 1) + (psr).rt) - -#ifndef CONFIG_SMP -#define _vmm_raw_spin_lock(x) do {}while(0) -#define _vmm_raw_spin_unlock(x) do {}while(0) -#else -typedef struct { - volatile unsigned int lock; -} vmm_spinlock_t; -#define _vmm_raw_spin_lock(x) \ - do { \ - __u32 *ia64_spinlock_ptr = (__u32 *) (x); \ - __u64 ia64_spinlock_val; \ - ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ - if (unlikely(ia64_spinlock_val)) { \ - do { \ - while (*ia64_spinlock_ptr) \ - ia64_barrier(); \ - ia64_spinlock_val = \ - ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ - } while (ia64_spinlock_val); \ - } \ - } while (0) - -#define 
_vmm_raw_spin_unlock(x) \ - do { barrier(); \ - ((vmm_spinlock_t *)x)->lock = 0; } \ -while (0) -#endif - -void vmm_spin_lock(vmm_spinlock_t *lock); -void vmm_spin_unlock(vmm_spinlock_t *lock); -enum { - I_TLB = 1, - D_TLB = 2 -}; - -union kvm_va { - struct { - unsigned long off : 60; /* intra-region offset */ - unsigned long reg : 4; /* region number */ - } f; - unsigned long l; - void *p; -}; - -#define __kvm_pa(x) ({union kvm_va _v; _v.l = (long) (x); \ - _v.f.reg = 0; _v.l; }) -#define __kvm_va(x) ({union kvm_va _v; _v.l = (long) (x); \ - _v.f.reg = -1; _v.p; }) - -#define _REGION_ID(x) ({union ia64_rr _v; _v.val = (long)(x); \ - _v.rid; }) -#define _REGION_PAGE_SIZE(x) ({union ia64_rr _v; _v.val = (long)(x); \ - _v.ps; }) -#define _REGION_HW_WALKER(x) ({union ia64_rr _v; _v.val = (long)(x); \ - _v.ve; }) - -enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF }; -enum tlb_miss_type { INSTRUCTION, DATA, REGISTER }; - -#define VCPU(_v, _x) ((_v)->arch.vpd->_x) -#define VMX(_v, _x) ((_v)->arch._x) - -#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i]) -#define VLSAPIC_XTP(_v) VMX(_v, xtp) - -static inline unsigned long itir_ps(unsigned long itir) -{ - return ((itir >> 2) & 0x3f); -} - - -/************************************************************************** - VCPU control register access routines - **************************************************************************/ - -static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, itir)); -} - -static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, itir) = val; -} - -static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, ifa)); -} - -static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, ifa) = val; -} - -static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, iva)); -} - -static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, pta)); -} - -static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, lid)); -} - -static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, tpr)); -} - -static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu) -{ - return (0UL); /*reads of eoi always return 0 */ -} - -static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, irr[0])); -} - -static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, irr[1])); -} - -static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, irr[2])); -} - -static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, irr[3])); -} - -static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val) -{ - ia64_setreg(_IA64_REG_CR_DCR, val); -} - -static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, isr) = val; -} - -static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, lid) = val; -} - -static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, ipsr) = val; -} - -static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, iip) = val; -} - -static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, ifs) = val; -} - -static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, iipa) = val; -} - -static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, iha) = val; -} - - -static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, 
u64 reg) -{ - return vcpu->arch.vrr[reg>>61]; -} - -/************************************************************************** - VCPU debug breakpoint register access routines - **************************************************************************/ - -static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val) -{ - __ia64_set_dbr(reg, val); -} - -static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val) -{ - ia64_set_ibr(reg, val); -} - -static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg) -{ - return ((u64)__ia64_get_dbr(reg)); -} - -static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg) -{ - return ((u64)ia64_get_ibr(reg)); -} - -/************************************************************************** - VCPU performance monitor register access routines - **************************************************************************/ -static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val) -{ - /* NOTE: Writes to unimplemented PMC registers are discarded */ - ia64_set_pmc(reg, val); -} - -static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val) -{ - /* NOTE: Writes to unimplemented PMD registers are discarded */ - ia64_set_pmd(reg, val); -} - -static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg) -{ - /* NOTE: Reads from unimplemented PMC registers return zero */ - return ((u64)ia64_get_pmc(reg)); -} - -static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg) -{ - /* NOTE: Reads from unimplemented PMD registers return zero */ - return ((u64)ia64_get_pmd(reg)); -} - -static inline unsigned long vrrtomrr(unsigned long val) -{ - union ia64_rr rr; - rr.val = val; - rr.rid = (rr.rid << 4) | 0xe; - if (rr.ps > PAGE_SHIFT) - rr.ps = PAGE_SHIFT; - rr.ve = 1; - return rr.val; -} - - -static inline int highest_bits(int *dat) -{ - u32 bits, bitnum; - int i; - - /* loop for all 256 bits */ - for (i = 7; i >= 0 ; i--) { - bits = dat[i]; - if (bits) { - bitnum = fls(bits); - return i * 32 + bitnum - 1; - } - } - return NULL_VECTOR; -} - -/* - * The pending irq is higher than the inservice one. - * - */ -static inline int is_higher_irq(int pending, int inservice) -{ - return ((pending > inservice) - || ((pending != NULL_VECTOR) - && (inservice == NULL_VECTOR))); -} - -static inline int is_higher_class(int pending, int mic) -{ - return ((pending >> 4) > mic); -} - -/* - * Return 0-255 for pending irq. - * NULL_VECTOR: when no pending. 
- */ -static inline int highest_pending_irq(struct kvm_vcpu *vcpu) -{ - if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR)) - return NMI_VECTOR; - if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR)) - return ExtINT_VECTOR; - - return highest_bits((int *)&VCPU(vcpu, irr[0])); -} - -static inline int highest_inservice_irq(struct kvm_vcpu *vcpu) -{ - if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR)) - return NMI_VECTOR; - if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR)) - return ExtINT_VECTOR; - - return highest_bits((int *)&(VMX(vcpu, insvc[0]))); -} - -extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, - struct ia64_fpreg *val); -extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, - struct ia64_fpreg *val); -extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg); -extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, - u64 val, int nat); -extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu); -extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val); -extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr); -extern void vcpu_bsw0(struct kvm_vcpu *vcpu); -extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, - u64 itir, u64 va, int type); -extern struct thash_data *vhpt_lookup(u64 va); -extern u64 guest_vhpt_lookup(u64 iha, u64 *pte); -extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps); -extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps); -extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va); -extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, - u64 itir, u64 ifa, int type); -extern void thash_purge_all(struct kvm_vcpu *v); -extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v, - u64 va, int is_data); -extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, - u64 ps, int is_data); - -extern void vcpu_increment_iip(struct kvm_vcpu *v); -extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu); -extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec); -extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec); -extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr); -extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr); -extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr); -extern void nested_dtlb(struct kvm_vcpu *vcpu); -extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr); -extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref); - -extern void update_vhpi(struct kvm_vcpu *vcpu, int vec); -extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice); - -extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle); -extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma); -extern void vmm_transition(struct kvm_vcpu *vcpu); -extern void vmm_trampoline(union context *from, union context *to); -extern int vmm_entry(void); -extern u64 vcpu_get_itc(struct kvm_vcpu *vcpu); - -extern void vmm_reset_entry(void); -void kvm_init_vtlb(struct kvm_vcpu *v); -void kvm_init_vhpt(struct kvm_vcpu *v); -void thash_init(struct thash_cb *hcb, u64 sz); - -void panic_vm(struct kvm_vcpu *v, const char *fmt, ...); -u64 kvm_gpa_to_mpa(u64 gpa); -extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3, - u64 arg4, u64 arg5, u64 arg6, u64 arg7); - -extern long vmm_sanity; - -#endif -#endif /* __VCPU_H__ */ diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c deleted file mode 100644 index 176a12cd56d..00000000000 --- a/arch/ia64/kvm/vmm.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * vmm.c: 
vmm module interface with kvm module - * - * Copyright (c) 2007, Intel Corporation. - * - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - */ - - -#include <linux/kernel.h> -#include <linux/module.h> -#include <asm/fpswa.h> - -#include "vcpu.h" - -MODULE_AUTHOR("Intel"); -MODULE_LICENSE("GPL"); - -extern char kvm_ia64_ivt; -extern char kvm_asm_mov_from_ar; -extern char kvm_asm_mov_from_ar_sn2; -extern fpswa_interface_t *vmm_fpswa_interface; - -long vmm_sanity = 1; - -struct kvm_vmm_info vmm_info = { - .module = THIS_MODULE, - .vmm_entry = vmm_entry, - .tramp_entry = vmm_trampoline, - .vmm_ivt = (unsigned long)&kvm_ia64_ivt, - .patch_mov_ar = (unsigned long)&kvm_asm_mov_from_ar, - .patch_mov_ar_sn2 = (unsigned long)&kvm_asm_mov_from_ar_sn2, -}; - -static int __init kvm_vmm_init(void) -{ - - vmm_fpswa_interface = fpswa_interface; - - /*Register vmm data to kvm side*/ - return kvm_init(&vmm_info, 1024, 0, THIS_MODULE); -} - -static void __exit kvm_vmm_exit(void) -{ - kvm_exit(); - return ; -} - -void vmm_spin_lock(vmm_spinlock_t *lock) -{ - _vmm_raw_spin_lock(lock); -} - -void vmm_spin_unlock(vmm_spinlock_t *lock) -{ - _vmm_raw_spin_unlock(lock); -} - -static void vcpu_debug_exit(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - long psr; - - local_irq_save(psr); - p->exit_reason = EXIT_REASON_DEBUG; - vmm_transition(vcpu); - local_irq_restore(psr); -} - -asmlinkage int printk(const char *fmt, ...) -{ - struct kvm_vcpu *vcpu = current_vcpu; - va_list args; - int r; - - memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN); - va_start(args, fmt); - r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args); - va_end(args); - vcpu_debug_exit(vcpu); - return r; -} - -module_init(kvm_vmm_init) -module_exit(kvm_vmm_exit) diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S deleted file mode 100644 index 397e34a63e1..00000000000 --- a/arch/ia64/kvm/vmm_ivt.S +++ /dev/null @@ -1,1392 +0,0 @@ -/* - * arch/ia64/kvm/vmm_ivt.S - * - * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co - * Stephane Eranian <eranian@hpl.hp.com> - * David Mosberger <davidm@hpl.hp.com> - * Copyright (C) 2000, 2002-2003 Intel Co - * Asit Mallick <asit.k.mallick@intel.com> - * Suresh Siddha <suresh.b.siddha@intel.com> - * Kenneth Chen <kenneth.w.chen@intel.com> - * Fenghua Yu <fenghua.yu@intel.com> - * - * - * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling - * for SMP - * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB - * handler now uses virtual PT. - * - * 07/6/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) - * Supporting Intel virtualization architecture - * - */ - -/* - * This file defines the interruption vector table used by the CPU. - * It does not include one entry per possible cause of interruption. 
- * - * The first 20 entries of the table contain 64 bundles each while the - * remaining 48 entries contain only 16 bundles each. - * - * The 64 bundles are used to allow inlining the whole handler for - * critical - * interruptions like TLB misses. - * - * For each entry, the comment is as follows: - * - * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss - * (12,51) - * entry offset ----/ / / / - * / - * entry number ---------/ / / - * / - * size of the entry -------------/ / - * / - * vector name -------------------------------------/ - * / - * interruptions triggering this vector - * ----------------------/ - * - * The table is 32KB in size and must be aligned on 32KB - * boundary. - * (The CPU ignores the 15 lower bits of the address) - * - * Table is based upon EAS2.6 (Oct 1999) - */ - - -#include <asm/asmmacro.h> -#include <asm/cache.h> -#include <asm/pgtable.h> - -#include "asm-offsets.h" -#include "vcpu.h" -#include "kvm_minstate.h" -#include "vti.h" - -#if 0 -# define PSR_DEFAULT_BITS psr.ac -#else -# define PSR_DEFAULT_BITS 0 -#endif - -#define KVM_FAULT(n) \ - kvm_fault_##n:; \ - mov r19=n;; \ - br.sptk.many kvm_vmm_panic; \ - ;; \ - -#define KVM_REFLECT(n) \ - mov r31=pr; \ - mov r19=n; /* prepare to save predicates */ \ - mov r29=cr.ipsr; \ - ;; \ - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ -(p7) br.sptk.many kvm_dispatch_reflection; \ - br.sptk.many kvm_vmm_panic; \ - -GLOBAL_ENTRY(kvm_vmm_panic) - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,1,0 - mov out0=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr. - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - br.call.sptk.many b6=vmm_panic_handler; -END(kvm_vmm_panic) - - .section .text..ivt,"ax" - - .align 32768 // align on 32KB boundary - .global kvm_ia64_ivt -kvm_ia64_ivt: -/////////////////////////////////////////////////////////////// -// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) -ENTRY(kvm_vhpt_miss) - KVM_FAULT(0) -END(kvm_vhpt_miss) - - .org kvm_ia64_ivt+0x400 -//////////////////////////////////////////////////////////////// -// 0x0400 Entry 1 (size 64 bundles) ITLB (21) -ENTRY(kvm_itlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6) br.sptk kvm_alt_itlb_miss - mov r19 = 1 - br.sptk kvm_itlb_miss_dispatch - KVM_FAULT(1); -END(kvm_itlb_miss) - - .org kvm_ia64_ivt+0x0800 -////////////////////////////////////////////////////////////////// -// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) -ENTRY(kvm_dtlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6) br.sptk kvm_alt_dtlb_miss - br.sptk kvm_dtlb_miss_dispatch -END(kvm_dtlb_miss) - - .org kvm_ia64_ivt+0x0c00 -//////////////////////////////////////////////////////////////////// -// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) -ENTRY(kvm_alt_itlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - mov r24=cr.ipsr - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r17,r19 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.i r19 // insert the TLB entry - mov pr=r31,-1 - rfi -END(kvm_alt_itlb_miss) - - .org kvm_ia64_ivt+0x1000 -///////////////////////////////////////////////////////////////////// -// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) 
-ENTRY(kvm_alt_dtlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r24=cr.ipsr - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r19,r17 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.d r19 // insert the TLB entry - mov pr=r31,-1 - rfi -END(kvm_alt_dtlb_miss) - - .org kvm_ia64_ivt+0x1400 -////////////////////////////////////////////////////////////////////// -// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) -ENTRY(kvm_nested_dtlb_miss) - KVM_FAULT(5) -END(kvm_nested_dtlb_miss) - - .org kvm_ia64_ivt+0x1800 -///////////////////////////////////////////////////////////////////// -// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) -ENTRY(kvm_ikey_miss) - KVM_REFLECT(6) -END(kvm_ikey_miss) - - .org kvm_ia64_ivt+0x1c00 -///////////////////////////////////////////////////////////////////// -// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) -ENTRY(kvm_dkey_miss) - KVM_REFLECT(7) -END(kvm_dkey_miss) - - .org kvm_ia64_ivt+0x2000 -//////////////////////////////////////////////////////////////////// -// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) -ENTRY(kvm_dirty_bit) - KVM_REFLECT(8) -END(kvm_dirty_bit) - - .org kvm_ia64_ivt+0x2400 -//////////////////////////////////////////////////////////////////// -// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) -ENTRY(kvm_iaccess_bit) - KVM_REFLECT(9) -END(kvm_iaccess_bit) - - .org kvm_ia64_ivt+0x2800 -/////////////////////////////////////////////////////////////////// -// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) -ENTRY(kvm_daccess_bit) - KVM_REFLECT(10) -END(kvm_daccess_bit) - - .org kvm_ia64_ivt+0x2c00 -///////////////////////////////////////////////////////////////// -// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) -ENTRY(kvm_break_fault) - mov r31=pr - mov r19=11 - mov r29=cr.ipsr - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!) 
- mov out0=cr.ifa - mov out2=cr.isr // FIXME: pity to make this slow access twice - mov out3=cr.iim // FIXME: pity to make this slow access twice - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15)ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out1=16,sp - br.call.sptk.many b6=kvm_ia64_handle_break - ;; -END(kvm_break_fault) - - .org kvm_ia64_ivt+0x3000 -///////////////////////////////////////////////////////////////// -// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) -ENTRY(kvm_interrupt) - mov r31=pr // prepare to save predicates - mov r19=12 - mov r29=cr.ipsr - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT - tbit.z p0,p15=r29,IA64_PSR_I_BIT - ;; -(p7) br.sptk kvm_dispatch_interrupt - ;; - mov r27=ar.rsc /* M */ - mov r20=r1 /* A */ - mov r25=ar.unat /* M */ - mov r26=ar.pfs /* I */ - mov r28=cr.iip /* M */ - cover /* B (or nothing) */ - ;; - mov r1=sp - ;; - invala /* M */ - mov r30=cr.ifs - ;; - addl r1=-VMM_PT_REGS_SIZE,r1 - ;; - adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ - adds r16=PT(CR_IPSR),r1 - ;; - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES - st8 [r16]=r29 /* save cr.ipsr */ - ;; - lfetch.fault.excl.nt1 [r17] - mov r29=b0 - ;; - adds r16=PT(R8),r1 /* initialize first base pointer */ - adds r17=PT(R9),r1 /* initialize second base pointer */ - mov r18=r0 /* make sure r18 isn't NaT */ - ;; -.mem.offset 0,0; st8.spill [r16]=r8,16 -.mem.offset 8,0; st8.spill [r17]=r9,16 - ;; -.mem.offset 0,0; st8.spill [r16]=r10,24 -.mem.offset 8,0; st8.spill [r17]=r11,24 - ;; - st8 [r16]=r28,16 /* save cr.iip */ - st8 [r17]=r30,16 /* save cr.ifs */ - mov r8=ar.fpsr /* M */ - mov r9=ar.csd - mov r10=ar.ssd - movl r11=FPSR_DEFAULT /* L-unit */ - ;; - st8 [r16]=r25,16 /* save ar.unat */ - st8 [r17]=r26,16 /* save ar.pfs */ - shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ - ;; - st8 [r16]=r27,16 /* save ar.rsc */ - adds r17=16,r17 /* skip over ar_rnat field */ - ;; - st8 [r17]=r31,16 /* save predicates */ - adds r16=16,r16 /* skip over ar_bspstore field */ - ;; - st8 [r16]=r29,16 /* save b0 */ - st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ - ;; -.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ -.mem.offset 8,0; st8.spill [r17]=r12,16 - adds r12=-16,r1 - /* switch to kernel memory stack (with 16 bytes of scratch) */ - ;; -.mem.offset 0,0; st8.spill [r16]=r13,16 -.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ - ;; -.mem.offset 0,0; st8.spill [r16]=r15,16 -.mem.offset 8,0; st8.spill [r17]=r14,16 - dep r14=-1,r0,60,4 - ;; -.mem.offset 0,0; st8.spill [r16]=r2,16 -.mem.offset 8,0; st8.spill [r17]=r3,16 - adds r2=VMM_PT_REGS_R16_OFFSET,r1 - adds r14 = VMM_VCPU_GP_OFFSET,r13 - ;; - mov r8=ar.ccv - ld8 r14 = [r14] - ;; - mov r1=r14 /* establish kernel global pointer */ - ;; \ - bsw.1 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - mov out0=r13 - ;; - ssm psr.ic - ;; - srlz.i - ;; - //(p15) ssm psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic is back on - ;; -.mem.offset 0,0; st8.spill [r2]=r16,16 -.mem.offset 8,0; st8.spill [r3]=r17,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r18,16 -.mem.offset 8,0; st8.spill [r3]=r19,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r20,16 -.mem.offset 8,0; st8.spill [r3]=r21,16 - mov r18=b6 - ;; -.mem.offset 0,0; st8.spill [r2]=r22,16 -.mem.offset 8,0; st8.spill [r3]=r23,16 - mov 
r19=b7 - ;; -.mem.offset 0,0; st8.spill [r2]=r24,16 -.mem.offset 8,0; st8.spill [r3]=r25,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r26,16 -.mem.offset 8,0; st8.spill [r3]=r27,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r28,16 -.mem.offset 8,0; st8.spill [r3]=r29,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r30,16 -.mem.offset 8,0; st8.spill [r3]=r31,32 - ;; - mov ar.fpsr=r11 /* M-unit */ - st8 [r2]=r8,8 /* ar.ccv */ - adds r24=PT(B6)-PT(F7),r3 - ;; - stf.spill [r2]=f6,32 - stf.spill [r3]=f7,32 - ;; - stf.spill [r2]=f8,32 - stf.spill [r3]=f9,32 - ;; - stf.spill [r2]=f10 - stf.spill [r3]=f11 - adds r25=PT(B7)-PT(F11),r3 - ;; - st8 [r24]=r18,16 /* b6 */ - st8 [r25]=r19,16 /* b7 */ - ;; - st8 [r24]=r9 /* ar.csd */ - st8 [r25]=r10 /* ar.ssd */ - ;; - srlz.d // make sure we see the effect of cr.ivr - addl r14=@gprel(ia64_leave_nested),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_ia64_handle_irq - ;; -END(kvm_interrupt) - - .global kvm_dispatch_vexirq - .org kvm_ia64_ivt+0x3400 -////////////////////////////////////////////////////////////////////// -// 0x3400 Entry 13 (size 64 bundles) Reserved -ENTRY(kvm_virtual_exirq) - mov r31=pr - mov r19=13 - mov r30 =r0 - ;; -kvm_dispatch_vexirq: - cmp.eq p6,p0 = 1,r30 - ;; -(p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; -(p6) ld8 r1 = [r29] - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,1,0 - mov out0=r13 - - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - KVM_SAVE_REST - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_vexirq -END(kvm_virtual_exirq) - - .org kvm_ia64_ivt+0x3800 -///////////////////////////////////////////////////////////////////// -// 0x3800 Entry 14 (size 64 bundles) Reserved - KVM_FAULT(14) - // this code segment is from 2.6.16.13 - - .org kvm_ia64_ivt+0x3c00 -/////////////////////////////////////////////////////////////////////// -// 0x3c00 Entry 15 (size 64 bundles) Reserved - KVM_FAULT(15) - - .org kvm_ia64_ivt+0x4000 -/////////////////////////////////////////////////////////////////////// -// 0x4000 Entry 16 (size 64 bundles) Reserved - KVM_FAULT(16) - - .org kvm_ia64_ivt+0x4400 -////////////////////////////////////////////////////////////////////// -// 0x4400 Entry 17 (size 64 bundles) Reserved - KVM_FAULT(17) - - .org kvm_ia64_ivt+0x4800 -////////////////////////////////////////////////////////////////////// -// 0x4800 Entry 18 (size 64 bundles) Reserved - KVM_FAULT(18) - - .org kvm_ia64_ivt+0x4c00 -////////////////////////////////////////////////////////////////////// -// 0x4c00 Entry 19 (size 64 bundles) Reserved - KVM_FAULT(19) - - .org kvm_ia64_ivt+0x5000 -////////////////////////////////////////////////////////////////////// -// 0x5000 Entry 20 (size 16 bundles) Page Not Present -ENTRY(kvm_page_not_present) - KVM_REFLECT(20) -END(kvm_page_not_present) - - .org kvm_ia64_ivt+0x5100 -/////////////////////////////////////////////////////////////////////// -// 0x5100 Entry 21 (size 16 bundles) Key Permission vector -ENTRY(kvm_key_permission) - KVM_REFLECT(21) -END(kvm_key_permission) - - .org kvm_ia64_ivt+0x5200 -////////////////////////////////////////////////////////////////////// -// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) -ENTRY(kvm_iaccess_rights) - KVM_REFLECT(22) -END(kvm_iaccess_rights) - - .org kvm_ia64_ivt+0x5300 -////////////////////////////////////////////////////////////////////// -// 0x5300 Entry 23 (size 16 bundles) Data Access 
Rights (14,53) -ENTRY(kvm_daccess_rights) - KVM_REFLECT(23) -END(kvm_daccess_rights) - - .org kvm_ia64_ivt+0x5400 -///////////////////////////////////////////////////////////////////// -// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) -ENTRY(kvm_general_exception) - KVM_REFLECT(24) - KVM_FAULT(24) -END(kvm_general_exception) - - .org kvm_ia64_ivt+0x5500 -////////////////////////////////////////////////////////////////////// -// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) -ENTRY(kvm_disabled_fp_reg) - KVM_REFLECT(25) -END(kvm_disabled_fp_reg) - - .org kvm_ia64_ivt+0x5600 -//////////////////////////////////////////////////////////////////// -// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) -ENTRY(kvm_nat_consumption) - KVM_REFLECT(26) -END(kvm_nat_consumption) - - .org kvm_ia64_ivt+0x5700 -///////////////////////////////////////////////////////////////////// -// 0x5700 Entry 27 (size 16 bundles) Speculation (40) -ENTRY(kvm_speculation_vector) - KVM_REFLECT(27) -END(kvm_speculation_vector) - - .org kvm_ia64_ivt+0x5800 -///////////////////////////////////////////////////////////////////// -// 0x5800 Entry 28 (size 16 bundles) Reserved - KVM_FAULT(28) - - .org kvm_ia64_ivt+0x5900 -/////////////////////////////////////////////////////////////////// -// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) -ENTRY(kvm_debug_vector) - KVM_FAULT(29) -END(kvm_debug_vector) - - .org kvm_ia64_ivt+0x5a00 -/////////////////////////////////////////////////////////////// -// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) -ENTRY(kvm_unaligned_access) - KVM_REFLECT(30) -END(kvm_unaligned_access) - - .org kvm_ia64_ivt+0x5b00 -////////////////////////////////////////////////////////////////////// -// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) -ENTRY(kvm_unsupported_data_reference) - KVM_REFLECT(31) -END(kvm_unsupported_data_reference) - - .org kvm_ia64_ivt+0x5c00 -//////////////////////////////////////////////////////////////////// -// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) -ENTRY(kvm_floating_point_fault) - KVM_REFLECT(32) -END(kvm_floating_point_fault) - - .org kvm_ia64_ivt+0x5d00 -///////////////////////////////////////////////////////////////////// -// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) -ENTRY(kvm_floating_point_trap) - KVM_REFLECT(33) -END(kvm_floating_point_trap) - - .org kvm_ia64_ivt+0x5e00 -////////////////////////////////////////////////////////////////////// -// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) -ENTRY(kvm_lower_privilege_trap) - KVM_REFLECT(34) -END(kvm_lower_privilege_trap) - - .org kvm_ia64_ivt+0x5f00 -////////////////////////////////////////////////////////////////////// -// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) -ENTRY(kvm_taken_branch_trap) - KVM_REFLECT(35) -END(kvm_taken_branch_trap) - - .org kvm_ia64_ivt+0x6000 -//////////////////////////////////////////////////////////////////// -// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) -ENTRY(kvm_single_step_trap) - KVM_REFLECT(36) -END(kvm_single_step_trap) - .global kvm_virtualization_fault_back - .org kvm_ia64_ivt+0x6100 -///////////////////////////////////////////////////////////////////// -// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault -ENTRY(kvm_virtualization_fault) - mov r31=pr - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - st8 [r16] = r1 - adds r17 = VMM_VCPU_GP_OFFSET, r21 - ;; - ld8 r1 = [r17] - cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 
- cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 - cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 - cmp.eq p9,p0=EVENT_RSM,r24 - cmp.eq p10,p0=EVENT_SSM,r24 - cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 - cmp.eq p12,p0=EVENT_THASH,r24 -(p6) br.dptk.many kvm_asm_mov_from_ar -(p7) br.dptk.many kvm_asm_mov_from_rr -(p8) br.dptk.many kvm_asm_mov_to_rr -(p9) br.dptk.many kvm_asm_rsm -(p10) br.dptk.many kvm_asm_ssm -(p11) br.dptk.many kvm_asm_mov_to_psr -(p12) br.dptk.many kvm_asm_thash - ;; -kvm_virtualization_fault_back: - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - ld8 r1 = [r16] - ;; - mov r19=37 - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - cmp.ne p6,p0=EVENT_RFI, r24 -(p6) br.sptk kvm_dispatch_virtualization_fault - ;; - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] - ;; - adds r18=VMM_VPD_VIFS_OFFSET,r18 - ;; - ld8 r18=[r18] - ;; - tbit.z p6,p0=r18,63 -(p6) br.sptk kvm_dispatch_virtualization_fault - ;; -//if vifs.v=1 desert current register frame - alloc r18=ar.pfs,0,0,0,0 - br.sptk kvm_dispatch_virtualization_fault -END(kvm_virtualization_fault) - - .org kvm_ia64_ivt+0x6200 -////////////////////////////////////////////////////////////// -// 0x6200 Entry 38 (size 16 bundles) Reserved - KVM_FAULT(38) - - .org kvm_ia64_ivt+0x6300 -///////////////////////////////////////////////////////////////// -// 0x6300 Entry 39 (size 16 bundles) Reserved - KVM_FAULT(39) - - .org kvm_ia64_ivt+0x6400 -///////////////////////////////////////////////////////////////// -// 0x6400 Entry 40 (size 16 bundles) Reserved - KVM_FAULT(40) - - .org kvm_ia64_ivt+0x6500 -////////////////////////////////////////////////////////////////// -// 0x6500 Entry 41 (size 16 bundles) Reserved - KVM_FAULT(41) - - .org kvm_ia64_ivt+0x6600 -////////////////////////////////////////////////////////////////// -// 0x6600 Entry 42 (size 16 bundles) Reserved - KVM_FAULT(42) - - .org kvm_ia64_ivt+0x6700 -////////////////////////////////////////////////////////////////// -// 0x6700 Entry 43 (size 16 bundles) Reserved - KVM_FAULT(43) - - .org kvm_ia64_ivt+0x6800 -////////////////////////////////////////////////////////////////// -// 0x6800 Entry 44 (size 16 bundles) Reserved - KVM_FAULT(44) - - .org kvm_ia64_ivt+0x6900 -/////////////////////////////////////////////////////////////////// -// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception -//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) -ENTRY(kvm_ia32_exception) - KVM_FAULT(45) -END(kvm_ia32_exception) - - .org kvm_ia64_ivt+0x6a00 -//////////////////////////////////////////////////////////////////// -// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) -ENTRY(kvm_ia32_intercept) - KVM_FAULT(47) -END(kvm_ia32_intercept) - - .org kvm_ia64_ivt+0x6c00 -///////////////////////////////////////////////////////////////////// -// 0x6c00 Entry 48 (size 16 bundles) Reserved - KVM_FAULT(48) - - .org kvm_ia64_ivt+0x6d00 -////////////////////////////////////////////////////////////////////// -// 0x6d00 Entry 49 (size 16 bundles) Reserved - KVM_FAULT(49) - - .org kvm_ia64_ivt+0x6e00 -////////////////////////////////////////////////////////////////////// -// 0x6e00 Entry 50 (size 16 bundles) Reserved - KVM_FAULT(50) - - .org kvm_ia64_ivt+0x6f00 -///////////////////////////////////////////////////////////////////// -// 0x6f00 Entry 51 (size 16 bundles) Reserved - KVM_FAULT(52) - - .org kvm_ia64_ivt+0x7100 -//////////////////////////////////////////////////////////////////// -// 0x7100 Entry 53 (size 16 bundles) Reserved - 
KVM_FAULT(53) - - .org kvm_ia64_ivt+0x7200 -///////////////////////////////////////////////////////////////////// -// 0x7200 Entry 54 (size 16 bundles) Reserved - KVM_FAULT(54) - - .org kvm_ia64_ivt+0x7300 -//////////////////////////////////////////////////////////////////// -// 0x7300 Entry 55 (size 16 bundles) Reserved - KVM_FAULT(55) - - .org kvm_ia64_ivt+0x7400 -//////////////////////////////////////////////////////////////////// -// 0x7400 Entry 56 (size 16 bundles) Reserved - KVM_FAULT(56) - - .org kvm_ia64_ivt+0x7500 -///////////////////////////////////////////////////////////////////// -// 0x7500 Entry 57 (size 16 bundles) Reserved - KVM_FAULT(57) - - .org kvm_ia64_ivt+0x7600 -///////////////////////////////////////////////////////////////////// -// 0x7600 Entry 58 (size 16 bundles) Reserved - KVM_FAULT(58) - - .org kvm_ia64_ivt+0x7700 -//////////////////////////////////////////////////////////////////// -// 0x7700 Entry 59 (size 16 bundles) Reserved - KVM_FAULT(59) - - .org kvm_ia64_ivt+0x7800 -//////////////////////////////////////////////////////////////////// -// 0x7800 Entry 60 (size 16 bundles) Reserved - KVM_FAULT(60) - - .org kvm_ia64_ivt+0x7900 -///////////////////////////////////////////////////////////////////// -// 0x7900 Entry 61 (size 16 bundles) Reserved - KVM_FAULT(61) - - .org kvm_ia64_ivt+0x7a00 -///////////////////////////////////////////////////////////////////// -// 0x7a00 Entry 62 (size 16 bundles) Reserved - KVM_FAULT(62) - - .org kvm_ia64_ivt+0x7b00 -///////////////////////////////////////////////////////////////////// -// 0x7b00 Entry 63 (size 16 bundles) Reserved - KVM_FAULT(63) - - .org kvm_ia64_ivt+0x7c00 -//////////////////////////////////////////////////////////////////// -// 0x7c00 Entry 64 (size 16 bundles) Reserved - KVM_FAULT(64) - - .org kvm_ia64_ivt+0x7d00 -///////////////////////////////////////////////////////////////////// -// 0x7d00 Entry 65 (size 16 bundles) Reserved - KVM_FAULT(65) - - .org kvm_ia64_ivt+0x7e00 -///////////////////////////////////////////////////////////////////// -// 0x7e00 Entry 66 (size 16 bundles) Reserved - KVM_FAULT(66) - - .org kvm_ia64_ivt+0x7f00 -//////////////////////////////////////////////////////////////////// -// 0x7f00 Entry 67 (size 16 bundles) Reserved - KVM_FAULT(67) - - .org kvm_ia64_ivt+0x8000 -// There is no particular reason for this code to be here, other than that -// there happens to be space here that would go unused otherwise. If this -// fault ever gets "unreserved", simply moved the following code to a more -// suitable spot... 
- - -ENTRY(kvm_dtlb_miss_dispatch) - mov r19 = 2 - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault -END(kvm_dtlb_miss_dispatch) - -ENTRY(kvm_itlb_miss_dispatch) - - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault -END(kvm_itlb_miss_dispatch) - -ENTRY(kvm_dispatch_reflection) -/* - * Input: - * psr.ic: off - * r19: intr type (offset into ivt, see ia64_int.h) - * r31: contains saved predicates (pr) - */ - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - mov out0=cr.ifa - mov out1=cr.isr - mov out2=cr.iim - mov out3=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out4=16,r12 - br.call.sptk.many b6=reflect_interruption -END(kvm_dispatch_reflection) - -ENTRY(kvm_dispatch_virtualization_fault) - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!) 
- mov out0=r13 //vcpu - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out1=16,sp //regs - br.call.sptk.many b6=kvm_emulate -END(kvm_dispatch_virtualization_fault) - - -ENTRY(kvm_dispatch_interrupt) - KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - ssm psr.ic - ;; - srlz.i - ;; - (p15) ssm psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - mov out0=r13 // pass pointer to pt_regs as second arg - br.call.sptk.many b6=kvm_ia64_handle_irq -END(kvm_dispatch_interrupt) - -GLOBAL_ENTRY(ia64_leave_nested) - rsm psr.i - ;; - adds r21=PT(PR)+16,r12 - ;; - lfetch [r21],PT(CR_IPSR)-PT(PR) - adds r2=PT(B6)+16,r12 - adds r3=PT(R16)+16,r12 - ;; - lfetch [r21] - ld8 r28=[r2],8 // load b6 - adds r29=PT(R24)+16,r12 - - ld8.fill r16=[r3] - adds r3=PT(AR_CSD)-PT(R16),r3 - adds r30=PT(AR_CCV)+16,r12 - ;; - ld8.fill r24=[r29] - ld8 r15=[r30] // load ar.ccv - ;; - ld8 r29=[r2],16 // load b7 - ld8 r30=[r3],16 // load ar.csd - ;; - ld8 r31=[r2],16 // load ar.ssd - ld8.fill r8=[r3],16 - ;; - ld8.fill r9=[r2],16 - ld8.fill r10=[r3],PT(R17)-PT(R10) - ;; - ld8.fill r11=[r2],PT(R18)-PT(R11) - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - mov ar.csd=r30 - mov ar.ssd=r31 - ;; - rsm psr.i | psr.ic - // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - srlz.i - ;; - ld8.fill r22=[r2],24 - ld8.fill r23=[r3],24 - mov b6=r28 - ;; - ld8.fill r25=[r2],16 - ld8.fill r26=[r3],16 - mov b7=r29 - ;; - ld8.fill r27=[r2],16 - ld8.fill r28=[r3],16 - ;; - ld8.fill r29=[r2],16 - ld8.fill r30=[r3],24 - ;; - ld8.fill r31=[r2],PT(F9)-PT(R31) - adds r3=PT(F10)-PT(F6),r3 - ;; - ldf.fill f9=[r2],PT(F6)-PT(F9) - ldf.fill f10=[r3],PT(F8)-PT(F10) - ;; - ldf.fill f6=[r2],PT(F7)-PT(F6) - ;; - ldf.fill f7=[r2],PT(F11)-PT(F7) - ldf.fill f8=[r3],32 - ;; - srlz.i // ensure interruption collection is off - mov ar.ccv=r15 - ;; - bsw.0 // switch back to bank 0 (no stop bit required beforehand...) 
- ;; - ldf.fill f11=[r2] -// mov r18=r13 -// mov r21=r13 - adds r16=PT(CR_IPSR)+16,r12 - adds r17=PT(CR_IIP)+16,r12 - ;; - ld8 r29=[r16],16 // load cr.ipsr - ld8 r28=[r17],16 // load cr.iip - ;; - ld8 r30=[r16],16 // load cr.ifs - ld8 r25=[r17],16 // load ar.unat - ;; - ld8 r26=[r16],16 // load ar.pfs - ld8 r27=[r17],16 // load ar.rsc - cmp.eq p9,p0=r0,r0 - // set p9 to indicate that we should restore cr.ifs - ;; - ld8 r24=[r16],16 // load ar.rnat (may be garbage) - ld8 r23=[r17],16// load ar.bspstore (may be garbage) - ;; - ld8 r31=[r16],16 // load predicates - ld8 r22=[r17],16 // load b0 - ;; - ld8 r19=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 // load r1 - ;; - ld8.fill r12=[r16],16 - ld8.fill r13=[r17],16 - ;; - ld8 r20=[r16],16 // ar.fpsr - ld8.fill r15=[r17],16 - ;; - ld8.fill r14=[r16],16 - ld8.fill r2=[r17] - ;; - ld8.fill r3=[r16] - ;; - mov r16=ar.bsp // get existing backing store pointer - ;; - mov b0=r22 - mov ar.pfs=r26 - mov cr.ifs=r30 - mov cr.ipsr=r29 - mov ar.fpsr=r20 - mov cr.iip=r28 - ;; - mov ar.rsc=r27 - mov ar.unat=r25 - mov pr=r31,-1 - rfi -END(ia64_leave_nested) - -GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) -/* - * work.need_resched etc. mustn't get changed - *by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on: - */ - adds r2 = PT(R4)+16,r12 - adds r3 = PT(R5)+16,r12 - adds r8 = PT(EML_UNAT)+16,r12 - ;; - ld8 r8 = [r8] - ;; - mov ar.unat=r8 - ;; - ld8.fill r4=[r2],16 //load r4 - ld8.fill r5=[r3],16 //load r5 - ;; - ld8.fill r6=[r2] //load r6 - ld8.fill r7=[r3] //load r7 - ;; -END(ia64_leave_hypervisor_prepare) -//fall through -GLOBAL_ENTRY(ia64_leave_hypervisor) - rsm psr.i - ;; - br.call.sptk.many b0=leave_hypervisor_tail - ;; - adds r20=PT(PR)+16,r12 - adds r8=PT(EML_UNAT)+16,r12 - ;; - ld8 r8=[r8] - ;; - mov ar.unat=r8 - ;; - lfetch [r20],PT(CR_IPSR)-PT(PR) - adds r2 = PT(B6)+16,r12 - adds r3 = PT(B7)+16,r12 - ;; - lfetch [r20] - ;; - ld8 r24=[r2],16 /* B6 */ - ld8 r25=[r3],16 /* B7 */ - ;; - ld8 r26=[r2],16 /* ar_csd */ - ld8 r27=[r3],16 /* ar_ssd */ - mov b6 = r24 - ;; - ld8.fill r8=[r2],16 - ld8.fill r9=[r3],16 - mov b7 = r25 - ;; - mov ar.csd = r26 - mov ar.ssd = r27 - ;; - ld8.fill r10=[r2],PT(R15)-PT(R10) - ld8.fill r11=[r3],PT(R14)-PT(R11) - ;; - ld8.fill r15=[r2],PT(R16)-PT(R15) - ld8.fill r14=[r3],PT(R17)-PT(R14) - ;; - ld8.fill r16=[r2],16 - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - ;; - ld8.fill r22=[r2],16 - ld8.fill r23=[r3],16 - ;; - ld8.fill r24=[r2],16 - ld8.fill r25=[r3],16 - ;; - ld8.fill r26=[r2],16 - ld8.fill r27=[r3],16 - ;; - ld8.fill r28=[r2],16 - ld8.fill r29=[r3],16 - ;; - ld8.fill r30=[r2],PT(F6)-PT(R30) - ld8.fill r31=[r3],PT(F7)-PT(R31) - ;; - rsm psr.i | psr.ic - // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - srlz.i // ensure interruption collection is off - ;; - bsw.0 - ;; - adds r16 = PT(CR_IPSR)+16,r12 - adds r17 = PT(CR_IIP)+16,r12 - mov r21=r13 // get current - ;; - ld8 r31=[r16],16 // load cr.ipsr - ld8 r30=[r17],16 // load cr.iip - ;; - ld8 r29=[r16],16 // load cr.ifs - ld8 r28=[r17],16 // load ar.unat - ;; - ld8 r27=[r16],16 // load ar.pfs - ld8 r26=[r17],16 // load ar.rsc - ;; - ld8 r25=[r16],16 // load ar.rnat - ld8 r24=[r17],16 // load ar.bspstore - ;; - ld8 r23=[r16],16 // load predicates - ld8 r22=[r17],16 // load b0 - ;; - ld8 r20=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 //load r1 - ;; - ld8.fill 
r12=[r16],16 //load r12 - ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 - ;; - ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr - ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 - ;; - ld8.fill r3=[r16] //load r3 - ld8 r18=[r17] //load ar_ccv - ;; - mov ar.fpsr=r19 - mov ar.ccv=r18 - shr.u r18=r20,16 - ;; -kvm_rbs_switch: - mov r19=96 - -kvm_dont_preserve_current_frame: -/* - * To prevent leaking bits between the hypervisor and guest domain, - * we must clear the stacked registers in the "invalid" partition here. - * 5 registers/cycle on McKinley). - */ -# define pRecurse p6 -# define pReturn p7 -# define Nregs 14 - - alloc loc0=ar.pfs,2,Nregs-2,2,0 - shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) - sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize - ;; - mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" - shladd in0=loc1,3,r19 - mov in1=0 - ;; - TEXT_ALIGN(32) -kvm_rse_clear_invalid: - alloc loc0=ar.pfs,2,Nregs-2,2,0 - cmp.lt pRecurse,p0=Nregs*8,in0 - // if more than Nregs regs left to clear, (re)curse - add out0=-Nregs*8,in0 - add out1=1,in1 // increment recursion count - mov loc1=0 - mov loc2=0 - ;; - mov loc3=0 - mov loc4=0 - mov loc5=0 - mov loc6=0 - mov loc7=0 -(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid - ;; - mov loc8=0 - mov loc9=0 - cmp.ne pReturn,p0=r0,in1 - // if recursion count != 0, we need to do a br.ret - mov loc10=0 - mov loc11=0 -(pReturn) br.ret.dptk.many b0 - -# undef pRecurse -# undef pReturn - -// loadrs has already been shifted - alloc r16=ar.pfs,0,0,0,0 // drop current register frame - ;; - loadrs - ;; - mov ar.bspstore=r24 - ;; - mov ar.unat=r28 - mov ar.rnat=r25 - mov ar.rsc=r26 - ;; - mov cr.ipsr=r31 - mov cr.iip=r30 - mov cr.ifs=r29 - mov ar.pfs=r27 - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] //vpd - adds r17=VMM_VCPU_ISR_OFFSET,r21 - ;; - ld8 r17=[r17] - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] //vpsr - mov r25=r18 - adds r16= VMM_VCPU_GP_OFFSET,r21 - ;; - ld8 r16= [r16] // Put gp in r24 - movl r24=@gprel(ia64_vmm_entry) // calculate return address - ;; - add r24=r24,r16 - ;; - br.sptk.many kvm_vps_sync_write // call the service - ;; -END(ia64_leave_hypervisor) -// fall through -GLOBAL_ENTRY(ia64_vmm_entry) -/* - * must be at bank 0 - * parameter: - * r17:cr.isr - * r18:vpd - * r19:vpsr - * r22:b0 - * r23:predicate - */ - mov r24=r22 - mov r25=r18 - tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic -(p1) br.cond.sptk.few kvm_vps_resume_normal -(p2) br.cond.sptk.many kvm_vps_resume_handler - ;; -END(ia64_vmm_entry) - -/* - * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, - * u64 arg3, u64 arg4, u64 arg5, - * u64 arg6, u64 arg7); - * - * XXX: The currently defined services use only 4 args at the max. The - * rest are not consumed. 
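 * Editor's aside (not part of this patch): in0 is one of the PAL_VPS_*
 * offsets defined in vti.h; the entry point is the per-vcpu vsa base
 * (loaded via VMM_VCPU_VSA_BASE_OFFSET from r13) plus that offset. Only
 * in1..in3 are forwarded, in r25..r27. psr is sampled up front so that
 * psr.ic and psr.i can be restored afterwards exactly as they were
 * (predicates p7 and p6), the call itself runs with both masked, and the
 * service's result comes back in r31, from which it is moved to r8.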
- */ -GLOBAL_ENTRY(ia64_call_vsa) - .regstk 4,4,0,0 - -rpsave = loc0 -pfssave = loc1 -psrsave = loc2 -entry = loc3 -hostret = r24 - - alloc pfssave=ar.pfs,4,4,0,0 - mov rpsave=rp - adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 - ;; - ld8 entry=[entry] -1: mov hostret=ip - mov r25=in1 // copy arguments - mov r26=in2 - mov r27=in3 - mov psrsave=psr - ;; - tbit.nz p6,p0=psrsave,14 // IA64_PSR_I - tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC - ;; - add hostret=2f-1b,hostret // calculate return address - add entry=entry,in0 - ;; - rsm psr.i | psr.ic - ;; - srlz.i - mov b6=entry - br.cond.sptk b6 // call the service -2: -// Architectural sequence for enabling interrupts if necessary -(p7) ssm psr.ic - ;; -(p7) srlz.i - ;; -(p6) ssm psr.i - ;; - mov rp=rpsave - mov ar.pfs=pfssave - mov r8=r31 - ;; - srlz.d - br.ret.sptk rp - -END(ia64_call_vsa) - -#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) - -GLOBAL_ENTRY(vmm_reset_entry) - //set up ipsr, iip, vpd.vpsr, dcr - // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 - // For DCR: all bits 0 - bsw.0 - ;; - mov r21 =r13 - adds r14=-VMM_PT_REGS_SIZE, r12 - ;; - movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 - movl r10=0x8000000000000000 - adds r16=PT(CR_IIP), r14 - adds r20=PT(R1), r14 - ;; - rsm psr.ic | psr.i - ;; - srlz.i - ;; - mov ar.rsc = 0 - ;; - flushrs - ;; - mov ar.bspstore = 0 - // clear BSPSTORE - ;; - mov cr.ipsr=r6 - mov cr.ifs=r10 - ld8 r4 = [r16] // Set init iip for first run. - ld8 r1 = [r20] - ;; - mov cr.iip=r4 - adds r16=VMM_VPD_BASE_OFFSET,r13 - ;; - ld8 r18=[r16] - ;; - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] - mov r17=r0 - mov r22=r0 - mov r23=r0 - br.cond.sptk ia64_vmm_entry - br.ret.sptk b0 -END(vmm_reset_entry) diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h deleted file mode 100644 index b214b5b0432..00000000000 --- a/arch/ia64/kvm/vti.h +++ /dev/null @@ -1,290 +0,0 @@ -/* - * vti.h: prototype for generial vt related interface - * Copyright (c) 2004, Intel Corporation. - * - * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) - * Fred Yang (fred.yang@intel.com) - * Kun Tian (Kevin Tian) (kevin.tian@intel.com) - * - * Copyright (c) 2007, Intel Corporation. - * Zhang xiantao <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- */ -#ifndef _KVM_VT_I_H -#define _KVM_VT_I_H - -#ifndef __ASSEMBLY__ -#include <asm/page.h> - -#include <linux/kvm_host.h> - -/* define itr.i and itr.d in ia64_itr function */ -#define ITR 0x01 -#define DTR 0x02 -#define IaDTR 0x03 - -#define IA64_TR_VMM 6 /*itr6, dtr6 : maps vmm code, vmbuffer*/ -#define IA64_TR_VM_DATA 7 /*dtr7 : maps current vm data*/ - -#define RR6 (6UL<<61) -#define RR7 (7UL<<61) - - -/* config_options in pal_vp_init_env */ -#define VP_INITIALIZE 1UL -#define VP_FR_PMC 1UL<<1 -#define VP_OPCODE 1UL<<8 -#define VP_CAUSE 1UL<<9 -#define VP_FW_ACC 1UL<<63 - -/* init vp env with initializing vm_buffer */ -#define VP_INIT_ENV_INITALIZE (VP_INITIALIZE | VP_FR_PMC |\ - VP_OPCODE | VP_CAUSE | VP_FW_ACC) -/* init vp env without initializing vm_buffer */ -#define VP_INIT_ENV VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC - -#define PAL_VP_CREATE 265 -/* Stacked Virt. Initializes a new VPD for the operation of - * a new virtual processor in the virtual environment. - */ -#define PAL_VP_ENV_INFO 266 -/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/ -#define PAL_VP_EXIT_ENV 267 -/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/ -#define PAL_VP_INIT_ENV 268 -/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/ -#define PAL_VP_REGISTER 269 -/*Stacked Virt. Register a different host IVT for the virtual processor.*/ -#define PAL_VP_RESUME 270 -/* Renamed from PAL_VP_RESUME */ -#define PAL_VP_RESTORE 270 -/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/ -#define PAL_VP_SUSPEND 271 -/* Renamed from PAL_VP_SUSPEND */ -#define PAL_VP_SAVE 271 -/* Stacked Virt. Suspends operation for the specified virtual processor on - * the logical processor. - */ -#define PAL_VP_TERMINATE 272 -/* Stacked Virt. 
Terminates operation for the specified virtual processor.*/ - -union vac { - unsigned long value; - struct { - unsigned int a_int:1; - unsigned int a_from_int_cr:1; - unsigned int a_to_int_cr:1; - unsigned int a_from_psr:1; - unsigned int a_from_cpuid:1; - unsigned int a_cover:1; - unsigned int a_bsw:1; - long reserved:57; - }; -}; - -union vdc { - unsigned long value; - struct { - unsigned int d_vmsw:1; - unsigned int d_extint:1; - unsigned int d_ibr_dbr:1; - unsigned int d_pmc:1; - unsigned int d_to_pmd:1; - unsigned int d_itm:1; - long reserved:58; - }; -}; - -struct vpd { - union vac vac; - union vdc vdc; - unsigned long virt_env_vaddr; - unsigned long reserved1[29]; - unsigned long vhpi; - unsigned long reserved2[95]; - unsigned long vgr[16]; - unsigned long vbgr[16]; - unsigned long vnat; - unsigned long vbnat; - unsigned long vcpuid[5]; - unsigned long reserved3[11]; - unsigned long vpsr; - unsigned long vpr; - unsigned long reserved4[76]; - union { - unsigned long vcr[128]; - struct { - unsigned long dcr; - unsigned long itm; - unsigned long iva; - unsigned long rsv1[5]; - unsigned long pta; - unsigned long rsv2[7]; - unsigned long ipsr; - unsigned long isr; - unsigned long rsv3; - unsigned long iip; - unsigned long ifa; - unsigned long itir; - unsigned long iipa; - unsigned long ifs; - unsigned long iim; - unsigned long iha; - unsigned long rsv4[38]; - unsigned long lid; - unsigned long ivr; - unsigned long tpr; - unsigned long eoi; - unsigned long irr[4]; - unsigned long itv; - unsigned long pmv; - unsigned long cmcv; - unsigned long rsv5[5]; - unsigned long lrr0; - unsigned long lrr1; - unsigned long rsv6[46]; - }; - }; - unsigned long reserved5[128]; - unsigned long reserved6[3456]; - unsigned long vmm_avail[128]; - unsigned long reserved7[4096]; -}; - -#define PAL_PROC_VM_BIT (1UL << 40) -#define PAL_PROC_VMSW_BIT (1UL << 54) - -static inline s64 ia64_pal_vp_env_info(u64 *buffer_size, - u64 *vp_env_info) -{ - struct ia64_pal_retval iprv; - PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0); - *buffer_size = iprv.v0; - *vp_env_info = iprv.v1; - return iprv.status; -} - -static inline s64 ia64_pal_vp_exit_env(u64 iva) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0); - return iprv.status; -} - -static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr, - u64 vbase_addr, u64 *vsa_base) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr, - vbase_addr); - *vsa_base = iprv.v0; - - return iprv.status; -} - -static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0); - - return iprv.status; -} - -static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0); - - return iprv.status; -} - -#endif - -/*VPD field offset*/ -#define VPD_VAC_START_OFFSET 0 -#define VPD_VDC_START_OFFSET 8 -#define VPD_VHPI_START_OFFSET 256 -#define VPD_VGR_START_OFFSET 1024 -#define VPD_VBGR_START_OFFSET 1152 -#define VPD_VNAT_START_OFFSET 1280 -#define VPD_VBNAT_START_OFFSET 1288 -#define VPD_VCPUID_START_OFFSET 1296 -#define VPD_VPSR_START_OFFSET 1424 -#define VPD_VPR_START_OFFSET 1432 -#define VPD_VRSE_CFLE_START_OFFSET 1440 -#define VPD_VCR_START_OFFSET 2048 -#define VPD_VTPR_START_OFFSET 2576 -#define VPD_VRR_START_OFFSET 3072 -#define VPD_VMM_VAIL_START_OFFSET 31744 - -/*Virtualization 
faults*/ - -#define EVENT_MOV_TO_AR 1 -#define EVENT_MOV_TO_AR_IMM 2 -#define EVENT_MOV_FROM_AR 3 -#define EVENT_MOV_TO_CR 4 -#define EVENT_MOV_FROM_CR 5 -#define EVENT_MOV_TO_PSR 6 -#define EVENT_MOV_FROM_PSR 7 -#define EVENT_ITC_D 8 -#define EVENT_ITC_I 9 -#define EVENT_MOV_TO_RR 10 -#define EVENT_MOV_TO_DBR 11 -#define EVENT_MOV_TO_IBR 12 -#define EVENT_MOV_TO_PKR 13 -#define EVENT_MOV_TO_PMC 14 -#define EVENT_MOV_TO_PMD 15 -#define EVENT_ITR_D 16 -#define EVENT_ITR_I 17 -#define EVENT_MOV_FROM_RR 18 -#define EVENT_MOV_FROM_DBR 19 -#define EVENT_MOV_FROM_IBR 20 -#define EVENT_MOV_FROM_PKR 21 -#define EVENT_MOV_FROM_PMC 22 -#define EVENT_MOV_FROM_CPUID 23 -#define EVENT_SSM 24 -#define EVENT_RSM 25 -#define EVENT_PTC_L 26 -#define EVENT_PTC_G 27 -#define EVENT_PTC_GA 28 -#define EVENT_PTR_D 29 -#define EVENT_PTR_I 30 -#define EVENT_THASH 31 -#define EVENT_TTAG 32 -#define EVENT_TPA 33 -#define EVENT_TAK 34 -#define EVENT_PTC_E 35 -#define EVENT_COVER 36 -#define EVENT_RFI 37 -#define EVENT_BSW_0 38 -#define EVENT_BSW_1 39 -#define EVENT_VMSW 40 - -/**PAL virtual services offsets */ -#define PAL_VPS_RESUME_NORMAL 0x0000 -#define PAL_VPS_RESUME_HANDLER 0x0400 -#define PAL_VPS_SYNC_READ 0x0800 -#define PAL_VPS_SYNC_WRITE 0x0c00 -#define PAL_VPS_SET_PENDING_INTERRUPT 0x1000 -#define PAL_VPS_THASH 0x1400 -#define PAL_VPS_TTAG 0x1800 -#define PAL_VPS_RESTORE 0x1c00 -#define PAL_VPS_SAVE 0x2000 - -#endif/* _VT_I_H*/ diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c deleted file mode 100644 index a7869f8f49a..00000000000 --- a/arch/ia64/kvm/vtlb.c +++ /dev/null @@ -1,640 +0,0 @@ -/* - * vtlb.c: guest virtual tlb handling module. - * Copyright (c) 2004, Intel Corporation. - * Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com> - * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> - * - * Copyright (c) 2007, Intel Corporation. - * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> - * Xiantao Zhang <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include "vcpu.h" - -#include <linux/rwsem.h> - -#include <asm/tlb.h> - -/* - * Check to see if the address rid:va is translated by the TLB - */ - -static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va) -{ - return ((trp->p) && (trp->rid == rid) - && ((va-trp->vadr) < PSIZE(trp->ps))); -} - -/* - * Only for GUEST TR format. 
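 * Editor's aside (not part of this patch): the test below is the usual
 * closed-interval overlap check. With [sa1, ea1] the translation's range
 * and eva first decremented so that [sva, eva] is the query range, the
 * two overlap exactly when neither begins past the other's end, i.e.
 * !(sva > ea1 || sa1 > eva). For example, sa1 = 0x4000 with a 16KB page
 * gives ea1 = 0x7fff; a query of [0x6000, 0xa000) fails neither
 * comparison, so it overlaps.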
- */ -static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva) -{ - u64 sa1, ea1; - - if (!trp->p || trp->rid != rid) - return 0; - - sa1 = trp->vadr; - ea1 = sa1 + PSIZE(trp->ps) - 1; - eva -= 1; - if ((sva > ea1) || (sa1 > eva)) - return 0; - else - return 1; - -} - -void machine_tlb_purge(u64 va, u64 ps) -{ - ia64_ptcl(va, ps << 2); -} - -void local_flush_tlb_all(void) -{ - int i, j; - unsigned long flags, count0, count1; - unsigned long stride0, stride1, addr; - - addr = current_vcpu->arch.ptce_base; - count0 = current_vcpu->arch.ptce_count[0]; - count1 = current_vcpu->arch.ptce_count[1]; - stride0 = current_vcpu->arch.ptce_stride[0]; - stride1 = current_vcpu->arch.ptce_stride[1]; - - local_irq_save(flags); - for (i = 0; i < count0; ++i) { - for (j = 0; j < count1; ++j) { - ia64_ptce(addr); - addr += stride1; - } - addr += stride0; - } - local_irq_restore(flags); - ia64_srlz_i(); /* srlz.i implies srlz.d */ -} - -int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref) -{ - union ia64_rr vrr; - union ia64_pta vpta; - struct ia64_psr vpsr; - - vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - vrr.val = vcpu_get_rr(vcpu, vadr); - vpta.val = vcpu_get_pta(vcpu); - - if (vrr.ve & vpta.ve) { - switch (ref) { - case DATA_REF: - case NA_REF: - return vpsr.dt; - case INST_REF: - return vpsr.dt && vpsr.it && vpsr.ic; - case RSE_REF: - return vpsr.dt && vpsr.rt; - - } - } - return 0; -} - -struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag) -{ - u64 index, pfn, rid, pfn_bits; - - pfn_bits = vpta.size - 5 - 8; - pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr); - rid = _REGION_ID(vrr); - index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1)); - *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16); - - return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) + - (index << 5)); -} - -struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type) -{ - - struct thash_data *trp; - int i; - u64 rid; - - rid = vcpu_get_rr(vcpu, va); - rid = rid & RR_RID_MASK; - if (type == D_TLB) { - if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { - for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; - i < NDTRS; i++, trp++) { - if (__is_tr_translated(trp, rid, va)) - return trp; - } - } - } else { - if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { - for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; - i < NITRS; i++, trp++) { - if (__is_tr_translated(trp, rid, va)) - return trp; - } - } - } - - return NULL; -} - -static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte) -{ - union ia64_rr rr; - struct thash_data *head; - unsigned long ps, gpaddr; - - ps = itir_ps(itir); - rr.val = ia64_get_rr(ifa); - - gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) | - (ifa & ((1UL << ps) - 1)); - - head = (struct thash_data *)ia64_thash(ifa); - head->etag = INVALID_TI_TAG; - ia64_mf(); - head->page_flags = pte & ~PAGE_FLAGS_RV_MASK; - head->itir = rr.ps << 2; - head->etag = ia64_ttag(ifa); - head->gpaddr = gpaddr; -} - -void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) -{ - u64 i, dirty_pages = 1; - u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; - vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); - void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE; - - dirty_pages <<= ps <= PAGE_SHIFT ? 
0 : ps - PAGE_SHIFT; - - vmm_spin_lock(lock); - for (i = 0; i < dirty_pages; i++) { - /* avoid RMW */ - if (!test_bit(base_gfn + i, dirty_bitmap)) - set_bit(base_gfn + i , dirty_bitmap); - } - vmm_spin_unlock(lock); -} - -void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type) -{ - u64 phy_pte, psr; - union ia64_rr mrr; - - mrr.val = ia64_get_rr(va); - phy_pte = translate_phy_pte(&pte, itir, va); - - if (itir_ps(itir) >= mrr.ps) { - vhpt_insert(phy_pte, itir, va, pte); - } else { - phy_pte &= ~PAGE_FLAGS_RV_MASK; - psr = ia64_clear_ic(); - ia64_itc(type, va, phy_pte, itir_ps(itir)); - paravirt_dv_serialize_data(); - ia64_set_psr(psr); - } - - if (!(pte&VTLB_PTE_IO)) - mark_pages_dirty(v, pte, itir_ps(itir)); -} - -/* - * vhpt lookup - */ -struct thash_data *vhpt_lookup(u64 va) -{ - struct thash_data *head; - u64 tag; - - head = (struct thash_data *)ia64_thash(va); - tag = ia64_ttag(va); - if (head->etag == tag) - return head; - return NULL; -} - -u64 guest_vhpt_lookup(u64 iha, u64 *pte) -{ - u64 ret; - struct thash_data *data; - - data = __vtr_lookup(current_vcpu, iha, D_TLB); - if (data != NULL) - thash_vhpt_insert(current_vcpu, data->page_flags, - data->itir, iha, D_TLB); - - asm volatile ("rsm psr.ic|psr.i;;" - "srlz.d;;" - "ld8.s r9=[%1];;" - "tnat.nz p6,p7=r9;;" - "(p6) mov %0=1;" - "(p6) mov r9=r0;" - "(p7) extr.u r9=r9,0,53;;" - "(p7) mov %0=r0;" - "(p7) st8 [%2]=r9;;" - "ssm psr.ic;;" - "srlz.d;;" - "ssm psr.i;;" - "srlz.d;;" - : "=&r"(ret) : "r"(iha), "r"(pte) : "memory"); - - return ret; -} - -/* - * purge software guest tlb - */ - -static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps) -{ - struct thash_data *cur; - u64 start, curadr, size, psbits, tag, rr_ps, num; - union ia64_rr vrr; - struct thash_cb *hcb = &v->arch.vtlb; - - vrr.val = vcpu_get_rr(v, va); - psbits = VMX(v, psbits[(va >> 61)]); - start = va & ~((1UL << ps) - 1); - while (psbits) { - curadr = start; - rr_ps = __ffs(psbits); - psbits &= ~(1UL << rr_ps); - num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps)); - size = PSIZE(rr_ps); - vrr.ps = rr_ps; - while (num) { - cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag); - if (cur->etag == tag && cur->ps == rr_ps) - cur->etag = INVALID_TI_TAG; - curadr += size; - num--; - } - } -} - - -/* - * purge VHPT and machine TLB - */ -static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps) -{ - struct thash_data *cur; - u64 start, size, tag, num; - union ia64_rr rr; - - start = va & ~((1UL << ps) - 1); - rr.val = ia64_get_rr(va); - size = PSIZE(rr.ps); - num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps)); - while (num) { - cur = (struct thash_data *)ia64_thash(start); - tag = ia64_ttag(start); - if (cur->etag == tag) - cur->etag = INVALID_TI_TAG; - start += size; - num--; - } - machine_tlb_purge(va, ps); -} - -/* - * Insert an entry into hash TLB or VHPT. - * NOTES: - * 1: When inserting VHPT to thash, "va" is a must covered - * address by the inserted machine VHPT entry. - * 2: The format of entry is always in TLB. - * 3: The caller need to make sure the new entry will not overlap - * with any existed entry. 
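 * Editor's aside (not part of this patch): the bucket comes from
 * vsa_thash() above, which folds the region id and page number into an
 * index of (vpta.size - 5) bits: pfn_bits = vpta.size - 5 - 8 bits of
 * page number plus the low 8 bits of the rid. With 32-byte entries that
 * fills exactly 2^vpta.size bytes; e.g. vpta.size = 16 gives pfn_bits
 * = 3, 2048 buckets and a 64KB table. The rid/pfn bits not used in the
 * index go into the tag that is checked on lookup.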
- */ -void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va) -{ - struct thash_data *head; - union ia64_rr vrr; - u64 tag; - struct thash_cb *hcb = &v->arch.vtlb; - - vrr.val = vcpu_get_rr(v, va); - vrr.ps = itir_ps(itir); - VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps); - head = vsa_thash(hcb->pta, va, vrr.val, &tag); - head->page_flags = pte; - head->itir = itir; - head->etag = tag; -} - -int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type) -{ - struct thash_data *trp; - int i; - u64 end, rid; - - rid = vcpu_get_rr(vcpu, va); - rid = rid & RR_RID_MASK; - end = va + PSIZE(ps); - if (type == D_TLB) { - if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { - for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; - i < NDTRS; i++, trp++) { - if (__is_tr_overlap(trp, rid, va, end)) - return i; - } - } - } else { - if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { - for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; - i < NITRS; i++, trp++) { - if (__is_tr_overlap(trp, rid, va, end)) - return i; - } - } - } - return -1; -} - -/* - * Purge entries in VTLB and VHPT - */ -void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps) -{ - if (vcpu_quick_region_check(v->arch.tc_regions, va)) - vtlb_purge(v, va, ps); - vhpt_purge(v, va, ps); -} - -void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps) -{ - u64 old_va = va; - va = REGION_OFFSET(va); - if (vcpu_quick_region_check(v->arch.tc_regions, old_va)) - vtlb_purge(v, va, ps); - vhpt_purge(v, va, ps); -} - -u64 translate_phy_pte(u64 *pte, u64 itir, u64 va) -{ - u64 ps, ps_mask, paddr, maddr, io_mask; - union pte_flags phy_pte; - - ps = itir_ps(itir); - ps_mask = ~((1UL << ps) - 1); - phy_pte.val = *pte; - paddr = *pte; - paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask); - maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT); - io_mask = maddr & GPFN_IO_MASK; - if (io_mask && (io_mask != GPFN_PHYS_MMIO)) { - *pte |= VTLB_PTE_IO; - return -1; - } - maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) | - (paddr & ~PAGE_MASK); - phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT; - return phy_pte.val; -} - -/* - * Purge overlap TCs and then insert the new entry to emulate itc ops. - * Notes: Only TC entry can purge and insert. - */ -void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, - u64 ifa, int type) -{ - u64 ps; - u64 phy_pte, io_mask, index; - union ia64_rr vrr, mrr; - - ps = itir_ps(itir); - vrr.val = vcpu_get_rr(v, ifa); - mrr.val = ia64_get_rr(ifa); - - index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT; - io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK; - phy_pte = translate_phy_pte(&pte, itir, ifa); - - /* Ensure WB attribute if pte is related to a normal mem page, - * which is required by vga acceleration since qemu maps shared - * vram buffer with WB. 
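 * Editor's aside (not part of this patch): concretely, when the backing
 * page is ordinary RAM (not an I/O page and not mapped NaT), the
 * memory-attribute field is cleared in both the guest-visible pte and
 * the machine phy_pte; on ia64 _PAGE_MA_WB is the all-zero encoding, so
 * stripping _PAGE_MA_MASK forces write-back regardless of the attribute
 * the guest asked for.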
- */ - if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) && - io_mask != GPFN_PHYS_MMIO) { - pte &= ~_PAGE_MA_MASK; - phy_pte &= ~_PAGE_MA_MASK; - } - - vtlb_purge(v, ifa, ps); - vhpt_purge(v, ifa, ps); - - if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) { - vtlb_insert(v, pte, itir, ifa); - vcpu_quick_region_set(VMX(v, tc_regions), ifa); - } - if (pte & VTLB_PTE_IO) - return; - - if (ps >= mrr.ps) - vhpt_insert(phy_pte, itir, ifa, pte); - else { - u64 psr; - phy_pte &= ~PAGE_FLAGS_RV_MASK; - psr = ia64_clear_ic(); - ia64_itc(type, ifa, phy_pte, ps); - paravirt_dv_serialize_data(); - ia64_set_psr(psr); - } - if (!(pte&VTLB_PTE_IO)) - mark_pages_dirty(v, pte, ps); - -} - -/* - * Purge all TCs or VHPT entries including those in Hash table. - * - */ - -void thash_purge_all(struct kvm_vcpu *v) -{ - int i; - struct thash_data *head; - struct thash_cb *vtlb, *vhpt; - vtlb = &v->arch.vtlb; - vhpt = &v->arch.vhpt; - - for (i = 0; i < 8; i++) - VMX(v, psbits[i]) = 0; - - head = vtlb->hash; - for (i = 0; i < vtlb->num; i++) { - head->page_flags = 0; - head->etag = INVALID_TI_TAG; - head->itir = 0; - head->next = 0; - head++; - }; - - head = vhpt->hash; - for (i = 0; i < vhpt->num; i++) { - head->page_flags = 0; - head->etag = INVALID_TI_TAG; - head->itir = 0; - head->next = 0; - head++; - }; - - local_flush_tlb_all(); -} - -/* - * Lookup the hash table and its collision chain to find an entry - * covering this address rid:va or the entry. - * - * INPUT: - * in: TLB format for both VHPT & TLB. - */ -struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data) -{ - struct thash_data *cch; - u64 psbits, ps, tag; - union ia64_rr vrr; - - struct thash_cb *hcb = &v->arch.vtlb; - - cch = __vtr_lookup(v, va, is_data); - if (cch) - return cch; - - if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0) - return NULL; - - psbits = VMX(v, psbits[(va >> 61)]); - vrr.val = vcpu_get_rr(v, va); - while (psbits) { - ps = __ffs(psbits); - psbits &= ~(1UL << ps); - vrr.ps = ps; - cch = vsa_thash(hcb->pta, va, vrr.val, &tag); - if (cch->etag == tag && cch->ps == ps) - return cch; - } - - return NULL; -} - -/* - * Initialize internal control data before service. - */ -void thash_init(struct thash_cb *hcb, u64 sz) -{ - int i; - struct thash_data *head; - - hcb->pta.val = (unsigned long)hcb->hash; - hcb->pta.vf = 1; - hcb->pta.ve = 1; - hcb->pta.size = sz; - head = hcb->hash; - for (i = 0; i < hcb->num; i++) { - head->page_flags = 0; - head->itir = 0; - head->etag = INVALID_TI_TAG; - head->next = 0; - head++; - } -} - -u64 kvm_get_mpt_entry(u64 gpfn) -{ - u64 *base = (u64 *) KVM_P2M_BASE; - - if (gpfn >= (KVM_P2M_SIZE >> 3)) - panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn); - - return *(base + gpfn); -} - -u64 kvm_lookup_mpa(u64 gpfn) -{ - u64 maddr; - maddr = kvm_get_mpt_entry(gpfn); - return maddr&_PAGE_PPN_MASK; -} - -u64 kvm_gpa_to_mpa(u64 gpa) -{ - u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT); - return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK); -} - -/* - * Fetch guest bundle code. - * INPUT: - * gip: guest ip - * pbundle: used to return fetched bundle. 
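 * Editor's aside (not part of this patch): ppn in a thash entry counts
 * 4KB frames, so (ppn >> (ps - 12)) << ps rounds the frame number down
 * to the entry's page size and converts it to a byte address, to which
 * the in-page offset gip & (PSIZE(ps) - 1) is added; e.g. ppn = 0x12345
 * with ps = 16 (64KB pages) gives a base of 0x12340000. An IA-64 bundle
 * is 16 bytes, hence the two 64-bit loads into pbundle->i64[].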
- */ -int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle) -{ - u64 gpip = 0; /* guest physical IP*/ - u64 *vpa; - struct thash_data *tlb; - u64 maddr; - - if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) { - /* I-side physical mode */ - gpip = gip; - } else { - tlb = vtlb_lookup(vcpu, gip, I_TLB); - if (tlb) - gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) | - (gip & (PSIZE(tlb->ps) - 1)); - } - if (gpip) { - maddr = kvm_gpa_to_mpa(gpip); - } else { - tlb = vhpt_lookup(gip); - if (tlb == NULL) { - ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2); - return IA64_FAULT; - } - maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) - | (gip & (PSIZE(tlb->ps) - 1)); - } - vpa = (u64 *)__kvm_va(maddr); - - pbundle->i64[0] = *vpa++; - pbundle->i64[1] = *vpa; - - return IA64_NO_FAULT; -} - -void kvm_init_vhpt(struct kvm_vcpu *v) -{ - v->arch.vhpt.num = VHPT_NUM_ENTRIES; - thash_init(&v->arch.vhpt, VHPT_SHIFT); - ia64_set_pta(v->arch.vhpt.pta.val); - /*Enable VHPT here?*/ -} - -void kvm_init_vtlb(struct kvm_vcpu *v) -{ - v->arch.vtlb.num = VTLB_NUM_ENTRIES; - thash_init(&v->arch.vtlb, VTLB_SHIFT); -} diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 95cef0b5f83..df19d0c47be 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -565,6 +565,7 @@ void consistent_free(size_t size, void *vaddr); void consistent_sync(void *vaddr, size_t size, int direction); void consistent_sync_page(struct page *page, unsigned long offset, size_t size, int direction); +unsigned long consistent_virt_to_pfn(void *vaddr); void setup_memory(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index 4633c36c1b3..ed7ba8a1182 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -154,9 +154,36 @@ dma_direct_sync_sg_for_device(struct device *dev, __dma_sync(sg->dma_address, sg->length, direction); } +int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t handle, size_t size, + struct dma_attrs *attrs) +{ +#ifdef CONFIG_MMU + unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; + unsigned long pfn; + + if (off >= count || user_count > (count - off)) + return -ENXIO; + +#ifdef NOT_COHERENT_CACHE + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + pfn = consistent_virt_to_pfn(cpu_addr); +#else + pfn = virt_to_pfn(cpu_addr); +#endif + return remap_pfn_range(vma, vma->vm_start, pfn + off, + vma->vm_end - vma->vm_start, vma->vm_page_prot); +#else + return -ENXIO; +#endif +} + struct dma_map_ops dma_direct_ops = { .alloc = dma_direct_alloc_coherent, .free = dma_direct_free_coherent, + .mmap = dma_direct_mmap_coherent, .map_sg = dma_direct_map_sg, .dma_supported = dma_direct_dma_supported, .map_page = dma_direct_map_page, diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index e10ad930895..b06c3a7faf2 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -156,6 +156,25 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle) } EXPORT_SYMBOL(consistent_alloc); +#ifdef CONFIG_MMU +static pte_t *consistent_virt_to_pte(void *vaddr) +{ + unsigned long addr = (unsigned long)vaddr; + + return pte_offset_kernel(pmd_offset(pgd_offset_k(addr), addr), addr); +} + +unsigned long consistent_virt_to_pfn(void *vaddr) +{ + pte_t *ptep = 
consistent_virt_to_pte(vaddr); + + if (pte_none(*ptep) || !pte_present(*ptep)) + return 0; + + return pte_pfn(*ptep); +} +#endif + /* * free page(s) as defined by the above mapping. */ @@ -181,13 +200,9 @@ void consistent_free(size_t size, void *vaddr) } while (size -= PAGE_SIZE); #else do { - pte_t *ptep; + pte_t *ptep = consistent_virt_to_pte(vaddr); unsigned long pfn; - ptep = pte_offset_kernel(pmd_offset(pgd_offset_k( - (unsigned int)vaddr), - (unsigned int)vaddr), - (unsigned int)vaddr); if (!pte_none(*ptep) && pte_present(*ptep)) { pfn = pte_pfn(*ptep); pte_clear(&init_mm, (unsigned int)vaddr, ptep); diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 6acf0c2a0f9..942c7b1678e 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -170,8 +170,6 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr, unsigned long gpa, bool dirty); -extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, - long pte_index, unsigned long pteh, unsigned long ptel); extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, pgd_t *pgdir, bool realmode, unsigned long *idx_ret); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 0aa817933e6..2d81e202bdc 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -37,7 +37,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ -extern unsigned long kvm_rma_pages; #endif #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ @@ -148,7 +147,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, /* This covers 14..54 bits of va*/ rb = (v & ~0x7fUL) << 16; /* AVA field */ - rb |= v >> (62 - 8); /* B field */ + rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */ /* * AVA in v had cleared lower 23 bits. 
We need to derive * that from pteg index diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 047855619cc..7efd666a3fa 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -180,11 +180,6 @@ struct kvmppc_spapr_tce_table { struct page *pages[0]; }; -struct kvm_rma_info { - atomic_t use_count; - unsigned long base_pfn; -}; - /* XICS components, defined in book3s_xics.c */ struct kvmppc_xics; struct kvmppc_icp; @@ -214,16 +209,9 @@ struct revmap_entry { #define KVMPPC_RMAP_PRESENT 0x100000000ul #define KVMPPC_RMAP_INDEX 0xfffffffful -/* Low-order bits in memslot->arch.slot_phys[] */ -#define KVMPPC_PAGE_ORDER_MASK 0x1f -#define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */ -#define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */ -#define KVMPPC_GOT_PAGE 0x80 - struct kvm_arch_memory_slot { #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE unsigned long *rmap; - unsigned long *slot_phys; #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ }; @@ -242,14 +230,12 @@ struct kvm_arch { struct kvm_rma_info *rma; unsigned long vrma_slb_v; int rma_setup_done; - int using_mmu_notifiers; u32 hpt_order; atomic_t vcpus_running; u32 online_vcores; unsigned long hpt_npte; unsigned long hpt_mask; atomic_t hpte_mod_interest; - spinlock_t slot_phys_lock; cpumask_t need_tlb_flush; int hpt_cma_alloc; #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ @@ -297,6 +283,7 @@ struct kvmppc_vcore { struct list_head runnable_threads; spinlock_t lock; wait_queue_head_t wq; + spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ u64 stolen_tb; u64 preempt_tb; struct kvm_vcpu *runner; @@ -308,6 +295,7 @@ struct kvmppc_vcore { ulong dpdes; /* doorbell state (POWER8) */ void *mpp_buffer; /* Micro Partition Prefetch buffer */ bool mpp_buffer_is_valid; + ulong conferring_threads; }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) @@ -664,6 +652,8 @@ struct kvm_vcpu_arch { spinlock_t tbacct_lock; u64 busy_stolen; u64 busy_preempt; + + u32 emul_inst; #endif }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a6dcdb6d13c..46bf652c916 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -170,8 +170,6 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce); extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba); -extern struct kvm_rma_info *kvm_alloc_rma(void); -extern void kvm_release_rma(struct kvm_rma_info *ri); extern struct page *kvm_alloc_hpt(unsigned long nr_pages); extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); extern int kvmppc_core_init_vm(struct kvm *kvm); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c161ef3f28a..24d78e1871c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -489,7 +489,6 @@ int main(void) DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); - DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); @@ -499,6 +498,7 @@ int main(void) DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, 
arch.vpa.pinned_addr)); DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); + DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst)); #endif #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 602eb51d20b..f5769f19ae2 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -172,6 +172,7 @@ config KVM_XICS depends on KVM_BOOK3S_64 && !KVM_MPIC select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD + default y ---help--- Include support for the XICS (eXternal Interrupt Controller Specification) interrupt controller architecture used on diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index b32db4b9536..888bf466d8c 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -64,14 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) -{ -} - void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) { if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index cd0b0730e29..a2eb6d354a5 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -78,11 +78,6 @@ static inline bool sr_kp(u32 sr_raw) return (sr_raw & 0x20000000) ? true: false; } -static inline bool sr_nx(u32 sr_raw) -{ - return (sr_raw & 0x10000000) ? true: false; -} - static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data, bool iswrite); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d40770248b6..534acb3c6c3 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,8 +37,7 @@ #include <asm/ppc-opcode.h> #include <asm/cputable.h> -/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ -#define MAX_LPID_970 63 +#include "trace_hv.h" /* Power architecture requires HPT is at least 256kB */ #define PPC_MIN_HPT_ORDER 18 @@ -229,14 +228,9 @@ int kvmppc_mmu_hv_init(void) if (!cpu_has_feature(CPU_FTR_HVMODE)) return -EINVAL; - /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */ - if (cpu_has_feature(CPU_FTR_ARCH_206)) { - host_lpid = mfspr(SPRN_LPID); /* POWER7 */ - rsvd_lpid = LPID_RSVD; - } else { - host_lpid = 0; /* PPC970 */ - rsvd_lpid = MAX_LPID_970; - } + /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ + host_lpid = mfspr(SPRN_LPID); + rsvd_lpid = LPID_RSVD; kvmppc_init_lpid(rsvd_lpid + 1); @@ -259,130 +253,12 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, msr); } -/* - * This is called to get a reference to a guest page if there isn't - * one already in the memslot->arch.slot_phys[] array. 
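 * Editor's aside (not part of this patch): this function implemented the
 * legacy path in which every guest page was pinned up front with
 * get_user_pages_fast() (taking a whole huge page when slot alignment
 * allowed) and recorded as pfn-plus-flag-bits in slot_phys[]. The patch
 * deletes it together with arch.using_mmu_notifiers: HV KVM now always
 * faults pages in on demand and relies on the mmu notifier hooks
 * (kvm_unmap_hva_hv() and friends below), which is also why the H_ENTER
 * special case disappears from kvmppc_pseries_do_hcall() further down.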
- */ -static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, - struct kvm_memory_slot *memslot, - unsigned long psize) -{ - unsigned long start; - long np, err; - struct page *page, *hpage, *pages[1]; - unsigned long s, pgsize; - unsigned long *physp; - unsigned int is_io, got, pgorder; - struct vm_area_struct *vma; - unsigned long pfn, i, npages; - - physp = memslot->arch.slot_phys; - if (!physp) - return -EINVAL; - if (physp[gfn - memslot->base_gfn]) - return 0; - - is_io = 0; - got = 0; - page = NULL; - pgsize = psize; - err = -EINVAL; - start = gfn_to_hva_memslot(memslot, gfn); - - /* Instantiate and get the page we want access to */ - np = get_user_pages_fast(start, 1, 1, pages); - if (np != 1) { - /* Look up the vma for the page */ - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, start); - if (!vma || vma->vm_start > start || - start + psize > vma->vm_end || - !(vma->vm_flags & VM_PFNMAP)) - goto up_err; - is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); - pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); - /* check alignment of pfn vs. requested page size */ - if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1))) - goto up_err; - up_read(¤t->mm->mmap_sem); - - } else { - page = pages[0]; - got = KVMPPC_GOT_PAGE; - - /* See if this is a large page */ - s = PAGE_SIZE; - if (PageHuge(page)) { - hpage = compound_head(page); - s <<= compound_order(hpage); - /* Get the whole large page if slot alignment is ok */ - if (s > psize && slot_is_aligned(memslot, s) && - !(memslot->userspace_addr & (s - 1))) { - start &= ~(s - 1); - pgsize = s; - get_page(hpage); - put_page(page); - page = hpage; - } - } - if (s < psize) - goto out; - pfn = page_to_pfn(page); - } - - npages = pgsize >> PAGE_SHIFT; - pgorder = __ilog2(npages); - physp += (gfn - memslot->base_gfn) & ~(npages - 1); - spin_lock(&kvm->arch.slot_phys_lock); - for (i = 0; i < npages; ++i) { - if (!physp[i]) { - physp[i] = ((pfn + i) << PAGE_SHIFT) + - got + is_io + pgorder; - got = 0; - } - } - spin_unlock(&kvm->arch.slot_phys_lock); - err = 0; - - out: - if (got) - put_page(page); - return err; - - up_err: - up_read(¤t->mm->mmap_sem); - return err; -} - long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, unsigned long *pte_idx_ret) { - unsigned long psize, gpa, gfn; - struct kvm_memory_slot *memslot; long ret; - if (kvm->arch.using_mmu_notifiers) - goto do_insert; - - psize = hpte_page_size(pteh, ptel); - if (!psize) - return H_PARAMETER; - - pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); - - /* Find the memslot (if any) for this address */ - gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); - gfn = gpa >> PAGE_SHIFT; - memslot = gfn_to_memslot(kvm, gfn); - if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) { - if (!slot_is_aligned(memslot, psize)) - return H_PARAMETER; - if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0) - return H_PARAMETER; - } - - do_insert: /* Protect linux PTE lookup from page table destruction */ rcu_read_lock_sched(); /* this disables preemption too */ ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel, @@ -397,19 +273,6 @@ long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, } -/* - * We come here on a H_ENTER call from the guest when we are not - * using mmu notifiers and we don't have the requested page pinned - * already. 
- */ -long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, - long pte_index, unsigned long pteh, - unsigned long ptel) -{ - return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index, - pteh, ptel, &vcpu->arch.gpr[4]); -} - static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu, gva_t eaddr) { @@ -494,7 +357,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G)); /* Storage key permission check for POWER7 */ - if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) { + if (data && virtmode) { int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr); if (amrfield & 1) gpte->may_read = 0; @@ -622,14 +485,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, gfn = gpa >> PAGE_SHIFT; memslot = gfn_to_memslot(kvm, gfn); + trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr); + /* No memslot means it's an emulated MMIO region */ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, dsisr & DSISR_ISSTORE); - if (!kvm->arch.using_mmu_notifiers) - return -EFAULT; /* should never get here */ - /* * This should never happen, because of the slot_is_aligned() * check in kvmppc_do_h_enter(). @@ -641,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); + ret = -EFAULT; is_io = 0; pfn = 0; page = NULL; @@ -664,7 +527,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } up_read(¤t->mm->mmap_sem); if (!pfn) - return -EFAULT; + goto out_put; } else { page = pages[0]; pfn = page_to_pfn(page); @@ -694,14 +557,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } } - ret = -EFAULT; if (psize > pte_size) goto out_put; /* Check WIMG vs. the actual page we're accessing */ if (!hpte_cache_flags_ok(r, is_io)) { if (is_io) - return -EFAULT; + goto out_put; + /* * Allow guest to map emulated device memory as * uncacheable, but actually make it cacheable. 
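 * Editor's aside (not part of this patch): in the page-fault handler
 * below, ret is now preset to -EFAULT much earlier, so the error returns
 * can funnel through out_put instead of returning directly; that keeps
 * the page-reference handling in one place and ensures the new
 * trace_kvm_page_fault_exit() tracepoint fires on every exit path.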
@@ -765,6 +628,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, SetPageDirty(page); out_put: + trace_kvm_page_fault_exit(vcpu, hpte, ret); + if (page) { /* * We drop pages[0] here, not page because page might @@ -895,8 +760,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && hpte_rpn(ptel, psize) == gfn) { - if (kvm->arch.using_mmu_notifiers) - hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); /* Harvest R and C */ rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); @@ -914,15 +778,13 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) { - if (kvm->arch.using_mmu_notifiers) - kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); + kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); return 0; } int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) { - if (kvm->arch.using_mmu_notifiers) - kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); + kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); return 0; } @@ -1004,8 +866,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) { - if (!kvm->arch.using_mmu_notifiers) - return 0; return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); } @@ -1042,15 +902,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) { - if (!kvm->arch.using_mmu_notifiers) - return 0; return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); } void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) { - if (!kvm->arch.using_mmu_notifiers) - return; kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); } @@ -1117,8 +973,11 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) } /* Now check and modify the HPTE */ - if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) + if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) { + /* unlock and continue */ + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); continue; + } /* need to make it temporarily absent so C is stable */ hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); @@ -1206,35 +1065,17 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, struct page *page, *pages[1]; int npages; unsigned long hva, offset; - unsigned long pa; - unsigned long *physp; int srcu_idx; srcu_idx = srcu_read_lock(&kvm->srcu); memslot = gfn_to_memslot(kvm, gfn); if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) goto err; - if (!kvm->arch.using_mmu_notifiers) { - physp = memslot->arch.slot_phys; - if (!physp) - goto err; - physp += gfn - memslot->base_gfn; - pa = *physp; - if (!pa) { - if (kvmppc_get_guest_page(kvm, gfn, memslot, - PAGE_SIZE) < 0) - goto err; - pa = *physp; - } - page = pfn_to_page(pa >> PAGE_SHIFT); - get_page(page); - } else { - hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, 1, pages); - if (npages < 1) - goto err; - page = pages[0]; - } + hva = gfn_to_hva_memslot(memslot, gfn); + npages = get_user_pages_fast(hva, 1, 1, pages); + if (npages < 1) + goto err; + page = pages[0]; srcu_read_unlock(&kvm->srcu, srcu_idx); offset = gpa & (PAGE_SIZE - 1); @@ -1258,7 +1099,7 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, put_page(page); - if (!dirty || !kvm->arch.using_mmu_notifiers) 
+ if (!dirty) return; /* We need to mark this page dirty in the rmap chain */ @@ -1539,9 +1380,15 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); lbuf = (unsigned long __user *)buf; for (j = 0; j < hdr.n_valid; ++j) { + __be64 hpte_v; + __be64 hpte_r; + err = -EFAULT; - if (__get_user(v, lbuf) || __get_user(r, lbuf + 1)) + if (__get_user(hpte_v, lbuf) || + __get_user(hpte_r, lbuf + 1)) goto out; + v = be64_to_cpu(hpte_v); + r = be64_to_cpu(hpte_r); err = -EINVAL; if (!(v & HPTE_V_VALID)) goto out; @@ -1652,10 +1499,7 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) { struct kvmppc_mmu *mmu = &vcpu->arch.mmu; - if (cpu_has_feature(CPU_FTR_ARCH_206)) - vcpu->arch.slb_nr = 32; /* POWER7 */ - else - vcpu->arch.slb_nr = 64; + vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */ mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e63587d30b7..de4018a1bc4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -58,6 +58,9 @@ #include "book3s.h" +#define CREATE_TRACE_POINTS +#include "trace_hv.h" + /* #define EXIT_DEBUG */ /* #define EXIT_DEBUG_SIMPLE */ /* #define EXIT_DEBUG_INT */ @@ -135,11 +138,10 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) * stolen. * * Updates to busy_stolen are protected by arch.tbacct_lock; - * updates to vc->stolen_tb are protected by the arch.tbacct_lock - * of the vcpu that has taken responsibility for running the vcore - * (i.e. vc->runner). The stolen times are measured in units of - * timebase ticks. (Note that the != TB_NIL checks below are - * purely defensive; they should never fail.) + * updates to vc->stolen_tb are protected by the vcore->stoltb_lock + * lock. The stolen times are measured in units of timebase ticks. + * (Note that the != TB_NIL checks below are purely defensive; + * they should never fail.) */ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) @@ -147,12 +149,21 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; - spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); - if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE && - vc->preempt_tb != TB_NIL) { - vc->stolen_tb += mftb() - vc->preempt_tb; - vc->preempt_tb = TB_NIL; + /* + * We can test vc->runner without taking the vcore lock, + * because only this task ever sets vc->runner to this + * vcpu, and once it is set to this vcpu, only this task + * ever sets it to NULL. 
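 * Editor's aside (not part of this patch): this is the usual
 * single-writer argument: a field written only by this task may be read
 * by this task without locking. The timestamps themselves move under the
 * new vc->stoltb_lock rather than piggy-backing on the runner vcpu's
 * tbacct_lock, which is what lets vcore_stolen_time() below drop its
 * careful are-we-the-runner reasoning and simply take the lock.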
+ */ + if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { + spin_lock_irqsave(&vc->stoltb_lock, flags); + if (vc->preempt_tb != TB_NIL) { + vc->stolen_tb += mftb() - vc->preempt_tb; + vc->preempt_tb = TB_NIL; + } + spin_unlock_irqrestore(&vc->stoltb_lock, flags); } + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && vcpu->arch.busy_preempt != TB_NIL) { vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt; @@ -166,9 +177,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; - spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); - if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) + if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { + spin_lock_irqsave(&vc->stoltb_lock, flags); vc->preempt_tb = mftb(); + spin_unlock_irqrestore(&vc->stoltb_lock, flags); + } + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) vcpu->arch.busy_preempt = mftb(); spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); @@ -191,9 +205,6 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) struct kvmppc_vcore *vc = vcpu->arch.vcore; if (arch_compat) { - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - return -EINVAL; /* 970 has no compat mode support */ - switch (arch_compat) { case PVR_ARCH_205: /* @@ -505,25 +516,14 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) { u64 p; + unsigned long flags; - /* - * If we are the task running the vcore, then since we hold - * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb - * can't be updated, so we don't need the tbacct_lock. - * If the vcore is inactive, it can't become active (since we - * hold the vcore lock), so the vcpu load/put functions won't - * update stolen_tb/preempt_tb, and we don't need tbacct_lock. - */ + spin_lock_irqsave(&vc->stoltb_lock, flags); + p = vc->stolen_tb; if (vc->vcore_state != VCORE_INACTIVE && - vc->runner->arch.run_task != current) { - spin_lock_irq(&vc->runner->arch.tbacct_lock); - p = vc->stolen_tb; - if (vc->preempt_tb != TB_NIL) - p += now - vc->preempt_tb; - spin_unlock_irq(&vc->runner->arch.tbacct_lock); - } else { - p = vc->stolen_tb; - } + vc->preempt_tb != TB_NIL) + p += now - vc->preempt_tb; + spin_unlock_irqrestore(&vc->stoltb_lock, flags); return p; } @@ -607,10 +607,45 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, } } +static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) +{ + struct kvmppc_vcore *vcore = target->arch.vcore; + + /* + * We expect to have been called by the real mode handler + * (kvmppc_rm_h_confer()) which would have directly returned + * H_SUCCESS if the source vcore wasn't idle (e.g. if it may + * have useful work to do and should not confer) so we don't + * recheck that here. 
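 * Editor's aside (not part of this patch): if the target vcpu is
 * runnable inside an active vcore, the yield is redirected to the
 * vcore's runner task, since that is what actually occupies a host CPU.
 * The kvmppc_get_yield_count() helper below pairs with the H_CONFER
 * change further down: the guest passes the yield count it sampled in
 * gpr 5, and if the target's lppaca count no longer matches, the target
 * has run in the meantime and the confer is dropped as stale.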
+ */ + + spin_lock(&vcore->lock); + if (target->arch.state == KVMPPC_VCPU_RUNNABLE && + vcore->vcore_state != VCORE_INACTIVE) + target = vcore->runner; + spin_unlock(&vcore->lock); + + return kvm_vcpu_yield_to(target); +} + +static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu) +{ + int yield_count = 0; + struct lppaca *lppaca; + + spin_lock(&vcpu->arch.vpa_update_lock); + lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr; + if (lppaca) + yield_count = lppaca->yield_count; + spin_unlock(&vcpu->arch.vpa_update_lock); + return yield_count; +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { unsigned long req = kvmppc_get_gpr(vcpu, 3); unsigned long target, ret = H_SUCCESS; + int yield_count; struct kvm_vcpu *tvcpu; int idx, rc; @@ -619,14 +654,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) return RESUME_HOST; switch (req) { - case H_ENTER: - idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4), - kvmppc_get_gpr(vcpu, 5), - kvmppc_get_gpr(vcpu, 6), - kvmppc_get_gpr(vcpu, 7)); - srcu_read_unlock(&vcpu->kvm->srcu, idx); - break; case H_CEDE: break; case H_PROD: @@ -654,7 +681,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) ret = H_PARAMETER; break; } - kvm_vcpu_yield_to(tvcpu); + yield_count = kvmppc_get_gpr(vcpu, 5); + if (kvmppc_get_yield_count(tvcpu) != yield_count) + break; + kvm_arch_vcpu_yield_to(tvcpu); break; case H_REGISTER_VPA: ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4), @@ -769,6 +799,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; + /* HMI is hypervisor interrupt and host has handled it. Resume guest.*/ + case BOOK3S_INTERRUPT_HMI: case BOOK3S_INTERRUPT_PERFMON: r = RESUME_GUEST; break; @@ -837,6 +869,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, * Accordingly return to Guest or Host. */ case BOOK3S_INTERRUPT_H_EMUL_ASSIST: + if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED) + vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ? 
+ swab32(vcpu->arch.emul_inst) : + vcpu->arch.emul_inst; if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { r = kvmppc_emulate_debug_inst(run, vcpu); } else { @@ -1357,6 +1393,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); + spin_lock_init(&vcore->stoltb_lock); init_waitqueue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; @@ -1694,9 +1731,11 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vc->n_woken = 0; vc->nap_count = 0; vc->entry_exit_count = 0; + vc->preempt_tb = TB_NIL; vc->vcore_state = VCORE_STARTING; vc->in_guest = 0; vc->napping_threads = 0; + vc->conferring_threads = 0; /* * Updating any of the vpas requires calling kvmppc_pin_guest_page, @@ -1726,6 +1765,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { kvmppc_start_thread(vcpu); kvmppc_create_dtl_entry(vcpu, vc); + trace_kvm_guest_enter(vcpu); } /* Set this explicitly in case thread 0 doesn't have a vcpu */ @@ -1734,6 +1774,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vc->vcore_state = VCORE_RUNNING; preempt_disable(); + + trace_kvmppc_run_core(vc, 0); + spin_unlock(&vc->lock); kvm_guest_enter(); @@ -1779,6 +1822,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_core_pending_dec(vcpu)) kvmppc_core_dequeue_dec(vcpu); + trace_kvm_guest_exit(vcpu); + ret = RESUME_GUEST; if (vcpu->arch.trap) ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, @@ -1804,6 +1849,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) wake_up(&vcpu->arch.cpu_run); } } + + trace_kvmppc_run_core(vc, 1); } /* @@ -1826,15 +1873,37 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state) */ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { + struct kvm_vcpu *vcpu; + int do_sleep = 1; + DEFINE_WAIT(wait); prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + + /* + * Check one last time for pending exceptions and ceded state after + * we put ourselves on the wait queue + */ + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { + if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) { + do_sleep = 0; + break; + } + } + + if (!do_sleep) { + finish_wait(&vc->wq, &wait); + return; + } + vc->vcore_state = VCORE_SLEEPING; + trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); schedule(); finish_wait(&vc->wq, &wait); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; + trace_kvmppc_vcore_blocked(vc, 1); } static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) @@ -1843,6 +1912,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) struct kvmppc_vcore *vc; struct kvm_vcpu *v, *vn; + trace_kvmppc_run_vcpu_enter(vcpu); + kvm_run->exit_reason = 0; vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; @@ -1872,6 +1943,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) VCORE_EXIT_COUNT(vc) == 0) { kvmppc_create_dtl_entry(vcpu, vc); kvmppc_start_thread(vcpu); + trace_kvm_guest_enter(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { wake_up(&vc->wq); } @@ -1936,6 +2008,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) wake_up(&v->arch.cpu_run); } + trace_kvmppc_run_vcpu_exit(vcpu, kvm_run); spin_unlock(&vc->lock); return vcpu->arch.ret; } @@ -1962,7 +2035,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Order vcpus_running vs. 
rma_setup_done, see kvmppc_alloc_reset_hpt */ smp_mb(); - /* On the first time here, set up HTAB and VRMA or RMA */ + /* On the first time here, set up HTAB and VRMA */ if (!vcpu->kvm->arch.rma_setup_done) { r = kvmppc_hv_setup_htab_rma(vcpu); if (r) @@ -1981,7 +2054,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_PAPR_HCALL && !(vcpu->arch.shregs.msr & MSR_PR)) { + trace_kvm_hcall_enter(vcpu); r = kvmppc_pseries_do_hcall(vcpu); + trace_kvm_hcall_exit(vcpu, r); kvmppc_core_prepare_to_enter(vcpu); } else if (r == RESUME_PAGE_FAULT) { srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -1997,98 +2072,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } - -/* Work out RMLS (real mode limit selector) field value for a given RMA size. - Assumes POWER7 or PPC970. */ -static inline int lpcr_rmls(unsigned long rma_size) -{ - switch (rma_size) { - case 32ul << 20: /* 32 MB */ - if (cpu_has_feature(CPU_FTR_ARCH_206)) - return 8; /* only supported on POWER7 */ - return -1; - case 64ul << 20: /* 64 MB */ - return 3; - case 128ul << 20: /* 128 MB */ - return 7; - case 256ul << 20: /* 256 MB */ - return 4; - case 1ul << 30: /* 1 GB */ - return 2; - case 16ul << 30: /* 16 GB */ - return 1; - case 256ul << 30: /* 256 GB */ - return 0; - default: - return -1; - } -} - -static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *page; - struct kvm_rma_info *ri = vma->vm_file->private_data; - - if (vmf->pgoff >= kvm_rma_pages) - return VM_FAULT_SIGBUS; - - page = pfn_to_page(ri->base_pfn + vmf->pgoff); - get_page(page); - vmf->page = page; - return 0; -} - -static const struct vm_operations_struct kvm_rma_vm_ops = { - .fault = kvm_rma_fault, -}; - -static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) -{ - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; - vma->vm_ops = &kvm_rma_vm_ops; - return 0; -} - -static int kvm_rma_release(struct inode *inode, struct file *filp) -{ - struct kvm_rma_info *ri = filp->private_data; - - kvm_release_rma(ri); - return 0; -} - -static const struct file_operations kvm_rma_fops = { - .mmap = kvm_rma_mmap, - .release = kvm_rma_release, -}; - -static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, - struct kvm_allocate_rma *ret) -{ - long fd; - struct kvm_rma_info *ri; - /* - * Only do this on PPC970 in HV mode - */ - if (!cpu_has_feature(CPU_FTR_HVMODE) || - !cpu_has_feature(CPU_FTR_ARCH_201)) - return -EINVAL; - - if (!kvm_rma_pages) - return -EINVAL; - - ri = kvm_alloc_rma(); - if (!ri) - return -ENOMEM; - - fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC); - if (fd < 0) - kvm_release_rma(ri); - - ret->rma_size = kvm_rma_pages << PAGE_SHIFT; - return fd; -} - static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, int linux_psize) { @@ -2167,26 +2150,6 @@ out: return r; } -static void unpin_slot(struct kvm_memory_slot *memslot) -{ - unsigned long *physp; - unsigned long j, npages, pfn; - struct page *page; - - physp = memslot->arch.slot_phys; - npages = memslot->npages; - if (!physp) - return; - for (j = 0; j < npages; j++) { - if (!(physp[j] & KVMPPC_GOT_PAGE)) - continue; - pfn = physp[j] >> PAGE_SHIFT; - page = pfn_to_page(pfn); - SetPageDirty(page); - put_page(page); - } -} - static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { @@ -2194,11 +2157,6 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, 
vfree(free->arch.rmap); free->arch.rmap = NULL; } - if (!dont || free->arch.slot_phys != dont->arch.slot_phys) { - unpin_slot(free); - vfree(free->arch.slot_phys); - free->arch.slot_phys = NULL; - } } static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, @@ -2207,7 +2165,6 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); if (!slot->arch.rmap) return -ENOMEM; - slot->arch.slot_phys = NULL; return 0; } @@ -2216,17 +2173,6 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem) { - unsigned long *phys; - - /* Allocate a slot_phys array if needed */ - phys = memslot->arch.slot_phys; - if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) { - phys = vzalloc(memslot->npages * sizeof(unsigned long)); - if (!phys) - return -ENOMEM; - memslot->arch.slot_phys = phys; - } - return 0; } @@ -2284,17 +2230,11 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { int err = 0; struct kvm *kvm = vcpu->kvm; - struct kvm_rma_info *ri = NULL; unsigned long hva; struct kvm_memory_slot *memslot; struct vm_area_struct *vma; unsigned long lpcr = 0, senc; - unsigned long lpcr_mask = 0; unsigned long psize, porder; - unsigned long rma_size; - unsigned long rmls; - unsigned long *physp; - unsigned long i, npages; int srcu_idx; mutex_lock(&kvm->lock); @@ -2329,88 +2269,25 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) psize = vma_kernel_pagesize(vma); porder = __ilog2(psize); - /* Is this one of our preallocated RMAs? */ - if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops && - hva == vma->vm_start) - ri = vma->vm_file->private_data; - up_read(¤t->mm->mmap_sem); - if (!ri) { - /* On POWER7, use VRMA; on PPC970, give up */ - err = -EPERM; - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - pr_err("KVM: CPU requires an RMO\n"); - goto out_srcu; - } + /* We can handle 4k, 64k or 16M pages in the VRMA */ + err = -EINVAL; + if (!(psize == 0x1000 || psize == 0x10000 || + psize == 0x1000000)) + goto out_srcu; - /* We can handle 4k, 64k or 16M pages in the VRMA */ - err = -EINVAL; - if (!(psize == 0x1000 || psize == 0x10000 || - psize == 0x1000000)) - goto out_srcu; + /* Update VRMASD field in the LPCR */ + senc = slb_pgsize_encoding(psize); + kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + /* the -4 is to account for senc values starting at 0x10 */ + lpcr = senc << (LPCR_VRMASD_SH - 4); - /* Update VRMASD field in the LPCR */ - senc = slb_pgsize_encoding(psize); - kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | - (VRMA_VSID << SLB_VSID_SHIFT_1T); - lpcr_mask = LPCR_VRMASD; - /* the -4 is to account for senc values starting at 0x10 */ - lpcr = senc << (LPCR_VRMASD_SH - 4); + /* Create HPTEs in the hash page table for the VRMA */ + kvmppc_map_vrma(vcpu, memslot, porder); - /* Create HPTEs in the hash page table for the VRMA */ - kvmppc_map_vrma(vcpu, memslot, porder); - - } else { - /* Set up to use an RMO region */ - rma_size = kvm_rma_pages; - if (rma_size > memslot->npages) - rma_size = memslot->npages; - rma_size <<= PAGE_SHIFT; - rmls = lpcr_rmls(rma_size); - err = -EINVAL; - if ((long)rmls < 0) { - pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); - goto out_srcu; - } - atomic_inc(&ri->use_count); - kvm->arch.rma = ri; - - /* Update LPCR and RMOR */ - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - /* PPC970; insert RMLS value (split field) in HID4 */ - lpcr_mask = 
(1ul << HID4_RMLS0_SH) | - (3ul << HID4_RMLS2_SH) | HID4_RMOR; - lpcr = ((rmls >> 2) << HID4_RMLS0_SH) | - ((rmls & 3) << HID4_RMLS2_SH); - /* RMOR is also in HID4 */ - lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff) - << HID4_RMOR_SH; - } else { - /* POWER7 */ - lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS; - lpcr = rmls << LPCR_RMLS_SH; - kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT; - } - pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", - ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); - - /* Initialize phys addrs of pages in RMO */ - npages = kvm_rma_pages; - porder = __ilog2(npages); - physp = memslot->arch.slot_phys; - if (physp) { - if (npages > memslot->npages) - npages = memslot->npages; - spin_lock(&kvm->arch.slot_phys_lock); - for (i = 0; i < npages; ++i) - physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + - porder; - spin_unlock(&kvm->arch.slot_phys_lock); - } - } - - kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); + kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ smp_wmb(); @@ -2449,35 +2326,21 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, sizeof(kvm->arch.enabled_hcalls)); - kvm->arch.rma = NULL; - kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - /* PPC970; HID4 is effectively the LPCR */ - kvm->arch.host_lpid = 0; - kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4); - lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH)); - lpcr |= ((lpid >> 4) << HID4_LPID1_SH) | - ((lpid & 0xf) << HID4_LPID5_SH); - } else { - /* POWER7; init LPCR for virtual RMA mode */ - kvm->arch.host_lpid = mfspr(SPRN_LPID); - kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); - lpcr &= LPCR_PECE | LPCR_LPES; - lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | - LPCR_VPM0 | LPCR_VPM1; - kvm->arch.vrma_slb_v = SLB_VSID_B_1T | - (VRMA_VSID << SLB_VSID_SHIFT_1T); - /* On POWER8 turn on online bit to enable PURR/SPURR */ - if (cpu_has_feature(CPU_FTR_ARCH_207S)) - lpcr |= LPCR_ONL; - } + /* Init LPCR for virtual RMA mode */ + kvm->arch.host_lpid = mfspr(SPRN_LPID); + kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); + lpcr &= LPCR_PECE | LPCR_LPES; + lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | + LPCR_VPM0 | LPCR_VPM1; + kvm->arch.vrma_slb_v = SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + /* On POWER8 turn on online bit to enable PURR/SPURR */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + lpcr |= LPCR_ONL; kvm->arch.lpcr = lpcr; - kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206); - spin_lock_init(&kvm->arch.slot_phys_lock); - /* * Track that we now have a HV mode VM active. This blocks secondary * CPU threads from coming online. 
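The VRMA-only path kept above boils down to: derive the SLB page-size encoding (senc) for the backing page size, then place it in the VRMASD field of the LPCR. Below is a minimal userspace sketch of that computation; the constant values and the shape of slb_pgsize_encoding() are assumptions for illustration, not the authoritative definitions from asm/reg.h and the kernel's SLB helpers.

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Stand-in constants, assumed for illustration only; the real
	 * definitions live in asm/reg.h and the kernel's SLB encoding
	 * helpers.
	 */
	#define LPCR_VRMASD_SH	47ULL    /* assumed position of VRMASD   */
	#define SLB_VSID_L	0x100ULL /* assumed SLB "large page" bit */
	#define SLB_VSID_LP_01	0x010ULL /* assumed LP=01 (64k) encoding */

	/* Toy analogue of slb_pgsize_encoding(): 4k -> 0, 64k/16M -> L|LP */
	static uint64_t slb_pgsize_encoding(unsigned long psize)
	{
		if (psize >= 0x10000)
			return SLB_VSID_L |
			       (psize == 0x10000 ? SLB_VSID_LP_01 : 0);
		return 0;
	}

	int main(void)
	{
		unsigned long sizes[] = { 0x1000, 0x10000, 0x1000000 };

		for (int i = 0; i < 3; i++) {
			uint64_t senc = slb_pgsize_encoding(sizes[i]);
			/* "-4" because senc values start at 0x10 */
			uint64_t vrmasd = senc << (LPCR_VRMASD_SH - 4);

			printf("psize=%#lx senc=%#llx vrmasd=%#llx\n",
			       sizes[i], (unsigned long long)senc,
			       (unsigned long long)vrmasd);
		}
		return 0;
	}

Note that only the page-size decode lands in VRMASD; the 1T-segment selection goes into vrma_slb_v separately, as the hunk above shows.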
@@ -2507,10 +2370,6 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) kvm_hv_vm_deactivated(); kvmppc_free_vcores(kvm); - if (kvm->arch.rma) { - kvm_release_rma(kvm->arch.rma); - kvm->arch.rma = NULL; - } kvmppc_free_hpt(kvm); } @@ -2536,7 +2395,8 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn, static int kvmppc_core_check_processor_compat_hv(void) { - if (!cpu_has_feature(CPU_FTR_HVMODE)) + if (!cpu_has_feature(CPU_FTR_HVMODE) || + !cpu_has_feature(CPU_FTR_ARCH_206)) return -EIO; return 0; } @@ -2550,16 +2410,6 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, switch (ioctl) { - case KVM_ALLOCATE_RMA: { - struct kvm_allocate_rma rma; - struct kvm *kvm = filp->private_data; - - r = kvm_vm_ioctl_allocate_rma(kvm, &rma); - if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) - r = -EFAULT; - break; - } - case KVM_PPC_ALLOCATE_HTAB: { u32 htab_order; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 3f1bb5a36c2..1f083ff8a61 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -16,6 +16,7 @@ #include <linux/memblock.h> #include <linux/sizes.h> #include <linux/cma.h> +#include <linux/bitops.h> #include <asm/cputable.h> #include <asm/kvm_ppc.h> @@ -32,95 +33,9 @@ * By default we reserve 5% of memory for hash pagetable allocation. */ static unsigned long kvm_cma_resv_ratio = 5; -/* - * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area. - * Each RMA has to be physically contiguous and of a size that the - * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, - * and other larger sizes. Since we are unlikely to be allocate that - * much physically contiguous memory after the system is up and running, - * we preallocate a set of RMAs in early boot using CMA. - * should be power of 2. - */ -unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ -EXPORT_SYMBOL_GPL(kvm_rma_pages); static struct cma *kvm_cma; -/* Work out RMLS (real mode limit selector) field value for a given RMA size. - Assumes POWER7 or PPC970. 
*/ -static inline int lpcr_rmls(unsigned long rma_size) -{ - switch (rma_size) { - case 32ul << 20: /* 32 MB */ - if (cpu_has_feature(CPU_FTR_ARCH_206)) - return 8; /* only supported on POWER7 */ - return -1; - case 64ul << 20: /* 64 MB */ - return 3; - case 128ul << 20: /* 128 MB */ - return 7; - case 256ul << 20: /* 256 MB */ - return 4; - case 1ul << 30: /* 1 GB */ - return 2; - case 16ul << 30: /* 16 GB */ - return 1; - case 256ul << 30: /* 256 GB */ - return 0; - default: - return -1; - } -} - -static int __init early_parse_rma_size(char *p) -{ - unsigned long kvm_rma_size; - - pr_debug("%s(%s)\n", __func__, p); - if (!p) - return -EINVAL; - kvm_rma_size = memparse(p, &p); - /* - * Check that the requested size is one supported in hardware - */ - if (lpcr_rmls(kvm_rma_size) < 0) { - pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); - return -EINVAL; - } - kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT; - return 0; -} -early_param("kvm_rma_size", early_parse_rma_size); - -struct kvm_rma_info *kvm_alloc_rma() -{ - struct page *page; - struct kvm_rma_info *ri; - - ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); - if (!ri) - return NULL; - page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages)); - if (!page) - goto err_out; - atomic_set(&ri->use_count, 1); - ri->base_pfn = page_to_pfn(page); - return ri; -err_out: - kfree(ri); - return NULL; -} -EXPORT_SYMBOL_GPL(kvm_alloc_rma); - -void kvm_release_rma(struct kvm_rma_info *ri) -{ - if (atomic_dec_and_test(&ri->use_count)) { - cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages); - kfree(ri); - } -} -EXPORT_SYMBOL_GPL(kvm_release_rma); - static int __init early_parse_kvm_cma_resv(char *p) { pr_debug("%s(%s)\n", __func__, p); @@ -132,14 +47,9 @@ early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv); struct page *kvm_alloc_hpt(unsigned long nr_pages) { - unsigned long align_pages = HPT_ALIGN_PAGES; - VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); - /* Old CPUs require HPT aligned on a multiple of its size */ - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - align_pages = nr_pages; - return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages)); + return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES)); } EXPORT_SYMBOL_GPL(kvm_alloc_hpt); @@ -180,22 +90,44 @@ void __init kvm_cma_reserve(void) if (selected_size) { pr_debug("%s: reserving %ld MiB for global area\n", __func__, (unsigned long)selected_size / SZ_1M); - /* - * Old CPUs require HPT aligned on a multiple of its size. So for them - * make the alignment as max size we could request. - */ - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - align_size = __rounddown_pow_of_two(selected_size); - else - align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; - - align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); + align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; cma_declare_contiguous(0, selected_size, 0, align_size, KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma); } } /* + * Real-mode H_CONFER implementation. + * We check if we are the only vcpu out of this virtual core + * still running in the guest and not ceded. If so, we pop up + * to the virtual-mode implementation; if not, just return to + * the guest. 
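+ * The polling loop below waits at most about 10 microseconds
+ * (10 * tb_ticks_per_usec timebase ticks) for the other threads to
+ * cede or confer; if that doesn't happen, we return H_SUCCESS so the
+ * vcpu simply keeps running in the guest.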
+ */ +long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, + unsigned int yield_count) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + int threads_running; + int threads_ceded; + int threads_conferring; + u64 stop = get_tb() + 10 * tb_ticks_per_usec; + int rv = H_SUCCESS; /* => don't yield */ + + set_bit(vcpu->arch.ptid, &vc->conferring_threads); + while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) { + threads_running = VCORE_ENTRY_COUNT(vc); + threads_ceded = hweight32(vc->napping_threads); + threads_conferring = hweight32(vc->conferring_threads); + if (threads_ceded + threads_conferring >= threads_running) { + rv = H_TOO_HARD; /* => do yield */ + break; + } + } + clear_bit(vcpu->arch.ptid, &vc->conferring_threads); + return rv; +} + +/* * When running HV mode KVM we need to block certain operations while KVM VMs * exist in the system. We use a counter of VMs to track this. * diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 731be7478b2..36540a99d17 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -52,10 +52,8 @@ _GLOBAL(__kvmppc_vcore_entry) std r3, _CCR(r1) /* Save host DSCR */ -BEGIN_FTR_SECTION mfspr r3, SPRN_DSCR std r3, HSTATE_DSCR(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION /* Save host DABR */ @@ -84,11 +82,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r7, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */ mfspr r6, SPRN_MMCRA -BEGIN_FTR_SECTION - /* On P7, clear MMCRA in order to disable SDAR updates */ + /* Clear MMCRA in order to disable SDAR updates */ li r5, 0 mtspr SPRN_MMCRA, r5 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ lbz r5, LPPACA_PMCINUSE(r3) @@ -113,20 +109,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r7, SPRN_PMC4 mfspr r8, SPRN_PMC5 mfspr r9, SPRN_PMC6 -BEGIN_FTR_SECTION - mfspr r10, SPRN_PMC7 - mfspr r11, SPRN_PMC8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r3, HSTATE_PMC(r13) stw r5, HSTATE_PMC + 4(r13) stw r6, HSTATE_PMC + 8(r13) stw r7, HSTATE_PMC + 12(r13) stw r8, HSTATE_PMC + 16(r13) stw r9, HSTATE_PMC + 20(r13) -BEGIN_FTR_SECTION - stw r10, HSTATE_PMC + 24(r13) - stw r11, HSTATE_PMC + 28(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 31: /* @@ -140,31 +128,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) add r8,r8,r7 std r8,HSTATE_DECEXP(r13) -#ifdef CONFIG_SMP - /* - * On PPC970, if the guest vcpu has an external interrupt pending, - * send ourselves an IPI so as to interrupt the guest once it - * enables interrupts. (It must have interrupts disabled, - * otherwise we would already have delivered the interrupt.) - * - * XXX If this is a UP build, smp_send_reschedule is not available, - * so the interrupt will be delayed until the next time the vcpu - * enters the guest with interrupts enabled. - */ -BEGIN_FTR_SECTION - ld r4, HSTATE_KVM_VCPU(r13) - ld r0, VCPU_PENDING_EXC(r4) - li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL) - oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h - and. 
r0, r0, r7 - beq 32f - lhz r3, PACAPACAINDEX(r13) - bl smp_send_reschedule - nop -32: -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -#endif /* CONFIG_SMP */ - /* Jump to partition switch code */ bl kvmppc_hv_entry_trampoline nop diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index d562c8e2bc3..60081bd7584 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -138,8 +138,5 @@ out: long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) { - if (cpu_has_feature(CPU_FTR_ARCH_206)) - return kvmppc_realmode_mc_power7(vcpu); - - return 0; + return kvmppc_realmode_mc_power7(vcpu); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 084ad54c73c..510bdfbc407 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -45,16 +45,12 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags) * as indicated by local_paca->kvm_hstate.kvm_vcpu being set, * we can use tlbiel as long as we mark all other physical * cores as potentially having stale TLB entries for this lpid. - * If we're not using MMU notifiers, we never take pages away - * from the guest, so we can use tlbiel if requested. * Otherwise, don't use tlbiel. */ if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu) global = 0; - else if (kvm->arch.using_mmu_notifiers) - global = 1; else - global = !(flags & H_LOCAL); + global = 1; if (!global) { /* any other core might now have stale TLB entries... */ @@ -170,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, struct revmap_entry *rev; unsigned long g_ptel; struct kvm_memory_slot *memslot; - unsigned long *physp, pte_size; + unsigned long pte_size; unsigned long is_io; unsigned long *rmap; pte_t pte; @@ -198,9 +194,6 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, is_io = ~0ul; rmap = NULL; if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { - /* PPC970 can't do emulated MMIO */ - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - return H_PARAMETER; /* Emulated MMIO - mark this with key=31 */ pteh |= HPTE_V_ABSENT; ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO; @@ -213,37 +206,20 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, slot_fn = gfn - memslot->base_gfn; rmap = &memslot->arch.rmap[slot_fn]; - if (!kvm->arch.using_mmu_notifiers) { - physp = memslot->arch.slot_phys; - if (!physp) - return H_PARAMETER; - physp += slot_fn; - if (realmode) - physp = real_vmalloc_addr(physp); - pa = *physp; - if (!pa) - return H_TOO_HARD; - is_io = pa & (HPTE_R_I | HPTE_R_W); - pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); - pa &= PAGE_MASK; + /* Translate to host virtual address */ + hva = __gfn_to_hva_memslot(memslot, gfn); + + /* Look up the Linux PTE for the backing page */ + pte_size = psize; + pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size); + if (pte_present(pte) && !pte_numa(pte)) { + if (writing && !pte_write(pte)) + /* make the actual HPTE be read-only */ + ptel = hpte_make_readonly(ptel); + is_io = hpte_cache_bits(pte_val(pte)); + pa = pte_pfn(pte) << PAGE_SHIFT; + pa |= hva & (pte_size - 1); pa |= gpa & ~PAGE_MASK; - } else { - /* Translate to host virtual address */ - hva = __gfn_to_hva_memslot(memslot, gfn); - - /* Look up the Linux PTE for the backing page */ - pte_size = psize; - pte = lookup_linux_pte_and_update(pgdir, hva, writing, - &pte_size); - if (pte_present(pte) && !pte_numa(pte)) { - if (writing && !pte_write(pte)) - /* make the actual HPTE be 
read-only */ - ptel = hpte_make_readonly(ptel); - is_io = hpte_cache_bits(pte_val(pte)); - pa = pte_pfn(pte) << PAGE_SHIFT; - pa |= hva & (pte_size - 1); - pa |= gpa & ~PAGE_MASK; - } } if (pte_size < psize) @@ -337,8 +313,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, rmap = real_vmalloc_addr(rmap); lock_rmap(rmap); /* Check for pending invalidations under the rmap chain lock */ - if (kvm->arch.using_mmu_notifiers && - mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_notifier_retry(kvm, mmu_seq)) { /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; @@ -395,61 +370,11 @@ static inline int try_lock_tlbie(unsigned int *lock) return old == 0; } -/* - * tlbie/tlbiel is a bit different on the PPC970 compared to later - * processors such as POWER7; the large page bit is in the instruction - * not RB, and the top 16 bits and the bottom 12 bits of the VA - * in RB must be 0. - */ -static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues, - long npages, int global, bool need_sync) -{ - long i; - - if (global) { - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - if (need_sync) - asm volatile("ptesync" : : : "memory"); - for (i = 0; i < npages; ++i) { - unsigned long rb = rbvalues[i]; - - if (rb & 1) /* large page */ - asm volatile("tlbie %0,1" : : - "r" (rb & 0x0000fffffffff000ul)); - else - asm volatile("tlbie %0,0" : : - "r" (rb & 0x0000fffffffff000ul)); - } - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; - } else { - if (need_sync) - asm volatile("ptesync" : : : "memory"); - for (i = 0; i < npages; ++i) { - unsigned long rb = rbvalues[i]; - - if (rb & 1) /* large page */ - asm volatile("tlbiel %0,1" : : - "r" (rb & 0x0000fffffffff000ul)); - else - asm volatile("tlbiel %0,0" : : - "r" (rb & 0x0000fffffffff000ul)); - } - asm volatile("ptesync" : : : "memory"); - } -} - static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, long npages, int global, bool need_sync) { long i; - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - /* PPC970 tlbie instruction is a bit different */ - do_tlbies_970(kvm, rbvalues, npages, global, need_sync); - return; - } if (global) { while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) cpu_relax(); @@ -667,40 +592,29 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, rev->guest_rpte = r; note_hpte_modification(kvm, rev); } - r = (be64_to_cpu(hpte[1]) & ~mask) | bits; /* Update HPTE */ if (v & HPTE_V_VALID) { - rb = compute_tlbie_rb(v, r, pte_index); - hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID); - do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* - * If the host has this page as readonly but the guest - * wants to make it read/write, reduce the permissions. - * Checking the host permissions involves finding the - * memslot and then the Linux PTE for the page. + * If the page is valid, don't let it transition from + * readonly to writable. If it should be writable, we'll + * take a trap and let the page fault code sort it out. 
*/ - if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) { - unsigned long psize, gfn, hva; - struct kvm_memory_slot *memslot; - pgd_t *pgdir = vcpu->arch.pgdir; - pte_t pte; - - psize = hpte_page_size(v, r); - gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; - memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); - if (memslot) { - hva = __gfn_to_hva_memslot(memslot, gfn); - pte = lookup_linux_pte_and_update(pgdir, hva, - 1, &psize); - if (pte_present(pte) && !pte_write(pte)) - r = hpte_make_readonly(r); - } + pte = be64_to_cpu(hpte[1]); + r = (pte & ~mask) | bits; + if (hpte_is_writable(r) && !hpte_is_writable(pte)) + r = hpte_make_readonly(r); + /* If the PTE is changing, invalidate it first */ + if (r != pte) { + rb = compute_tlbie_rb(v, r, pte_index); + hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) | + HPTE_V_ABSENT); + do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), + true); + hpte[1] = cpu_to_be64(r); } } - hpte[1] = cpu_to_be64(r); - eieio(); - hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK); + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); asm volatile("ptesync" : : : "memory"); return H_SUCCESS; } diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 3ee38e6e884..7b066f6b02a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -183,8 +183,10 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp, * state update in HW (ie bus transactions) so we can handle them * separately here as well. */ - if (resend) + if (resend) { icp->rm_action |= XICS_RM_CHECK_RESEND; + icp->rm_resend_icp = icp; + } } @@ -254,10 +256,25 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, * nothing needs to be done as there can be no XISR to * reject. * + * ICP state: Check_IPI + * * If the CPPR is less favored, then we might be replacing - * an interrupt, and thus need to possibly reject it as in + * an interrupt, and thus need to possibly reject it. * - * ICP state: Check_IPI + * ICP State: IPI + * + * Besides rejecting any pending interrupts, we also + * update XISR and pending_pri to mark IPI as pending. + * + * PAPR does not describe this state, but if the MFRR is being + * made less favored than its earlier value, there might be + * a previously-rejected interrupt needing to be resent. + * Ideally, we would want to resend only if + * prio(pending_interrupt) < mfrr && + * prio(pending_interrupt) < cppr + * where pending interrupt is the one that was rejected. But + * we don't have that state, so we simply trigger a resend + * whenever the MFRR is made less favored. 
*/ do { old_state = new_state = ACCESS_ONCE(icp->state); @@ -270,13 +287,14 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, resend = false; if (mfrr < new_state.cppr) { /* Reject a pending interrupt if not an IPI */ - if (mfrr <= new_state.pending_pri) + if (mfrr <= new_state.pending_pri) { reject = new_state.xisr; - new_state.pending_pri = mfrr; - new_state.xisr = XICS_IPI; + new_state.pending_pri = mfrr; + new_state.xisr = XICS_IPI; + } } - if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { + if (mfrr > old_state.mfrr) { resend = new_state.need_resend; new_state.need_resend = 0; } @@ -289,8 +307,10 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, } /* Pass resends to virtual mode */ - if (resend) + if (resend) { this_icp->rm_action |= XICS_RM_CHECK_RESEND; + this_icp->rm_resend_icp = icp; + } return check_too_hard(xics, this_icp); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 65c105b17a2..10554df1385 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -94,20 +94,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) lwz r6, HSTATE_PMC + 12(r13) lwz r8, HSTATE_PMC + 16(r13) lwz r9, HSTATE_PMC + 20(r13) -BEGIN_FTR_SECTION - lwz r10, HSTATE_PMC + 24(r13) - lwz r11, HSTATE_PMC + 28(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_PMC1, r3 mtspr SPRN_PMC2, r4 mtspr SPRN_PMC3, r5 mtspr SPRN_PMC4, r6 mtspr SPRN_PMC5, r8 mtspr SPRN_PMC6, r9 -BEGIN_FTR_SECTION - mtspr SPRN_PMC7, r10 - mtspr SPRN_PMC8, r11 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, HSTATE_MMCR(r13) ld r4, HSTATE_MMCR + 8(r13) ld r5, HSTATE_MMCR + 16(r13) @@ -153,11 +145,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL -BEGIN_FTR_SECTION beq 11f cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI beq cr2, 14f /* HMI check */ -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* RFI into the highmem handler, or branch to interrupt handler */ mfmsr r6 @@ -166,7 +156,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) mtmsrd r6, 1 /* Clear RI in MSR */ mtsrr0 r8 mtsrr1 r7 - beqa 0x500 /* external interrupt (PPC970) */ beq cr1, 13f /* machine check */ RFI @@ -393,11 +382,8 @@ kvmppc_hv_entry: slbia ptesync -BEGIN_FTR_SECTION - b 30f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) /* - * POWER7 host -> guest partition switch code. + * POWER7/POWER8 host -> guest partition switch code. * We don't have to lock against concurrent tlbies, * but we do have to coordinate across hardware threads. */ @@ -505,97 +491,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi r3,512 /* 1 microsecond */ li r12,BOOK3S_INTERRUPT_HV_DECREMENTER blt hdec_soon - b 31f - - /* - * PPC970 host -> guest partition switch code. - * We have to lock against concurrent tlbies, - * using native_tlbie_lock to lock against host tlbies - * and kvm->arch.tlbie_lock to lock against guest tlbies. - * We also have to invalidate the TLB since its - * entries aren't tagged with the LPID. - */ -30: ld r5,HSTATE_KVM_VCORE(r13) - ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ - - /* first take native_tlbie_lock */ - .section ".toc","aw" -toc_tlbie_lock: - .tc native_tlbie_lock[TC],native_tlbie_lock - .previous - ld r3,toc_tlbie_lock@toc(r2) -#ifdef __BIG_ENDIAN__ - lwz r8,PACA_LOCK_TOKEN(r13) -#else - lwz r8,PACAPACAINDEX(r13) -#endif -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. 
r8,0,r3 - bne 24b - isync - - ld r5,HSTATE_KVM_VCORE(r13) - ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */ - li r0,0x18f - rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ - or r0,r7,r0 - ptesync - sync - mtspr SPRN_HID4,r0 /* switch to reserved LPID */ - isync - li r0,0 - stw r0,0(r3) /* drop native_tlbie_lock */ - - /* invalidate the whole TLB */ - li r0,256 - mtctr r0 - li r6,0 -25: tlbiel r6 - addi r6,r6,0x1000 - bdnz 25b - ptesync - /* Take the guest's tlbie_lock */ - addi r3,r9,KVM_TLBIE_LOCK -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. r8,0,r3 - bne 24b - isync - ld r6,KVM_SDR1(r9) - mtspr SPRN_SDR1,r6 /* switch to partition page table */ - - /* Set up HID4 with the guest's LPID etc. */ - sync - mtspr SPRN_HID4,r7 - isync - - /* drop the guest's tlbie_lock */ - li r0,0 - stw r0,0(r3) - - /* Check if HDEC expires soon */ - mfspr r3,SPRN_HDEC - cmpwi r3,10 - li r12,BOOK3S_INTERRUPT_HV_DECREMENTER - blt hdec_soon - - /* Enable HDEC interrupts */ - mfspr r0,SPRN_HID0 - li r3,1 - rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 - sync - mtspr SPRN_HID0,r0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 -31: /* Do we have a guest vcpu to run? */ cmpdi r4, 0 beq kvmppc_primary_no_guest @@ -625,7 +521,6 @@ kvmppc_got_guest: stb r6, VCPU_VPA_DIRTY(r4) 25: -BEGIN_FTR_SECTION /* Save purr/spurr */ mfspr r5,SPRN_PURR mfspr r6,SPRN_SPURR @@ -635,7 +530,6 @@ BEGIN_FTR_SECTION ld r8,VCPU_SPURR(r4) mtspr SPRN_PURR,r7 mtspr SPRN_SPURR,r8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION /* Set partition DABR */ @@ -644,9 +538,7 @@ BEGIN_FTR_SECTION ld r6,VCPU_DABR(r4) mtspr SPRN_DABRX,r5 mtspr SPRN_DABR,r6 - BEGIN_FTR_SECTION_NESTED(89) isync - END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89) END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -777,20 +669,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) lwz r7, VCPU_PMC + 12(r4) lwz r8, VCPU_PMC + 16(r4) lwz r9, VCPU_PMC + 20(r4) -BEGIN_FTR_SECTION - lwz r10, VCPU_PMC + 24(r4) - lwz r11, VCPU_PMC + 28(r4) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_PMC1, r3 mtspr SPRN_PMC2, r5 mtspr SPRN_PMC3, r6 mtspr SPRN_PMC4, r7 mtspr SPRN_PMC5, r8 mtspr SPRN_PMC6, r9 -BEGIN_FTR_SECTION - mtspr SPRN_PMC7, r10 - mtspr SPRN_PMC8, r11 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, VCPU_MMCR(r4) ld r5, VCPU_MMCR + 8(r4) ld r6, VCPU_MMCR + 16(r4) @@ -837,14 +721,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ld r30, VCPU_GPR(R30)(r4) ld r31, VCPU_GPR(R31)(r4) -BEGIN_FTR_SECTION /* Switch DSCR to guest value */ ld r5, VCPU_DSCR(r4) mtspr SPRN_DSCR, r5 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION - /* Skip next section on POWER7 or PPC970 */ + /* Skip next section on POWER7 */ b 8f END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ @@ -920,7 +802,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_DAR, r5 mtspr SPRN_DSISR, r6 -BEGIN_FTR_SECTION /* Restore AMR and UAMOR, set AMOR to all 1s */ ld r5,VCPU_AMR(r4) ld r6,VCPU_UAMOR(r4) @@ -928,7 +809,6 @@ BEGIN_FTR_SECTION mtspr SPRN_AMR,r5 mtspr SPRN_UAMOR,r6 mtspr SPRN_AMOR,r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Restore state of CTRL run bit; assume 1 on entry */ lwz r5,VCPU_CTRL(r4) @@ -963,13 +843,11 @@ deliver_guest_interrupt: rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 cmpdi cr1, r0, 0 andi. 
r8, r11, MSR_EE -BEGIN_FTR_SECTION mfspr r8, SPRN_LPCR /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */ rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH mtspr SPRN_LPCR, r8 isync -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) beq 5f li r0, BOOK3S_INTERRUPT_EXTERNAL bne cr1, 12f @@ -1124,15 +1002,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) stw r12,VCPU_TRAP(r9) - /* Save HEIR (HV emulation assist reg) in last_inst + /* Save HEIR (HV emulation assist reg) in emul_inst if this is an HEI (HV emulation interrupt, e40) */ li r3,KVM_INST_FETCH_FAILED -BEGIN_FTR_SECTION cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST bne 11f mfspr r3,SPRN_HEIR -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) -11: stw r3,VCPU_LAST_INST(r9) +11: stw r3,VCPU_HEIR(r9) /* these are volatile across C function calls */ mfctr r3 @@ -1140,13 +1016,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r3, VCPU_CTR(r9) stw r4, VCPU_XER(r9) -BEGIN_FTR_SECTION /* If this is a page table miss then see if it's theirs or ours */ cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE beq kvmppc_hdsi cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE beq kvmppc_hisi -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* See if this is a leftover HDEC interrupt */ cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER @@ -1159,11 +1033,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) cmpwi r12,BOOK3S_INTERRUPT_SYSCALL beq hcall_try_real_mode - /* Only handle external interrupts here on arch 206 and later */ -BEGIN_FTR_SECTION - b ext_interrupt_to_host -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) - /* External interrupt ? */ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL bne+ ext_interrupt_to_host @@ -1193,11 +1062,9 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ mfdsisr r7 std r6, VCPU_DAR(r9) stw r7, VCPU_DSISR(r9) -BEGIN_FTR_SECTION /* don't overwrite fault_dar/fault_dsisr if HDSI */ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE beq 6f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r6, VCPU_FAULT_DAR(r9) stw r7, VCPU_FAULT_DSISR(r9) @@ -1236,7 +1103,6 @@ mc_cont: /* * Save the guest PURR/SPURR */ -BEGIN_FTR_SECTION mfspr r5,SPRN_PURR mfspr r6,SPRN_SPURR ld r7,VCPU_PURR(r9) @@ -1256,7 +1122,6 @@ BEGIN_FTR_SECTION add r4,r4,r6 mtspr SPRN_PURR,r3 mtspr SPRN_SPURR,r4 -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) /* Save DEC */ mfspr r5,SPRN_DEC @@ -1306,22 +1171,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 8: /* Save and reset AMR and UAMOR before turning on the MMU */ -BEGIN_FTR_SECTION mfspr r5,SPRN_AMR mfspr r6,SPRN_UAMOR std r5,VCPU_AMR(r9) std r6,VCPU_UAMOR(r9) li r6,0 mtspr SPRN_AMR,r6 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Switch DSCR back to host value */ -BEGIN_FTR_SECTION mfspr r8, SPRN_DSCR ld r7, HSTATE_DSCR(r13) std r8, VCPU_DSCR(r9) mtspr SPRN_DSCR, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Save non-volatile GPRs */ std r14, VCPU_GPR(R14)(r9) @@ -1503,11 +1364,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r4, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ mfspr r6, SPRN_MMCRA -BEGIN_FTR_SECTION - /* On P7, clear MMCRA in order to disable SDAR updates */ + /* Clear MMCRA in order to disable SDAR updates */ li r7, 0 mtspr SPRN_MMCRA, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync beq 21f /* if no VPA, save PMU stuff anyway */ lbz r7, LPPACA_PMCINUSE(r8) @@ -1532,10 +1391,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r6, SPRN_PMC4 mfspr r7, SPRN_PMC5 mfspr r8, SPRN_PMC6 -BEGIN_FTR_SECTION - mfspr r10, SPRN_PMC7 - mfspr r11, SPRN_PMC8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r3, VCPU_PMC(r9) stw r4, VCPU_PMC + 4(r9) stw 
r5, VCPU_PMC + 8(r9) @@ -1543,10 +1398,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r7, VCPU_PMC + 16(r9) stw r8, VCPU_PMC + 20(r9) BEGIN_FTR_SECTION - stw r10, VCPU_PMC + 24(r9) - stw r11, VCPU_PMC + 28(r9) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -BEGIN_FTR_SECTION mfspr r5, SPRN_SIER mfspr r6, SPRN_SPMC1 mfspr r7, SPRN_SPMC2 @@ -1566,11 +1417,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ptesync hdec_soon: /* r12 = trap, r13 = paca */ -BEGIN_FTR_SECTION - b 32f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) /* - * POWER7 guest -> host partition switch code. + * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do * have to coordinate the hardware threads. */ @@ -1698,87 +1546,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 16: ld r8,KVM_HOST_LPCR(r4) mtspr SPRN_LPCR,r8 isync - b 33f - - /* - * PPC970 guest -> host partition switch code. - * We have to lock against concurrent tlbies, and - * we have to flush the whole TLB. - */ -32: ld r5,HSTATE_KVM_VCORE(r13) - ld r4,VCORE_KVM(r5) /* pointer to struct kvm */ - - /* Take the guest's tlbie_lock */ -#ifdef __BIG_ENDIAN__ - lwz r8,PACA_LOCK_TOKEN(r13) -#else - lwz r8,PACAPACAINDEX(r13) -#endif - addi r3,r4,KVM_TLBIE_LOCK -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. r8,0,r3 - bne 24b - isync - - ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */ - li r0,0x18f - rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ - or r0,r7,r0 - ptesync - sync - mtspr SPRN_HID4,r0 /* switch to reserved LPID */ - isync - li r0,0 - stw r0,0(r3) /* drop guest tlbie_lock */ - - /* invalidate the whole TLB */ - li r0,256 - mtctr r0 - li r6,0 -25: tlbiel r6 - addi r6,r6,0x1000 - bdnz 25b - ptesync - - /* take native_tlbie_lock */ - ld r3,toc_tlbie_lock@toc(2) -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. 
r8,0,r3 - bne 24b - isync - - ld r6,KVM_HOST_SDR1(r4) - mtspr SPRN_SDR1,r6 /* switch to host page table */ - - /* Set up host HID4 value */ - sync - mtspr SPRN_HID4,r7 - isync - li r0,0 - stw r0,0(r3) /* drop native_tlbie_lock */ - - lis r8,0x7fff /* MAX_INT@h */ - mtspr SPRN_HDEC,r8 - - /* Disable HDEC interrupts */ - mfspr r0,SPRN_HID0 - li r3,0 - rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 - sync - mtspr SPRN_HID0,r0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 /* load host SLB entries */ -33: ld r8,PACA_SLBSHADOWPTR(r13) + ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED li r3, SLBSHADOW_SAVEAREA @@ -2047,7 +1817,7 @@ hcall_real_table: .long 0 /* 0xd8 */ .long 0 /* 0xdc */ .long DOTSYM(kvmppc_h_cede) - hcall_real_table - .long 0 /* 0xe4 */ + .long DOTSYM(kvmppc_rm_h_confer) - hcall_real_table .long 0 /* 0xe8 */ .long 0 /* 0xec */ .long 0 /* 0xf0 */ @@ -2126,9 +1896,6 @@ _GLOBAL(kvmppc_h_cede) stw r0,VCPU_TRAP(r3) li r0,H_SUCCESS std r0,VCPU_GPR(R3)(r3) -BEGIN_FTR_SECTION - b kvm_cede_exit /* just send it up to host on 970 */ -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) /* * Set our bit in the bitmask of napping threads unless all the @@ -2455,7 +2222,6 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif mtmsrd r8 - isync addi r3,r3,VCPU_FPRS bl store_fp_state #ifdef CONFIG_ALTIVEC @@ -2491,7 +2257,6 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif mtmsrd r8 - isync addi r3,r4,VCPU_FPRS bl load_fp_state #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index bfb8035314e..bd6ab1672ae 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -352,14 +352,6 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb) return kvmppc_get_field(inst, msb + 32, lsb + 32); } -/* - * Replaces inst bits with ordering according to spec. - */ -static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value) -{ - return kvmppc_set_field(inst, msb + 32, lsb + 32, value); -} - bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) { if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cf2eb16846d..f57383941d0 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -644,11 +644,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } -static inline int get_fpr_index(int i) -{ - return i * TS_FPRWIDTH; -} - /* Give up external provider (FPU, Altivec, VSX) */ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) { diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index eaeb78047fb..807351f76f8 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -613,10 +613,25 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, * there might be a previously-rejected interrupt needing * to be resent. * + * ICP state: Check_IPI + * * If the CPPR is less favored, then we might be replacing - * an interrupt, and thus need to possibly reject it as in + * an interrupt, and thus need to possibly reject it. * - * ICP state: Check_IPI + * ICP State: IPI + * + * Besides rejecting any pending interrupts, we also + * update XISR and pending_pri to mark IPI as pending. 
+ * + * PAPR does not describe this state, but if the MFRR is being + * made less favored than its earlier value, there might be + * a previously-rejected interrupt needing to be resent. + * Ideally, we would want to resend only if + * prio(pending_interrupt) < mfrr && + * prio(pending_interrupt) < cppr + * where pending interrupt is the one that was rejected. But + * we don't have that state, so we simply trigger a resend + * whenever the MFRR is made less favored. */ do { old_state = new_state = ACCESS_ONCE(icp->state); @@ -629,13 +644,14 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, resend = false; if (mfrr < new_state.cppr) { /* Reject a pending interrupt if not an IPI */ - if (mfrr <= new_state.pending_pri) + if (mfrr <= new_state.pending_pri) { reject = new_state.xisr; - new_state.pending_pri = mfrr; - new_state.xisr = XICS_IPI; + new_state.pending_pri = mfrr; + new_state.xisr = XICS_IPI; + } } - if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { + if (mfrr > old_state.mfrr) { resend = new_state.need_resend; new_state.need_resend = 0; } @@ -789,7 +805,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) if (icp->rm_action & XICS_RM_KICK_VCPU) kvmppc_fast_vcpu_kick(icp->rm_kick_target); if (icp->rm_action & XICS_RM_CHECK_RESEND) - icp_check_resend(xics, icp); + icp_check_resend(xics, icp->rm_resend_icp); if (icp->rm_action & XICS_RM_REJECT) icp_deliver_irq(xics, icp, icp->rm_reject); if (icp->rm_action & XICS_RM_NOTIFY_EOI) diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index e8aaa7a3f20..73f0f2723c0 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -74,6 +74,7 @@ struct kvmppc_icp { #define XICS_RM_NOTIFY_EOI 0x8 u32 rm_action; struct kvm_vcpu *rm_kick_target; + struct kvmppc_icp *rm_resend_icp; u32 rm_reject; u32 rm_eoied_irq; diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 16095841afe..b29ce752c7d 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -78,7 +78,7 @@ static inline int local_sid_setup_one(struct id *entry) sid = __this_cpu_inc_return(pcpu_last_used_sid); if (sid < NUM_TIDS) { - __this_cpu_write(pcpu_sids)entry[sid], entry); + __this_cpu_write(pcpu_sids.entry[sid], entry); entry->val = sid; entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]); ret = sid; @@ -299,14 +299,6 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) kvmppc_e500_recalc_shadow_pid(to_e500(vcpu)); } -void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) -{ -} - static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu) { kvmppc_booke_vcpu_load(vcpu, cpu); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c1f8f53cd31..c45eaab752b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -527,18 +527,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 0; break; case KVM_CAP_PPC_RMA: - r = hv_enabled; - /* PPC970 requires an RMA */ - if (r && cpu_has_feature(CPU_FTR_ARCH_201)) - r = 2; + r = 0; break; #endif case KVM_CAP_SYNC_MMU: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - if (hv_enabled) - r = cpu_has_feature(CPU_FTR_ARCH_206) ? 
1 : 0; - else - r = 0; + r = hv_enabled; #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER) r = 1; #else diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h new file mode 100644 index 00000000000..f647ce0f428 --- /dev/null +++ b/arch/powerpc/kvm/trace_book3s.h @@ -0,0 +1,32 @@ +#if !defined(_TRACE_KVM_BOOK3S_H) +#define _TRACE_KVM_BOOK3S_H + +/* + * Common defines used by the trace macros in trace_pr.h and trace_hv.h + */ + +#define kvm_trace_symbol_exit \ + {0x100, "SYSTEM_RESET"}, \ + {0x200, "MACHINE_CHECK"}, \ + {0x300, "DATA_STORAGE"}, \ + {0x380, "DATA_SEGMENT"}, \ + {0x400, "INST_STORAGE"}, \ + {0x480, "INST_SEGMENT"}, \ + {0x500, "EXTERNAL"}, \ + {0x501, "EXTERNAL_LEVEL"}, \ + {0x502, "EXTERNAL_HV"}, \ + {0x600, "ALIGNMENT"}, \ + {0x700, "PROGRAM"}, \ + {0x800, "FP_UNAVAIL"}, \ + {0x900, "DECREMENTER"}, \ + {0x980, "HV_DECREMENTER"}, \ + {0xc00, "SYSCALL"}, \ + {0xd00, "TRACE"}, \ + {0xe00, "H_DATA_STORAGE"}, \ + {0xe20, "H_INST_STORAGE"}, \ + {0xe40, "H_EMUL_ASSIST"}, \ + {0xf00, "PERFMON"}, \ + {0xf20, "ALTIVEC"}, \ + {0xf40, "VSX"} + +#endif diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h index f7537cf26ce..7ec534d1db9 100644 --- a/arch/powerpc/kvm/trace_booke.h +++ b/arch/powerpc/kvm/trace_booke.h @@ -151,6 +151,47 @@ TRACE_EVENT(kvm_booke206_ref_release, __entry->pfn, __entry->flags) ); +#ifdef CONFIG_SPE_POSSIBLE +#define kvm_trace_symbol_irqprio_spe \ + {BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \ + {BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \ + {BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"}, +#else +#define kvm_trace_symbol_irqprio_spe +#endif + +#ifdef CONFIG_PPC_E500MC +#define kvm_trace_symbol_irqprio_e500mc \ + {BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \ + {BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"}, +#else +#define kvm_trace_symbol_irqprio_e500mc +#endif + +#define kvm_trace_symbol_irqprio \ + kvm_trace_symbol_irqprio_spe \ + kvm_trace_symbol_irqprio_e500mc \ + {BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \ + {BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \ + {BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \ + {BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \ + {BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \ + {BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \ + {BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \ + {BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \ + {BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \ + {BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \ + {BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \ + {BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \ + {BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \ + {BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \ + {BOOKE_IRQPRIO_FIT, "FIT"}, \ + {BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \ + {BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \ + {BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \ + {BOOKE_IRQPRIO_DBELL, "DBELL"}, \ + {BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"} \ + TRACE_EVENT(kvm_booke_queue_irqprio, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), TP_ARGS(vcpu, priority), @@ -167,8 +208,10 @@ TRACE_EVENT(kvm_booke_queue_irqprio, __entry->pending = vcpu->arch.pending_exceptions; ), - TP_printk("vcpu=%x prio=%x pending=%lx", - __entry->cpu_nr, __entry->priority, __entry->pending) + TP_printk("vcpu=%x prio=%s pending=%lx", + __entry->cpu_nr, + __print_symbolic(__entry->priority, kvm_trace_symbol_irqprio), + __entry->pending) ); #endif diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h new file mode 100644 index 00000000000..33d9daff578 --- /dev/null +++ b/arch/powerpc/kvm/trace_hv.h @@ -0,0 +1,477 @@ +#if 
!defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_HV_H + +#include <linux/tracepoint.h> +#include "trace_book3s.h" +#include <asm/hvcall.h> +#include <asm/kvm_asm.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm_hv +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_hv + +#define kvm_trace_symbol_hcall \ + {H_REMOVE, "H_REMOVE"}, \ + {H_ENTER, "H_ENTER"}, \ + {H_READ, "H_READ"}, \ + {H_CLEAR_MOD, "H_CLEAR_MOD"}, \ + {H_CLEAR_REF, "H_CLEAR_REF"}, \ + {H_PROTECT, "H_PROTECT"}, \ + {H_GET_TCE, "H_GET_TCE"}, \ + {H_PUT_TCE, "H_PUT_TCE"}, \ + {H_SET_SPRG0, "H_SET_SPRG0"}, \ + {H_SET_DABR, "H_SET_DABR"}, \ + {H_PAGE_INIT, "H_PAGE_INIT"}, \ + {H_SET_ASR, "H_SET_ASR"}, \ + {H_ASR_ON, "H_ASR_ON"}, \ + {H_ASR_OFF, "H_ASR_OFF"}, \ + {H_LOGICAL_CI_LOAD, "H_LOGICAL_CI_LOAD"}, \ + {H_LOGICAL_CI_STORE, "H_LOGICAL_CI_STORE"}, \ + {H_LOGICAL_CACHE_LOAD, "H_LOGICAL_CACHE_LOAD"}, \ + {H_LOGICAL_CACHE_STORE, "H_LOGICAL_CACHE_STORE"}, \ + {H_LOGICAL_ICBI, "H_LOGICAL_ICBI"}, \ + {H_LOGICAL_DCBF, "H_LOGICAL_DCBF"}, \ + {H_GET_TERM_CHAR, "H_GET_TERM_CHAR"}, \ + {H_PUT_TERM_CHAR, "H_PUT_TERM_CHAR"}, \ + {H_REAL_TO_LOGICAL, "H_REAL_TO_LOGICAL"}, \ + {H_HYPERVISOR_DATA, "H_HYPERVISOR_DATA"}, \ + {H_EOI, "H_EOI"}, \ + {H_CPPR, "H_CPPR"}, \ + {H_IPI, "H_IPI"}, \ + {H_IPOLL, "H_IPOLL"}, \ + {H_XIRR, "H_XIRR"}, \ + {H_PERFMON, "H_PERFMON"}, \ + {H_MIGRATE_DMA, "H_MIGRATE_DMA"}, \ + {H_REGISTER_VPA, "H_REGISTER_VPA"}, \ + {H_CEDE, "H_CEDE"}, \ + {H_CONFER, "H_CONFER"}, \ + {H_PROD, "H_PROD"}, \ + {H_GET_PPP, "H_GET_PPP"}, \ + {H_SET_PPP, "H_SET_PPP"}, \ + {H_PURR, "H_PURR"}, \ + {H_PIC, "H_PIC"}, \ + {H_REG_CRQ, "H_REG_CRQ"}, \ + {H_FREE_CRQ, "H_FREE_CRQ"}, \ + {H_VIO_SIGNAL, "H_VIO_SIGNAL"}, \ + {H_SEND_CRQ, "H_SEND_CRQ"}, \ + {H_COPY_RDMA, "H_COPY_RDMA"}, \ + {H_REGISTER_LOGICAL_LAN, "H_REGISTER_LOGICAL_LAN"}, \ + {H_FREE_LOGICAL_LAN, "H_FREE_LOGICAL_LAN"}, \ + {H_ADD_LOGICAL_LAN_BUFFER, "H_ADD_LOGICAL_LAN_BUFFER"}, \ + {H_SEND_LOGICAL_LAN, "H_SEND_LOGICAL_LAN"}, \ + {H_BULK_REMOVE, "H_BULK_REMOVE"}, \ + {H_MULTICAST_CTRL, "H_MULTICAST_CTRL"}, \ + {H_SET_XDABR, "H_SET_XDABR"}, \ + {H_STUFF_TCE, "H_STUFF_TCE"}, \ + {H_PUT_TCE_INDIRECT, "H_PUT_TCE_INDIRECT"}, \ + {H_CHANGE_LOGICAL_LAN_MAC, "H_CHANGE_LOGICAL_LAN_MAC"}, \ + {H_VTERM_PARTNER_INFO, "H_VTERM_PARTNER_INFO"}, \ + {H_REGISTER_VTERM, "H_REGISTER_VTERM"}, \ + {H_FREE_VTERM, "H_FREE_VTERM"}, \ + {H_RESET_EVENTS, "H_RESET_EVENTS"}, \ + {H_ALLOC_RESOURCE, "H_ALLOC_RESOURCE"}, \ + {H_FREE_RESOURCE, "H_FREE_RESOURCE"}, \ + {H_MODIFY_QP, "H_MODIFY_QP"}, \ + {H_QUERY_QP, "H_QUERY_QP"}, \ + {H_REREGISTER_PMR, "H_REREGISTER_PMR"}, \ + {H_REGISTER_SMR, "H_REGISTER_SMR"}, \ + {H_QUERY_MR, "H_QUERY_MR"}, \ + {H_QUERY_MW, "H_QUERY_MW"}, \ + {H_QUERY_HCA, "H_QUERY_HCA"}, \ + {H_QUERY_PORT, "H_QUERY_PORT"}, \ + {H_MODIFY_PORT, "H_MODIFY_PORT"}, \ + {H_DEFINE_AQP1, "H_DEFINE_AQP1"}, \ + {H_GET_TRACE_BUFFER, "H_GET_TRACE_BUFFER"}, \ + {H_DEFINE_AQP0, "H_DEFINE_AQP0"}, \ + {H_RESIZE_MR, "H_RESIZE_MR"}, \ + {H_ATTACH_MCQP, "H_ATTACH_MCQP"}, \ + {H_DETACH_MCQP, "H_DETACH_MCQP"}, \ + {H_CREATE_RPT, "H_CREATE_RPT"}, \ + {H_REMOVE_RPT, "H_REMOVE_RPT"}, \ + {H_REGISTER_RPAGES, "H_REGISTER_RPAGES"}, \ + {H_DISABLE_AND_GETC, "H_DISABLE_AND_GETC"}, \ + {H_ERROR_DATA, "H_ERROR_DATA"}, \ + {H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \ + {H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \ + {H_MANAGE_TRACE, "H_MANAGE_TRACE"}, \ + {H_FREE_LOGICAL_LAN_BUFFER, "H_FREE_LOGICAL_LAN_BUFFER"}, \ + {H_QUERY_INT_STATE, "H_QUERY_INT_STATE"}, \ + {H_POLL_PENDING, 
"H_POLL_PENDING"}, \ + {H_ILLAN_ATTRIBUTES, "H_ILLAN_ATTRIBUTES"}, \ + {H_MODIFY_HEA_QP, "H_MODIFY_HEA_QP"}, \ + {H_QUERY_HEA_QP, "H_QUERY_HEA_QP"}, \ + {H_QUERY_HEA, "H_QUERY_HEA"}, \ + {H_QUERY_HEA_PORT, "H_QUERY_HEA_PORT"}, \ + {H_MODIFY_HEA_PORT, "H_MODIFY_HEA_PORT"}, \ + {H_REG_BCMC, "H_REG_BCMC"}, \ + {H_DEREG_BCMC, "H_DEREG_BCMC"}, \ + {H_REGISTER_HEA_RPAGES, "H_REGISTER_HEA_RPAGES"}, \ + {H_DISABLE_AND_GET_HEA, "H_DISABLE_AND_GET_HEA"}, \ + {H_GET_HEA_INFO, "H_GET_HEA_INFO"}, \ + {H_ALLOC_HEA_RESOURCE, "H_ALLOC_HEA_RESOURCE"}, \ + {H_ADD_CONN, "H_ADD_CONN"}, \ + {H_DEL_CONN, "H_DEL_CONN"}, \ + {H_JOIN, "H_JOIN"}, \ + {H_VASI_STATE, "H_VASI_STATE"}, \ + {H_ENABLE_CRQ, "H_ENABLE_CRQ"}, \ + {H_GET_EM_PARMS, "H_GET_EM_PARMS"}, \ + {H_SET_MPP, "H_SET_MPP"}, \ + {H_GET_MPP, "H_GET_MPP"}, \ + {H_HOME_NODE_ASSOCIATIVITY, "H_HOME_NODE_ASSOCIATIVITY"}, \ + {H_BEST_ENERGY, "H_BEST_ENERGY"}, \ + {H_XIRR_X, "H_XIRR_X"}, \ + {H_RANDOM, "H_RANDOM"}, \ + {H_COP, "H_COP"}, \ + {H_GET_MPP_X, "H_GET_MPP_X"}, \ + {H_SET_MODE, "H_SET_MODE"}, \ + {H_RTAS, "H_RTAS"} + +#define kvm_trace_symbol_kvmret \ + {RESUME_GUEST, "RESUME_GUEST"}, \ + {RESUME_GUEST_NV, "RESUME_GUEST_NV"}, \ + {RESUME_HOST, "RESUME_HOST"}, \ + {RESUME_HOST_NV, "RESUME_HOST_NV"} + +#define kvm_trace_symbol_hcall_rc \ + {H_SUCCESS, "H_SUCCESS"}, \ + {H_BUSY, "H_BUSY"}, \ + {H_CLOSED, "H_CLOSED"}, \ + {H_NOT_AVAILABLE, "H_NOT_AVAILABLE"}, \ + {H_CONSTRAINED, "H_CONSTRAINED"}, \ + {H_PARTIAL, "H_PARTIAL"}, \ + {H_IN_PROGRESS, "H_IN_PROGRESS"}, \ + {H_PAGE_REGISTERED, "H_PAGE_REGISTERED"}, \ + {H_PARTIAL_STORE, "H_PARTIAL_STORE"}, \ + {H_PENDING, "H_PENDING"}, \ + {H_CONTINUE, "H_CONTINUE"}, \ + {H_LONG_BUSY_START_RANGE, "H_LONG_BUSY_START_RANGE"}, \ + {H_LONG_BUSY_ORDER_1_MSEC, "H_LONG_BUSY_ORDER_1_MSEC"}, \ + {H_LONG_BUSY_ORDER_10_MSEC, "H_LONG_BUSY_ORDER_10_MSEC"}, \ + {H_LONG_BUSY_ORDER_100_MSEC, "H_LONG_BUSY_ORDER_100_MSEC"}, \ + {H_LONG_BUSY_ORDER_1_SEC, "H_LONG_BUSY_ORDER_1_SEC"}, \ + {H_LONG_BUSY_ORDER_10_SEC, "H_LONG_BUSY_ORDER_10_SEC"}, \ + {H_LONG_BUSY_ORDER_100_SEC, "H_LONG_BUSY_ORDER_100_SEC"}, \ + {H_LONG_BUSY_END_RANGE, "H_LONG_BUSY_END_RANGE"}, \ + {H_TOO_HARD, "H_TOO_HARD"}, \ + {H_HARDWARE, "H_HARDWARE"}, \ + {H_FUNCTION, "H_FUNCTION"}, \ + {H_PRIVILEGE, "H_PRIVILEGE"}, \ + {H_PARAMETER, "H_PARAMETER"}, \ + {H_BAD_MODE, "H_BAD_MODE"}, \ + {H_PTEG_FULL, "H_PTEG_FULL"}, \ + {H_NOT_FOUND, "H_NOT_FOUND"}, \ + {H_RESERVED_DABR, "H_RESERVED_DABR"}, \ + {H_NO_MEM, "H_NO_MEM"}, \ + {H_AUTHORITY, "H_AUTHORITY"}, \ + {H_PERMISSION, "H_PERMISSION"}, \ + {H_DROPPED, "H_DROPPED"}, \ + {H_SOURCE_PARM, "H_SOURCE_PARM"}, \ + {H_DEST_PARM, "H_DEST_PARM"}, \ + {H_REMOTE_PARM, "H_REMOTE_PARM"}, \ + {H_RESOURCE, "H_RESOURCE"}, \ + {H_ADAPTER_PARM, "H_ADAPTER_PARM"}, \ + {H_RH_PARM, "H_RH_PARM"}, \ + {H_RCQ_PARM, "H_RCQ_PARM"}, \ + {H_SCQ_PARM, "H_SCQ_PARM"}, \ + {H_EQ_PARM, "H_EQ_PARM"}, \ + {H_RT_PARM, "H_RT_PARM"}, \ + {H_ST_PARM, "H_ST_PARM"}, \ + {H_SIGT_PARM, "H_SIGT_PARM"}, \ + {H_TOKEN_PARM, "H_TOKEN_PARM"}, \ + {H_MLENGTH_PARM, "H_MLENGTH_PARM"}, \ + {H_MEM_PARM, "H_MEM_PARM"}, \ + {H_MEM_ACCESS_PARM, "H_MEM_ACCESS_PARM"}, \ + {H_ATTR_PARM, "H_ATTR_PARM"}, \ + {H_PORT_PARM, "H_PORT_PARM"}, \ + {H_MCG_PARM, "H_MCG_PARM"}, \ + {H_VL_PARM, "H_VL_PARM"}, \ + {H_TSIZE_PARM, "H_TSIZE_PARM"}, \ + {H_TRACE_PARM, "H_TRACE_PARM"}, \ + {H_MASK_PARM, "H_MASK_PARM"}, \ + {H_MCG_FULL, "H_MCG_FULL"}, \ + {H_ALIAS_EXIST, "H_ALIAS_EXIST"}, \ + {H_P_COUNTER, "H_P_COUNTER"}, \ + {H_TABLE_FULL, "H_TABLE_FULL"}, \ + {H_ALT_TABLE, "H_ALT_TABLE"}, \ + 
{H_MR_CONDITION, "H_MR_CONDITION"}, \ + {H_NOT_ENOUGH_RESOURCES, "H_NOT_ENOUGH_RESOURCES"}, \ + {H_R_STATE, "H_R_STATE"}, \ + {H_RESCINDED, "H_RESCINDED"}, \ + {H_P2, "H_P2"}, \ + {H_P3, "H_P3"}, \ + {H_P4, "H_P4"}, \ + {H_P5, "H_P5"}, \ + {H_P6, "H_P6"}, \ + {H_P7, "H_P7"}, \ + {H_P8, "H_P8"}, \ + {H_P9, "H_P9"}, \ + {H_TOO_BIG, "H_TOO_BIG"}, \ + {H_OVERLAP, "H_OVERLAP"}, \ + {H_INTERRUPT, "H_INTERRUPT"}, \ + {H_BAD_DATA, "H_BAD_DATA"}, \ + {H_NOT_ACTIVE, "H_NOT_ACTIVE"}, \ + {H_SG_LIST, "H_SG_LIST"}, \ + {H_OP_MODE, "H_OP_MODE"}, \ + {H_COP_HW, "H_COP_HW"}, \ + {H_UNSUPPORTED_FLAG_START, "H_UNSUPPORTED_FLAG_START"}, \ + {H_UNSUPPORTED_FLAG_END, "H_UNSUPPORTED_FLAG_END"}, \ + {H_MULTI_THREADS_ACTIVE, "H_MULTI_THREADS_ACTIVE"}, \ + {H_OUTSTANDING_COP_OPS, "H_OUTSTANDING_COP_OPS"} + +TRACE_EVENT(kvm_guest_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, pc) + __field(unsigned long, pending_exceptions) + __field(u8, ceded) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->ceded = vcpu->arch.ceded; + __entry->pending_exceptions = vcpu->arch.pending_exceptions; + ), + + TP_printk("VCPU %d: pc=0x%lx pexcp=0x%lx ceded=%d", + __entry->vcpu_id, + __entry->pc, + __entry->pending_exceptions, __entry->ceded) +); + +TRACE_EVENT(kvm_guest_exit, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, trap) + __field(unsigned long, pc) + __field(unsigned long, msr) + __field(u8, ceded) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->trap = vcpu->arch.trap; + __entry->ceded = vcpu->arch.ceded; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->msr = vcpu->arch.shregs.msr; + ), + + TP_printk("VCPU %d: trap=%s pc=0x%lx msr=0x%lx, ceded=%d", + __entry->vcpu_id, + __print_symbolic(__entry->trap, kvm_trace_symbol_exit), + __entry->pc, __entry->msr, __entry->ceded + ) +); + +TRACE_EVENT(kvm_page_fault_enter, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, + struct kvm_memory_slot *memslot, unsigned long ea, + unsigned long dsisr), + + TP_ARGS(vcpu, hptep, memslot, ea, dsisr), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, hpte_v) + __field(unsigned long, hpte_r) + __field(unsigned long, gpte_r) + __field(unsigned long, ea) + __field(u64, base_gfn) + __field(u32, slot_flags) + __field(u32, dsisr) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->hpte_v = hptep[0]; + __entry->hpte_r = hptep[1]; + __entry->gpte_r = hptep[2]; + __entry->ea = ea; + __entry->dsisr = dsisr; + __entry->base_gfn = memslot ? memslot->base_gfn : -1UL; + __entry->slot_flags = memslot ? 
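/* as with base_gfn just above: a fault that matched no memslot is still
 * traced, with neutral values (base_gfn of -1, empty flags) rather than
 * being skipped */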
memslot->flags : 0; + ), + + TP_printk("VCPU %d: hpte=0x%lx:0x%lx guest=0x%lx ea=0x%lx,%x slot=0x%llx,0x%x", + __entry->vcpu_id, + __entry->hpte_v, __entry->hpte_r, __entry->gpte_r, + __entry->ea, __entry->dsisr, + __entry->base_gfn, __entry->slot_flags) +); + +TRACE_EVENT(kvm_page_fault_exit, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, long ret), + + TP_ARGS(vcpu, hptep, ret), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, hpte_v) + __field(unsigned long, hpte_r) + __field(long, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->hpte_v = hptep[0]; + __entry->hpte_r = hptep[1]; + __entry->ret = ret; + ), + + TP_printk("VCPU %d: hpte=0x%lx:0x%lx ret=0x%lx", + __entry->vcpu_id, + __entry->hpte_v, __entry->hpte_r, __entry->ret) +); + +TRACE_EVENT(kvm_hcall_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, req) + __field(unsigned long, gpr4) + __field(unsigned long, gpr5) + __field(unsigned long, gpr6) + __field(unsigned long, gpr7) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->req = kvmppc_get_gpr(vcpu, 3); + __entry->gpr4 = kvmppc_get_gpr(vcpu, 4); + __entry->gpr5 = kvmppc_get_gpr(vcpu, 5); + __entry->gpr6 = kvmppc_get_gpr(vcpu, 6); + __entry->gpr7 = kvmppc_get_gpr(vcpu, 7); + ), + + TP_printk("VCPU %d: hcall=%s GPR4-7=0x%lx,0x%lx,0x%lx,0x%lx", + __entry->vcpu_id, + __print_symbolic(__entry->req, kvm_trace_symbol_hcall), + __entry->gpr4, __entry->gpr5, __entry->gpr6, __entry->gpr7) +); + +TRACE_EVENT(kvm_hcall_exit, + TP_PROTO(struct kvm_vcpu *vcpu, int ret), + + TP_ARGS(vcpu, ret), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, ret) + __field(unsigned long, hcall_rc) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->ret = ret; + __entry->hcall_rc = kvmppc_get_gpr(vcpu, 3); + ), + + TP_printk("VCPU %d: ret=%s hcall_rc=%s", + __entry->vcpu_id, + __print_symbolic(__entry->ret, kvm_trace_symbol_kvmret), + __print_symbolic(__entry->ret & RESUME_FLAG_HOST ? + H_TOO_HARD : __entry->hcall_rc, + kvm_trace_symbol_hcall_rc)) +); + +TRACE_EVENT(kvmppc_run_core, + TP_PROTO(struct kvmppc_vcore *vc, int where), + + TP_ARGS(vc, where), + + TP_STRUCT__entry( + __field(int, n_runnable) + __field(int, runner_vcpu) + __field(int, where) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->runner_vcpu = vc->runner->vcpu_id; + __entry->n_runnable = vc->n_runnable; + __entry->where = where; + __entry->tgid = current->tgid; + ), + + TP_printk("%s runner_vcpu==%d runnable=%d tgid=%d", + __entry->where ? "Exit" : "Enter", + __entry->runner_vcpu, __entry->n_runnable, __entry->tgid) +); + +TRACE_EVENT(kvmppc_vcore_blocked, + TP_PROTO(struct kvmppc_vcore *vc, int where), + + TP_ARGS(vc, where), + + TP_STRUCT__entry( + __field(int, n_runnable) + __field(int, runner_vcpu) + __field(int, where) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->runner_vcpu = vc->runner->vcpu_id; + __entry->n_runnable = vc->n_runnable; + __entry->where = where; + __entry->tgid = current->tgid; + ), + + TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d", + __entry->where ? 
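/* same encoding as kvmppc_run_core above: where == 0 is logged as
 * "Enter", any other value as "Exit" */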
"Exit" : "Enter", + __entry->runner_vcpu, __entry->n_runnable, __entry->tgid) +); + +TRACE_EVENT(kvmppc_run_vcpu_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->tgid = current->tgid; + ), + + TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid) +); + +TRACE_EVENT(kvmppc_run_vcpu_exit, + TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run), + + TP_ARGS(vcpu, run), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, exit) + __field(int, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->exit = run->exit_reason; + __entry->ret = vcpu->arch.ret; + ), + + TP_printk("VCPU %d: exit=%d, ret=%d", + __entry->vcpu_id, __entry->exit, __entry->ret) +); + +#endif /* _TRACE_KVM_HV_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index e1357cd8dc1..810507cb688 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -3,36 +3,13 @@ #define _TRACE_KVM_PR_H #include <linux/tracepoint.h> +#include "trace_book3s.h" #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_pr #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE trace_pr -#define kvm_trace_symbol_exit \ - {0x100, "SYSTEM_RESET"}, \ - {0x200, "MACHINE_CHECK"}, \ - {0x300, "DATA_STORAGE"}, \ - {0x380, "DATA_SEGMENT"}, \ - {0x400, "INST_STORAGE"}, \ - {0x480, "INST_SEGMENT"}, \ - {0x500, "EXTERNAL"}, \ - {0x501, "EXTERNAL_LEVEL"}, \ - {0x502, "EXTERNAL_HV"}, \ - {0x600, "ALIGNMENT"}, \ - {0x700, "PROGRAM"}, \ - {0x800, "FP_UNAVAIL"}, \ - {0x900, "DECREMENTER"}, \ - {0x980, "HV_DECREMENTER"}, \ - {0xc00, "SYSCALL"}, \ - {0xd00, "TRACE"}, \ - {0xe00, "H_DATA_STORAGE"}, \ - {0xe20, "H_INST_STORAGE"}, \ - {0xe40, "H_EMUL_ASSIST"}, \ - {0xf00, "PERFMON"}, \ - {0xf20, "ALTIVEC"}, \ - {0xf40, "VSX"} - TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, struct kvm_vcpu *vcpu), TP_ARGS(r, vcpu), diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 2175f911a73..9cba74d5d85 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -123,7 +123,7 @@ struct kvm_s390_sie_block { #define ICPT_PARTEXEC 0x38 #define ICPT_IOINST 0x40 __u8 icptcode; /* 0x0050 */ - __u8 reserved51; /* 0x0051 */ + __u8 icptstatus; /* 0x0051 */ __u16 ihcpu; /* 0x0052 */ __u8 reserved54[2]; /* 0x0054 */ __u16 ipa; /* 0x0056 */ @@ -226,10 +226,17 @@ struct kvm_vcpu_stat { u32 instruction_sigp_sense_running; u32 instruction_sigp_external_call; u32 instruction_sigp_emergency; + u32 instruction_sigp_cond_emergency; + u32 instruction_sigp_start; u32 instruction_sigp_stop; + u32 instruction_sigp_stop_store_status; + u32 instruction_sigp_store_status; u32 instruction_sigp_arch; u32 instruction_sigp_prefix; u32 instruction_sigp_restart; + u32 instruction_sigp_init_cpu_reset; + u32 instruction_sigp_cpu_reset; + u32 instruction_sigp_unknown; u32 diagnose_10; u32 diagnose_44; u32 diagnose_9c; @@ -288,6 +295,79 @@ struct kvm_vcpu_stat { #define PGM_PER 0x80 #define PGM_CRYPTO_OPERATION 0x119 +/* irq types in order of priority */ +enum irq_types { + IRQ_PEND_MCHK_EX = 0, + IRQ_PEND_SVC, + IRQ_PEND_PROG, + IRQ_PEND_MCHK_REP, + IRQ_PEND_EXT_IRQ_KEY, + IRQ_PEND_EXT_MALFUNC, + IRQ_PEND_EXT_EMERGENCY, + IRQ_PEND_EXT_EXTERNAL, + IRQ_PEND_EXT_CLOCK_COMP, + IRQ_PEND_EXT_CPU_TIMER, + IRQ_PEND_EXT_TIMING, + IRQ_PEND_EXT_SERVICE, + 
IRQ_PEND_EXT_HOST, + IRQ_PEND_PFAULT_INIT, + IRQ_PEND_PFAULT_DONE, + IRQ_PEND_VIRTIO, + IRQ_PEND_IO_ISC_0, + IRQ_PEND_IO_ISC_1, + IRQ_PEND_IO_ISC_2, + IRQ_PEND_IO_ISC_3, + IRQ_PEND_IO_ISC_4, + IRQ_PEND_IO_ISC_5, + IRQ_PEND_IO_ISC_6, + IRQ_PEND_IO_ISC_7, + IRQ_PEND_SIGP_STOP, + IRQ_PEND_RESTART, + IRQ_PEND_SET_PREFIX, + IRQ_PEND_COUNT +}; + +/* + * Repressible (non-floating) machine check interrupts + * subclass bits in MCIC + */ +#define MCHK_EXTD_BIT 58 +#define MCHK_DEGR_BIT 56 +#define MCHK_WARN_BIT 55 +#define MCHK_REP_MASK ((1UL << MCHK_DEGR_BIT) | \ + (1UL << MCHK_EXTD_BIT) | \ + (1UL << MCHK_WARN_BIT)) + +/* Exigent machine check interrupts subclass bits in MCIC */ +#define MCHK_SD_BIT 63 +#define MCHK_PD_BIT 62 +#define MCHK_EX_MASK ((1UL << MCHK_SD_BIT) | (1UL << MCHK_PD_BIT)) + +#define IRQ_PEND_EXT_MASK ((1UL << IRQ_PEND_EXT_IRQ_KEY) | \ + (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \ + (1UL << IRQ_PEND_EXT_CPU_TIMER) | \ + (1UL << IRQ_PEND_EXT_MALFUNC) | \ + (1UL << IRQ_PEND_EXT_EMERGENCY) | \ + (1UL << IRQ_PEND_EXT_EXTERNAL) | \ + (1UL << IRQ_PEND_EXT_TIMING) | \ + (1UL << IRQ_PEND_EXT_HOST) | \ + (1UL << IRQ_PEND_EXT_SERVICE) | \ + (1UL << IRQ_PEND_VIRTIO) | \ + (1UL << IRQ_PEND_PFAULT_INIT) | \ + (1UL << IRQ_PEND_PFAULT_DONE)) + +#define IRQ_PEND_IO_MASK ((1UL << IRQ_PEND_IO_ISC_0) | \ + (1UL << IRQ_PEND_IO_ISC_1) | \ + (1UL << IRQ_PEND_IO_ISC_2) | \ + (1UL << IRQ_PEND_IO_ISC_3) | \ + (1UL << IRQ_PEND_IO_ISC_4) | \ + (1UL << IRQ_PEND_IO_ISC_5) | \ + (1UL << IRQ_PEND_IO_ISC_6) | \ + (1UL << IRQ_PEND_IO_ISC_7)) + +#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \ + (1UL << IRQ_PEND_MCHK_EX)) + struct kvm_s390_interrupt_info { struct list_head list; u64 type; @@ -306,14 +386,25 @@ struct kvm_s390_interrupt_info { #define ACTION_STORE_ON_STOP (1<<0) #define ACTION_STOP_ON_STOP (1<<1) +struct kvm_s390_irq_payload { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + struct kvm_s390_mchk_info mchk; +}; + struct kvm_s390_local_interrupt { spinlock_t lock; - struct list_head list; - atomic_t active; struct kvm_s390_float_interrupt *float_int; wait_queue_head_t *wq; atomic_t *cpuflags; unsigned int action_bits; + DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); + struct kvm_s390_irq_payload irq; + unsigned long pending_irqs; }; struct kvm_s390_float_interrupt { @@ -434,6 +525,8 @@ struct kvm_arch{ int user_cpu_state_ctrl; struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; wait_queue_head_t ipte_wq; + int ipte_lock_count; + struct mutex ipte_mutex; spinlock_t start_stop_lock; struct kvm_s390_crypto crypto; }; diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index e510b9460ef..3009c2ba46d 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -24,6 +24,7 @@ void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned long key, bool nq); +unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr); static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 49576115dbb..fad4ae23ece 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -10,6 +10,7 @@ #define SIGP_RESTART 6 #define SIGP_STOP_AND_STORE_STATUS 9 #define 
SIGP_INITIAL_CPU_RESET 11 +#define SIGP_CPU_RESET 12 #define SIGP_SET_PREFIX 13 #define SIGP_STORE_STATUS_AT_ADDRESS 14 #define SIGP_SET_ARCHITECTURE 18 diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index ca38139423a..437e6115927 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -249,7 +249,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis struct group_info *group_info; int retval; - if (!capable(CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 0f961a1c64b..8b9ccf02a2c 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -207,8 +207,6 @@ union raddress { unsigned long pfra : 52; /* Page-Frame Real Address */ }; -static int ipte_lock_count; -static DEFINE_MUTEX(ipte_mutex); int ipte_lock_held(struct kvm_vcpu *vcpu) { @@ -216,47 +214,51 @@ int ipte_lock_held(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->eca & 1) return ic->kh != 0; - return ipte_lock_count != 0; + return vcpu->kvm->arch.ipte_lock_count != 0; } static void ipte_lock_simple(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - mutex_lock(&ipte_mutex); - ipte_lock_count++; - if (ipte_lock_count > 1) + mutex_lock(&vcpu->kvm->arch.ipte_mutex); + vcpu->kvm->arch.ipte_lock_count++; + if (vcpu->kvm->arch.ipte_lock_count > 1) goto out; ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = ACCESS_ONCE(*ic); + old = *ic; + barrier(); while (old.k) { cond_resched(); - old = ACCESS_ONCE(*ic); + old = *ic; + barrier(); } new = old; new.k = 1; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); out: - mutex_unlock(&ipte_mutex); + mutex_unlock(&vcpu->kvm->arch.ipte_mutex); } static void ipte_unlock_simple(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - mutex_lock(&ipte_mutex); - ipte_lock_count--; - if (ipte_lock_count) + mutex_lock(&vcpu->kvm->arch.ipte_mutex); + vcpu->kvm->arch.ipte_lock_count--; + if (vcpu->kvm->arch.ipte_lock_count) goto out; ic = &vcpu->kvm->arch.sca->ipte_control; do { - new = old = ACCESS_ONCE(*ic); + old = *ic; + barrier(); + new = old; new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); wake_up(&vcpu->kvm->arch.ipte_wq); out: - mutex_unlock(&ipte_mutex); + mutex_unlock(&vcpu->kvm->arch.ipte_mutex); } static void ipte_lock_siif(struct kvm_vcpu *vcpu) @@ -265,10 +267,12 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = ACCESS_ONCE(*ic); + old = *ic; + barrier(); while (old.kg) { cond_resched(); - old = ACCESS_ONCE(*ic); + old = *ic; + barrier(); } new = old; new.k = 1; @@ -282,7 +286,9 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) ic = &vcpu->kvm->arch.sca->ipte_control; do { - new = old = ACCESS_ONCE(*ic); + old = *ic; + barrier(); + new = old; new.kh--; if (!new.kh) new.k = 0; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index eaf46291d36..81c77ab8102 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -38,6 +38,19 @@ static const intercept_handler_t instruction_handlers[256] = { [0xeb] = kvm_s390_handle_eb, }; +void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) +{ + struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; + + /* Use the length of the EXECUTE instruction if necessary */ + if (sie_block->icptstatus & 1) { + ilc = (sie_block->icptstatus >> 4) & 0x6; + if (!ilc) + ilc = 4; + } + 
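/*
 * Worked example of the decode above: with icptstatus bit 0 set, the
 * intercepted instruction was the target of an EXECUTE, and
 * (icptstatus >> 4) & 0x6 yields 0, 2, 4 or 6; the 0 case is mapped to
 * 4 bytes. The PSW address is then stepped back by that many bytes, so
 * the guest presumably re-executes the intercepted instruction on the
 * next SIE entry.
 */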
sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc); +} + static int handle_noop(struct kvm_vcpu *vcpu) { switch (vcpu->arch.sie_block->icptcode) { @@ -244,7 +257,7 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) static int handle_external_interrupt(struct kvm_vcpu *vcpu) { u16 eic = vcpu->arch.sie_block->eic; - struct kvm_s390_interrupt irq; + struct kvm_s390_irq irq; psw_t newpsw; int rc; @@ -269,7 +282,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) if (kvm_s390_si_ext_call_pending(vcpu)) return 0; irq.type = KVM_S390_INT_EXTERNAL_CALL; - irq.parm = vcpu->arch.sie_block->extcpuaddr; + irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr; break; default: return -EOPNOTSUPP; @@ -288,7 +301,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) */ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; unsigned long srcaddr, dstaddr; int reg1, reg2, rc; @@ -310,7 +322,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) if (rc != 0) return rc; - psw->addr = __rewind_psw(*psw, 4); + kvm_s390_rewind_psw(vcpu, 4); return 0; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a39838457f0..f00f31e66cd 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -16,6 +16,7 @@ #include <linux/mmu_context.h> #include <linux/signal.h> #include <linux/slab.h> +#include <linux/bitmap.h> #include <asm/asm-offsets.h> #include <asm/uaccess.h> #include "kvm-s390.h" @@ -27,8 +28,8 @@ #define IOINT_CSSID_MASK 0x03fc0000 #define IOINT_AI_MASK 0x04000000 #define PFAULT_INIT 0x0600 - -static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu); +#define PFAULT_DONE 0x0680 +#define VIRTIO_PARAM 0x0d00 static int is_ioint(u64 type) { @@ -136,6 +137,31 @@ static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu, return 0; } +static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.local_int.pending_irqs; +} + +static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu) +{ + unsigned long active_mask = pending_local_irqs(vcpu); + + if (psw_extint_disabled(vcpu)) + active_mask &= ~IRQ_PEND_EXT_MASK; + if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul)) + __clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask); + if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul)) + __clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask); + if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) + __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask); + if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul)) + __clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask); + if (psw_mchk_disabled(vcpu)) + active_mask &= ~IRQ_PEND_MCHK_MASK; + + return active_mask; +} + static void __set_cpu_idle(struct kvm_vcpu *vcpu) { atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); @@ -170,26 +196,45 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags); } +static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) +{ + if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK)) + return; + if (psw_extint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_EXT_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR0; +} + +static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) +{ + if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK)) + return; + if (psw_mchk_disabled(vcpu)) + vcpu->arch.sie_block->ictl |= ICTL_LPSW; + else + vcpu->arch.sie_block->lctl |= LCTL_CR14; +} + +/* Set interception request for non-deliverable local 
interrupts */ +static void set_intercept_indicators_local(struct kvm_vcpu *vcpu) +{ + set_intercept_indicators_ext(vcpu); + set_intercept_indicators_mchk(vcpu); +} + static void __set_intercept_indicator(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { switch (inti->type) { - case KVM_S390_INT_EXTERNAL_CALL: - case KVM_S390_INT_EMERGENCY: case KVM_S390_INT_SERVICE: - case KVM_S390_INT_PFAULT_INIT: case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: - case KVM_S390_INT_CLOCK_COMP: - case KVM_S390_INT_CPU_TIMER: if (psw_extint_disabled(vcpu)) __set_cpuflag(vcpu, CPUSTAT_EXT_INT); else vcpu->arch.sie_block->lctl |= LCTL_CR0; break; - case KVM_S390_SIGP_STOP: - __set_cpuflag(vcpu, CPUSTAT_STOP_INT); - break; case KVM_S390_MCHK: if (psw_mchk_disabled(vcpu)) vcpu->arch.sie_block->ictl |= ICTL_LPSW; @@ -226,13 +271,236 @@ static u16 get_ilc(struct kvm_vcpu *vcpu) } } -static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, - struct kvm_s390_pgm_info *pgm_info) +static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, + 0, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, + 0, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP, + (u16 __user *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_ext_info ext; + int rc; + + spin_lock(&li->lock); + ext = li->irq.ext; + clear_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs); + li->irq.ext.ext_params2 = 0; + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx", + 0, ext.ext_params2); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_PFAULT_INIT, + 0, ext.ext_params2); + + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *) __LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, ext.ext_params2, (u64 *) __LC_EXT_PARAMS2); + return rc ? 
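/* as in the other __deliver_* helpers: the put/read/write_guest_lc
 * return codes were OR-ed into rc above, so any failed lowcore access
 * collapses the whole delivery into -EFAULT */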
-EFAULT : 0; +} + +static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_mchk_info mchk; + int rc; + + spin_lock(&li->lock); + mchk = li->irq.mchk; + /* + * If there was an exigent machine check pending, then any repressible + * machine checks that might have been pending are indicated along + * with it, so always clear both bits + */ + clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); + clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); + memset(&li->irq.mchk, 0, sizeof(mchk)); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + mchk.mcic); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, + mchk.cr14, mchk.mcic); + + rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); + rc |= put_guest_lc(vcpu, mchk.mcic, + (u64 __user *) __LC_MCCK_CODE); + rc |= put_guest_lc(vcpu, mchk.failing_storage_address, + (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); + rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, + &mchk.fixed_logout, sizeof(mchk.fixed_logout)); + rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_restart(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); + vcpu->stat.deliver_restart_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); + + rc = write_guest_lc(vcpu, + offsetof(struct _lowcore, restart_old_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + clear_bit(IRQ_PEND_RESTART, &li->pending_irqs); + return rc ? 
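/* restart carries no payload; only the PSW pair at restart_old_psw /
 * restart_psw is swapped, which is why no __LC_EXT_* parameter fields
 * are written here */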
-EFAULT : 0; +} + +static int __must_check __deliver_stop(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); + vcpu->stat.deliver_stop_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP, + 0, 0); + + __set_cpuflag(vcpu, CPUSTAT_STOP_INT); + clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs); + return 0; +} + +static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_prefix_info prefix; + + spin_lock(&li->lock); + prefix = li->irq.prefix; + li->irq.prefix.address = 0; + clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address); + vcpu->stat.deliver_prefix_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_SIGP_SET_PREFIX, + prefix.address, 0); + + kvm_s390_set_prefix(vcpu, prefix.address); + return 0; +} + +static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + int cpu_addr; + + spin_lock(&li->lock); + cpu_addr = find_first_bit(li->sigp_emerg_pending, KVM_MAX_VCPUS); + clear_bit(cpu_addr, li->sigp_emerg_pending); + if (bitmap_empty(li->sigp_emerg_pending, KVM_MAX_VCPUS)) + clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); + vcpu->stat.deliver_emergency_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, + cpu_addr, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, cpu_addr, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_extcall_info extcall; + int rc; + + spin_lock(&li->lock); + extcall = li->irq.extcall; + li->irq.extcall.code = 0; + clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); + vcpu->stat.deliver_external_call++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_EXTERNAL_CALL, + extcall.code, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, extcall.code, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + return rc ? 
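/* note the pattern shared with the helpers above: the payload is
 * snapshotted and the pending bit cleared under li->lock first, and
 * guest lowcore memory is only touched afterwards, outside the lock */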
-EFAULT : 0; +} + +static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) { + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_pgm_info pgm_info; int rc = 0; u16 ilc = get_ilc(vcpu); - switch (pgm_info->code & ~PGM_PER) { + spin_lock(&li->lock); + pgm_info = li->irq.pgm; + clear_bit(IRQ_PEND_PROG, &li->pending_irqs); + memset(&li->irq.pgm, 0, sizeof(pgm_info)); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", + pgm_info.code, ilc); + vcpu->stat.deliver_program_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, + pgm_info.code, 0); + + switch (pgm_info.code & ~PGM_PER) { case PGM_AFX_TRANSLATION: case PGM_ASX_TRANSLATION: case PGM_EX_TRANSLATION: @@ -243,7 +511,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, case PGM_PRIMARY_AUTHORITY: case PGM_SECONDARY_AUTHORITY: case PGM_SPACE_SWITCH: - rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + rc = put_guest_lc(vcpu, pgm_info.trans_exc_code, (u64 *)__LC_TRANS_EXC_CODE); break; case PGM_ALEN_TRANSLATION: @@ -252,7 +520,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, case PGM_ASTE_SEQUENCE: case PGM_ASTE_VALIDITY: case PGM_EXTENDED_AUTHORITY: - rc = put_guest_lc(vcpu, pgm_info->exc_access_id, + rc = put_guest_lc(vcpu, pgm_info.exc_access_id, (u8 *)__LC_EXC_ACCESS_ID); break; case PGM_ASCE_TYPE: @@ -261,247 +529,208 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: case PGM_SEGMENT_TRANSLATION: - rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + rc = put_guest_lc(vcpu, pgm_info.trans_exc_code, (u64 *)__LC_TRANS_EXC_CODE); - rc |= put_guest_lc(vcpu, pgm_info->exc_access_id, + rc |= put_guest_lc(vcpu, pgm_info.exc_access_id, (u8 *)__LC_EXC_ACCESS_ID); - rc |= put_guest_lc(vcpu, pgm_info->op_access_id, + rc |= put_guest_lc(vcpu, pgm_info.op_access_id, (u8 *)__LC_OP_ACCESS_ID); break; case PGM_MONITOR: - rc = put_guest_lc(vcpu, pgm_info->mon_class_nr, - (u64 *)__LC_MON_CLASS_NR); - rc |= put_guest_lc(vcpu, pgm_info->mon_code, + rc = put_guest_lc(vcpu, pgm_info.mon_class_nr, + (u16 *)__LC_MON_CLASS_NR); + rc |= put_guest_lc(vcpu, pgm_info.mon_code, (u64 *)__LC_MON_CODE); break; case PGM_DATA: - rc = put_guest_lc(vcpu, pgm_info->data_exc_code, + rc = put_guest_lc(vcpu, pgm_info.data_exc_code, (u32 *)__LC_DATA_EXC_CODE); break; case PGM_PROTECTION: - rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + rc = put_guest_lc(vcpu, pgm_info.trans_exc_code, (u64 *)__LC_TRANS_EXC_CODE); - rc |= put_guest_lc(vcpu, pgm_info->exc_access_id, + rc |= put_guest_lc(vcpu, pgm_info.exc_access_id, (u8 *)__LC_EXC_ACCESS_ID); break; } - if (pgm_info->code & PGM_PER) { - rc |= put_guest_lc(vcpu, pgm_info->per_code, + if (pgm_info.code & PGM_PER) { + rc |= put_guest_lc(vcpu, pgm_info.per_code, (u8 *) __LC_PER_CODE); - rc |= put_guest_lc(vcpu, pgm_info->per_atmid, + rc |= put_guest_lc(vcpu, pgm_info.per_atmid, (u8 *)__LC_PER_ATMID); - rc |= put_guest_lc(vcpu, pgm_info->per_address, + rc |= put_guest_lc(vcpu, pgm_info.per_address, (u64 *) __LC_PER_ADDRESS); - rc |= put_guest_lc(vcpu, pgm_info->per_access_id, + rc |= put_guest_lc(vcpu, pgm_info.per_access_id, (u8 *) __LC_PER_ACCESS_ID); } rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); - rc |= put_guest_lc(vcpu, pgm_info->code, + rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_INT_CODE); rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= 
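/* classical PSW swap: the old PSW was stored at __LC_PGM_OLD_PSW just
 * above, and the guest's program-check new PSW is now loaded into the
 * SIE block, so the vcpu resumes in its program-check handler */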
read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? -EFAULT : 0; +} - return rc; +static int __must_check __deliver_service(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + int rc; + + VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", + inti->ext.ext_params); + vcpu->stat.deliver_service_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + return rc ? -EFAULT : 0; } -static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) { - const unsigned short table[] = { 2, 4, 4, 6 }; - int rc = 0; + int rc; + + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_PFAULT_DONE, 0, + inti->ext.ext_params2); + + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + int rc; + + VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", + inti->ext.ext_params, inti->ext.ext_params2); + vcpu->stat.deliver_virtio_interrupt++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, + inti->ext.ext_params2); + + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_io(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + int rc; + + VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); + vcpu->stat.deliver_io_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + ((__u32)inti->io.subchannel_id << 16) | + inti->io.subchannel_nr, + ((__u64)inti->io.io_int_parm << 32) | + inti->io.io_int_word); + + rc = put_guest_lc(vcpu, inti->io.subchannel_id, + (u16 *)__LC_SUBCHANNEL_ID); + rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, + (u16 *)__LC_SUBCHANNEL_NR); + rc |= put_guest_lc(vcpu, inti->io.io_int_parm, + (u32 *)__LC_IO_INT_PARM); + rc |= put_guest_lc(vcpu, inti->io.io_int_word, + (u32 *)__LC_IO_INT_WORD); + rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? 
-EFAULT : 0; +} + +static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_mchk_info *mchk = &inti->mchk; + int rc; + + VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + mchk->mcic); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, + mchk->cr14, mchk->mcic); + + rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); + rc |= put_guest_lc(vcpu, mchk->mcic, + (u64 __user *) __LC_MCCK_CODE); + rc |= put_guest_lc(vcpu, mchk->failing_storage_address, + (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); + rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, + &mchk->fixed_logout, sizeof(mchk->fixed_logout)); + rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? -EFAULT : 0; +} + +typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu); + +static const deliver_irq_t deliver_irq_funcs[] = { + [IRQ_PEND_MCHK_EX] = __deliver_machine_check, + [IRQ_PEND_PROG] = __deliver_prog, + [IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal, + [IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call, + [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc, + [IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer, + [IRQ_PEND_RESTART] = __deliver_restart, + [IRQ_PEND_SIGP_STOP] = __deliver_stop, + [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix, + [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init, +}; + +static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + int rc; switch (inti->type) { - case KVM_S390_INT_EMERGENCY: - VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); - vcpu->stat.deliver_emergency_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->emerg.code, 0); - rc = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, inti->emerg.code, - (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - break; - case KVM_S390_INT_EXTERNAL_CALL: - VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); - vcpu->stat.deliver_external_call++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->extcall.code, 0); - rc = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, inti->extcall.code, - (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - break; - case KVM_S390_INT_CLOCK_COMP: - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, 0); - rc = deliver_ckc_interrupt(vcpu); - break; - case KVM_S390_INT_CPU_TIMER: - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, 0); - rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER, - (u16 *)__LC_EXT_INT_CODE); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); - break; case KVM_S390_INT_SERVICE: - VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", - inti->ext.ext_params); - vcpu->stat.deliver_service_signal++; - 
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, 0); - rc = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); - break; - case KVM_S390_INT_PFAULT_INIT: - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, - inti->ext.ext_params2); - rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, - (u16 *) __LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *) __LC_EXT_PARAMS2); + rc = __deliver_service(vcpu, inti); break; case KVM_S390_INT_PFAULT_DONE: - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, - inti->ext.ext_params2); - rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *)__LC_EXT_PARAMS2); + rc = __deliver_pfault_done(vcpu, inti); break; case KVM_S390_INT_VIRTIO: - VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", - inti->ext.ext_params, inti->ext.ext_params2); - vcpu->stat.deliver_virtio_interrupt++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, - inti->ext.ext_params2); - rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *)__LC_EXT_PARAMS2); - break; - case KVM_S390_SIGP_STOP: - VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); - vcpu->stat.deliver_stop_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - 0, 0); - __set_intercept_indicator(vcpu, inti); - break; - - case KVM_S390_SIGP_SET_PREFIX: - VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", - inti->prefix.address); - vcpu->stat.deliver_prefix_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->prefix.address, 0); - kvm_s390_set_prefix(vcpu, inti->prefix.address); - break; - - case KVM_S390_RESTART: - VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); - vcpu->stat.deliver_restart_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - 0, 0); - rc = write_guest_lc(vcpu, - offsetof(struct _lowcore, restart_old_psw), - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw), - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); + rc = __deliver_virtio(vcpu, inti); break; - case KVM_S390_PROGRAM_INT: - VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", - inti->pgm.code, - table[vcpu->arch.sie_block->ipa >> 14]); - vcpu->stat.deliver_program_int++; - 
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->pgm.code, 0); - rc = __deliver_prog_irq(vcpu, &inti->pgm); - break; - case KVM_S390_MCHK: - VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", - inti->mchk.mcic); - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->mchk.cr14, - inti->mchk.mcic); - rc = kvm_s390_vcpu_store_status(vcpu, - KVM_S390_STORE_STATUS_PREFIXED); - rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE); - rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc = __deliver_mchk_floating(vcpu, inti); break; - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - { - __u32 param0 = ((__u32)inti->io.subchannel_id << 16) | - inti->io.subchannel_nr; - __u64 param1 = ((__u64)inti->io.io_int_parm << 32) | - inti->io.io_int_word; - VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); - vcpu->stat.deliver_io_int++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - param0, param1); - rc = put_guest_lc(vcpu, inti->io.subchannel_id, - (u16 *)__LC_SUBCHANNEL_ID); - rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, - (u16 *)__LC_SUBCHANNEL_NR); - rc |= put_guest_lc(vcpu, inti->io.io_int_parm, - (u32 *)__LC_IO_INT_PARM); - rc |= put_guest_lc(vcpu, inti->io.io_int_word, - (u32 *)__LC_IO_INT_WORD); - rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); + rc = __deliver_io(vcpu, inti); break; - } default: BUG(); } @@ -509,19 +738,6 @@ static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu, return rc; } -static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu) -{ - int rc; - - rc = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - return rc; -} - /* Check whether SIGP interpretation facility has an external call pending */ int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu) { @@ -538,20 +754,11 @@ int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu) int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) { - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; struct kvm_s390_interrupt_info *inti; - int rc = 0; + int rc; - if (atomic_read(&li->active)) { - spin_lock(&li->lock); - list_for_each_entry(inti, &li->list, list) - if (__interrupt_is_deliverable(vcpu, inti)) { - rc = 1; - break; - } - spin_unlock(&li->lock); - } + rc = !!deliverable_local_irqs(vcpu); if ((!rc) && atomic_read(&fi->active)) { spin_lock(&fi->lock); @@ -643,18 +850,15 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_interrupt_info *n, *inti = NULL; spin_lock(&li->lock); - list_for_each_entry_safe(inti, n, &li->list, list) { - list_del(&inti->list); - kfree(inti); - } - atomic_set(&li->active, 0); + li->pending_irqs = 0; + bitmap_zero(li->sigp_emerg_pending, KVM_MAX_VCPUS); + memset(&li->irq, 0, sizeof(li->irq)); spin_unlock(&li->lock); /* clear pending external calls set by sigp interpretation facility */ - 
atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags); atomic_clear_mask(SIGP_CTRL_C, &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); } @@ -664,34 +868,35 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; struct kvm_s390_interrupt_info *n, *inti = NULL; + deliver_irq_t func; int deliver; int rc = 0; + unsigned long irq_type; + unsigned long deliverable_irqs; __reset_intercept_indicators(vcpu); - if (atomic_read(&li->active)) { - do { - deliver = 0; - spin_lock(&li->lock); - list_for_each_entry_safe(inti, n, &li->list, list) { - if (__interrupt_is_deliverable(vcpu, inti)) { - list_del(&inti->list); - deliver = 1; - break; - } - __set_intercept_indicator(vcpu, inti); - } - if (list_empty(&li->list)) - atomic_set(&li->active, 0); - spin_unlock(&li->lock); - if (deliver) { - rc = __do_deliver_interrupt(vcpu, inti); - kfree(inti); - } - } while (!rc && deliver); - } - if (!rc && kvm_cpu_has_pending_timer(vcpu)) - rc = deliver_ckc_interrupt(vcpu); + /* pending ckc conditions might have been invalidated */ + clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + if (kvm_cpu_has_pending_timer(vcpu)) + set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + + do { + deliverable_irqs = deliverable_local_irqs(vcpu); + /* bits are in the order of interrupt priority */ + irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT); + if (irq_type == IRQ_PEND_COUNT) + break; + func = deliver_irq_funcs[irq_type]; + if (!func) { + WARN_ON_ONCE(func == NULL); + clear_bit(irq_type, &li->pending_irqs); + continue; + } + rc = func(vcpu); + } while (!rc && irq_type != IRQ_PEND_COUNT); + + set_intercept_indicators_local(vcpu); if (!rc && atomic_read(&fi->active)) { do { @@ -710,7 +915,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) atomic_set(&fi->active, 0); spin_unlock(&fi->lock); if (deliver) { - rc = __do_deliver_interrupt(vcpu, inti); + rc = __deliver_floating_interrupt(vcpu, inti); kfree(inti); } } while (!rc && deliver); @@ -719,23 +924,26 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) return rc; } -int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) +static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_interrupt_info *inti; - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) - return -ENOMEM; + li->irq.pgm = irq->u.pgm; + set_bit(IRQ_PEND_PROG, &li->pending_irqs); + return 0; +} - inti->type = KVM_S390_PROGRAM_INT; - inti->pgm.code = code; +int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_irq irq; VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code, + 0, 1); spin_lock(&li->lock); - list_add(&inti->list, &li->list); - atomic_set(&li->active, 1); + irq.u.pgm.code = code; + __inject_prog(vcpu, &irq); BUG_ON(waitqueue_active(li->wq)); spin_unlock(&li->lock); return 0; @@ -745,27 +953,166 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, struct kvm_s390_pgm_info *pgm_info) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - 
struct kvm_s390_interrupt_info *inti; - - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) - return -ENOMEM; + struct kvm_s390_irq irq; + int rc; VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)", pgm_info->code); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, pgm_info->code, 0, 1); - - inti->type = KVM_S390_PROGRAM_INT; - memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm)); spin_lock(&li->lock); - list_add(&inti->list, &li->list); - atomic_set(&li->active, 1); + irq.u.pgm = *pgm_info; + rc = __inject_prog(vcpu, &irq); BUG_ON(waitqueue_active(li->wq)); spin_unlock(&li->lock); + return rc; +} + +static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx", + irq->u.ext.ext_params, irq->u.ext.ext_params2); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT, + irq->u.ext.ext_params, + irq->u.ext.ext_params2, 2); + + li->irq.ext = irq->u.ext; + set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); return 0; } +int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_extcall_info *extcall = &li->irq.extcall; + + VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", + irq->u.extcall.code); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, + irq->u.extcall.code, 0, 2); + + *extcall = irq->u.extcall; + set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + return 0; +} + +static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_prefix_info *prefix = &li->irq.prefix; + + VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", + prefix->address); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX, + prefix->address, 0, 2); + + *prefix = irq->u.prefix; + set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); + return 0; +} + +static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2); + + li->action_bits |= ACTION_STOP_ON_STOP; + set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); + return 0; +} + +static int __inject_sigp_restart(struct kvm_vcpu *vcpu, + struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2); + + set_bit(IRQ_PEND_RESTART, &li->pending_irqs); + return 0; +} + +static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, + struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_emerg_info *emerg = &li->irq.emerg; + + VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", + irq->u.emerg.code); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, + emerg->code, 0, 2); + + set_bit(emerg->code, li->sigp_emerg_pending); + set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + return 0; +} + +static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + 
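/* mchk below aliases the accumulated li->irq.mchk state, so the
 * VCPU_EVENT/trace calls print the previously pending mcic value, not
 * yet the bits of the irq being injected; the new bits are only OR-ed
 * in afterwards */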
struct kvm_s390_mchk_info *mchk = &li->irq.mchk; + + VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", + mchk->mcic); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0, + mchk->mcic, 2); + + /* + * Because repressible machine checks can be indicated along with + * exigent machine checks (PoP, Chapter 11, Interruption action) + * we need to combine cr14, mcic and external damage code. + * Failing storage address and the logout area should not be or'ed + * together, we just indicate the last occurrence of the corresponding + * machine check + */ + mchk->cr14 |= irq->u.mchk.cr14; + mchk->mcic |= irq->u.mchk.mcic; + mchk->ext_damage_code |= irq->u.mchk.ext_damage_code; + mchk->failing_storage_address = irq->u.mchk.failing_storage_address; + memcpy(&mchk->fixed_logout, &irq->u.mchk.fixed_logout, + sizeof(mchk->fixed_logout)); + if (mchk->mcic & MCHK_EX_MASK) + set_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); + else if (mchk->mcic & MCHK_REP_MASK) + set_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); + return 0; +} + +static int __inject_ckc(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, + 0, 0, 2); + + set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + return 0; +} + +static int __inject_cpu_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, + 0, 0, 2); + + set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + return 0; +} + + struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid) { @@ -851,7 +1198,17 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) dst_vcpu = kvm_get_vcpu(kvm, sigcpu); li = &dst_vcpu->arch.local_int; spin_lock(&li->lock); - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + switch (inti->type) { + case KVM_S390_MCHK: + atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags); + break; + default: + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + break; + } spin_unlock(&li->lock); kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); unlock_fi: @@ -920,92 +1277,85 @@ void kvm_s390_reinject_io_int(struct kvm *kvm, __inject_vm(kvm, inti); } -int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt *s390int) +int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, + struct kvm_s390_irq *irq) { - struct kvm_s390_local_interrupt *li; - struct kvm_s390_interrupt_info *inti; + irq->type = s390int->type; + switch (irq->type) { + case KVM_S390_PROGRAM_INT: + if (s390int->parm & 0xffff0000) + return -EINVAL; + irq->u.pgm.code = s390int->parm; + break; + case KVM_S390_SIGP_SET_PREFIX: + irq->u.prefix.address = s390int->parm; + break; + case KVM_S390_INT_EXTERNAL_CALL: + if (irq->u.extcall.code & 0xffff0000) + return -EINVAL; + irq->u.extcall.code = s390int->parm; + break; + case KVM_S390_INT_EMERGENCY: + if (irq->u.emerg.code & 0xffff0000) + return -EINVAL; + irq->u.emerg.code = s390int->parm; + break; + case KVM_S390_MCHK: + irq->u.mchk.mcic = s390int->parm64; + break; + } + return 0; +} - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) 
- return -ENOMEM; +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; - switch (s390int->type) { + spin_lock(&li->lock); + switch (irq->type) { case KVM_S390_PROGRAM_INT: - if (s390int->parm & 0xffff0000) { - kfree(inti); - return -EINVAL; - } - inti->type = s390int->type; - inti->pgm.code = s390int->parm; VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", - s390int->parm); + irq->u.pgm.code); + rc = __inject_prog(vcpu, irq); break; case KVM_S390_SIGP_SET_PREFIX: - inti->prefix.address = s390int->parm; - inti->type = s390int->type; - VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", - s390int->parm); + rc = __inject_set_prefix(vcpu, irq); break; case KVM_S390_SIGP_STOP: + rc = __inject_sigp_stop(vcpu, irq); + break; case KVM_S390_RESTART: + rc = __inject_sigp_restart(vcpu, irq); + break; case KVM_S390_INT_CLOCK_COMP: + rc = __inject_ckc(vcpu); + break; case KVM_S390_INT_CPU_TIMER: - VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); - inti->type = s390int->type; + rc = __inject_cpu_timer(vcpu); break; case KVM_S390_INT_EXTERNAL_CALL: - if (s390int->parm & 0xffff0000) { - kfree(inti); - return -EINVAL; - } - VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", - s390int->parm); - inti->type = s390int->type; - inti->extcall.code = s390int->parm; + rc = __inject_extcall(vcpu, irq); break; case KVM_S390_INT_EMERGENCY: - if (s390int->parm & 0xffff0000) { - kfree(inti); - return -EINVAL; - } - VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm); - inti->type = s390int->type; - inti->emerg.code = s390int->parm; + rc = __inject_sigp_emergency(vcpu, irq); break; case KVM_S390_MCHK: - VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", - s390int->parm64); - inti->type = s390int->type; - inti->mchk.mcic = s390int->parm64; + rc = __inject_mchk(vcpu, irq); break; case KVM_S390_INT_PFAULT_INIT: - inti->type = s390int->type; - inti->ext.ext_params2 = s390int->parm64; + rc = __inject_pfault_init(vcpu, irq); break; case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: default: - kfree(inti); - return -EINVAL; + rc = -EINVAL; } - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm, - s390int->parm64, 2); - - li = &vcpu->arch.local_int; - spin_lock(&li->lock); - if (inti->type == KVM_S390_PROGRAM_INT) - list_add(&inti->list, &li->list); - else - list_add_tail(&inti->list, &li->list); - atomic_set(&li->active, 1); - if (inti->type == KVM_S390_SIGP_STOP) - li->action_bits |= ACTION_STOP_ON_STOP; - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); spin_unlock(&li->lock); - kvm_s390_vcpu_wakeup(vcpu); - return 0; + if (!rc) + kvm_s390_vcpu_wakeup(vcpu); + return rc; } void kvm_s390_clear_float_irqs(struct kvm *kvm) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6b049ee75a5..3e09801e310 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -81,10 +81,17 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, + { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) }, + { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) }, { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, + { 
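s390int_to_s390irq() above maps the legacy kvm_s390_interrupt layout, where every payload rides in parm/parm64, onto the typed kvm_s390_irq union. One detail worth noting: for the external-call and emergency cases the hunk tests irq->u.*.code before that field has been assigned. The sketch below, which uses abbreviated stand-in structs rather than the real ABI, validates the source parm instead.

#include <errno.h>
#include <stdint.h>

/* Abbreviated stand-ins, not the real uapi structs. */
struct legacy_int { uint32_t type; uint32_t parm; uint64_t parm64; };
struct typed_irq {
    uint32_t type;
    union {
        struct { uint16_t code; } pgm, emerg, extcall;
        struct { uint32_t address; } prefix;
        struct { uint64_t mcic; } mchk;
    } u;
};

enum { T_PROGRAM = 1, T_PREFIX, T_EXTCALL, T_EMERGENCY, T_MCHK };

static int legacy_to_typed(const struct legacy_int *in, struct typed_irq *out)
{
    out->type = in->type;
    switch (in->type) {
    case T_PROGRAM:
    case T_EXTCALL:
    case T_EMERGENCY:
        if (in->parm & 0xffff0000u)    /* these codes are 16 bit */
            return -EINVAL;
        break;
    }
    switch (in->type) {
    case T_PROGRAM:   out->u.pgm.code = (uint16_t)in->parm; break;
    case T_PREFIX:    out->u.prefix.address = in->parm; break;
    case T_EXTCALL:   out->u.extcall.code = (uint16_t)in->parm; break;
    case T_EMERGENCY: out->u.emerg.code = (uint16_t)in->parm; break;
    case T_MCHK:      out->u.mchk.mcic = in->parm64; break;
    }
    return 0;
}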
"instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, + { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) }, + { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) }, + { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) }, { "diagnose_10", VCPU_STAT(diagnose_10) }, { "diagnose_44", VCPU_STAT(diagnose_44) }, { "diagnose_9c", VCPU_STAT(diagnose_9c) }, @@ -453,6 +460,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) spin_lock_init(&kvm->arch.float_int.lock); INIT_LIST_HEAD(&kvm->arch.float_int.list); init_waitqueue_head(&kvm->arch.ipte_wq); + mutex_init(&kvm->arch.ipte_mutex); debug_register_view(kvm->arch.dbf, &debug_sprintf_view); VM_EVENT(kvm, 3, "%s", "vm created"); @@ -711,7 +719,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, } spin_lock_init(&vcpu->arch.local_int.lock); - INIT_LIST_HEAD(&vcpu->arch.local_int.list); vcpu->arch.local_int.float_int = &kvm->arch.float_int; vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; @@ -1114,13 +1121,15 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, unsigned long token) { struct kvm_s390_interrupt inti; - inti.parm64 = token; + struct kvm_s390_irq irq; if (start_token) { - inti.type = KVM_S390_INT_PFAULT_INIT; - WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti)); + irq.u.ext.ext_params2 = token; + irq.type = KVM_S390_INT_PFAULT_INIT; + WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); } else { inti.type = KVM_S390_INT_PFAULT_DONE; + inti.parm64 = token; WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); } } @@ -1614,11 +1623,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, switch (ioctl) { case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int; + struct kvm_s390_irq s390irq; r = -EFAULT; if (copy_from_user(&s390int, argp, sizeof(s390int))) break; - r = kvm_s390_inject_vcpu(vcpu, &s390int); + if (s390int_to_s390irq(&s390int, &s390irq)) + return -EINVAL; + r = kvm_s390_inject_vcpu(vcpu, &s390irq); break; } case KVM_S390_STORE_STATUS: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 244d0230318..a8f3d9b71c1 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -24,8 +24,6 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); /* declare vfacilities extern */ extern unsigned long *vfacilities; -int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); - /* Transactional Memory Execution related macros */ #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) #define TDB_FORMAT1 1 @@ -144,7 +142,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm); int __must_check kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int); int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt *s390int); + struct kvm_s390_irq *irq); int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); @@ -152,6 +150,10 @@ void kvm_s390_reinject_io_int(struct kvm *kvm, struct kvm_s390_interrupt_info *inti); int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); +/* 
implemented in intercept.c */ +void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc); +int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); + /* implemented in priv.c */ int is_valid_psw(psw_t *psw); int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); @@ -222,6 +224,9 @@ static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc) return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); } +int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, + struct kvm_s390_irq *s390irq); + /* implemented in interrupt.c */ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int psw_extint_disabled(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index f47cb0c6d90..1be578d64df 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -180,21 +180,18 @@ static int handle_skey(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - vcpu->arch.sie_block->gpsw.addr = - __rewind_psw(vcpu->arch.sie_block->gpsw, 4); + kvm_s390_rewind_psw(vcpu, 4); VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return 0; } static int handle_ipte_interlock(struct kvm_vcpu *vcpu) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; - vcpu->stat.instruction_ipte_interlock++; - if (psw_bits(*psw).p) + if (psw_bits(vcpu->arch.sie_block->gpsw).p) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); - psw->addr = __rewind_psw(*psw, 4); + kvm_s390_rewind_psw(vcpu, 4); VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation"); return 0; } @@ -650,10 +647,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; - if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { - if (kvm_s390_check_low_addr_protection(vcpu, start)) - return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); - } + start = kvm_s390_logical_to_effective(vcpu, start); switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { case 0x00000000: @@ -669,6 +663,12 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) default: return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); } + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (kvm_s390_check_low_addr_protection(vcpu, start)) + return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); + } + while (start < end) { unsigned long useraddr, abs_addr; @@ -725,8 +725,7 @@ static int handle_essa(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* Rewind PSW to repeat the ESSA instruction */ - vcpu->arch.sie_block->gpsw.addr = - __rewind_psw(vcpu->arch.sie_block->gpsw, 4); + kvm_s390_rewind_psw(vcpu, 4); vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); down_read(&gmap->mm->mmap_sem); @@ -769,8 +768,8 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) { int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u32 val = 0; - int reg, rc; + int reg, rc, nr_regs; + u32 ctl_array[16]; u64 ga; vcpu->stat.instruction_lctl++; @@ -786,19 +785,20 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga); + nr_regs = ((reg3 - reg1) & 0xf) + 1; + rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); reg = reg1; + nr_regs = 0; 
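The LCTL rewrite above replaces up to sixteen single-word guest reads with one read_guest() sized by the wrap-around distance from reg1 to reg3, so an access fault is reported before any control register changes. A small sketch of the count calculation and the masked merge loop, in plain userspace C with gcr[] standing in for the SIE control-register block:

#include <stdint.h>

/* Wrap-around register count: LCTL loads reg1..reg3 modulo 16, so
 * r1=14, r3=1 names four registers: 14, 15, 0, 1. */
static int nr_ctl_regs(int reg1, int reg3)
{
    return ((reg3 - reg1) & 0xf) + 1;
}

/* Merge 32-bit values into the low halves of the 64-bit control
 * registers, mirroring the loop in the hunk above. */
static void load_ctl32(uint64_t gcr[16], int reg1, int reg3,
                       const uint32_t *ctl_array)
{
    int reg = reg1, i = 0;

    do {
        gcr[reg] &= 0xffffffff00000000ull;
        gcr[reg] |= ctl_array[i++];
        if (reg == reg3)
            break;
        reg = (reg + 1) % 16;
    } while (1);
}

int main(void)
{
    uint64_t gcr[16] = {0};
    uint32_t vals[4] = {1, 2, 3, 4};

    load_ctl32(gcr, 14, 1, vals);       /* loads CRs 14, 15, 0, 1 */
    return nr_ctl_regs(14, 1) == 4 ? 0 : 1;
}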
do { - rc = read_guest(vcpu, ga, &val, sizeof(val)); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; - vcpu->arch.sie_block->gcr[reg] |= val; - ga += 4; + vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++]; if (reg == reg3) break; reg = (reg + 1) % 16; } while (1); - + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } @@ -806,9 +806,9 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) { int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + int reg, rc, nr_regs; + u32 ctl_array[16]; u64 ga; - u32 val; - int reg, rc; vcpu->stat.instruction_stctl++; @@ -824,26 +824,24 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga); reg = reg1; + nr_regs = 0; do { - val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful; - rc = write_guest(vcpu, ga, &val, sizeof(val)); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); - ga += 4; + ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg]; if (reg == reg3) break; reg = (reg + 1) % 16; } while (1); - - return 0; + rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32)); + return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0; } static int handle_lctlg(struct kvm_vcpu *vcpu) { int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 ga, val; - int reg, rc; + int reg, rc, nr_regs; + u64 ctl_array[16]; + u64 ga; vcpu->stat.instruction_lctlg++; @@ -855,22 +853,22 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - reg = reg1; - VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga); + nr_regs = ((reg3 - reg1) & 0xf) + 1; + rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + reg = reg1; + nr_regs = 0; do { - rc = read_guest(vcpu, ga, &val, sizeof(val)); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); - vcpu->arch.sie_block->gcr[reg] = val; - ga += 8; + vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++]; if (reg == reg3) break; reg = (reg + 1) % 16; } while (1); - + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } @@ -878,8 +876,9 @@ static int handle_stctg(struct kvm_vcpu *vcpu) { int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 ga, val; - int reg, rc; + int reg, rc, nr_regs; + u64 ctl_array[16]; + u64 ga; vcpu->stat.instruction_stctg++; @@ -891,23 +890,19 @@ static int handle_stctg(struct kvm_vcpu *vcpu) if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - reg = reg1; - VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga); + reg = reg1; + nr_regs = 0; do { - val = vcpu->arch.sie_block->gcr[reg]; - rc = write_guest(vcpu, ga, &val, sizeof(val)); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); - ga += 8; + ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg]; if (reg == reg3) break; reg = (reg + 1) % 16; } while (1); - - return 0; + rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64)); + return rc ? 
kvm_s390_inject_prog_cond(vcpu, rc) : 0; } static const intercept_handler_t eb_handlers[256] = { diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index cf243ba3d50..6651f9f7397 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -20,20 +20,13 @@ #include "kvm-s390.h" #include "trace.h" -static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, +static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, u64 *reg) { struct kvm_s390_local_interrupt *li; - struct kvm_vcpu *dst_vcpu = NULL; int cpuflags; int rc; - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; - - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; cpuflags = atomic_read(li->cpuflags); @@ -48,55 +41,53 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, rc = SIGP_CC_STATUS_STORED; } - VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); + VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", dst_vcpu->vcpu_id, + rc); return rc; } -static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) +static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu) { - struct kvm_s390_interrupt s390int = { + struct kvm_s390_irq irq = { .type = KVM_S390_INT_EMERGENCY, - .parm = vcpu->vcpu_id, + .u.emerg.code = vcpu->vcpu_id, }; - struct kvm_vcpu *dst_vcpu = NULL; int rc = 0; - if (cpu_addr < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; - - rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int); + rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); if (!rc) - VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); + VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", + dst_vcpu->vcpu_id); return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; } -static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, +static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu) +{ + return __inject_sigp_emergency(vcpu, dst_vcpu); +} + +static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u16 asn, u64 *reg) { - struct kvm_vcpu *dst_vcpu = NULL; const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT; u16 p_asn, s_asn; psw_t *psw; u32 flags; - if (cpu_addr < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags); psw = &dst_vcpu->arch.sie_block->gpsw; p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */ s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */ - /* Deliver the emergency signal? */ + /* Inject the emergency signal? 
*/ if (!(flags & CPUSTAT_STOPPED) || (psw->mask & psw_int_mask) != psw_int_mask || ((flags & CPUSTAT_WAIT) && psw->addr != 0) || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) { - return __sigp_emergency(vcpu, cpu_addr); + return __inject_sigp_emergency(vcpu, dst_vcpu); } else { *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; @@ -104,23 +95,19 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, } } -static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) +static int __sigp_external_call(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu) { - struct kvm_s390_interrupt s390int = { + struct kvm_s390_irq irq = { .type = KVM_S390_INT_EXTERNAL_CALL, - .parm = vcpu->vcpu_id, + .u.extcall.code = vcpu->vcpu_id, }; - struct kvm_vcpu *dst_vcpu = NULL; int rc; - if (cpu_addr < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; - - rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int); + rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); if (!rc) - VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); + VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", + dst_vcpu->vcpu_id); return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; } @@ -128,29 +115,20 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action) { struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int; - struct kvm_s390_interrupt_info *inti; int rc = SIGP_CC_ORDER_CODE_ACCEPTED; - inti = kzalloc(sizeof(*inti), GFP_ATOMIC); - if (!inti) - return -ENOMEM; - inti->type = KVM_S390_SIGP_STOP; - spin_lock(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) { /* another SIGP STOP is pending */ - kfree(inti); rc = SIGP_CC_BUSY; goto out; } if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { - kfree(inti); if ((action & ACTION_STORE_ON_STOP) != 0) rc = -ESHUTDOWN; goto out; } - list_add_tail(&inti->list, &li->list); - atomic_set(&li->active, 1); + set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); li->action_bits |= action; atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); kvm_s390_vcpu_wakeup(dst_vcpu); @@ -160,23 +138,27 @@ out: return rc; } -static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) +static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu) { - struct kvm_vcpu *dst_vcpu = NULL; int rc; - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; + rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP); + VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id); - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; + return rc; +} - rc = __inject_sigp_stop(dst_vcpu, action); +static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u64 *reg) +{ + int rc; - VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); + rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP | + ACTION_STORE_ON_STOP); + VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x", + dst_vcpu->vcpu_id); - if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) { + if (rc == -ESHUTDOWN) { /* If the CPU has already been stopped, we still have * to save the status when doing stop-and-store. This * has to be done after unlocking all spinlocks. 
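With the kzalloc() gone, injecting SIGP STOP above can no longer fail with -ENOMEM: it is either accepted, busy because a stop is already pending, or short-circuited with -ESHUTDOWN when the target is already stopped and a store was requested. A lock-free sketch of those outcomes, with stand-in field names and bit index:

#include <errno.h>

#define ACTION_STORE_ON_STOP 1
#define ACTION_STOP_ON_STOP  2
#define IRQ_PEND_SIGP_STOP   0          /* stand-in bit index */

enum { CC_ACCEPTED = 0, CC_BUSY = 2 };  /* SIGP condition codes */

struct dst_state {
    unsigned long pending_irqs;
    unsigned int action_bits;
    int stopped;                        /* stand-in for CPUSTAT_STOPPED */
};

static int inject_sigp_stop(struct dst_state *d, int action)
{
    if (d->action_bits & ACTION_STOP_ON_STOP)
        return CC_BUSY;                 /* a STOP is already in flight */
    if (d->stopped)                     /* nothing to stop; maybe store */
        return (action & ACTION_STORE_ON_STOP) ? -ESHUTDOWN : CC_ACCEPTED;
    d->pending_irqs |= 1UL << IRQ_PEND_SIGP_STOP;
    d->action_bits |= action;
    return CC_ACCEPTED;
}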
*/ @@ -212,18 +194,12 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) return rc; } -static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, - u64 *reg) +static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, + u32 address, u64 *reg) { struct kvm_s390_local_interrupt *li; - struct kvm_vcpu *dst_vcpu = NULL; - struct kvm_s390_interrupt_info *inti; int rc; - if (cpu_addr < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; /* @@ -238,46 +214,34 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, return SIGP_CC_STATUS_STORED; } - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) - return SIGP_CC_BUSY; - spin_lock(&li->lock); /* cpu must be in stopped state */ if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; rc = SIGP_CC_STATUS_STORED; - kfree(inti); goto out_li; } - inti->type = KVM_S390_SIGP_SET_PREFIX; - inti->prefix.address = address; - - list_add_tail(&inti->list, &li->list); - atomic_set(&li->active, 1); + li->irq.prefix.address = address; + set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); kvm_s390_vcpu_wakeup(dst_vcpu); rc = SIGP_CC_ORDER_CODE_ACCEPTED; - VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); + VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id, + address); out_li: spin_unlock(&li->lock); return rc; } -static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, - u32 addr, u64 *reg) +static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, + u32 addr, u64 *reg) { - struct kvm_vcpu *dst_vcpu = NULL; int flags; int rc; - if (cpu_id < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; - spin_lock(&dst_vcpu->arch.local_int.lock); flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); spin_unlock(&dst_vcpu->arch.local_int.lock); @@ -297,19 +261,12 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, return rc; } -static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, - u64 *reg) +static int __sigp_sense_running(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u64 *reg) { struct kvm_s390_local_interrupt *li; - struct kvm_vcpu *dst_vcpu = NULL; int rc; - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; - - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { /* running */ @@ -321,26 +278,19 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, rc = SIGP_CC_STATUS_STORED; } - VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, - rc); + VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", + dst_vcpu->vcpu_id, rc); return rc; } -/* Test whether the destination CPU is available and not busy */ -static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) +static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u8 order_code) { - struct kvm_s390_local_interrupt *li; - int rc = SIGP_CC_ORDER_CODE_ACCEPTED; - struct kvm_vcpu *dst_vcpu = NULL; - - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; + struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int; + /* handle (RE)START in user space */ 
+ int rc = -EOPNOTSUPP; - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; - li = &dst_vcpu->arch.local_int; spin_lock(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) rc = SIGP_CC_BUSY; @@ -349,90 +299,131 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) return rc; } -int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) +static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u8 order_code) { - int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; - int r3 = vcpu->arch.sie_block->ipa & 0x000f; - u32 parameter; - u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; - u8 order_code; - int rc; + /* handle (INITIAL) CPU RESET in user space */ + return -EOPNOTSUPP; +} - /* sigp in userspace can exit */ - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); +static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu) +{ + /* handle unknown orders in user space */ + return -EOPNOTSUPP; +} - order_code = kvm_s390_get_base_disp_rs(vcpu); +static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, + u16 cpu_addr, u32 parameter, u64 *status_reg) +{ + int rc; + struct kvm_vcpu *dst_vcpu; - if (r1 % 2) - parameter = vcpu->run->s.regs.gprs[r1]; - else - parameter = vcpu->run->s.regs.gprs[r1 + 1]; + if (cpu_addr >= KVM_MAX_VCPUS) + return SIGP_CC_NOT_OPERATIONAL; + + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; - trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter); switch (order_code) { case SIGP_SENSE: vcpu->stat.instruction_sigp_sense++; - rc = __sigp_sense(vcpu, cpu_addr, - &vcpu->run->s.regs.gprs[r1]); + rc = __sigp_sense(vcpu, dst_vcpu, status_reg); break; case SIGP_EXTERNAL_CALL: vcpu->stat.instruction_sigp_external_call++; - rc = __sigp_external_call(vcpu, cpu_addr); + rc = __sigp_external_call(vcpu, dst_vcpu); break; case SIGP_EMERGENCY_SIGNAL: vcpu->stat.instruction_sigp_emergency++; - rc = __sigp_emergency(vcpu, cpu_addr); + rc = __sigp_emergency(vcpu, dst_vcpu); break; case SIGP_STOP: vcpu->stat.instruction_sigp_stop++; - rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP); + rc = __sigp_stop(vcpu, dst_vcpu); break; case SIGP_STOP_AND_STORE_STATUS: - vcpu->stat.instruction_sigp_stop++; - rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | - ACTION_STOP_ON_STOP); + vcpu->stat.instruction_sigp_stop_store_status++; + rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg); break; case SIGP_STORE_STATUS_AT_ADDRESS: - rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter, - &vcpu->run->s.regs.gprs[r1]); - break; - case SIGP_SET_ARCHITECTURE: - vcpu->stat.instruction_sigp_arch++; - rc = __sigp_set_arch(vcpu, parameter); + vcpu->stat.instruction_sigp_store_status++; + rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter, + status_reg); break; case SIGP_SET_PREFIX: vcpu->stat.instruction_sigp_prefix++; - rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, - &vcpu->run->s.regs.gprs[r1]); + rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg); break; case SIGP_COND_EMERGENCY_SIGNAL: - rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter, - &vcpu->run->s.regs.gprs[r1]); + vcpu->stat.instruction_sigp_cond_emergency++; + rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter, + status_reg); break; case SIGP_SENSE_RUNNING: vcpu->stat.instruction_sigp_sense_running++; - rc = __sigp_sense_running(vcpu, cpu_addr, - 
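handle_sigp_dst() above hoists the one check every order handler used to repeat, validating cpu_addr and resolving the destination VCPU, so the individual __sigp_* helpers now take a guaranteed-valid pointer. Roughly, as a sketch with hypothetical simplified types and lookup:

#define SIGP_CC_NOT_OPERATIONAL 3
#define MAX_VCPUS 64                    /* stand-in for KVM_MAX_VCPUS */

struct vcpu { int id; };

static struct vcpu *get_vcpu(struct vcpu *const *table, unsigned int addr)
{
    return addr < MAX_VCPUS ? table[addr] : (struct vcpu *)0;
}

/* One addressing check up front; every order handler then receives a
 * destination that is guaranteed to exist. */
static int handle_order(struct vcpu *const *table, unsigned int cpu_addr,
                        int (*order)(struct vcpu *dst))
{
    struct vcpu *dst = get_vcpu(table, cpu_addr);

    return dst ? order(dst) : SIGP_CC_NOT_OPERATIONAL;
}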
&vcpu->run->s.regs.gprs[r1]); + rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg); break; case SIGP_START: - rc = sigp_check_callable(vcpu, cpu_addr); - if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) - rc = -EOPNOTSUPP; /* Handle START in user space */ + vcpu->stat.instruction_sigp_start++; + rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code); break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; - rc = sigp_check_callable(vcpu, cpu_addr); - if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) { - VCPU_EVENT(vcpu, 4, - "sigp restart %x to handle userspace", - cpu_addr); - /* user space must know about restart */ - rc = -EOPNOTSUPP; - } + rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code); + break; + case SIGP_INITIAL_CPU_RESET: + vcpu->stat.instruction_sigp_init_cpu_reset++; + rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code); + break; + case SIGP_CPU_RESET: + vcpu->stat.instruction_sigp_cpu_reset++; + rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code); + break; + default: + vcpu->stat.instruction_sigp_unknown++; + rc = __prepare_sigp_unknown(vcpu, dst_vcpu); + } + + if (rc == -EOPNOTSUPP) + VCPU_EVENT(vcpu, 4, + "sigp order %u -> cpu %x: handled in user space", + order_code, dst_vcpu->vcpu_id); + + return rc; +} + +int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) +{ + int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int r3 = vcpu->arch.sie_block->ipa & 0x000f; + u32 parameter; + u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; + u8 order_code; + int rc; + + /* sigp in userspace can exit */ + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + order_code = kvm_s390_get_base_disp_rs(vcpu); + + if (r1 % 2) + parameter = vcpu->run->s.regs.gprs[r1]; + else + parameter = vcpu->run->s.regs.gprs[r1 + 1]; + + trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter); + switch (order_code) { + case SIGP_SET_ARCHITECTURE: + vcpu->stat.instruction_sigp_arch++; + rc = __sigp_set_arch(vcpu, parameter); break; default: - return -EOPNOTSUPP; + rc = handle_sigp_dst(vcpu, order_code, cpu_addr, + parameter, + &vcpu->run->s.regs.gprs[r1]); } if (rc < 0) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 71c7eff2c89..be99357d238 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -844,7 +844,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, down_read(&mm->mmap_sem); retry: - ptep = get_locked_pte(current->mm, addr, &ptl); + ptep = get_locked_pte(mm, addr, &ptl); if (unlikely(!ptep)) { up_read(&mm->mmap_sem); return -EFAULT; @@ -888,6 +888,45 @@ retry: } EXPORT_SYMBOL(set_guest_storage_key); +unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) +{ + spinlock_t *ptl; + pgste_t pgste; + pte_t *ptep; + uint64_t physaddr; + unsigned long key = 0; + + down_read(&mm->mmap_sem); + ptep = get_locked_pte(mm, addr, &ptl); + if (unlikely(!ptep)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + pgste = pgste_get_lock(ptep); + + if (pte_val(*ptep) & _PAGE_INVALID) { + key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; + key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; + key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48; + key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48; + } else { + physaddr = pte_val(*ptep) & PAGE_MASK; + key = page_get_storage_key(physaddr); + + /* Reflect guest's logical view, not physical */ + if (pgste_val(pgste) & PGSTE_GR_BIT) + key |= _PAGE_REFERENCED; + if (pgste_val(pgste) & PGSTE_GC_BIT) + key |= _PAGE_CHANGED; + } + + 
pgste_set_unlock(ptep, pgste); + pte_unmap_unlock(ptep, ptl); + up_read(&mm->mmap_sem); + return key; +} +EXPORT_SYMBOL(get_guest_storage_key); + #else /* CONFIG_PGSTE */ static inline int page_table_with_pgste(struct page *page) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6ed0c30d6a0..d89c6b828c9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -33,7 +33,7 @@ #define KVM_MAX_VCPUS 255 #define KVM_SOFT_MAX_VCPUS 160 -#define KVM_USER_MEM_SLOTS 125 +#define KVM_USER_MEM_SLOTS 509 /* memory slots that are not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 3 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) @@ -51,6 +51,7 @@ | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL +#define CR3_PCID_INVD (1UL << 63) #define CR4_RESERVED_BITS \ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ @@ -361,6 +362,7 @@ struct kvm_vcpu_arch { int mp_state; u64 ia32_misc_enable_msr; bool tpr_access_reporting; + u64 ia32_xss; /* * Paging state of the vcpu @@ -542,7 +544,7 @@ struct kvm_apic_map { struct rcu_head rcu; u8 ldr_bits; /* fields bellow are used to decode ldr values in different modes */ - u32 cid_shift, cid_mask, lid_mask; + u32 cid_shift, cid_mask, lid_mask, broadcast; struct kvm_lapic *phys_map[256]; /* first index is cluster id second is cpu id in a cluster */ struct kvm_lapic *logical_map[16][16]; @@ -602,6 +604,9 @@ struct kvm_arch { struct kvm_xen_hvm_config xen_hvm_config; + /* reads protected by irq_srcu, writes by irq_lock */ + struct hlist_head mask_notifier_list; + /* fields used by HYPER-V emulation */ u64 hv_guest_os_id; u64 hv_hypercall; @@ -659,6 +664,16 @@ struct msr_data { u64 data; }; +struct kvm_lapic_irq { + u32 vector; + u32 delivery_mode; + u32 dest_mode; + u32 level; + u32 trig_mode; + u32 shorthand; + u32 dest_id; +}; + struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ @@ -767,6 +782,7 @@ struct kvm_x86_ops { enum x86_intercept_stage stage); void (*handle_external_intr)(struct kvm_vcpu *vcpu); bool (*mpx_supported)(void); + bool (*xsaves_supported)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); @@ -818,6 +834,19 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); +struct kvm_irq_mask_notifier { + void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); + int irq; + struct hlist_node link; +}; + +void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, + bool mask); + extern bool tdp_enabled; u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); @@ -863,7 +892,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); -void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector); +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); @@ -895,6 +924,7 @@ 
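get_guest_storage_key() above composes the guest-visible key from two sources: for invalid PTEs the whole key is read back from the software PGSTE, while for mapped pages it starts from the real frame's hardware key and folds in the guest's logical referenced/changed view. A sketch of that composition with an invented bit layout (the real PGSTE encoding differs):

#include <stdint.h>

/* Invented layout for the demo; not the real PGSTE bit positions. */
#define PG_ACC   0x0fULL    /* access-control bits */
#define PG_FP    0x10ULL    /* fetch protection */
#define PG_GR    0x20ULL    /* guest referenced */
#define PG_GC    0x40ULL    /* guest changed */
#define KEY_REF  0x04ULL
#define KEY_CHG  0x02ULL

static uint64_t guest_storage_key(int pte_valid, uint64_t pgste,
                                  uint64_t hw_key)
{
    uint64_t key;

    if (!pte_valid)
        /* No backing frame: the whole key lives in software. */
        key = pgste & (PG_ACC | PG_FP);
    else
        /* Mapped: start from the real frame's key... */
        key = hw_key;

    /* ...and reflect the guest's logical R/C view either way. */
    if (pgste & PG_GR)
        key |= KEY_REF;
    if (pgste & PG_GC)
        key |= KEY_CHG;
    return key;
}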
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t gfn, void *data, int offset, int len, u32 access); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); +bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); static inline int __kvm_irq_line_state(unsigned long *irq_state, int irq_source_id, int level) @@ -1066,6 +1096,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index bcbfade26d8..45afaee9555 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -69,6 +69,7 @@ #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_XSAVES 0x00100000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 @@ -159,6 +160,8 @@ enum vmcs_field { EOI_EXIT_BITMAP3_HIGH = 0x00002023, VMREAD_BITMAP = 0x00002026, VMWRITE_BITMAP = 0x00002028, + XSS_EXIT_BITMAP = 0x0000202C, + XSS_EXIT_BITMAP_HIGH = 0x0000202D, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 7e7a79ada65..5fa9770035d 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -16,6 +16,7 @@ #define XSTATE_Hi16_ZMM 0x80 #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) +#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) /* Bit 63 of XCR0 is reserved for future expansion */ #define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 990a2fe1588..b813bf9da1e 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -72,6 +72,8 @@ #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 #define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -116,6 +118,8 @@ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ { EXIT_REASON_INVD, "INVD" }, \ { EXIT_REASON_INVVPID, "INVVPID" }, \ - { EXIT_REASON_INVPCID, "INVPCID" } + { EXIT_REASON_INVPCID, "INVPCID" }, \ + { EXIT_REASON_XSAVES, "XSAVES" }, \ + { EXIT_REASON_XRSTORS, "XRSTORS" } #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f6945bef2cd..94f64348430 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -283,7 +283,14 @@ NOKPROBE_SYMBOL(do_async_page_fault); static void __init paravirt_ops_setup(void) { pv_info.name = "KVM"; - pv_info.paravirt_enabled = 1; + + /* + * KVM isn't paravirt in the sense of paravirt_enabled. A KVM + * guest kernel works like a bare metal kernel with additional + * features, and paravirt_enabled is about features that are + * missing. 
+ */ + pv_info.paravirt_enabled = 0; if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_cpu_ops.io_delay = kvm_io_delay; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d9156ceecdf..42caaef897c 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -59,13 +59,12 @@ static void kvm_get_wallclock(struct timespec *now) native_write_msr(msr_kvm_wall_clock, low, high); - preempt_disable(); - cpu = smp_processor_id(); + cpu = get_cpu(); vcpu_time = &hv_clock[cpu].pvti; pvclock_read_wallclock(&wall_clock, vcpu_time, now); - preempt_enable(); + put_cpu(); } static int kvm_set_wallclock(const struct timespec *now) @@ -107,11 +106,10 @@ static unsigned long kvm_get_tsc_khz(void) int cpu; unsigned long tsc_khz; - preempt_disable(); - cpu = smp_processor_id(); + cpu = get_cpu(); src = &hv_clock[cpu].pvti; tsc_khz = pvclock_tsc_khz(src); - preempt_enable(); + put_cpu(); return tsc_khz; } @@ -263,7 +261,6 @@ void __init kvmclock_init(void) #endif kvm_get_preset_lpj(); clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); - pv_info.paravirt_enabled = 1; pv_info.name = "KVM"; if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) @@ -284,23 +281,22 @@ int __init kvm_setup_vsyscall_timeinfo(void) size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); - preempt_disable(); - cpu = smp_processor_id(); + cpu = get_cpu(); vcpu_time = &hv_clock[cpu].pvti; flags = pvclock_read_flags(vcpu_time); if (!(flags & PVCLOCK_TSC_STABLE_BIT)) { - preempt_enable(); + put_cpu(); return 1; } if ((ret = pvclock_init_vsyscall(hv_clock, size))) { - preempt_enable(); + put_cpu(); return ret; } - preempt_enable(); + put_cpu(); kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; #endif diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 4c540c4719d..0de1fae2bdf 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -738,3 +738,4 @@ void *get_xsave_addr(struct xsave_struct *xsave, int xstate) return (void *)xsave + xstate_comp_offsets[feature]; } +EXPORT_SYMBOL_GPL(get_xsave_addr); diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 25d22b2d650..08f790dfadc 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -7,14 +7,13 @@ CFLAGS_vmx.o := -I. 
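The kvmclock hunks above swap preempt_disable()/smp_processor_id() pairs for get_cpu()/put_cpu(), which bundle the preemption guard and the CPU-id read into one call. A userspace mock of the idiom (GNU C statement expressions, as in the kernel's own definition):

#include <stdio.h>

/* Userspace mock: in the kernel, get_cpu() is essentially
 * ({ preempt_disable(); smp_processor_id(); }) and put_cpu() is
 * preempt_enable(), so the id fetch and the guard cannot be split. */
static int preempt_count;
static void preempt_disable(void) { preempt_count++; }
static void preempt_enable(void)  { preempt_count--; }
static int  smp_processor_id(void) { return 0; }

#define get_cpu()  ({ preempt_disable(); smp_processor_id(); })
#define put_cpu()  preempt_enable()

int main(void)
{
    int cpu = get_cpu();

    printf("on cpu %d, preempt_count=%d\n", cpu, preempt_count);
    put_cpu();
    return preempt_count;               /* 0 when balanced */
}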
KVM := ../../../virt/kvm -kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ - $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ +kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o -kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ - i8254.o cpuid.o pmu.o + i8254.o ioapic.o irq_comm.o cpuid.o pmu.o +kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o kvm-intel-y += vmx.o kvm-amd-y += svm.o diff --git a/virt/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index e05000e200d..6eb5c20ee37 100644 --- a/virt/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c @@ -20,6 +20,32 @@ #include <linux/namei.h> #include <linux/fs.h> #include "irq.h" +#include "assigned-dev.h" + +struct kvm_assigned_dev_kernel { + struct kvm_irq_ack_notifier ack_notifier; + struct list_head list; + int assigned_dev_id; + int host_segnr; + int host_busnr; + int host_devfn; + unsigned int entries_nr; + int host_irq; + bool host_irq_disabled; + bool pci_2_3; + struct msix_entry *host_msix_entries; + int guest_irq; + struct msix_entry *guest_msix_entries; + unsigned long irq_requested_type; + int irq_source_id; + int flags; + struct pci_dev *dev; + struct kvm *kvm; + spinlock_t intx_lock; + spinlock_t intx_mask_lock; + char irq_name[32]; + struct pci_saved_state *pci_saved_state; +}; static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, int assigned_dev_id) @@ -748,7 +774,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, if (r) goto out_list_del; } - r = kvm_assign_device(kvm, match); + r = kvm_assign_device(kvm, match->dev); if (r) goto out_list_del; @@ -790,7 +816,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, goto out; } - kvm_deassign_device(kvm, match); + kvm_deassign_device(kvm, match->dev); kvm_free_assigned_device(kvm, match); diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h new file mode 100644 index 00000000000..a428c1a211b --- /dev/null +++ b/arch/x86/kvm/assigned-dev.h @@ -0,0 +1,32 @@ +#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H +#define ARCH_X86_KVM_ASSIGNED_DEV_H + +#include <linux/kvm_host.h> + +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT +int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev); +int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev); + +int kvm_iommu_map_guest(struct kvm *kvm); +int kvm_iommu_unmap_guest(struct kvm *kvm); + +long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg); + +void kvm_free_all_assigned_devices(struct kvm *kvm); +#else +static inline int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + return 0; +} + +static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg) +{ + return -ENOTTY; +} + +static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {} +#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */ + +#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 976e3a57f9e..8a80737ee6e 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -23,7 +23,7 @@ #include "mmu.h" #include "trace.h" -static u32 xstate_required_size(u64 xstate_bv) +static u32 xstate_required_size(u64 xstate_bv, bool compacted) { int feature_bit = 0; u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; @@ -31,9 +31,10 @@ static u32 xstate_required_size(u64 xstate_bv) xstate_bv &= XSTATE_EXTEND_MASK; while (xstate_bv) { if (xstate_bv & 0x1) 
{ - u32 eax, ebx, ecx, edx; + u32 eax, ebx, ecx, edx, offset; cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx); - ret = max(ret, eax + ebx); + offset = compacted ? ret : ebx; + ret = max(ret, offset + eax); } xstate_bv >>= 1; @@ -53,6 +54,8 @@ u64 kvm_supported_xcr0(void) return xcr0; } +#define F(x) bit(X86_FEATURE_##x) + int kvm_update_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -64,13 +67,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) /* Update OSXSAVE bit */ if (cpu_has_xsave && best->function == 0x1) { - best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); + best->ecx &= ~F(OSXSAVE); if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) - best->ecx |= bit(X86_FEATURE_OSXSAVE); + best->ecx |= F(OSXSAVE); } if (apic) { - if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER)) + if (best->ecx & F(TSC_DEADLINE_TIMER)) apic->lapic_timer.timer_mode_mask = 3 << 17; else apic->lapic_timer.timer_mode_mask = 1 << 17; @@ -85,9 +88,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) (best->eax | ((u64)best->edx << 32)) & kvm_supported_xcr0(); vcpu->arch.guest_xstate_size = best->ebx = - xstate_required_size(vcpu->arch.xcr0); + xstate_required_size(vcpu->arch.xcr0, false); } + best = kvm_find_cpuid_entry(vcpu, 0xD, 1); + if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) + best->ebx = xstate_required_size(vcpu->arch.xcr0, true); + /* * The existing code assumes virtual address is 48-bit in the canonical * address checks; exit if it is ever changed. @@ -122,8 +129,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) break; } } - if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) { - entry->edx &= ~bit(X86_FEATURE_NX); + if (entry && (entry->edx & F(NX)) && !is_efer_nx()) { + entry->edx &= ~F(NX); printk(KERN_INFO "kvm: guest NX capability removed\n"); } } @@ -227,8 +234,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->flags = 0; } -#define F(x) bit(X86_FEATURE_##x) - static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, u32 func, u32 index, int *nent, int maxnent) { @@ -267,6 +272,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0; + unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? 
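xstate_required_size() above now distinguishes the standard XSAVE layout, where CPUID.0xD reports each feature's fixed offset in ebx, from the compacted XSAVES/XSAVEC layout, where enabled features are packed back to back and the offset is the running size. A sketch of both cases; the table is a stand-in for cpuid_count(), and compaction alignment is ignored for brevity:

#include <stdint.h>

#define XSAVE_HDR_OFFSET 512            /* legacy x87/SSE region */
#define XSAVE_HDR_SIZE    64

struct xfeat { uint32_t size, offset; }; /* CPUID.0xD[i].eax / .ebx */

static uint32_t xstate_required_size(uint64_t xstate_bv, int compacted,
                                     const struct xfeat *tab)
{
    uint32_t ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
    int bit = 2;                        /* first extended feature */

    xstate_bv >>= 2;                    /* x87/SSE live in the legacy area */
    while (xstate_bv) {
        if (xstate_bv & 1) {
            /* Standard: architectural offset from CPUID.
             * Compacted: features are packed, offset = running size. */
            uint32_t offset = compacted ? ret : tab[bit].offset;

            if (offset + tab[bit].size > ret)
                ret = offset + tab[bit].size;
        }
        xstate_bv >>= 1;
        bit++;
    }
    return ret;
}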
F(XSAVES) : 0; /* cpuid 1.edx */ const u32 kvm_supported_word0_x86_features = @@ -317,7 +323,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, const u32 kvm_supported_word9_x86_features = F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | - F(ADX) | F(SMAP); + F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | + F(AVX512CD); + + /* cpuid 0xD.1.eax */ + const u32 kvm_supported_word10_x86_features = + F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves; /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -453,16 +464,34 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, u64 supported = kvm_supported_xcr0(); entry->eax &= supported; + entry->ebx = xstate_required_size(supported, false); + entry->ecx = entry->ebx; entry->edx &= supported >> 32; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + if (!supported) + break; + for (idx = 1, i = 1; idx < 64; ++idx) { u64 mask = ((u64)1 << idx); if (*nent >= maxnent) goto out; do_cpuid_1_ent(&entry[i], function, idx); - if (entry[i].eax == 0 || !(supported & mask)) - continue; + if (idx == 1) { + entry[i].eax &= kvm_supported_word10_x86_features; + entry[i].ebx = 0; + if (entry[i].eax & (F(XSAVES)|F(XSAVEC))) + entry[i].ebx = + xstate_required_size(supported, + true); + } else { + if (entry[i].eax == 0 || !(supported & mask)) + continue; + if (WARN_ON_ONCE(entry[i].ecx & 1)) + continue; + } + entry[i].ecx = 0; + entry[i].edx = 0; entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; ++*nent; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9f8a2faf504..169b09d76dd 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -123,6 +123,7 @@ #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ #define Escape (5<<15) /* Escape to coprocessor instruction */ +#define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */ #define Sse (1<<18) /* SSE Vector instruction */ /* Generic ModRM decode. */ #define ModRM (1<<19) @@ -166,6 +167,8 @@ #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */ #define NoBigReal ((u64)1 << 50) /* No big real mode */ #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */ +#define NearBranch ((u64)1 << 52) /* Near branches */ +#define No16 ((u64)1 << 53) /* No 16 bit operand */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -209,6 +212,7 @@ struct opcode { const struct group_dual *gdual; const struct gprefix *gprefix; const struct escape *esc; + const struct instr_dual *idual; void (*fastop)(struct fastop *fake); } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); @@ -231,6 +235,11 @@ struct escape { struct opcode high[64]; }; +struct instr_dual { + struct opcode mod012; + struct opcode mod3; +}; + /* EFLAGS bit definitions. 
*/ #define EFLG_ID (1<<21) #define EFLG_VIP (1<<20) @@ -379,6 +388,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); ON64(FOP2E(op##q, rax, cl)) \ FOP_END +/* 2 operand, src and dest are reversed */ +#define FASTOP2R(op, name) \ + FOP_START(name) \ + FOP2E(op##b, dl, al) \ + FOP2E(op##w, dx, ax) \ + FOP2E(op##l, edx, eax) \ + ON64(FOP2E(op##q, rdx, rax)) \ + FOP_END + #define FOP3E(op, dst, src, src2) \ FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET @@ -477,9 +495,9 @@ address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg) } static inline unsigned long -register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg) +register_address(struct x86_emulate_ctxt *ctxt, int reg) { - return address_mask(ctxt, reg); + return address_mask(ctxt, reg_read(ctxt, reg)); } static void masked_increment(ulong *reg, ulong mask, int inc) @@ -488,7 +506,7 @@ static void masked_increment(ulong *reg, ulong mask, int inc) } static inline void -register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc) +register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc) { ulong mask; @@ -496,7 +514,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in mask = ~0UL; else mask = ad_mask(ctxt); - masked_increment(reg, mask, inc); + masked_increment(reg_rmw(ctxt, reg), mask, inc); } static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) @@ -564,40 +582,6 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } -static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, - int cs_l) -{ - switch (ctxt->op_bytes) { - case 2: - ctxt->_eip = (u16)dst; - break; - case 4: - ctxt->_eip = (u32)dst; - break; -#ifdef CONFIG_X86_64 - case 8: - if ((cs_l && is_noncanonical_address(dst)) || - (!cs_l && (dst >> 32) != 0)) - return emulate_gp(ctxt, 0); - ctxt->_eip = dst; - break; -#endif - default: - WARN(1, "unsupported eip assignment size\n"); - } - return X86EMUL_CONTINUE; -} - -static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) -{ - return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64); -} - -static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) -{ - return assign_eip_near(ctxt, ctxt->_eip + rel); -} - static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) { u16 selector; @@ -641,25 +625,24 @@ static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size) return true; } -static int __linearize(struct x86_emulate_ctxt *ctxt, - struct segmented_address addr, - unsigned *max_size, unsigned size, - bool write, bool fetch, - ulong *linear) +static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + unsigned *max_size, unsigned size, + bool write, bool fetch, + enum x86emul_mode mode, ulong *linear) { struct desc_struct desc; bool usable; ulong la; u32 lim; u16 sel; - unsigned cpl; la = seg_base(ctxt, addr.seg) + addr.ea; *max_size = 0; - switch (ctxt->mode) { + switch (mode) { case X86EMUL_MODE_PROT64: - if (((signed long)la << 16) >> 16 != la) - return emulate_gp(ctxt, 0); + if (is_noncanonical_address(la)) + goto bad; *max_size = min_t(u64, ~0u, (1ull << 48) - la); if (size > *max_size) @@ -678,46 +661,20 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, if (!fetch && (desc.type & 8) && !(desc.type & 2)) goto bad; lim = desc_limit_scaled(&desc); - if ((ctxt->mode == X86EMUL_MODE_REAL) && 
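The __linearize() change above swaps an open-coded sign-extension test for is_noncanonical_address(). Both say the same thing: on x86-64 with 48-bit virtual addresses, a canonical address must equal itself after sign-extending bit 47, which the shift-up/shift-down trick reproduces. A self-contained check:

#include <stdint.h>
#include <stdio.h>

static int noncanonical(uint64_t la)
{
    /* Sign-extend bit 47 and compare with the original. */
    return (uint64_t)((int64_t)(la << 16) >> 16) != la;
}

int main(void)
{
    printf("%d\n", noncanonical(0x00007fffffffffffULL)); /* 0: canonical   */
    printf("%d\n", noncanonical(0xffff800000000000ULL)); /* 0: canonical   */
    printf("%d\n", noncanonical(0x0000800000000000ULL)); /* 1: in the hole */
    return 0;
}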
!fetch && - (ctxt->d & NoBigReal)) { - /* la is between zero and 0xffff */ - if (la > 0xffff) - goto bad; - *max_size = 0x10000 - la; - } else if ((desc.type & 8) || !(desc.type & 4)) { - /* expand-up segment */ - if (addr.ea > lim) - goto bad; - *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea); - } else { + if (!(desc.type & 8) && (desc.type & 4)) { /* expand-down segment */ if (addr.ea <= lim) goto bad; lim = desc.d ? 0xffffffff : 0xffff; - if (addr.ea > lim) - goto bad; - *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea); } + if (addr.ea > lim) + goto bad; + *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea); if (size > *max_size) goto bad; - cpl = ctxt->ops->cpl(ctxt); - if (!(desc.type & 8)) { - /* data segment */ - if (cpl > desc.dpl) - goto bad; - } else if ((desc.type & 8) && !(desc.type & 4)) { - /* nonconforming code segment */ - if (cpl != desc.dpl) - goto bad; - } else if ((desc.type & 8) && (desc.type & 4)) { - /* conforming code segment */ - if (cpl < desc.dpl) - goto bad; - } + la &= (u32)-1; break; } - if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8) - la &= (u32)-1; if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0)) return emulate_gp(ctxt, 0); *linear = la; @@ -735,9 +692,55 @@ static int linearize(struct x86_emulate_ctxt *ctxt, ulong *linear) { unsigned max_size; - return __linearize(ctxt, addr, &max_size, size, write, false, linear); + return __linearize(ctxt, addr, &max_size, size, write, false, + ctxt->mode, linear); +} + +static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, + enum x86emul_mode mode) +{ + ulong linear; + int rc; + unsigned max_size; + struct segmented_address addr = { .seg = VCPU_SREG_CS, + .ea = dst }; + + if (ctxt->op_bytes != sizeof(unsigned long)) + addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1); + rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear); + if (rc == X86EMUL_CONTINUE) + ctxt->_eip = addr.ea; + return rc; +} + +static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) +{ + return assign_eip(ctxt, dst, ctxt->mode); } +static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, + const struct desc_struct *cs_desc) +{ + enum x86emul_mode mode = ctxt->mode; + +#ifdef CONFIG_X86_64 + if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) { + u64 efer = 0; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + if (efer & EFER_LMA) + mode = X86EMUL_MODE_PROT64; + } +#endif + if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32) + mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + return assign_eip(ctxt, dst, mode); +} + +static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) +{ + return assign_eip_near(ctxt, ctxt->_eip + rel); +} static int segmented_read_std(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, @@ -776,7 +779,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) * boundary check itself. Instead, we use max_size to check * against op_size. 
*/ - rc = __linearize(ctxt, addr, &max_size, 0, false, true, &linear); + rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode, + &linear); if (unlikely(rc != X86EMUL_CONTINUE)) return rc; @@ -911,6 +915,8 @@ FASTOP2W(btc); FASTOP2(xadd); +FASTOP2R(cmp, cmp_r); + static u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; @@ -1221,6 +1227,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, if (index_reg != 4) modrm_ea += reg_read(ctxt, index_reg) << scale; } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { + modrm_ea += insn_fetch(s32, ctxt); if (ctxt->mode == X86EMUL_MODE_PROT64) ctxt->rip_relative = 1; } else { @@ -1229,10 +1236,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, adjust_modrm_seg(ctxt, base_reg); } switch (ctxt->modrm_mod) { - case 0: - if (ctxt->modrm_rm == 5) - modrm_ea += insn_fetch(s32, ctxt); - break; case 1: modrm_ea += insn_fetch(s8, ctxt); break; @@ -1284,7 +1287,8 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt) else sv = (s64)ctxt->src.val & (s64)mask; - ctxt->dst.addr.mem.ea += (sv >> 3); + ctxt->dst.addr.mem.ea = address_mask(ctxt, + ctxt->dst.addr.mem.ea + (sv >> 3)); } /* only subword offset */ @@ -1610,6 +1614,9 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, sizeof(base3), &ctxt->exception); if (ret != X86EMUL_CONTINUE) return ret; + if (is_noncanonical_address(get_desc_base(&seg_desc) | + ((u64)base3 << 32))) + return emulate_gp(ctxt, 0); } load: ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); @@ -1807,6 +1814,10 @@ static int em_push_sreg(struct x86_emulate_ctxt *ctxt) int seg = ctxt->src2.val; ctxt->src.val = get_segment_selector(ctxt, seg); + if (ctxt->op_bytes == 4) { + rsp_increment(ctxt, -2); + ctxt->op_bytes = 2; + } return em_push(ctxt); } @@ -1850,7 +1861,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt) static int em_pushf(struct x86_emulate_ctxt *ctxt) { - ctxt->src.val = (unsigned long)ctxt->eflags; + ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM; return em_push(ctxt); } @@ -2035,7 +2046,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); + rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); if (rc != X86EMUL_CONTINUE) { WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); /* assigning eip failed; restore the old cs */ @@ -2045,31 +2056,22 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) return rc; } -static int em_grp45(struct x86_emulate_ctxt *ctxt) +static int em_jmp_abs(struct x86_emulate_ctxt *ctxt) { - int rc = X86EMUL_CONTINUE; + return assign_eip_near(ctxt, ctxt->src.val); +} - switch (ctxt->modrm_reg) { - case 2: /* call near abs */ { - long int old_eip; - old_eip = ctxt->_eip; - rc = assign_eip_near(ctxt, ctxt->src.val); - if (rc != X86EMUL_CONTINUE) - break; - ctxt->src.val = old_eip; - rc = em_push(ctxt); - break; - } - case 4: /* jmp abs */ - rc = assign_eip_near(ctxt, ctxt->src.val); - break; - case 5: /* jmp far */ - rc = em_jmp_far(ctxt); - break; - case 6: /* push */ - rc = em_push(ctxt); - break; - } +static int em_call_near_abs(struct x86_emulate_ctxt *ctxt) +{ + int rc; + long int old_eip; + + old_eip = ctxt->_eip; + rc = assign_eip_near(ctxt, ctxt->src.val); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->src.val = old_eip; + rc = em_push(ctxt); return rc; } @@ -2128,11 +2130,11 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) /* Outer-privilege level return is not implemented */ if (ctxt->mode 
>= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) return X86EMUL_UNHANDLEABLE; - rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false, + rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false, &new_desc); if (rc != X86EMUL_CONTINUE) return rc; - rc = assign_eip_far(ctxt, eip, new_desc.l); + rc = assign_eip_far(ctxt, eip, &new_desc); if (rc != X86EMUL_CONTINUE) { WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); @@ -2316,6 +2318,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); ctxt->eflags &= ~msr_data; + ctxt->eflags |= EFLG_RESERVED_ONE_MASK; #endif } else { /* legacy mode */ @@ -2349,11 +2352,9 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) && !vendor_intel(ctxt)) return emulate_ud(ctxt); - /* XXX sysenter/sysexit have not been tested in 64bit mode. - * Therefore, we inject an #UD. - */ + /* sysenter/sysexit have not been tested in 64bit mode. */ if (ctxt->mode == X86EMUL_MODE_PROT64) - return emulate_ud(ctxt); + return X86EMUL_UNHANDLEABLE; setup_syscalls_segments(ctxt, &cs, &ss); @@ -2425,6 +2426,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) if ((msr_data & 0xfffc) == 0x0) return emulate_gp(ctxt, 0); ss_sel = (u16)(msr_data + 24); + rcx = (u32)rcx; + rdx = (u32)rdx; break; case X86EMUL_MODE_PROT64: cs_sel = (u16)(msr_data + 32); @@ -2599,7 +2602,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; save_state_to_tss16(ctxt, &tss_seg); @@ -2607,13 +2609,11 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; if (old_tss_sel != 0xffff) { @@ -2624,7 +2624,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, sizeof tss_seg.prev_task_link, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; } @@ -2813,7 +2812,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, * * 1. jmp/call/int to task gate: Check against DPL of the task gate * 2. Exception/IRQ/iret: No check is performed - * 3. jmp/call to TSS: Check against DPL of the TSS + * 3. jmp/call to TSS/task-gate: No check is performed since the + * hardware checks it before exiting. */ if (reason == TASK_SWITCH_GATE) { if (idt_index != -1) { @@ -2830,13 +2830,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl) return emulate_gp(ctxt, (idt_index << 3) | 0x2); } - } else if (reason != TASK_SWITCH_IRET) { - int dpl = next_tss_desc.dpl; - if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl) - return emulate_gp(ctxt, tss_selector); } - desc_limit = desc_limit_scaled(&next_tss_desc); if (!next_tss_desc.p || ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || @@ -2913,8 +2908,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg, { int df = (ctxt->eflags & EFLG_DF) ? 
-op->count : op->count; - register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes); - op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg)); + register_address_increment(ctxt, reg, df * op->bytes); + op->addr.mem.ea = register_address(ctxt, reg); } static int em_das(struct x86_emulate_ctxt *ctxt) @@ -3025,7 +3020,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return X86EMUL_CONTINUE; - rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); + rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); if (rc != X86EMUL_CONTINUE) goto fail; @@ -3215,6 +3210,8 @@ static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt) return emulate_ud(ctxt); ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg); + if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM) + ctxt->dst.bytes = 2; return X86EMUL_CONTINUE; } @@ -3317,7 +3314,7 @@ static int em_sidt(struct x86_emulate_ctxt *ctxt) return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt); } -static int em_lgdt(struct x86_emulate_ctxt *ctxt) +static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt) { struct desc_ptr desc_ptr; int rc; @@ -3329,12 +3326,23 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt) ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; - ctxt->ops->set_gdt(ctxt, &desc_ptr); + if (ctxt->mode == X86EMUL_MODE_PROT64 && + is_noncanonical_address(desc_ptr.address)) + return emulate_gp(ctxt, 0); + if (lgdt) + ctxt->ops->set_gdt(ctxt, &desc_ptr); + else + ctxt->ops->set_idt(ctxt, &desc_ptr); /* Disable writeback. */ ctxt->dst.type = OP_NONE; return X86EMUL_CONTINUE; } +static int em_lgdt(struct x86_emulate_ctxt *ctxt) +{ + return em_lgdt_lidt(ctxt, true); +} + static int em_vmmcall(struct x86_emulate_ctxt *ctxt) { int rc; @@ -3348,20 +3356,7 @@ static int em_vmmcall(struct x86_emulate_ctxt *ctxt) static int em_lidt(struct x86_emulate_ctxt *ctxt) { - struct desc_ptr desc_ptr; - int rc; - - if (ctxt->mode == X86EMUL_MODE_PROT64) - ctxt->op_bytes = 8; - rc = read_descriptor(ctxt, ctxt->src.addr.mem, - &desc_ptr.size, &desc_ptr.address, - ctxt->op_bytes); - if (rc != X86EMUL_CONTINUE) - return rc; - ctxt->ops->set_idt(ctxt, &desc_ptr); - /* Disable writeback. 
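
/*
 * A user-space sketch of the em_lgdt_lidt() consolidation above: the
 * separate LGDT/LIDT handlers collapse into one helper selected by a
 * flag, and both gain the check hardware performs in 64-bit mode -- a
 * non-canonical descriptor-table base raises #GP(0). Assumes 48-bit
 * virtual addresses (4-level paging); set_gdt()/set_idt() are
 * stand-ins for the ctxt->ops callbacks.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct desc_ptr {
    uint16_t size;
    uint64_t address;
};

/* canonical iff bits 63:47 are all zero or all one */
static bool is_noncanonical_address(uint64_t la)
{
    uint64_t upper = la >> 47;

    return upper != 0 && upper != 0x1ffff;
}

static void set_gdt(const struct desc_ptr *dt) { (void)dt; puts("GDTR loaded"); }
static void set_idt(const struct desc_ptr *dt) { (void)dt; puts("IDTR loaded"); }

static int em_lgdt_lidt(const struct desc_ptr *dt, bool lgdt, bool long_mode)
{
    if (long_mode && is_noncanonical_address(dt->address))
        return -1;                      /* emulate_gp(ctxt, 0) */
    if (lgdt)
        set_gdt(dt);
    else
        set_idt(dt);
    return 0;
}

int main(void)
{
    struct desc_ptr dt = { 0x7f, 0xffff800000001000ull };

    printf("lgdt rc=%d\n", em_lgdt_lidt(&dt, true, true));
    dt.address = 0x0000800000000000ull; /* inside the canonical hole */
    printf("lidt rc=%d\n", em_lgdt_lidt(&dt, false, true));
    return 0;
}
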
*/ - ctxt->dst.type = OP_NONE; - return X86EMUL_CONTINUE; + return em_lgdt_lidt(ctxt, false); } static int em_smsw(struct x86_emulate_ctxt *ctxt) @@ -3384,7 +3379,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt) { int rc = X86EMUL_CONTINUE; - register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); + register_address_increment(ctxt, VCPU_REGS_RCX, -1); if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) && (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) rc = jmp_rel(ctxt, ctxt->src.val); @@ -3554,7 +3549,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); if (efer & EFER_LMA) - rsvd = CR3_L_MODE_RESERVED_BITS; + rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD; if (new_val & rsvd) return emulate_gp(ctxt, 0); @@ -3596,8 +3591,15 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt) if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) return emulate_ud(ctxt); - if (check_dr7_gd(ctxt)) + if (check_dr7_gd(ctxt)) { + ulong dr6; + + ctxt->ops->get_dr(ctxt, 6, &dr6); + dr6 &= ~15; + dr6 |= DR6_BD | DR6_RTM; + ctxt->ops->set_dr(ctxt, 6, dr6); return emulate_db(ctxt); + } return X86EMUL_CONTINUE; } @@ -3684,6 +3686,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } +#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) } #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } @@ -3780,11 +3783,11 @@ static const struct opcode group4[] = { static const struct opcode group5[] = { F(DstMem | SrcNone | Lock, em_inc), F(DstMem | SrcNone | Lock, em_dec), - I(SrcMem | Stack, em_grp45), + I(SrcMem | NearBranch, em_call_near_abs), I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), - I(SrcMem | Stack, em_grp45), - I(SrcMemFAddr | ImplicitOps, em_grp45), - I(SrcMem | Stack, em_grp45), D(Undefined), + I(SrcMem | NearBranch, em_jmp_abs), + I(SrcMemFAddr | ImplicitOps, em_jmp_far), + I(SrcMem | Stack, em_push), D(Undefined), }; static const struct opcode group6[] = { @@ -3845,8 +3848,12 @@ static const struct gprefix pfx_0f_6f_0f_7f = { I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), }; +static const struct instr_dual instr_dual_0f_2b = { + I(0, em_mov), N +}; + static const struct gprefix pfx_0f_2b = { - I(0, em_mov), I(0, em_mov), N, N, + ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N, }; static const struct gprefix pfx_0f_28_0f_29 = { @@ -3920,6 +3927,10 @@ static const struct escape escape_dd = { { N, N, N, N, N, N, N, N, } }; +static const struct instr_dual instr_dual_0f_c3 = { + I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N +}; + static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ F6ALU(Lock, em_add), @@ -3964,7 +3975,7 @@ static const struct opcode opcode_table[256] = { I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */ I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ - X16(D(SrcImmByte)), + X16(D(SrcImmByte | NearBranch)), /* 0x80 - 0x87 */ G(ByteOp | DstMem | SrcImm, group1), G(DstMem | SrcImm, group1), @@ -3991,20 +4002,20 @@ static const struct opcode opcode_table[256] = { I2bv(DstAcc | 
SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), I2bv(SrcSI | DstDI | Mov | String, em_mov), - F2bv(SrcSI | DstDI | String | NoWrite, em_cmp), + F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r), /* 0xA8 - 0xAF */ F2bv(DstAcc | SrcImm | NoWrite, em_test), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp), + F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ X8(I(DstReg | SrcImm64 | Mov, em_mov)), /* 0xC0 - 0xC7 */ G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), - I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), - I(ImplicitOps | Stack, em_ret), + I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm), + I(ImplicitOps | NearBranch, em_ret), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), G(ByteOp, group11), G(0, group11), @@ -4024,13 +4035,14 @@ static const struct opcode opcode_table[256] = { /* 0xD8 - 0xDF */ N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, /* 0xE0 - 0xE7 */ - X3(I(SrcImmByte, em_loop)), - I(SrcImmByte, em_jcxz), + X3(I(SrcImmByte | NearBranch, em_loop)), + I(SrcImmByte | NearBranch, em_jcxz), I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in), I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out), /* 0xE8 - 0xEF */ - I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps), - I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps), + I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch), + I(SrcImmFAddr | No64, em_jmp_far), + D(SrcImmByte | ImplicitOps | NearBranch), I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in), I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out), /* 0xF0 - 0xF7 */ @@ -4090,7 +4102,7 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x80 - 0x8F */ - X16(D(SrcImm)), + X16(D(SrcImm | NearBranch)), /* 0x90 - 0x9F */ X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), /* 0xA0 - 0xA7 */ @@ -4121,7 +4133,7 @@ static const struct opcode twobyte_table[256] = { D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xC0 - 0xC7 */ F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), - N, D(DstMem | SrcReg | ModRM | Mov), + N, ID(0, &instr_dual_0f_c3), N, N, N, GD(0, &group9), /* 0xC8 - 0xCF */ X8(I(DstReg, em_bswap)), @@ -4134,12 +4146,20 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N }; +static const struct instr_dual instr_dual_0f_38_f0 = { + I(DstReg | SrcMem | Mov, em_movbe), N +}; + +static const struct instr_dual instr_dual_0f_38_f1 = { + I(DstMem | SrcReg | Mov, em_movbe), N +}; + static const struct gprefix three_byte_0f_38_f0 = { - I(DstReg | SrcMem | Mov, em_movbe), N, N, N + ID(0, &instr_dual_0f_38_f0), N, N, N }; static const struct gprefix three_byte_0f_38_f1 = { - I(DstMem | SrcReg | Mov, em_movbe), N, N, N + ID(0, &instr_dual_0f_38_f1), N, N, N }; /* @@ -4152,8 +4172,8 @@ static const struct opcode opcode_map_0f_38[256] = { /* 0x80 - 0xef */ X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), /* 0xf0 - 0xf1 */ - GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0), - GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1), + GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0), + GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1), /* 0xf2 - 0xff */ N, N, X4(N), X8(N) 
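
/*
 * The new InstrDual entries above pick one of two decodings by the mod
 * field of ModRM: mod == 3 selects the register form, anything else
 * the memory form, matching the decoder's InstrDual case that tests
 * (ctxt->modrm >> 6). A sketch using the movbe split; the table
 * strings are illustrative, only the selection logic mirrors the
 * patch.
 */
#include <stdio.h>

struct opcode {
    const char *name;
};

struct instr_dual {
    struct opcode mod012;   /* memory-operand forms (mod 0, 1, 2) */
    struct opcode mod3;     /* register-operand form */
};

static const struct instr_dual dual_0f_38_f0 = {
    .mod012 = { "movbe reg, mem" },
    .mod3   = { "(undefined)" },
};

static struct opcode select_dual(const struct instr_dual *d, unsigned modrm)
{
    return (modrm >> 6) == 3 ? d->mod3 : d->mod012;
}

int main(void)
{
    printf("%s\n", select_dual(&dual_0f_38_f0, 0x06).name);  /* mod = 0 */
    printf("%s\n", select_dual(&dual_0f_38_f0, 0xc6).name);  /* mod = 3 */
    return 0;
}
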
}; @@ -4275,7 +4295,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = - register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI)); + register_address(ctxt, VCPU_REGS_RDI); op->addr.mem.seg = VCPU_SREG_ES; op->val = 0; op->count = 1; @@ -4329,7 +4349,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = - register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI)); + register_address(ctxt, VCPU_REGS_RSI); op->addr.mem.seg = ctxt->seg_override; op->val = 0; op->count = 1; @@ -4338,7 +4358,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = - register_address(ctxt, + address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RBX) + (reg_read(ctxt, VCPU_REGS_RAX) & 0xff)); op->addr.mem.seg = ctxt->seg_override; @@ -4510,8 +4530,7 @@ done_prefixes: /* vex-prefix instructions are not implemented */ if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) && - (mode == X86EMUL_MODE_PROT64 || - (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) { + (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) { ctxt->d = NotImpl; } @@ -4549,6 +4568,12 @@ done_prefixes: else opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; break; + case InstrDual: + if ((ctxt->modrm >> 6) == 3) + opcode = opcode.u.idual->mod3; + else + opcode = opcode.u.idual->mod012; + break; default: return EMULATION_FAILED; } @@ -4567,7 +4592,8 @@ done_prefixes: return EMULATION_FAILED; if (unlikely(ctxt->d & - (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) { + (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch| + No16))) { /* * These are copied unconditionally here, and checked unconditionally * in x86_emulate_insn. @@ -4578,8 +4604,12 @@ done_prefixes: if (ctxt->d & NotImpl) return EMULATION_FAILED; - if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) - ctxt->op_bytes = 8; + if (mode == X86EMUL_MODE_PROT64) { + if (ctxt->op_bytes == 4 && (ctxt->d & Stack)) + ctxt->op_bytes = 8; + else if (ctxt->d & NearBranch) + ctxt->op_bytes = 8; + } if (ctxt->d & Op3264) { if (mode == X86EMUL_MODE_PROT64) @@ -4588,6 +4618,9 @@ done_prefixes: ctxt->op_bytes = 4; } + if ((ctxt->d & No16) && ctxt->op_bytes == 2) + ctxt->op_bytes = 4; + if (ctxt->d & Sse) ctxt->op_bytes = 16; else if (ctxt->d & Mmx) @@ -4631,7 +4664,8 @@ done_prefixes: rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); if (ctxt->rip_relative) - ctxt->memopp->addr.mem.ea += ctxt->_eip; + ctxt->memopp->addr.mem.ea = address_mask(ctxt, + ctxt->memopp->addr.mem.ea + ctxt->_eip); done: return (rc != X86EMUL_CONTINUE) ? 
EMULATION_FAILED : EMULATION_OK; @@ -4775,6 +4809,12 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + /* Instruction can only be executed in protected mode */ + if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { + rc = emulate_ud(ctxt); + goto done; + } + /* Privileged instruction can be executed only in CPL=0 */ if ((ctxt->d & Priv) && ops->cpl(ctxt)) { if (ctxt->d & PrivUD) @@ -4784,12 +4824,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - /* Instruction can only be executed in protected mode */ - if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { - rc = emulate_ud(ctxt); - goto done; - } - /* Do instruction specific permission checks */ if (ctxt->d & CheckPerm) { rc = ctxt->check_perm(ctxt); @@ -4974,8 +5008,7 @@ writeback: count = ctxt->src.count; else count = ctxt->dst.count; - register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), - -count); + register_address_increment(ctxt, VCPU_REGS_RCX, -count); if (!string_insn_completed(ctxt)) { /* @@ -5053,11 +5086,6 @@ twobyte_insn: ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val : (s16) ctxt->src.val; break; - case 0xc3: /* movnti */ - ctxt->dst.bytes = ctxt->op_bytes; - ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val : - (u32) ctxt->src.val; - break; default: goto cannot_emulate; } diff --git a/virt/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 0ba4057d271..b1947e0f3e1 100644 --- a/virt/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -270,7 +270,6 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, spin_unlock(&ioapic->lock); } -#ifdef CONFIG_X86 void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; @@ -279,12 +278,6 @@ void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) return; kvm_make_scan_ioapic_request(kvm); } -#else -void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) -{ - return; -} -#endif static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { @@ -586,11 +579,6 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, case IOAPIC_REG_WINDOW: ioapic_write_indirect(ioapic, data); break; -#ifdef CONFIG_IA64 - case IOAPIC_REG_EOI: - __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG); - break; -#endif default: break; diff --git a/virt/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index e23b70634f1..3c9195535ff 100644 --- a/virt/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -19,7 +19,6 @@ struct kvm_vcpu; /* Direct registers. */ #define IOAPIC_REG_SELECT 0x00 #define IOAPIC_REG_WINDOW 0x10 -#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */ /* Indirect registers. 
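
/*
 * The reordering above runs the "protected mode only" test before the
 * CPL test, presumably so a guest in vm86 mode (not protected mode in
 * the emulator's terms, but CPL 3) that hits a privileged,
 * protected-mode-only opcode sees #UD rather than #GP. A sketch of the
 * reordered gate; the flag bits are illustrative, not the emulator's
 * actual Prot/Priv encoding.
 */
#include <stdio.h>

enum fault { FAULT_NONE, FAULT_UD, FAULT_GP };

#define PROT (1u << 0)  /* instruction exists only in protected mode */
#define PRIV (1u << 1)  /* instruction requires CPL 0 */

static enum fault pre_execute_checks(unsigned d, int protected_mode, int cpl)
{
    if ((d & PROT) && !protected_mode)
        return FAULT_UD;                /* now checked first */
    if ((d & PRIV) && cpl != 0)
        return FAULT_GP;
    return FAULT_NONE;
}

int main(void)
{
    /* vm86-like case: not protected mode, CPL == 3 -> #UD, not #GP */
    printf("fault=%d (1 == #UD)\n", pre_execute_checks(PROT | PRIV, 0, 3));
    return 0;
}
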
*/ #define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ @@ -45,6 +44,23 @@ struct rtc_status { DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS); }; +union kvm_ioapic_redirect_entry { + u64 bits; + struct { + u8 vector; + u8 delivery_mode:3; + u8 dest_mode:1; + u8 delivery_status:1; + u8 polarity:1; + u8 remote_irr:1; + u8 trig_mode:1; + u8 mask:1; + u8 reserve:7; + u8 reserved[4]; + u8 dest_id; + } fields; +}; + struct kvm_ioapic { u64 base_address; u32 ioregsel; @@ -83,7 +99,7 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode); + int short_hand, unsigned int dest, int dest_mode); int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode); @@ -97,7 +113,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, u32 *tmr); diff --git a/virt/kvm/iommu.c b/arch/x86/kvm/iommu.c index c1e6ae989a4..17b73eeac8a 100644 --- a/virt/kvm/iommu.c +++ b/arch/x86/kvm/iommu.c @@ -31,6 +31,7 @@ #include <linux/dmar.h> #include <linux/iommu.h> #include <linux/intel-iommu.h> +#include "assigned-dev.h" static bool allow_unsafe_assigned_interrupts; module_param_named(allow_unsafe_assigned_interrupts, @@ -169,10 +170,8 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) return r; } -int kvm_assign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) +int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev) { - struct pci_dev *pdev = NULL; struct iommu_domain *domain = kvm->arch.iommu_domain; int r; bool noncoherent; @@ -181,7 +180,6 @@ int kvm_assign_device(struct kvm *kvm, if (!domain) return 0; - pdev = assigned_dev->dev; if (pdev == NULL) return -ENODEV; @@ -212,17 +210,14 @@ out_unmap: return r; } -int kvm_deassign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) +int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev) { struct iommu_domain *domain = kvm->arch.iommu_domain; - struct pci_dev *pdev = NULL; /* check if iommu exists and in use */ if (!domain) return 0; - pdev = assigned_dev->dev; if (pdev == NULL) return -ENODEV; diff --git a/virt/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 963b8995a9e..72298b3ac02 100644 --- a/virt/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -26,9 +26,6 @@ #include <trace/events/kvm.h> #include <asm/msidef.h> -#ifdef CONFIG_IA64 -#include <asm/iosapic.h> -#endif #include "irq.h" @@ -38,12 +35,8 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) { -#ifdef CONFIG_X86 struct kvm_pic *pic = pic_irqchip(kvm); return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); -#else - return -1; -#endif } static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, @@ -57,12 +50,7 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) { -#ifdef CONFIG_IA64 - return irq->delivery_mode == - (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); 
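
/*
 * The redirection-entry layout that moves into ioapic.h above, as a
 * standalone pack/unpack demo. The bit-field order assumes a
 * little-endian, LSB-first compiler layout, which is what the x86
 * kernel relies on for this union.
 */
#include <stdint.h>
#include <stdio.h>

union kvm_ioapic_redirect_entry {
    uint64_t bits;
    struct {
        uint8_t vector;
        uint8_t delivery_mode:3;
        uint8_t dest_mode:1;
        uint8_t delivery_status:1;
        uint8_t polarity:1;
        uint8_t remote_irr:1;
        uint8_t trig_mode:1;
        uint8_t mask:1;
        uint8_t reserve:7;
        uint8_t reserved[4];
        uint8_t dest_id;
    } fields;
};

int main(void)
{
    union kvm_ioapic_redirect_entry e = { .bits = 0 };

    e.fields.vector = 0x31;
    e.fields.trig_mode = 1;             /* level triggered */
    e.fields.mask = 1;
    e.fields.dest_id = 3;

    printf("raw=%#llx vector=%#x masked=%u dest=%u\n",
           (unsigned long long)e.bits, (unsigned)e.fields.vector,
           (unsigned)e.fields.mask, (unsigned)e.fields.dest_id);
    return 0;
}
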
-#else return irq->delivery_mode == APIC_DM_LOWEST; -#endif } int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, @@ -202,9 +190,7 @@ int kvm_request_irq_source_id(struct kvm *kvm) } ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); -#ifdef CONFIG_X86 ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); -#endif set_bit(irq_source_id, bitmap); unlock: mutex_unlock(&kvm->irq_lock); @@ -215,9 +201,7 @@ unlock: void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) { ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); -#ifdef CONFIG_X86 ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); -#endif mutex_lock(&kvm->irq_lock); if (irq_source_id < 0 || @@ -230,9 +214,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) goto unlock; kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); -#ifdef CONFIG_X86 kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id); -#endif unlock: mutex_unlock(&kvm->irq_lock); } @@ -242,7 +224,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, { mutex_lock(&kvm->irq_lock); kimn->irq = irq; - hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list); + hlist_add_head_rcu(&kimn->link, &kvm->arch.mask_notifier_list); mutex_unlock(&kvm->irq_lock); } @@ -264,7 +246,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, idx = srcu_read_lock(&kvm->irq_srcu); gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) - hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) + hlist_for_each_entry_rcu(kimn, &kvm->arch.mask_notifier_list, link) if (kimn->irq == gsi) kimn->func(kimn, mask); srcu_read_unlock(&kvm->irq_srcu, idx); @@ -322,16 +304,11 @@ out: .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) -#ifdef CONFIG_X86 -# define PIC_ROUTING_ENTRY(irq) \ +#define PIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } } -# define ROUTING_ENTRY2(irq) \ +#define ROUTING_ENTRY2(irq) \ IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) -#else -# define ROUTING_ENTRY2(irq) \ - IOAPIC_ROUTING_ENTRY(irq) -#endif static const struct kvm_irq_routing_entry default_routing[] = { ROUTING_ENTRY2(0), ROUTING_ENTRY2(1), @@ -346,20 +323,6 @@ static const struct kvm_irq_routing_entry default_routing[] = { ROUTING_ENTRY1(18), ROUTING_ENTRY1(19), ROUTING_ENTRY1(20), ROUTING_ENTRY1(21), ROUTING_ENTRY1(22), ROUTING_ENTRY1(23), -#ifdef CONFIG_IA64 - ROUTING_ENTRY1(24), ROUTING_ENTRY1(25), - ROUTING_ENTRY1(26), ROUTING_ENTRY1(27), - ROUTING_ENTRY1(28), ROUTING_ENTRY1(29), - ROUTING_ENTRY1(30), ROUTING_ENTRY1(31), - ROUTING_ENTRY1(32), ROUTING_ENTRY1(33), - ROUTING_ENTRY1(34), ROUTING_ENTRY1(35), - ROUTING_ENTRY1(36), ROUTING_ENTRY1(37), - ROUTING_ENTRY1(38), ROUTING_ENTRY1(39), - ROUTING_ENTRY1(40), ROUTING_ENTRY1(41), - ROUTING_ENTRY1(42), ROUTING_ENTRY1(43), - ROUTING_ENTRY1(44), ROUTING_ENTRY1(45), - ROUTING_ENTRY1(46), ROUTING_ENTRY1(47), -#endif }; int kvm_setup_default_irq_routing(struct kvm *kvm) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index b8345dd41b2..4f0c0b95468 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -68,6 +68,9 @@ #define MAX_APIC_VECTOR 256 #define APIC_VECTORS_PER_REG 32 +#define APIC_BROADCAST 0xFF +#define X2APIC_BROADCAST 0xFFFFFFFFul + #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) @@ -129,8 +132,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) return 
(kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } -#define KVM_X2APIC_CID_BITS 0 - static void recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -149,42 +150,56 @@ static void recalculate_apic_map(struct kvm *kvm) new->cid_shift = 8; new->cid_mask = 0; new->lid_mask = 0xff; + new->broadcast = APIC_BROADCAST; kvm_for_each_vcpu(i, vcpu, kvm) { struct kvm_lapic *apic = vcpu->arch.apic; - u16 cid, lid; - u32 ldr; if (!kvm_apic_present(vcpu)) continue; + if (apic_x2apic_mode(apic)) { + new->ldr_bits = 32; + new->cid_shift = 16; + new->cid_mask = new->lid_mask = 0xffff; + new->broadcast = X2APIC_BROADCAST; + } else if (kvm_apic_get_reg(apic, APIC_LDR)) { + if (kvm_apic_get_reg(apic, APIC_DFR) == + APIC_DFR_CLUSTER) { + new->cid_shift = 4; + new->cid_mask = 0xf; + new->lid_mask = 0xf; + } else { + new->cid_shift = 8; + new->cid_mask = 0; + new->lid_mask = 0xff; + } + } + /* * All APICs have to be configured in the same mode by an OS. * We take advatage of this while building logical id loockup - * table. After reset APICs are in xapic/flat mode, so if we - * find apic with different setting we assume this is the mode + * table. After reset APICs are in software disabled mode, so if + * we find apic with different setting we assume this is the mode * OS wants all apics to be in; build lookup table accordingly. */ - if (apic_x2apic_mode(apic)) { - new->ldr_bits = 32; - new->cid_shift = 16; - new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1; - new->lid_mask = 0xffff; - } else if (kvm_apic_sw_enabled(apic) && - !new->cid_mask /* flat mode */ && - kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) { - new->cid_shift = 4; - new->cid_mask = 0xf; - new->lid_mask = 0xf; - } + if (kvm_apic_sw_enabled(apic)) + break; + } - new->phys_map[kvm_apic_id(apic)] = apic; + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvm_lapic *apic = vcpu->arch.apic; + u16 cid, lid; + u32 ldr, aid; + aid = kvm_apic_id(apic); ldr = kvm_apic_get_reg(apic, APIC_LDR); cid = apic_cluster_id(new, ldr); lid = apic_logical_id(new, ldr); - if (lid) + if (aid < ARRAY_SIZE(new->phys_map)) + new->phys_map[aid] = apic; + if (lid && cid < ARRAY_SIZE(new->logical_map)) new->logical_map[cid][ffs(lid) - 1] = apic; } out: @@ -201,11 +216,13 @@ out: static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) { - u32 prev = kvm_apic_get_reg(apic, APIC_SPIV); + bool enabled = val & APIC_SPIV_APIC_ENABLED; apic_set_reg(apic, APIC_SPIV, val); - if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) { - if (val & APIC_SPIV_APIC_ENABLED) { + + if (enabled != apic->sw_enabled) { + apic->sw_enabled = enabled; + if (enabled) { static_key_slow_dec_deferred(&apic_sw_disabled); recalculate_apic_map(apic->vcpu->kvm); } else @@ -237,21 +254,17 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) { - return ((kvm_apic_get_reg(apic, APIC_LVTT) & - apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT); + return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT; } static inline int apic_lvtt_period(struct kvm_lapic *apic) { - return ((kvm_apic_get_reg(apic, APIC_LVTT) & - apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC); + return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC; } static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) { - return ((kvm_apic_get_reg(apic, APIC_LVTT) & - apic->lapic_timer.timer_mode_mask) == - APIC_LVT_TIMER_TSCDEADLINE); + return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE; 
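
/*
 * How the rewritten recalculate_apic_map() above derives the logical
 * geometry: x2APIC splits a 32-bit LDR 16:16 into cluster:logical,
 * xAPIC cluster mode splits the 8-bit LDR 4:4, and xAPIC flat mode has
 * no cluster part. The geometry values mirror the patch; the helper
 * bodies here are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

struct apic_map_geom {
    int ldr_bits;
    int cid_shift;
    uint32_t cid_mask, lid_mask;
};

static const struct apic_map_geom x2apic        = { 32, 16, 0xffff, 0xffff };
static const struct apic_map_geom xapic_cluster = {  8,  4, 0xf,    0xf    };
static const struct apic_map_geom xapic_flat    = {  8,  8, 0,      0xff   };

static unsigned cluster_id(const struct apic_map_geom *g, uint32_t ldr)
{
    ldr >>= 32 - g->ldr_bits;           /* xAPIC LDR lives in bits 31:24 */
    return (ldr >> g->cid_shift) & g->cid_mask;
}

static unsigned logical_id(const struct apic_map_geom *g, uint32_t ldr)
{
    ldr >>= 32 - g->ldr_bits;
    return ldr & g->lid_mask;
}

int main(void)
{
    uint32_t ldr = 0x23000000;          /* xAPIC: cluster 2, logical 0x3 */

    printf("x2apic:  cluster %u logical %#x\n",
           cluster_id(&x2apic, 0x00020003), logical_id(&x2apic, 0x00020003));
    printf("cluster: cluster %u logical %#x\n",
           cluster_id(&xapic_cluster, ldr), logical_id(&xapic_cluster, ldr));
    printf("flat:    cluster %u logical %#x\n",
           cluster_id(&xapic_flat, ldr), logical_id(&xapic_flat, ldr));
    return 0;
}
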
} static inline int apic_lvt_nmi_mode(u32 lvt_val) @@ -326,8 +339,12 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_irr); static inline void apic_set_irr(int vec, struct kvm_lapic *apic) { - apic->irr_pending = true; apic_set_vector(vec, apic->regs + APIC_IRR); + /* + * irr_pending must be true if any interrupt is pending; set it after + * APIC_IRR to avoid race with apic_clear_irr + */ + apic->irr_pending = true; } static inline int apic_search_irr(struct kvm_lapic *apic) @@ -359,13 +376,15 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) vcpu = apic->vcpu; - apic_clear_vector(vec, apic->regs + APIC_IRR); - if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) { /* try to update RVI */ + apic_clear_vector(vec, apic->regs + APIC_IRR); kvm_make_request(KVM_REQ_EVENT, vcpu); - else { - vec = apic_search_irr(apic); - apic->irr_pending = (vec != -1); + } else { + apic->irr_pending = false; + apic_clear_vector(vec, apic->regs + APIC_IRR); + if (apic_search_irr(apic) != -1) + apic->irr_pending = true; } } @@ -558,16 +577,25 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) apic_update_ppr(apic); } -int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) +static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest) +{ + return dest == (apic_x2apic_mode(apic) ? + X2APIC_BROADCAST : APIC_BROADCAST); +} + +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest) { - return dest == 0xff || kvm_apic_id(apic) == dest; + return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest); } -int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) { int result = 0; u32 logical_id; + if (kvm_apic_broadcast(apic, mda)) + return 1; + if (apic_x2apic_mode(apic)) { logical_id = kvm_apic_get_reg(apic, APIC_LDR); return logical_id & mda; @@ -595,7 +623,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) } int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode) + int short_hand, unsigned int dest, int dest_mode) { int result = 0; struct kvm_lapic *target = vcpu->arch.apic; @@ -657,15 +685,24 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, if (!map) goto out; + if (irq->dest_id == map->broadcast) + goto out; + + ret = true; + if (irq->dest_mode == 0) { /* physical mode */ - if (irq->delivery_mode == APIC_DM_LOWEST || - irq->dest_id == 0xff) + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) goto out; - dst = &map->phys_map[irq->dest_id & 0xff]; + + dst = &map->phys_map[irq->dest_id]; } else { u32 mda = irq->dest_id << (32 - map->ldr_bits); + u16 cid = apic_cluster_id(map, mda); + + if (cid >= ARRAY_SIZE(map->logical_map)) + goto out; - dst = map->logical_map[apic_cluster_id(map, mda)]; + dst = map->logical_map[cid]; bitmap = apic_logical_id(map, mda); @@ -691,8 +728,6 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, *r = 0; *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); } - - ret = true; out: rcu_read_unlock(); return ret; @@ -1034,6 +1069,26 @@ static void update_divide_count(struct kvm_lapic *apic) apic->divide_count); } +static void apic_timer_expired(struct kvm_lapic *apic) +{ + struct kvm_vcpu *vcpu = apic->vcpu; + wait_queue_head_t *q = &vcpu->wq; + + /* + * Note: KVM_REQ_PENDING_TIMER is implicitly checked in + * vcpu_enter_guest. 
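
/*
 * The ordering the new apic_set_irr()/apic_clear_irr() comments above
 * insist on, made explicit with C11 atomics (the kernel relies on its
 * own ordering guarantees, not these): the IRR bit must be visible
 * before irr_pending is set, so a racing clear that trusts
 * irr_pending also sees the vector bit; the clear side conservatively
 * drops the hint first and re-derives it afterwards.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic unsigned long irr;       /* one word of the real bitmap */
static atomic_bool irr_pending;

static void apic_set_irr(int vec)
{
    atomic_fetch_or(&irr, 1ul << vec);  /* bit first... */
    atomic_store(&irr_pending, true);   /* ...hint second */
}

static void apic_clear_irr(int vec)
{
    atomic_store(&irr_pending, false);  /* hint off before the bit goes */
    atomic_fetch_and(&irr, ~(1ul << vec));
    if (atomic_load(&irr) != 0)         /* re-check, like apic_search_irr */
        atomic_store(&irr_pending, true);
}

int main(void)
{
    apic_set_irr(3);
    apic_set_irr(5);
    apic_clear_irr(3);
    printf("pending=%d irr=%#lx\n", (int)atomic_load(&irr_pending),
           atomic_load(&irr));
    return 0;
}
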
+ */ + if (atomic_read(&apic->lapic_timer.pending)) + return; + + atomic_inc(&apic->lapic_timer.pending); + /* FIXME: this code should not know anything about vcpus */ + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + + if (waitqueue_active(q)) + wake_up_interruptible(q); +} + static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now; @@ -1096,9 +1151,10 @@ static void start_apic_timer(struct kvm_lapic *apic) if (likely(tscdeadline > guest_tsc)) { ns = (tscdeadline - guest_tsc) * 1000000ULL; do_div(ns, this_tsc_khz); - } - hrtimer_start(&apic->lapic_timer.timer, - ktime_add_ns(now, ns), HRTIMER_MODE_ABS); + hrtimer_start(&apic->lapic_timer.timer, + ktime_add_ns(now, ns), HRTIMER_MODE_ABS); + } else + apic_timer_expired(apic); local_irq_restore(flags); } @@ -1203,17 +1259,20 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) break; - case APIC_LVTT: - if ((kvm_apic_get_reg(apic, APIC_LVTT) & - apic->lapic_timer.timer_mode_mask) != - (val & apic->lapic_timer.timer_mode_mask)) + case APIC_LVTT: { + u32 timer_mode = val & apic->lapic_timer.timer_mode_mask; + + if (apic->lapic_timer.timer_mode != timer_mode) { + apic->lapic_timer.timer_mode = timer_mode; hrtimer_cancel(&apic->lapic_timer.timer); + } if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); apic_set_reg(apic, APIC_LVTT, val); break; + } case APIC_TMICT: if (apic_lvtt_tscdeadline(apic)) @@ -1320,7 +1379,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) static_key_slow_dec_deferred(&apic_hw_disabled); - if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED)) + if (!apic->sw_enabled) static_key_slow_dec_deferred(&apic_sw_disabled); if (apic->regs) @@ -1355,9 +1414,6 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) return; hrtimer_cancel(&apic->lapic_timer.timer); - /* Inject here so clearing tscdeadline won't override new value */ - if (apic_has_pending_timer(vcpu)) - kvm_inject_apic_timer_irqs(vcpu); apic->lapic_timer.tscdeadline = data; start_apic_timer(apic); } @@ -1422,6 +1478,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) apic->base_address = apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_BASE; + if ((value & MSR_IA32_APICBASE_ENABLE) && + apic->base_address != APIC_DEFAULT_PHYS_BASE) + pr_warn_once("APIC base relocation is unsupported by KVM"); + /* with FSB delivery interrupt, we can restart APIC functionality */ apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is " "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address); @@ -1447,6 +1507,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) for (i = 0; i < APIC_LVT_NUM; i++) apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); + apic->lapic_timer.timer_mode = 0; apic_set_reg(apic, APIC_LVT0, SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); @@ -1538,23 +1599,8 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) { struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); - struct kvm_vcpu *vcpu = apic->vcpu; - wait_queue_head_t *q = &vcpu->wq; - - /* - * There is a race window between reading and incrementing, but we do - * not care about potentially losing timer events in the !reinject - * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked - * in vcpu_enter_guest. 
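
/*
 * The TSC-deadline fix above moves hrtimer_start() inside the
 * "deadline still ahead" branch and fires immediately otherwise,
 * instead of arming a timer with a stale ns value when the deadline
 * has already passed. A sketch with stub callbacks; the tsc_khz value
 * and printouts are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

static void apic_timer_expired(void)
{
    puts("timer fired immediately");
}

static void hrtimer_start_ns(uint64_t ns)
{
    printf("hrtimer armed for +%llu ns\n", (unsigned long long)ns);
}

static void start_tscdeadline_timer(uint64_t tscdeadline, uint64_t guest_tsc,
                                    uint64_t tsc_khz)
{
    if (tscdeadline > guest_tsc) {
        /* delta cycles -> ns, as the patched code does with do_div() */
        uint64_t ns = (tscdeadline - guest_tsc) * 1000000ull / tsc_khz;
        hrtimer_start_ns(ns);
    } else {
        apic_timer_expired();
    }
}

int main(void)
{
    start_tscdeadline_timer(3000000, 1000000, 2000000); /* 2 GHz, +1 ms */
    start_tscdeadline_timer(1000000, 3000000, 2000000); /* already past */
    return 0;
}
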
- */ - if (!atomic_read(&ktimer->pending)) { - atomic_inc(&ktimer->pending); - /* FIXME: this code should not know anything about vcpus */ - kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); - } - if (waitqueue_active(q)) - wake_up_interruptible(q); + apic_timer_expired(apic); if (lapic_is_periodic(apic)) { hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); @@ -1693,6 +1739,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ? 1 : count_vectors(apic->regs + APIC_ISR); apic->highest_isr_cache = -1; + if (kvm_x86_ops->hwapic_irr_update) + kvm_x86_ops->hwapic_irr_update(vcpu, + apic_find_highest_irr(apic)); kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_rtc_eoi_tracking_restore_one(vcpu); @@ -1837,8 +1886,11 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) return 1; + if (reg == APIC_ICR2) + return 1; + /* if this is ICR write vector before command */ - if (msr == 0x830) + if (reg == APIC_ICR) apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); return apic_reg_write(apic, reg, (u32)data); } @@ -1851,9 +1903,15 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) return 1; + if (reg == APIC_DFR || reg == APIC_ICR2) { + apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n", + reg); + return 1; + } + if (apic_reg_read(apic, reg, 4, &low)) return 1; - if (msr == 0x830) + if (reg == APIC_ICR) apic_reg_read(apic, APIC_ICR2, 4, &high); *data = (((u64)high) << 32) | low; @@ -1908,7 +1966,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) void kvm_apic_accept_events(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - unsigned int sipi_vector; + u8 sipi_vector; unsigned long pe; if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 6a11845fd8b..c674fce53cf 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -11,6 +11,7 @@ struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ + u32 timer_mode; u32 timer_mode_mask; u64 tscdeadline; atomic_t pending; /* accumulated triggered timers */ @@ -22,6 +23,7 @@ struct kvm_lapic { struct kvm_timer lapic_timer; u32 divide_count; struct kvm_vcpu *vcpu; + bool sw_enabled; bool irr_pending; /* Number of bits set in ISR. 
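
/*
 * Background for the reg checks above: x2APIC MSR 0x800 + n maps to
 * the xAPIC register at offset n << 4 (the mapping itself is not in
 * this hunk -- it is the architectural rule the reg values rely on).
 * ICR is the one 64-bit x2APIC register, so a write is split into
 * ICR2 (the destination half) then ICR; DFR and ICR2 do not exist as
 * x2APIC MSRs and must fault, which is what the added checks enforce.
 */
#include <stdint.h>
#include <stdio.h>

#define APIC_ICR  0x300
#define APIC_ICR2 0x310

static int x2apic_msr_write(uint32_t msr, uint64_t data)
{
    uint32_t reg = (msr - 0x800) << 4;

    if (reg == APIC_ICR2)
        return 1;                           /* reserved -> #GP */
    if (reg == APIC_ICR)
        printf("ICR2 <- %#x (destination)\n", (uint32_t)(data >> 32));
    printf("reg %#x <- %#x\n", reg, (uint32_t)data);
    return 0;
}

int main(void)
{
    /* MSR 0x830 is ICR: destination 3, vector 0x41 */
    x2apic_msr_write(0x830, 0x0000000300000041ull);
    printf("ICR2 write rc=%d\n", x2apic_msr_write(0x831, 0));
    return 0;
}
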
*/ s16 isr_count; @@ -55,8 +57,8 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu); void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); -int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); -int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest); +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); @@ -119,11 +121,11 @@ static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic) extern struct static_key_deferred apic_sw_disabled; -static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic) +static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic) { if (static_key_false(&apic_sw_disabled.key)) - return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; - return APIC_SPIV_APIC_ENABLED; + return apic->sw_enabled; + return true; } static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) @@ -152,8 +154,6 @@ static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) ldr >>= 32 - map->ldr_bits; cid = (ldr >> map->cid_shift) & map->cid_mask; - BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); - return cid; } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 978f402006e..10fbed126b1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -214,13 +214,12 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); #define MMIO_GEN_LOW_SHIFT 10 #define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2) #define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) -#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) static u64 generation_mmio_spte_mask(unsigned int gen) { u64 mask; - WARN_ON(gen > MMIO_MAX_GEN); + WARN_ON(gen & ~MMIO_GEN_MASK); mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT; mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT; @@ -263,13 +262,13 @@ static bool is_mmio_spte(u64 spte) static gfn_t get_mmio_spte_gfn(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask; + u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask; return (spte & ~mask) >> PAGE_SHIFT; } static unsigned get_mmio_spte_access(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask; + u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask; return (spte & ~mask) & ~PAGE_MASK; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7527cefc5a4..41dd0387ccc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1056,9 +1056,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho { struct vcpu_svm *svm = to_svm(vcpu); - WARN_ON(adjustment < 0); - if (host) - adjustment = svm_scale_tsc(vcpu, adjustment); + if (host) { + if (svm->tsc_ratio != TSC_RATIO_DEFAULT) + WARN_ON(adjustment < 0); + adjustment = svm_scale_tsc(vcpu, (u64)adjustment); + } svm->vmcb->control.tsc_offset += adjustment; if (is_guest_mode(vcpu)) @@ -2999,7 +3001,6 @@ static int dr_interception(struct vcpu_svm *svm) { int reg, dr; unsigned long val; - int err; if (svm->vcpu.guest_debug == 0) { /* @@ -3019,12 +3020,15 @@ static int dr_interception(struct vcpu_svm *svm) dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0; if (dr >= 16) { /* mov to DRn */ + if (!kvm_require_dr(&svm->vcpu, dr - 16)) + return 1; val = kvm_register_read(&svm->vcpu, reg); 
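
/*
 * The MMIO generation number is stored split across two spte bit
 * ranges; the mmu.c hunk above replaces MMIO_MAX_GEN with masking by
 * MMIO_GEN_MASK so the WARN fires on any bits outside the field. The
 * MMIO_SPTE_GEN_*_SHIFT values and MMIO_GEN_SHIFT below are
 * assumptions matching the mmu.c of this era, not part of this hunk.
 */
#include <stdint.h>
#include <stdio.h>

#define MMIO_SPTE_GEN_LOW_SHIFT   2   /* assumed spte bit positions */
#define MMIO_SPTE_GEN_HIGH_SHIFT  52
#define MMIO_GEN_SHIFT            20  /* assumed total field width */
#define MMIO_GEN_LOW_SHIFT        10
#define MMIO_GEN_LOW_MASK   ((1u << MMIO_GEN_LOW_SHIFT) - 2)
#define MMIO_GEN_MASK       ((1u << MMIO_GEN_SHIFT) - 1)

static uint64_t generation_mmio_spte_mask(unsigned int gen)
{
    uint64_t mask;

    if (gen & ~MMIO_GEN_MASK)           /* the reworked WARN_ON condition */
        fprintf(stderr, "WARN: generation out of range\n");

    mask  = (uint64_t)(gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
    mask |= ((uint64_t)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
    return mask;
}

int main(void)
{
    unsigned int gen = 0x5a5a4 & MMIO_GEN_MASK;

    printf("gen %#x -> spte gen mask %#llx\n", gen,
           (unsigned long long)generation_mmio_spte_mask(gen));
    return 0;
}
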
kvm_set_dr(&svm->vcpu, dr - 16, val); } else { - err = kvm_get_dr(&svm->vcpu, dr, &val); - if (!err) - kvm_register_write(&svm->vcpu, reg, val); + if (!kvm_require_dr(&svm->vcpu, dr)) + return 1; + kvm_get_dr(&svm->vcpu, dr, &val); + kvm_register_write(&svm->vcpu, reg, val); } skip_emulated_instruction(&svm->vcpu); @@ -4123,6 +4127,11 @@ static bool svm_mpx_supported(void) return false; } +static bool svm_xsaves_supported(void) +{ + return false; +} + static bool svm_has_wbinvd_exit(void) { return true; @@ -4410,6 +4419,7 @@ static struct kvm_x86_ops svm_x86_ops = { .rdtscp_supported = svm_rdtscp_supported, .invpcid_supported = svm_invpcid_supported, .mpx_supported = svm_mpx_supported, + .xsaves_supported = svm_xsaves_supported, .set_supported_cpuid = svm_set_supported_cpuid, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6b06ab8748d..c2a34bb5ad9 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -5,6 +5,7 @@ #include <asm/vmx.h> #include <asm/svm.h> #include <asm/clocksource.h> +#include <asm/pvclock-abi.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm @@ -877,6 +878,42 @@ TRACE_EVENT(kvm_ple_window, #define trace_kvm_ple_window_shrink(vcpu_id, new, old) \ trace_kvm_ple_window(false, vcpu_id, new, old) +TRACE_EVENT(kvm_pvclock_update, + TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock), + TP_ARGS(vcpu_id, pvclock), + + TP_STRUCT__entry( + __field( unsigned int, vcpu_id ) + __field( __u32, version ) + __field( __u64, tsc_timestamp ) + __field( __u64, system_time ) + __field( __u32, tsc_to_system_mul ) + __field( __s8, tsc_shift ) + __field( __u8, flags ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->version = pvclock->version; + __entry->tsc_timestamp = pvclock->tsc_timestamp; + __entry->system_time = pvclock->system_time; + __entry->tsc_to_system_mul = pvclock->tsc_to_system_mul; + __entry->tsc_shift = pvclock->tsc_shift; + __entry->flags = pvclock->flags; + ), + + TP_printk("vcpu_id %u, pvclock { version %u, tsc_timestamp 0x%llx, " + "system_time 0x%llx, tsc_to_system_mul 0x%x, tsc_shift %d, " + "flags 0x%x }", + __entry->vcpu_id, + __entry->version, + __entry->tsc_timestamp, + __entry->system_time, + __entry->tsc_to_system_mul, + __entry->tsc_shift, + __entry->flags) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3e556c68351..feb852b0459 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -99,13 +99,15 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); static bool __read_mostly nested = 0; module_param(nested, bool, S_IRUGO); +static u64 __read_mostly host_xss; + #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) #define KVM_VM_CR0_ALWAYS_ON \ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) #define KVM_CR4_GUEST_OWNED_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT) + | X86_CR4_OSXMMEXCPT | X86_CR4_TSD) #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) @@ -214,6 +216,7 @@ struct __packed vmcs12 { u64 virtual_apic_page_addr; u64 apic_access_addr; u64 ept_pointer; + u64 xss_exit_bitmap; u64 guest_physical_address; u64 vmcs_link_pointer; u64 guest_ia32_debugctl; @@ -616,6 +619,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD64(VIRTUAL_APIC_PAGE_ADDR, 
virtual_apic_page_addr), FIELD64(APIC_ACCESS_ADDR, apic_access_addr), FIELD64(EPT_POINTER, ept_pointer), + FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl), @@ -720,12 +724,15 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD(HOST_RSP, host_rsp), FIELD(HOST_RIP, host_rip), }; -static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table); static inline short vmcs_field_to_offset(unsigned long field) { - if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0) - return -1; + BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); + + if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || + vmcs_field_to_offset_table[field] == 0) + return -ENOENT; + return vmcs_field_to_offset_table[field]; } @@ -758,6 +765,7 @@ static u64 construct_eptp(unsigned long root_hpa); static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); static bool vmx_mpx_supported(void); +static bool vmx_xsaves_supported(void); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -1098,6 +1106,12 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); } +static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) +{ + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) && + vmx_xsaves_supported(); +} + static inline bool is_exception(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) @@ -1659,12 +1673,20 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) vmx->guest_msrs[efer_offset].mask = ~ignore_bits; clear_atomic_switch_msr(vmx, MSR_EFER); - /* On ept, can't emulate nx, and must switch nx atomically */ - if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) { + + /* + * On EPT, we can't emulate NX, so we must switch EFER atomically. + * On CPUs that support "load IA32_EFER", always switch EFER + * atomically, since it's faster than switching it manually. 
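
/*
 * The decision the surrounding update_transition_efer() hunk makes,
 * as a standalone predicate: use the VMCS atomic EFER switch when the
 * CPU can load IA32_EFER natively (faster than the shared-MSR path)
 * or when EPT is enabled and guest/host disagree on NX (correctness:
 * NX cannot be emulated under EPT) -- and even then, skip the MSR
 * slot entirely if the two values end up identical. Function and flag
 * names are stand-ins.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EFER_LME (1ull << 8)
#define EFER_LMA (1ull << 10)
#define EFER_NX  (1ull << 11)

static bool use_atomic_efer_switch(bool cpu_has_load_ia32_efer,
                                   bool enable_ept,
                                   uint64_t guest_efer, uint64_t host_efer,
                                   bool *need_msr_slot)
{
    if (cpu_has_load_ia32_efer ||
        (enable_ept && ((guest_efer ^ host_efer) & EFER_NX))) {
        if (!(guest_efer & EFER_LMA))
            guest_efer &= ~EFER_LME;    /* LME without LMA is dropped */
        *need_msr_slot = guest_efer != host_efer;
        return true;
    }
    return false;                       /* fall back to user-return MSRs */
}

int main(void)
{
    bool need = false;
    bool atomic = use_atomic_efer_switch(true, true,
                                         EFER_LMA | EFER_LME,
                                         EFER_LMA | EFER_LME | EFER_NX,
                                         &need);

    printf("atomic=%d need_msr_slot=%d\n", atomic, need);
    return 0;
}
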
+ */ + if (cpu_has_load_ia32_efer || + (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { guest_efer = vmx->vcpu.arch.efer; if (!(guest_efer & EFER_LMA)) guest_efer &= ~EFER_LME; - add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer); + if (guest_efer != host_efer) + add_atomic_switch_msr(vmx, MSR_EFER, + guest_efer, host_efer); return false; } @@ -2377,12 +2399,13 @@ static __init void nested_vmx_setup_ctls_msrs(void) nested_vmx_secondary_ctls_low = 0; nested_vmx_secondary_ctls_high &= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_UNRESTRICTED_GUEST | - SECONDARY_EXEC_WBINVD_EXITING; + SECONDARY_EXEC_WBINVD_EXITING | + SECONDARY_EXEC_XSAVES; if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ - nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; + nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT | + SECONDARY_EXEC_UNRESTRICTED_GUEST; nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | VMX_EPT_INVEPT_BIT; @@ -2558,6 +2581,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) if (!nested_vmx_allowed(vcpu)) return 1; return vmx_get_vmx_msr(vcpu, msr_index, pdata); + case MSR_IA32_XSS: + if (!vmx_xsaves_supported()) + return 1; + data = vcpu->arch.ia32_xss; + break; case MSR_TSC_AUX: if (!to_vmx(vcpu)->rdtscp_enabled) return 1; @@ -2649,6 +2677,22 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: return 1; /* they are read-only */ + case MSR_IA32_XSS: + if (!vmx_xsaves_supported()) + return 1; + /* + * The only supported bit as of Skylake is bit 8, but + * it is not supported on KVM. + */ + if (data != 0) + return 1; + vcpu->arch.ia32_xss = data; + if (vcpu->arch.ia32_xss != host_xss) + add_atomic_switch_msr(vmx, MSR_IA32_XSS, + vcpu->arch.ia32_xss, host_xss); + else + clear_atomic_switch_msr(vmx, MSR_IA32_XSS); + break; case MSR_TSC_AUX: if (!vmx->rdtscp_enabled) return 1; @@ -2884,7 +2928,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_SHADOW_VMCS; + SECONDARY_EXEC_SHADOW_VMCS | + SECONDARY_EXEC_XSAVES; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -3007,6 +3052,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) } } + if (cpu_has_xsaves) + rdmsrl(MSR_IA32_XSS, host_xss); + return 0; } @@ -3110,76 +3158,6 @@ static __init int alloc_kvm_area(void) return 0; } -static __init int hardware_setup(void) -{ - if (setup_vmcs_config(&vmcs_config) < 0) - return -EIO; - - if (boot_cpu_has(X86_FEATURE_NX)) - kvm_enable_efer_bits(EFER_NX); - - if (!cpu_has_vmx_vpid()) - enable_vpid = 0; - if (!cpu_has_vmx_shadow_vmcs()) - enable_shadow_vmcs = 0; - if (enable_shadow_vmcs) - init_vmcs_shadow_fields(); - - if (!cpu_has_vmx_ept() || - !cpu_has_vmx_ept_4levels()) { - enable_ept = 0; - enable_unrestricted_guest = 0; - enable_ept_ad_bits = 0; - } - - if (!cpu_has_vmx_ept_ad_bits()) - enable_ept_ad_bits = 0; - - if (!cpu_has_vmx_unrestricted_guest()) - enable_unrestricted_guest = 0; - - if (!cpu_has_vmx_flexpriority()) { - flexpriority_enabled = 0; - - /* - * set_apic_access_page_addr() is used to reload apic access - * page upon invalidation. No need to do anything if the - * processor does not have the APIC_ACCESS_ADDR VMCS field. 
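
/*
 * The MSR_IA32_XSS write path added above, reduced to its logic: only
 * the value 0 is accepted for now (no XSS feature bits are exposed to
 * guests), and the atomic entry/exit MSR switch slot is used only
 * when guest and host values differ. The switch-area calls are stubs.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t host_xss;               /* read once via rdmsrl at setup */

static void add_atomic_switch_xss(uint64_t guest, uint64_t host)
{
    printf("switch XSS %#llx <-> %#llx on entry/exit\n",
           (unsigned long long)guest, (unsigned long long)host);
}

static void clear_atomic_switch_xss(void)
{
    puts("no XSS switching needed");
}

static int set_xss(uint64_t *guest_xss, uint64_t data)
{
    if (data != 0)
        return 1;                       /* #GP: no supported XSS bits */
    *guest_xss = data;
    if (*guest_xss != host_xss)
        add_atomic_switch_xss(*guest_xss, host_xss);
    else
        clear_atomic_switch_xss();
    return 0;
}

int main(void)
{
    uint64_t guest_xss = 0;

    host_xss = 0x100;                   /* pretend the host uses bit 8 */
    printf("rc=%d\n", set_xss(&guest_xss, 0));
    printf("rc=%d\n", set_xss(&guest_xss, 0x100));
    return 0;
}
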
- */ - kvm_x86_ops->set_apic_access_page_addr = NULL; - } - - if (!cpu_has_vmx_tpr_shadow()) - kvm_x86_ops->update_cr8_intercept = NULL; - - if (enable_ept && !cpu_has_vmx_ept_2m_page()) - kvm_disable_largepages(); - - if (!cpu_has_vmx_ple()) - ple_gap = 0; - - if (!cpu_has_vmx_apicv()) - enable_apicv = 0; - - if (enable_apicv) - kvm_x86_ops->update_cr8_intercept = NULL; - else { - kvm_x86_ops->hwapic_irr_update = NULL; - kvm_x86_ops->deliver_posted_interrupt = NULL; - kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; - } - - if (nested) - nested_vmx_setup_ctls_msrs(); - - return alloc_kvm_area(); -} - -static __exit void hardware_unsetup(void) -{ - free_kvm_area(); -} - static bool emulation_required(struct kvm_vcpu *vcpu) { return emulate_invalid_guest_state && !guest_state_valid(vcpu); @@ -4396,6 +4374,7 @@ static void ept_set_mmio_spte_mask(void) kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull); } +#define VMX_XSS_EXIT_BITMAP 0 /* * Sets up the vmcs for emulated real mode. */ @@ -4505,6 +4484,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); set_cr4_guest_host_mask(vmx); + if (vmx_xsaves_supported()) + vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); + return 0; } @@ -5163,13 +5145,20 @@ static int handle_cr(struct kvm_vcpu *vcpu) static int handle_dr(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; - int dr, reg; + int dr, dr7, reg; + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + dr = exit_qualification & DEBUG_REG_ACCESS_NUM; + + /* First, if DR does not exist, trigger UD */ + if (!kvm_require_dr(vcpu, dr)) + return 1; /* Do not handle if the CPL > 0, will trigger GP on re-entry */ if (!kvm_require_cpl(vcpu, 0)) return 1; - dr = vmcs_readl(GUEST_DR7); - if (dr & DR7_GD) { + dr7 = vmcs_readl(GUEST_DR7); + if (dr7 & DR7_GD) { /* * As the vm-exit takes precedence over the debug trap, we * need to emulate the latter, either for the host or the @@ -5177,17 +5166,14 @@ static int handle_dr(struct kvm_vcpu *vcpu) */ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; - vcpu->run->debug.arch.dr7 = dr; - vcpu->run->debug.arch.pc = - vmcs_readl(GUEST_CS_BASE) + - vmcs_readl(GUEST_RIP); + vcpu->run->debug.arch.dr7 = dr7; + vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); vcpu->run->debug.arch.exception = DB_VECTOR; vcpu->run->exit_reason = KVM_EXIT_DEBUG; return 0; } else { - vcpu->arch.dr7 &= ~DR7_GD; + vcpu->arch.dr6 &= ~15; vcpu->arch.dr6 |= DR6_BD | DR6_RTM; - vmcs_writel(GUEST_DR7, vcpu->arch.dr7); kvm_queue_exception(vcpu, DB_VECTOR); return 1; } @@ -5209,8 +5195,6 @@ static int handle_dr(struct kvm_vcpu *vcpu) return 1; } - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - dr = exit_qualification & DEBUG_REG_ACCESS_NUM; reg = DEBUG_REG_ACCESS_REG(exit_qualification); if (exit_qualification & TYPE_MOV_FROM_DR) { unsigned long val; @@ -5391,6 +5375,20 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) return 1; } +static int handle_xsaves(struct kvm_vcpu *vcpu) +{ + skip_emulated_instruction(vcpu); + WARN(1, "this should never happen\n"); + return 1; +} + +static int handle_xrstors(struct kvm_vcpu *vcpu) +{ + skip_emulated_instruction(vcpu); + WARN(1, "this should never happen\n"); + return 1; +} + static int handle_apic_access(struct kvm_vcpu *vcpu) { if (likely(fasteoi)) { @@ -5492,7 +5490,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) } /* clear all local breakpoint enable flags */ - vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55); + 
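
/*
 * What the PFERR_* rewrite above computes: the EPT-violation exit
 * qualification carries write in bit 1, instruction fetch in bit 2,
 * and "translation was present" in bit 3, and those map onto the page
 * fault error code as bit 1, bit 4 and bit 0 respectively. The shifts
 * are unchanged by the patch, only spelled with named masks now.
 */
#include <stdint.h>
#include <stdio.h>

#define PFERR_PRESENT_MASK (1u << 0)
#define PFERR_WRITE_MASK   (1u << 1)
#define PFERR_FETCH_MASK   (1u << 4)

static uint32_t ept_violation_error_code(uint64_t exit_qualification)
{
    uint32_t error_code;

    error_code  = exit_qualification & PFERR_WRITE_MASK;         /* 1 -> 1 */
    error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;   /* 2 -> 4 */
    error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK; /* 3 -> 0 */
    return error_code;
}

int main(void)
{
    /* write to a present translation: qualification bits 1 and 3 */
    printf("error_code=%#x\n",
           ept_violation_error_code((1u << 1) | (1u << 3)));
    return 0;
}
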
vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155); /* * TODO: What about debug traps on tss switch? @@ -5539,11 +5537,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) trace_kvm_page_fault(gpa, exit_qualification); /* It is a write fault? */ - error_code = exit_qualification & (1U << 1); + error_code = exit_qualification & PFERR_WRITE_MASK; /* It is a fetch fault? */ - error_code |= (exit_qualification & (1U << 2)) << 2; + error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK; /* ept page table is present? */ - error_code |= (exit_qualification >> 3) & 0x1; + error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK; vcpu->arch.exit_qualification = exit_qualification; @@ -5785,6 +5783,204 @@ static void update_ple_window_actual_max(void) ple_window_grow, INT_MIN); } +static __init int hardware_setup(void) +{ + int r = -ENOMEM, i, msr; + + rdmsrl_safe(MSR_EFER, &host_efer); + + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) + kvm_define_shared_msr(i, vmx_msr_index[i]); + + vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_io_bitmap_a) + return r; + + vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_io_bitmap_b) + goto out; + + vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_legacy) + goto out1; + + vmx_msr_bitmap_legacy_x2apic = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_legacy_x2apic) + goto out2; + + vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_longmode) + goto out3; + + vmx_msr_bitmap_longmode_x2apic = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_longmode_x2apic) + goto out4; + vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmread_bitmap) + goto out5; + + vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmwrite_bitmap) + goto out6; + + memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); + memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); + + /* + * Allow direct access to the PC debug port (it is often used for I/O + * delays, but the vmexits simply slow things down). + */ + memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); + clear_bit(0x80, vmx_io_bitmap_a); + + memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); + + memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); + memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); + + vmx_disable_intercept_for_msr(MSR_FS_BASE, false); + vmx_disable_intercept_for_msr(MSR_GS_BASE, false); + vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); + vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); + + memcpy(vmx_msr_bitmap_legacy_x2apic, + vmx_msr_bitmap_legacy, PAGE_SIZE); + memcpy(vmx_msr_bitmap_longmode_x2apic, + vmx_msr_bitmap_longmode, PAGE_SIZE); + + if (enable_apicv) { + for (msr = 0x800; msr <= 0x8ff; msr++) + vmx_disable_intercept_msr_read_x2apic(msr); + + /* According SDM, in x2apic mode, the whole id reg is used. + * But in KVM, it only use the highest eight bits. 
Need to + * intercept it */ + vmx_enable_intercept_msr_read_x2apic(0x802); + /* TMCCT */ + vmx_enable_intercept_msr_read_x2apic(0x839); + /* TPR */ + vmx_disable_intercept_msr_write_x2apic(0x808); + /* EOI */ + vmx_disable_intercept_msr_write_x2apic(0x80b); + /* SELF-IPI */ + vmx_disable_intercept_msr_write_x2apic(0x83f); + } + + if (enable_ept) { + kvm_mmu_set_mask_ptes(0ull, + (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, + (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull, + 0ull, VMX_EPT_EXECUTABLE_MASK); + ept_set_mmio_spte_mask(); + kvm_enable_tdp(); + } else + kvm_disable_tdp(); + + update_ple_window_actual_max(); + + if (setup_vmcs_config(&vmcs_config) < 0) { + r = -EIO; + goto out7; + } + + if (boot_cpu_has(X86_FEATURE_NX)) + kvm_enable_efer_bits(EFER_NX); + + if (!cpu_has_vmx_vpid()) + enable_vpid = 0; + if (!cpu_has_vmx_shadow_vmcs()) + enable_shadow_vmcs = 0; + if (enable_shadow_vmcs) + init_vmcs_shadow_fields(); + + if (!cpu_has_vmx_ept() || + !cpu_has_vmx_ept_4levels()) { + enable_ept = 0; + enable_unrestricted_guest = 0; + enable_ept_ad_bits = 0; + } + + if (!cpu_has_vmx_ept_ad_bits()) + enable_ept_ad_bits = 0; + + if (!cpu_has_vmx_unrestricted_guest()) + enable_unrestricted_guest = 0; + + if (!cpu_has_vmx_flexpriority()) { + flexpriority_enabled = 0; + + /* + * set_apic_access_page_addr() is used to reload apic access + * page upon invalidation. No need to do anything if the + * processor does not have the APIC_ACCESS_ADDR VMCS field. + */ + kvm_x86_ops->set_apic_access_page_addr = NULL; + } + + if (!cpu_has_vmx_tpr_shadow()) + kvm_x86_ops->update_cr8_intercept = NULL; + + if (enable_ept && !cpu_has_vmx_ept_2m_page()) + kvm_disable_largepages(); + + if (!cpu_has_vmx_ple()) + ple_gap = 0; + + if (!cpu_has_vmx_apicv()) + enable_apicv = 0; + + if (enable_apicv) + kvm_x86_ops->update_cr8_intercept = NULL; + else { + kvm_x86_ops->hwapic_irr_update = NULL; + kvm_x86_ops->deliver_posted_interrupt = NULL; + kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; + } + + if (nested) + nested_vmx_setup_ctls_msrs(); + + return alloc_kvm_area(); + +out7: + free_page((unsigned long)vmx_vmwrite_bitmap); +out6: + free_page((unsigned long)vmx_vmread_bitmap); +out5: + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); +out4: + free_page((unsigned long)vmx_msr_bitmap_longmode); +out3: + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); +out2: + free_page((unsigned long)vmx_msr_bitmap_legacy); +out1: + free_page((unsigned long)vmx_io_bitmap_b); +out: + free_page((unsigned long)vmx_io_bitmap_a); + + return r; +} + +static __exit void hardware_unsetup(void) +{ + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); + free_page((unsigned long)vmx_msr_bitmap_legacy); + free_page((unsigned long)vmx_msr_bitmap_longmode); + free_page((unsigned long)vmx_io_bitmap_b); + free_page((unsigned long)vmx_io_bitmap_a); + free_page((unsigned long)vmx_vmwrite_bitmap); + free_page((unsigned long)vmx_vmread_bitmap); + + free_kvm_area(); +} + /* * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE * exiting, so only get here on cpu with PAUSE-Loop-Exiting. @@ -6361,58 +6557,60 @@ static inline int vmcs_field_readonly(unsigned long field) * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of * 64-bit fields are to be returned). 
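 * (For example, GUEST_RIP is a natural-width field: vmcs12 stores it
 * as a u64, but a 32-bit L1 that VMREADs it only ends up consuming
 * the low 32 bits of the value returned here.)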
 */
-static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
-                   unsigned long field, u64 *ret)
+static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
+                  unsigned long field, u64 *ret)
 {
     short offset = vmcs_field_to_offset(field);
     char *p;
 
     if (offset < 0)
-        return 0;
+        return offset;
 
     p = ((char *)(get_vmcs12(vcpu))) + offset;
 
     switch (vmcs_field_type(field)) {
     case VMCS_FIELD_TYPE_NATURAL_WIDTH:
         *ret = *((natural_width *)p);
-        return 1;
+        return 0;
     case VMCS_FIELD_TYPE_U16:
         *ret = *((u16 *)p);
-        return 1;
+        return 0;
     case VMCS_FIELD_TYPE_U32:
         *ret = *((u32 *)p);
-        return 1;
+        return 0;
     case VMCS_FIELD_TYPE_U64:
         *ret = *((u64 *)p);
-        return 1;
+        return 0;
     default:
-        return 0; /* can never happen. */
+        WARN_ON(1);
+        return -ENOENT;
     }
 }
 
-static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
-                    unsigned long field, u64 field_value){
+static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
+                   unsigned long field, u64 field_value){
     short offset = vmcs_field_to_offset(field);
     char *p = ((char *) get_vmcs12(vcpu)) + offset;
 
     if (offset < 0)
-        return false;
+        return offset;
 
     switch (vmcs_field_type(field)) {
     case VMCS_FIELD_TYPE_U16:
         *(u16 *)p = field_value;
-        return true;
+        return 0;
     case VMCS_FIELD_TYPE_U32:
         *(u32 *)p = field_value;
-        return true;
+        return 0;
     case VMCS_FIELD_TYPE_U64:
         *(u64 *)p = field_value;
-        return true;
+        return 0;
     case VMCS_FIELD_TYPE_NATURAL_WIDTH:
         *(natural_width *)p = field_value;
-        return true;
+        return 0;
     default:
-        return false; /* can never happen. */
+        WARN_ON(1);
+        return -ENOENT;
     }
 }
 
@@ -6445,6 +6643,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
         case VMCS_FIELD_TYPE_NATURAL_WIDTH:
             field_value = vmcs_readl(field);
             break;
+        default:
+            WARN_ON(1);
+            continue;
         }
         vmcs12_write_any(&vmx->vcpu, field, field_value);
     }
@@ -6490,6 +6691,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
         case VMCS_FIELD_TYPE_NATURAL_WIDTH:
             vmcs_writel(field, (long)field_value);
             break;
+        default:
+            WARN_ON(1);
+            break;
         }
     }
 }
@@ -6528,7 +6732,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
     /* Decode instruction info and find the field to read */
     field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
     /* Read the field, zero-extended to a u64 field_value */
-    if (!vmcs12_read_any(vcpu, field, &field_value)) {
+    if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
         nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
         skip_emulated_instruction(vcpu);
         return 1;
@@ -6598,7 +6802,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
         return 1;
     }
 
-    if (!vmcs12_write_any(vcpu, field, field_value)) {
+    if (vmcs12_write_any(vcpu, field, field_value) < 0) {
         nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
         skip_emulated_instruction(vcpu);
         return 1;
@@ -6802,6 +7006,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
     [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
     [EXIT_REASON_INVEPT]                  = handle_invept,
     [EXIT_REASON_INVVPID]                 = handle_invvpid,
+    [EXIT_REASON_XSAVES]                  = handle_xsaves,
+    [EXIT_REASON_XRSTORS]                 = handle_xrstors,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7089,6 +7295,14 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
         return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
     case EXIT_REASON_XSETBV:
         return 1;
+    case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
+        /*
+         * This should never happen, since it is not possible to
+         * set XSS to a non-zero value---neither in L1 nor in L2.
+         * If it were, XSS would have to be checked against
+         * the XSS exit bitmap in vmcs12.
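+         * (In practice VMX_XSS_EXIT_BITMAP is 0 and, as noted above,
+         * the guest can never load a non-zero IA32_XSS, so this exit
+         * reason stays dead code; handle_xsaves() and handle_xrstors()
+         * earlier in the patch WARN for the same reason.)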
+ */ + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); default: return 1; } @@ -7277,6 +7491,9 @@ static void vmx_set_rvi(int vector) u16 status; u8 old; + if (vector == -1) + vector = 0; + status = vmcs_read16(GUEST_INTR_STATUS); old = (u8)status & 0xff; if ((u8)vector != old) { @@ -7288,22 +7505,23 @@ static void vmx_set_rvi(int vector) static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) { + if (!is_guest_mode(vcpu)) { + vmx_set_rvi(max_irr); + return; + } + if (max_irr == -1) return; /* - * If a vmexit is needed, vmx_check_nested_events handles it. + * In guest mode. If a vmexit is needed, vmx_check_nested_events + * handles it. */ - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) + if (nested_exit_on_intr(vcpu)) return; - if (!is_guest_mode(vcpu)) { - vmx_set_rvi(max_irr); - return; - } - /* - * Fall back to pre-APICv interrupt injection since L2 + * Else, fall back to pre-APICv interrupt injection since L2 * is run without virtual interrupt delivery. */ if (!kvm_event_needs_reinjection(vcpu) && @@ -7400,6 +7618,12 @@ static bool vmx_mpx_supported(void) (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS); } +static bool vmx_xsaves_supported(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_XSAVES; +} + static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -8135,6 +8359,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); + if (nested_cpu_has_xsaves(vmcs12)) + vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap); vmcs_write64(VMCS_LINK_POINTER, -1ull); exec_control = vmcs12->pin_based_vm_exec_control; @@ -8775,6 +9001,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); if (vmx_mpx_supported()) vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); + if (nested_cpu_has_xsaves(vmcs12)) + vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); /* update exit information fields: */ @@ -9176,6 +9404,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .check_intercept = vmx_check_intercept, .handle_external_intr = vmx_handle_external_intr, .mpx_supported = vmx_mpx_supported, + .xsaves_supported = vmx_xsaves_supported, .check_nested_events = vmx_check_nested_events, @@ -9184,150 +9413,21 @@ static struct kvm_x86_ops vmx_x86_ops = { static int __init vmx_init(void) { - int r, i, msr; - - rdmsrl_safe(MSR_EFER, &host_efer); - - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) - kvm_define_shared_msr(i, vmx_msr_index[i]); - - vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_io_bitmap_a) - return -ENOMEM; - - r = -ENOMEM; - - vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_io_bitmap_b) - goto out; - - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy) - goto out1; - - vmx_msr_bitmap_legacy_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy_x2apic) - goto out2; - - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode) - goto out3; - - vmx_msr_bitmap_longmode_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode_x2apic) - goto out4; - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_vmread_bitmap) - goto out5; - - vmx_vmwrite_bitmap = (unsigned long 
*)__get_free_page(GFP_KERNEL); - if (!vmx_vmwrite_bitmap) - goto out6; - - memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); - memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); - - /* - * Allow direct access to the PC debug port (it is often used for I/O - * delays, but the vmexits simply slow things down). - */ - memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); - clear_bit(0x80, vmx_io_bitmap_a); - - memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - - r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), - __alignof__(struct vcpu_vmx), THIS_MODULE); + int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), + __alignof__(struct vcpu_vmx), THIS_MODULE); if (r) - goto out7; + return r; #ifdef CONFIG_KEXEC rcu_assign_pointer(crash_vmclear_loaded_vmcss, crash_vmclear_local_loaded_vmcss); #endif - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); - - memcpy(vmx_msr_bitmap_legacy_x2apic, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic, - vmx_msr_bitmap_longmode, PAGE_SIZE); - - if (enable_apicv) { - for (msr = 0x800; msr <= 0x8ff; msr++) - vmx_disable_intercept_msr_read_x2apic(msr); - - /* According SDM, in x2apic mode, the whole id reg is used. - * But in KVM, it only use the highest eight bits. Need to - * intercept it */ - vmx_enable_intercept_msr_read_x2apic(0x802); - /* TMCCT */ - vmx_enable_intercept_msr_read_x2apic(0x839); - /* TPR */ - vmx_disable_intercept_msr_write_x2apic(0x808); - /* EOI */ - vmx_disable_intercept_msr_write_x2apic(0x80b); - /* SELF-IPI */ - vmx_disable_intercept_msr_write_x2apic(0x83f); - } - - if (enable_ept) { - kvm_mmu_set_mask_ptes(0ull, - (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, - (enable_ept_ad_bits) ? 
VMX_EPT_DIRTY_BIT : 0ull, - 0ull, VMX_EPT_EXECUTABLE_MASK); - ept_set_mmio_spte_mask(); - kvm_enable_tdp(); - } else - kvm_disable_tdp(); - - update_ple_window_actual_max(); - return 0; - -out7: - free_page((unsigned long)vmx_vmwrite_bitmap); -out6: - free_page((unsigned long)vmx_vmread_bitmap); -out5: - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); -out4: - free_page((unsigned long)vmx_msr_bitmap_longmode); -out3: - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); -out2: - free_page((unsigned long)vmx_msr_bitmap_legacy); -out1: - free_page((unsigned long)vmx_io_bitmap_b); -out: - free_page((unsigned long)vmx_io_bitmap_a); - return r; } static void __exit vmx_exit(void) { - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); - free_page((unsigned long)vmx_msr_bitmap_legacy); - free_page((unsigned long)vmx_msr_bitmap_longmode); - free_page((unsigned long)vmx_io_bitmap_b); - free_page((unsigned long)vmx_io_bitmap_a); - free_page((unsigned long)vmx_vmwrite_bitmap); - free_page((unsigned long)vmx_vmread_bitmap); - #ifdef CONFIG_KEXEC RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); synchronize_rcu(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0033df32a74..c259814200b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -27,6 +27,7 @@ #include "kvm_cache_regs.h" #include "x86.h" #include "cpuid.h" +#include "assigned-dev.h" #include <linux/clocksource.h> #include <linux/interrupt.h> @@ -353,6 +354,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, if (!vcpu->arch.exception.pending) { queue: + if (has_error && !is_protmode(vcpu)) + has_error = false; vcpu->arch.exception.pending = true; vcpu->arch.exception.has_error_code = has_error; vcpu->arch.exception.nr = nr; @@ -455,6 +458,16 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) } EXPORT_SYMBOL_GPL(kvm_require_cpl); +bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr) +{ + if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE)) + return true; + + kvm_queue_exception(vcpu, UD_VECTOR); + return false; +} +EXPORT_SYMBOL_GPL(kvm_require_dr); + /* * This function will be used to read from the physical memory of the currently * running guest. 
The difference to kvm_read_guest_page is that this function @@ -656,6 +669,12 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR))) return 1; + if (xcr0 & XSTATE_AVX512) { + if (!(xcr0 & XSTATE_YMM)) + return 1; + if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512) + return 1; + } kvm_put_guest_xcr0(vcpu); vcpu->arch.xcr0 = xcr0; @@ -732,6 +751,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { +#ifdef CONFIG_X86_64 + cr3 &= ~CR3_PCID_INVD; +#endif + if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { kvm_mmu_sync_roots(vcpu); kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); @@ -811,8 +834,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) vcpu->arch.eff_db[dr] = val; break; case 4: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return 1; /* #UD */ /* fall through */ case 6: if (val & 0xffffffff00000000ULL) @@ -821,8 +842,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) kvm_update_dr6(vcpu); break; case 5: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return 1; /* #UD */ /* fall through */ default: /* 7 */ if (val & 0xffffffff00000000ULL) @@ -837,27 +856,21 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { - int res; - - res = __kvm_set_dr(vcpu, dr, val); - if (res > 0) - kvm_queue_exception(vcpu, UD_VECTOR); - else if (res < 0) + if (__kvm_set_dr(vcpu, dr, val)) { kvm_inject_gp(vcpu, 0); - - return res; + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(kvm_set_dr); -static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) +int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) { switch (dr) { case 0 ... 3: *val = vcpu->arch.db[dr]; break; case 4: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return 1; /* fall through */ case 6: if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) @@ -866,23 +879,11 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) *val = kvm_x86_ops->get_dr6(vcpu); break; case 5: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return 1; /* fall through */ default: /* 7 */ *val = vcpu->arch.dr7; break; } - - return 0; -} - -int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) -{ - if (_kvm_get_dr(vcpu, dr, val)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } return 0; } EXPORT_SYMBOL_GPL(kvm_get_dr); @@ -1237,21 +1238,22 @@ void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 bool vcpus_matched; - bool do_request = false; struct kvm_arch *ka = &vcpu->kvm->arch; struct pvclock_gtod_data *gtod = &pvclock_gtod_data; vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 == atomic_read(&vcpu->kvm->online_vcpus)); - if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC) - if (!ka->use_master_clock) - do_request = 1; - - if (!vcpus_matched && ka->use_master_clock) - do_request = 1; - - if (do_request) + /* + * Once the masterclock is enabled, always perform request in + * order to update it. + * + * In order to enable masterclock, the host clocksource must be TSC + * and the vcpus need to have matched TSCs. When that happens, + * perform request to enable masterclock. 
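+     * (The update handler re-computes these conditions, so the same
+     * request also serves to switch the masterclock off again once
+     * the host clocksource or the TSC matching stops qualifying.)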
+ */ + if (ka->use_master_clock || + (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched)) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc, @@ -1637,16 +1639,16 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; vcpu->last_guest_tsc = tsc_timestamp; + if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, + &guest_hv_clock, sizeof(guest_hv_clock)))) + return 0; + /* * The interface expects us to write an even number signaling that the * update is finished. Since the guest won't see the intermediate * state, we just increase by 2 at the end. */ - vcpu->hv_clock.version += 2; - - if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, - &guest_hv_clock, sizeof(guest_hv_clock)))) - return 0; + vcpu->hv_clock.version = guest_hv_clock.version + 2; /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); @@ -1662,6 +1664,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.flags = pvclock_flags; + trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, sizeof(vcpu->hv_clock)); @@ -2140,7 +2144,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC_ADJUST: if (guest_cpuid_has_tsc_adjust(vcpu)) { if (!msr_info->host_initiated) { - u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; + s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true); } vcpu->arch.ia32_tsc_adjust_msr = data; @@ -3106,7 +3110,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, unsigned long val; memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); - _kvm_get_dr(vcpu, 6, &val); + kvm_get_dr(vcpu, 6, &val); dbgregs->dr6 = val; dbgregs->dr7 = vcpu->arch.dr7; dbgregs->flags = 0; @@ -3128,15 +3132,89 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, return 0; } +#define XSTATE_COMPACTION_ENABLED (1ULL << 63) + +static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) +{ + struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; + u64 xstate_bv = xsave->xsave_hdr.xstate_bv; + u64 valid; + + /* + * Copy legacy XSAVE area, to avoid complications with CPUID + * leaves 0 and 1 in the loop below. + */ + memcpy(dest, xsave, XSAVE_HDR_OFFSET); + + /* Set XSTATE_BV */ + *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv; + + /* + * Copy each region from the possibly compacted offset to the + * non-compacted offset. + */ + valid = xstate_bv & ~XSTATE_FPSSE; + while (valid) { + u64 feature = valid & -valid; + int index = fls64(feature) - 1; + void *src = get_xsave_addr(xsave, feature); + + if (src) { + u32 size, offset, ecx, edx; + cpuid_count(XSTATE_CPUID, index, + &size, &offset, &ecx, &edx); + memcpy(dest + offset, src, size); + } + + valid -= feature; + } +} + +static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) +{ + struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; + u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET); + u64 valid; + + /* + * Copy legacy XSAVE area, to avoid complications with CPUID + * leaves 0 and 1 in the loop below. + */ + memcpy(xsave, src, XSAVE_HDR_OFFSET); + + /* Set XSTATE_BV and possibly XCOMP_BV. 
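+     * Bit 63 of XCOMP_BV marks the save area as compacted; the
+     * remaining bits mirror host_xcr0 so that XRSTORS accepts the
+     * buffer. fill_xsave() and load_xsave() convert between the
+     * compacted and standard layouts using CPUID leaf 0xD, whose
+     * subleaf i reports state component i's size (EAX) and its
+     * standard-format offset (EBX).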
*/ + xsave->xsave_hdr.xstate_bv = xstate_bv; + if (cpu_has_xsaves) + xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; + + /* + * Copy each region from the non-compacted offset to the + * possibly compacted offset. + */ + valid = xstate_bv & ~XSTATE_FPSSE; + while (valid) { + u64 feature = valid & -valid; + int index = fls64(feature) - 1; + void *dest = get_xsave_addr(xsave, feature); + + if (dest) { + u32 size, offset, ecx, edx; + cpuid_count(XSTATE_CPUID, index, + &size, &offset, &ecx, &edx); + memcpy(dest, src + offset, size); + } else + WARN_ON_ONCE(1); + + valid -= feature; + } +} + static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { if (cpu_has_xsave) { - memcpy(guest_xsave->region, - &vcpu->arch.guest_fpu.state->xsave, - vcpu->arch.guest_xstate_size); - *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &= - vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE; + memset(guest_xsave, 0, sizeof(struct kvm_xsave)); + fill_xsave((u8 *) guest_xsave->region, vcpu); } else { memcpy(guest_xsave->region, &vcpu->arch.guest_fpu.state->fxsave, @@ -3160,8 +3238,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, */ if (xstate_bv & ~kvm_supported_xcr0()) return -EINVAL; - memcpy(&vcpu->arch.guest_fpu.state->xsave, - guest_xsave->region, vcpu->arch.guest_xstate_size); + load_xsave(vcpu, (u8 *)guest_xsave->region); } else { if (xstate_bv & ~XSTATE_FPSSE) return -EINVAL; @@ -4004,7 +4081,7 @@ long kvm_arch_vm_ioctl(struct file *filp, } default: - ; + r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); } out: return r; @@ -4667,7 +4744,7 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { - return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); + return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); } int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) @@ -5211,21 +5288,17 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) { - struct kvm_run *kvm_run = vcpu->run; - unsigned long eip = vcpu->arch.emulate_ctxt.eip; - u32 dr6 = 0; - if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { - dr6 = kvm_vcpu_check_hw_bp(eip, 0, + struct kvm_run *kvm_run = vcpu->run; + unsigned long eip = kvm_get_linear_rip(vcpu); + u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, vcpu->arch.guest_debug_dr7, vcpu->arch.eff_db); if (dr6 != 0) { kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; - kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + - get_segment_base(vcpu, VCPU_SREG_CS); - + kvm_run->debug.arch.pc = eip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; *r = EMULATE_USER_EXIT; @@ -5235,7 +5308,8 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) && !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) { - dr6 = kvm_vcpu_check_hw_bp(eip, 0, + unsigned long eip = kvm_get_linear_rip(vcpu); + u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, vcpu->arch.dr7, vcpu->arch.db); @@ -5365,7 +5439,9 @@ restart: kvm_rip_write(vcpu, ctxt->eip); if (r == EMULATE_DONE) kvm_vcpu_check_singlestep(vcpu, rflags, &r); - __kvm_set_rflags(vcpu, ctxt->eflags); + if (!ctxt->have_exception || + exception_type(ctxt->exception.vector) == EXCPT_TRAP) + __kvm_set_rflags(vcpu, ctxt->eflags); /* * For STI, interrupts are shadowed; so 
KVM_REQ_EVENT will @@ -5965,6 +6041,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | X86_EFLAGS_RF); + if (vcpu->arch.exception.nr == DB_VECTOR && + (vcpu->arch.dr7 & DR7_GD)) { + vcpu->arch.dr7 &= ~DR7_GD; + kvm_update_dr7(vcpu); + } + kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code, @@ -6873,6 +6955,9 @@ int fx_init(struct kvm_vcpu *vcpu) return err; fpu_finit(&vcpu->arch.guest_fpu); + if (cpu_has_xsaves) + vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv = + host_xcr0 | XSTATE_COMPACTION_ENABLED; /* * Ensure guest xcr0 is valid for loading @@ -7024,7 +7109,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_reset(vcpu); } -void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) { struct kvm_segment cs; @@ -7256,6 +7341,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type) return -EINVAL; + INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); @@ -7536,12 +7622,18 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) return kvm_x86_ops->interrupt_allowed(vcpu); } -bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) +unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu) { - unsigned long current_rip = kvm_rip_read(vcpu) + - get_segment_base(vcpu, VCPU_SREG_CS); + if (is_64_bit_mode(vcpu)) + return kvm_rip_read(vcpu); + return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) + + kvm_rip_read(vcpu)); +} +EXPORT_SYMBOL_GPL(kvm_get_linear_rip); - return current_rip == linear_rip; +bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) +{ + return kvm_get_linear_rip(vcpu) == linear_rip; } EXPORT_SYMBOL_GPL(kvm_is_linear_rip); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 7cb9c45a5fe..cc1d61af614 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -162,7 +162,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ - | XSTATE_BNDREGS | XSTATE_BNDCSR) + | XSTATE_BNDREGS | XSTATE_BNDCSR \ + | XSTATE_AVX512) extern u64 host_xcr0; extern u64 kvm_supported_xcr0(void); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b74a7e130b0..38dcec403b4 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1247,7 +1247,7 @@ good_area: } /* User mode? Just return to handle the fatal exception */ - if (fault & FAULT_FLAG_USER) + if (flags & FAULT_FLAG_USER) return; /* Not returning to user mode? 
Handle exceptions or die: */ diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 27b71a0b72d..3ec85dfce12 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2370,8 +2370,12 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request, opcode = CEPH_OSD_OP_READ; } - osd_req_op_extent_init(osd_request, num_ops, opcode, offset, length, - 0, 0); + if (opcode == CEPH_OSD_OP_DELETE) + osd_req_op_init(osd_request, num_ops, opcode); + else + osd_req_op_extent_init(osd_request, num_ops, opcode, + offset, length, 0, 0); + if (obj_request->type == OBJ_REQUEST_BIO) osd_req_op_extent_osd_data_bio(osd_request, num_ops, obj_request->bio_list, length); @@ -3405,8 +3409,7 @@ err_rq: if (result) rbd_warn(rbd_dev, "%s %llx at %llx result %d", obj_op_name(op_type), length, offset, result); - if (snapc) - ceph_put_snap_context(snapc); + ceph_put_snap_context(snapc); blk_end_request_all(rq, result); } diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index 6753fd940c7..fe41d5ae7cb 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -177,6 +177,10 @@ static struct attribute *lm75_attrs[] = { }; ATTRIBUTE_GROUPS(lm75); +static const struct thermal_zone_of_device_ops lm75_of_thermal_ops = { + .get_temp = lm75_read_temp, +}; + /*-----------------------------------------------------------------------*/ /* device probe and removal */ @@ -296,10 +300,9 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) if (IS_ERR(data->hwmon_dev)) return PTR_ERR(data->hwmon_dev); - data->tz = thermal_zone_of_sensor_register(data->hwmon_dev, - 0, + data->tz = thermal_zone_of_sensor_register(data->hwmon_dev, 0, data->hwmon_dev, - lm75_read_temp, NULL); + &lm75_of_thermal_ops); if (IS_ERR(data->tz)) data->tz = NULL; diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index fd9a945fe8d..112e4d45e4a 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -486,6 +486,10 @@ static const struct attribute_group ntc_attr_group = { .attrs = ntc_attributes, }; +static const struct thermal_zone_of_device_ops ntc_of_thermal_ops = { + .get_temp = ntc_read_temp, +}; + static int ntc_thermistor_probe(struct platform_device *pdev) { const struct of_device_id *of_id = @@ -579,7 +583,7 @@ static int ntc_thermistor_probe(struct platform_device *pdev) pdev_id->name); data->tz = thermal_zone_of_sensor_register(data->dev, 0, data->dev, - ntc_read_temp, NULL); + &ntc_of_thermal_ops); if (IS_ERR(data->tz)) { dev_dbg(&pdev->dev, "Failed to register to thermal fw.\n"); data->tz = NULL; diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c index 51719956cc0..ba9f478f64e 100644 --- a/drivers/hwmon/tmp102.c +++ b/drivers/hwmon/tmp102.c @@ -158,6 +158,10 @@ ATTRIBUTE_GROUPS(tmp102); #define TMP102_CONFIG (TMP102_CONF_TM | TMP102_CONF_EM | TMP102_CONF_CR1) #define TMP102_CONFIG_RD_ONLY (TMP102_CONF_R0 | TMP102_CONF_R1 | TMP102_CONF_AL) +static const struct thermal_zone_of_device_ops tmp102_of_thermal_ops = { + .get_temp = tmp102_read_temp, +}; + static int tmp102_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -215,7 +219,7 @@ static int tmp102_probe(struct i2c_client *client, } tmp102->hwmon_dev = hwmon_dev; tmp102->tz = thermal_zone_of_sensor_register(hwmon_dev, 0, hwmon_dev, - tmp102_read_temp, NULL); + &tmp102_of_thermal_ops); if (IS_ERR(tmp102->tz)) tmp102->tz = NULL; diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index e29c04e2aff..e853a213468 100644 --- 
a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -527,14 +527,14 @@ EXPORT_SYMBOL(gameport_set_phys); */ static void gameport_init_port(struct gameport *gameport) { - static atomic_t gameport_no = ATOMIC_INIT(0); + static atomic_t gameport_no = ATOMIC_INIT(-1); __module_get(THIS_MODULE); mutex_init(&gameport->drv_mutex); device_initialize(&gameport->dev); dev_set_name(&gameport->dev, "gameport%lu", - (unsigned long)atomic_inc_return(&gameport_no) - 1); + (unsigned long)atomic_inc_return(&gameport_no)); gameport->dev.bus = &gameport_bus; gameport->dev.release = gameport_release_port; if (gameport->parent) diff --git a/drivers/input/input.c b/drivers/input/input.c index 0f175f55782..04217c2e345 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1775,7 +1775,7 @@ EXPORT_SYMBOL_GPL(input_class); */ struct input_dev *input_allocate_device(void) { - static atomic_t input_no = ATOMIC_INIT(0); + static atomic_t input_no = ATOMIC_INIT(-1); struct input_dev *dev; dev = kzalloc(sizeof(struct input_dev), GFP_KERNEL); @@ -1790,7 +1790,7 @@ struct input_dev *input_allocate_device(void) INIT_LIST_HEAD(&dev->node); dev_set_name(&dev->dev, "input%lu", - (unsigned long) atomic_inc_return(&input_no) - 1); + (unsigned long)atomic_inc_return(&input_no)); __module_get(THIS_MODULE); } diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index fc55f0d15b7..3aa2f3f3da5 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -886,8 +886,8 @@ static void xpad_led_set(struct led_classdev *led_cdev, static int xpad_led_probe(struct usb_xpad *xpad) { - static atomic_t led_seq = ATOMIC_INIT(0); - long led_no; + static atomic_t led_seq = ATOMIC_INIT(-1); + unsigned long led_no; struct xpad_led *led; struct led_classdev *led_cdev; int error; @@ -899,9 +899,9 @@ static int xpad_led_probe(struct usb_xpad *xpad) if (!led) return -ENOMEM; - led_no = (long)atomic_inc_return(&led_seq) - 1; + led_no = atomic_inc_return(&led_seq); - snprintf(led->name, sizeof(led->name), "xpad%ld", led_no); + snprintf(led->name, sizeof(led->name), "xpad%lu", led_no); led->xpad = xpad; led_cdev = &led->led_cdev; diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index a3958c63d7d..96ee26c555e 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -665,14 +665,14 @@ config KEYBOARD_CROS_EC To compile this driver as a module, choose M here: the module will be called cros_ec_keyb. -config KEYBOARD_CAP1106 - tristate "Microchip CAP1106 touch sensor" +config KEYBOARD_CAP11XX + tristate "Microchip CAP11XX based touch sensors" depends on OF && I2C select REGMAP_I2C help - Say Y here to enable the CAP1106 touch sensor driver. + Say Y here to enable the CAP11XX touch sensor driver. To compile this driver as a module, choose M here: the - module will be called cap1106. + module will be called cap11xx. 
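An aside on the gameport, input core and xpad hunks above: all three convert their instance-numbering counters from ATOMIC_INIT(0) plus atomic_inc_return(...) - 1 to ATOMIC_INIT(-1) plus a bare atomic_inc_return(). The generated names (gameport0, input0, xpad0, ...) are unchanged; starting at -1 simply lets the post-increment value be used as the id directly, dropping the trailing subtraction and the cast gymnastics in xpad. A minimal user-space sketch of the idiom, using C11 stdatomic in place of the kernel's atomic_t purely for illustration (next_id() and the printed names here are hypothetical):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Start at -1 so the first post-increment value is 0, mirroring
     * "static atomic_t input_no = ATOMIC_INIT(-1)" in the patch. */
    static atomic_long seq = ATOMIC_VAR_INIT(-1);

    static long next_id(void)
    {
        /* The kernel's atomic_inc_return() returns the value after the
         * increment; C11 atomic_fetch_add() returns the value before
         * it, hence the + 1. */
        return atomic_fetch_add(&seq, 1) + 1;
    }

    int main(void)
    {
        for (int i = 0; i < 3; i++)
            printf("input%ld\n", next_id()); /* input0, input1, input2 */
        return 0;
    }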
endif diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 0a3345634d7..febafa527eb 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_KEYBOARD_AMIGA) += amikbd.o obj-$(CONFIG_KEYBOARD_ATARI) += atakbd.o obj-$(CONFIG_KEYBOARD_ATKBD) += atkbd.o obj-$(CONFIG_KEYBOARD_BFIN) += bf54x-keys.o -obj-$(CONFIG_KEYBOARD_CAP1106) += cap1106.o +obj-$(CONFIG_KEYBOARD_CAP11XX) += cap11xx.o obj-$(CONFIG_KEYBOARD_CLPS711X) += clps711x-keypad.o obj-$(CONFIG_KEYBOARD_CROS_EC) += cros_ec_keyb.o obj-$(CONFIG_KEYBOARD_DAVINCI) += davinci_keyscan.o diff --git a/drivers/input/keyboard/amikbd.c b/drivers/input/keyboard/amikbd.c index d3b8c58fcfd..e04a3b4e55d 100644 --- a/drivers/input/keyboard/amikbd.c +++ b/drivers/input/keyboard/amikbd.c @@ -45,6 +45,7 @@ MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>"); MODULE_DESCRIPTION("Amiga keyboard driver"); MODULE_LICENSE("GPL"); +#ifdef CONFIG_HW_CONSOLE static unsigned char amikbd_keycode[0x78] __initdata = { [0] = KEY_GRAVE, [1] = KEY_1, @@ -144,6 +145,32 @@ static unsigned char amikbd_keycode[0x78] __initdata = { [103] = KEY_RIGHTMETA }; +static void __init amikbd_init_console_keymaps(void) +{ + /* We can spare 512 bytes on stack for temp_map in init path. */ + unsigned short temp_map[NR_KEYS]; + int i, j; + + for (i = 0; i < MAX_NR_KEYMAPS; i++) { + if (!key_maps[i]) + continue; + memset(temp_map, 0, sizeof(temp_map)); + for (j = 0; j < 0x78; j++) { + if (!amikbd_keycode[j]) + continue; + temp_map[j] = key_maps[i][amikbd_keycode[j]]; + } + for (j = 0; j < NR_KEYS; j++) { + if (!temp_map[j]) + temp_map[j] = 0xf200; + } + memcpy(key_maps[i], temp_map, sizeof(temp_map)); + } +} +#else /* !CONFIG_HW_CONSOLE */ +static inline void amikbd_init_console_keymaps(void) {} +#endif /* !CONFIG_HW_CONSOLE */ + static const char *amikbd_messages[8] = { [0] = KERN_ALERT "amikbd: Ctrl-Amiga-Amiga reset warning!!\n", [1] = KERN_WARNING "amikbd: keyboard lost sync\n", @@ -186,7 +213,7 @@ static irqreturn_t amikbd_interrupt(int irq, void *data) static int __init amikbd_probe(struct platform_device *pdev) { struct input_dev *dev; - int i, j, err; + int i, err; dev = input_allocate_device(); if (!dev) { @@ -207,22 +234,8 @@ static int __init amikbd_probe(struct platform_device *pdev) for (i = 0; i < 0x78; i++) set_bit(i, dev->keybit); - for (i = 0; i < MAX_NR_KEYMAPS; i++) { - static u_short temp_map[NR_KEYS] __initdata; - if (!key_maps[i]) - continue; - memset(temp_map, 0, sizeof(temp_map)); - for (j = 0; j < 0x78; j++) { - if (!amikbd_keycode[j]) - continue; - temp_map[j] = key_maps[i][amikbd_keycode[j]]; - } - for (j = 0; j < NR_KEYS; j++) { - if (!temp_map[j]) - temp_map[j] = 0xf200; - } - memcpy(key_maps[i], temp_map, sizeof(temp_map)); - } + amikbd_init_console_keymaps(); + ciaa.cra &= ~0x41; /* serial data in, turn off TA */ err = request_irq(IRQ_AMIGA_CIAA_SP, amikbd_interrupt, 0, "amikbd", dev); diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 6f5d7956913..e27a25892db 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -456,8 +456,9 @@ static irqreturn_t atkbd_interrupt(struct serio *serio, unsigned char data, keycode = atkbd->keycode[code]; - if (keycode != ATKBD_KEY_NULL) - input_event(dev, EV_MSC, MSC_SCAN, code); + if (!(atkbd->release && test_bit(code, atkbd->force_release_mask))) + if (keycode != ATKBD_KEY_NULL) + input_event(dev, EV_MSC, MSC_SCAN, code); switch (keycode) { case ATKBD_KEY_NULL: @@ -511,6 +512,7 @@ 
static irqreturn_t atkbd_interrupt(struct serio *serio, unsigned char data, input_sync(dev); if (value && test_bit(code, atkbd->force_release_mask)) { + input_event(dev, EV_MSC, MSC_SCAN, code); input_report_key(dev, keycode, 0); input_sync(dev); } diff --git a/drivers/input/keyboard/cap1106.c b/drivers/input/keyboard/cap1106.c deleted file mode 100644 index d70b65a14ce..00000000000 --- a/drivers/input/keyboard/cap1106.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Input driver for Microchip CAP1106, 6 channel capacitive touch sensor - * - * http://www.microchip.com/wwwproducts/Devices.aspx?product=CAP1106 - * - * (c) 2014 Daniel Mack <linux@zonque.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/interrupt.h> -#include <linux/input.h> -#include <linux/of_irq.h> -#include <linux/regmap.h> -#include <linux/i2c.h> -#include <linux/gpio/consumer.h> - -#define CAP1106_REG_MAIN_CONTROL 0x00 -#define CAP1106_REG_MAIN_CONTROL_GAIN_SHIFT (6) -#define CAP1106_REG_MAIN_CONTROL_GAIN_MASK (0xc0) -#define CAP1106_REG_MAIN_CONTROL_DLSEEP BIT(4) -#define CAP1106_REG_GENERAL_STATUS 0x02 -#define CAP1106_REG_SENSOR_INPUT 0x03 -#define CAP1106_REG_NOISE_FLAG_STATUS 0x0a -#define CAP1106_REG_SENOR_DELTA(X) (0x10 + (X)) -#define CAP1106_REG_SENSITIVITY_CONTROL 0x1f -#define CAP1106_REG_CONFIG 0x20 -#define CAP1106_REG_SENSOR_ENABLE 0x21 -#define CAP1106_REG_SENSOR_CONFIG 0x22 -#define CAP1106_REG_SENSOR_CONFIG2 0x23 -#define CAP1106_REG_SAMPLING_CONFIG 0x24 -#define CAP1106_REG_CALIBRATION 0x26 -#define CAP1106_REG_INT_ENABLE 0x27 -#define CAP1106_REG_REPEAT_RATE 0x28 -#define CAP1106_REG_MT_CONFIG 0x2a -#define CAP1106_REG_MT_PATTERN_CONFIG 0x2b -#define CAP1106_REG_MT_PATTERN 0x2d -#define CAP1106_REG_RECALIB_CONFIG 0x2f -#define CAP1106_REG_SENSOR_THRESH(X) (0x30 + (X)) -#define CAP1106_REG_SENSOR_NOISE_THRESH 0x38 -#define CAP1106_REG_STANDBY_CHANNEL 0x40 -#define CAP1106_REG_STANDBY_CONFIG 0x41 -#define CAP1106_REG_STANDBY_SENSITIVITY 0x42 -#define CAP1106_REG_STANDBY_THRESH 0x43 -#define CAP1106_REG_CONFIG2 0x44 -#define CAP1106_REG_SENSOR_BASE_CNT(X) (0x50 + (X)) -#define CAP1106_REG_SENSOR_CALIB (0xb1 + (X)) -#define CAP1106_REG_SENSOR_CALIB_LSB1 0xb9 -#define CAP1106_REG_SENSOR_CALIB_LSB2 0xba -#define CAP1106_REG_PRODUCT_ID 0xfd -#define CAP1106_REG_MANUFACTURER_ID 0xfe -#define CAP1106_REG_REVISION 0xff - -#define CAP1106_NUM_CHN 6 -#define CAP1106_PRODUCT_ID 0x55 -#define CAP1106_MANUFACTURER_ID 0x5d - -struct cap1106_priv { - struct regmap *regmap; - struct input_dev *idev; - - /* config */ - unsigned short keycodes[CAP1106_NUM_CHN]; -}; - -static const struct reg_default cap1106_reg_defaults[] = { - { CAP1106_REG_MAIN_CONTROL, 0x00 }, - { CAP1106_REG_GENERAL_STATUS, 0x00 }, - { CAP1106_REG_SENSOR_INPUT, 0x00 }, - { CAP1106_REG_NOISE_FLAG_STATUS, 0x00 }, - { CAP1106_REG_SENSITIVITY_CONTROL, 0x2f }, - { CAP1106_REG_CONFIG, 0x20 }, - { CAP1106_REG_SENSOR_ENABLE, 0x3f }, - { CAP1106_REG_SENSOR_CONFIG, 0xa4 }, - { CAP1106_REG_SENSOR_CONFIG2, 0x07 }, - { CAP1106_REG_SAMPLING_CONFIG, 0x39 }, - { CAP1106_REG_CALIBRATION, 0x00 }, - { CAP1106_REG_INT_ENABLE, 0x3f }, - { CAP1106_REG_REPEAT_RATE, 0x3f }, - { CAP1106_REG_MT_CONFIG, 0x80 }, - { CAP1106_REG_MT_PATTERN_CONFIG, 0x00 }, - { CAP1106_REG_MT_PATTERN, 0x3f }, - { CAP1106_REG_RECALIB_CONFIG, 0x8a }, - { CAP1106_REG_SENSOR_THRESH(0), 
0x40 }, - { CAP1106_REG_SENSOR_THRESH(1), 0x40 }, - { CAP1106_REG_SENSOR_THRESH(2), 0x40 }, - { CAP1106_REG_SENSOR_THRESH(3), 0x40 }, - { CAP1106_REG_SENSOR_THRESH(4), 0x40 }, - { CAP1106_REG_SENSOR_THRESH(5), 0x40 }, - { CAP1106_REG_SENSOR_NOISE_THRESH, 0x01 }, - { CAP1106_REG_STANDBY_CHANNEL, 0x00 }, - { CAP1106_REG_STANDBY_CONFIG, 0x39 }, - { CAP1106_REG_STANDBY_SENSITIVITY, 0x02 }, - { CAP1106_REG_STANDBY_THRESH, 0x40 }, - { CAP1106_REG_CONFIG2, 0x40 }, - { CAP1106_REG_SENSOR_CALIB_LSB1, 0x00 }, - { CAP1106_REG_SENSOR_CALIB_LSB2, 0x00 }, -}; - -static bool cap1106_volatile_reg(struct device *dev, unsigned int reg) -{ - switch (reg) { - case CAP1106_REG_MAIN_CONTROL: - case CAP1106_REG_SENSOR_INPUT: - case CAP1106_REG_SENOR_DELTA(0): - case CAP1106_REG_SENOR_DELTA(1): - case CAP1106_REG_SENOR_DELTA(2): - case CAP1106_REG_SENOR_DELTA(3): - case CAP1106_REG_SENOR_DELTA(4): - case CAP1106_REG_SENOR_DELTA(5): - case CAP1106_REG_PRODUCT_ID: - case CAP1106_REG_MANUFACTURER_ID: - case CAP1106_REG_REVISION: - return true; - } - - return false; -} - -static const struct regmap_config cap1106_regmap_config = { - .reg_bits = 8, - .val_bits = 8, - - .max_register = CAP1106_REG_REVISION, - .reg_defaults = cap1106_reg_defaults, - - .num_reg_defaults = ARRAY_SIZE(cap1106_reg_defaults), - .cache_type = REGCACHE_RBTREE, - .volatile_reg = cap1106_volatile_reg, -}; - -static irqreturn_t cap1106_thread_func(int irq_num, void *data) -{ - struct cap1106_priv *priv = data; - unsigned int status; - int ret, i; - - /* - * Deassert interrupt. This needs to be done before reading the status - * registers, which will not carry valid values otherwise. - */ - ret = regmap_update_bits(priv->regmap, CAP1106_REG_MAIN_CONTROL, 1, 0); - if (ret < 0) - goto out; - - ret = regmap_read(priv->regmap, CAP1106_REG_SENSOR_INPUT, &status); - if (ret < 0) - goto out; - - for (i = 0; i < CAP1106_NUM_CHN; i++) - input_report_key(priv->idev, priv->keycodes[i], - status & (1 << i)); - - input_sync(priv->idev); - -out: - return IRQ_HANDLED; -} - -static int cap1106_set_sleep(struct cap1106_priv *priv, bool sleep) -{ - return regmap_update_bits(priv->regmap, CAP1106_REG_MAIN_CONTROL, - CAP1106_REG_MAIN_CONTROL_DLSEEP, - sleep ? 
CAP1106_REG_MAIN_CONTROL_DLSEEP : 0); -} - -static int cap1106_input_open(struct input_dev *idev) -{ - struct cap1106_priv *priv = input_get_drvdata(idev); - - return cap1106_set_sleep(priv, false); -} - -static void cap1106_input_close(struct input_dev *idev) -{ - struct cap1106_priv *priv = input_get_drvdata(idev); - - cap1106_set_sleep(priv, true); -} - -static int cap1106_i2c_probe(struct i2c_client *i2c_client, - const struct i2c_device_id *id) -{ - struct device *dev = &i2c_client->dev; - struct cap1106_priv *priv; - struct device_node *node; - int i, error, irq, gain = 0; - unsigned int val, rev; - u32 gain32, keycodes[CAP1106_NUM_CHN]; - - priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->regmap = devm_regmap_init_i2c(i2c_client, &cap1106_regmap_config); - if (IS_ERR(priv->regmap)) - return PTR_ERR(priv->regmap); - - error = regmap_read(priv->regmap, CAP1106_REG_PRODUCT_ID, &val); - if (error) - return error; - - if (val != CAP1106_PRODUCT_ID) { - dev_err(dev, "Product ID: Got 0x%02x, expected 0x%02x\n", - val, CAP1106_PRODUCT_ID); - return -ENODEV; - } - - error = regmap_read(priv->regmap, CAP1106_REG_MANUFACTURER_ID, &val); - if (error) - return error; - - if (val != CAP1106_MANUFACTURER_ID) { - dev_err(dev, "Manufacturer ID: Got 0x%02x, expected 0x%02x\n", - val, CAP1106_MANUFACTURER_ID); - return -ENODEV; - } - - error = regmap_read(priv->regmap, CAP1106_REG_REVISION, &rev); - if (error < 0) - return error; - - dev_info(dev, "CAP1106 detected, revision 0x%02x\n", rev); - i2c_set_clientdata(i2c_client, priv); - node = dev->of_node; - - if (!of_property_read_u32(node, "microchip,sensor-gain", &gain32)) { - if (is_power_of_2(gain32) && gain32 <= 8) - gain = ilog2(gain32); - else - dev_err(dev, "Invalid sensor-gain value %d\n", gain32); - } - - BUILD_BUG_ON(ARRAY_SIZE(keycodes) != ARRAY_SIZE(priv->keycodes)); - - /* Provide some useful defaults */ - for (i = 0; i < ARRAY_SIZE(keycodes); i++) - keycodes[i] = KEY_A + i; - - of_property_read_u32_array(node, "linux,keycodes", - keycodes, ARRAY_SIZE(keycodes)); - - for (i = 0; i < ARRAY_SIZE(keycodes); i++) - priv->keycodes[i] = keycodes[i]; - - error = regmap_update_bits(priv->regmap, CAP1106_REG_MAIN_CONTROL, - CAP1106_REG_MAIN_CONTROL_GAIN_MASK, - gain << CAP1106_REG_MAIN_CONTROL_GAIN_SHIFT); - if (error) - return error; - - /* Disable autorepeat. The Linux input system has its own handling. */ - error = regmap_write(priv->regmap, CAP1106_REG_REPEAT_RATE, 0); - if (error) - return error; - - priv->idev = devm_input_allocate_device(dev); - if (!priv->idev) - return -ENOMEM; - - priv->idev->name = "CAP1106 capacitive touch sensor"; - priv->idev->id.bustype = BUS_I2C; - priv->idev->evbit[0] = BIT_MASK(EV_KEY); - - if (of_property_read_bool(node, "autorepeat")) - __set_bit(EV_REP, priv->idev->evbit); - - for (i = 0; i < CAP1106_NUM_CHN; i++) - __set_bit(priv->keycodes[i], priv->idev->keybit); - - __clear_bit(KEY_RESERVED, priv->idev->keybit); - - priv->idev->keycode = priv->keycodes; - priv->idev->keycodesize = sizeof(priv->keycodes[0]); - priv->idev->keycodemax = ARRAY_SIZE(priv->keycodes); - - priv->idev->id.vendor = CAP1106_MANUFACTURER_ID; - priv->idev->id.product = CAP1106_PRODUCT_ID; - priv->idev->id.version = rev; - - priv->idev->open = cap1106_input_open; - priv->idev->close = cap1106_input_close; - - input_set_drvdata(priv->idev, priv); - - /* - * Put the device in deep sleep mode for now. - * ->open() will bring it back once the it is actually needed. 
- */ - cap1106_set_sleep(priv, true); - - error = input_register_device(priv->idev); - if (error) - return error; - - irq = irq_of_parse_and_map(node, 0); - if (!irq) { - dev_err(dev, "Unable to parse or map IRQ\n"); - return -ENXIO; - } - - error = devm_request_threaded_irq(dev, irq, NULL, cap1106_thread_func, - IRQF_ONESHOT, dev_name(dev), priv); - if (error) - return error; - - return 0; -} - -static const struct of_device_id cap1106_dt_ids[] = { - { .compatible = "microchip,cap1106", }, - {} -}; -MODULE_DEVICE_TABLE(of, cap1106_dt_ids); - -static const struct i2c_device_id cap1106_i2c_ids[] = { - { "cap1106", 0 }, - {} -}; -MODULE_DEVICE_TABLE(i2c, cap1106_i2c_ids); - -static struct i2c_driver cap1106_i2c_driver = { - .driver = { - .name = "cap1106", - .owner = THIS_MODULE, - .of_match_table = cap1106_dt_ids, - }, - .id_table = cap1106_i2c_ids, - .probe = cap1106_i2c_probe, -}; - -module_i2c_driver(cap1106_i2c_driver); - -MODULE_ALIAS("platform:cap1106"); -MODULE_DESCRIPTION("Microchip CAP1106 driver"); -MODULE_AUTHOR("Daniel Mack <linux@zonque.org>"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/input/keyboard/cap11xx.c b/drivers/input/keyboard/cap11xx.c new file mode 100644 index 00000000000..4f59f0bab28 --- /dev/null +++ b/drivers/input/keyboard/cap11xx.c @@ -0,0 +1,376 @@ +/* + * Input driver for Microchip CAP11xx based capacitive touch sensors + * + * (c) 2014 Daniel Mack <linux@zonque.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/input.h> +#include <linux/of_irq.h> +#include <linux/regmap.h> +#include <linux/i2c.h> +#include <linux/gpio/consumer.h> + +#define CAP11XX_REG_MAIN_CONTROL 0x00 +#define CAP11XX_REG_MAIN_CONTROL_GAIN_SHIFT (6) +#define CAP11XX_REG_MAIN_CONTROL_GAIN_MASK (0xc0) +#define CAP11XX_REG_MAIN_CONTROL_DLSEEP BIT(4) +#define CAP11XX_REG_GENERAL_STATUS 0x02 +#define CAP11XX_REG_SENSOR_INPUT 0x03 +#define CAP11XX_REG_NOISE_FLAG_STATUS 0x0a +#define CAP11XX_REG_SENOR_DELTA(X) (0x10 + (X)) +#define CAP11XX_REG_SENSITIVITY_CONTROL 0x1f +#define CAP11XX_REG_CONFIG 0x20 +#define CAP11XX_REG_SENSOR_ENABLE 0x21 +#define CAP11XX_REG_SENSOR_CONFIG 0x22 +#define CAP11XX_REG_SENSOR_CONFIG2 0x23 +#define CAP11XX_REG_SAMPLING_CONFIG 0x24 +#define CAP11XX_REG_CALIBRATION 0x26 +#define CAP11XX_REG_INT_ENABLE 0x27 +#define CAP11XX_REG_REPEAT_RATE 0x28 +#define CAP11XX_REG_MT_CONFIG 0x2a +#define CAP11XX_REG_MT_PATTERN_CONFIG 0x2b +#define CAP11XX_REG_MT_PATTERN 0x2d +#define CAP11XX_REG_RECALIB_CONFIG 0x2f +#define CAP11XX_REG_SENSOR_THRESH(X) (0x30 + (X)) +#define CAP11XX_REG_SENSOR_NOISE_THRESH 0x38 +#define CAP11XX_REG_STANDBY_CHANNEL 0x40 +#define CAP11XX_REG_STANDBY_CONFIG 0x41 +#define CAP11XX_REG_STANDBY_SENSITIVITY 0x42 +#define CAP11XX_REG_STANDBY_THRESH 0x43 +#define CAP11XX_REG_CONFIG2 0x44 +#define CAP11XX_REG_CONFIG2_ALT_POL BIT(6) +#define CAP11XX_REG_SENSOR_BASE_CNT(X) (0x50 + (X)) +#define CAP11XX_REG_SENSOR_CALIB (0xb1 + (X)) +#define CAP11XX_REG_SENSOR_CALIB_LSB1 0xb9 +#define CAP11XX_REG_SENSOR_CALIB_LSB2 0xba +#define CAP11XX_REG_PRODUCT_ID 0xfd +#define CAP11XX_REG_MANUFACTURER_ID 0xfe +#define CAP11XX_REG_REVISION 0xff + +#define CAP11XX_MANUFACTURER_ID 0x5d + +struct cap11xx_priv { + struct regmap *regmap; + struct input_dev *idev; + + /* config */ + u32 keycodes[]; +}; + +struct cap11xx_hw_model { 
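+    /* expected PRODUCT_ID register value and channel count for one
+     * supported model; indexed by i2c_device_id->driver_data below */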
+ u8 product_id; + unsigned int num_channels; +}; + +enum { + CAP1106, + CAP1126, + CAP1188, +}; + +static const struct cap11xx_hw_model cap11xx_devices[] = { + [CAP1106] = { .product_id = 0x55, .num_channels = 6 }, + [CAP1126] = { .product_id = 0x53, .num_channels = 6 }, + [CAP1188] = { .product_id = 0x50, .num_channels = 8 }, +}; + +static const struct reg_default cap11xx_reg_defaults[] = { + { CAP11XX_REG_MAIN_CONTROL, 0x00 }, + { CAP11XX_REG_GENERAL_STATUS, 0x00 }, + { CAP11XX_REG_SENSOR_INPUT, 0x00 }, + { CAP11XX_REG_NOISE_FLAG_STATUS, 0x00 }, + { CAP11XX_REG_SENSITIVITY_CONTROL, 0x2f }, + { CAP11XX_REG_CONFIG, 0x20 }, + { CAP11XX_REG_SENSOR_ENABLE, 0x3f }, + { CAP11XX_REG_SENSOR_CONFIG, 0xa4 }, + { CAP11XX_REG_SENSOR_CONFIG2, 0x07 }, + { CAP11XX_REG_SAMPLING_CONFIG, 0x39 }, + { CAP11XX_REG_CALIBRATION, 0x00 }, + { CAP11XX_REG_INT_ENABLE, 0x3f }, + { CAP11XX_REG_REPEAT_RATE, 0x3f }, + { CAP11XX_REG_MT_CONFIG, 0x80 }, + { CAP11XX_REG_MT_PATTERN_CONFIG, 0x00 }, + { CAP11XX_REG_MT_PATTERN, 0x3f }, + { CAP11XX_REG_RECALIB_CONFIG, 0x8a }, + { CAP11XX_REG_SENSOR_THRESH(0), 0x40 }, + { CAP11XX_REG_SENSOR_THRESH(1), 0x40 }, + { CAP11XX_REG_SENSOR_THRESH(2), 0x40 }, + { CAP11XX_REG_SENSOR_THRESH(3), 0x40 }, + { CAP11XX_REG_SENSOR_THRESH(4), 0x40 }, + { CAP11XX_REG_SENSOR_THRESH(5), 0x40 }, + { CAP11XX_REG_SENSOR_NOISE_THRESH, 0x01 }, + { CAP11XX_REG_STANDBY_CHANNEL, 0x00 }, + { CAP11XX_REG_STANDBY_CONFIG, 0x39 }, + { CAP11XX_REG_STANDBY_SENSITIVITY, 0x02 }, + { CAP11XX_REG_STANDBY_THRESH, 0x40 }, + { CAP11XX_REG_CONFIG2, 0x40 }, + { CAP11XX_REG_SENSOR_CALIB_LSB1, 0x00 }, + { CAP11XX_REG_SENSOR_CALIB_LSB2, 0x00 }, +}; + +static bool cap11xx_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case CAP11XX_REG_MAIN_CONTROL: + case CAP11XX_REG_SENSOR_INPUT: + case CAP11XX_REG_SENOR_DELTA(0): + case CAP11XX_REG_SENOR_DELTA(1): + case CAP11XX_REG_SENOR_DELTA(2): + case CAP11XX_REG_SENOR_DELTA(3): + case CAP11XX_REG_SENOR_DELTA(4): + case CAP11XX_REG_SENOR_DELTA(5): + case CAP11XX_REG_PRODUCT_ID: + case CAP11XX_REG_MANUFACTURER_ID: + case CAP11XX_REG_REVISION: + return true; + } + + return false; +} + +static const struct regmap_config cap11xx_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + + .max_register = CAP11XX_REG_REVISION, + .reg_defaults = cap11xx_reg_defaults, + + .num_reg_defaults = ARRAY_SIZE(cap11xx_reg_defaults), + .cache_type = REGCACHE_RBTREE, + .volatile_reg = cap11xx_volatile_reg, +}; + +static irqreturn_t cap11xx_thread_func(int irq_num, void *data) +{ + struct cap11xx_priv *priv = data; + unsigned int status; + int ret, i; + + /* + * Deassert interrupt. This needs to be done before reading the status + * registers, which will not carry valid values otherwise. + */ + ret = regmap_update_bits(priv->regmap, CAP11XX_REG_MAIN_CONTROL, 1, 0); + if (ret < 0) + goto out; + + ret = regmap_read(priv->regmap, CAP11XX_REG_SENSOR_INPUT, &status); + if (ret < 0) + goto out; + + for (i = 0; i < priv->idev->keycodemax; i++) + input_report_key(priv->idev, priv->keycodes[i], + status & (1 << i)); + + input_sync(priv->idev); + +out: + return IRQ_HANDLED; +} + +static int cap11xx_set_sleep(struct cap11xx_priv *priv, bool sleep) +{ + return regmap_update_bits(priv->regmap, CAP11XX_REG_MAIN_CONTROL, + CAP11XX_REG_MAIN_CONTROL_DLSEEP, + sleep ? 
CAP11XX_REG_MAIN_CONTROL_DLSEEP : 0); +} + +static int cap11xx_input_open(struct input_dev *idev) +{ + struct cap11xx_priv *priv = input_get_drvdata(idev); + + return cap11xx_set_sleep(priv, false); +} + +static void cap11xx_input_close(struct input_dev *idev) +{ + struct cap11xx_priv *priv = input_get_drvdata(idev); + + cap11xx_set_sleep(priv, true); +} + +static int cap11xx_i2c_probe(struct i2c_client *i2c_client, + const struct i2c_device_id *id) +{ + struct device *dev = &i2c_client->dev; + struct cap11xx_priv *priv; + struct device_node *node; + const struct cap11xx_hw_model *cap; + int i, error, irq, gain = 0; + unsigned int val, rev; + u32 gain32; + + if (id->driver_data >= ARRAY_SIZE(cap11xx_devices)) { + dev_err(dev, "Invalid device ID %lu\n", id->driver_data); + return -EINVAL; + } + + cap = &cap11xx_devices[id->driver_data]; + if (!cap || !cap->num_channels) { + dev_err(dev, "Invalid device configuration\n"); + return -EINVAL; + } + + priv = devm_kzalloc(dev, + sizeof(*priv) + + cap->num_channels * sizeof(priv->keycodes[0]), + GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->regmap = devm_regmap_init_i2c(i2c_client, &cap11xx_regmap_config); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); + + error = regmap_read(priv->regmap, CAP11XX_REG_PRODUCT_ID, &val); + if (error) + return error; + + if (val != cap->product_id) { + dev_err(dev, "Product ID: Got 0x%02x, expected 0x%02x\n", + val, cap->product_id); + return -ENXIO; + } + + error = regmap_read(priv->regmap, CAP11XX_REG_MANUFACTURER_ID, &val); + if (error) + return error; + + if (val != CAP11XX_MANUFACTURER_ID) { + dev_err(dev, "Manufacturer ID: Got 0x%02x, expected 0x%02x\n", + val, CAP11XX_MANUFACTURER_ID); + return -ENXIO; + } + + error = regmap_read(priv->regmap, CAP11XX_REG_REVISION, &rev); + if (error < 0) + return error; + + dev_info(dev, "CAP11XX detected, revision 0x%02x\n", rev); + i2c_set_clientdata(i2c_client, priv); + node = dev->of_node; + + if (!of_property_read_u32(node, "microchip,sensor-gain", &gain32)) { + if (is_power_of_2(gain32) && gain32 <= 8) + gain = ilog2(gain32); + else + dev_err(dev, "Invalid sensor-gain value %d\n", gain32); + } + + if (of_property_read_bool(node, "microchip,irq-active-high")) { + error = regmap_update_bits(priv->regmap, CAP11XX_REG_CONFIG2, + CAP11XX_REG_CONFIG2_ALT_POL, 0); + if (error) + return error; + } + + /* Provide some useful defaults */ + for (i = 0; i < cap->num_channels; i++) + priv->keycodes[i] = KEY_A + i; + + of_property_read_u32_array(node, "linux,keycodes", + priv->keycodes, cap->num_channels); + + error = regmap_update_bits(priv->regmap, CAP11XX_REG_MAIN_CONTROL, + CAP11XX_REG_MAIN_CONTROL_GAIN_MASK, + gain << CAP11XX_REG_MAIN_CONTROL_GAIN_SHIFT); + if (error) + return error; + + /* Disable autorepeat. The Linux input system has its own handling. 
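+     * (With the chip's repeat engine left running, held keys would be
+     * reported twice once EV_REP soft-repeat is enabled as well.)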
*/ + error = regmap_write(priv->regmap, CAP11XX_REG_REPEAT_RATE, 0); + if (error) + return error; + + priv->idev = devm_input_allocate_device(dev); + if (!priv->idev) + return -ENOMEM; + + priv->idev->name = "CAP11XX capacitive touch sensor"; + priv->idev->id.bustype = BUS_I2C; + priv->idev->evbit[0] = BIT_MASK(EV_KEY); + + if (of_property_read_bool(node, "autorepeat")) + __set_bit(EV_REP, priv->idev->evbit); + + for (i = 0; i < cap->num_channels; i++) + __set_bit(priv->keycodes[i], priv->idev->keybit); + + __clear_bit(KEY_RESERVED, priv->idev->keybit); + + priv->idev->keycode = priv->keycodes; + priv->idev->keycodesize = sizeof(priv->keycodes[0]); + priv->idev->keycodemax = cap->num_channels; + + priv->idev->id.vendor = CAP11XX_MANUFACTURER_ID; + priv->idev->id.product = cap->product_id; + priv->idev->id.version = rev; + + priv->idev->open = cap11xx_input_open; + priv->idev->close = cap11xx_input_close; + + input_set_drvdata(priv->idev, priv); + + /* + * Put the device in deep sleep mode for now. + * ->open() will bring it back once it is actually needed. + */ + cap11xx_set_sleep(priv, true); + + error = input_register_device(priv->idev); + if (error) + return error; + + irq = irq_of_parse_and_map(node, 0); + if (!irq) { + dev_err(dev, "Unable to parse or map IRQ\n"); + return -ENXIO; + } + + error = devm_request_threaded_irq(dev, irq, NULL, cap11xx_thread_func, + IRQF_ONESHOT, dev_name(dev), priv); + if (error) + return error; + + return 0; +} + +static const struct of_device_id cap11xx_dt_ids[] = { + { .compatible = "microchip,cap1106", }, + { .compatible = "microchip,cap1126", }, + { .compatible = "microchip,cap1188", }, + {} +}; +MODULE_DEVICE_TABLE(of, cap11xx_dt_ids); + +static const struct i2c_device_id cap11xx_i2c_ids[] = { + { "cap1106", CAP1106 }, + { "cap1126", CAP1126 }, + { "cap1188", CAP1188 }, + {} +}; +MODULE_DEVICE_TABLE(i2c, cap11xx_i2c_ids); + +static struct i2c_driver cap11xx_i2c_driver = { + .driver = { + .name = "cap11xx", + .owner = THIS_MODULE, + .of_match_table = cap11xx_dt_ids, + }, + .id_table = cap11xx_i2c_ids, + .probe = cap11xx_i2c_probe, +}; + +module_i2c_driver(cap11xx_i2c_driver); + +MODULE_ALIAS("platform:cap11xx"); +MODULE_DESCRIPTION("Microchip CAP11XX driver"); +MODULE_AUTHOR("Daniel Mack <linux@zonque.org>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 8f3a24e1540..d4dd78a7d56 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -29,6 +29,7 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/of_gpio.h> +#include <linux/of_irq.h> #include <linux/spinlock.h> struct gpio_button_data { @@ -617,27 +618,31 @@ gpio_keys_get_devtree_pdata(struct device *dev) i = 0; for_each_child_of_node(node, pp) { - int gpio; + int gpio = -1; enum of_gpio_flags flags; - if (!of_find_property(pp, "gpios", NULL)) { - pdata->nbuttons--; - dev_warn(dev, "Found button without gpios\n"); - continue; - } + button = &pdata->buttons[i++]; - gpio = of_get_gpio_flags(pp, 0, &flags); - if (gpio < 0) { - error = gpio; - if (error != -EPROBE_DEFER) - dev_err(dev, - "Failed to get gpio flags, error: %d\n", - error); - return ERR_PTR(error); + if (!of_find_property(pp, "gpios", NULL)) { + button->irq = irq_of_parse_and_map(pp, 0); + if (button->irq == 0) { + i--; + pdata->nbuttons--; + dev_warn(dev, "Found button without gpios or irqs\n"); + continue; + } + } else { + gpio = of_get_gpio_flags(pp, 0, &flags); + if (gpio < 0) { + error = gpio; + if (error
!= -EPROBE_DEFER) + dev_err(dev, + "Failed to get gpio flags, error: %d\n", + error); + return ERR_PTR(error); + } } - button = &pdata->buttons[i++]; - button->gpio = gpio; button->active_low = flags & OF_GPIO_ACTIVE_LOW; diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c index cb32e2b506b..21bea52d436 100644 --- a/drivers/input/keyboard/lm8323.c +++ b/drivers/input/keyboard/lm8323.c @@ -616,6 +616,8 @@ static ssize_t lm8323_set_disable(struct device *dev, unsigned int i; ret = kstrtouint(buf, 10, &i); + if (ret) + return ret; mutex_lock(&lm->lock); lm->kp_enabled = !i; diff --git a/drivers/input/keyboard/lpc32xx-keys.c b/drivers/input/keyboard/lpc32xx-keys.c index 8c079371c2e..265d641c40e 100644 --- a/drivers/input/keyboard/lpc32xx-keys.c +++ b/drivers/input/keyboard/lpc32xx-keys.c @@ -66,7 +66,6 @@ struct lpc32xx_kscan_drv { struct input_dev *input; struct clk *clk; - struct resource *iores; void __iomem *kscan_base; unsigned int irq; @@ -188,32 +187,27 @@ static int lpc32xx_kscan_probe(struct platform_device *pdev) return -EINVAL; } - kscandat = kzalloc(sizeof(struct lpc32xx_kscan_drv), GFP_KERNEL); - if (!kscandat) { - dev_err(&pdev->dev, "failed to allocate memory\n"); + kscandat = devm_kzalloc(&pdev->dev, sizeof(*kscandat), + GFP_KERNEL); + if (!kscandat) return -ENOMEM; - } error = lpc32xx_parse_dt(&pdev->dev, kscandat); if (error) { dev_err(&pdev->dev, "failed to parse device tree\n"); - goto err_free_mem; + return error; } keymap_size = sizeof(kscandat->keymap[0]) * (kscandat->matrix_sz << kscandat->row_shift); - kscandat->keymap = kzalloc(keymap_size, GFP_KERNEL); - if (!kscandat->keymap) { - dev_err(&pdev->dev, "could not allocate memory for keymap\n"); - error = -ENOMEM; - goto err_free_mem; - } + kscandat->keymap = devm_kzalloc(&pdev->dev, keymap_size, GFP_KERNEL); + if (!kscandat->keymap) + return -ENOMEM; - kscandat->input = input = input_allocate_device(); + kscandat->input = input = devm_input_allocate_device(&pdev->dev); if (!input) { dev_err(&pdev->dev, "failed to allocate input device\n"); - error = -ENOMEM; - goto err_free_keymap; + return -ENOMEM; } /* Setup key input */ @@ -234,39 +228,26 @@ static int lpc32xx_kscan_probe(struct platform_device *pdev) kscandat->keymap, kscandat->input); if (error) { dev_err(&pdev->dev, "failed to build keymap\n"); - goto err_free_input; + return error; } input_set_drvdata(kscandat->input, kscandat); - kscandat->iores = request_mem_region(res->start, resource_size(res), - pdev->name); - if (!kscandat->iores) { - dev_err(&pdev->dev, "failed to request I/O memory\n"); - error = -EBUSY; - goto err_free_input; - } - - kscandat->kscan_base = ioremap(kscandat->iores->start, - resource_size(kscandat->iores)); - if (!kscandat->kscan_base) { - dev_err(&pdev->dev, "failed to remap I/O memory\n"); - error = -EBUSY; - goto err_release_memregion; - } + kscandat->kscan_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(kscandat->kscan_base)) + return PTR_ERR(kscandat->kscan_base); /* Get the key scanner clock */ - kscandat->clk = clk_get(&pdev->dev, NULL); + kscandat->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(kscandat->clk)) { dev_err(&pdev->dev, "failed to get clock\n"); - error = PTR_ERR(kscandat->clk); - goto err_unmap; + return PTR_ERR(kscandat->clk); } /* Configure the key scanner */ error = clk_prepare_enable(kscandat->clk); if (error) - goto err_clk_put; + return error; writel(kscandat->deb_clks, LPC32XX_KS_DEB(kscandat->kscan_base)); writel(kscandat->scan_delay, 
LPC32XX_KS_SCAN_CTL(kscandat->kscan_base)); @@ -277,52 +258,20 @@ static int lpc32xx_kscan_probe(struct platform_device *pdev) writel(1, LPC32XX_KS_IRQ(kscandat->kscan_base)); clk_disable_unprepare(kscandat->clk); - error = request_irq(irq, lpc32xx_kscan_irq, 0, pdev->name, kscandat); + error = devm_request_irq(&pdev->dev, irq, lpc32xx_kscan_irq, 0, + pdev->name, kscandat); if (error) { dev_err(&pdev->dev, "failed to request irq\n"); - goto err_clk_put; + return error; } error = input_register_device(kscandat->input); if (error) { dev_err(&pdev->dev, "failed to register input device\n"); - goto err_free_irq; + return error; } platform_set_drvdata(pdev, kscandat); - return 0; - -err_free_irq: - free_irq(irq, kscandat); -err_clk_put: - clk_put(kscandat->clk); -err_unmap: - iounmap(kscandat->kscan_base); -err_release_memregion: - release_mem_region(kscandat->iores->start, - resource_size(kscandat->iores)); -err_free_input: - input_free_device(kscandat->input); -err_free_keymap: - kfree(kscandat->keymap); -err_free_mem: - kfree(kscandat); - - return error; -} - -static int lpc32xx_kscan_remove(struct platform_device *pdev) -{ - struct lpc32xx_kscan_drv *kscandat = platform_get_drvdata(pdev); - - free_irq(platform_get_irq(pdev, 0), kscandat); - clk_put(kscandat->clk); - iounmap(kscandat->kscan_base); - release_mem_region(kscandat->iores->start, - resource_size(kscandat->iores)); - input_unregister_device(kscandat->input); - kfree(kscandat->keymap); - kfree(kscandat); return 0; } @@ -378,7 +327,6 @@ MODULE_DEVICE_TABLE(of, lpc32xx_kscan_match); static struct platform_driver lpc32xx_kscan_driver = { .probe = lpc32xx_kscan_probe, - .remove = lpc32xx_kscan_remove, .driver = { .name = DRV_NAME, .pm = &lpc32xx_kscan_pm_ops, diff --git a/drivers/input/keyboard/mpr121_touchkey.c b/drivers/input/keyboard/mpr121_touchkey.c index 009c82256e8..3aa2ec45bca 100644 --- a/drivers/input/keyboard/mpr121_touchkey.c +++ b/drivers/input/keyboard/mpr121_touchkey.c @@ -214,13 +214,14 @@ static int mpr_touchkey_probe(struct i2c_client *client, return -EINVAL; } - mpr121 = kzalloc(sizeof(struct mpr121_touchkey), GFP_KERNEL); - input_dev = input_allocate_device(); - if (!mpr121 || !input_dev) { - dev_err(&client->dev, "Failed to allocate memory\n"); - error = -ENOMEM; - goto err_free_mem; - } + mpr121 = devm_kzalloc(&client->dev, sizeof(*mpr121), + GFP_KERNEL); + if (!mpr121) + return -ENOMEM; + + input_dev = devm_input_allocate_device(&client->dev); + if (!input_dev) + return -ENOMEM; mpr121->client = client; mpr121->input_dev = input_dev; @@ -243,44 +244,26 @@ static int mpr_touchkey_probe(struct i2c_client *client, error = mpr121_phys_init(pdata, mpr121, client); if (error) { dev_err(&client->dev, "Failed to init register\n"); - goto err_free_mem; + return error; } - error = request_threaded_irq(client->irq, NULL, + error = devm_request_threaded_irq(&client->dev, client->irq, NULL, mpr_touchkey_interrupt, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, client->dev.driver->name, mpr121); if (error) { dev_err(&client->dev, "Failed to register interrupt\n"); - goto err_free_mem; + return error; } error = input_register_device(input_dev); if (error) - goto err_free_irq; + return error; i2c_set_clientdata(client, mpr121); device_init_wakeup(&client->dev, pdata->wakeup); return 0; - -err_free_irq: - free_irq(client->irq, mpr121); -err_free_mem: - input_free_device(input_dev); - kfree(mpr121); - return error; -} - -static int mpr_touchkey_remove(struct i2c_client *client) -{ - struct mpr121_touchkey *mpr121 = 
i2c_get_clientdata(client); - - free_irq(client->irq, mpr121); - input_unregister_device(mpr121->input_dev); - kfree(mpr121); - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -327,7 +310,6 @@ static struct i2c_driver mpr_touchkey_driver = { }, .id_table = mpr121_id, .probe = mpr_touchkey_probe, - .remove = mpr_touchkey_remove, }; module_i2c_driver(mpr_touchkey_driver); diff --git a/drivers/input/keyboard/pxa27x_keypad.c b/drivers/input/keyboard/pxa27x_keypad.c index 6ab3e7c9632..a90d6bdc499 100644 --- a/drivers/input/keyboard/pxa27x_keypad.c +++ b/drivers/input/keyboard/pxa27x_keypad.c @@ -741,37 +741,27 @@ static int pxa27x_keypad_probe(struct platform_device *pdev) return -ENXIO; } - keypad = kzalloc(sizeof(struct pxa27x_keypad), GFP_KERNEL); - input_dev = input_allocate_device(); - if (!keypad || !input_dev) { - dev_err(&pdev->dev, "failed to allocate memory\n"); - error = -ENOMEM; - goto failed_free; - } + keypad = devm_kzalloc(&pdev->dev, sizeof(*keypad), + GFP_KERNEL); + if (!keypad) + return -ENOMEM; + + input_dev = devm_input_allocate_device(&pdev->dev); + if (!input_dev) + return -ENOMEM; keypad->pdata = pdata; keypad->input_dev = input_dev; keypad->irq = irq; - res = request_mem_region(res->start, resource_size(res), pdev->name); - if (res == NULL) { - dev_err(&pdev->dev, "failed to request I/O memory\n"); - error = -EBUSY; - goto failed_free; - } - - keypad->mmio_base = ioremap(res->start, resource_size(res)); - if (keypad->mmio_base == NULL) { - dev_err(&pdev->dev, "failed to remap I/O memory\n"); - error = -ENXIO; - goto failed_free_mem; - } + keypad->mmio_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(keypad->mmio_base)) + return PTR_ERR(keypad->mmio_base); - keypad->clk = clk_get(&pdev->dev, NULL); + keypad->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(keypad->clk)) { dev_err(&pdev->dev, "failed to get keypad clock\n"); - error = PTR_ERR(keypad->clk); - goto failed_free_io; + return PTR_ERR(keypad->clk); } input_dev->name = pdev->name; @@ -802,7 +792,7 @@ static int pxa27x_keypad_probe(struct platform_device *pdev) } if (error) { dev_err(&pdev->dev, "failed to build keycode\n"); - goto failed_put_clk; + return error; } keypad->row_shift = get_count_order(pdata->matrix_key_cols); @@ -812,61 +802,26 @@ static int pxa27x_keypad_probe(struct platform_device *pdev) input_dev->evbit[0] |= BIT_MASK(EV_REL); } - error = request_irq(irq, pxa27x_keypad_irq_handler, 0, - pdev->name, keypad); + error = devm_request_irq(&pdev->dev, irq, pxa27x_keypad_irq_handler, + 0, pdev->name, keypad); if (error) { dev_err(&pdev->dev, "failed to request IRQ\n"); - goto failed_put_clk; + return error; } /* Register the input device */ error = input_register_device(input_dev); if (error) { dev_err(&pdev->dev, "failed to register input device\n"); - goto failed_free_irq; + return error; } platform_set_drvdata(pdev, keypad); device_init_wakeup(&pdev->dev, 1); return 0; - -failed_free_irq: - free_irq(irq, keypad); -failed_put_clk: - clk_put(keypad->clk); -failed_free_io: - iounmap(keypad->mmio_base); -failed_free_mem: - release_mem_region(res->start, resource_size(res)); -failed_free: - input_free_device(input_dev); - kfree(keypad); - return error; } -static int pxa27x_keypad_remove(struct platform_device *pdev) -{ - struct pxa27x_keypad *keypad = platform_get_drvdata(pdev); - struct resource *res; - - free_irq(keypad->irq, keypad); - clk_put(keypad->clk); - - input_unregister_device(keypad->input_dev); - iounmap(keypad->mmio_base); - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - 
release_mem_region(res->start, resource_size(res)); - - kfree(keypad); - - return 0; -} - -/* work with hotplug and coldplug */ -MODULE_ALIAS("platform:pxa27x-keypad"); - #ifdef CONFIG_OF static const struct of_device_id pxa27x_keypad_dt_match[] = { { .compatible = "marvell,pxa27x-keypad" }, @@ -877,7 +832,6 @@ MODULE_DEVICE_TABLE(of, pxa27x_keypad_dt_match); static struct platform_driver pxa27x_keypad_driver = { .probe = pxa27x_keypad_probe, - .remove = pxa27x_keypad_remove, .driver = { .name = "pxa27x-keypad", .of_match_table = of_match_ptr(pxa27x_keypad_dt_match), @@ -888,3 +842,5 @@ module_platform_driver(pxa27x_keypad_driver); MODULE_DESCRIPTION("PXA27x Keypad Controller Driver"); MODULE_LICENSE("GPL"); +/* work with hotplug and coldplug */ +MODULE_ALIAS("platform:pxa27x-keypad"); diff --git a/drivers/input/misc/88pm860x_onkey.c b/drivers/input/misc/88pm860x_onkey.c index cfdca6e9977..cc87443aa2e 100644 --- a/drivers/input/misc/88pm860x_onkey.c +++ b/drivers/input/misc/88pm860x_onkey.c @@ -112,8 +112,7 @@ static int pm860x_onkey_probe(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int pm860x_onkey_suspend(struct device *dev) +static int __maybe_unused pm860x_onkey_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent); @@ -122,7 +121,7 @@ static int pm860x_onkey_suspend(struct device *dev) chip->wakeup_flag |= 1 << PM8607_IRQ_ONKEY; return 0; } -static int pm860x_onkey_resume(struct device *dev) +static int __maybe_unused pm860x_onkey_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent); @@ -131,7 +130,6 @@ static int pm860x_onkey_resume(struct device *dev) chip->wakeup_flag &= ~(1 << PM8607_IRQ_ONKEY); return 0; } -#endif static SIMPLE_DEV_PM_OPS(pm860x_onkey_pm_ops, pm860x_onkey_suspend, pm860x_onkey_resume); diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c index e0f522516ef..189bdc8e91a 100644 --- a/drivers/input/misc/ad714x-i2c.c +++ b/drivers/input/misc/ad714x-i2c.c @@ -13,17 +13,15 @@ #include <linux/pm.h> #include "ad714x.h" -#ifdef CONFIG_PM_SLEEP -static int ad714x_i2c_suspend(struct device *dev) +static int __maybe_unused ad714x_i2c_suspend(struct device *dev) { return ad714x_disable(i2c_get_clientdata(to_i2c_client(dev))); } -static int ad714x_i2c_resume(struct device *dev) +static int __maybe_unused ad714x_i2c_resume(struct device *dev) { return ad714x_enable(i2c_get_clientdata(to_i2c_client(dev))); } -#endif static SIMPLE_DEV_PM_OPS(ad714x_i2c_pm, ad714x_i2c_suspend, ad714x_i2c_resume); diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c index 3a90b710e30..a79e50b58bf 100644 --- a/drivers/input/misc/ad714x-spi.c +++ b/drivers/input/misc/ad714x-spi.c @@ -16,17 +16,15 @@ #define AD714x_SPI_CMD_PREFIX 0xE000 /* bits 15:11 */ #define AD714x_SPI_READ BIT(10) -#ifdef CONFIG_PM_SLEEP -static int ad714x_spi_suspend(struct device *dev) +static int __maybe_unused ad714x_spi_suspend(struct device *dev) { return ad714x_disable(spi_get_drvdata(to_spi_device(dev))); } -static int ad714x_spi_resume(struct device *dev) +static int __maybe_unused ad714x_spi_resume(struct device *dev) { return ad714x_enable(spi_get_drvdata(to_spi_device(dev))); } -#endif static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume); diff --git a/drivers/input/misc/adxl34x-i2c.c b/drivers/input/misc/adxl34x-i2c.c index 
416f47ddcc9..470bfd6f083 100644 --- a/drivers/input/misc/adxl34x-i2c.c +++ b/drivers/input/misc/adxl34x-i2c.c @@ -105,8 +105,7 @@ static int adxl34x_i2c_remove(struct i2c_client *client) return adxl34x_remove(ac); } -#ifdef CONFIG_PM_SLEEP -static int adxl34x_i2c_suspend(struct device *dev) +static int __maybe_unused adxl34x_i2c_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct adxl34x *ac = i2c_get_clientdata(client); @@ -116,7 +115,7 @@ static int adxl34x_i2c_suspend(struct device *dev) return 0; } -static int adxl34x_i2c_resume(struct device *dev) +static int __maybe_unused adxl34x_i2c_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct adxl34x *ac = i2c_get_clientdata(client); @@ -125,7 +124,6 @@ static int adxl34x_i2c_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(adxl34x_i2c_pm, adxl34x_i2c_suspend, adxl34x_i2c_resume); diff --git a/drivers/input/misc/adxl34x-spi.c b/drivers/input/misc/adxl34x-spi.c index 76dc0679d3b..da6e76b58da 100644 --- a/drivers/input/misc/adxl34x-spi.c +++ b/drivers/input/misc/adxl34x-spi.c @@ -94,8 +94,7 @@ static int adxl34x_spi_remove(struct spi_device *spi) return adxl34x_remove(ac); } -#ifdef CONFIG_PM_SLEEP -static int adxl34x_spi_suspend(struct device *dev) +static int __maybe_unused adxl34x_spi_suspend(struct device *dev) { struct spi_device *spi = to_spi_device(dev); struct adxl34x *ac = spi_get_drvdata(spi); @@ -105,7 +104,7 @@ static int adxl34x_spi_suspend(struct device *dev) return 0; } -static int adxl34x_spi_resume(struct device *dev) +static int __maybe_unused adxl34x_spi_resume(struct device *dev) { struct spi_device *spi = to_spi_device(dev); struct adxl34x *ac = spi_get_drvdata(spi); @@ -114,7 +113,6 @@ static int adxl34x_spi_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(adxl34x_spi_pm, adxl34x_spi_suspend, adxl34x_spi_resume); diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c index cab87f5ce6d..a364e109ca7 100644 --- a/drivers/input/misc/drv260x.c +++ b/drivers/input/misc/drv260x.c @@ -639,8 +639,7 @@ static int drv260x_probe(struct i2c_client *client, return 0; } -#ifdef CONFIG_PM_SLEEP -static int drv260x_suspend(struct device *dev) +static int __maybe_unused drv260x_suspend(struct device *dev) { struct drv260x_data *haptics = dev_get_drvdata(dev); int ret = 0; @@ -672,7 +671,7 @@ out: return ret; } -static int drv260x_resume(struct device *dev) +static int __maybe_unused drv260x_resume(struct device *dev) { struct drv260x_data *haptics = dev_get_drvdata(dev); int ret = 0; @@ -702,7 +701,6 @@ out: mutex_unlock(&haptics->input_dev->mutex); return ret; } -#endif static SIMPLE_DEV_PM_OPS(drv260x_pm_ops, drv260x_suspend, drv260x_resume); diff --git a/drivers/input/misc/drv2667.c b/drivers/input/misc/drv2667.c index 0f437581cc0..a021744e608 100644 --- a/drivers/input/misc/drv2667.c +++ b/drivers/input/misc/drv2667.c @@ -406,8 +406,7 @@ static int drv2667_probe(struct i2c_client *client, return 0; } -#ifdef CONFIG_PM_SLEEP -static int drv2667_suspend(struct device *dev) +static int __maybe_unused drv2667_suspend(struct device *dev) { struct drv2667_data *haptics = dev_get_drvdata(dev); int ret = 0; @@ -436,7 +435,7 @@ out: return ret; } -static int drv2667_resume(struct device *dev) +static int __maybe_unused drv2667_resume(struct device *dev) { struct drv2667_data *haptics = dev_get_drvdata(dev); int ret = 0; @@ -464,7 +463,6 @@ out: mutex_unlock(&haptics->input_dev->mutex); return ret; } -#endif static 
SIMPLE_DEV_PM_OPS(drv2667_pm_ops, drv2667_suspend, drv2667_resume); diff --git a/drivers/input/misc/gp2ap002a00f.c b/drivers/input/misc/gp2ap002a00f.c index de21e317da3..0ac176d66a6 100644 --- a/drivers/input/misc/gp2ap002a00f.c +++ b/drivers/input/misc/gp2ap002a00f.c @@ -225,8 +225,7 @@ static int gp2a_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int gp2a_suspend(struct device *dev) +static int __maybe_unused gp2a_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct gp2a_data *dt = i2c_get_clientdata(client); @@ -244,7 +243,7 @@ static int gp2a_suspend(struct device *dev) return retval; } -static int gp2a_resume(struct device *dev) +static int __maybe_unused gp2a_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct gp2a_data *dt = i2c_get_clientdata(client); @@ -261,7 +260,6 @@ static int gp2a_resume(struct device *dev) return retval; } -#endif static SIMPLE_DEV_PM_OPS(gp2a_pm, gp2a_suspend, gp2a_resume); diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index afed8e2b2f9..ac1fa5f4458 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1851,7 +1851,7 @@ static int ims_pcu_identify_type(struct ims_pcu *pcu, u8 *device_id) static int ims_pcu_init_application_mode(struct ims_pcu *pcu) { - static atomic_t device_no = ATOMIC_INIT(0); + static atomic_t device_no = ATOMIC_INIT(-1); const struct ims_pcu_device_info *info; int error; @@ -1882,7 +1882,7 @@ static int ims_pcu_init_application_mode(struct ims_pcu *pcu) } /* Device appears to be operable, complete initialization */ - pcu->device_no = atomic_inc_return(&device_no) - 1; + pcu->device_no = atomic_inc_return(&device_no); /* * PCU-B devices, both GEN_1 and GEN_2 do not have OFN sensor diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c index d708478bc5b..6e29349da53 100644 --- a/drivers/input/misc/kxtj9.c +++ b/drivers/input/misc/kxtj9.c @@ -615,8 +615,7 @@ static int kxtj9_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int kxtj9_suspend(struct device *dev) +static int __maybe_unused kxtj9_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct kxtj9_data *tj9 = i2c_get_clientdata(client); @@ -631,7 +630,7 @@ static int kxtj9_suspend(struct device *dev) return 0; } -static int kxtj9_resume(struct device *dev) +static int __maybe_unused kxtj9_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct kxtj9_data *tj9 = i2c_get_clientdata(client); @@ -646,7 +645,6 @@ static int kxtj9_resume(struct device *dev) mutex_unlock(&input_dev->mutex); return retval; } -#endif static SIMPLE_DEV_PM_OPS(kxtj9_pm_ops, kxtj9_suspend, kxtj9_resume); diff --git a/drivers/input/misc/max77693-haptic.c b/drivers/input/misc/max77693-haptic.c index 034093ee63b..39e930c10eb 100644 --- a/drivers/input/misc/max77693-haptic.c +++ b/drivers/input/misc/max77693-haptic.c @@ -309,8 +309,7 @@ static int max77693_haptic_probe(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int max77693_haptic_suspend(struct device *dev) +static int __maybe_unused max77693_haptic_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct max77693_haptic *haptic = platform_get_drvdata(pdev); @@ -323,7 +322,7 @@ static int max77693_haptic_suspend(struct device *dev) return 0; } -static int max77693_haptic_resume(struct device *dev) +static int __maybe_unused max77693_haptic_resume(struct 
device *dev) { struct platform_device *pdev = to_platform_device(dev); struct max77693_haptic *haptic = platform_get_drvdata(pdev); @@ -335,7 +334,6 @@ static int max77693_haptic_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(max77693_haptic_pm_ops, max77693_haptic_suspend, max77693_haptic_resume); diff --git a/drivers/input/misc/max8925_onkey.c b/drivers/input/misc/max8925_onkey.c index 297e2a9169d..7c49b8d2389 100644 --- a/drivers/input/misc/max8925_onkey.c +++ b/drivers/input/misc/max8925_onkey.c @@ -133,8 +133,7 @@ static int max8925_onkey_probe(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int max8925_onkey_suspend(struct device *dev) +static int __maybe_unused max8925_onkey_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct max8925_onkey_info *info = platform_get_drvdata(pdev); @@ -148,7 +147,7 @@ static int max8925_onkey_suspend(struct device *dev) return 0; } -static int max8925_onkey_resume(struct device *dev) +static int __maybe_unused max8925_onkey_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct max8925_onkey_info *info = platform_get_drvdata(pdev); @@ -161,7 +160,6 @@ static int max8925_onkey_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(max8925_onkey_pm_ops, max8925_onkey_suspend, max8925_onkey_resume); diff --git a/drivers/input/misc/max8997_haptic.c b/drivers/input/misc/max8997_haptic.c index 5b3154edf82..d0f68728133 100644 --- a/drivers/input/misc/max8997_haptic.c +++ b/drivers/input/misc/max8997_haptic.c @@ -378,8 +378,7 @@ static int max8997_haptic_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int max8997_haptic_suspend(struct device *dev) +static int __maybe_unused max8997_haptic_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct max8997_haptic *chip = platform_get_drvdata(pdev); @@ -388,7 +387,6 @@ static int max8997_haptic_suspend(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(max8997_haptic_pm_ops, max8997_haptic_suspend, NULL); diff --git a/drivers/input/misc/palmas-pwrbutton.c b/drivers/input/misc/palmas-pwrbutton.c index 066c5ab632c..1f9b5ee9274 100644 --- a/drivers/input/misc/palmas-pwrbutton.c +++ b/drivers/input/misc/palmas-pwrbutton.c @@ -260,7 +260,6 @@ static int palmas_pwron_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP /** * palmas_pwron_suspend() - suspend handler * @dev: power button device @@ -269,7 +268,7 @@ static int palmas_pwron_remove(struct platform_device *pdev) * * Return: 0 */ -static int palmas_pwron_suspend(struct device *dev) +static int __maybe_unused palmas_pwron_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct palmas_pwron *pwron = platform_get_drvdata(pdev); @@ -290,7 +289,7 @@ static int palmas_pwron_suspend(struct device *dev) * * Return: 0 */ -static int palmas_pwron_resume(struct device *dev) +static int __maybe_unused palmas_pwron_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct palmas_pwron *pwron = platform_get_drvdata(pdev); @@ -300,7 +299,6 @@ static int palmas_pwron_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(palmas_pwron_pm, palmas_pwron_suspend, palmas_pwron_resume); diff --git a/drivers/input/misc/pm8xxx-vibrator.c b/drivers/input/misc/pm8xxx-vibrator.c index e9c77a95717..5113877153d 100644 --- 
a/drivers/input/misc/pm8xxx-vibrator.c +++ b/drivers/input/misc/pm8xxx-vibrator.c @@ -199,8 +199,7 @@ static int pm8xxx_vib_probe(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int pm8xxx_vib_suspend(struct device *dev) +static int __maybe_unused pm8xxx_vib_suspend(struct device *dev) { struct pm8xxx_vib *vib = dev_get_drvdata(dev); @@ -209,7 +208,6 @@ static int pm8xxx_vib_suspend(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(pm8xxx_vib_pm_ops, pm8xxx_vib_suspend, NULL); diff --git a/drivers/input/misc/pmic8xxx-pwrkey.c b/drivers/input/misc/pmic8xxx-pwrkey.c index cb799177cbd..c4ca20e6322 100644 --- a/drivers/input/misc/pmic8xxx-pwrkey.c +++ b/drivers/input/misc/pmic8xxx-pwrkey.c @@ -53,8 +53,7 @@ static irqreturn_t pwrkey_release_irq(int irq, void *_pwr) return IRQ_HANDLED; } -#ifdef CONFIG_PM_SLEEP -static int pmic8xxx_pwrkey_suspend(struct device *dev) +static int __maybe_unused pmic8xxx_pwrkey_suspend(struct device *dev) { struct pmic8xxx_pwrkey *pwrkey = dev_get_drvdata(dev); @@ -64,7 +63,7 @@ static int pmic8xxx_pwrkey_suspend(struct device *dev) return 0; } -static int pmic8xxx_pwrkey_resume(struct device *dev) +static int __maybe_unused pmic8xxx_pwrkey_resume(struct device *dev) { struct pmic8xxx_pwrkey *pwrkey = dev_get_drvdata(dev); @@ -73,7 +72,6 @@ static int pmic8xxx_pwrkey_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(pm8xxx_pwr_key_pm_ops, pmic8xxx_pwrkey_suspend, pmic8xxx_pwrkey_resume); diff --git a/drivers/input/misc/pwm-beeper.c b/drivers/input/misc/pwm-beeper.c index 294aa48bad5..a28ee70ff15 100644 --- a/drivers/input/misc/pwm-beeper.c +++ b/drivers/input/misc/pwm-beeper.c @@ -144,8 +144,7 @@ static int pwm_beeper_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int pwm_beeper_suspend(struct device *dev) +static int __maybe_unused pwm_beeper_suspend(struct device *dev) { struct pwm_beeper *beeper = dev_get_drvdata(dev); @@ -155,7 +154,7 @@ static int pwm_beeper_suspend(struct device *dev) return 0; } -static int pwm_beeper_resume(struct device *dev) +static int __maybe_unused pwm_beeper_resume(struct device *dev) { struct pwm_beeper *beeper = dev_get_drvdata(dev); @@ -170,6 +169,7 @@ static int pwm_beeper_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(pwm_beeper_pm_ops, pwm_beeper_suspend, pwm_beeper_resume); +#ifdef CONFIG_PM_SLEEP #define PWM_BEEPER_PM_OPS (&pwm_beeper_pm_ops) #else #define PWM_BEEPER_PM_OPS NULL diff --git a/drivers/input/misc/sirfsoc-onkey.c b/drivers/input/misc/sirfsoc-onkey.c index 4faf9f8d124..9d5b89befe6 100644 --- a/drivers/input/misc/sirfsoc-onkey.c +++ b/drivers/input/misc/sirfsoc-onkey.c @@ -179,8 +179,7 @@ static int sirfsoc_pwrc_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int sirfsoc_pwrc_resume(struct device *dev) +static int __maybe_unused sirfsoc_pwrc_resume(struct device *dev) { struct sirfsoc_pwrc_drvdata *pwrcdrv = dev_get_drvdata(dev); struct input_dev *input = pwrcdrv->input; @@ -196,7 +195,6 @@ static int sirfsoc_pwrc_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(sirfsoc_pwrc_pm_ops, NULL, sirfsoc_pwrc_resume); diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c index ccd6dd18f8f..fc17b9592f5 100644 --- a/drivers/input/misc/twl4030-vibra.c +++ b/drivers/input/misc/twl4030-vibra.c @@ -157,8 +157,7 @@ static void twl4030_vibra_close(struct input_dev *input) } /*** Module ***/ -#ifdef CONFIG_PM_SLEEP -static int 
twl4030_vibra_suspend(struct device *dev) +static int __maybe_unused twl4030_vibra_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct vibra_info *info = platform_get_drvdata(pdev); @@ -169,12 +168,11 @@ static int twl4030_vibra_suspend(struct device *dev) return 0; } -static int twl4030_vibra_resume(struct device *dev) +static int __maybe_unused twl4030_vibra_resume(struct device *dev) { vibra_disable_leds(); return 0; } -#endif static SIMPLE_DEV_PM_OPS(twl4030_vibra_pm_ops, twl4030_vibra_suspend, twl4030_vibra_resume); diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c index 96e0e0c0ccb..0e0d094df2e 100644 --- a/drivers/input/misc/twl6040-vibra.c +++ b/drivers/input/misc/twl6040-vibra.c @@ -236,8 +236,7 @@ static void twl6040_vibra_close(struct input_dev *input) mutex_unlock(&info->mutex); } -#ifdef CONFIG_PM_SLEEP -static int twl6040_vibra_suspend(struct device *dev) +static int __maybe_unused twl6040_vibra_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct vibra_info *info = platform_get_drvdata(pdev); @@ -251,7 +250,6 @@ static int twl6040_vibra_suspend(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(twl6040_vibra_pm_ops, twl6040_vibra_suspend, NULL); diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig index 366fc7ad5eb..d8b46b0f2db 100644 --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -215,6 +215,36 @@ config MOUSE_CYAPA To compile this driver as a module, choose M here: the module will be called cyapa. +config MOUSE_ELAN_I2C + tristate "ELAN I2C Touchpad support" + depends on I2C + help + This driver adds support for Elan I2C/SMBus Trackpads. + + Say Y here if you have an ELAN I2C/SMBus Touchpad. + + To compile this driver as a module, choose M here: the module will be + called elan_i2c. + +config MOUSE_ELAN_I2C_I2C + bool "Enable I2C support" + depends on MOUSE_ELAN_I2C + default y + help + Say Y here if the Elan Touchpad in your system is connected to + a standard I2C controller. + + If unsure, say Y. + +config MOUSE_ELAN_I2C_SMBUS + bool "Enable SMBus support" + depends on MOUSE_ELAN_I2C + help + Say Y here if the Elan Touchpad in your system is connected to + an SMBus adapter. + + If unsure, say Y.
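
The two bool options above select which transport implementations get linked into the composite elan_i2c module (see the Makefile hunk later in this patch); the core then picks one at probe time according to what the host adapter can actually do. A minimal sketch of that runtime selection, assuming the elan_i2c_ops/elan_smbus_ops tables declared in elan_i2c.h later in this series; the helper name and the exact SMBus functionality mask are illustrative assumptions, not code from this patch:

    /*
     * Sketch: choose a transport by querying adapter capabilities.
     * elan_transport_ops, elan_i2c_ops and elan_smbus_ops are from
     * elan_i2c.h below; the functionality mask is an assumed example.
     */
    static int example_pick_transport(struct i2c_client *client,
                                      const struct elan_transport_ops **ops)
    {
            if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
                    *ops = &elan_i2c_ops;   /* raw I2C transfers available */
            else if (i2c_check_functionality(client->adapter,
                                             I2C_FUNC_SMBUS_BYTE_DATA |
                                             I2C_FUNC_SMBUS_BLOCK_DATA |
                                             I2C_FUNC_SMBUS_I2C_BLOCK))
                    *ops = &elan_smbus_ops; /* fall back to SMBus commands */
            else
                    return -ENODEV;         /* adapter cannot drive the pad */

            return 0;
    }

Because both transport options are bool rather than tristate, whichever backends are configured in are built into the one elan_i2c.ko and the choice costs nothing at runtime beyond this probe-time check.
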
+ config MOUSE_INPORT tristate "InPort/MS/ATIXL busmouse" depends on ISA diff --git a/drivers/input/mouse/Makefile b/drivers/input/mouse/Makefile index dda507f8b3a..560003dcac3 100644 --- a/drivers/input/mouse/Makefile +++ b/drivers/input/mouse/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_MOUSE_APPLETOUCH) += appletouch.o obj-$(CONFIG_MOUSE_ATARI) += atarimouse.o obj-$(CONFIG_MOUSE_BCM5974) += bcm5974.o obj-$(CONFIG_MOUSE_CYAPA) += cyapa.o +obj-$(CONFIG_MOUSE_ELAN_I2C) += elan_i2c.o obj-$(CONFIG_MOUSE_GPIO) += gpio_mouse.o obj-$(CONFIG_MOUSE_INPORT) += inport.o obj-$(CONFIG_MOUSE_LOGIBM) += logibm.o @@ -34,3 +35,7 @@ psmouse-$(CONFIG_MOUSE_PS2_SENTELIC) += sentelic.o psmouse-$(CONFIG_MOUSE_PS2_TRACKPOINT) += trackpoint.o psmouse-$(CONFIG_MOUSE_PS2_TOUCHKIT) += touchkit_ps2.o psmouse-$(CONFIG_MOUSE_PS2_CYPRESS) += cypress_ps2.o + +elan_i2c-objs := elan_i2c_core.o +elan_i2c-$(CONFIG_MOUSE_ELAN_I2C_I2C) += elan_i2c_i2c.o +elan_i2c-$(CONFIG_MOUSE_ELAN_I2C_SMBUS) += elan_i2c_smbus.o diff --git a/drivers/input/mouse/cyapa.c b/drivers/input/mouse/cyapa.c index b409c3d7d4f..1bece8cad46 100644 --- a/drivers/input/mouse/cyapa.c +++ b/drivers/input/mouse/cyapa.c @@ -6,7 +6,7 @@ * Daniel Kurtz <djkurtz@chromium.org> * Benson Leung <bleung@chromium.org> * - * Copyright (C) 2011-2012 Cypress Semiconductor, Inc. + * Copyright (C) 2011-2014 Cypress Semiconductor, Inc. * Copyright (C) 2011-2012 Google, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -206,7 +206,6 @@ struct cyapa { struct i2c_client *client; struct input_dev *input; char phys[32]; /* device physical location */ - int irq; bool irq_wake; /* irq wake is enabled */ bool smbus; @@ -422,8 +421,8 @@ static ssize_t cyapa_read_block(struct cyapa *cyapa, u8 cmd_idx, u8 *values) */ static int cyapa_get_state(struct cyapa *cyapa) { - int ret; u8 status[BL_STATUS_SIZE]; + int error; cyapa->state = CYAPA_STATE_NO_DEVICE; @@ -433,18 +432,18 @@ static int cyapa_get_state(struct cyapa *cyapa) * If the device is in operation mode, this will be the DATA regs. * */ - ret = cyapa_i2c_reg_read_block(cyapa, BL_HEAD_OFFSET, BL_STATUS_SIZE, - status); + error = cyapa_i2c_reg_read_block(cyapa, BL_HEAD_OFFSET, BL_STATUS_SIZE, + status); /* * On smbus systems in OP mode, the i2c_reg_read will fail with * -ETIMEDOUT. In this case, try again using the smbus equivalent * command. This should return a BL_HEAD indicating CYAPA_STATE_OP. */ - if (cyapa->smbus && (ret == -ETIMEDOUT || ret == -ENXIO)) - ret = cyapa_read_block(cyapa, CYAPA_CMD_BL_STATUS, status); + if (cyapa->smbus && (error == -ETIMEDOUT || error == -ENXIO)) + error = cyapa_read_block(cyapa, CYAPA_CMD_BL_STATUS, status); - if (ret != BL_STATUS_SIZE) + if (error != BL_STATUS_SIZE) goto error; if ((status[REG_OP_STATUS] & OP_STATUS_SRC) == OP_STATUS_SRC) { @@ -454,7 +453,7 @@ static int cyapa_get_state(struct cyapa *cyapa) cyapa->state = CYAPA_STATE_OP; break; default: - ret = -EAGAIN; + error = -EAGAIN; goto error; } } else { @@ -468,7 +467,7 @@ static int cyapa_get_state(struct cyapa *cyapa) return 0; error: - return (ret < 0) ? ret : -EAGAIN; + return (error < 0) ? 
error : -EAGAIN; } /* @@ -487,31 +486,31 @@ error: */ static int cyapa_poll_state(struct cyapa *cyapa, unsigned int timeout) { - int ret; + int error; int tries = timeout / 100; - ret = cyapa_get_state(cyapa); - while ((ret || cyapa->state >= CYAPA_STATE_BL_BUSY) && tries--) { + error = cyapa_get_state(cyapa); + while ((error || cyapa->state >= CYAPA_STATE_BL_BUSY) && tries--) { msleep(100); - ret = cyapa_get_state(cyapa); + error = cyapa_get_state(cyapa); } - return (ret == -EAGAIN || ret == -ETIMEDOUT) ? -ETIMEDOUT : ret; + return (error == -EAGAIN || error == -ETIMEDOUT) ? -ETIMEDOUT : error; } static int cyapa_bl_deactivate(struct cyapa *cyapa) { - int ret; + int error; - ret = cyapa_i2c_reg_write_block(cyapa, 0, sizeof(bl_deactivate), - bl_deactivate); - if (ret < 0) - return ret; + error = cyapa_i2c_reg_write_block(cyapa, 0, sizeof(bl_deactivate), + bl_deactivate); + if (error) + return error; /* wait for bootloader to switch to idle state; should take < 100ms */ msleep(100); - ret = cyapa_poll_state(cyapa, 500); - if (ret < 0) - return ret; + error = cyapa_poll_state(cyapa, 500); + if (error) + return error; if (cyapa->state != CYAPA_STATE_BL_IDLE) return -EAGAIN; return 0; @@ -532,11 +531,11 @@ static int cyapa_bl_deactivate(struct cyapa *cyapa) */ static int cyapa_bl_exit(struct cyapa *cyapa) { - int ret; + int error; - ret = cyapa_i2c_reg_write_block(cyapa, 0, sizeof(bl_exit), bl_exit); - if (ret < 0) - return ret; + error = cyapa_i2c_reg_write_block(cyapa, 0, sizeof(bl_exit), bl_exit); + if (error) + return error; /* * Wait for bootloader to exit, and operation mode to start. @@ -548,9 +547,9 @@ static int cyapa_bl_exit(struct cyapa *cyapa) * updated to new firmware, it must first calibrate its sensors, which * can take up to an additional 2 seconds. 
*/ - ret = cyapa_poll_state(cyapa, 2000); - if (ret < 0) - return ret; + error = cyapa_poll_state(cyapa, 2000); + if (error < 0) + return error; if (cyapa->state != CYAPA_STATE_OP) return -EAGAIN; @@ -577,10 +576,13 @@ static int cyapa_set_power_mode(struct cyapa *cyapa, u8 power_mode) power = ret & ~PWR_MODE_MASK; power |= power_mode & PWR_MODE_MASK; ret = cyapa_write_byte(cyapa, CYAPA_CMD_POWER_MODE, power); - if (ret < 0) + if (ret < 0) { dev_err(dev, "failed to set power_mode 0x%02x err = %d\n", power_mode, ret); - return ret; + return ret; + } + + return 0; } static int cyapa_get_query_data(struct cyapa *cyapa) @@ -637,28 +639,28 @@ static int cyapa_check_is_operational(struct cyapa *cyapa) { struct device *dev = &cyapa->client->dev; static const char unique_str[] = "CYTRA"; - int ret; + int error; - ret = cyapa_poll_state(cyapa, 2000); - if (ret < 0) - return ret; + error = cyapa_poll_state(cyapa, 2000); + if (error) + return error; switch (cyapa->state) { case CYAPA_STATE_BL_ACTIVE: - ret = cyapa_bl_deactivate(cyapa); - if (ret) - return ret; + error = cyapa_bl_deactivate(cyapa); + if (error) + return error; /* Fallthrough state */ case CYAPA_STATE_BL_IDLE: - ret = cyapa_bl_exit(cyapa); - if (ret) - return ret; + error = cyapa_bl_exit(cyapa); + if (error) + return error; /* Fallthrough state */ case CYAPA_STATE_OP: - ret = cyapa_get_query_data(cyapa); - if (ret < 0) - return ret; + error = cyapa_get_query_data(cyapa); + if (error) + return error; /* only support firmware protocol gen3 */ if (cyapa->gen != CYAPA_GEN3) { @@ -753,18 +755,42 @@ static u8 cyapa_check_adapter_functionality(struct i2c_client *client) return ret; } +static int cyapa_open(struct input_dev *input) +{ + struct cyapa *cyapa = input_get_drvdata(input); + struct i2c_client *client = cyapa->client; + int error; + + error = cyapa_set_power_mode(cyapa, PWR_MODE_FULL_ACTIVE); + if (error) { + dev_err(&client->dev, "set active power failed: %d\n", error); + return error; + } + + enable_irq(client->irq); + return 0; +} + +static void cyapa_close(struct input_dev *input) +{ + struct cyapa *cyapa = input_get_drvdata(input); + + disable_irq(cyapa->client->irq); + cyapa_set_power_mode(cyapa, PWR_MODE_OFF); +} + static int cyapa_create_input_dev(struct cyapa *cyapa) { struct device *dev = &cyapa->client->dev; - int ret; struct input_dev *input; + int error; if (!cyapa->physical_size_x || !cyapa->physical_size_y) return -EINVAL; - input = cyapa->input = input_allocate_device(); + input = devm_input_allocate_device(dev); if (!input) { - dev_err(dev, "allocate memory for input device failed\n"); + dev_err(dev, "failed to allocate memory for input device.\n"); return -ENOMEM; } @@ -772,14 +798,17 @@ static int cyapa_create_input_dev(struct cyapa *cyapa) input->phys = cyapa->phys; input->id.bustype = BUS_I2C; input->id.version = 1; - input->id.product = 0; /* means any product in eventcomm. */ + input->id.product = 0; /* Means any product in eventcomm. 
*/ input->dev.parent = &cyapa->client->dev; + input->open = cyapa_open; + input->close = cyapa_close; + input_set_drvdata(input, cyapa); __set_bit(EV_ABS, input->evbit); - /* finger position */ + /* Finger position */ input_set_abs_params(input, ABS_MT_POSITION_X, 0, cyapa->max_abs_x, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, 0, cyapa->max_abs_y, 0, @@ -801,35 +830,25 @@ static int cyapa_create_input_dev(struct cyapa *cyapa) if (cyapa->btn_capability == CAPABILITY_LEFT_BTN_MASK) __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); - /* handle pointer emulation and unused slots in core */ - ret = input_mt_init_slots(input, CYAPA_MAX_MT_SLOTS, - INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED); - if (ret) { - dev_err(dev, "allocate memory for MT slots failed, %d\n", ret); - goto err_free_device; + /* Handle pointer emulation and unused slots in core */ + error = input_mt_init_slots(input, CYAPA_MAX_MT_SLOTS, + INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED); + if (error) { + dev_err(dev, "failed to initialize MT slots: %d\n", error); + return error; } - /* Register the device in input subsystem */ - ret = input_register_device(input); - if (ret) { - dev_err(dev, "input device register failed, %d\n", ret); - goto err_free_device; - } + cyapa->input = input; return 0; - -err_free_device: - input_free_device(input); - cyapa->input = NULL; - return ret; } static int cyapa_probe(struct i2c_client *client, const struct i2c_device_id *dev_id) { - int ret; - u8 adapter_func; - struct cyapa *cyapa; struct device *dev = &client->dev; + struct cyapa *cyapa; + u8 adapter_func; + int error; adapter_func = cyapa_check_adapter_functionality(client); if (adapter_func == CYAPA_ADAPTER_FUNC_NONE) { @@ -837,11 +856,9 @@ static int cyapa_probe(struct i2c_client *client, return -EIO; } - cyapa = kzalloc(sizeof(struct cyapa), GFP_KERNEL); - if (!cyapa) { - dev_err(dev, "allocate memory for cyapa failed\n"); + cyapa = devm_kzalloc(dev, sizeof(struct cyapa), GFP_KERNEL); + if (!cyapa) return -ENOMEM; - } cyapa->gen = CYAPA_GEN3; cyapa->client = client; @@ -852,67 +869,61 @@ static int cyapa_probe(struct i2c_client *client, /* i2c isn't supported, use smbus */ if (adapter_func == CYAPA_ADAPTER_FUNC_SMBUS) cyapa->smbus = true; + cyapa->state = CYAPA_STATE_NO_DEVICE; - ret = cyapa_check_is_operational(cyapa); - if (ret) { - dev_err(dev, "device not operational, %d\n", ret); - goto err_mem_free; - } - ret = cyapa_create_input_dev(cyapa); - if (ret) { - dev_err(dev, "create input_dev instance failed, %d\n", ret); - goto err_mem_free; + error = cyapa_check_is_operational(cyapa); + if (error) { + dev_err(dev, "device not operational, %d\n", error); + return error; } - ret = cyapa_set_power_mode(cyapa, PWR_MODE_FULL_ACTIVE); - if (ret) { - dev_err(dev, "set active power failed, %d\n", ret); - goto err_unregister_device; + /* Power down the device until we need it */ + error = cyapa_set_power_mode(cyapa, PWR_MODE_OFF); + if (error) { + dev_err(dev, "failed to quiesce the device: %d\n", error); + return error; } - cyapa->irq = client->irq; - ret = request_threaded_irq(cyapa->irq, - NULL, - cyapa_irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - "cyapa", - cyapa); - if (ret) { - dev_err(dev, "IRQ request failed: %d\n, ", ret); - goto err_unregister_device; + error = cyapa_create_input_dev(cyapa); + if (error) + return error; + + error = devm_request_threaded_irq(dev, client->irq, + NULL, cyapa_irq, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + "cyapa", cyapa); + if (error) { + dev_err(dev, "failed to request threaded irq: %d\n", error); + 
return error; } - return 0; + /* Disable IRQ until the device is opened */ + disable_irq(client->irq); -err_unregister_device: - input_unregister_device(cyapa->input); -err_mem_free: - kfree(cyapa); - - return ret; -} - -static int cyapa_remove(struct i2c_client *client) -{ - struct cyapa *cyapa = i2c_get_clientdata(client); - - free_irq(cyapa->irq, cyapa); - input_unregister_device(cyapa->input); - cyapa_set_power_mode(cyapa, PWR_MODE_OFF); - kfree(cyapa); + /* Register the device in input subsystem */ + error = input_register_device(cyapa->input); + if (error) { + dev_err(dev, "failed to register input device: %d\n", error); + return error; + } return 0; } -#ifdef CONFIG_PM_SLEEP -static int cyapa_suspend(struct device *dev) +static int __maybe_unused cyapa_suspend(struct device *dev) { - int ret; + struct i2c_client *client = to_i2c_client(dev); + struct cyapa *cyapa = i2c_get_clientdata(client); + struct input_dev *input = cyapa->input; u8 power_mode; - struct cyapa *cyapa = dev_get_drvdata(dev); + int error; - disable_irq(cyapa->irq); + error = mutex_lock_interruptible(&input->mutex); + if (error) + return error; + + disable_irq(client->irq); /* * Set trackpad device to idle mode if wakeup is allowed, @@ -920,31 +931,44 @@ static int cyapa_suspend(struct device *dev) */ power_mode = device_may_wakeup(dev) ? PWR_MODE_IDLE : PWR_MODE_OFF; - ret = cyapa_set_power_mode(cyapa, power_mode); - if (ret < 0) - dev_err(dev, "set power mode failed, %d\n", ret); + error = cyapa_set_power_mode(cyapa, power_mode); + if (error) + dev_err(dev, "suspend: set power mode to %d failed: %d\n", + power_mode, error); if (device_may_wakeup(dev)) - cyapa->irq_wake = (enable_irq_wake(cyapa->irq) == 0); + cyapa->irq_wake = (enable_irq_wake(client->irq) == 0); + + mutex_unlock(&input->mutex); + return 0; } -static int cyapa_resume(struct device *dev) +static int __maybe_unused cyapa_resume(struct device *dev) { - int ret; - struct cyapa *cyapa = dev_get_drvdata(dev); + struct i2c_client *client = to_i2c_client(dev); + struct cyapa *cyapa = i2c_get_clientdata(client); + struct input_dev *input = cyapa->input; + u8 power_mode; + int error; + + mutex_lock(&input->mutex); if (device_may_wakeup(dev) && cyapa->irq_wake) - disable_irq_wake(cyapa->irq); + disable_irq_wake(client->irq); + + power_mode = input->users ? PWR_MODE_FULL_ACTIVE : PWR_MODE_OFF; + error = cyapa_set_power_mode(cyapa, power_mode); + if (error) + dev_warn(dev, "resume: set power mode to %d failed: %d\n", + power_mode, error); + + enable_irq(client->irq); - ret = cyapa_set_power_mode(cyapa, PWR_MODE_FULL_ACTIVE); - if (ret) - dev_warn(dev, "resume active power failed, %d\n", ret); + mutex_unlock(&input->mutex); - enable_irq(cyapa->irq); return 0; } -#endif /* CONFIG_PM_SLEEP */ static SIMPLE_DEV_PM_OPS(cyapa_pm_ops, cyapa_suspend, cyapa_resume); @@ -962,7 +986,6 @@ static struct i2c_driver cyapa_driver = { }, .probe = cyapa_probe, - .remove = cyapa_remove, .id_table = cyapa_id_table, };
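
A pattern worth calling out here: nearly every driver touched in this series drops its #ifdef CONFIG_PM_SLEEP guards and tags the suspend/resume handlers __maybe_unused instead. SIMPLE_DEV_PM_OPS() always emits the dev_pm_ops table (it is simply empty when CONFIG_PM_SLEEP is off), and the attribute stops the compiler from warning about handlers that end up unreferenced in that configuration. A minimal self-contained sketch of the idiom ("foo" is a placeholder name, not a driver from this series):

    #include <linux/device.h>
    #include <linux/pm.h>

    /* Compiled unconditionally; __maybe_unused silences
     * -Wunused-function when CONFIG_PM_SLEEP is disabled. */
    static int __maybe_unused foo_suspend(struct device *dev)
    {
            /* quiesce the hardware */
            return 0;
    }

    static int __maybe_unused foo_resume(struct device *dev)
    {
            /* bring the hardware back up */
            return 0;
    }

    /* Expands to an empty table without CONFIG_PM_SLEEP;
     * hook up with .driver.pm = &foo_pm_ops. */
    static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);

The net effect is the same generated code with far less preprocessor noise.
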
diff --git a/drivers/input/mouse/elan_i2c.h b/drivers/input/mouse/elan_i2c.h new file mode 100644 index 00000000000..2e838626205 --- /dev/null +++ b/drivers/input/mouse/elan_i2c.h @@ -0,0 +1,86 @@ +/* + * Elan I2C/SMBus Touchpad driver + * + * Copyright (c) 2013 ELAN Microelectronics Corp. + * + * Author: 林政維 (Duson Lin) <dusonlin@emc.com.tw> + * Version: 1.5.5 + * + * Based on cyapa driver: + * copyright (c) 2011-2012 Cypress Semiconductor, Inc. + * copyright (c) 2011-2012 Google, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Trademarks are the property of their respective owners. + */ + +#ifndef _ELAN_I2C_H +#define _ELAN_I2C_H + +#include <linux/types.h> + +#define ETP_ENABLE_ABS 0x0001 +#define ETP_ENABLE_CALIBRATE 0x0002 +#define ETP_DISABLE_CALIBRATE 0x0000 +#define ETP_DISABLE_POWER 0x0001 + +/* IAP Firmware handling */ +#define ETP_FW_NAME "elan_i2c.bin" +#define ETP_IAP_START_ADDR 0x0083 +#define ETP_FW_IAP_PAGE_ERR (1 << 5) +#define ETP_FW_IAP_INTF_ERR (1 << 4) +#define ETP_FW_PAGE_SIZE 64 +#define ETP_FW_PAGE_COUNT 768 +#define ETP_FW_SIZE (ETP_FW_PAGE_SIZE * ETP_FW_PAGE_COUNT) + +struct i2c_client; +struct completion; + +enum tp_mode { + IAP_MODE = 1, + MAIN_MODE +}; + +struct elan_transport_ops { + int (*initialize)(struct i2c_client *client); + int (*sleep_control)(struct i2c_client *, bool sleep); + int (*power_control)(struct i2c_client *, bool enable); + int (*set_mode)(struct i2c_client *client, u8 mode); + + int (*calibrate)(struct i2c_client *client); + int (*calibrate_result)(struct i2c_client *client, u8 *val); + + int (*get_baseline_data)(struct i2c_client *client, + bool max_baseline, u8 *value); + + int (*get_version)(struct i2c_client *client, bool iap, u8 *version); + int (*get_sm_version)(struct i2c_client *client, u8 *version); + int (*get_checksum)(struct i2c_client *client, bool iap, u16 *csum); + int (*get_product_id)(struct i2c_client *client, u8 *id); + + int (*get_max)(struct i2c_client *client, + unsigned int *max_x, unsigned int *max_y); + int (*get_resolution)(struct i2c_client *client, + u8 *hw_res_x, u8 *hw_res_y); + int (*get_num_traces)(struct i2c_client *client, + unsigned int *x_tracenum, + unsigned int *y_tracenum); + + int (*iap_get_mode)(struct i2c_client *client, enum tp_mode *mode); + int (*iap_reset)(struct i2c_client *client); + + int (*prepare_fw_update)(struct i2c_client *client); + int (*write_fw_block)(struct i2c_client *client, + const u8 *page, u16 checksum, int idx); + int (*finish_fw_update)(struct i2c_client *client, + struct completion *reset_done); + + int (*get_report)(struct i2c_client *client, u8 *report); +}; + +extern const struct elan_transport_ops elan_smbus_ops, elan_i2c_ops; + +#endif /* _ELAN_I2C_H */ diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c new file mode 100644 index 00000000000..0cb2be48d53 --- /dev/null +++ b/drivers/input/mouse/elan_i2c_core.c @@ -0,0 +1,1137 @@ +/* + * Elan I2C/SMBus Touchpad driver + * + * Copyright (c) 2013 ELAN Microelectronics Corp. + * + * Author: 林政維 (Duson Lin) <dusonlin@emc.com.tw> + * Version: 1.5.5 + * + * Based on cyapa driver: + * copyright (c) 2011-2012 Cypress Semiconductor, Inc. + * copyright (c) 2011-2012 Google, Inc.
+ */ + +#include <linux/acpi.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/firmware.h> +#include <linux/i2c.h> +#include <linux/init.h> +#include <linux/input/mt.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/input.h> +#include <linux/uaccess.h> +#include <linux/jiffies.h> +#include <linux/completion.h> +#include <linux/of.h> +#include <linux/regulator/consumer.h> +#include <asm/unaligned.h> + +#include "elan_i2c.h" + +#define DRIVER_NAME "elan_i2c" +#define ELAN_DRIVER_VERSION "1.5.5" +#define ETP_PRESSURE_OFFSET 25 +#define ETP_MAX_PRESSURE 255 +#define ETP_FWIDTH_REDUCE 90 +#define ETP_FINGER_WIDTH 15 +#define ETP_RETRY_COUNT 3 + +#define ETP_MAX_FINGERS 5 +#define ETP_FINGER_DATA_LEN 5 +#define ETP_REPORT_ID 0x5D +#define ETP_REPORT_ID_OFFSET 2 +#define ETP_TOUCH_INFO_OFFSET 3 +#define ETP_FINGER_DATA_OFFSET 4 +#define ETP_MAX_REPORT_LEN 34 + +/* The main device structure */ +struct elan_tp_data { + struct i2c_client *client; + struct input_dev *input; + struct regulator *vcc; + + const struct elan_transport_ops *ops; + + /* for fw update */ + struct completion fw_completion; + bool in_fw_update; + + struct mutex sysfs_mutex; + + unsigned int max_x; + unsigned int max_y; + unsigned int width_x; + unsigned int width_y; + unsigned int x_res; + unsigned int y_res; + + u8 product_id; + u8 fw_version; + u8 sm_version; + u8 iap_version; + u16 fw_checksum; + + u8 mode; + + bool irq_wake; + + u8 min_baseline; + u8 max_baseline; + bool baseline_ready; +}; + +static int elan_enable_power(struct elan_tp_data *data) +{ + int repeat = ETP_RETRY_COUNT; + int error; + + error = regulator_enable(data->vcc); + if (error) { + dev_err(&data->client->dev, + "Failed to enable regulator: %d\n", error); + return error; + } + + do { + error = data->ops->power_control(data->client, true); + if (error >= 0) + return 0; + + msleep(30); + } while (--repeat > 0); + + return error; +} + +static int elan_disable_power(struct elan_tp_data *data) +{ + int repeat = ETP_RETRY_COUNT; + int error; + + do { + error = data->ops->power_control(data->client, false); + if (!error) { + error = regulator_disable(data->vcc); + if (error) { + dev_err(&data->client->dev, + "Failed to disable regulator: %d\n", + error); + /* Attempt to power the chip back up */ + data->ops->power_control(data->client, true); + break; + } + + return 0; + } + + msleep(30); + } while (--repeat > 0); + + return error; +} + +static int elan_sleep(struct elan_tp_data *data) +{ + int repeat = ETP_RETRY_COUNT; + int error; + + do { + error = data->ops->sleep_control(data->client, true); + if (!error) + return 0; + + msleep(30); + } while (--repeat > 0); + + return error; +} + +static int __elan_initialize(struct elan_tp_data *data) +{ + struct i2c_client *client = data->client; + int error; + + error = data->ops->initialize(client); + if (error) { + dev_err(&client->dev, "device initialize failed: %d\n", error); + return error; + } + + data->mode |= ETP_ENABLE_ABS; + error = data->ops->set_mode(client, data->mode); + if (error) { + dev_err(&client->dev, + "failed to switch to absolute mode: %d\n", error); + return error; + } + + error = data->ops->sleep_control(client, false); + if (error) { + dev_err(&client->dev, + "failed to wake device up: %d\n", error); + return error; + } + + return 0; +} + +static int elan_initialize(struct elan_tp_data *data) +{ + int repeat = ETP_RETRY_COUNT; + int error; + + do { + error = 
__elan_initialize(data); + if (!error) + return 0; + + msleep(30); + } while (--repeat > 0); + + return error; +} + +static int elan_query_device_info(struct elan_tp_data *data) +{ + int error; + + error = data->ops->get_product_id(data->client, &data->product_id); + if (error) + return error; + + error = data->ops->get_version(data->client, false, &data->fw_version); + if (error) + return error; + + error = data->ops->get_checksum(data->client, false, + &data->fw_checksum); + if (error) + return error; + + error = data->ops->get_sm_version(data->client, &data->sm_version); + if (error) + return error; + + error = data->ops->get_version(data->client, true, &data->iap_version); + if (error) + return error; + + return 0; +} + +static unsigned int elan_convert_resolution(u8 val) +{ + /* + * (value from firmware) * 10 + 790 = dpi + * + * We also have to convert dpi to dots/mm (*10/254 to avoid floating + * point). + */ + + return ((int)(char)val * 10 + 790) * 10 / 254; +} + +static int elan_query_device_parameters(struct elan_tp_data *data) +{ + unsigned int x_traces, y_traces; + u8 hw_x_res, hw_y_res; + int error; + + error = data->ops->get_max(data->client, &data->max_x, &data->max_y); + if (error) + return error; + + error = data->ops->get_num_traces(data->client, &x_traces, &y_traces); + if (error) + return error; + + data->width_x = data->max_x / x_traces; + data->width_y = data->max_y / y_traces; + + error = data->ops->get_resolution(data->client, &hw_x_res, &hw_y_res); + if (error) + return error; + + data->x_res = elan_convert_resolution(hw_x_res); + data->y_res = elan_convert_resolution(hw_y_res); + + return 0; +} + +/* + ********************************************************** + * IAP firmware updater related routines + ********************************************************** + */ +static int elan_write_fw_block(struct elan_tp_data *data, + const u8 *page, u16 checksum, int idx) +{ + int retry = ETP_RETRY_COUNT; + int error; + + do { + error = data->ops->write_fw_block(data->client, + page, checksum, idx); + if (!error) + return 0; + + dev_dbg(&data->client->dev, + "IAP retrying page %d (error: %d)\n", idx, error); + } while (--retry > 0); + + return error; +} + +static int __elan_update_firmware(struct elan_tp_data *data, + const struct firmware *fw) +{ + struct i2c_client *client = data->client; + struct device *dev = &client->dev; + int i, j; + int error; + u16 iap_start_addr; + u16 boot_page_count; + u16 sw_checksum = 0, fw_checksum = 0; + + error = data->ops->prepare_fw_update(client); + if (error) + return error; + + iap_start_addr = get_unaligned_le16(&fw->data[ETP_IAP_START_ADDR * 2]); + + boot_page_count = (iap_start_addr * 2) / ETP_FW_PAGE_SIZE; + for (i = boot_page_count; i < ETP_FW_PAGE_COUNT; i++) { + u16 checksum = 0; + const u8 *page = &fw->data[i * ETP_FW_PAGE_SIZE]; + + for (j = 0; j < ETP_FW_PAGE_SIZE; j += 2) + checksum += ((page[j + 1] << 8) | page[j]); + + error = elan_write_fw_block(data, page, checksum, i); + if (error) { + dev_err(dev, "write page %d fail: %d\n", i, error); + return error; + } + + sw_checksum += checksum; + } + + /* Wait for WDT reset and power-on reset */ + msleep(600); + + error = data->ops->finish_fw_update(client, &data->fw_completion); + if (error) + return error; + + error = data->ops->get_checksum(client, true, &fw_checksum); + if (error) + return error; + + if (sw_checksum != fw_checksum) { + dev_err(dev, "checksum diff sw=[%04X], fw=[%04X]\n", + sw_checksum, fw_checksum); + return -EIO; + } + + return 0; +}
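
The resolution comment and the per-page checksum loop above are easy to sanity-check in isolation. A small worked example as plain userspace C (sample values invented for illustration; this mirrors, but is not, the driver code):

    #include <stdint.h>
    #include <stdio.h>

    #define FW_PAGE_SIZE 64 /* mirrors ETP_FW_PAGE_SIZE */

    /* (signed firmware value) * 10 + 790 gives dpi; *10/254 then
     * converts dpi to dots/mm without floating point. */
    static unsigned int convert_resolution(uint8_t val)
    {
            return ((int)(signed char)val * 10 + 790) * 10 / 254;
    }

    /* 16-bit little-endian word sum over one firmware page, as in
     * __elan_update_firmware() above. */
    static uint16_t page_checksum(const uint8_t *page)
    {
            uint16_t sum = 0;
            int i;

            for (i = 0; i < FW_PAGE_SIZE; i += 2)
                    sum += (uint16_t)((page[i + 1] << 8) | page[i]);
            return sum;
    }

    int main(void)
    {
            uint8_t page[FW_PAGE_SIZE] = { 0x34, 0x12, 0x78, 0x56 };

            printf("res(0)  = %u dots/mm\n", convert_resolution(0));  /* 31 */
            printf("res(10) = %u dots/mm\n", convert_resolution(10)); /* 35 */
            printf("page checksum = 0x%04x\n", page_checksum(page));  /* 0x68ac */
            return 0;
    }

The checksum read back from the device after the update must match the running sum of these per-page values, or the driver declares the flash attempt failed.

+ +static int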
elan_update_firmware(struct elan_tp_data *data, + const struct firmware *fw) +{ + struct i2c_client *client = data->client; + int retval; + + dev_dbg(&client->dev, "Starting firmware update....\n"); + + disable_irq(client->irq); + data->in_fw_update = true; + + retval = __elan_update_firmware(data, fw); + if (retval) { + dev_err(&client->dev, "firmware update failed: %d\n", retval); + data->ops->iap_reset(client); + } else { + /* Reinitialize TP after fw is updated */ + elan_initialize(data); + elan_query_device_info(data); + } + + data->in_fw_update = false; + enable_irq(client->irq); + + return retval; +} + +/* + ******************************************************************* + * SYSFS attributes + ******************************************************************* + */ +static ssize_t elan_sysfs_read_fw_checksum(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + + return sprintf(buf, "0x%04x\n", data->fw_checksum); +} + +static ssize_t elan_sysfs_read_product_id(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + + return sprintf(buf, "%d.0\n", data->product_id); +} + +static ssize_t elan_sysfs_read_fw_ver(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + + return sprintf(buf, "%d.0\n", data->fw_version); +} + +static ssize_t elan_sysfs_read_sm_ver(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + + return sprintf(buf, "%d.0\n", data->sm_version); +} + +static ssize_t elan_sysfs_read_iap_ver(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + + return sprintf(buf, "%d.0\n", data->iap_version); +} + +static ssize_t elan_sysfs_update_fw(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + const struct firmware *fw; + int error; + + error = request_firmware(&fw, ETP_FW_NAME, dev); + if (error) { + dev_err(dev, "cannot load firmware %s: %d\n", + ETP_FW_NAME, error); + return error; + } + + /* Firmware must be exactly PAGE_NUM * PAGE_SIZE bytes */ + if (fw->size != ETP_FW_SIZE) { + dev_err(dev, "invalid firmware size = %zu, expected %d.\n", + fw->size, ETP_FW_SIZE); + error = -EBADF; + goto out_release_fw; + } + + error = mutex_lock_interruptible(&data->sysfs_mutex); + if (error) + goto out_release_fw; + + error = elan_update_firmware(data, fw); + + mutex_unlock(&data->sysfs_mutex); + +out_release_fw: + release_firmware(fw); + return error ?: count; +} + +static ssize_t calibrate_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + int tries = 20; + int retval; + int error; + u8 val[3]; + + retval = mutex_lock_interruptible(&data->sysfs_mutex); + if (retval) + return retval; + + disable_irq(client->irq); + + data->mode |= ETP_ENABLE_CALIBRATE; + retval = 
data->ops->set_mode(client, data->mode);
+ if (retval) {
+ dev_err(dev, "failed to enable calibration mode: %d\n",
+ retval);
+ goto out;
+ }
+
+ retval = data->ops->calibrate(client);
+ if (retval) {
+ dev_err(dev, "failed to start calibration: %d\n",
+ retval);
+ goto out_disable_calibrate;
+ }
+
+ val[0] = 0xff;
+ do {
+ /* Wait 250ms before checking if calibration has completed. */
+ msleep(250);
+
+ retval = data->ops->calibrate_result(client, val);
+ if (retval)
+ dev_err(dev, "failed to check calibration result: %d\n",
+ retval);
+ else if (val[0] == 0)
+ break; /* calibration done */
+
+ } while (--tries);
+
+ if (tries == 0) {
+ dev_err(dev, "failed to calibrate. Timeout.\n");
+ retval = -ETIMEDOUT;
+ }
+
+out_disable_calibrate:
+ data->mode &= ~ETP_ENABLE_CALIBRATE;
+ error = data->ops->set_mode(data->client, data->mode);
+ if (error) {
+ dev_err(dev, "failed to disable calibration mode: %d\n",
+ error);
+ if (!retval)
+ retval = error;
+ }
+out:
+ enable_irq(client->irq);
+ mutex_unlock(&data->sysfs_mutex);
+ return retval ?: count;
+}
+
+static ssize_t elan_sysfs_read_mode(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct elan_tp_data *data = i2c_get_clientdata(client);
+ int error;
+ enum tp_mode mode;
+
+ error = mutex_lock_interruptible(&data->sysfs_mutex);
+ if (error)
+ return error;
+
+ error = data->ops->iap_get_mode(data->client, &mode);
+
+ mutex_unlock(&data->sysfs_mutex);
+
+ if (error)
+ return error;
+
+ return sprintf(buf, "%d\n", (int)mode);
+}
+
+static DEVICE_ATTR(product_id, S_IRUGO, elan_sysfs_read_product_id, NULL);
+static DEVICE_ATTR(firmware_version, S_IRUGO, elan_sysfs_read_fw_ver, NULL);
+static DEVICE_ATTR(sample_version, S_IRUGO, elan_sysfs_read_sm_ver, NULL);
+static DEVICE_ATTR(iap_version, S_IRUGO, elan_sysfs_read_iap_ver, NULL);
+static DEVICE_ATTR(fw_checksum, S_IRUGO, elan_sysfs_read_fw_checksum, NULL);
+static DEVICE_ATTR(mode, S_IRUGO, elan_sysfs_read_mode, NULL);
+static DEVICE_ATTR(update_fw, S_IWUSR, NULL, elan_sysfs_update_fw);
+
+static DEVICE_ATTR_WO(calibrate);
+
+static struct attribute *elan_sysfs_entries[] = {
+ &dev_attr_product_id.attr,
+ &dev_attr_firmware_version.attr,
+ &dev_attr_sample_version.attr,
+ &dev_attr_iap_version.attr,
+ &dev_attr_fw_checksum.attr,
+ &dev_attr_calibrate.attr,
+ &dev_attr_mode.attr,
+ &dev_attr_update_fw.attr,
+ NULL,
+};
+
+static const struct attribute_group elan_sysfs_group = {
+ .attrs = elan_sysfs_entries,
+};
+
+static ssize_t acquire_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct elan_tp_data *data = i2c_get_clientdata(client);
+ int error;
+ int retval;
+
+ retval = mutex_lock_interruptible(&data->sysfs_mutex);
+ if (retval)
+ return retval;
+
+ disable_irq(client->irq);
+
+ data->baseline_ready = false;
+
+ data->mode |= ETP_ENABLE_CALIBRATE;
+ retval = data->ops->set_mode(data->client, data->mode);
+ if (retval) {
+ dev_err(dev, "Failed to enable calibration mode to get baseline: %d\n",
+ retval);
+ goto out;
+ }
+
+ msleep(250);
+
+ retval = data->ops->get_baseline_data(data->client, true,
+ &data->max_baseline);
+ if (retval) {
+ dev_err(dev, "Failed to read max baseline from device: %d\n",
+ retval);
+ goto out_disable_calibrate;
+ }
+
+ retval = data->ops->get_baseline_data(data->client, false,
+ &data->min_baseline);
+ if (retval) {
+ dev_err(dev, "Failed to read min baseline from device: %d\n",
+ retval);
+
goto out_disable_calibrate; + } + + data->baseline_ready = true; + +out_disable_calibrate: + data->mode &= ~ETP_ENABLE_CALIBRATE; + error = data->ops->set_mode(data->client, data->mode); + if (error) { + dev_err(dev, "Failed to disable calibration mode after acquiring baseline: %d\n", + error); + if (!retval) + retval = error; + } +out: + enable_irq(client->irq); + mutex_unlock(&data->sysfs_mutex); + return retval ?: count; +} + +static ssize_t min_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + int retval; + + retval = mutex_lock_interruptible(&data->sysfs_mutex); + if (retval) + return retval; + + if (!data->baseline_ready) { + retval = -ENODATA; + goto out; + } + + retval = snprintf(buf, PAGE_SIZE, "%d", data->min_baseline); + +out: + mutex_unlock(&data->sysfs_mutex); + return retval; +} + +static ssize_t max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + int retval; + + retval = mutex_lock_interruptible(&data->sysfs_mutex); + if (retval) + return retval; + + if (!data->baseline_ready) { + retval = -ENODATA; + goto out; + } + + retval = snprintf(buf, PAGE_SIZE, "%d", data->max_baseline); + +out: + mutex_unlock(&data->sysfs_mutex); + return retval; +} + + +static DEVICE_ATTR_WO(acquire); +static DEVICE_ATTR_RO(min); +static DEVICE_ATTR_RO(max); + +static struct attribute *elan_baseline_sysfs_entries[] = { + &dev_attr_acquire.attr, + &dev_attr_min.attr, + &dev_attr_max.attr, + NULL, +}; + +static const struct attribute_group elan_baseline_sysfs_group = { + .name = "baseline", + .attrs = elan_baseline_sysfs_entries, +}; + +static const struct attribute_group *elan_sysfs_groups[] = { + &elan_sysfs_group, + &elan_baseline_sysfs_group, + NULL +}; + +/* + ****************************************************************** + * Elan isr functions + ****************************************************************** + */ +static void elan_report_contact(struct elan_tp_data *data, + int contact_num, bool contact_valid, + u8 *finger_data) +{ + struct input_dev *input = data->input; + unsigned int pos_x, pos_y; + unsigned int pressure, mk_x, mk_y; + unsigned int area_x, area_y, major, minor, new_pressure; + + + if (contact_valid) { + pos_x = ((finger_data[0] & 0xf0) << 4) | + finger_data[1]; + pos_y = ((finger_data[0] & 0x0f) << 8) | + finger_data[2]; + mk_x = (finger_data[3] & 0x0f); + mk_y = (finger_data[3] >> 4); + pressure = finger_data[4]; + + if (pos_x > data->max_x || pos_y > data->max_y) { + dev_dbg(input->dev.parent, + "[%d] x=%d y=%d over max (%d, %d)", + contact_num, pos_x, pos_y, + data->max_x, data->max_y); + return; + } + + /* + * To avoid treating large finger as palm, let's reduce the + * width x and y per trace. 
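+ * For example (illustrative numbers only): with max_x = 3000 spread
+ * over 20 traces, width_x is 150; subtracting ETP_FWIDTH_REDUCE (90)
+ * leaves 60 units per trace, so a contact covering mk_x = 3 traces
+ * reports a major axis of about 180 instead of 450.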
+ */ + area_x = mk_x * (data->width_x - ETP_FWIDTH_REDUCE); + area_y = mk_y * (data->width_y - ETP_FWIDTH_REDUCE); + + major = max(area_x, area_y); + minor = min(area_x, area_y); + + new_pressure = pressure + ETP_PRESSURE_OFFSET; + if (new_pressure > ETP_MAX_PRESSURE) + new_pressure = ETP_MAX_PRESSURE; + + input_mt_slot(input, contact_num); + input_mt_report_slot_state(input, MT_TOOL_FINGER, true); + input_report_abs(input, ABS_MT_POSITION_X, pos_x); + input_report_abs(input, ABS_MT_POSITION_Y, data->max_y - pos_y); + input_report_abs(input, ABS_MT_PRESSURE, new_pressure); + input_report_abs(input, ABS_TOOL_WIDTH, mk_x); + input_report_abs(input, ABS_MT_TOUCH_MAJOR, major); + input_report_abs(input, ABS_MT_TOUCH_MINOR, minor); + } else { + input_mt_slot(input, contact_num); + input_mt_report_slot_state(input, MT_TOOL_FINGER, false); + } +} + +static void elan_report_absolute(struct elan_tp_data *data, u8 *packet) +{ + struct input_dev *input = data->input; + u8 *finger_data = &packet[ETP_FINGER_DATA_OFFSET]; + int i; + u8 tp_info = packet[ETP_TOUCH_INFO_OFFSET]; + bool contact_valid; + + for (i = 0; i < ETP_MAX_FINGERS; i++) { + contact_valid = tp_info & (1U << (3 + i)); + elan_report_contact(data, i, contact_valid, finger_data); + + if (contact_valid) + finger_data += ETP_FINGER_DATA_LEN; + } + + input_report_key(input, BTN_LEFT, tp_info & 0x01); + input_mt_report_pointer_emulation(input, true); + input_sync(input); +} + +static irqreturn_t elan_isr(int irq, void *dev_id) +{ + struct elan_tp_data *data = dev_id; + struct device *dev = &data->client->dev; + int error; + u8 report[ETP_MAX_REPORT_LEN]; + + /* + * When device is connected to i2c bus, when all IAP page writes + * complete, the driver will receive interrupt and must read + * 0000 to confirm that IAP is finished. 
+ */ + if (data->in_fw_update) { + complete(&data->fw_completion); + goto out; + } + + error = data->ops->get_report(data->client, report); + if (error) + goto out; + + if (report[ETP_REPORT_ID_OFFSET] != ETP_REPORT_ID) + dev_err(dev, "invalid report id data (%x)\n", + report[ETP_REPORT_ID_OFFSET]); + else + elan_report_absolute(data, report); + +out: + return IRQ_HANDLED; +} + +/* + ****************************************************************** + * Elan initialization functions + ****************************************************************** + */ +static int elan_setup_input_device(struct elan_tp_data *data) +{ + struct device *dev = &data->client->dev; + struct input_dev *input; + unsigned int max_width = max(data->width_x, data->width_y); + unsigned int min_width = min(data->width_x, data->width_y); + int error; + + input = devm_input_allocate_device(dev); + if (!input) + return -ENOMEM; + + input->name = "Elan Touchpad"; + input->id.bustype = BUS_I2C; + input_set_drvdata(input, data); + + error = input_mt_init_slots(input, ETP_MAX_FINGERS, + INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED); + if (error) { + dev_err(dev, "failed to initialize MT slots: %d\n", error); + return error; + } + + __set_bit(EV_ABS, input->evbit); + __set_bit(INPUT_PROP_POINTER, input->propbit); + __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); + __set_bit(BTN_LEFT, input->keybit); + + /* Set up ST parameters */ + input_set_abs_params(input, ABS_X, 0, data->max_x, 0, 0); + input_set_abs_params(input, ABS_Y, 0, data->max_y, 0, 0); + input_abs_set_res(input, ABS_X, data->x_res); + input_abs_set_res(input, ABS_Y, data->y_res); + input_set_abs_params(input, ABS_PRESSURE, 0, ETP_MAX_PRESSURE, 0, 0); + input_set_abs_params(input, ABS_TOOL_WIDTH, 0, ETP_FINGER_WIDTH, 0, 0); + + /* And MT parameters */ + input_set_abs_params(input, ABS_MT_POSITION_X, 0, data->max_x, 0, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, 0, data->max_y, 0, 0); + input_abs_set_res(input, ABS_MT_POSITION_X, data->x_res); + input_abs_set_res(input, ABS_MT_POSITION_Y, data->y_res); + input_set_abs_params(input, ABS_MT_PRESSURE, 0, + ETP_MAX_PRESSURE, 0, 0); + input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, + ETP_FINGER_WIDTH * max_width, 0, 0); + input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, + ETP_FINGER_WIDTH * min_width, 0, 0); + + data->input = input; + + return 0; +} + +static void elan_disable_regulator(void *_data) +{ + struct elan_tp_data *data = _data; + + regulator_disable(data->vcc); +} + +static void elan_remove_sysfs_groups(void *_data) +{ + struct elan_tp_data *data = _data; + + sysfs_remove_groups(&data->client->dev.kobj, elan_sysfs_groups); +} + +static int elan_probe(struct i2c_client *client, + const struct i2c_device_id *dev_id) +{ + const struct elan_transport_ops *transport_ops; + struct device *dev = &client->dev; + struct elan_tp_data *data; + unsigned long irqflags; + int error; + + if (IS_ENABLED(CONFIG_MOUSE_ELAN_I2C_I2C) && + i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + transport_ops = &elan_i2c_ops; + } else if (IS_ENABLED(CONFIG_MOUSE_ELAN_I2C_SMBUS) && + i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE_DATA | + I2C_FUNC_SMBUS_BLOCK_DATA | + I2C_FUNC_SMBUS_I2C_BLOCK)) { + transport_ops = &elan_smbus_ops; + } else { + dev_err(dev, "not a supported I2C/SMBus adapter\n"); + return -EIO; + } + + data = devm_kzalloc(&client->dev, sizeof(struct elan_tp_data), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + i2c_set_clientdata(client, data); + + data->ops = transport_ops; + 
data->client = client; + init_completion(&data->fw_completion); + mutex_init(&data->sysfs_mutex); + + data->vcc = devm_regulator_get(&client->dev, "vcc"); + if (IS_ERR(data->vcc)) { + error = PTR_ERR(data->vcc); + if (error != -EPROBE_DEFER) + dev_err(&client->dev, + "Failed to get 'vcc' regulator: %d\n", + error); + return error; + } + + error = regulator_enable(data->vcc); + if (error) { + dev_err(&client->dev, + "Failed to enable regulator: %d\n", error); + return error; + } + + error = devm_add_action(&client->dev, + elan_disable_regulator, data); + if (error) { + regulator_disable(data->vcc); + dev_err(&client->dev, + "Failed to add disable regulator action: %d\n", + error); + return error; + } + + /* Initialize the touchpad. */ + error = elan_initialize(data); + if (error) + return error; + + error = elan_query_device_info(data); + if (error) + return error; + + error = elan_query_device_parameters(data); + if (error) + return error; + + dev_dbg(&client->dev, + "Elan Touchpad Information:\n" + " Module product ID: 0x%04x\n" + " Firmware Version: 0x%04x\n" + " Sample Version: 0x%04x\n" + " IAP Version: 0x%04x\n" + " Max ABS X,Y: %d,%d\n" + " Width X,Y: %d,%d\n" + " Resolution X,Y: %d,%d (dots/mm)\n", + data->product_id, + data->fw_version, + data->sm_version, + data->iap_version, + data->max_x, data->max_y, + data->width_x, data->width_y, + data->x_res, data->y_res); + + /* Set up input device properties based on queried parameters. */ + error = elan_setup_input_device(data); + if (error) + return error; + + /* + * Systems using device tree should set up interrupt via DTS, + * the rest will use the default falling edge interrupts. + */ + irqflags = client->dev.of_node ? 0 : IRQF_TRIGGER_FALLING; + + error = devm_request_threaded_irq(&client->dev, client->irq, + NULL, elan_isr, + irqflags | IRQF_ONESHOT, + client->name, data); + if (error) { + dev_err(&client->dev, "cannot register irq=%d\n", client->irq); + return error; + } + + error = sysfs_create_groups(&client->dev.kobj, elan_sysfs_groups); + if (error) { + dev_err(&client->dev, "failed to create sysfs attributes: %d\n", + error); + return error; + } + + error = devm_add_action(&client->dev, + elan_remove_sysfs_groups, data); + if (error) { + elan_remove_sysfs_groups(data); + dev_err(&client->dev, + "Failed to add sysfs cleanup action: %d\n", + error); + return error; + } + + error = input_register_device(data->input); + if (error) { + dev_err(&client->dev, "failed to register input device: %d\n", + error); + return error; + } + + /* + * Systems using device tree should set up wakeup via DTS, + * the rest will configure device as wakeup source by default. + */ + if (!client->dev.of_node) + device_init_wakeup(&client->dev, true); + + return 0; +} + +static int __maybe_unused elan_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + int ret; + + /* + * We are taking the mutex to make sure sysfs operations are + * complete before we attempt to bring the device into low[er] + * power mode. 
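+ * Taking it interruptibly also means the wait can be aborted: if a
+ * sysfs operation is stuck holding the mutex, the suspend attempt
+ * fails with an error instead of blocking forever.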
+ */ + ret = mutex_lock_interruptible(&data->sysfs_mutex); + if (ret) + return ret; + + disable_irq(client->irq); + + if (device_may_wakeup(dev)) { + ret = elan_sleep(data); + /* Enable wake from IRQ */ + data->irq_wake = (enable_irq_wake(client->irq) == 0); + } else { + ret = elan_disable_power(data); + } + + mutex_unlock(&data->sysfs_mutex); + return ret; +} + +static int __maybe_unused elan_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elan_tp_data *data = i2c_get_clientdata(client); + int error; + + if (device_may_wakeup(dev) && data->irq_wake) { + disable_irq_wake(client->irq); + data->irq_wake = false; + } + + error = elan_enable_power(data); + if (error) + dev_err(dev, "power up when resuming failed: %d\n", error); + + error = elan_initialize(data); + if (error) + dev_err(dev, "initialize when resuming failed: %d\n", error); + + enable_irq(data->client->irq); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(elan_pm_ops, elan_suspend, elan_resume); + +static const struct i2c_device_id elan_id[] = { + { DRIVER_NAME, 0 }, + { }, +}; +MODULE_DEVICE_TABLE(i2c, elan_id); + +#ifdef CONFIG_ACPI +static const struct acpi_device_id elan_acpi_id[] = { + { "ELAN0000", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, elan_acpi_id); +#endif + +#ifdef CONFIG_OF +static const struct of_device_id elan_of_match[] = { + { .compatible = "elan,ekth3000" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, elan_of_match); +#endif + +static struct i2c_driver elan_driver = { + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + .pm = &elan_pm_ops, + .acpi_match_table = ACPI_PTR(elan_acpi_id), + .of_match_table = of_match_ptr(elan_of_match), + }, + .probe = elan_probe, + .id_table = elan_id, +}; + +module_i2c_driver(elan_driver); + +MODULE_AUTHOR("Duson Lin <dusonlin@emc.com.tw>"); +MODULE_DESCRIPTION("Elan I2C/SMBus Touchpad driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(ELAN_DRIVER_VERSION); diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c new file mode 100644 index 00000000000..97d4937fc24 --- /dev/null +++ b/drivers/input/mouse/elan_i2c_i2c.c @@ -0,0 +1,611 @@ +/* + * Elan I2C/SMBus Touchpad driver - I2C interface + * + * Copyright (c) 2013 ELAN Microelectronics Corp. + * + * Author: æž—æ”¿ç¶ (Duson Lin) <dusonlin@emc.com.tw> + * Version: 1.5.5 + * + * Based on cyapa driver: + * copyright (c) 2011-2012 Cypress Semiconductor, Inc. + * copyright (c) 2011-2012 Google, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Trademarks are the property of their respective owners. 
+ */ + +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/i2c.h> +#include <linux/interrupt.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <asm/unaligned.h> + +#include "elan_i2c.h" + +/* Elan i2c commands */ +#define ETP_I2C_RESET 0x0100 +#define ETP_I2C_WAKE_UP 0x0800 +#define ETP_I2C_SLEEP 0x0801 +#define ETP_I2C_DESC_CMD 0x0001 +#define ETP_I2C_REPORT_DESC_CMD 0x0002 +#define ETP_I2C_STAND_CMD 0x0005 +#define ETP_I2C_UNIQUEID_CMD 0x0101 +#define ETP_I2C_FW_VERSION_CMD 0x0102 +#define ETP_I2C_SM_VERSION_CMD 0x0103 +#define ETP_I2C_XY_TRACENUM_CMD 0x0105 +#define ETP_I2C_MAX_X_AXIS_CMD 0x0106 +#define ETP_I2C_MAX_Y_AXIS_CMD 0x0107 +#define ETP_I2C_RESOLUTION_CMD 0x0108 +#define ETP_I2C_IAP_VERSION_CMD 0x0110 +#define ETP_I2C_SET_CMD 0x0300 +#define ETP_I2C_POWER_CMD 0x0307 +#define ETP_I2C_FW_CHECKSUM_CMD 0x030F +#define ETP_I2C_IAP_CTRL_CMD 0x0310 +#define ETP_I2C_IAP_CMD 0x0311 +#define ETP_I2C_IAP_RESET_CMD 0x0314 +#define ETP_I2C_IAP_CHECKSUM_CMD 0x0315 +#define ETP_I2C_CALIBRATE_CMD 0x0316 +#define ETP_I2C_MAX_BASELINE_CMD 0x0317 +#define ETP_I2C_MIN_BASELINE_CMD 0x0318 + +#define ETP_I2C_REPORT_LEN 34 +#define ETP_I2C_DESC_LENGTH 30 +#define ETP_I2C_REPORT_DESC_LENGTH 158 +#define ETP_I2C_INF_LENGTH 2 +#define ETP_I2C_IAP_PASSWORD 0x1EA5 +#define ETP_I2C_IAP_RESET 0xF0F0 +#define ETP_I2C_MAIN_MODE_ON (1 << 9) +#define ETP_I2C_IAP_REG_L 0x01 +#define ETP_I2C_IAP_REG_H 0x06 + +static int elan_i2c_read_block(struct i2c_client *client, + u16 reg, u8 *val, u16 len) +{ + __le16 buf[] = { + cpu_to_le16(reg), + }; + struct i2c_msg msgs[] = { + { + .addr = client->addr, + .flags = client->flags & I2C_M_TEN, + .len = sizeof(buf), + .buf = (u8 *)buf, + }, + { + .addr = client->addr, + .flags = (client->flags & I2C_M_TEN) | I2C_M_RD, + .len = len, + .buf = val, + } + }; + int ret; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + return ret == ARRAY_SIZE(msgs) ? 0 : (ret < 0 ? ret : -EIO); +} + +static int elan_i2c_read_cmd(struct i2c_client *client, u16 reg, u8 *val) +{ + int retval; + + retval = elan_i2c_read_block(client, reg, val, ETP_I2C_INF_LENGTH); + if (retval < 0) { + dev_err(&client->dev, "reading cmd (0x%04x) fail.\n", reg); + return retval; + } + + return 0; +} + +static int elan_i2c_write_cmd(struct i2c_client *client, u16 reg, u16 cmd) +{ + __le16 buf[] = { + cpu_to_le16(reg), + cpu_to_le16(cmd), + }; + struct i2c_msg msg = { + .addr = client->addr, + .flags = client->flags & I2C_M_TEN, + .len = sizeof(buf), + .buf = (u8 *)buf, + }; + int ret; + + ret = i2c_transfer(client->adapter, &msg, 1); + return ret == 1 ? 0 : (ret < 0 ? 
ret : -EIO); +} + +static int elan_i2c_initialize(struct i2c_client *client) +{ + struct device *dev = &client->dev; + int error; + u8 val[256]; + + error = elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD, ETP_I2C_RESET); + if (error) { + dev_err(dev, "device reset failed: %d\n", error); + return error; + } + + /* Wait for the device to reset */ + msleep(100); + + /* get reset acknowledgement 0000 */ + error = i2c_master_recv(client, val, ETP_I2C_INF_LENGTH); + if (error < 0) { + dev_err(dev, "failed to read reset response: %d\n", error); + return error; + } + + error = elan_i2c_read_block(client, ETP_I2C_DESC_CMD, + val, ETP_I2C_DESC_LENGTH); + if (error) { + dev_err(dev, "cannot get device descriptor: %d\n", error); + return error; + } + + error = elan_i2c_read_block(client, ETP_I2C_REPORT_DESC_CMD, + val, ETP_I2C_REPORT_DESC_LENGTH); + if (error) { + dev_err(dev, "fetching report descriptor failed.: %d\n", error); + return error; + } + + return 0; +} + +static int elan_i2c_sleep_control(struct i2c_client *client, bool sleep) +{ + return elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD, + sleep ? ETP_I2C_SLEEP : ETP_I2C_WAKE_UP); +} + +static int elan_i2c_power_control(struct i2c_client *client, bool enable) +{ + u8 val[2]; + u16 reg; + int error; + + error = elan_i2c_read_cmd(client, ETP_I2C_POWER_CMD, val); + if (error) { + dev_err(&client->dev, + "failed to read current power state: %d\n", + error); + return error; + } + + reg = le16_to_cpup((__le16 *)val); + if (enable) + reg &= ~ETP_DISABLE_POWER; + else + reg |= ETP_DISABLE_POWER; + + error = elan_i2c_write_cmd(client, ETP_I2C_POWER_CMD, reg); + if (error) { + dev_err(&client->dev, + "failed to write current power state: %d\n", + error); + return error; + } + + return 0; +} + +static int elan_i2c_set_mode(struct i2c_client *client, u8 mode) +{ + return elan_i2c_write_cmd(client, ETP_I2C_SET_CMD, mode); +} + + +static int elan_i2c_calibrate(struct i2c_client *client) +{ + return elan_i2c_write_cmd(client, ETP_I2C_CALIBRATE_CMD, 1); +} + +static int elan_i2c_calibrate_result(struct i2c_client *client, u8 *val) +{ + return elan_i2c_read_block(client, ETP_I2C_CALIBRATE_CMD, val, 1); +} + +static int elan_i2c_get_baseline_data(struct i2c_client *client, + bool max_baseline, u8 *value) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, + max_baseline ? ETP_I2C_MAX_BASELINE_CMD : + ETP_I2C_MIN_BASELINE_CMD, + val); + if (error) + return error; + + *value = le16_to_cpup((__le16 *)val); + + return 0; +} + +static int elan_i2c_get_version(struct i2c_client *client, + bool iap, u8 *version) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, + iap ? ETP_I2C_IAP_VERSION_CMD : + ETP_I2C_FW_VERSION_CMD, + val); + if (error) { + dev_err(&client->dev, "failed to get %s version: %d\n", + iap ? 
"IAP" : "FW", error); + return error; + } + + *version = val[0]; + return 0; +} + +static int elan_i2c_get_sm_version(struct i2c_client *client, u8 *version) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, ETP_I2C_SM_VERSION_CMD, val); + if (error) { + dev_err(&client->dev, "failed to get SM version: %d\n", error); + return error; + } + + *version = val[0]; + return 0; +} + +static int elan_i2c_get_product_id(struct i2c_client *client, u8 *id) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, ETP_I2C_UNIQUEID_CMD, val); + if (error) { + dev_err(&client->dev, "failed to get product ID: %d\n", error); + return error; + } + + *id = val[0]; + return 0; +} + +static int elan_i2c_get_checksum(struct i2c_client *client, + bool iap, u16 *csum) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, + iap ? ETP_I2C_IAP_CHECKSUM_CMD : + ETP_I2C_FW_CHECKSUM_CMD, + val); + if (error) { + dev_err(&client->dev, "failed to get %s checksum: %d\n", + iap ? "IAP" : "FW", error); + return error; + } + + *csum = le16_to_cpup((__le16 *)val); + return 0; +} + +static int elan_i2c_get_max(struct i2c_client *client, + unsigned int *max_x, unsigned int *max_y) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, ETP_I2C_MAX_X_AXIS_CMD, val); + if (error) { + dev_err(&client->dev, "failed to get X dimension: %d\n", error); + return error; + } + + *max_x = le16_to_cpup((__le16 *)val) & 0x0fff; + + error = elan_i2c_read_cmd(client, ETP_I2C_MAX_Y_AXIS_CMD, val); + if (error) { + dev_err(&client->dev, "failed to get Y dimension: %d\n", error); + return error; + } + + *max_y = le16_to_cpup((__le16 *)val) & 0x0fff; + + return 0; +} + +static int elan_i2c_get_resolution(struct i2c_client *client, + u8 *hw_res_x, u8 *hw_res_y) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, ETP_I2C_RESOLUTION_CMD, val); + if (error) { + dev_err(&client->dev, "failed to get resolution: %d\n", error); + return error; + } + + *hw_res_x = val[0]; + *hw_res_y = val[1]; + + return 0; +} + +static int elan_i2c_get_num_traces(struct i2c_client *client, + unsigned int *x_traces, + unsigned int *y_traces) +{ + int error; + u8 val[3]; + + error = elan_i2c_read_cmd(client, ETP_I2C_XY_TRACENUM_CMD, val); + if (error) { + dev_err(&client->dev, "failed to get trace info: %d\n", error); + return error; + } + + *x_traces = val[0] - 1; + *y_traces = val[1] - 1; + + return 0; +} + +static int elan_i2c_iap_get_mode(struct i2c_client *client, enum tp_mode *mode) +{ + int error; + u16 constant; + u8 val[3]; + + error = elan_i2c_read_cmd(client, ETP_I2C_IAP_CTRL_CMD, val); + if (error) { + dev_err(&client->dev, + "failed to read iap control register: %d\n", + error); + return error; + } + + constant = le16_to_cpup((__le16 *)val); + dev_dbg(&client->dev, "iap control reg: 0x%04x.\n", constant); + + *mode = (constant & ETP_I2C_MAIN_MODE_ON) ? 
MAIN_MODE : IAP_MODE; + + return 0; +} + +static int elan_i2c_iap_reset(struct i2c_client *client) +{ + int error; + + error = elan_i2c_write_cmd(client, ETP_I2C_IAP_RESET_CMD, + ETP_I2C_IAP_RESET); + if (error) { + dev_err(&client->dev, "cannot reset IC: %d\n", error); + return error; + } + + return 0; +} + +static int elan_i2c_set_flash_key(struct i2c_client *client) +{ + int error; + + error = elan_i2c_write_cmd(client, ETP_I2C_IAP_CMD, + ETP_I2C_IAP_PASSWORD); + if (error) { + dev_err(&client->dev, "cannot set flash key: %d\n", error); + return error; + } + + return 0; +} + +static int elan_i2c_prepare_fw_update(struct i2c_client *client) +{ + struct device *dev = &client->dev; + int error; + enum tp_mode mode; + u8 val[3]; + u16 password; + + /* Get FW in which mode (IAP_MODE/MAIN_MODE) */ + error = elan_i2c_iap_get_mode(client, &mode); + if (error) + return error; + + if (mode == IAP_MODE) { + /* Reset IC */ + error = elan_i2c_iap_reset(client); + if (error) + return error; + + msleep(30); + } + + /* Set flash key*/ + error = elan_i2c_set_flash_key(client); + if (error) + return error; + + /* Wait for F/W IAP initialization */ + msleep(mode == MAIN_MODE ? 100 : 30); + + /* Check if we are in IAP mode or not */ + error = elan_i2c_iap_get_mode(client, &mode); + if (error) + return error; + + if (mode == MAIN_MODE) { + dev_err(dev, "wrong mode: %d\n", mode); + return -EIO; + } + + /* Set flash key again */ + error = elan_i2c_set_flash_key(client); + if (error) + return error; + + /* Wait for F/W IAP initialization */ + msleep(30); + + /* read back to check we actually enabled successfully. */ + error = elan_i2c_read_cmd(client, ETP_I2C_IAP_CMD, val); + if (error) { + dev_err(dev, "cannot read iap password: %d\n", + error); + return error; + } + + password = le16_to_cpup((__le16 *)val); + if (password != ETP_I2C_IAP_PASSWORD) { + dev_err(dev, "wrong iap password: 0x%X\n", password); + return -EIO; + } + + return 0; +} + +static int elan_i2c_write_fw_block(struct i2c_client *client, + const u8 *page, u16 checksum, int idx) +{ + struct device *dev = &client->dev; + u8 page_store[ETP_FW_PAGE_SIZE + 4]; + u8 val[3]; + u16 result; + int ret, error; + + page_store[0] = ETP_I2C_IAP_REG_L; + page_store[1] = ETP_I2C_IAP_REG_H; + memcpy(&page_store[2], page, ETP_FW_PAGE_SIZE); + /* recode checksum at last two bytes */ + put_unaligned_le16(checksum, &page_store[ETP_FW_PAGE_SIZE + 2]); + + ret = i2c_master_send(client, page_store, sizeof(page_store)); + if (ret != sizeof(page_store)) { + error = ret < 0 ? ret : -EIO; + dev_err(dev, "Failed to write page %d: %d\n", idx, error); + return error; + } + + /* Wait for F/W to update one page ROM data. 
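+ * (For reference, the buffer just sent is laid out as
+ * [IAP_REG_L][IAP_REG_H][ETP_FW_PAGE_SIZE-byte page][csum lo][csum hi],
+ * i.e. ETP_FW_PAGE_SIZE + 4 bytes in total.)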
*/
+ msleep(20);
+
+ error = elan_i2c_read_cmd(client, ETP_I2C_IAP_CTRL_CMD, val);
+ if (error) {
+ dev_err(dev, "Failed to read IAP write result: %d\n", error);
+ return error;
+ }
+
+ result = le16_to_cpup((__le16 *)val);
+ if (result & (ETP_FW_IAP_PAGE_ERR | ETP_FW_IAP_INTF_ERR)) {
+ dev_err(dev, "IAP reports failed write: %04hx\n",
+ result);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int elan_i2c_finish_fw_update(struct i2c_client *client,
+ struct completion *completion)
+{
+ struct device *dev = &client->dev;
+ long ret;
+ int error;
+ int len;
+ u8 buffer[ETP_I2C_INF_LENGTH];
+
+ reinit_completion(completion);
+ enable_irq(client->irq);
+
+ error = elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD, ETP_I2C_RESET);
+ if (!error)
+ ret = wait_for_completion_interruptible_timeout(completion,
+ msecs_to_jiffies(300));
+ disable_irq(client->irq);
+
+ if (error) {
+ dev_err(dev, "device reset failed: %d\n", error);
+ return error;
+ } else if (ret == 0) {
+ dev_err(dev, "timeout waiting for device reset\n");
+ return -ETIMEDOUT;
+ } else if (ret < 0) {
+ error = ret;
+ dev_err(dev, "error waiting for device reset: %d\n", error);
+ return error;
+ }
+
+ len = i2c_master_recv(client, buffer, ETP_I2C_INF_LENGTH);
+ if (len != ETP_I2C_INF_LENGTH) {
+ error = len < 0 ? len : -EIO;
+ dev_err(dev, "failed to read INT signal: %d (%d)\n",
+ error, len);
+ return error;
+ }
+
+ return 0;
+}
+
+static int elan_i2c_get_report(struct i2c_client *client, u8 *report)
+{
+ int len;
+
+ len = i2c_master_recv(client, report, ETP_I2C_REPORT_LEN);
+ if (len < 0) {
+ dev_err(&client->dev, "failed to read report data: %d\n", len);
+ return len;
+ }
+
+ if (len != ETP_I2C_REPORT_LEN) {
+ dev_err(&client->dev,
+ "wrong report length (%d vs %d expected)\n",
+ len, ETP_I2C_REPORT_LEN);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+const struct elan_transport_ops elan_i2c_ops = {
+ .initialize = elan_i2c_initialize,
+ .sleep_control = elan_i2c_sleep_control,
+ .power_control = elan_i2c_power_control,
+ .set_mode = elan_i2c_set_mode,
+
+ .calibrate = elan_i2c_calibrate,
+ .calibrate_result = elan_i2c_calibrate_result,
+
+ .get_baseline_data = elan_i2c_get_baseline_data,
+
+ .get_version = elan_i2c_get_version,
+ .get_sm_version = elan_i2c_get_sm_version,
+ .get_product_id = elan_i2c_get_product_id,
+ .get_checksum = elan_i2c_get_checksum,
+
+ .get_max = elan_i2c_get_max,
+ .get_resolution = elan_i2c_get_resolution,
+ .get_num_traces = elan_i2c_get_num_traces,
+
+ .iap_get_mode = elan_i2c_iap_get_mode,
+ .iap_reset = elan_i2c_iap_reset,
+
+ .prepare_fw_update = elan_i2c_prepare_fw_update,
+ .write_fw_block = elan_i2c_write_fw_block,
+ .finish_fw_update = elan_i2c_finish_fw_update,
+
+ .get_report = elan_i2c_get_report,
+};
diff --git a/drivers/input/mouse/elan_i2c_smbus.c b/drivers/input/mouse/elan_i2c_smbus.c
new file mode 100644
index 00000000000..359bf8583d5
--- /dev/null
+++ b/drivers/input/mouse/elan_i2c_smbus.c
@@ -0,0 +1,514 @@
+/*
+ * Elan I2C/SMBus Touchpad driver - SMBus interface
+ *
+ * Copyright (c) 2013 ELAN Microelectronics Corp.
+ *
+ * Author: 林政維 (Duson Lin) <dusonlin@emc.com.tw>
+ * Version: 1.5.5
+ *
+ * Based on cyapa driver:
+ * copyright (c) 2011-2012 Cypress Semiconductor, Inc.
+ * copyright (c) 2011-2012 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Trademarks are the property of their respective owners.
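+ *
+ * Bus format, in brief: unlike the raw-I2C transport, every query here
+ * is an SMBus block read of a single-byte command, and multi-byte
+ * values come back big-endian; hence the be16_to_cpup() calls below,
+ * where elan_i2c_i2c.c uses le16_to_cpup().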
+ */ + +#include <linux/delay.h> +#include <linux/i2c.h> +#include <linux/init.h> +#include <linux/kernel.h> + +#include "elan_i2c.h" + +/* Elan SMbus commands */ +#define ETP_SMBUS_IAP_CMD 0x00 +#define ETP_SMBUS_ENABLE_TP 0x20 +#define ETP_SMBUS_SLEEP_CMD 0x21 +#define ETP_SMBUS_IAP_PASSWORD_WRITE 0x29 +#define ETP_SMBUS_IAP_PASSWORD_READ 0x80 +#define ETP_SMBUS_WRITE_FW_BLOCK 0x2A +#define ETP_SMBUS_IAP_RESET_CMD 0x2B +#define ETP_SMBUS_RANGE_CMD 0xA0 +#define ETP_SMBUS_FW_VERSION_CMD 0xA1 +#define ETP_SMBUS_XY_TRACENUM_CMD 0xA2 +#define ETP_SMBUS_SM_VERSION_CMD 0xA3 +#define ETP_SMBUS_UNIQUEID_CMD 0xA3 +#define ETP_SMBUS_RESOLUTION_CMD 0xA4 +#define ETP_SMBUS_HELLOPACKET_CMD 0xA7 +#define ETP_SMBUS_PACKET_QUERY 0xA8 +#define ETP_SMBUS_IAP_VERSION_CMD 0xAC +#define ETP_SMBUS_IAP_CTRL_CMD 0xAD +#define ETP_SMBUS_IAP_CHECKSUM_CMD 0xAE +#define ETP_SMBUS_FW_CHECKSUM_CMD 0xAF +#define ETP_SMBUS_MAX_BASELINE_CMD 0xC3 +#define ETP_SMBUS_MIN_BASELINE_CMD 0xC4 +#define ETP_SMBUS_CALIBRATE_QUERY 0xC5 + +#define ETP_SMBUS_REPORT_LEN 32 +#define ETP_SMBUS_REPORT_OFFSET 2 +#define ETP_SMBUS_HELLOPACKET_LEN 5 +#define ETP_SMBUS_IAP_PASSWORD 0x1234 +#define ETP_SMBUS_IAP_MODE_ON (1 << 6) + +static int elan_smbus_initialize(struct i2c_client *client) +{ + u8 check[ETP_SMBUS_HELLOPACKET_LEN] = { 0x55, 0x55, 0x55, 0x55, 0x55 }; + u8 values[ETP_SMBUS_HELLOPACKET_LEN] = { 0, 0, 0, 0, 0 }; + int len, error; + + /* Get hello packet */ + len = i2c_smbus_read_block_data(client, + ETP_SMBUS_HELLOPACKET_CMD, values); + if (len != ETP_SMBUS_HELLOPACKET_LEN) { + dev_err(&client->dev, "hello packet length fail: %d\n", len); + error = len < 0 ? len : -EIO; + return error; + } + + /* compare hello packet */ + if (memcmp(values, check, ETP_SMBUS_HELLOPACKET_LEN)) { + dev_err(&client->dev, "hello packet fail [%*px]\n", + ETP_SMBUS_HELLOPACKET_LEN, values); + return -ENXIO; + } + + /* enable tp */ + error = i2c_smbus_write_byte(client, ETP_SMBUS_ENABLE_TP); + if (error) { + dev_err(&client->dev, "failed to enable touchpad: %d\n", error); + return error; + } + + return 0; +} + +static int elan_smbus_set_mode(struct i2c_client *client, u8 mode) +{ + u8 cmd[4] = { 0x00, 0x07, 0x00, mode }; + + return i2c_smbus_write_block_data(client, ETP_SMBUS_IAP_CMD, + sizeof(cmd), cmd); +} + +static int elan_smbus_sleep_control(struct i2c_client *client, bool sleep) +{ + if (sleep) + return i2c_smbus_write_byte(client, ETP_SMBUS_SLEEP_CMD); + else + return 0; /* XXX should we send ETP_SMBUS_ENABLE_TP here? */ +} + +static int elan_smbus_power_control(struct i2c_client *client, bool enable) +{ + return 0; /* A no-op */ +} + +static int elan_smbus_calibrate(struct i2c_client *client) +{ + u8 cmd[4] = { 0x00, 0x08, 0x00, 0x01 }; + + return i2c_smbus_write_block_data(client, ETP_SMBUS_IAP_CMD, + sizeof(cmd), cmd); +} + +static int elan_smbus_calibrate_result(struct i2c_client *client, u8 *val) +{ + int error; + + error = i2c_smbus_read_block_data(client, + ETP_SMBUS_CALIBRATE_QUERY, val); + if (error < 0) + return error; + + return 0; +} + +static int elan_smbus_get_baseline_data(struct i2c_client *client, + bool max_baseline, u8 *value) +{ + int error; + u8 val[3]; + + error = i2c_smbus_read_block_data(client, + max_baseline ? 
+ ETP_SMBUS_MAX_BASELINE_CMD :
+ ETP_SMBUS_MIN_BASELINE_CMD,
+ val);
+ if (error < 0)
+ return error;
+
+ *value = be16_to_cpup((__be16 *)val);
+
+ return 0;
+}
+
+static int elan_smbus_get_version(struct i2c_client *client,
+ bool iap, u8 *version)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client,
+ iap ? ETP_SMBUS_IAP_VERSION_CMD :
+ ETP_SMBUS_FW_VERSION_CMD,
+ val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get %s version: %d\n",
+ iap ? "IAP" : "FW", error);
+ return error;
+ }
+
+ *version = val[2];
+ return 0;
+}
+
+static int elan_smbus_get_sm_version(struct i2c_client *client, u8 *version)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client,
+ ETP_SMBUS_SM_VERSION_CMD, val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get SM version: %d\n", error);
+ return error;
+ }
+
+ *version = val[0]; /* XXX Why 0 and not 2 as in IAP/FW versions? */
+ return 0;
+}
+
+static int elan_smbus_get_product_id(struct i2c_client *client, u8 *id)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client,
+ ETP_SMBUS_UNIQUEID_CMD, val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get product ID: %d\n", error);
+ return error;
+ }
+
+ *id = val[1];
+ return 0;
+}
+
+static int elan_smbus_get_checksum(struct i2c_client *client,
+ bool iap, u16 *csum)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client,
+ iap ? ETP_SMBUS_FW_CHECKSUM_CMD :
+ ETP_SMBUS_IAP_CHECKSUM_CMD,
+ val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get %s checksum: %d\n",
+ iap ? "IAP" : "FW", error);
+ return error;
+ }
+
+ *csum = be16_to_cpup((__be16 *)val);
+ return 0;
+}
+
+static int elan_smbus_get_max(struct i2c_client *client,
+ unsigned int *max_x, unsigned int *max_y)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client, ETP_SMBUS_RANGE_CMD, val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get dimensions: %d\n", error);
+ return error;
+ }
+
+ *max_x = (0x0f & val[0]) << 8 | val[1];
+ *max_y = (0xf0 & val[0]) << 4 | val[2];
+
+ return 0;
+}
+
+static int elan_smbus_get_resolution(struct i2c_client *client,
+ u8 *hw_res_x, u8 *hw_res_y)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client,
+ ETP_SMBUS_RESOLUTION_CMD, val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get resolution: %d\n", error);
+ return error;
+ }
+
+ *hw_res_x = val[1] & 0x0F;
+ *hw_res_y = (val[1] & 0xF0) >> 4;
+
+ return 0;
+}
+
+static int elan_smbus_get_num_traces(struct i2c_client *client,
+ unsigned int *x_traces,
+ unsigned int *y_traces)
+{
+ int error;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client,
+ ETP_SMBUS_XY_TRACENUM_CMD, val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to get trace info: %d\n", error);
+ return error;
+ }
+
+ *x_traces = val[1] - 1;
+ *y_traces = val[2] - 1;
+
+ return 0;
+}
+
+static int elan_smbus_iap_get_mode(struct i2c_client *client,
+ enum tp_mode *mode)
+{
+ int error;
+ u16 constant;
+ u8 val[3];
+
+ error = i2c_smbus_read_block_data(client, ETP_SMBUS_IAP_CTRL_CMD, val);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to read iap control register: %d\n",
+ error);
+ return error;
+ }
+
+ constant = be16_to_cpup((__be16 *)val);
+ dev_dbg(&client->dev, "iap control reg: 0x%04x.\n", constant);
+
+ *mode = (constant & ETP_SMBUS_IAP_MODE_ON) ?
IAP_MODE : MAIN_MODE; + + return 0; +} + +static int elan_smbus_iap_reset(struct i2c_client *client) +{ + int error; + + error = i2c_smbus_write_byte(client, ETP_SMBUS_IAP_RESET_CMD); + if (error) { + dev_err(&client->dev, "cannot reset IC: %d\n", error); + return error; + } + + return 0; +} + +static int elan_smbus_set_flash_key(struct i2c_client *client) +{ + int error; + u8 cmd[4] = { 0x00, 0x0B, 0x00, 0x5A }; + + error = i2c_smbus_write_block_data(client, ETP_SMBUS_IAP_CMD, + sizeof(cmd), cmd); + if (error) { + dev_err(&client->dev, "cannot set flash key: %d\n", error); + return error; + } + + return 0; +} + +static int elan_smbus_prepare_fw_update(struct i2c_client *client) +{ + struct device *dev = &client->dev; + int len; + int error; + enum tp_mode mode; + u8 val[3]; + u8 cmd[4] = {0x0F, 0x78, 0x00, 0x06}; + u16 password; + + /* Get FW in which mode (IAP_MODE/MAIN_MODE) */ + error = elan_smbus_iap_get_mode(client, &mode); + if (error) + return error; + + if (mode == MAIN_MODE) { + + /* set flash key */ + error = elan_smbus_set_flash_key(client); + if (error) + return error; + + /* write iap password */ + if (i2c_smbus_write_byte(client, + ETP_SMBUS_IAP_PASSWORD_WRITE) < 0) { + dev_err(dev, "cannot write iap password\n"); + return -EIO; + } + + error = i2c_smbus_write_block_data(client, ETP_SMBUS_IAP_CMD, + sizeof(cmd), cmd); + if (error) { + dev_err(dev, "failed to write iap password: %d\n", + error); + return error; + } + + /* + * Read back password to make sure we enabled flash + * successfully. + */ + len = i2c_smbus_read_block_data(client, + ETP_SMBUS_IAP_PASSWORD_READ, + val); + if (len < sizeof(u16)) { + error = len < 0 ? len : -EIO; + dev_err(dev, "failed to read iap password: %d\n", + error); + return error; + } + + password = be16_to_cpup((__be16 *)val); + if (password != ETP_SMBUS_IAP_PASSWORD) { + dev_err(dev, "wrong iap password = 0x%X\n", password); + return -EIO; + } + + /* Wait 30ms for MAIN_MODE change to IAP_MODE */ + msleep(30); + } + + error = elan_smbus_set_flash_key(client); + if (error) + return error; + + /* Reset IC */ + error = elan_smbus_iap_reset(client); + if (error) + return error; + + return 0; +} + + +static int elan_smbus_write_fw_block(struct i2c_client *client, + const u8 *page, u16 checksum, int idx) +{ + struct device *dev = &client->dev; + int error; + u16 result; + u8 val[3]; + + /* + * Due to the limitation of smbus protocol limiting + * transfer to 32 bytes at a time, we must split block + * in 2 transfers. + */ + error = i2c_smbus_write_block_data(client, + ETP_SMBUS_WRITE_FW_BLOCK, + ETP_FW_PAGE_SIZE / 2, + page); + if (error) { + dev_err(dev, "Failed to write page %d (part %d): %d\n", + idx, 1, error); + return error; + } + + error = i2c_smbus_write_block_data(client, + ETP_SMBUS_WRITE_FW_BLOCK, + ETP_FW_PAGE_SIZE / 2, + page + ETP_FW_PAGE_SIZE / 2); + if (error) { + dev_err(dev, "Failed to write page %d (part %d): %d\n", + idx, 2, error); + return error; + } + + + /* Wait for F/W to update one page ROM data. 
*/ + usleep_range(8000, 10000); + + error = i2c_smbus_read_block_data(client, + ETP_SMBUS_IAP_CTRL_CMD, val); + if (error < 0) { + dev_err(dev, "Failed to read IAP write result: %d\n", + error); + return error; + } + + result = be16_to_cpup((__be16 *)val); + if (result & (ETP_FW_IAP_PAGE_ERR | ETP_FW_IAP_INTF_ERR)) { + dev_err(dev, "IAP reports failed write: %04hx\n", + result); + return -EIO; + } + + return 0; +} + +static int elan_smbus_get_report(struct i2c_client *client, u8 *report) +{ + int len; + + len = i2c_smbus_read_block_data(client, + ETP_SMBUS_PACKET_QUERY, + &report[ETP_SMBUS_REPORT_OFFSET]); + if (len < 0) { + dev_err(&client->dev, "failed to read report data: %d\n", len); + return len; + } + + if (len != ETP_SMBUS_REPORT_LEN) { + dev_err(&client->dev, + "wrong report length (%d vs %d expected)\n", + len, ETP_SMBUS_REPORT_LEN); + return -EIO; + } + + return 0; +} + +static int elan_smbus_finish_fw_update(struct i2c_client *client, + struct completion *fw_completion) +{ + /* No special handling unlike I2C transport */ + return 0; +} + +const struct elan_transport_ops elan_smbus_ops = { + .initialize = elan_smbus_initialize, + .sleep_control = elan_smbus_sleep_control, + .power_control = elan_smbus_power_control, + .set_mode = elan_smbus_set_mode, + + .calibrate = elan_smbus_calibrate, + .calibrate_result = elan_smbus_calibrate_result, + + .get_baseline_data = elan_smbus_get_baseline_data, + + .get_version = elan_smbus_get_version, + .get_sm_version = elan_smbus_get_sm_version, + .get_product_id = elan_smbus_get_product_id, + .get_checksum = elan_smbus_get_checksum, + + .get_max = elan_smbus_get_max, + .get_resolution = elan_smbus_get_resolution, + .get_num_traces = elan_smbus_get_num_traces, + + .iap_get_mode = elan_smbus_iap_get_mode, + .iap_reset = elan_smbus_iap_reset, + + .prepare_fw_update = elan_smbus_prepare_fw_update, + .write_fw_block = elan_smbus_write_fw_block, + .finish_fw_update = elan_smbus_finish_fw_update, + + .get_report = elan_smbus_get_report, +}; diff --git a/drivers/input/mouse/lifebook.h b/drivers/input/mouse/lifebook.h index 4c4326c6f50..0baf02a70a9 100644 --- a/drivers/input/mouse/lifebook.h +++ b/drivers/input/mouse/lifebook.h @@ -16,14 +16,14 @@ void lifebook_module_init(void); int lifebook_detect(struct psmouse *psmouse, bool set_properties); int lifebook_init(struct psmouse *psmouse); #else -inline void lifebook_module_init(void) +static inline void lifebook_module_init(void) { } -inline int lifebook_detect(struct psmouse *psmouse, bool set_properties) +static inline int lifebook_detect(struct psmouse *psmouse, bool set_properties) { return -ENOSYS; } -inline int lifebook_init(struct psmouse *psmouse) +static inline int lifebook_init(struct psmouse *psmouse) { return -ENOSYS; } diff --git a/drivers/input/mouse/navpoint.c b/drivers/input/mouse/navpoint.c index 2a0360f5b5f..d6e8f58a1de 100644 --- a/drivers/input/mouse/navpoint.c +++ b/drivers/input/mouse/navpoint.c @@ -318,8 +318,7 @@ static int navpoint_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int navpoint_suspend(struct device *dev) +static int __maybe_unused navpoint_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct navpoint *navpoint = platform_get_drvdata(pdev); @@ -333,7 +332,7 @@ static int navpoint_suspend(struct device *dev) return 0; } -static int navpoint_resume(struct device *dev) +static int __maybe_unused navpoint_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct 
navpoint *navpoint = platform_get_drvdata(pdev); @@ -346,7 +345,6 @@ static int navpoint_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(navpoint_pm_ops, navpoint_suspend, navpoint_resume); diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c index ad822608f6e..878f18498f3 100644 --- a/drivers/input/mouse/synaptics_i2c.c +++ b/drivers/input/mouse/synaptics_i2c.c @@ -614,8 +614,7 @@ static int synaptics_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int synaptics_i2c_suspend(struct device *dev) +static int __maybe_unused synaptics_i2c_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct synaptics_i2c *touch = i2c_get_clientdata(client); @@ -628,7 +627,7 @@ static int synaptics_i2c_suspend(struct device *dev) return 0; } -static int synaptics_i2c_resume(struct device *dev) +static int __maybe_unused synaptics_i2c_resume(struct device *dev) { int ret; struct i2c_client *client = to_i2c_client(dev); @@ -643,7 +642,6 @@ static int synaptics_i2c_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(synaptics_i2c_pm, synaptics_i2c_suspend, synaptics_i2c_resume); diff --git a/drivers/input/serio/altera_ps2.c b/drivers/input/serio/altera_ps2.c index 8921c96589b..131d7826dc6 100644 --- a/drivers/input/serio/altera_ps2.c +++ b/drivers/input/serio/altera_ps2.c @@ -24,9 +24,7 @@ struct ps2if { struct serio *io; - struct resource *iomem_res; void __iomem *base; - unsigned irq; }; /* @@ -83,16 +81,34 @@ static void altera_ps2_close(struct serio *io) static int altera_ps2_probe(struct platform_device *pdev) { struct ps2if *ps2if; + struct resource *res; struct serio *serio; int error, irq; - ps2if = kzalloc(sizeof(struct ps2if), GFP_KERNEL); - serio = kzalloc(sizeof(struct serio), GFP_KERNEL); - if (!ps2if || !serio) { - error = -ENOMEM; - goto err_free_mem; + ps2if = devm_kzalloc(&pdev->dev, sizeof(struct ps2if), GFP_KERNEL); + if (!ps2if) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ps2if->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ps2if->base)) + return PTR_ERR(ps2if->base); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return -ENXIO; + + error = devm_request_irq(&pdev->dev, irq, altera_ps2_rxint, 0, + pdev->name, ps2if); + if (error) { + dev_err(&pdev->dev, "could not request IRQ %d\n", irq); + return error; } + serio = kzalloc(sizeof(struct serio), GFP_KERNEL); + if (!serio) + return -ENOMEM; + serio->id.type = SERIO_8042; serio->write = altera_ps2_write; serio->open = altera_ps2_open; @@ -103,56 +119,12 @@ static int altera_ps2_probe(struct platform_device *pdev) serio->dev.parent = &pdev->dev; ps2if->io = serio; - ps2if->iomem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (ps2if->iomem_res == NULL) { - error = -ENOENT; - goto err_free_mem; - } - - - irq = platform_get_irq(pdev, 0); - if (irq < 0) { - error = -ENXIO; - goto err_free_mem; - } - ps2if->irq = irq; - - if (!request_mem_region(ps2if->iomem_res->start, - resource_size(ps2if->iomem_res), pdev->name)) { - error = -EBUSY; - goto err_free_mem; - } - - ps2if->base = ioremap(ps2if->iomem_res->start, - resource_size(ps2if->iomem_res)); - if (!ps2if->base) { - error = -ENOMEM; - goto err_free_res; - } - - error = request_irq(ps2if->irq, altera_ps2_rxint, 0, pdev->name, ps2if); - if (error) { - dev_err(&pdev->dev, "could not allocate IRQ %d: %d\n", - ps2if->irq, error); - goto err_unmap; - } - - dev_info(&pdev->dev, "base %p, irq %d\n", 
ps2if->base, ps2if->irq); + dev_info(&pdev->dev, "base %p, irq %d\n", ps2if->base, irq); serio_register_port(ps2if->io); platform_set_drvdata(pdev, ps2if); return 0; - - err_unmap: - iounmap(ps2if->base); - err_free_res: - release_mem_region(ps2if->iomem_res->start, - resource_size(ps2if->iomem_res)); - err_free_mem: - kfree(ps2if); - kfree(serio); - return error; } /* @@ -163,11 +135,6 @@ static int altera_ps2_remove(struct platform_device *pdev) struct ps2if *ps2if = platform_get_drvdata(pdev); serio_unregister_port(ps2if->io); - free_irq(ps2if->irq, ps2if); - iounmap(ps2if->base); - release_mem_region(ps2if->iomem_res->start, - resource_size(ps2if->iomem_res)); - kfree(ps2if); return 0; } diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index faeeb137246..c66d1b53843 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -579,6 +579,16 @@ static const struct dmi_system_id __initconst i8042_dmi_nopnp_table[] = { }, }, { + /* + * Intel NUC D54250WYK - does not have i8042 controller but + * declares PS/2 devices in DSDT. + */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "D54250WYK"), + DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + }, + }, + { /* MSI Wind U-100 */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "U-100"), diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index d399b8b0f00..a05a5179da3 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -514,7 +514,7 @@ static void serio_release_port(struct device *dev) */ static void serio_init_port(struct serio *serio) { - static atomic_t serio_no = ATOMIC_INIT(0); + static atomic_t serio_no = ATOMIC_INIT(-1); __module_get(THIS_MODULE); @@ -525,7 +525,7 @@ static void serio_init_port(struct serio *serio) mutex_init(&serio->drv_mutex); device_initialize(&serio->dev); dev_set_name(&serio->dev, "serio%lu", - (unsigned long)atomic_inc_return(&serio_no) - 1); + (unsigned long)atomic_inc_return(&serio_no)); serio->dev.bus = &serio_bus; serio->dev.release = serio_release_port; serio->dev.groups = serio_device_attr_groups; diff --git a/drivers/input/serio/serio_raw.c b/drivers/input/serio/serio_raw.c index c9a02fe5757..71ef5d65a0c 100644 --- a/drivers/input/serio/serio_raw.c +++ b/drivers/input/serio/serio_raw.c @@ -292,7 +292,7 @@ static irqreturn_t serio_raw_interrupt(struct serio *serio, unsigned char data, static int serio_raw_connect(struct serio *serio, struct serio_driver *drv) { - static atomic_t serio_raw_no = ATOMIC_INIT(0); + static atomic_t serio_raw_no = ATOMIC_INIT(-1); struct serio_raw *serio_raw; int err; @@ -303,7 +303,7 @@ static int serio_raw_connect(struct serio *serio, struct serio_driver *drv) } snprintf(serio_raw->name, sizeof(serio_raw->name), - "serio_raw%ld", (long)atomic_inc_return(&serio_raw_no) - 1); + "serio_raw%ld", (long)atomic_inc_return(&serio_raw_no)); kref_init(&serio_raw->kref); INIT_LIST_HEAD(&serio_raw->client_list); init_waitqueue_head(&serio_raw->wait); diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index e1d8003d01f..58917525126 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -295,6 +295,19 @@ config TOUCHSCREEN_FUJITSU To compile this driver as a module, choose M here: the module will be called fujitsu-ts. 
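+# Example (illustrative) .config fragment: the two I2C touchscreen
+# drivers introduced below can both be built as modules:
+#   CONFIG_TOUCHSCREEN_GOODIX=m
+#   CONFIG_TOUCHSCREEN_ELAN=m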
+config TOUCHSCREEN_GOODIX + tristate "Goodix I2C touchscreen" + depends on I2C && ACPI + help + Say Y here if you have the Goodix touchscreen (such as one + installed in Onda v975w tablets) connected to your + system. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called goodix. + config TOUCHSCREEN_ILI210X tristate "Ilitek ILI210X based touchscreen" depends on I2C @@ -334,6 +347,18 @@ config TOUCHSCREEN_GUNZE To compile this driver as a module, choose M here: the module will be called gunze. +config TOUCHSCREEN_ELAN + tristate "Elan eKTH I2C touchscreen" + depends on I2C + help + Say Y here if you have an Elan eKTH I2C touchscreen + connected to your system. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called elants_i2c. + config TOUCHSCREEN_ELO tristate "Elo serial touchscreens" select SERIO diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index 090e61cc917..0242fea2102 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -31,9 +31,11 @@ obj-$(CONFIG_TOUCHSCREEN_EDT_FT5X06) += edt-ft5x06.o obj-$(CONFIG_TOUCHSCREEN_HAMPSHIRE) += hampshire.o obj-$(CONFIG_TOUCHSCREEN_GUNZE) += gunze.o obj-$(CONFIG_TOUCHSCREEN_EETI) += eeti_ts.o +obj-$(CONFIG_TOUCHSCREEN_ELAN) += elants_i2c.o obj-$(CONFIG_TOUCHSCREEN_ELO) += elo.o obj-$(CONFIG_TOUCHSCREEN_EGALAX) += egalax_ts.o obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o +obj-$(CONFIG_TOUCHSCREEN_GOODIX) += goodix.o obj-$(CONFIG_TOUCHSCREEN_ILI210X) += ili210x.o obj-$(CONFIG_TOUCHSCREEN_INEXIO) += inexio.o obj-$(CONFIG_TOUCHSCREEN_INTEL_MID) += intel-mid-touch.o diff --git a/drivers/input/touchscreen/ad7877.c b/drivers/input/touchscreen/ad7877.c index 523865daa1d..da4e5bb5e04 100644 --- a/drivers/input/touchscreen/ad7877.c +++ b/drivers/input/touchscreen/ad7877.c @@ -820,8 +820,7 @@ static int ad7877_remove(struct spi_device *spi) return 0; } -#ifdef CONFIG_PM_SLEEP -static int ad7877_suspend(struct device *dev) +static int __maybe_unused ad7877_suspend(struct device *dev) { struct ad7877 *ts = dev_get_drvdata(dev); @@ -830,7 +829,7 @@ static int ad7877_suspend(struct device *dev) return 0; } -static int ad7877_resume(struct device *dev) +static int __maybe_unused ad7877_resume(struct device *dev) { struct ad7877 *ts = dev_get_drvdata(dev); @@ -838,7 +837,6 @@ static int ad7877_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(ad7877_pm, ad7877_suspend, ad7877_resume); diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c index 1eb9d3c2088..fec66ad8051 100644 --- a/drivers/input/touchscreen/ad7879.c +++ b/drivers/input/touchscreen/ad7879.c @@ -284,8 +284,7 @@ static void ad7879_close(struct input_dev* input) __ad7879_disable(ts); } -#ifdef CONFIG_PM_SLEEP -static int ad7879_suspend(struct device *dev) +static int __maybe_unused ad7879_suspend(struct device *dev) { struct ad7879 *ts = dev_get_drvdata(dev); @@ -301,7 +300,7 @@ static int ad7879_suspend(struct device *dev) return 0; } -static int ad7879_resume(struct device *dev) +static int __maybe_unused ad7879_resume(struct device *dev) { struct ad7879 *ts = dev_get_drvdata(dev); @@ -316,7 +315,6 @@ static int ad7879_resume(struct device *dev) return 0; } -#endif SIMPLE_DEV_PM_OPS(ad7879_pm_ops, ad7879_suspend, ad7879_resume); EXPORT_SYMBOL(ad7879_pm_ops); diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index e57ba52bf48..e4eb8a6c658 
100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -883,8 +883,7 @@ static irqreturn_t ads7846_irq(int irq, void *handle) return IRQ_HANDLED; } -#ifdef CONFIG_PM_SLEEP -static int ads7846_suspend(struct device *dev) +static int __maybe_unused ads7846_suspend(struct device *dev) { struct ads7846 *ts = dev_get_drvdata(dev); @@ -906,7 +905,7 @@ static int ads7846_suspend(struct device *dev) return 0; } -static int ads7846_resume(struct device *dev) +static int __maybe_unused ads7846_resume(struct device *dev) { struct ads7846 *ts = dev_get_drvdata(dev); @@ -927,7 +926,6 @@ static int ads7846_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(ads7846_pm, ads7846_suspend, ads7846_resume); diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index aaacf8bfa61..bb070206223 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -2244,8 +2244,7 @@ static int mxt_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int mxt_suspend(struct device *dev) +static int __maybe_unused mxt_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct mxt_data *data = i2c_get_clientdata(client); @@ -2261,7 +2260,7 @@ static int mxt_suspend(struct device *dev) return 0; } -static int mxt_resume(struct device *dev) +static int __maybe_unused mxt_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct mxt_data *data = i2c_get_clientdata(client); @@ -2276,7 +2275,6 @@ static int mxt_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(mxt_pm_ops, mxt_suspend, mxt_resume); diff --git a/drivers/input/touchscreen/auo-pixcir-ts.c b/drivers/input/touchscreen/auo-pixcir-ts.c index 7f3c9478778..40e02dd5b2f 100644 --- a/drivers/input/touchscreen/auo-pixcir-ts.c +++ b/drivers/input/touchscreen/auo-pixcir-ts.c @@ -417,8 +417,7 @@ static void auo_pixcir_input_close(struct input_dev *dev) return; } -#ifdef CONFIG_PM_SLEEP -static int auo_pixcir_suspend(struct device *dev) +static int __maybe_unused auo_pixcir_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct auo_pixcir_ts *ts = i2c_get_clientdata(client); @@ -450,7 +449,7 @@ unlock: return ret; } -static int auo_pixcir_resume(struct device *dev) +static int __maybe_unused auo_pixcir_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct auo_pixcir_ts *ts = i2c_get_clientdata(client); @@ -479,7 +478,6 @@ unlock: return ret; } -#endif static SIMPLE_DEV_PM_OPS(auo_pixcir_pm_ops, auo_pixcir_suspend, auo_pixcir_resume); diff --git a/drivers/input/touchscreen/cy8ctmg110_ts.c b/drivers/input/touchscreen/cy8ctmg110_ts.c index 5bf1aeeea82..f2119ee0e21 100644 --- a/drivers/input/touchscreen/cy8ctmg110_ts.c +++ b/drivers/input/touchscreen/cy8ctmg110_ts.c @@ -291,8 +291,7 @@ err_free_mem: return err; } -#ifdef CONFIG_PM_SLEEP -static int cy8ctmg110_suspend(struct device *dev) +static int __maybe_unused cy8ctmg110_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct cy8ctmg110 *ts = i2c_get_clientdata(client); @@ -306,7 +305,7 @@ static int cy8ctmg110_suspend(struct device *dev) return 0; } -static int cy8ctmg110_resume(struct device *dev) +static int __maybe_unused cy8ctmg110_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct cy8ctmg110 *ts = i2c_get_clientdata(client); @@ -319,7 +318,6 @@ static int 
cy8ctmg110_resume(struct device *dev) } return 0; } -#endif static SIMPLE_DEV_PM_OPS(cy8ctmg110_pm, cy8ctmg110_suspend, cy8ctmg110_resume); diff --git a/drivers/input/touchscreen/cyttsp_core.c b/drivers/input/touchscreen/cyttsp_core.c index eee656f77a2..5b74e8b84e7 100644 --- a/drivers/input/touchscreen/cyttsp_core.c +++ b/drivers/input/touchscreen/cyttsp_core.c @@ -472,8 +472,7 @@ static int cyttsp_disable(struct cyttsp *ts) return 0; } -#ifdef CONFIG_PM_SLEEP -static int cyttsp_suspend(struct device *dev) +static int __maybe_unused cyttsp_suspend(struct device *dev) { struct cyttsp *ts = dev_get_drvdata(dev); int retval = 0; @@ -491,7 +490,7 @@ static int cyttsp_suspend(struct device *dev) return retval; } -static int cyttsp_resume(struct device *dev) +static int __maybe_unused cyttsp_resume(struct device *dev) { struct cyttsp *ts = dev_get_drvdata(dev); @@ -507,8 +506,6 @@ static int cyttsp_resume(struct device *dev) return 0; } -#endif - SIMPLE_DEV_PM_OPS(cyttsp_pm_ops, cyttsp_suspend, cyttsp_resume); EXPORT_SYMBOL_GPL(cyttsp_pm_ops); diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index ee3434f1e94..3793fcc7e5d 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -1092,8 +1092,7 @@ static int edt_ft5x06_ts_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int edt_ft5x06_ts_suspend(struct device *dev) +static int __maybe_unused edt_ft5x06_ts_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -1103,7 +1102,7 @@ static int edt_ft5x06_ts_suspend(struct device *dev) return 0; } -static int edt_ft5x06_ts_resume(struct device *dev) +static int __maybe_unused edt_ft5x06_ts_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -1112,7 +1111,6 @@ static int edt_ft5x06_ts_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(edt_ft5x06_ts_pm_ops, edt_ft5x06_ts_suspend, edt_ft5x06_ts_resume); diff --git a/drivers/input/touchscreen/eeti_ts.c b/drivers/input/touchscreen/eeti_ts.c index b1884ddd7a8..09be6ced715 100644 --- a/drivers/input/touchscreen/eeti_ts.c +++ b/drivers/input/touchscreen/eeti_ts.c @@ -264,8 +264,7 @@ static int eeti_ts_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int eeti_ts_suspend(struct device *dev) +static int __maybe_unused eeti_ts_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct eeti_ts_priv *priv = i2c_get_clientdata(client); @@ -284,7 +283,7 @@ static int eeti_ts_suspend(struct device *dev) return 0; } -static int eeti_ts_resume(struct device *dev) +static int __maybe_unused eeti_ts_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct eeti_ts_priv *priv = i2c_get_clientdata(client); @@ -302,7 +301,6 @@ static int eeti_ts_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(eeti_ts_pm, eeti_ts_suspend, eeti_ts_resume); diff --git a/drivers/input/touchscreen/egalax_ts.c b/drivers/input/touchscreen/egalax_ts.c index c8057847d71..4c56299284e 100644 --- a/drivers/input/touchscreen/egalax_ts.c +++ b/drivers/input/touchscreen/egalax_ts.c @@ -239,8 +239,7 @@ static const struct i2c_device_id egalax_ts_id[] = { }; MODULE_DEVICE_TABLE(i2c, egalax_ts_id); -#ifdef CONFIG_PM_SLEEP -static int egalax_ts_suspend(struct device *dev) +static int __maybe_unused egalax_ts_suspend(struct device *dev) { static const u8 suspend_cmd[MAX_I2C_DATA_LEN] = { 0x3, 0x6, 0xa, 0x3, 0x36, 0x3f, 
0x2, 0, 0, 0 @@ -252,13 +251,12 @@ static int egalax_ts_suspend(struct device *dev) return ret > 0 ? 0 : ret; } -static int egalax_ts_resume(struct device *dev) +static int __maybe_unused egalax_ts_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); return egalax_wake_up_device(client); } -#endif static SIMPLE_DEV_PM_OPS(egalax_ts_pm_ops, egalax_ts_suspend, egalax_ts_resume); diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c new file mode 100644 index 00000000000..a510f7ef9b6 --- /dev/null +++ b/drivers/input/touchscreen/elants_i2c.c @@ -0,0 +1,1271 @@ +/* + * Elan Microelectronics touch panels with I2C interface + * + * Copyright (C) 2014 Elan Microelectronics Corporation. + * Scott Liu <scott.liu@emc.com.tw> + * + * This code is partly based on hid-multitouch.c: + * + * Copyright (c) 2010-2012 Stephane Chatty <chatty@enac.fr> + * Copyright (c) 2010-2012 Benjamin Tissoires <benjamin.tissoires@gmail.com> + * Copyright (c) 2010-2012 Ecole Nationale de l'Aviation Civile, France + * + * + * This code is partly based on i2c-hid.c: + * + * Copyright (c) 2012 Benjamin Tissoires <benjamin.tissoires@gmail.com> + * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France + * Copyright (c) 2012 Red Hat, Inc + */ + +/* + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + */ + +#include <linux/module.h> +#include <linux/input.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/async.h> +#include <linux/i2c.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <linux/buffer_head.h> +#include <linux/version.h> +#include <linux/slab.h> +#include <linux/firmware.h> +#include <linux/version.h> +#include <linux/input/mt.h> +#include <linux/acpi.h> +#include <linux/of.h> +#include <asm/unaligned.h> + +/* Device, Driver information */ +#define DEVICE_NAME "elants_i2c" +#define DRV_VERSION "1.0.9" + +/* Convert from rows or columns into resolution */ +#define ELAN_TS_RESOLUTION(n, m) (((n) - 1) * (m)) + +/* FW header data */ +#define HEADER_SIZE 4 +#define FW_HDR_TYPE 0 +#define FW_HDR_COUNT 1 +#define FW_HDR_LENGTH 2 + +/* Buffer mode Queue Header information */ +#define QUEUE_HEADER_SINGLE 0x62 +#define QUEUE_HEADER_NORMAL 0X63 +#define QUEUE_HEADER_WAIT 0x64 + +/* Command header definition */ +#define CMD_HEADER_WRITE 0x54 +#define CMD_HEADER_READ 0x53 +#define CMD_HEADER_6B_READ 0x5B +#define CMD_HEADER_RESP 0x52 +#define CMD_HEADER_6B_RESP 0x9B +#define CMD_HEADER_HELLO 0x55 +#define CMD_HEADER_REK 0x66 + +/* FW position data */ +#define PACKET_SIZE 55 +#define MAX_CONTACT_NUM 10 +#define FW_POS_HEADER 0 +#define FW_POS_STATE 1 +#define FW_POS_TOTAL 2 +#define FW_POS_XY 3 +#define FW_POS_CHECKSUM 34 +#define FW_POS_WIDTH 35 +#define FW_POS_PRESSURE 45 + +#define HEADER_REPORT_10_FINGER 0x62 + +/* Header (4 bytes) plus 3 fill 10-finger packets */ +#define MAX_PACKET_SIZE 169 + +#define BOOT_TIME_DELAY_MS 50 + +/* FW read command, 0x53 0x?? 
0x0, 0x01 */ +#define E_ELAN_INFO_FW_VER 0x00 +#define E_ELAN_INFO_BC_VER 0x10 +#define E_ELAN_INFO_TEST_VER 0xE0 +#define E_ELAN_INFO_FW_ID 0xF0 +#define E_INFO_OSR 0xD6 +#define E_INFO_PHY_SCAN 0xD7 +#define E_INFO_PHY_DRIVER 0xD8 + +#define MAX_RETRIES 3 +#define MAX_FW_UPDATE_RETRIES 30 + +#define ELAN_FW_PAGESIZE 132 +#define ELAN_FW_FILENAME "elants_i2c.bin" + +/* calibration timeout definition */ +#define ELAN_CALI_TIMEOUT_MSEC 10000 + +enum elants_state { + ELAN_STATE_NORMAL, + ELAN_WAIT_QUEUE_HEADER, + ELAN_WAIT_RECALIBRATION, +}; + +enum elants_iap_mode { + ELAN_IAP_OPERATIONAL, + ELAN_IAP_RECOVERY, +}; + +/* struct elants_data - represents state of Elan touchscreen device */ +struct elants_data { + struct i2c_client *client; + struct input_dev *input; + + u16 fw_version; + u8 test_version; + u8 solution_version; + u8 bc_version; + u8 iap_version; + u16 hw_version; + unsigned int x_res; /* resolution in units/mm */ + unsigned int y_res; + unsigned int x_max; + unsigned int y_max; + + enum elants_state state; + enum elants_iap_mode iap_mode; + + /* Guards against concurrent access to the device via sysfs */ + struct mutex sysfs_mutex; + + u8 cmd_resp[HEADER_SIZE]; + struct completion cmd_done; + + u8 buf[MAX_PACKET_SIZE]; + + bool wake_irq_enabled; +}; + +static int elants_i2c_send(struct i2c_client *client, + const void *data, size_t size) +{ + int ret; + + ret = i2c_master_send(client, data, size); + if (ret == size) + return 0; + + if (ret >= 0) + ret = -EIO; + + dev_err(&client->dev, "%s failed (%*ph): %d\n", + __func__, (int)size, data, ret); + + return ret; +} + +static int elants_i2c_read(struct i2c_client *client, void *data, size_t size) +{ + int ret; + + ret = i2c_master_recv(client, data, size); + if (ret == size) + return 0; + + if (ret >= 0) + ret = -EIO; + + dev_err(&client->dev, "%s failed: %d\n", __func__, ret); + + return ret; +} + +static int elants_i2c_execute_command(struct i2c_client *client, + const u8 *cmd, size_t cmd_size, + u8 *resp, size_t resp_size) +{ + struct i2c_msg msgs[2]; + int ret; + u8 expected_response; + + switch (cmd[0]) { + case CMD_HEADER_READ: + expected_response = CMD_HEADER_RESP; + break; + + case CMD_HEADER_6B_READ: + expected_response = CMD_HEADER_6B_RESP; + break; + + default: + dev_err(&client->dev, "%s: invalid command %*ph\n", + __func__, (int)cmd_size, cmd); + return -EINVAL; + } + + msgs[0].addr = client->addr; + msgs[0].flags = client->flags & I2C_M_TEN; + msgs[0].len = cmd_size; + msgs[0].buf = (u8 *)cmd; + + msgs[1].addr = client->addr; + msgs[1].flags = client->flags & I2C_M_TEN; + msgs[1].flags |= I2C_M_RD; + msgs[1].len = resp_size; + msgs[1].buf = resp; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (ret < 0) + return ret; + + if (ret != ARRAY_SIZE(msgs) || resp[FW_HDR_TYPE] != expected_response) + return -EIO; + + return 0; +} + +static int elants_i2c_calibrate(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + int ret, error; + static const u8 w_flashkey[] = { 0x54, 0xC0, 0xE1, 0x5A }; + static const u8 rek[] = { 0x54, 0x29, 0x00, 0x01 }; + static const u8 rek_resp[] = { CMD_HEADER_REK, 0x66, 0x66, 0x66 }; + + disable_irq(client->irq); + + ts->state = ELAN_WAIT_RECALIBRATION; + reinit_completion(&ts->cmd_done); + + elants_i2c_send(client, w_flashkey, sizeof(w_flashkey)); + elants_i2c_send(client, rek, sizeof(rek)); + + enable_irq(client->irq); + + ret = wait_for_completion_interruptible_timeout(&ts->cmd_done, + msecs_to_jiffies(ELAN_CALI_TIMEOUT_MSEC)); + + ts->state = 
ELAN_STATE_NORMAL; + + if (ret <= 0) { + error = ret < 0 ? ret : -ETIMEDOUT; + dev_err(&client->dev, + "error while waiting for calibration to complete: %d\n", + error); + return error; + } + + if (memcmp(rek_resp, ts->cmd_resp, sizeof(rek_resp))) { + dev_err(&client->dev, + "unexpected calibration response: %*ph\n", + (int)sizeof(ts->cmd_resp), ts->cmd_resp); + return -EINVAL; + } + + return 0; +} + +static int elants_i2c_sw_reset(struct i2c_client *client) +{ + const u8 soft_rst_cmd[] = { 0x77, 0x77, 0x77, 0x77 }; + int error; + + error = elants_i2c_send(client, soft_rst_cmd, + sizeof(soft_rst_cmd)); + if (error) { + dev_err(&client->dev, "software reset failed: %d\n", error); + return error; + } + + /* + * We should wait at least 10 msec (but no more than 40) before + * sending fastboot or IAP command to the device. + */ + msleep(30); + + return 0; +} + +static u16 elants_i2c_parse_version(u8 *buf) +{ + return get_unaligned_be32(buf) >> 4; +} + +static int elants_i2c_query_fw_id(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + int error, retry_cnt; + const u8 cmd[] = { CMD_HEADER_READ, E_ELAN_INFO_FW_ID, 0x00, 0x01 }; + u8 resp[HEADER_SIZE]; + + for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) { + error = elants_i2c_execute_command(client, cmd, sizeof(cmd), + resp, sizeof(resp)); + if (!error) { + ts->hw_version = elants_i2c_parse_version(resp); + if (ts->hw_version != 0xffff) + return 0; + } + + dev_dbg(&client->dev, "read fw id error=%d, buf=%*phC\n", + error, (int)sizeof(resp), resp); + } + + dev_err(&client->dev, + "Failed to read fw id or fw id is invalid\n"); + + return -EINVAL; +} + +static int elants_i2c_query_fw_version(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + int error, retry_cnt; + const u8 cmd[] = { CMD_HEADER_READ, E_ELAN_INFO_FW_VER, 0x00, 0x01 }; + u8 resp[HEADER_SIZE]; + + for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) { + error = elants_i2c_execute_command(client, cmd, sizeof(cmd), + resp, sizeof(resp)); + if (!error) { + ts->fw_version = elants_i2c_parse_version(resp); + if (ts->fw_version != 0x0000 && + ts->fw_version != 0xffff) + return 0; + } + + dev_dbg(&client->dev, "read fw version error=%d, buf=%*phC\n", + error, (int)sizeof(resp), resp); + } + + dev_err(&client->dev, + "Failed to read fw version or fw version is invalid\n"); + + return -EINVAL; +} + +static int elants_i2c_query_test_version(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + int error, retry_cnt; + u16 version; + const u8 cmd[] = { CMD_HEADER_READ, E_ELAN_INFO_TEST_VER, 0x00, 0x01 }; + u8 resp[HEADER_SIZE]; + + for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) { + error = elants_i2c_execute_command(client, cmd, sizeof(cmd), + resp, sizeof(resp)); + if (!error) { + version = elants_i2c_parse_version(resp); + ts->test_version = version >> 8; + ts->solution_version = version & 0xff; + + return 0; + } + + dev_dbg(&client->dev, + "read test version error rc=%d, buf=%*phC\n", + error, (int)sizeof(resp), resp); + } + + dev_err(&client->dev, "Failed to read test version\n"); + + return -EINVAL; +} + +static int elants_i2c_query_bc_version(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + const u8 cmd[] = { CMD_HEADER_READ, E_ELAN_INFO_BC_VER, 0x00, 0x01 }; + u8 resp[HEADER_SIZE]; + u16 version; + int error; + + error = elants_i2c_execute_command(client, cmd, sizeof(cmd), + resp, sizeof(resp)); + if (error) { + dev_err(&client->dev, + "read BC version error=%d, buf=%*phC\n", + error, 
(int)sizeof(resp), resp); + return error; + } + + version = elants_i2c_parse_version(resp); + ts->bc_version = version >> 8; + ts->iap_version = version & 0xff; + + return 0; +} + +static int elants_i2c_query_ts_info(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + int error; + u8 resp[17]; + u16 phy_x, phy_y, rows, cols, osr; + const u8 get_resolution_cmd[] = { + CMD_HEADER_6B_READ, 0x00, 0x00, 0x00, 0x00, 0x00 + }; + const u8 get_osr_cmd[] = { + CMD_HEADER_READ, E_INFO_OSR, 0x00, 0x01 + }; + const u8 get_physical_scan_cmd[] = { + CMD_HEADER_READ, E_INFO_PHY_SCAN, 0x00, 0x01 + }; + const u8 get_physical_drive_cmd[] = { + CMD_HEADER_READ, E_INFO_PHY_DRIVER, 0x00, 0x01 + }; + + /* Get trace number */ + error = elants_i2c_execute_command(client, + get_resolution_cmd, + sizeof(get_resolution_cmd), + resp, sizeof(resp)); + if (error) { + dev_err(&client->dev, "get resolution command failed: %d\n", + error); + return error; + } + + rows = resp[2] + resp[6] + resp[10]; + cols = resp[3] + resp[7] + resp[11]; + + /* Process mm_to_pixel information */ + error = elants_i2c_execute_command(client, + get_osr_cmd, sizeof(get_osr_cmd), + resp, sizeof(resp)); + if (error) { + dev_err(&client->dev, "get osr command failed: %d\n", + error); + return error; + } + + osr = resp[3]; + + error = elants_i2c_execute_command(client, + get_physical_scan_cmd, + sizeof(get_physical_scan_cmd), + resp, sizeof(resp)); + if (error) { + dev_err(&client->dev, "get physical scan command failed: %d\n", + error); + return error; + } + + phy_x = get_unaligned_be16(&resp[2]); + + error = elants_i2c_execute_command(client, + get_physical_drive_cmd, + sizeof(get_physical_drive_cmd), + resp, sizeof(resp)); + if (error) { + dev_err(&client->dev, "get physical drive command failed: %d\n", + error); + return error; + } + + phy_y = get_unaligned_be16(&resp[2]); + + dev_dbg(&client->dev, "phy_x=%d, phy_y=%d\n", phy_x, phy_y); + + if (rows == 0 || cols == 0 || osr == 0) { + dev_warn(&client->dev, + "invalid trace number data: %d, %d, %d\n", + rows, cols, osr); + } else { + /* translate trace number to TS resolution */ + ts->x_max = ELAN_TS_RESOLUTION(rows, osr); + ts->x_res = DIV_ROUND_CLOSEST(ts->x_max, phy_x); + ts->y_max = ELAN_TS_RESOLUTION(cols, osr); + ts->y_res = DIV_ROUND_CLOSEST(ts->y_max, phy_y); + } + + return 0; +} + +static int elants_i2c_fastboot(struct i2c_client *client) +{ + const u8 boot_cmd[] = { 0x4D, 0x61, 0x69, 0x6E }; + int error; + + error = elants_i2c_send(client, boot_cmd, sizeof(boot_cmd)); + if (error) { + dev_err(&client->dev, "boot failed: %d\n", error); + return error; + } + + dev_dbg(&client->dev, "boot success -- 0x%x\n", client->addr); + return 0; +} + +static int elants_i2c_initialize(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + int error, retry_cnt; + const u8 hello_packet[] = { 0x55, 0x55, 0x55, 0x55 }; + const u8 recov_packet[] = { 0x55, 0x55, 0x80, 0x80 }; + u8 buf[HEADER_SIZE]; + + for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) { + error = elants_i2c_sw_reset(client); + if (error) { + /* Continue initializing if it's the last try */ + if (retry_cnt < MAX_RETRIES - 1) + continue; + } + + error = elants_i2c_fastboot(client); + if (error) { + /* Continue initializing if it's the last try */ + if (retry_cnt < MAX_RETRIES - 1) + continue; + } + + /* Wait for Hello packet */ + msleep(BOOT_TIME_DELAY_MS); + + error = elants_i2c_read(client, buf, sizeof(buf)); + if (error) { + dev_err(&client->dev, + "failed to read 'hello' packet: %d\n", error); 
+ } else if (!memcmp(buf, hello_packet, sizeof(hello_packet))) { + ts->iap_mode = ELAN_IAP_OPERATIONAL; + break; + } else if (!memcmp(buf, recov_packet, sizeof(recov_packet))) { + /* + * Setting error code will mark device + * in recovery mode below. + */ + error = -EIO; + break; + } else { + error = -EINVAL; + dev_err(&client->dev, + "invalid 'hello' packet: %*ph\n", + (int)sizeof(buf), buf); + } + } + + if (!error) + error = elants_i2c_query_fw_id(ts); + if (!error) + error = elants_i2c_query_fw_version(ts); + + if (error) { + ts->iap_mode = ELAN_IAP_RECOVERY; + } else { + elants_i2c_query_test_version(ts); + elants_i2c_query_bc_version(ts); + elants_i2c_query_ts_info(ts); + } + + return 0; +} + +/* + * Firmware update interface. + */ + +static int elants_i2c_fw_write_page(struct i2c_client *client, + const void *page) +{ + const u8 ack_ok[] = { 0xaa, 0xaa }; + u8 buf[2]; + int retry; + int error; + + for (retry = 0; retry < MAX_FW_UPDATE_RETRIES; retry++) { + error = elants_i2c_send(client, page, ELAN_FW_PAGESIZE); + if (error) { + dev_err(&client->dev, + "IAP Write Page failed: %d\n", error); + continue; + } + + error = elants_i2c_read(client, buf, 2); + if (error) { + dev_err(&client->dev, + "IAP Ack read failed: %d\n", error); + return error; + } + + if (!memcmp(buf, ack_ok, sizeof(ack_ok))) + return 0; + + error = -EIO; + dev_err(&client->dev, + "IAP Get Ack Error [%02x:%02x]\n", + buf[0], buf[1]); + } + + return error; +} + +static int elants_i2c_do_update_firmware(struct i2c_client *client, + const struct firmware *fw, + bool force) +{ + const u8 enter_iap[] = { 0x45, 0x49, 0x41, 0x50 }; + const u8 enter_iap2[] = { 0x54, 0x00, 0x12, 0x34 }; + const u8 iap_ack[] = { 0x55, 0xaa, 0x33, 0xcc }; + u8 buf[HEADER_SIZE]; + u16 send_id; + int page, n_fw_pages; + int error; + + /* Recovery mode detection! 
*/ + if (force) { + dev_dbg(&client->dev, "Recovery mode procedure\n"); + error = elants_i2c_send(client, enter_iap2, sizeof(enter_iap2)); + } else { + /* Start IAP Procedure */ + dev_dbg(&client->dev, "Normal IAP procedure\n"); + elants_i2c_sw_reset(client); + + error = elants_i2c_send(client, enter_iap, sizeof(enter_iap)); + } + + if (error) { + dev_err(&client->dev, "failed to enter IAP mode: %d\n", error); + return error; + } + + msleep(20); + + /* check IAP state */ + error = elants_i2c_read(client, buf, 4); + if (error) { + dev_err(&client->dev, + "failed to read IAP acknowledgement: %d\n", + error); + return error; + } + + if (memcmp(buf, iap_ack, sizeof(iap_ack))) { + dev_err(&client->dev, + "failed to enter IAP: %*ph (expected %*ph)\n", + (int)sizeof(buf), buf, (int)sizeof(iap_ack), iap_ack); + return -EIO; + } + + dev_info(&client->dev, "successfully entered IAP mode"); + + send_id = client->addr; + error = elants_i2c_send(client, &send_id, 1); + if (error) { + dev_err(&client->dev, "sending dummy byte failed: %d\n", + error); + return error; + } + + /* Clear the last page of Master */ + error = elants_i2c_send(client, fw->data, ELAN_FW_PAGESIZE); + if (error) { + dev_err(&client->dev, "clearing of the last page failed: %d\n", + error); + return error; + } + + error = elants_i2c_read(client, buf, 2); + if (error) { + dev_err(&client->dev, + "failed to read ACK for clearing the last page: %d\n", + error); + return error; + } + + n_fw_pages = fw->size / ELAN_FW_PAGESIZE; + dev_dbg(&client->dev, "IAP Pages = %d\n", n_fw_pages); + + for (page = 0; page < n_fw_pages; page++) { + error = elants_i2c_fw_write_page(client, + fw->data + page * ELAN_FW_PAGESIZE); + if (error) { + dev_err(&client->dev, + "failed to write FW page %d: %d\n", + page, error); + return error; + } + } + + /* Old iap needs to wait 200ms for WDT and rest is for hello packets */ + msleep(300); + + dev_info(&client->dev, "firmware update completed\n"); + return 0; +} + +static int elants_i2c_fw_update(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + const struct firmware *fw; + int error; + + error = request_firmware(&fw, ELAN_FW_FILENAME, &client->dev); + if (error) { + dev_err(&client->dev, "failed to request firmware %s: %d\n", + ELAN_FW_FILENAME, error); + return error; + } + + if (fw->size % ELAN_FW_PAGESIZE) { + dev_err(&client->dev, "invalid firmware length: %zu\n", + fw->size); + error = -EINVAL; + goto out; + } + + disable_irq(client->irq); + + error = elants_i2c_do_update_firmware(client, fw, + ts->iap_mode == ELAN_IAP_RECOVERY); + if (error) { + dev_err(&client->dev, "firmware update failed: %d\n", error); + ts->iap_mode = ELAN_IAP_RECOVERY; + goto out_enable_irq; + } + + error = elants_i2c_initialize(ts); + if (error) { + dev_err(&client->dev, + "failed to initialize device after firmware update: %d\n", + error); + ts->iap_mode = ELAN_IAP_RECOVERY; + goto out_enable_irq; + } + + ts->iap_mode = ELAN_IAP_OPERATIONAL; + +out_enable_irq: + ts->state = ELAN_STATE_NORMAL; + enable_irq(client->irq); + msleep(100); + + if (!error) + elants_i2c_calibrate(ts); +out: + release_firmware(fw); + return error; +} + +/* + * Event reporting. 
+ */ + +static void elants_i2c_mt_event(struct elants_data *ts, u8 *buf) +{ + struct input_dev *input = ts->input; + unsigned int n_fingers; + u16 finger_state; + int i; + + n_fingers = buf[FW_POS_STATE + 1] & 0x0f; + finger_state = ((buf[FW_POS_STATE + 1] & 0x30) << 4) | + buf[FW_POS_STATE]; + + dev_dbg(&ts->client->dev, + "n_fingers: %u, state: %04x\n", n_fingers, finger_state); + + for (i = 0; i < MAX_CONTACT_NUM && n_fingers; i++) { + if (finger_state & 1) { + unsigned int x, y, p, w; + u8 *pos; + + pos = &buf[FW_POS_XY + i * 3]; + x = (((u16)pos[0] & 0xf0) << 4) | pos[1]; + y = (((u16)pos[0] & 0x0f) << 8) | pos[2]; + p = buf[FW_POS_PRESSURE + i]; + w = buf[FW_POS_WIDTH + i]; + + dev_dbg(&ts->client->dev, "i=%d x=%d y=%d p=%d w=%d\n", + i, x, y, p, w); + + input_mt_slot(input, i); + input_mt_report_slot_state(input, MT_TOOL_FINGER, true); + input_event(input, EV_ABS, ABS_MT_POSITION_X, x); + input_event(input, EV_ABS, ABS_MT_POSITION_Y, y); + input_event(input, EV_ABS, ABS_MT_PRESSURE, p); + input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, w); + + n_fingers--; + } + + finger_state >>= 1; + } + + input_mt_sync_frame(input); + input_sync(input); +} + +static u8 elants_i2c_calculate_checksum(u8 *buf) +{ + u8 checksum = 0; + u8 i; + + for (i = 0; i < FW_POS_CHECKSUM; i++) + checksum += buf[i]; + + return checksum; +} + +static void elants_i2c_event(struct elants_data *ts, u8 *buf) +{ + u8 checksum = elants_i2c_calculate_checksum(buf); + + if (unlikely(buf[FW_POS_CHECKSUM] != checksum)) + dev_warn(&ts->client->dev, + "%s: invalid checksum for packet %02x: %02x vs. %02x\n", + __func__, buf[FW_POS_HEADER], + checksum, buf[FW_POS_CHECKSUM]); + else if (unlikely(buf[FW_POS_HEADER] != HEADER_REPORT_10_FINGER)) + dev_warn(&ts->client->dev, + "%s: unknown packet type: %02x\n", + __func__, buf[FW_POS_HEADER]); + else + elants_i2c_mt_event(ts, buf); +} + +static irqreturn_t elants_i2c_irq(int irq, void *_dev) +{ + const u8 wait_packet[] = { 0x64, 0x64, 0x64, 0x64 }; + struct elants_data *ts = _dev; + struct i2c_client *client = ts->client; + int report_count, report_len; + int i; + int len; + + len = i2c_master_recv(client, ts->buf, sizeof(ts->buf)); + if (len < 0) { + dev_err(&client->dev, "%s: failed to read data: %d\n", + __func__, len); + goto out; + } + + dev_dbg(&client->dev, "%s: packet %*ph\n", + __func__, HEADER_SIZE, ts->buf); + + switch (ts->state) { + case ELAN_WAIT_RECALIBRATION: + if (ts->buf[FW_HDR_TYPE] == CMD_HEADER_REK) { + memcpy(ts->cmd_resp, ts->buf, sizeof(ts->cmd_resp)); + complete(&ts->cmd_done); + ts->state = ELAN_STATE_NORMAL; + } + break; + + case ELAN_WAIT_QUEUE_HEADER: + if (ts->buf[FW_HDR_TYPE] != QUEUE_HEADER_NORMAL) + break; + + ts->state = ELAN_STATE_NORMAL; + /* fall through */ + + case ELAN_STATE_NORMAL: + + switch (ts->buf[FW_HDR_TYPE]) { + case CMD_HEADER_HELLO: + case CMD_HEADER_RESP: + case CMD_HEADER_REK: + break; + + case QUEUE_HEADER_WAIT: + if (memcmp(ts->buf, wait_packet, sizeof(wait_packet))) { + dev_err(&client->dev, + "invalid wait packet %*ph\n", + HEADER_SIZE, ts->buf); + } else { + ts->state = ELAN_WAIT_QUEUE_HEADER; + udelay(30); + } + break; + + case QUEUE_HEADER_SINGLE: + elants_i2c_event(ts, &ts->buf[HEADER_SIZE]); + break; + + case QUEUE_HEADER_NORMAL: + report_count = ts->buf[FW_HDR_COUNT]; + if (report_count > 3) { + dev_err(&client->dev, + "too large report count: %*ph\n", + HEADER_SIZE, ts->buf); + break; + } + + report_len = ts->buf[FW_HDR_LENGTH] / report_count; + if (report_len != PACKET_SIZE) { + dev_err(&client->dev, + "mismatching report 
length: %*ph\n", + HEADER_SIZE, ts->buf); + break; + } + + for (i = 0; i < report_count; i++) { + u8 *buf = ts->buf + HEADER_SIZE + + i * PACKET_SIZE; + elants_i2c_event(ts, buf); + } + break; + + default: + dev_err(&client->dev, "unknown packet %*ph\n", + HEADER_SIZE, ts->buf); + break; + } + break; + } + +out: + return IRQ_HANDLED; +} + +/* + * sysfs interface + */ +static ssize_t calibrate_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elants_data *ts = i2c_get_clientdata(client); + int error; + + error = mutex_lock_interruptible(&ts->sysfs_mutex); + if (error) + return error; + + error = elants_i2c_calibrate(ts); + + mutex_unlock(&ts->sysfs_mutex); + return error ?: count; +} + +static ssize_t write_update_fw(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elants_data *ts = i2c_get_clientdata(client); + int error; + + error = mutex_lock_interruptible(&ts->sysfs_mutex); + if (error) + return error; + + error = elants_i2c_fw_update(ts); + dev_dbg(dev, "firmware update result: %d\n", error); + + mutex_unlock(&ts->sysfs_mutex); + return error ?: count; +} + +static ssize_t show_iap_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elants_data *ts = i2c_get_clientdata(client); + + return sprintf(buf, "%s\n", + ts->iap_mode == ELAN_IAP_OPERATIONAL ? + "Normal" : "Recovery"); +} + +static DEVICE_ATTR(calibrate, S_IWUSR, NULL, calibrate_store); +static DEVICE_ATTR(iap_mode, S_IRUGO, show_iap_mode, NULL); +static DEVICE_ATTR(update_fw, S_IWUSR, NULL, write_update_fw); + +struct elants_version_attribute { + struct device_attribute dattr; + size_t field_offset; + size_t field_size; +}; + +#define __ELANTS_FIELD_SIZE(_field) \ + sizeof(((struct elants_data *)NULL)->_field) +#define __ELANTS_VERIFY_SIZE(_field) \ + (BUILD_BUG_ON_ZERO(__ELANTS_FIELD_SIZE(_field) > 2) + \ + __ELANTS_FIELD_SIZE(_field)) +#define ELANTS_VERSION_ATTR(_field) \ + struct elants_version_attribute elants_ver_attr_##_field = { \ + .dattr = __ATTR(_field, S_IRUGO, \ + elants_version_attribute_show, NULL), \ + .field_offset = offsetof(struct elants_data, _field), \ + .field_size = __ELANTS_VERIFY_SIZE(_field), \ + } + +static ssize_t elants_version_attribute_show(struct device *dev, + struct device_attribute *dattr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elants_data *ts = i2c_get_clientdata(client); + struct elants_version_attribute *attr = + container_of(dattr, struct elants_version_attribute, dattr); + u8 *field = (u8 *)((char *)ts + attr->field_offset); + unsigned int fmt_size; + unsigned int val; + + if (attr->field_size == 1) { + val = *field; + fmt_size = 2; /* 2 HEX digits */ + } else { + val = *(u16 *)field; + fmt_size = 4; /* 4 HEX digits */ + } + + return sprintf(buf, "%0*x\n", fmt_size, val); +} + +static ELANTS_VERSION_ATTR(fw_version); +static ELANTS_VERSION_ATTR(hw_version); +static ELANTS_VERSION_ATTR(test_version); +static ELANTS_VERSION_ATTR(solution_version); +static ELANTS_VERSION_ATTR(bc_version); +static ELANTS_VERSION_ATTR(iap_version); + +static struct attribute *elants_attributes[] = { + &dev_attr_calibrate.attr, + &dev_attr_update_fw.attr, + &dev_attr_iap_mode.attr, + + &elants_ver_attr_fw_version.dattr.attr, + &elants_ver_attr_hw_version.dattr.attr, + 
&elants_ver_attr_test_version.dattr.attr, + &elants_ver_attr_solution_version.dattr.attr, + &elants_ver_attr_bc_version.dattr.attr, + &elants_ver_attr_iap_version.dattr.attr, + NULL +}; + +static struct attribute_group elants_attribute_group = { + .attrs = elants_attributes, +}; + +static void elants_i2c_remove_sysfs_group(void *_data) +{ + struct elants_data *ts = _data; + + sysfs_remove_group(&ts->client->dev.kobj, &elants_attribute_group); +} + +static int elants_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + union i2c_smbus_data dummy; + struct elants_data *ts; + unsigned long irqflags; + int error; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + dev_err(&client->dev, + "%s: i2c check functionality error\n", DEVICE_NAME); + return -ENXIO; + } + + /* Make sure there is something at this address */ + if (i2c_smbus_xfer(client->adapter, client->addr, 0, + I2C_SMBUS_READ, 0, I2C_SMBUS_BYTE, &dummy) < 0) { + dev_err(&client->dev, "nothing at this address\n"); + return -ENXIO; + } + + ts = devm_kzalloc(&client->dev, sizeof(struct elants_data), GFP_KERNEL); + if (!ts) + return -ENOMEM; + + mutex_init(&ts->sysfs_mutex); + init_completion(&ts->cmd_done); + + ts->client = client; + i2c_set_clientdata(client, ts); + + error = elants_i2c_initialize(ts); + if (error) { + dev_err(&client->dev, "failed to initialize: %d\n", error); + return error; + } + + ts->input = devm_input_allocate_device(&client->dev); + if (!ts->input) { + dev_err(&client->dev, "Failed to allocate input device\n"); + return -ENOMEM; + } + + ts->input->name = "Elan Touchscreen"; + ts->input->id.bustype = BUS_I2C; + + __set_bit(BTN_TOUCH, ts->input->keybit); + __set_bit(EV_ABS, ts->input->evbit); + __set_bit(EV_KEY, ts->input->evbit); + + /* Single touch input params setup */ + input_set_abs_params(ts->input, ABS_X, 0, ts->x_max, 0, 0); + input_set_abs_params(ts->input, ABS_Y, 0, ts->y_max, 0, 0); + input_set_abs_params(ts->input, ABS_PRESSURE, 0, 255, 0, 0); + input_abs_set_res(ts->input, ABS_X, ts->x_res); + input_abs_set_res(ts->input, ABS_Y, ts->y_res); + + /* Multitouch input params setup */ + error = input_mt_init_slots(ts->input, MAX_CONTACT_NUM, + INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED); + if (error) { + dev_err(&client->dev, + "failed to initialize MT slots: %d\n", error); + return error; + } + + input_set_abs_params(ts->input, ABS_MT_POSITION_X, 0, ts->x_max, 0, 0); + input_set_abs_params(ts->input, ABS_MT_POSITION_Y, 0, ts->y_max, 0, 0); + input_set_abs_params(ts->input, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0); + input_set_abs_params(ts->input, ABS_MT_PRESSURE, 0, 255, 0, 0); + input_abs_set_res(ts->input, ABS_MT_POSITION_X, ts->x_res); + input_abs_set_res(ts->input, ABS_MT_POSITION_Y, ts->y_res); + + input_set_drvdata(ts->input, ts); + + error = input_register_device(ts->input); + if (error) { + dev_err(&client->dev, + "unable to register input device: %d\n", error); + return error; + } + + /* + * Systems using device tree should set up interrupt via DTS, + * the rest will use the default falling edge interrupts. + */ + irqflags = client->dev.of_node ? 0 : IRQF_TRIGGER_FALLING; + + error = devm_request_threaded_irq(&client->dev, client->irq, + NULL, elants_i2c_irq, + irqflags | IRQF_ONESHOT, + client->name, ts); + if (error) { + dev_err(&client->dev, "Failed to register interrupt\n"); + return error; + } + + /* + * Systems using device tree should set up wakeup via DTS, + * the rest will configure device as wakeup source by default. 
+ */ + if (!client->dev.of_node) + device_init_wakeup(&client->dev, true); + + error = sysfs_create_group(&client->dev.kobj, &elants_attribute_group); + if (error) { + dev_err(&client->dev, "failed to create sysfs attributes: %d\n", + error); + return error; + } + + error = devm_add_action(&client->dev, + elants_i2c_remove_sysfs_group, ts); + if (error) { + elants_i2c_remove_sysfs_group(ts); + dev_err(&client->dev, + "Failed to add sysfs cleanup action: %d\n", + error); + return error; + } + + return 0; +} + +static int __maybe_unused elants_i2c_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elants_data *ts = i2c_get_clientdata(client); + const u8 set_sleep_cmd[] = { 0x54, 0x50, 0x00, 0x01 }; + int retry_cnt; + int error; + + /* Command not support in IAP recovery mode */ + if (ts->iap_mode != ELAN_IAP_OPERATIONAL) + return -EBUSY; + + disable_irq(client->irq); + + for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) { + error = elants_i2c_send(client, set_sleep_cmd, + sizeof(set_sleep_cmd)); + if (!error) + break; + + dev_err(&client->dev, "suspend command failed: %d\n", error); + } + + if (device_may_wakeup(dev)) + ts->wake_irq_enabled = (enable_irq_wake(client->irq) == 0); + + return 0; +} + +static int __maybe_unused elants_i2c_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct elants_data *ts = i2c_get_clientdata(client); + const u8 set_active_cmd[] = { 0x54, 0x58, 0x00, 0x01 }; + int retry_cnt; + int error; + + if (device_may_wakeup(dev) && ts->wake_irq_enabled) + disable_irq_wake(client->irq); + + for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) { + error = elants_i2c_send(client, set_active_cmd, + sizeof(set_active_cmd)); + if (!error) + break; + + dev_err(&client->dev, "resume command failed: %d\n", error); + } + + ts->state = ELAN_STATE_NORMAL; + enable_irq(client->irq); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(elants_i2c_pm_ops, + elants_i2c_suspend, elants_i2c_resume); + +static const struct i2c_device_id elants_i2c_id[] = { + { DEVICE_NAME, 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, elants_i2c_id); + +#ifdef CONFIG_ACPI +static const struct acpi_device_id elants_acpi_id[] = { + { "ELAN0001", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, elants_acpi_id); +#endif + +#ifdef CONFIG_OF +static const struct of_device_id elants_of_match[] = { + { .compatible = "elan,ekth3500" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, elants_of_match); +#endif + +static struct i2c_driver elants_i2c_driver = { + .probe = elants_i2c_probe, + .id_table = elants_i2c_id, + .driver = { + .name = DEVICE_NAME, + .owner = THIS_MODULE, + .pm = &elants_i2c_pm_ops, + .acpi_match_table = ACPI_PTR(elants_acpi_id), + .of_match_table = of_match_ptr(elants_of_match), + }, +}; +module_i2c_driver(elants_i2c_driver); + +MODULE_AUTHOR("Scott Liu <scott.liu@emc.com.tw>"); +MODULE_DESCRIPTION("Elan I2c Touchscreen driver"); +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("GPL"); diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c new file mode 100644 index 00000000000..ca196689f02 --- /dev/null +++ b/drivers/input/touchscreen/goodix.c @@ -0,0 +1,395 @@ +/* + * Driver for Goodix Touchscreens + * + * Copyright (c) 2014 Red Hat Inc. + * + * This code is based on gt9xx.c authored by andrew@goodix.com: + * + * 2010 - 2012 Goodix Technology. 
+ */ + +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; version 2 of the License. + */ + +#include <linux/kernel.h> +#include <linux/i2c.h> +#include <linux/input.h> +#include <linux/input/mt.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <asm/unaligned.h> + +struct goodix_ts_data { + struct i2c_client *client; + struct input_dev *input_dev; + int abs_x_max; + int abs_y_max; + unsigned int max_touch_num; + unsigned int int_trigger_type; +}; + +#define GOODIX_MAX_HEIGHT 4096 +#define GOODIX_MAX_WIDTH 4096 +#define GOODIX_INT_TRIGGER 1 +#define GOODIX_CONTACT_SIZE 8 +#define GOODIX_MAX_CONTACTS 10 + +#define GOODIX_CONFIG_MAX_LENGTH 240 + +/* Register defines */ +#define GOODIX_READ_COOR_ADDR 0x814E +#define GOODIX_REG_CONFIG_DATA 0x8047 +#define GOODIX_REG_VERSION 0x8140 + +#define RESOLUTION_LOC 1 +#define TRIGGER_LOC 6 + +static const unsigned long goodix_irq_flags[] = { + IRQ_TYPE_EDGE_RISING, + IRQ_TYPE_EDGE_FALLING, + IRQ_TYPE_LEVEL_LOW, + IRQ_TYPE_LEVEL_HIGH, +}; + +/** + * goodix_i2c_read - read data from a register of the i2c slave device. + * + * @client: i2c device. + * @reg: the register to read from. + * @buf: raw write data buffer. + * @len: length of the buffer to write + */ +static int goodix_i2c_read(struct i2c_client *client, + u16 reg, u8 *buf, int len) +{ + struct i2c_msg msgs[2]; + u16 wbuf = cpu_to_be16(reg); + int ret; + + msgs[0].flags = 0; + msgs[0].addr = client->addr; + msgs[0].len = 2; + msgs[0].buf = (u8 *) &wbuf; + + msgs[1].flags = I2C_M_RD; + msgs[1].addr = client->addr; + msgs[1].len = len; + msgs[1].buf = buf; + + ret = i2c_transfer(client->adapter, msgs, 2); + return ret < 0 ? ret : (ret != ARRAY_SIZE(msgs) ? -EIO : 0); +} + +static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data) +{ + int touch_num; + int error; + + error = goodix_i2c_read(ts->client, GOODIX_READ_COOR_ADDR, data, + GOODIX_CONTACT_SIZE + 1); + if (error) { + dev_err(&ts->client->dev, "I2C transfer error: %d\n", error); + return error; + } + + touch_num = data[0] & 0x0f; + if (touch_num > GOODIX_MAX_CONTACTS) + return -EPROTO; + + if (touch_num > 1) { + data += 1 + GOODIX_CONTACT_SIZE; + error = goodix_i2c_read(ts->client, + GOODIX_READ_COOR_ADDR + + 1 + GOODIX_CONTACT_SIZE, + data, + GOODIX_CONTACT_SIZE * (touch_num - 1)); + if (error) + return error; + } + + return touch_num; +} + +static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data) +{ + int id = coor_data[0] & 0x0F; + int input_x = get_unaligned_le16(&coor_data[1]); + int input_y = get_unaligned_le16(&coor_data[3]); + int input_w = get_unaligned_le16(&coor_data[5]); + + input_mt_slot(ts->input_dev, id); + input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, true); + input_report_abs(ts->input_dev, ABS_MT_POSITION_X, input_x); + input_report_abs(ts->input_dev, ABS_MT_POSITION_Y, input_y); + input_report_abs(ts->input_dev, ABS_MT_TOUCH_MAJOR, input_w); + input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w); +} + +/** + * goodix_process_events - Process incoming events + * + * @ts: our goodix_ts_data pointer + * + * Called when the IRQ is triggered. Read the current device state, and push + * the input events to the user space. 
+ */ +static void goodix_process_events(struct goodix_ts_data *ts) +{ + u8 point_data[1 + GOODIX_CONTACT_SIZE * GOODIX_MAX_CONTACTS]; + int touch_num; + int i; + + touch_num = goodix_ts_read_input_report(ts, point_data); + if (touch_num < 0) + return; + + for (i = 0; i < touch_num; i++) + goodix_ts_report_touch(ts, + &point_data[1 + GOODIX_CONTACT_SIZE * i]); + + input_mt_sync_frame(ts->input_dev); + input_sync(ts->input_dev); +} + +/** + * goodix_ts_irq_handler - The IRQ handler + * + * @irq: interrupt number. + * @dev_id: private data pointer. + */ +static irqreturn_t goodix_ts_irq_handler(int irq, void *dev_id) +{ + static const u8 end_cmd[] = { + GOODIX_READ_COOR_ADDR >> 8, + GOODIX_READ_COOR_ADDR & 0xff, + 0 + }; + struct goodix_ts_data *ts = dev_id; + + goodix_process_events(ts); + + if (i2c_master_send(ts->client, end_cmd, sizeof(end_cmd)) < 0) + dev_err(&ts->client->dev, "I2C write end_cmd error\n"); + + return IRQ_HANDLED; +} + +/** + * goodix_read_config - Read the embedded configuration of the panel + * + * @ts: our goodix_ts_data pointer + * + * Must be called during probe + */ +static void goodix_read_config(struct goodix_ts_data *ts) +{ + u8 config[GOODIX_CONFIG_MAX_LENGTH]; + int error; + + error = goodix_i2c_read(ts->client, GOODIX_REG_CONFIG_DATA, + config, + GOODIX_CONFIG_MAX_LENGTH); + if (error) { + dev_warn(&ts->client->dev, + "Error reading config (%d), using defaults\n", + error); + ts->abs_x_max = GOODIX_MAX_WIDTH; + ts->abs_y_max = GOODIX_MAX_HEIGHT; + ts->int_trigger_type = GOODIX_INT_TRIGGER; + return; + } + + ts->abs_x_max = get_unaligned_le16(&config[RESOLUTION_LOC]); + ts->abs_y_max = get_unaligned_le16(&config[RESOLUTION_LOC + 2]); + ts->int_trigger_type = (config[TRIGGER_LOC]) & 0x03; + if (!ts->abs_x_max || !ts->abs_y_max) { + dev_err(&ts->client->dev, + "Invalid config, using defaults\n"); + ts->abs_x_max = GOODIX_MAX_WIDTH; + ts->abs_y_max = GOODIX_MAX_HEIGHT; + } +} + + +/** + * goodix_read_version - Read goodix touchscreen version + * + * @client: the i2c client + * @version: output buffer containing the version on success + */ +static int goodix_read_version(struct i2c_client *client, u16 *version) +{ + int error; + u8 buf[6]; + + error = goodix_i2c_read(client, GOODIX_REG_VERSION, buf, sizeof(buf)); + if (error) { + dev_err(&client->dev, "read version failed: %d\n", error); + return error; + } + + if (version) + *version = get_unaligned_le16(&buf[4]); + + dev_info(&client->dev, "IC VERSION: %6ph\n", buf); + + return 0; +} + +/** + * goodix_i2c_test - I2C test function to check if the device answers. 
+ * + * @client: the i2c client + */ +static int goodix_i2c_test(struct i2c_client *client) +{ + int retry = 0; + int error; + u8 test; + + while (retry++ < 2) { + error = goodix_i2c_read(client, GOODIX_REG_CONFIG_DATA, + &test, 1); + if (!error) + return 0; + + dev_err(&client->dev, "i2c test failed attempt %d: %d\n", + retry, error); + msleep(20); + } + + return error; +} + +/** + * goodix_request_input_dev - Allocate, populate and register the input device + * + * @ts: our goodix_ts_data pointer + * + * Must be called during probe + */ +static int goodix_request_input_dev(struct goodix_ts_data *ts) +{ + int error; + + ts->input_dev = devm_input_allocate_device(&ts->client->dev); + if (!ts->input_dev) { + dev_err(&ts->client->dev, "Failed to allocate input device."); + return -ENOMEM; + } + + ts->input_dev->evbit[0] = BIT_MASK(EV_SYN) | + BIT_MASK(EV_KEY) | + BIT_MASK(EV_ABS); + + input_set_abs_params(ts->input_dev, ABS_MT_POSITION_X, 0, + ts->abs_x_max, 0, 0); + input_set_abs_params(ts->input_dev, ABS_MT_POSITION_Y, 0, + ts->abs_y_max, 0, 0); + input_set_abs_params(ts->input_dev, ABS_MT_WIDTH_MAJOR, 0, 255, 0, 0); + input_set_abs_params(ts->input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0); + + input_mt_init_slots(ts->input_dev, GOODIX_MAX_CONTACTS, + INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED); + + ts->input_dev->name = "Goodix Capacitive TouchScreen"; + ts->input_dev->phys = "input/ts"; + ts->input_dev->id.bustype = BUS_I2C; + ts->input_dev->id.vendor = 0x0416; + ts->input_dev->id.product = 0x1001; + ts->input_dev->id.version = 10427; + + error = input_register_device(ts->input_dev); + if (error) { + dev_err(&ts->client->dev, + "Failed to register input device: %d", error); + return error; + } + + return 0; +} + +static int goodix_ts_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct goodix_ts_data *ts; + unsigned long irq_flags; + int error; + u16 version_info; + + dev_dbg(&client->dev, "I2C Address: 0x%02x\n", client->addr); + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + dev_err(&client->dev, "I2C check functionality failed.\n"); + return -ENXIO; + } + + ts = devm_kzalloc(&client->dev, sizeof(*ts), GFP_KERNEL); + if (!ts) + return -ENOMEM; + + ts->client = client; + i2c_set_clientdata(client, ts); + + error = goodix_i2c_test(client); + if (error) { + dev_err(&client->dev, "I2C communication failure: %d\n", error); + return error; + } + + error = goodix_read_version(client, &version_info); + if (error) { + dev_err(&client->dev, "Read version failed.\n"); + return error; + } + + goodix_read_config(ts); + + error = goodix_request_input_dev(ts); + if (error) + return error; + + irq_flags = goodix_irq_flags[ts->int_trigger_type] | IRQF_ONESHOT; + error = devm_request_threaded_irq(&ts->client->dev, client->irq, + NULL, goodix_ts_irq_handler, + irq_flags, client->name, ts); + if (error) { + dev_err(&client->dev, "request IRQ failed: %d\n", error); + return error; + } + + return 0; +} + +static const struct i2c_device_id goodix_ts_id[] = { + { "GDIX1001:00", 0 }, + { } +}; + +static const struct acpi_device_id goodix_acpi_match[] = { + { "GDIX1001", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, goodix_acpi_match); + +static struct i2c_driver goodix_ts_driver = { + .probe = goodix_ts_probe, + .id_table = goodix_ts_id, + .driver = { + .name = "Goodix-TS", + .owner = THIS_MODULE, + .acpi_match_table = goodix_acpi_match, + }, +}; +module_i2c_driver(goodix_ts_driver); + +MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>"); 
+MODULE_AUTHOR("Bastien Nocera <hadess@hadess.net>"); +MODULE_DESCRIPTION("Goodix touchscreen driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 2a508913981..da6dc819c84 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -311,8 +311,7 @@ static int ili210x_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int ili210x_i2c_suspend(struct device *dev) +static int __maybe_unused ili210x_i2c_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -322,7 +321,7 @@ static int ili210x_i2c_suspend(struct device *dev) return 0; } -static int ili210x_i2c_resume(struct device *dev) +static int __maybe_unused ili210x_i2c_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -331,7 +330,6 @@ static int ili210x_i2c_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(ili210x_i2c_pm, ili210x_i2c_suspend, ili210x_i2c_resume); diff --git a/drivers/input/touchscreen/ipaq-micro-ts.c b/drivers/input/touchscreen/ipaq-micro-ts.c index 62c8976e616..33c134820ef 100644 --- a/drivers/input/touchscreen/ipaq-micro-ts.c +++ b/drivers/input/touchscreen/ipaq-micro-ts.c @@ -122,8 +122,7 @@ static int micro_ts_probe(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int micro_ts_suspend(struct device *dev) +static int __maybe_unused micro_ts_suspend(struct device *dev) { struct touchscreen_data *ts = dev_get_drvdata(dev); @@ -132,7 +131,7 @@ static int micro_ts_suspend(struct device *dev) return 0; } -static int micro_ts_resume(struct device *dev) +static int __maybe_unused micro_ts_resume(struct device *dev) { struct touchscreen_data *ts = dev_get_drvdata(dev); struct input_dev *input = ts->input; @@ -146,7 +145,6 @@ static int micro_ts_resume(struct device *dev) return 0; } -#endif static const struct dev_pm_ops micro_ts_dev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(micro_ts_suspend, micro_ts_resume) diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c index 372bbf7658f..67c0d31613d 100644 --- a/drivers/input/touchscreen/mms114.c +++ b/drivers/input/touchscreen/mms114.c @@ -515,8 +515,7 @@ static int mms114_probe(struct i2c_client *client, return 0; } -#ifdef CONFIG_PM_SLEEP -static int mms114_suspend(struct device *dev) +static int __maybe_unused mms114_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct mms114_data *data = i2c_get_clientdata(client); @@ -540,7 +539,7 @@ static int mms114_suspend(struct device *dev) return 0; } -static int mms114_resume(struct device *dev) +static int __maybe_unused mms114_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct mms114_data *data = i2c_get_clientdata(client); @@ -559,7 +558,6 @@ static int mms114_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(mms114_pm_ops, mms114_suspend, mms114_resume); diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c index fc49c75317d..4fb5537fdd4 100644 --- a/drivers/input/touchscreen/pixcir_i2c_ts.c +++ b/drivers/input/touchscreen/pixcir_i2c_ts.c @@ -347,8 +347,7 @@ static void pixcir_input_close(struct input_dev *dev) pixcir_stop(ts); } -#ifdef CONFIG_PM_SLEEP -static int pixcir_i2c_ts_suspend(struct device *dev) +static int __maybe_unused pixcir_i2c_ts_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct 
pixcir_i2c_ts_data *ts = i2c_get_clientdata(client); @@ -377,7 +376,7 @@ unlock: return ret; } -static int pixcir_i2c_ts_resume(struct device *dev) +static int __maybe_unused pixcir_i2c_ts_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct pixcir_i2c_ts_data *ts = i2c_get_clientdata(client); @@ -405,7 +404,6 @@ unlock: return ret; } -#endif static SIMPLE_DEV_PM_OPS(pixcir_dev_pm_ops, pixcir_i2c_ts_suspend, pixcir_i2c_ts_resume); diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c index 3c0f57efe7b..697e26e52d5 100644 --- a/drivers/input/touchscreen/st1232.c +++ b/drivers/input/touchscreen/st1232.c @@ -243,8 +243,7 @@ static int st1232_ts_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int st1232_ts_suspend(struct device *dev) +static int __maybe_unused st1232_ts_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct st1232_ts_data *ts = i2c_get_clientdata(client); @@ -259,7 +258,7 @@ static int st1232_ts_suspend(struct device *dev) return 0; } -static int st1232_ts_resume(struct device *dev) +static int __maybe_unused st1232_ts_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct st1232_ts_data *ts = i2c_get_clientdata(client); @@ -274,8 +273,6 @@ static int st1232_ts_resume(struct device *dev) return 0; } -#endif - static SIMPLE_DEV_PM_OPS(st1232_ts_pm_ops, st1232_ts_suspend, st1232_ts_resume); diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c index 52380b68ebd..72657c57943 100644 --- a/drivers/input/touchscreen/tsc2005.c +++ b/drivers/input/touchscreen/tsc2005.c @@ -773,8 +773,7 @@ static int tsc2005_remove(struct spi_device *spi) return 0; } -#ifdef CONFIG_PM_SLEEP -static int tsc2005_suspend(struct device *dev) +static int __maybe_unused tsc2005_suspend(struct device *dev) { struct spi_device *spi = to_spi_device(dev); struct tsc2005 *ts = spi_get_drvdata(spi); @@ -791,7 +790,7 @@ static int tsc2005_suspend(struct device *dev) return 0; } -static int tsc2005_resume(struct device *dev) +static int __maybe_unused tsc2005_resume(struct device *dev) { struct spi_device *spi = to_spi_device(dev); struct tsc2005 *ts = spi_get_drvdata(spi); @@ -807,7 +806,6 @@ static int tsc2005_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(tsc2005_pm_ops, tsc2005_suspend, tsc2005_resume); diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c index 0eca00da584..c1e23cfc615 100644 --- a/drivers/input/touchscreen/ucb1400_ts.c +++ b/drivers/input/touchscreen/ucb1400_ts.c @@ -406,8 +406,7 @@ static int ucb1400_ts_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int ucb1400_ts_suspend(struct device *dev) +static int __maybe_unused ucb1400_ts_suspend(struct device *dev) { struct ucb1400_ts *ucb = dev_get_platdata(dev); struct input_dev *idev = ucb->ts_idev; @@ -421,7 +420,7 @@ static int ucb1400_ts_suspend(struct device *dev) return 0; } -static int ucb1400_ts_resume(struct device *dev) +static int __maybe_unused ucb1400_ts_resume(struct device *dev) { struct ucb1400_ts *ucb = dev_get_platdata(dev); struct input_dev *idev = ucb->ts_idev; @@ -434,7 +433,6 @@ static int ucb1400_ts_resume(struct device *dev) mutex_unlock(&idev->mutex); return 0; } -#endif static SIMPLE_DEV_PM_OPS(ucb1400_ts_pm_ops, ucb1400_ts_suspend, ucb1400_ts_resume); diff --git a/drivers/input/touchscreen/wacom_i2c.c b/drivers/input/touchscreen/wacom_i2c.c index 
7ccaa1b12b0..32f8ac00393 100644 --- a/drivers/input/touchscreen/wacom_i2c.c +++ b/drivers/input/touchscreen/wacom_i2c.c @@ -242,8 +242,7 @@ static int wacom_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM_SLEEP -static int wacom_i2c_suspend(struct device *dev) +static int __maybe_unused wacom_i2c_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -252,7 +251,7 @@ static int wacom_i2c_suspend(struct device *dev) return 0; } -static int wacom_i2c_resume(struct device *dev) +static int __maybe_unused wacom_i2c_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -260,7 +259,6 @@ static int wacom_i2c_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(wacom_i2c_pm, wacom_i2c_suspend, wacom_i2c_resume); diff --git a/drivers/input/touchscreen/zforce_ts.c b/drivers/input/touchscreen/zforce_ts.c index 8ba48f5eff7..19880c7385e 100644 --- a/drivers/input/touchscreen/zforce_ts.c +++ b/drivers/input/touchscreen/zforce_ts.c @@ -602,8 +602,7 @@ static void zforce_input_close(struct input_dev *dev) return; } -#ifdef CONFIG_PM_SLEEP -static int zforce_suspend(struct device *dev) +static int __maybe_unused zforce_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct zforce_ts *ts = i2c_get_clientdata(client); @@ -648,7 +647,7 @@ unlock: return ret; } -static int zforce_resume(struct device *dev) +static int __maybe_unused zforce_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct zforce_ts *ts = i2c_get_clientdata(client); @@ -685,7 +684,6 @@ unlock: return ret; } -#endif static SIMPLE_DEV_PM_OPS(zforce_pm_ops, zforce_suspend, zforce_resume); diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c index 3d5d792d5cb..410c3974987 100644 --- a/drivers/memory/fsl_ifc.c +++ b/drivers/memory/fsl_ifc.c @@ -61,7 +61,7 @@ int fsl_ifc_find(phys_addr_t addr_base) if (!fsl_ifc_ctrl_dev || !fsl_ifc_ctrl_dev->regs) return -ENODEV; - for (i = 0; i < ARRAY_SIZE(fsl_ifc_ctrl_dev->regs->cspr_cs); i++) { + for (i = 0; i < fsl_ifc_ctrl_dev->banks; i++) { u32 cspr = in_be32(&fsl_ifc_ctrl_dev->regs->cspr_cs[i].cspr); if (cspr & CSPR_V && (cspr & CSPR_BA) == convert_ifc_address(addr_base)) @@ -213,7 +213,7 @@ static irqreturn_t fsl_ifc_ctrl_irq(int irqno, void *data) static int fsl_ifc_ctrl_probe(struct platform_device *dev) { int ret = 0; - + int version, banks; dev_info(&dev->dev, "Freescale Integrated Flash Controller\n"); @@ -231,6 +231,15 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) goto err; } + version = ioread32be(&fsl_ifc_ctrl_dev->regs->ifc_rev) & + FSL_IFC_VERSION_MASK; + banks = (version == FSL_IFC_VERSION_1_0_0) ? 4 : 8; + dev_info(&dev->dev, "IFC version %d.%d, %d banks\n", + version >> 24, (version >> 16) & 0xf, banks); + + fsl_ifc_ctrl_dev->version = version; + fsl_ifc_ctrl_dev->banks = banks; + /* get the Controller level irq */ fsl_ifc_ctrl_dev->irq = irq_of_parse_and_map(dev->dev.of_node, 0); if (fsl_ifc_ctrl_dev->irq == NO_IRQ) { diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index 94b821042d9..71fea895ce3 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -133,7 +133,7 @@ config MTD_OF_PARTS help This provides a partition parsing function which derives the partition map from the children of the flash node, - as described in Documentation/devicetree/booting-without-of.txt. + as described in Documentation/devicetree/bindings/mtd/partition.txt. 
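The #ifdef CONFIG_PM_SLEEP removals running through the touchscreen drivers above (ad7877 through zforce_ts) all lean on the same property: SIMPLE_DEV_PM_OPS() expands through SET_SYSTEM_SLEEP_PM_OPS(), which references the suspend/resume callbacks only when CONFIG_PM_SLEEP is enabled. Tagging the functions __maybe_unused therefore silences the "defined but not used" warning in !PM_SLEEP builds, and the preprocessor guards can go. A minimal skeleton of the pattern; the foo_* names are placeholders, not from any driver in this patch.

#include <linux/device.h>
#include <linux/pm.h>

static int __maybe_unused foo_suspend(struct device *dev)
{
        /* Quiesce the hardware; only ever called with CONFIG_PM_SLEEP=y. */
        return 0;
}

static int __maybe_unused foo_resume(struct device *dev)
{
        /* Bring the hardware back up. */
        return 0;
}

/*
 * With CONFIG_PM_SLEEP disabled, SET_SYSTEM_SLEEP_PM_OPS() inside this
 * macro expands to nothing, leaving foo_suspend()/foo_resume()
 * unreferenced; __maybe_unused keeps the compiler quiet about that
 * without wrapping both functions in #ifdef CONFIG_PM_SLEEP.
 */
static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);

The ops struct is then hooked up via .driver.pm = &foo_pm_ops, exactly as the converted drivers already do.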
config MTD_AR7_PARTS tristate "TI AR7 partitioning support" diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c index 8057f52a45b..cc13ea5ce4d 100644 --- a/drivers/mtd/bcm47xxpart.c +++ b/drivers/mtd/bcm47xxpart.c @@ -15,8 +15,12 @@ #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> -/* 10 parts were found on sflash on Netgear WNDR4500 */ -#define BCM47XXPART_MAX_PARTS 12 +/* + * NAND flash on Netgear R6250 was verified to contain 15 partitions. + * This will result in allocating too big array for some old devices, but the + * memory will be freed soon anyway (see mtd_device_parse_register). + */ +#define BCM47XXPART_MAX_PARTS 20 /* * Amount of bytes we read when analyzing each block of flash memory. @@ -168,18 +172,26 @@ static int bcm47xxpart_parse(struct mtd_info *master, i++; } - bcm47xxpart_add_part(&parts[curr_part++], "linux", - offset + trx->offset[i], 0); - i++; + if (trx->offset[i]) { + bcm47xxpart_add_part(&parts[curr_part++], + "linux", + offset + trx->offset[i], + 0); + i++; + } /* * Pure rootfs size is known and can be calculated as: * trx->length - trx->offset[i]. We don't fill it as * we want to have jffs2 (overlay) in the same mtd. */ - bcm47xxpart_add_part(&parts[curr_part++], "rootfs", - offset + trx->offset[i], 0); - i++; + if (trx->offset[i]) { + bcm47xxpart_add_part(&parts[curr_part++], + "rootfs", + offset + trx->offset[i], + 0); + i++; + } last_trx_part = curr_part - 1; diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index 3096f3ded3a..286b97a304c 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -2654,8 +2654,7 @@ static void cfi_intelext_destroy(struct mtd_info *mtd) kfree(cfi); for (i = 0; i < mtd->numeraseregions; i++) { region = &mtd->eraseregions[i]; - if (region->lockmap) - kfree(region->lockmap); + kfree(region->lockmap); } kfree(mtd->eraseregions); } diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c index 72346048532..448ce42f951 100644 --- a/drivers/mtd/devices/docg3.c +++ b/drivers/mtd/devices/docg3.c @@ -22,6 +22,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/string.h> #include <linux/slab.h> @@ -1655,22 +1656,21 @@ static int dbg_flashctrl_show(struct seq_file *s, void *p) { struct docg3 *docg3 = (struct docg3 *)s->private; - int pos = 0; u8 fctrl; mutex_lock(&docg3->cascade->lock); fctrl = doc_register_readb(docg3, DOC_FLASHCONTROL); mutex_unlock(&docg3->cascade->lock); - pos += seq_printf(s, - "FlashControl : 0x%02x (%s,CE# %s,%s,%s,flash %s)\n", - fctrl, - fctrl & DOC_CTRL_VIOLATION ? "protocol violation" : "-", - fctrl & DOC_CTRL_CE ? "active" : "inactive", - fctrl & DOC_CTRL_PROTECTION_ERROR ? "protection error" : "-", - fctrl & DOC_CTRL_SEQUENCE_ERROR ? "sequence error" : "-", - fctrl & DOC_CTRL_FLASHREADY ? "ready" : "not ready"); - return pos; + seq_printf(s, "FlashControl : 0x%02x (%s,CE# %s,%s,%s,flash %s)\n", + fctrl, + fctrl & DOC_CTRL_VIOLATION ? "protocol violation" : "-", + fctrl & DOC_CTRL_CE ? "active" : "inactive", + fctrl & DOC_CTRL_PROTECTION_ERROR ? "protection error" : "-", + fctrl & DOC_CTRL_SEQUENCE_ERROR ? "sequence error" : "-", + fctrl & DOC_CTRL_FLASHREADY ? 
"ready" : "not ready"); + + return 0; } DEBUGFS_RO_ATTR(flashcontrol, dbg_flashctrl_show); @@ -1678,58 +1678,56 @@ static int dbg_asicmode_show(struct seq_file *s, void *p) { struct docg3 *docg3 = (struct docg3 *)s->private; - int pos = 0, pctrl, mode; + int pctrl, mode; mutex_lock(&docg3->cascade->lock); pctrl = doc_register_readb(docg3, DOC_ASICMODE); mode = pctrl & 0x03; mutex_unlock(&docg3->cascade->lock); - pos += seq_printf(s, - "%04x : RAM_WE=%d,RSTIN_RESET=%d,BDETCT_RESET=%d,WRITE_ENABLE=%d,POWERDOWN=%d,MODE=%d%d (", - pctrl, - pctrl & DOC_ASICMODE_RAM_WE ? 1 : 0, - pctrl & DOC_ASICMODE_RSTIN_RESET ? 1 : 0, - pctrl & DOC_ASICMODE_BDETCT_RESET ? 1 : 0, - pctrl & DOC_ASICMODE_MDWREN ? 1 : 0, - pctrl & DOC_ASICMODE_POWERDOWN ? 1 : 0, - mode >> 1, mode & 0x1); + seq_printf(s, + "%04x : RAM_WE=%d,RSTIN_RESET=%d,BDETCT_RESET=%d,WRITE_ENABLE=%d,POWERDOWN=%d,MODE=%d%d (", + pctrl, + pctrl & DOC_ASICMODE_RAM_WE ? 1 : 0, + pctrl & DOC_ASICMODE_RSTIN_RESET ? 1 : 0, + pctrl & DOC_ASICMODE_BDETCT_RESET ? 1 : 0, + pctrl & DOC_ASICMODE_MDWREN ? 1 : 0, + pctrl & DOC_ASICMODE_POWERDOWN ? 1 : 0, + mode >> 1, mode & 0x1); switch (mode) { case DOC_ASICMODE_RESET: - pos += seq_puts(s, "reset"); + seq_puts(s, "reset"); break; case DOC_ASICMODE_NORMAL: - pos += seq_puts(s, "normal"); + seq_puts(s, "normal"); break; case DOC_ASICMODE_POWERDOWN: - pos += seq_puts(s, "powerdown"); + seq_puts(s, "powerdown"); break; } - pos += seq_puts(s, ")\n"); - return pos; + seq_puts(s, ")\n"); + return 0; } DEBUGFS_RO_ATTR(asic_mode, dbg_asicmode_show); static int dbg_device_id_show(struct seq_file *s, void *p) { struct docg3 *docg3 = (struct docg3 *)s->private; - int pos = 0; int id; mutex_lock(&docg3->cascade->lock); id = doc_register_readb(docg3, DOC_DEVICESELECT); mutex_unlock(&docg3->cascade->lock); - pos += seq_printf(s, "DeviceId = %d\n", id); - return pos; + seq_printf(s, "DeviceId = %d\n", id); + return 0; } DEBUGFS_RO_ATTR(device_id, dbg_device_id_show); static int dbg_protection_show(struct seq_file *s, void *p) { struct docg3 *docg3 = (struct docg3 *)s->private; - int pos = 0; int protect, dps0, dps0_low, dps0_high, dps1, dps1_low, dps1_high; mutex_lock(&docg3->cascade->lock); @@ -1742,45 +1740,40 @@ static int dbg_protection_show(struct seq_file *s, void *p) dps1_high = doc_register_readw(docg3, DOC_DPS1_ADDRHIGH); mutex_unlock(&docg3->cascade->lock); - pos += seq_printf(s, "Protection = 0x%02x (", - protect); + seq_printf(s, "Protection = 0x%02x (", protect); if (protect & DOC_PROTECT_FOUNDRY_OTP_LOCK) - pos += seq_puts(s, "FOUNDRY_OTP_LOCK,"); + seq_puts(s, "FOUNDRY_OTP_LOCK,"); if (protect & DOC_PROTECT_CUSTOMER_OTP_LOCK) - pos += seq_puts(s, "CUSTOMER_OTP_LOCK,"); + seq_puts(s, "CUSTOMER_OTP_LOCK,"); if (protect & DOC_PROTECT_LOCK_INPUT) - pos += seq_puts(s, "LOCK_INPUT,"); + seq_puts(s, "LOCK_INPUT,"); if (protect & DOC_PROTECT_STICKY_LOCK) - pos += seq_puts(s, "STICKY_LOCK,"); + seq_puts(s, "STICKY_LOCK,"); if (protect & DOC_PROTECT_PROTECTION_ENABLED) - pos += seq_puts(s, "PROTECTION ON,"); + seq_puts(s, "PROTECTION ON,"); if (protect & DOC_PROTECT_IPL_DOWNLOAD_LOCK) - pos += seq_puts(s, "IPL_DOWNLOAD_LOCK,"); + seq_puts(s, "IPL_DOWNLOAD_LOCK,"); if (protect & DOC_PROTECT_PROTECTION_ERROR) - pos += seq_puts(s, "PROTECT_ERR,"); + seq_puts(s, "PROTECT_ERR,"); else - pos += seq_puts(s, "NO_PROTECT_ERR"); - pos += seq_puts(s, ")\n"); - - pos += seq_printf(s, "DPS0 = 0x%02x : " - "Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, " - "WRITE=%d, HW_LOCK=%d, KEY_OK=%d\n", - dps0, dps0_low, dps0_high, - 
!!(dps0 & DOC_DPS_OTP_PROTECTED), - !!(dps0 & DOC_DPS_READ_PROTECTED), - !!(dps0 & DOC_DPS_WRITE_PROTECTED), - !!(dps0 & DOC_DPS_HW_LOCK_ENABLED), - !!(dps0 & DOC_DPS_KEY_OK)); - pos += seq_printf(s, "DPS1 = 0x%02x : " - "Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, " - "WRITE=%d, HW_LOCK=%d, KEY_OK=%d\n", - dps1, dps1_low, dps1_high, - !!(dps1 & DOC_DPS_OTP_PROTECTED), - !!(dps1 & DOC_DPS_READ_PROTECTED), - !!(dps1 & DOC_DPS_WRITE_PROTECTED), - !!(dps1 & DOC_DPS_HW_LOCK_ENABLED), - !!(dps1 & DOC_DPS_KEY_OK)); - return pos; + seq_puts(s, "NO_PROTECT_ERR"); + seq_puts(s, ")\n"); + + seq_printf(s, "DPS0 = 0x%02x : Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, WRITE=%d, HW_LOCK=%d, KEY_OK=%d\n", + dps0, dps0_low, dps0_high, + !!(dps0 & DOC_DPS_OTP_PROTECTED), + !!(dps0 & DOC_DPS_READ_PROTECTED), + !!(dps0 & DOC_DPS_WRITE_PROTECTED), + !!(dps0 & DOC_DPS_HW_LOCK_ENABLED), + !!(dps0 & DOC_DPS_KEY_OK)); + seq_printf(s, "DPS1 = 0x%02x : Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, WRITE=%d, HW_LOCK=%d, KEY_OK=%d\n", + dps1, dps1_low, dps1_high, + !!(dps1 & DOC_DPS_OTP_PROTECTED), + !!(dps1 & DOC_DPS_READ_PROTECTED), + !!(dps1 & DOC_DPS_WRITE_PROTECTED), + !!(dps1 & DOC_DPS_HW_LOCK_ENABLED), + !!(dps1 & DOC_DPS_KEY_OK)); + return 0; } DEBUGFS_RO_ATTR(protection, dbg_protection_show); @@ -2126,9 +2119,18 @@ static int __exit docg3_release(struct platform_device *pdev) return 0; } +#ifdef CONFIG_OF +static struct of_device_id docg3_dt_ids[] = { + { .compatible = "m-systems,diskonchip-g3" }, + {} +}; +MODULE_DEVICE_TABLE(of, docg3_dt_ids); +#endif + static struct platform_driver g3_driver = { .driver = { .name = "docg3", + .of_match_table = of_match_ptr(docg3_dt_ids), }, .suspend = docg3_suspend, .resume = docg3_resume, diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index ed827cf894e..85e35467fba 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -128,13 +128,10 @@ static int m25p80_read(struct spi_nor *nor, loff_t from, size_t len, struct spi_device *spi = flash->spi; struct spi_transfer t[2]; struct spi_message m; - int dummy = nor->read_dummy; - int ret; + unsigned int dummy = nor->read_dummy; - /* Wait till previous write/erase is done. */ - ret = nor->wait_till_ready(nor); - if (ret) - return ret; + /* convert the dummy cycles to the number of bytes */ + dummy /= 8; spi_message_init(&m); memset(t, 0, (sizeof t)); @@ -160,21 +157,10 @@ static int m25p80_read(struct spi_nor *nor, loff_t from, size_t len, static int m25p80_erase(struct spi_nor *nor, loff_t offset) { struct m25p *flash = nor->priv; - int ret; dev_dbg(nor->dev, "%dKiB at 0x%08x\n", flash->mtd.erasesize / 1024, (u32)offset); - /* Wait until finished previous write command. */ - ret = nor->wait_till_ready(nor); - if (ret) - return ret; - - /* Send write enable, then erase commands. */ - ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0); - if (ret) - return ret; - /* Set up command buffer. */ flash->command[0] = nor->erase_opcode; m25p_addr2cmd(nor, offset, flash->command); @@ -260,7 +246,6 @@ static int m25p_remove(struct spi_device *spi) return mtd_device_unregister(&flash->mtd); } - /* * XXX This needs to be kept in sync with spi_nor_ids. 
We can't share * it with spi-nor, because if this is built as a module then modpost @@ -287,7 +272,7 @@ static const struct spi_device_id m25p_ids[] = { {"s25fl512s"}, {"s70fl01gs"}, {"s25sl12800"}, {"s25sl12801"}, {"s25fl129p0"}, {"s25fl129p1"}, {"s25sl004a"}, {"s25sl008a"}, {"s25sl016a"}, {"s25sl032a"}, {"s25sl064a"}, {"s25fl008k"}, - {"s25fl016k"}, {"s25fl064k"}, + {"s25fl016k"}, {"s25fl064k"}, {"s25fl132k"}, {"sst25vf040b"},{"sst25vf080b"},{"sst25vf016b"},{"sst25vf032b"}, {"sst25vf064c"},{"sst25wf512"}, {"sst25wf010"}, {"sst25wf020"}, {"sst25wf040"}, @@ -300,17 +285,16 @@ static const struct spi_device_id m25p_ids[] = { {"m45pe10"}, {"m45pe80"}, {"m45pe16"}, {"m25pe20"}, {"m25pe80"}, {"m25pe16"}, {"m25px16"}, {"m25px32"}, {"m25px32-s0"}, {"m25px32-s1"}, - {"m25px64"}, + {"m25px64"}, {"m25px80"}, {"w25x10"}, {"w25x20"}, {"w25x40"}, {"w25x80"}, {"w25x16"}, {"w25x32"}, {"w25q32"}, {"w25q32dw"}, - {"w25x64"}, {"w25q64"}, {"w25q128"}, {"w25q80"}, - {"w25q80bl"}, {"w25q128"}, {"w25q256"}, {"cat25c11"}, + {"w25x64"}, {"w25q64"}, {"w25q80"}, {"w25q80bl"}, + {"w25q128"}, {"w25q256"}, {"cat25c11"}, {"cat25c03"}, {"cat25c09"}, {"cat25c17"}, {"cat25128"}, { }, }; MODULE_DEVICE_TABLE(spi, m25p_ids); - static struct spi_driver m25p80_driver = { .driver = { .name = "m25p80", diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c index dd22ce2cc9a..0099aba72a8 100644 --- a/drivers/mtd/devices/mtd_dataflash.c +++ b/drivers/mtd/devices/mtd_dataflash.c @@ -149,7 +149,7 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr) { struct dataflash *priv = mtd->priv; struct spi_device *spi = priv->spi; - struct spi_transfer x = { .tx_dma = 0, }; + struct spi_transfer x = { }; struct spi_message msg; unsigned blocksize = priv->page_size << 3; uint8_t *command; @@ -235,7 +235,7 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct dataflash *priv = mtd->priv; - struct spi_transfer x[2] = { { .tx_dma = 0, }, }; + struct spi_transfer x[2] = { }; struct spi_message msg; unsigned int addr; uint8_t *command; @@ -301,7 +301,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len, { struct dataflash *priv = mtd->priv; struct spi_device *spi = priv->spi; - struct spi_transfer x[2] = { { .tx_dma = 0, }, }; + struct spi_transfer x[2] = { }; struct spi_message msg; unsigned int pageaddr, addr, offset, writelen; size_t remaining = len; diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c index effd9a4ef7e..8b66e52ca3c 100644 --- a/drivers/mtd/devices/phram.c +++ b/drivers/mtd/devices/phram.c @@ -17,7 +17,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <asm/io.h> +#include <linux/io.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/list.h> diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c index f02603e1bfe..708b7e8c8b1 100644 --- a/drivers/mtd/devices/pmc551.c +++ b/drivers/mtd/devices/pmc551.c @@ -812,8 +812,7 @@ static int __init init_pmc551(void) } /* Exited early, reference left over */ - if (PCI_Device) - pci_dev_put(PCI_Device); + pci_dev_put(PCI_Device); if (!pmc551list) { printk(KERN_NOTICE "pmc551: not detected\n"); diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c index 487e64f411a..1388c8d7f30 100644 --- a/drivers/mtd/inftlmount.c +++ b/drivers/mtd/inftlmount.c @@ -518,7 +518,7 @@ void INFTL_dumpVUchains(struct INFTLrecord *s) pr_debug("INFTL Virtual Unit Chains:\n"); for (logical = 0; logical < 
s->nb_blocks; logical++) { block = s->VUtable[logical]; - if (block > s->nb_blocks) + if (block >= s->nb_blocks) continue; pr_debug(" LOGICAL %d --> %d ", logical, block); for (i = 0; i < s->nb_blocks; i++) { diff --git a/drivers/mtd/maps/bfin-async-flash.c b/drivers/mtd/maps/bfin-async-flash.c index 6ea51e54904..41730feeace 100644 --- a/drivers/mtd/maps/bfin-async-flash.c +++ b/drivers/mtd/maps/bfin-async-flash.c @@ -126,7 +126,6 @@ static const char * const part_probe_types[] = { static int bfin_flash_probe(struct platform_device *pdev) { - int ret; struct physmap_flash_data *pdata = dev_get_platdata(&pdev->dev); struct resource *memory = platform_get_resource(pdev, IORESOURCE_MEM, 0); struct resource *flash_ambctl = platform_get_resource(pdev, IORESOURCE_MEM, 1); diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index 991d0cb871f..f35cd208131 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -47,14 +47,12 @@ static int of_flash_remove(struct platform_device *dev) return 0; dev_set_drvdata(&dev->dev, NULL); - if (info->cmtd != info->list[0].mtd) { + if (info->cmtd) { mtd_device_unregister(info->cmtd); - mtd_concat_destroy(info->cmtd); + if (info->cmtd != info->list[0].mtd) + mtd_concat_destroy(info->cmtd); } - if (info->cmtd) - mtd_device_unregister(info->cmtd); - for (i = 0; i < info->list_size; i++) { if (info->list[i].mtd) map_destroy(info->list[i].mtd); diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index dd10646982a..7d0150d2043 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -75,10 +75,12 @@ config MTD_NAND_DENALI_SCRATCH_REG_ADDR boards, the scratch register is at 0xFF108018. config MTD_NAND_GPIO - tristate "GPIO NAND Flash driver" + tristate "GPIO assisted NAND Flash driver" depends on GPIOLIB help - This enables a GPIO based NAND flash driver. + This enables a NAND flash driver where control signals are + connected to GPIO pins, and commands and data are communicated + via a memory mapped interface. config MTD_NAND_AMS_DELTA tristate "NAND Flash device on Amstrad E3" @@ -516,4 +518,10 @@ config MTD_NAND_XWAY Enables support for NAND Flash chips on Lantiq XWAY SoCs. NAND is attached to the External Bus Unit (EBU). +config MTD_NAND_SUNXI + tristate "Support for NAND on Allwinner SoCs" + depends on ARCH_SUNXI + help + Enables support for NAND Flash chips on Allwinner SoCs. 
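The docg3 debugfs hunks earlier in this series drop the pos += seq_printf(...) accumulation because the seq_printf()/seq_puts() return values are being phased out; a seq_file show() callback should simply emit its output and return 0. A minimal sketch of the resulting style, with a hypothetical foo_dev structure standing in for driver state handed to single_open():

#include <linux/seq_file.h>

struct foo_dev {
	u8 fctrl;	/* hypothetical cached status register */
};

static int foo_status_show(struct seq_file *s, void *p)
{
	struct foo_dev *foo = s->private;

	/*
	 * No byte counting needed: the seq_file core buffers the output
	 * and transparently re-invokes show() with a larger buffer on
	 * overflow, so the return value is only for error reporting.
	 */
	seq_printf(s, "Status: 0x%02x (%s)\n", foo->fctrl,
		   foo->fctrl & 0x01 ? "ready" : "busy");
	return 0;
}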
+ endif # MTD_NAND diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index 9c847e469ca..bd38f21d2e2 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -50,5 +50,6 @@ obj-$(CONFIG_MTD_NAND_JZ4740) += jz4740_nand.o obj-$(CONFIG_MTD_NAND_GPMI_NAND) += gpmi-nand/ obj-$(CONFIG_MTD_NAND_XWAY) += xway_nand.o obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH) += bcm47xxnflash/ +obj-$(CONFIG_MTD_NAND_SUNXI) += sunxi_nand.o nand-objs := nand_base.o nand_bbt.o nand_timings.o diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index 84c38f3c65b..a345e7b2463 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -92,7 +92,7 @@ static struct nand_ecclayout atmel_oobinfo_small = { struct atmel_nfc { void __iomem *base_cmd_regs; void __iomem *hsmc_regs; - void __iomem *sram_bank0; + void *sram_bank0; dma_addr_t sram_bank0_phys; bool use_nfc_sram; bool write_by_sram; @@ -105,7 +105,7 @@ struct atmel_nfc { struct completion comp_xfer_done; /* Point to the sram bank which include readed data via NFC */ - void __iomem *data_in_sram; + void *data_in_sram; bool will_write_sram; }; static struct atmel_nfc nand_nfc; @@ -127,6 +127,7 @@ struct atmel_nand_host { bool has_pmecc; u8 pmecc_corr_cap; u16 pmecc_sector_size; + bool has_no_lookup_table; u32 pmecc_lookup_table_offset; u32 pmecc_lookup_table_offset_512; u32 pmecc_lookup_table_offset_1024; @@ -256,26 +257,6 @@ static int atmel_nand_set_enable_ready_pins(struct mtd_info *mtd) return res; } -static void memcpy32_fromio(void *trg, const void __iomem *src, size_t size) -{ - int i; - u32 *t = trg; - const __iomem u32 *s = src; - - for (i = 0; i < (size >> 2); i++) - *t++ = readl_relaxed(s++); -} - -static void memcpy32_toio(void __iomem *trg, const void *src, int size) -{ - int i; - u32 __iomem *t = trg; - const u32 *s = src; - - for (i = 0; i < (size >> 2); i++) - writel_relaxed(*s++, t++); -} - /* * Minimal-overhead PIO for data access. 
*/ @@ -285,7 +266,7 @@ static void atmel_read_buf8(struct mtd_info *mtd, u8 *buf, int len) struct atmel_nand_host *host = nand_chip->priv; if (host->nfc && host->nfc->use_nfc_sram && host->nfc->data_in_sram) { - memcpy32_fromio(buf, host->nfc->data_in_sram, len); + memcpy(buf, host->nfc->data_in_sram, len); host->nfc->data_in_sram += len; } else { __raw_readsb(nand_chip->IO_ADDR_R, buf, len); @@ -298,7 +279,7 @@ static void atmel_read_buf16(struct mtd_info *mtd, u8 *buf, int len) struct atmel_nand_host *host = nand_chip->priv; if (host->nfc && host->nfc->use_nfc_sram && host->nfc->data_in_sram) { - memcpy32_fromio(buf, host->nfc->data_in_sram, len); + memcpy(buf, host->nfc->data_in_sram, len); host->nfc->data_in_sram += len; } else { __raw_readsw(nand_chip->IO_ADDR_R, buf, len / 2); @@ -1112,12 +1093,66 @@ static int pmecc_choose_ecc(struct atmel_nand_host *host, return 0; } +static inline int deg(unsigned int poly) +{ + /* polynomial degree is the most-significant bit index */ + return fls(poly) - 1; +} + +static int build_gf_tables(int mm, unsigned int poly, + int16_t *index_of, int16_t *alpha_to) +{ + unsigned int i, x = 1; + const unsigned int k = 1 << deg(poly); + unsigned int nn = (1 << mm) - 1; + + /* primitive polynomial must be of degree m */ + if (k != (1u << mm)) + return -EINVAL; + + for (i = 0; i < nn; i++) { + alpha_to[i] = x; + index_of[x] = i; + if (i && (x == 1)) + /* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */ + return -EINVAL; + x <<= 1; + if (x & k) + x ^= poly; + } + alpha_to[nn] = 1; + index_of[0] = 0; + + return 0; +} + +static uint16_t *create_lookup_table(struct device *dev, int sector_size) +{ + int degree = (sector_size == 512) ? + PMECC_GF_DIMENSION_13 : + PMECC_GF_DIMENSION_14; + unsigned int poly = (sector_size == 512) ? + PMECC_GF_13_PRIMITIVE_POLY : + PMECC_GF_14_PRIMITIVE_POLY; + int table_size = (sector_size == 512) ? 
+ PMECC_LOOKUP_TABLE_SIZE_512 : + PMECC_LOOKUP_TABLE_SIZE_1024; + + int16_t *addr = devm_kzalloc(dev, 2 * table_size * sizeof(uint16_t), + GFP_KERNEL); + if (addr && build_gf_tables(degree, poly, addr, addr + table_size)) + return NULL; + + return addr; +} + static int atmel_pmecc_nand_init_params(struct platform_device *pdev, struct atmel_nand_host *host) { struct mtd_info *mtd = &host->mtd; struct nand_chip *nand_chip = &host->nand_chip; struct resource *regs, *regs_pmerr, *regs_rom; + uint16_t *galois_table; int cap, sector_size, err_no; err_no = pmecc_choose_ecc(host, &cap, §or_size); @@ -1163,8 +1198,24 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev, regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3); host->pmecc_rom_base = devm_ioremap_resource(&pdev->dev, regs_rom); if (IS_ERR(host->pmecc_rom_base)) { - err_no = PTR_ERR(host->pmecc_rom_base); - goto err; + if (!host->has_no_lookup_table) + /* Don't display the information again */ + dev_err(host->dev, "Can not get I/O resource for ROM, will build a lookup table in runtime!\n"); + + host->has_no_lookup_table = true; + } + + if (host->has_no_lookup_table) { + /* Build the look-up table in runtime */ + galois_table = create_lookup_table(host->dev, sector_size); + if (!galois_table) { + dev_err(host->dev, "Failed to build a lookup table in runtime!\n"); + err_no = -EINVAL; + goto err; + } + + host->pmecc_rom_base = (void __iomem *)galois_table; + host->pmecc_lookup_table_offset = 0; } nand_chip->ecc.size = sector_size; @@ -1501,8 +1552,10 @@ static int atmel_of_init_port(struct atmel_nand_host *host, if (of_property_read_u32_array(np, "atmel,pmecc-lookup-table-offset", offset, 2) != 0) { - dev_err(host->dev, "Cannot get PMECC lookup table offset\n"); - return -EINVAL; + dev_err(host->dev, "Cannot get PMECC lookup table offset, will build a lookup table in runtime.\n"); + host->has_no_lookup_table = true; + /* Will build a lookup table and initialize the offset later */ + return 0; } if (!offset[0] && !offset[1]) { dev_err(host->dev, "Invalid PMECC lookup table offset\n"); @@ -1899,7 +1952,7 @@ static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip, int cfg, len; int status = 0; struct atmel_nand_host *host = chip->priv; - void __iomem *sram = host->nfc->sram_bank0 + nfc_get_sram_off(host); + void *sram = host->nfc->sram_bank0 + nfc_get_sram_off(host); /* Subpage write is not supported */ if (offset || (data_len < mtd->writesize)) @@ -1910,14 +1963,14 @@ static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip, if (use_dma) { if (atmel_nand_dma_op(mtd, (void *)buf, len, 0) != 0) /* Fall back to use cpu copy */ - memcpy32_toio(sram, buf, len); + memcpy(sram, buf, len); } else { - memcpy32_toio(sram, buf, len); + memcpy(sram, buf, len); } cfg = nfc_readl(host->nfc->hsmc_regs, CFG); if (unlikely(raw) && oob_required) { - memcpy32_toio(sram + len, chip->oob_poi, mtd->oobsize); + memcpy(sram + len, chip->oob_poi, mtd->oobsize); len += mtd->oobsize; nfc_writel(host->nfc->hsmc_regs, CFG, cfg | NFC_CFG_WSPARE); } else { @@ -2260,7 +2313,8 @@ static int atmel_nand_nfc_probe(struct platform_device *pdev) nfc_sram = platform_get_resource(pdev, IORESOURCE_MEM, 2); if (nfc_sram) { - nfc->sram_bank0 = devm_ioremap_resource(&pdev->dev, nfc_sram); + nfc->sram_bank0 = (void * __force) + devm_ioremap_resource(&pdev->dev, nfc_sram); if (IS_ERR(nfc->sram_bank0)) { dev_warn(&pdev->dev, "Fail to ioremap the NFC sram with error: %ld. 
So disable NFC sram.\n", PTR_ERR(nfc->sram_bank0)); diff --git a/drivers/mtd/nand/atmel_nand_ecc.h b/drivers/mtd/nand/atmel_nand_ecc.h index 8a1e9a68675..d4035e335ad 100644 --- a/drivers/mtd/nand/atmel_nand_ecc.h +++ b/drivers/mtd/nand/atmel_nand_ecc.h @@ -142,6 +142,10 @@ #define PMECC_GF_DIMENSION_13 13 #define PMECC_GF_DIMENSION_14 14 +/* Primitive Polynomial used by PMECC */ +#define PMECC_GF_13_PRIMITIVE_POLY 0x201b +#define PMECC_GF_14_PRIMITIVE_POLY 0x4443 + #define PMECC_LOOKUP_TABLE_SIZE_512 0x2000 #define PMECC_LOOKUP_TABLE_SIZE_1024 0x4000 diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c index 4e66726da9a..9a0f45f1d93 100644 --- a/drivers/mtd/nand/cafe_nand.c +++ b/drivers/mtd/nand/cafe_nand.c @@ -529,50 +529,6 @@ static int cafe_nand_write_page_lowlevel(struct mtd_info *mtd, return 0; } -static int cafe_nand_write_page(struct mtd_info *mtd, struct nand_chip *chip, - uint32_t offset, int data_len, const uint8_t *buf, - int oob_required, int page, int cached, int raw) -{ - int status; - - chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page); - - if (unlikely(raw)) - status = chip->ecc.write_page_raw(mtd, chip, buf, oob_required); - else - status = chip->ecc.write_page(mtd, chip, buf, oob_required); - - if (status < 0) - return status; - - /* - * Cached progamming disabled for now, Not sure if its worth the - * trouble. The speed gain is not very impressive. (2.3->2.6Mib/s) - */ - cached = 0; - - if (!cached || !(chip->options & NAND_CACHEPRG)) { - - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - status = chip->waitfunc(mtd, chip); - /* - * See if operation failed and additional status checks are - * available - */ - if ((status & NAND_STATUS_FAIL) && (chip->errstat)) - status = chip->errstat(mtd, chip, FL_WRITING, status, - page); - - if (status & NAND_STATUS_FAIL) - return -EIO; - } else { - chip->cmdfunc(mtd, NAND_CMD_CACHEDPROG, -1, -1); - status = chip->waitfunc(mtd, chip); - } - - return 0; -} - static int cafe_nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip) { return 0; @@ -800,7 +756,6 @@ static int cafe_nand_probe(struct pci_dev *pdev, cafe->nand.ecc.hwctl = (void *)cafe_nand_bug; cafe->nand.ecc.calculate = (void *)cafe_nand_bug; cafe->nand.ecc.correct = (void *)cafe_nand_bug; - cafe->nand.write_page = cafe_nand_write_page; cafe->nand.ecc.write_page = cafe_nand_write_page_lowlevel; cafe->nand.ecc.write_oob = cafe_nand_write_oob; cafe->nand.ecc.read_page = cafe_nand_read_page; diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index b9ef7a6bba4..4c05f4f6a5c 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -31,7 +31,6 @@ #include <linux/mtd/nand_ecc.h> #include <linux/fsl_ifc.h> -#define FSL_IFC_V1_1_0 0x01010000 #define ERR_BYTE 0xFF /* Value returned for read bytes when read failed */ #define IFC_TIMEOUT_MSECS 500 /* Maximum number of mSecs to wait @@ -877,7 +876,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) struct fsl_ifc_regs __iomem *ifc = ctrl->regs; struct nand_chip *chip = &priv->chip; struct nand_ecclayout *layout; - u32 csor, ver; + u32 csor; /* Fill in fsl_ifc_mtd structure */ priv->mtd.priv = chip; @@ -984,8 +983,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->ecc.mode = NAND_ECC_SOFT; } - ver = ioread32be(&ifc->ifc_rev); - if (ver == FSL_IFC_V1_1_0) + if (ctrl->version == FSL_IFC_VERSION_1_1_0) fsl_ifc_sram_init(priv); return 0; @@ -1045,12 +1043,12 @@ static int fsl_ifc_nand_probe(struct platform_device *dev) } /* find which chip 
select it is connected to */ - for (bank = 0; bank < FSL_IFC_BANK_COUNT; bank++) { + for (bank = 0; bank < fsl_ifc_ctrl_dev->banks; bank++) { if (match_bank(ifc, bank, res.start)) break; } - if (bank >= FSL_IFC_BANK_COUNT) { + if (bank >= fsl_ifc_ctrl_dev->banks) { dev_err(&dev->dev, "%s: address did not match any chip selects\n", __func__); return -ENODEV; diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c index 918283999a4..73c4048c3a5 100644 --- a/drivers/mtd/nand/gpio.c +++ b/drivers/mtd/nand/gpio.c @@ -8,7 +8,9 @@ * * © 2004 Simtec Electronics * - * Device driver for NAND connected via GPIO + * Device driver for NAND flash that uses a memory mapped interface to + * read/write the NAND commands and data, and GPIO pins for control signals + * (the DT binding refers to this as "GPIO assisted NAND flash") * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c index 87e658ce23e..27f272ed502 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c @@ -1353,3 +1353,156 @@ int gpmi_read_page(struct gpmi_nand_data *this, set_dma_type(this, DMA_FOR_READ_ECC_PAGE); return start_dma_with_bch_irq(this, desc); } + +/** + * gpmi_copy_bits - copy bits from one memory region to another + * @dst: destination buffer + * @dst_bit_off: bit offset we're starting to write at + * @src: source buffer + * @src_bit_off: bit offset we're starting to read from + * @nbits: number of bits to copy + * + * This functions copies bits from one memory region to another, and is used by + * the GPMI driver to copy ECC sections which are not guaranteed to be byte + * aligned. + * + * src and dst should not overlap. + * + */ +void gpmi_copy_bits(u8 *dst, size_t dst_bit_off, + const u8 *src, size_t src_bit_off, + size_t nbits) +{ + size_t i; + size_t nbytes; + u32 src_buffer = 0; + size_t bits_in_src_buffer = 0; + + if (!nbits) + return; + + /* + * Move src and dst pointers to the closest byte pointer and store bit + * offsets within a byte. + */ + src += src_bit_off / 8; + src_bit_off %= 8; + + dst += dst_bit_off / 8; + dst_bit_off %= 8; + + /* + * Initialize the src_buffer value with bits available in the first + * byte of data so that we end up with a byte aligned src pointer. + */ + if (src_bit_off) { + src_buffer = src[0] >> src_bit_off; + if (nbits >= (8 - src_bit_off)) { + bits_in_src_buffer += 8 - src_bit_off; + } else { + src_buffer &= GENMASK(nbits - 1, 0); + bits_in_src_buffer += nbits; + } + nbits -= bits_in_src_buffer; + src++; + } + + /* Calculate the number of bytes that can be copied from src to dst. */ + nbytes = nbits / 8; + + /* Try to align dst to a byte boundary. */ + if (dst_bit_off) { + if (bits_in_src_buffer < (8 - dst_bit_off) && nbytes) { + src_buffer |= src[0] << bits_in_src_buffer; + bits_in_src_buffer += 8; + src++; + nbytes--; + } + + if (bits_in_src_buffer >= (8 - dst_bit_off)) { + dst[0] &= GENMASK(dst_bit_off - 1, 0); + dst[0] |= src_buffer << dst_bit_off; + src_buffer >>= (8 - dst_bit_off); + bits_in_src_buffer -= (8 - dst_bit_off); + dst_bit_off = 0; + dst++; + if (bits_in_src_buffer > 7) { + bits_in_src_buffer -= 8; + dst[0] = src_buffer; + dst++; + src_buffer >>= 8; + } + } + } + + if (!bits_in_src_buffer && !dst_bit_off) { + /* + * Both src and dst pointers are byte aligned, thus we can + * just use the optimized memcpy function. 
+ */ + if (nbytes) + memcpy(dst, src, nbytes); + } else { + /* + * src buffer is not byte aligned, hence we have to copy each + * src byte to the src_buffer variable before extracting a byte + * to store in dst. + */ + for (i = 0; i < nbytes; i++) { + src_buffer |= src[i] << bits_in_src_buffer; + dst[i] = src_buffer; + src_buffer >>= 8; + } + } + /* Update dst and src pointers */ + dst += nbytes; + src += nbytes; + + /* + * nbits is the number of remaining bits. It should not exceed 8 as + * we've already copied as much bytes as possible. + */ + nbits %= 8; + + /* + * If there's no more bits to copy to the destination and src buffer + * was already byte aligned, then we're done. + */ + if (!nbits && !bits_in_src_buffer) + return; + + /* Copy the remaining bits to src_buffer */ + if (nbits) + src_buffer |= (*src & GENMASK(nbits - 1, 0)) << + bits_in_src_buffer; + bits_in_src_buffer += nbits; + + /* + * In case there were not enough bits to get a byte aligned dst buffer + * prepare the src_buffer variable to match the dst organization (shift + * src_buffer by dst_bit_off and retrieve the least significant bits + * from dst). + */ + if (dst_bit_off) + src_buffer = (src_buffer << dst_bit_off) | + (*dst & GENMASK(dst_bit_off - 1, 0)); + bits_in_src_buffer += dst_bit_off; + + /* + * Keep most significant bits from dst if we end up with an unaligned + * number of bits. + */ + nbytes = bits_in_src_buffer / 8; + if (bits_in_src_buffer % 8) { + src_buffer |= (dst[nbytes] & + GENMASK(7, bits_in_src_buffer % 8)) << + (nbytes * 8); + nbytes++; + } + + /* Copy the remaining bytes to dst */ + for (i = 0; i < nbytes; i++) { + dst[i] = src_buffer; + src_buffer >>= 8; + } +} diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 959cb9b7031..4f3851a24bb 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -791,6 +791,7 @@ static void gpmi_free_dma_buffer(struct gpmi_nand_data *this) this->page_buffer_phys); kfree(this->cmd_buffer); kfree(this->data_buffer_dma); + kfree(this->raw_buffer); this->cmd_buffer = NULL; this->data_buffer_dma = NULL; @@ -837,6 +838,9 @@ static int gpmi_alloc_dma_buffer(struct gpmi_nand_data *this) if (!this->page_buffer_virt) goto error_alloc; + this->raw_buffer = kzalloc(mtd->writesize + mtd->oobsize, GFP_KERNEL); + if (!this->raw_buffer) + goto error_alloc; /* Slice up the page buffer. */ this->payload_virt = this->page_buffer_virt; @@ -1347,6 +1351,199 @@ gpmi_ecc_write_oob(struct mtd_info *mtd, struct nand_chip *chip, int page) return status & NAND_STATUS_FAIL ? -EIO : 0; } +/* + * This function reads a NAND page without involving the ECC engine (no HW + * ECC correction). + * The tricky part in the GPMI/BCH controller is that it stores ECC bits + * inline (interleaved with payload DATA), and do not align data chunk on + * byte boundaries. + * We thus need to take care moving the payload data and ECC bits stored in the + * page into the provided buffers, which is why we're using gpmi_copy_bits. + * + * See set_geometry_by_ecc_info inline comments to have a full description + * of the layout used by the GPMI controller. 
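+ *
+ * As a purely illustrative example (figures invented here, not taken
+ * from any supported chip): with a 10-byte metadata section and a
+ * per-chunk ECC of ecc_strength * gf_len = 2 * 13 = 26 bits protecting
+ * 512-byte chunks, chunk 0 occupies bits 80..4175, its ECC bits occupy
+ * bits 4176..4201, and chunk 1 therefore starts at bit 4202, which is
+ * not a byte boundary. This is why a plain memcpy cannot extract the
+ * payload and ECC sections.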
+ */ +static int gpmi_ecc_read_page_raw(struct mtd_info *mtd, + struct nand_chip *chip, uint8_t *buf, + int oob_required, int page) +{ + struct gpmi_nand_data *this = chip->priv; + struct bch_geometry *nfc_geo = &this->bch_geometry; + int eccsize = nfc_geo->ecc_chunk_size; + int eccbits = nfc_geo->ecc_strength * nfc_geo->gf_len; + u8 *tmp_buf = this->raw_buffer; + size_t src_bit_off; + size_t oob_bit_off; + size_t oob_byte_off; + uint8_t *oob = chip->oob_poi; + int step; + + chip->read_buf(mtd, tmp_buf, + mtd->writesize + mtd->oobsize); + + /* + * If required, swap the bad block marker and the data stored in the + * metadata section, so that we don't wrongly consider a block as bad. + * + * See the layout description for a detailed explanation on why this + * is needed. + */ + if (this->swap_block_mark) { + u8 swap = tmp_buf[0]; + + tmp_buf[0] = tmp_buf[mtd->writesize]; + tmp_buf[mtd->writesize] = swap; + } + + /* + * Copy the metadata section into the oob buffer (this section is + * guaranteed to be aligned on a byte boundary). + */ + if (oob_required) + memcpy(oob, tmp_buf, nfc_geo->metadata_size); + + oob_bit_off = nfc_geo->metadata_size * 8; + src_bit_off = oob_bit_off; + + /* Extract interleaved payload data and ECC bits */ + for (step = 0; step < nfc_geo->ecc_chunk_count; step++) { + if (buf) + gpmi_copy_bits(buf, step * eccsize * 8, + tmp_buf, src_bit_off, + eccsize * 8); + src_bit_off += eccsize * 8; + + /* Align last ECC block to align a byte boundary */ + if (step == nfc_geo->ecc_chunk_count - 1 && + (oob_bit_off + eccbits) % 8) + eccbits += 8 - ((oob_bit_off + eccbits) % 8); + + if (oob_required) + gpmi_copy_bits(oob, oob_bit_off, + tmp_buf, src_bit_off, + eccbits); + + src_bit_off += eccbits; + oob_bit_off += eccbits; + } + + if (oob_required) { + oob_byte_off = oob_bit_off / 8; + + if (oob_byte_off < mtd->oobsize) + memcpy(oob + oob_byte_off, + tmp_buf + mtd->writesize + oob_byte_off, + mtd->oobsize - oob_byte_off); + } + + return 0; +} + +/* + * This function writes a NAND page without involving the ECC engine (no HW + * ECC generation). + * The tricky part in the GPMI/BCH controller is that it stores ECC bits + * inline (interleaved with payload DATA), and do not align data chunk on + * byte boundaries. + * We thus need to take care moving the OOB area at the right place in the + * final page, which is why we're using gpmi_copy_bits. + * + * See set_geometry_by_ecc_info inline comments to have a full description + * of the layout used by the GPMI controller. + */ +static int gpmi_ecc_write_page_raw(struct mtd_info *mtd, + struct nand_chip *chip, + const uint8_t *buf, + int oob_required) +{ + struct gpmi_nand_data *this = chip->priv; + struct bch_geometry *nfc_geo = &this->bch_geometry; + int eccsize = nfc_geo->ecc_chunk_size; + int eccbits = nfc_geo->ecc_strength * nfc_geo->gf_len; + u8 *tmp_buf = this->raw_buffer; + uint8_t *oob = chip->oob_poi; + size_t dst_bit_off; + size_t oob_bit_off; + size_t oob_byte_off; + int step; + + /* + * Initialize all bits to 1 in case we don't have a buffer for the + * payload or oob data in order to leave unspecified bits of data + * to their initial state. + */ + if (!buf || !oob_required) + memset(tmp_buf, 0xff, mtd->writesize + mtd->oobsize); + + /* + * First copy the metadata section (stored in oob buffer) at the + * beginning of the page, as imposed by the GPMI layout. 
+ */ + memcpy(tmp_buf, oob, nfc_geo->metadata_size); + oob_bit_off = nfc_geo->metadata_size * 8; + dst_bit_off = oob_bit_off; + + /* Interleave payload data and ECC bits */ + for (step = 0; step < nfc_geo->ecc_chunk_count; step++) { + if (buf) + gpmi_copy_bits(tmp_buf, dst_bit_off, + buf, step * eccsize * 8, eccsize * 8); + dst_bit_off += eccsize * 8; + + /* Align last ECC block to align a byte boundary */ + if (step == nfc_geo->ecc_chunk_count - 1 && + (oob_bit_off + eccbits) % 8) + eccbits += 8 - ((oob_bit_off + eccbits) % 8); + + if (oob_required) + gpmi_copy_bits(tmp_buf, dst_bit_off, + oob, oob_bit_off, eccbits); + + dst_bit_off += eccbits; + oob_bit_off += eccbits; + } + + oob_byte_off = oob_bit_off / 8; + + if (oob_required && oob_byte_off < mtd->oobsize) + memcpy(tmp_buf + mtd->writesize + oob_byte_off, + oob + oob_byte_off, mtd->oobsize - oob_byte_off); + + /* + * If required, swap the bad block marker and the first byte of the + * metadata section, so that we don't modify the bad block marker. + * + * See the layout description for a detailed explanation on why this + * is needed. + */ + if (this->swap_block_mark) { + u8 swap = tmp_buf[0]; + + tmp_buf[0] = tmp_buf[mtd->writesize]; + tmp_buf[mtd->writesize] = swap; + } + + chip->write_buf(mtd, tmp_buf, mtd->writesize + mtd->oobsize); + + return 0; +} + +static int gpmi_ecc_read_oob_raw(struct mtd_info *mtd, struct nand_chip *chip, + int page) +{ + chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); + + return gpmi_ecc_read_page_raw(mtd, chip, NULL, 1, page); +} + +static int gpmi_ecc_write_oob_raw(struct mtd_info *mtd, struct nand_chip *chip, + int page) +{ + chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0, page); + + return gpmi_ecc_write_page_raw(mtd, chip, NULL, 1); +} + static int gpmi_block_markbad(struct mtd_info *mtd, loff_t ofs) { struct nand_chip *chip = mtd->priv; @@ -1664,6 +1861,10 @@ static int gpmi_init_last(struct gpmi_nand_data *this) ecc->write_page = gpmi_ecc_write_page; ecc->read_oob = gpmi_ecc_read_oob; ecc->write_oob = gpmi_ecc_write_oob; + ecc->read_page_raw = gpmi_ecc_read_page_raw; + ecc->write_page_raw = gpmi_ecc_write_page_raw; + ecc->read_oob_raw = gpmi_ecc_read_oob_raw; + ecc->write_oob_raw = gpmi_ecc_write_oob_raw; ecc->mode = NAND_ECC_HW; ecc->size = bch_geo->ecc_chunk_size; ecc->strength = bch_geo->ecc_strength; diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h index 32c6ba49f98..544062f6502 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h @@ -189,6 +189,8 @@ struct gpmi_nand_data { void *auxiliary_virt; dma_addr_t auxiliary_phys; + void *raw_buffer; + /* DMA channels */ #define DMA_CHANS 8 struct dma_chan *dma_chans[DMA_CHANS]; @@ -290,6 +292,10 @@ extern int gpmi_send_page(struct gpmi_nand_data *, extern int gpmi_read_page(struct gpmi_nand_data *, dma_addr_t payload, dma_addr_t auxiliary); +void gpmi_copy_bits(u8 *dst, size_t dst_bit_off, + const u8 *src, size_t src_bit_off, + size_t nbits); + /* BCH : Status Block Completion Codes */ #define STATUS_GOOD 0x00 #define STATUS_ERASED 0xff diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index e1d56beeca7..a8f550fec35 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -280,14 +280,10 @@ static void memcpy32_fromio(void *trg, const void __iomem *src, size_t size) *t++ = __raw_readl(s++); } -static void memcpy32_toio(void __iomem *trg, const void *src, int size) +static inline void memcpy32_toio(void __iomem *trg, const void *src, int 
size) { - int i; - u32 __iomem *t = trg; - const u32 *s = src; - - for (i = 0; i < (size >> 2); i++) - __raw_writel(*s++, t++); + /* __iowrite32_copy() takes its size in 32-bit words, so divide the byte count by 4 */ + __iowrite32_copy(trg, src, size / 4); } static int check_int_v3(struct mxc_nand_host *host) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 5b5c6271281..41585dfb206 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -485,11 +485,11 @@ static int nand_check_wp(struct mtd_info *mtd) } /** - * nand_block_checkbad - [GENERIC] Check if a block is marked bad + * nand_block_isreserved - [GENERIC] Check if a block is marked reserved. * @mtd: MTD device structure * @ofs: offset from device start * - * Check if the block is mark as reserved. + * Check if the block is marked as reserved. */ static int nand_block_isreserved(struct mtd_info *mtd, loff_t ofs) { @@ -720,7 +720,7 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command, /* * Program and erase have their own busy handlers status, sequential - * in, and deplete1 need no delay. + * in and status need no delay. */ switch (command) { @@ -3765,9 +3765,9 @@ ident_done: pr_info("%s %s\n", nand_manuf_ids[maf_idx].name, type->name); - pr_info("%dMiB, %s, page size: %d, OOB size: %d\n", + pr_info("%d MiB, %s, erase size: %d KiB, page size: %d, OOB size: %d\n", (int)(chip->chipsize >> 20), nand_is_slc(chip) ? "SLC" : "MLC", - mtd->writesize, mtd->oobsize); + mtd->erasesize >> 10, mtd->writesize, mtd->oobsize); return type; } @@ -4035,7 +4035,7 @@ int nand_scan_tail(struct mtd_info *mtd) */ if (!ecc->size && (mtd->oobsize >= 64)) { ecc->size = 512; - ecc->bytes = 7; + ecc->bytes = DIV_ROUND_UP(13 * ecc->strength, 8); } ecc->priv = nand_bch_init(mtd, ecc->size, ecc->bytes, &ecc->layout); diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c index fbde8910524..dd620c19c61 100644 --- a/drivers/mtd/nand/nand_ids.c +++ b/drivers/mtd/nand/nand_ids.c @@ -178,6 +178,7 @@ struct nand_manufacturers nand_manuf_ids[] = { {NAND_MFR_EON, "Eon"}, {NAND_MFR_SANDISK, "SanDisk"}, {NAND_MFR_INTEL, "Intel"}, + {NAND_MFR_ATO, "ATO"}, {0x0, "Unknown"} }; diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 7dc1dd28d89..ab5bbf56743 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -87,10 +87,6 @@ #define CONFIG_NANDSIM_MAX_PARTS 32 #endif -static uint first_id_byte = CONFIG_NANDSIM_FIRST_ID_BYTE; -static uint second_id_byte = CONFIG_NANDSIM_SECOND_ID_BYTE; -static uint third_id_byte = CONFIG_NANDSIM_THIRD_ID_BYTE; -static uint fourth_id_byte = CONFIG_NANDSIM_FOURTH_ID_BYTE; static uint access_delay = CONFIG_NANDSIM_ACCESS_DELAY; static uint programm_delay = CONFIG_NANDSIM_PROGRAMM_DELAY; static uint erase_delay = CONFIG_NANDSIM_ERASE_DELAY; @@ -111,11 +107,19 @@ static unsigned int overridesize = 0; static char *cache_file = NULL; static unsigned int bbt; static unsigned int bch; +static u_char id_bytes[8] = { + [0] = CONFIG_NANDSIM_FIRST_ID_BYTE, + [1] = CONFIG_NANDSIM_SECOND_ID_BYTE, + [2] = CONFIG_NANDSIM_THIRD_ID_BYTE, + [3] = CONFIG_NANDSIM_FOURTH_ID_BYTE, + [4 ...
7] = 0xFF, +}; -module_param(first_id_byte, uint, 0400); -module_param(second_id_byte, uint, 0400); -module_param(third_id_byte, uint, 0400); -module_param(fourth_id_byte, uint, 0400); +module_param_array(id_bytes, byte, NULL, 0400); +module_param_named(first_id_byte, id_bytes[0], byte, 0400); +module_param_named(second_id_byte, id_bytes[1], byte, 0400); +module_param_named(third_id_byte, id_bytes[2], byte, 0400); +module_param_named(fourth_id_byte, id_bytes[3], byte, 0400); module_param(access_delay, uint, 0400); module_param(programm_delay, uint, 0400); module_param(erase_delay, uint, 0400); @@ -136,10 +140,11 @@ module_param(cache_file, charp, 0400); module_param(bbt, uint, 0400); module_param(bch, uint, 0400); -MODULE_PARM_DESC(first_id_byte, "The first byte returned by NAND Flash 'read ID' command (manufacturer ID)"); -MODULE_PARM_DESC(second_id_byte, "The second byte returned by NAND Flash 'read ID' command (chip ID)"); -MODULE_PARM_DESC(third_id_byte, "The third byte returned by NAND Flash 'read ID' command"); -MODULE_PARM_DESC(fourth_id_byte, "The fourth byte returned by NAND Flash 'read ID' command"); +MODULE_PARM_DESC(id_bytes, "The ID bytes returned by NAND Flash 'read ID' command"); +MODULE_PARM_DESC(first_id_byte, "The first byte returned by NAND Flash 'read ID' command (manufacturer ID) (obsolete)"); +MODULE_PARM_DESC(second_id_byte, "The second byte returned by NAND Flash 'read ID' command (chip ID) (obsolete)"); +MODULE_PARM_DESC(third_id_byte, "The third byte returned by NAND Flash 'read ID' command (obsolete)"); +MODULE_PARM_DESC(fourth_id_byte, "The fourth byte returned by NAND Flash 'read ID' command (obsolete)"); MODULE_PARM_DESC(access_delay, "Initial page access delay (microseconds)"); MODULE_PARM_DESC(programm_delay, "Page programm delay (microseconds"); MODULE_PARM_DESC(erase_delay, "Sector erase delay (milliseconds)"); @@ -304,7 +309,7 @@ struct nandsim { unsigned int nbparts; uint busw; /* flash chip bus width (8 or 16) */ - u_char ids[4]; /* chip's ID bytes */ + u_char ids[8]; /* chip's ID bytes */ uint32_t options; /* chip's characteristic bits */ uint32_t state; /* current chip state */ uint32_t nxstate; /* next expected state */ @@ -2279,17 +2284,18 @@ static int __init ns_init_module(void) * Perform minimum nandsim structure initialization to handle * the initial ID read command correctly */ - if (third_id_byte != 0xFF || fourth_id_byte != 0xFF) + if (id_bytes[6] != 0xFF || id_bytes[7] != 0xFF) + nand->geom.idbytes = 8; + else if (id_bytes[4] != 0xFF || id_bytes[5] != 0xFF) + nand->geom.idbytes = 6; + else if (id_bytes[2] != 0xFF || id_bytes[3] != 0xFF) nand->geom.idbytes = 4; else nand->geom.idbytes = 2; nand->regs.status = NS_STATUS_OK(nand); nand->nxstate = STATE_UNKNOWN; nand->options |= OPT_PAGE512; /* temporary value */ - nand->ids[0] = first_id_byte; - nand->ids[1] = second_id_byte; - nand->ids[2] = third_id_byte; - nand->ids[3] = fourth_id_byte; + memcpy(nand->ids, id_bytes, sizeof(nand->ids)); if (bus_width == 16) { nand->busw = 16; chip->options |= NAND_BUSWIDTH_16; diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 6d74b56dd9f..63f858e6bf3 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -144,11 +144,13 @@ static u_char bch8_vector[] = {0xf3, 0xdb, 0x14, 0x16, 0x8b, 0xd2, 0xbe, 0xcc, 0xac, 0x6b, 0xff, 0x99, 0x7b}; static u_char bch4_vector[] = {0x00, 0x6b, 0x31, 0xdd, 0x41, 0xbc, 0x10}; -/* oob info generated runtime depending on ecc algorithm and layout selected */ -static struct nand_ecclayout 
omap_oobinfo; +/* Shared among all NAND instances to synchronize access to the ECC Engine */ +static struct nand_hw_control omap_gpmc_controller = { + .lock = __SPIN_LOCK_UNLOCKED(omap_gpmc_controller.lock), + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(omap_gpmc_controller.wq), +}; struct omap_nand_info { - struct nand_hw_control controller; struct omap_nand_platform_data *pdata; struct mtd_info mtd; struct nand_chip nand; @@ -168,6 +170,8 @@ struct omap_nand_info { u_char *buf; int buf_len; struct gpmc_nand_regs reg; + /* generated at runtime depending on ECC algorithm and layout selected */ + struct nand_ecclayout oobinfo; /* fields specific for BCHx_HW ECC scheme */ struct device *elm_dev; struct device_node *of_node; @@ -1686,9 +1690,6 @@ static int omap_nand_probe(struct platform_device *pdev) platform_set_drvdata(pdev, info); - spin_lock_init(&info->controller.lock); - init_waitqueue_head(&info->controller.wq); - info->pdev = pdev; info->gpmc_cs = pdata->cs; info->reg = pdata->reg; @@ -1708,7 +1709,7 @@ static int omap_nand_probe(struct platform_device *pdev) info->phys_base = res->start; - nand_chip->controller = &info->controller; + nand_chip->controller = &omap_gpmc_controller; nand_chip->IO_ADDR_W = nand_chip->IO_ADDR_R; nand_chip->cmd_ctrl = omap_hwcontrol; @@ -1741,13 +1742,6 @@ static int omap_nand_probe(struct platform_device *pdev) goto return_error; } - /* check for small page devices */ - if ((mtd->oobsize < 64) && (pdata->ecc_opt != OMAP_ECC_HAM1_CODE_HW)) { - dev_err(&info->pdev->dev, "small page devices are not supported\n"); - err = -EINVAL; - goto return_error; - } - /* re-populate low-level callbacks based on xfer modes */ switch (pdata->xfer_type) { case NAND_OMAP_PREFETCH_POLLED: @@ -1840,7 +1834,7 @@ static int omap_nand_probe(struct platform_device *pdev) } /* populate MTD interface based on ECC scheme */ - ecclayout = &omap_oobinfo; + ecclayout = &info->oobinfo; switch (info->ecc_opt) { case OMAP_ECC_HAM1_CODE_SW: nand_chip->ecc.mode = NAND_ECC_SOFT; diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c index c53e36956bf..c3c6d305caa 100644 --- a/drivers/mtd/nand/orion_nand.c +++ b/drivers/mtd/nand/orion_nand.c @@ -19,7 +19,7 @@ #include <linux/mtd/partitions.h> #include <linux/clk.h> #include <linux/err.h> -#include <asm/io.h> +#include <linux/io.h> #include <asm/sizes.h> #include <linux/platform_data/mtd-orion_nand.h> @@ -85,33 +85,24 @@ static int __init orion_nand_probe(struct platform_device *pdev) int ret = 0; u32 val = 0; - nc = kzalloc(sizeof(struct nand_chip) + sizeof(struct mtd_info), GFP_KERNEL); - if (!nc) { - ret = -ENOMEM; - goto no_res; - } + nc = devm_kzalloc(&pdev->dev, + sizeof(struct nand_chip) + sizeof(struct mtd_info), + GFP_KERNEL); + if (!nc) + return -ENOMEM; mtd = (struct mtd_info *)(nc + 1); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - ret = -ENODEV; - goto no_res; - } + io_base = devm_ioremap_resource(&pdev->dev, res); - io_base = ioremap(res->start, resource_size(res)); - if (!io_base) { - dev_err(&pdev->dev, "ioremap failed\n"); - ret = -EIO; - goto no_res; - } + if (IS_ERR(io_base)) + return PTR_ERR(io_base); if (pdev->dev.of_node) { board = devm_kzalloc(&pdev->dev, sizeof(struct orion_nand_data), GFP_KERNEL); - if (!board) { - ret = -ENOMEM; - goto no_res; - } + if (!board) + return -ENOMEM; if (!of_property_read_u32(pdev->dev.of_node, "cle", &val)) board->cle = (u8)val; else @@ -185,9 +176,6 @@ no_dev: clk_disable_unprepare(clk); clk_put(clk); } - iounmap(io_base); -no_res: - kfree(nc); 
return ret; } @@ -195,15 +183,10 @@ no_res: static int orion_nand_remove(struct platform_device *pdev) { struct mtd_info *mtd = platform_get_drvdata(pdev); - struct nand_chip *nc = mtd->priv; struct clk *clk; nand_release(mtd); - iounmap(nc->IO_ADDR_W); - - kfree(nc); - clk = clk_get(&pdev->dev, NULL); if (!IS_ERR(clk)) { clk_disable_unprepare(clk); diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c new file mode 100644 index 00000000000..ccaa8e28338 --- /dev/null +++ b/drivers/mtd/nand/sunxi_nand.c @@ -0,0 +1,1432 @@ +/* + * Copyright (C) 2013 Boris BREZILLON <b.brezillon.dev@gmail.com> + * + * Derived from: + * https://github.com/yuq/sunxi-nfc-mtd + * Copyright (C) 2013 Qiang Yu <yuq825@gmail.com> + * + * https://github.com/hno/Allwinner-Info + * Copyright (C) 2013 Henrik Nordström <Henrik Nordström> + * + * Copyright (C) 2013 Dmitriy B. <rzk333@gmail.com> + * Copyright (C) 2013 Sergey Lapin <slapin@ossfans.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/platform_device.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_gpio.h> +#include <linux/of_mtd.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/nand.h> +#include <linux/mtd/partitions.h> +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/dmaengine.h> +#include <linux/gpio.h> +#include <linux/interrupt.h> +#include <linux/io.h> + +#define NFC_REG_CTL 0x0000 +#define NFC_REG_ST 0x0004 +#define NFC_REG_INT 0x0008 +#define NFC_REG_TIMING_CTL 0x000C +#define NFC_REG_TIMING_CFG 0x0010 +#define NFC_REG_ADDR_LOW 0x0014 +#define NFC_REG_ADDR_HIGH 0x0018 +#define NFC_REG_SECTOR_NUM 0x001C +#define NFC_REG_CNT 0x0020 +#define NFC_REG_CMD 0x0024 +#define NFC_REG_RCMD_SET 0x0028 +#define NFC_REG_WCMD_SET 0x002C +#define NFC_REG_IO_DATA 0x0030 +#define NFC_REG_ECC_CTL 0x0034 +#define NFC_REG_ECC_ST 0x0038 +#define NFC_REG_DEBUG 0x003C +#define NFC_REG_ECC_CNT0 0x0040 +#define NFC_REG_ECC_CNT1 0x0044 +#define NFC_REG_ECC_CNT2 0x0048 +#define NFC_REG_ECC_CNT3 0x004c +#define NFC_REG_USER_DATA_BASE 0x0050 +#define NFC_REG_SPARE_AREA 0x00A0 +#define NFC_RAM0_BASE 0x0400 +#define NFC_RAM1_BASE 0x0800 + +/* define bit use in NFC_CTL */ +#define NFC_EN BIT(0) +#define NFC_RESET BIT(1) +#define NFC_BUS_WIDYH BIT(2) +#define NFC_RB_SEL BIT(3) +#define NFC_CE_SEL GENMASK(26, 24) +#define NFC_CE_CTL BIT(6) +#define NFC_CE_CTL1 BIT(7) +#define NFC_PAGE_SIZE GENMASK(11, 8) +#define NFC_SAM BIT(12) +#define NFC_RAM_METHOD BIT(14) +#define NFC_DEBUG_CTL BIT(31) + +/* define bit use in NFC_ST */ +#define NFC_RB_B2R BIT(0) +#define NFC_CMD_INT_FLAG BIT(1) +#define NFC_DMA_INT_FLAG BIT(2) +#define NFC_CMD_FIFO_STATUS BIT(3) +#define NFC_STA BIT(4) +#define NFC_NATCH_INT_FLAG BIT(5) +#define NFC_RB_STATE0 BIT(8) +#define NFC_RB_STATE1 BIT(9) +#define NFC_RB_STATE2 BIT(10) +#define NFC_RB_STATE3 BIT(11) + +/* define bit use in NFC_INT */ +#define NFC_B2R_INT_ENABLE BIT(0) +#define NFC_CMD_INT_ENABLE 
BIT(1) +#define NFC_DMA_INT_ENABLE BIT(2) +#define NFC_INT_MASK (NFC_B2R_INT_ENABLE | \ + NFC_CMD_INT_ENABLE | \ + NFC_DMA_INT_ENABLE) + +/* define bit use in NFC_CMD */ +#define NFC_CMD_LOW_BYTE GENMASK(7, 0) +#define NFC_CMD_HIGH_BYTE GENMASK(15, 8) +#define NFC_ADR_NUM GENMASK(18, 16) +#define NFC_SEND_ADR BIT(19) +#define NFC_ACCESS_DIR BIT(20) +#define NFC_DATA_TRANS BIT(21) +#define NFC_SEND_CMD1 BIT(22) +#define NFC_WAIT_FLAG BIT(23) +#define NFC_SEND_CMD2 BIT(24) +#define NFC_SEQ BIT(25) +#define NFC_DATA_SWAP_METHOD BIT(26) +#define NFC_ROW_AUTO_INC BIT(27) +#define NFC_SEND_CMD3 BIT(28) +#define NFC_SEND_CMD4 BIT(29) +#define NFC_CMD_TYPE GENMASK(31, 30) + +/* define bit use in NFC_RCMD_SET */ +#define NFC_READ_CMD GENMASK(7, 0) +#define NFC_RANDOM_READ_CMD0 GENMASK(15, 8) +#define NFC_RANDOM_READ_CMD1 GENMASK(23, 16) + +/* define bit use in NFC_WCMD_SET */ +#define NFC_PROGRAM_CMD GENMASK(7, 0) +#define NFC_RANDOM_WRITE_CMD GENMASK(15, 8) +#define NFC_READ_CMD0 GENMASK(23, 16) +#define NFC_READ_CMD1 GENMASK(31, 24) + +/* define bit use in NFC_ECC_CTL */ +#define NFC_ECC_EN BIT(0) +#define NFC_ECC_PIPELINE BIT(3) +#define NFC_ECC_EXCEPTION BIT(4) +#define NFC_ECC_BLOCK_SIZE BIT(5) +#define NFC_RANDOM_EN BIT(9) +#define NFC_RANDOM_DIRECTION BIT(10) +#define NFC_ECC_MODE_SHIFT 12 +#define NFC_ECC_MODE GENMASK(15, 12) +#define NFC_RANDOM_SEED GENMASK(30, 16) + +#define NFC_DEFAULT_TIMEOUT_MS 1000 + +#define NFC_SRAM_SIZE 1024 + +#define NFC_MAX_CS 7 + +/* + * Ready/Busy detection type: describes the Ready/Busy detection modes + * + * @RB_NONE: no external detection available, rely on STATUS command + * and software timeouts + * @RB_NATIVE: use sunxi NAND controller Ready/Busy support. The Ready/Busy + * pin of the NAND flash chip must be connected to one of the + * native NAND R/B pins (those which can be muxed to the NAND + * Controller) + * @RB_GPIO: use a simple GPIO to handle Ready/Busy status. The Ready/Busy + * pin of the NAND flash chip must be connected to a GPIO capable + * pin. + */ +enum sunxi_nand_rb_type { + RB_NONE, + RB_NATIVE, + RB_GPIO, +}; + +/* + * Ready/Busy structure: stores information related to Ready/Busy detection + * + * @type: the Ready/Busy detection mode + * @info: information related to the R/B detection mode. Either a gpio + * id or a native R/B id (those supported by the NAND controller). 
+ */ +struct sunxi_nand_rb { + enum sunxi_nand_rb_type type; + union { + int gpio; + int nativeid; + } info; +}; + +/* + * Chip Select structure: stores information related to NAND Chip Select + * + * @cs: the NAND CS id used to communicate with a NAND Chip + * @rb: the Ready/Busy description + */ +struct sunxi_nand_chip_sel { + u8 cs; + struct sunxi_nand_rb rb; +}; + +/* + * sunxi HW ECC infos: stores information related to HW ECC support + * + * @mode: the sunxi ECC mode field deduced from ECC requirements + * @layout: the OOB layout depending on the ECC requirements and the + * selected ECC mode + */ +struct sunxi_nand_hw_ecc { + int mode; + struct nand_ecclayout layout; +}; + +/* + * NAND chip structure: stores NAND chip device related information + * + * @node: used to store NAND chips into a list + * @nand: base NAND chip structure + * @mtd: base MTD structure + * @clk_rate: clk_rate required for this NAND chip + * @selected: current active CS + * @nsels: number of CS lines required by the NAND chip + * @sels: array of CS lines descriptions + */ +struct sunxi_nand_chip { + struct list_head node; + struct nand_chip nand; + struct mtd_info mtd; + unsigned long clk_rate; + int selected; + int nsels; + struct sunxi_nand_chip_sel sels[0]; +}; + +static inline struct sunxi_nand_chip *to_sunxi_nand(struct nand_chip *nand) +{ + return container_of(nand, struct sunxi_nand_chip, nand); +} + +/* + * NAND Controller structure: stores sunxi NAND controller information + * + * @controller: base controller structure + * @dev: parent device (used to print error messages) + * @regs: NAND controller registers + * @ahb_clk: NAND Controller AHB clock + * @mod_clk: NAND Controller mod clock + * @assigned_cs: bitmask describing already assigned CS lines + * @clk_rate: NAND controller current clock rate + * @chips: a list containing all the NAND chips attached to + * this NAND controller + * @complete: a completion object used to wait for NAND + * controller events + */ +struct sunxi_nfc { + struct nand_hw_control controller; + struct device *dev; + void __iomem *regs; + struct clk *ahb_clk; + struct clk *mod_clk; + unsigned long assigned_cs; + unsigned long clk_rate; + struct list_head chips; + struct completion complete; +}; + +static inline struct sunxi_nfc *to_sunxi_nfc(struct nand_hw_control *ctrl) +{ + return container_of(ctrl, struct sunxi_nfc, controller); +} + +static irqreturn_t sunxi_nfc_interrupt(int irq, void *dev_id) +{ + struct sunxi_nfc *nfc = dev_id; + u32 st = readl(nfc->regs + NFC_REG_ST); + u32 ien = readl(nfc->regs + NFC_REG_INT); + + if (!(ien & st)) + return IRQ_NONE; + + if ((ien & st) == ien) + complete(&nfc->complete); + + writel(st & NFC_INT_MASK, nfc->regs + NFC_REG_ST); + writel(~st & ien & NFC_INT_MASK, nfc->regs + NFC_REG_INT); + + return IRQ_HANDLED; +} + +static int sunxi_nfc_wait_int(struct sunxi_nfc *nfc, u32 flags, + unsigned int timeout_ms) +{ + init_completion(&nfc->complete); + + writel(flags, nfc->regs + NFC_REG_INT); + + if (!timeout_ms) + timeout_ms = NFC_DEFAULT_TIMEOUT_MS; + + if (!wait_for_completion_timeout(&nfc->complete, + msecs_to_jiffies(timeout_ms))) { + dev_err(nfc->dev, "wait interrupt timedout\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int sunxi_nfc_wait_cmd_fifo_empty(struct sunxi_nfc *nfc) +{ + unsigned long timeout = jiffies + + msecs_to_jiffies(NFC_DEFAULT_TIMEOUT_MS); + + do { + if (!(readl(nfc->regs + NFC_REG_ST) & NFC_CMD_FIFO_STATUS)) + return 0; + } while (time_before(jiffies, timeout)); + + dev_err(nfc->dev, "wait for empty cmd 
FIFO timedout\n"); + return -ETIMEDOUT; +} + +static int sunxi_nfc_rst(struct sunxi_nfc *nfc) +{ + unsigned long timeout = jiffies + + msecs_to_jiffies(NFC_DEFAULT_TIMEOUT_MS); + + writel(0, nfc->regs + NFC_REG_ECC_CTL); + writel(NFC_RESET, nfc->regs + NFC_REG_CTL); + + do { + if (!(readl(nfc->regs + NFC_REG_CTL) & NFC_RESET)) + return 0; + } while (time_before(jiffies, timeout)); + + dev_err(nfc->dev, "wait for NAND controller reset timedout\n"); + return -ETIMEDOUT; +} + +static int sunxi_nfc_dev_ready(struct mtd_info *mtd) +{ + struct nand_chip *nand = mtd->priv; + struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand); + struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller); + struct sunxi_nand_rb *rb; + unsigned long timeo = (sunxi_nand->nand.state == FL_ERASING ? 400 : 20); + int ret; + + if (sunxi_nand->selected < 0) + return 0; + + rb = &sunxi_nand->sels[sunxi_nand->selected].rb; + + switch (rb->type) { + case RB_NATIVE: + ret = !!(readl(nfc->regs + NFC_REG_ST) & + (NFC_RB_STATE0 << rb->info.nativeid)); + if (ret) + break; + + sunxi_nfc_wait_int(nfc, NFC_RB_B2R, timeo); + ret = !!(readl(nfc->regs + NFC_REG_ST) & + (NFC_RB_STATE0 << rb->info.nativeid)); + break; + case RB_GPIO: + ret = gpio_get_value(rb->info.gpio); + break; + case RB_NONE: + default: + ret = 0; + dev_err(nfc->dev, "cannot check R/B NAND status!\n"); + break; + } + + return ret; +} + +static void sunxi_nfc_select_chip(struct mtd_info *mtd, int chip) +{ + struct nand_chip *nand = mtd->priv; + struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand); + struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller); + struct sunxi_nand_chip_sel *sel; + u32 ctl; + + if (chip > 0 && chip >= sunxi_nand->nsels) + return; + + if (chip == sunxi_nand->selected) + return; + + ctl = readl(nfc->regs + NFC_REG_CTL) & + ~(NFC_CE_SEL | NFC_RB_SEL | NFC_EN); + + if (chip >= 0) { + sel = &sunxi_nand->sels[chip]; + + ctl |= (sel->cs << 24) | NFC_EN | + (((nand->page_shift - 10) & 0xf) << 8); + if (sel->rb.type == RB_NONE) { + nand->dev_ready = NULL; + } else { + nand->dev_ready = sunxi_nfc_dev_ready; + if (sel->rb.type == RB_NATIVE) + ctl |= (sel->rb.info.nativeid << 3); + } + + writel(mtd->writesize, nfc->regs + NFC_REG_SPARE_AREA); + + if (nfc->clk_rate != sunxi_nand->clk_rate) { + clk_set_rate(nfc->mod_clk, sunxi_nand->clk_rate); + nfc->clk_rate = sunxi_nand->clk_rate; + } + } + + writel(ctl, nfc->regs + NFC_REG_CTL); + + sunxi_nand->selected = chip; +} + +static void sunxi_nfc_read_buf(struct mtd_info *mtd, uint8_t *buf, int len) +{ + struct nand_chip *nand = mtd->priv; + struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand); + struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller); + int ret; + int cnt; + int offs = 0; + u32 tmp; + + while (len > offs) { + cnt = min(len - offs, NFC_SRAM_SIZE); + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + break; + + writel(cnt, nfc->regs + NFC_REG_CNT); + tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD; + writel(tmp, nfc->regs + NFC_REG_CMD); + + ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); + if (ret) + break; + + if (buf) + memcpy_fromio(buf + offs, nfc->regs + NFC_RAM0_BASE, + cnt); + offs += cnt; + } +} + +static void sunxi_nfc_write_buf(struct mtd_info *mtd, const uint8_t *buf, + int len) +{ + struct nand_chip *nand = mtd->priv; + struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand); + struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller); + int ret; + int cnt; + int offs = 0; + u32 tmp; + + while (len > offs) { + cnt = min(len - 
offs, NFC_SRAM_SIZE); + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + break; + + writel(cnt, nfc->regs + NFC_REG_CNT); + memcpy_toio(nfc->regs + NFC_RAM0_BASE, buf + offs, cnt); + tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | + NFC_ACCESS_DIR; + writel(tmp, nfc->regs + NFC_REG_CMD); + + ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); + if (ret) + break; + + offs += cnt; + } +} + +static uint8_t sunxi_nfc_read_byte(struct mtd_info *mtd) +{ + uint8_t ret; + + sunxi_nfc_read_buf(mtd, &ret, 1); + + return ret; +} + +static void sunxi_nfc_cmd_ctrl(struct mtd_info *mtd, int dat, + unsigned int ctrl) +{ + struct nand_chip *nand = mtd->priv; + struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand); + struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller); + int ret; + u32 tmp; + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + return; + + if (ctrl & NAND_CTRL_CHANGE) { + tmp = readl(nfc->regs + NFC_REG_CTL); + if (ctrl & NAND_NCE) + tmp |= NFC_CE_CTL; + else + tmp &= ~NFC_CE_CTL; + writel(tmp, nfc->regs + NFC_REG_CTL); + } + + if (dat == NAND_CMD_NONE) + return; + + if (ctrl & NAND_CLE) { + writel(NFC_SEND_CMD1 | dat, nfc->regs + NFC_REG_CMD); + } else { + writel(dat, nfc->regs + NFC_REG_ADDR_LOW); + writel(NFC_SEND_ADR, nfc->regs + NFC_REG_CMD); + } + + sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); +} + +static int sunxi_nfc_hw_ecc_read_page(struct mtd_info *mtd, + struct nand_chip *chip, uint8_t *buf, + int oob_required, int page) +{ + struct sunxi_nfc *nfc = to_sunxi_nfc(chip->controller); + struct nand_ecc_ctrl *ecc = &chip->ecc; + struct nand_ecclayout *layout = ecc->layout; + struct sunxi_nand_hw_ecc *data = ecc->priv; + unsigned int max_bitflips = 0; + int offset; + int ret; + u32 tmp; + int i; + int cnt; + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~(NFC_ECC_MODE | NFC_ECC_PIPELINE | NFC_ECC_BLOCK_SIZE); + tmp |= NFC_ECC_EN | (data->mode << NFC_ECC_MODE_SHIFT) | + NFC_ECC_EXCEPTION; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + for (i = 0; i < ecc->steps; i++) { + if (i) + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, i * ecc->size, -1); + + offset = mtd->writesize + layout->eccpos[i * ecc->bytes] - 4; + + chip->read_buf(mtd, NULL, ecc->size); + + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1); + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + return ret; + + tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | (1 << 30); + writel(tmp, nfc->regs + NFC_REG_CMD); + + ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); + if (ret) + return ret; + + memcpy_fromio(buf + (i * ecc->size), + nfc->regs + NFC_RAM0_BASE, ecc->size); + + if (readl(nfc->regs + NFC_REG_ECC_ST) & 0x1) { + mtd->ecc_stats.failed++; + } else { + tmp = readl(nfc->regs + NFC_REG_ECC_CNT0) & 0xff; + mtd->ecc_stats.corrected += tmp; + max_bitflips = max_t(unsigned int, max_bitflips, tmp); + } + + if (oob_required) { + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1); + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + return ret; + + offset -= mtd->writesize; + chip->read_buf(mtd, chip->oob_poi + offset, + ecc->bytes + 4); + } + } + + if (oob_required) { + cnt = ecc->layout->oobfree[ecc->steps].length; + if (cnt > 0) { + offset = mtd->writesize + + ecc->layout->oobfree[ecc->steps].offset; + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1); + offset -= mtd->writesize; + chip->read_buf(mtd, chip->oob_poi + offset, cnt); + } + } + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~NFC_ECC_EN; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + return max_bitflips; +} + +static int 
sunxi_nfc_hw_ecc_write_page(struct mtd_info *mtd, + struct nand_chip *chip, + const uint8_t *buf, int oob_required) +{ + struct sunxi_nfc *nfc = to_sunxi_nfc(chip->controller); + struct nand_ecc_ctrl *ecc = &chip->ecc; + struct nand_ecclayout *layout = ecc->layout; + struct sunxi_nand_hw_ecc *data = ecc->priv; + int offset; + int ret; + u32 tmp; + int i; + int cnt; + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~(NFC_ECC_MODE | NFC_ECC_PIPELINE | NFC_ECC_BLOCK_SIZE); + tmp |= NFC_ECC_EN | (data->mode << NFC_ECC_MODE_SHIFT) | + NFC_ECC_EXCEPTION; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + for (i = 0; i < ecc->steps; i++) { + if (i) + chip->cmdfunc(mtd, NAND_CMD_RNDIN, i * ecc->size, -1); + + chip->write_buf(mtd, buf + (i * ecc->size), ecc->size); + + offset = layout->eccpos[i * ecc->bytes] - 4 + mtd->writesize; + + /* Fill OOB data in */ + if (oob_required) { + tmp = 0xffffffff; + memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, &tmp, + 4); + } else { + memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, + chip->oob_poi + offset - mtd->writesize, + 4); + } + + chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset, -1); + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + return ret; + + tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | NFC_ACCESS_DIR | + (1 << 30); + writel(tmp, nfc->regs + NFC_REG_CMD); + ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); + if (ret) + return ret; + } + + if (oob_required) { + cnt = ecc->layout->oobfree[i].length; + if (cnt > 0) { + offset = mtd->writesize + + ecc->layout->oobfree[i].offset; + chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset, -1); + offset -= mtd->writesize; + chip->write_buf(mtd, chip->oob_poi + offset, cnt); + } + } + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~NFC_ECC_EN; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + return 0; +} + +static int sunxi_nfc_hw_syndrome_ecc_read_page(struct mtd_info *mtd, + struct nand_chip *chip, + uint8_t *buf, int oob_required, + int page) +{ + struct sunxi_nfc *nfc = to_sunxi_nfc(chip->controller); + struct nand_ecc_ctrl *ecc = &chip->ecc; + struct sunxi_nand_hw_ecc *data = ecc->priv; + unsigned int max_bitflips = 0; + uint8_t *oob = chip->oob_poi; + int offset = 0; + int ret; + int cnt; + u32 tmp; + int i; + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~(NFC_ECC_MODE | NFC_ECC_PIPELINE | NFC_ECC_BLOCK_SIZE); + tmp |= NFC_ECC_EN | (data->mode << NFC_ECC_MODE_SHIFT) | + NFC_ECC_EXCEPTION; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + for (i = 0; i < ecc->steps; i++) { + chip->read_buf(mtd, NULL, ecc->size); + + tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | (1 << 30); + writel(tmp, nfc->regs + NFC_REG_CMD); + + ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); + if (ret) + return ret; + + memcpy_fromio(buf, nfc->regs + NFC_RAM0_BASE, ecc->size); + buf += ecc->size; + offset += ecc->size; + + if (readl(nfc->regs + NFC_REG_ECC_ST) & 0x1) { + mtd->ecc_stats.failed++; + } else { + tmp = readl(nfc->regs + NFC_REG_ECC_CNT0) & 0xff; + mtd->ecc_stats.corrected += tmp; + max_bitflips = max_t(unsigned int, max_bitflips, tmp); + } + + if (oob_required) { + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1); + chip->read_buf(mtd, oob, ecc->bytes + ecc->prepad); + oob += ecc->bytes + ecc->prepad; + } + + offset += ecc->bytes + ecc->prepad; + } + + if (oob_required) { + cnt = mtd->oobsize - (oob - chip->oob_poi); + if (cnt > 0) { + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1); + chip->read_buf(mtd, oob, cnt); + } + } + + writel(readl(nfc->regs + NFC_REG_ECC_CTL) & ~NFC_ECC_EN, + nfc->regs + 
NFC_REG_ECC_CTL); + + return max_bitflips; +} + +static int sunxi_nfc_hw_syndrome_ecc_write_page(struct mtd_info *mtd, + struct nand_chip *chip, + const uint8_t *buf, + int oob_required) +{ + struct sunxi_nfc *nfc = to_sunxi_nfc(chip->controller); + struct nand_ecc_ctrl *ecc = &chip->ecc; + struct sunxi_nand_hw_ecc *data = ecc->priv; + uint8_t *oob = chip->oob_poi; + int offset = 0; + int ret; + int cnt; + u32 tmp; + int i; + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~(NFC_ECC_MODE | NFC_ECC_PIPELINE | NFC_ECC_BLOCK_SIZE); + tmp |= NFC_ECC_EN | (data->mode << NFC_ECC_MODE_SHIFT) | + NFC_ECC_EXCEPTION; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + for (i = 0; i < ecc->steps; i++) { + chip->write_buf(mtd, buf + (i * ecc->size), ecc->size); + offset += ecc->size; + + /* Fill OOB data in */ + if (oob_required) { + tmp = 0xffffffff; + memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, &tmp, + 4); + } else { + memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, oob, + 4); + } + + tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | NFC_ACCESS_DIR | + (1 << 30); + writel(tmp, nfc->regs + NFC_REG_CMD); + + ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0); + if (ret) + return ret; + + offset += ecc->bytes + ecc->prepad; + oob += ecc->bytes + ecc->prepad; + } + + if (oob_required) { + cnt = mtd->oobsize - (oob - chip->oob_poi); + if (cnt > 0) { + chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset, -1); + chip->write_buf(mtd, oob, cnt); + } + } + + tmp = readl(nfc->regs + NFC_REG_ECC_CTL); + tmp &= ~NFC_ECC_EN; + + writel(tmp, nfc->regs + NFC_REG_ECC_CTL); + + return 0; +} + +static int sunxi_nand_chip_set_timings(struct sunxi_nand_chip *chip, + const struct nand_sdr_timings *timings) +{ + u32 min_clk_period = 0; + + /* T1 <=> tCLS */ + if (timings->tCLS_min > min_clk_period) + min_clk_period = timings->tCLS_min; + + /* T2 <=> tCLH */ + if (timings->tCLH_min > min_clk_period) + min_clk_period = timings->tCLH_min; + + /* T3 <=> tCS */ + if (timings->tCS_min > min_clk_period) + min_clk_period = timings->tCS_min; + + /* T4 <=> tCH */ + if (timings->tCH_min > min_clk_period) + min_clk_period = timings->tCH_min; + + /* T5 <=> tWP */ + if (timings->tWP_min > min_clk_period) + min_clk_period = timings->tWP_min; + + /* T6 <=> tWH */ + if (timings->tWH_min > min_clk_period) + min_clk_period = timings->tWH_min; + + /* T7 <=> tALS */ + if (timings->tALS_min > min_clk_period) + min_clk_period = timings->tALS_min; + + /* T8 <=> tDS */ + if (timings->tDS_min > min_clk_period) + min_clk_period = timings->tDS_min; + + /* T9 <=> tDH */ + if (timings->tDH_min > min_clk_period) + min_clk_period = timings->tDH_min; + + /* T10 <=> tRR */ + if (timings->tRR_min > (min_clk_period * 3)) + min_clk_period = DIV_ROUND_UP(timings->tRR_min, 3); + + /* T11 <=> tALH */ + if (timings->tALH_min > min_clk_period) + min_clk_period = timings->tALH_min; + + /* T12 <=> tRP */ + if (timings->tRP_min > min_clk_period) + min_clk_period = timings->tRP_min; + + /* T13 <=> tREH */ + if (timings->tREH_min > min_clk_period) + min_clk_period = timings->tREH_min; + + /* T14 <=> tRC */ + if (timings->tRC_min > (min_clk_period * 2)) + min_clk_period = DIV_ROUND_UP(timings->tRC_min, 2); + + /* T15 <=> tWC */ + if (timings->tWC_min > (min_clk_period * 2)) + min_clk_period = DIV_ROUND_UP(timings->tWC_min, 2); + + + /* Convert min_clk_period from picoseconds to nanoseconds */ + min_clk_period = DIV_ROUND_UP(min_clk_period, 1000); + + /* + * Convert min_clk_period into a clk frequency, then get the + * appropriate rate for the NAND controller IP given this 
formula + * (specified in the datasheet): + * nand clk_rate = 2 * min_clk_rate + */ + chip->clk_rate = (2 * NSEC_PER_SEC) / min_clk_period; + + /* TODO: configure T16-T19 */ + + return 0; +} + +static int sunxi_nand_chip_init_timings(struct sunxi_nand_chip *chip, + struct device_node *np) +{ + const struct nand_sdr_timings *timings; + int ret; + int mode; + + mode = onfi_get_async_timing_mode(&chip->nand); + if (mode == ONFI_TIMING_MODE_UNKNOWN) { + mode = chip->nand.onfi_timing_mode_default; + } else { + uint8_t feature[ONFI_SUBFEATURE_PARAM_LEN] = {}; + + mode = fls(mode) - 1; + if (mode < 0) + mode = 0; + + feature[0] = mode; + ret = chip->nand.onfi_set_features(&chip->mtd, &chip->nand, + ONFI_FEATURE_ADDR_TIMING_MODE, + feature); + if (ret) + return ret; + } + + timings = onfi_async_timing_mode_to_sdr_timings(mode); + if (IS_ERR(timings)) + return PTR_ERR(timings); + + return sunxi_nand_chip_set_timings(chip, timings); +} + +static int sunxi_nand_hw_common_ecc_ctrl_init(struct mtd_info *mtd, + struct nand_ecc_ctrl *ecc, + struct device_node *np) +{ + static const u8 strengths[] = { 16, 24, 28, 32, 40, 48, 56, 60, 64 }; + struct nand_chip *nand = mtd->priv; + struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand); + struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller); + struct sunxi_nand_hw_ecc *data; + struct nand_ecclayout *layout; + int nsectors; + int ret; + int i; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* Add ECC info retrieval from DT */ + for (i = 0; i < ARRAY_SIZE(strengths); i++) { + if (ecc->strength <= strengths[i]) + break; + } + + if (i >= ARRAY_SIZE(strengths)) { + dev_err(nfc->dev, "unsupported strength\n"); + ret = -ENOTSUPP; + goto err; + } + + data->mode = i; + + /* HW ECC always request ECC bytes for 1024 bytes blocks */ + ecc->bytes = DIV_ROUND_UP(ecc->strength * fls(8 * 1024), 8); + + /* HW ECC always work with even numbers of ECC bytes */ + ecc->bytes = ALIGN(ecc->bytes, 2); + + layout = &data->layout; + nsectors = mtd->writesize / ecc->size; + + if (mtd->oobsize < ((ecc->bytes + 4) * nsectors)) { + ret = -EINVAL; + goto err; + } + + layout->eccbytes = (ecc->bytes * nsectors); + + ecc->layout = layout; + ecc->priv = data; + + return 0; + +err: + kfree(data); + + return ret; +} + +static void sunxi_nand_hw_common_ecc_ctrl_cleanup(struct nand_ecc_ctrl *ecc) +{ + kfree(ecc->priv); +} + +static int sunxi_nand_hw_ecc_ctrl_init(struct mtd_info *mtd, + struct nand_ecc_ctrl *ecc, + struct device_node *np) +{ + struct nand_ecclayout *layout; + int nsectors; + int i, j; + int ret; + + ret = sunxi_nand_hw_common_ecc_ctrl_init(mtd, ecc, np); + if (ret) + return ret; + + ecc->read_page = sunxi_nfc_hw_ecc_read_page; + ecc->write_page = sunxi_nfc_hw_ecc_write_page; + layout = ecc->layout; + nsectors = mtd->writesize / ecc->size; + + for (i = 0; i < nsectors; i++) { + if (i) { + layout->oobfree[i].offset = + layout->oobfree[i - 1].offset + + layout->oobfree[i - 1].length + + ecc->bytes; + layout->oobfree[i].length = 4; + } else { + /* + * The first 2 bytes are used for BB markers, hence we + * only have 2 bytes available in the first user data + * section. 
+ */ + layout->oobfree[i].length = 2; + layout->oobfree[i].offset = 2; + } + + for (j = 0; j < ecc->bytes; j++) + layout->eccpos[(ecc->bytes * i) + j] = + layout->oobfree[i].offset + + layout->oobfree[i].length + j; + } + + if (mtd->oobsize > (ecc->bytes + 4) * nsectors) { + layout->oobfree[nsectors].offset = + layout->oobfree[nsectors - 1].offset + + layout->oobfree[nsectors - 1].length + + ecc->bytes; + layout->oobfree[nsectors].length = mtd->oobsize - + ((ecc->bytes + 4) * nsectors); + } + + return 0; +} + +static int sunxi_nand_hw_syndrome_ecc_ctrl_init(struct mtd_info *mtd, + struct nand_ecc_ctrl *ecc, + struct device_node *np) +{ + struct nand_ecclayout *layout; + int nsectors; + int i; + int ret; + + ret = sunxi_nand_hw_common_ecc_ctrl_init(mtd, ecc, np); + if (ret) + return ret; + + ecc->prepad = 4; + ecc->read_page = sunxi_nfc_hw_syndrome_ecc_read_page; + ecc->write_page = sunxi_nfc_hw_syndrome_ecc_write_page; + + layout = ecc->layout; + nsectors = mtd->writesize / ecc->size; + + for (i = 0; i < (ecc->bytes * nsectors); i++) + layout->eccpos[i] = i; + + layout->oobfree[0].length = mtd->oobsize - i; + layout->oobfree[0].offset = i; + + return 0; +} + +static void sunxi_nand_ecc_cleanup(struct nand_ecc_ctrl *ecc) +{ + switch (ecc->mode) { + case NAND_ECC_HW: + case NAND_ECC_HW_SYNDROME: + sunxi_nand_hw_common_ecc_ctrl_cleanup(ecc); + break; + case NAND_ECC_NONE: + kfree(ecc->layout); + default: + break; + } +} + +static int sunxi_nand_ecc_init(struct mtd_info *mtd, struct nand_ecc_ctrl *ecc, + struct device_node *np) +{ + struct nand_chip *nand = mtd->priv; + int strength; + int blk_size; + int ret; + + blk_size = of_get_nand_ecc_step_size(np); + strength = of_get_nand_ecc_strength(np); + if (blk_size > 0 && strength > 0) { + ecc->size = blk_size; + ecc->strength = strength; + } else { + ecc->size = nand->ecc_step_ds; + ecc->strength = nand->ecc_strength_ds; + } + + if (!ecc->size || !ecc->strength) + return -EINVAL; + + ecc->mode = NAND_ECC_HW; + + ret = of_get_nand_ecc_mode(np); + if (ret >= 0) + ecc->mode = ret; + + switch (ecc->mode) { + case NAND_ECC_SOFT_BCH: + ecc->bytes = DIV_ROUND_UP(ecc->strength * fls(8 * ecc->size), + 8); + break; + case NAND_ECC_HW: + ret = sunxi_nand_hw_ecc_ctrl_init(mtd, ecc, np); + if (ret) + return ret; + break; + case NAND_ECC_HW_SYNDROME: + ret = sunxi_nand_hw_syndrome_ecc_ctrl_init(mtd, ecc, np); + if (ret) + return ret; + break; + case NAND_ECC_NONE: + ecc->layout = kzalloc(sizeof(*ecc->layout), GFP_KERNEL); + if (!ecc->layout) + return -ENOMEM; + ecc->layout->oobfree[0].length = mtd->oobsize; + case NAND_ECC_SOFT: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc, + struct device_node *np) +{ + const struct nand_sdr_timings *timings; + struct sunxi_nand_chip *chip; + struct mtd_part_parser_data ppdata; + struct mtd_info *mtd; + struct nand_chip *nand; + int nsels; + int ret; + int i; + u32 tmp; + + if (!of_get_property(np, "reg", &nsels)) + return -EINVAL; + + nsels /= sizeof(u32); + if (!nsels) { + dev_err(dev, "invalid reg property size\n"); + return -EINVAL; + } + + chip = devm_kzalloc(dev, + sizeof(*chip) + + (nsels * sizeof(struct sunxi_nand_chip_sel)), + GFP_KERNEL); + if (!chip) { + dev_err(dev, "could not allocate chip\n"); + return -ENOMEM; + } + + chip->nsels = nsels; + chip->selected = -1; + + for (i = 0; i < nsels; i++) { + ret = of_property_read_u32_index(np, "reg", i, &tmp); + if (ret) { + dev_err(dev, "could not retrieve reg property: %d\n", + 
ret); + return ret; + } + + if (tmp > NFC_MAX_CS) { + dev_err(dev, + "invalid reg value: %u (max CS = 7)\n", + tmp); + return -EINVAL; + } + + if (test_and_set_bit(tmp, &nfc->assigned_cs)) { + dev_err(dev, "CS %d already assigned\n", tmp); + return -EINVAL; + } + + chip->sels[i].cs = tmp; + + if (!of_property_read_u32_index(np, "allwinner,rb", i, &tmp) && + tmp < 2) { + chip->sels[i].rb.type = RB_NATIVE; + chip->sels[i].rb.info.nativeid = tmp; + } else { + ret = of_get_named_gpio(np, "rb-gpios", i); + if (ret >= 0) { + tmp = ret; + chip->sels[i].rb.type = RB_GPIO; + chip->sels[i].rb.info.gpio = tmp; + ret = devm_gpio_request(dev, tmp, "nand-rb"); + if (ret) + return ret; + + ret = gpio_direction_input(tmp); + if (ret) + return ret; + } else { + chip->sels[i].rb.type = RB_NONE; + } + } + } + + timings = onfi_async_timing_mode_to_sdr_timings(0); + if (IS_ERR(timings)) { + ret = PTR_ERR(timings); + dev_err(dev, + "could not retrieve timings for ONFI mode 0: %d\n", + ret); + return ret; + } + + ret = sunxi_nand_chip_set_timings(chip, timings); + if (ret) { + dev_err(dev, "could not configure chip timings: %d\n", ret); + return ret; + } + + nand = &chip->nand; + /* Default tR value specified in the ONFI spec (chapter 4.15.1) */ + nand->chip_delay = 200; + nand->controller = &nfc->controller; + nand->select_chip = sunxi_nfc_select_chip; + nand->cmd_ctrl = sunxi_nfc_cmd_ctrl; + nand->read_buf = sunxi_nfc_read_buf; + nand->write_buf = sunxi_nfc_write_buf; + nand->read_byte = sunxi_nfc_read_byte; + + if (of_get_nand_on_flash_bbt(np)) + nand->bbt_options |= NAND_BBT_USE_FLASH | NAND_BBT_NO_OOB; + + mtd = &chip->mtd; + mtd->dev.parent = dev; + mtd->priv = nand; + mtd->owner = THIS_MODULE; + + ret = nand_scan_ident(mtd, nsels, NULL); + if (ret) + return ret; + + ret = sunxi_nand_chip_init_timings(chip, np); + if (ret) { + dev_err(dev, "could not configure chip timings: %d\n", ret); + return ret; + } + + ret = sunxi_nand_ecc_init(mtd, &nand->ecc, np); + if (ret) { + dev_err(dev, "ECC init failed: %d\n", ret); + return ret; + } + + ret = nand_scan_tail(mtd); + if (ret) { + dev_err(dev, "nand_scan_tail failed: %d\n", ret); + return ret; + } + + ppdata.of_node = np; + ret = mtd_device_parse_register(mtd, NULL, &ppdata, NULL, 0); + if (ret) { + dev_err(dev, "failed to register mtd device: %d\n", ret); + nand_release(mtd); + return ret; + } + + list_add_tail(&chip->node, &nfc->chips); + + return 0; +} + +static int sunxi_nand_chips_init(struct device *dev, struct sunxi_nfc *nfc) +{ + struct device_node *np = dev->of_node; + struct device_node *nand_np; + int nchips = of_get_child_count(np); + int ret; + + if (nchips > 8) { + dev_err(dev, "too many NAND chips: %d (max = 8)\n", nchips); + return -EINVAL; + } + + for_each_child_of_node(np, nand_np) { + ret = sunxi_nand_chip_init(dev, nfc, nand_np); + if (ret) + return ret; + } + + return 0; +} + +static void sunxi_nand_chips_cleanup(struct sunxi_nfc *nfc) +{ + struct sunxi_nand_chip *chip; + + while (!list_empty(&nfc->chips)) { + chip = list_first_entry(&nfc->chips, struct sunxi_nand_chip, + node); + nand_release(&chip->mtd); + sunxi_nand_ecc_cleanup(&chip->nand.ecc); + } +} + +static int sunxi_nfc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *r; + struct sunxi_nfc *nfc; + int irq; + int ret; + + nfc = devm_kzalloc(dev, sizeof(*nfc), GFP_KERNEL); + if (!nfc) + return -ENOMEM; + + nfc->dev = dev; + spin_lock_init(&nfc->controller.lock); + init_waitqueue_head(&nfc->controller.wq); + INIT_LIST_HEAD(&nfc->chips); + + 
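/*
 * Descriptive note on the probe sequence that follows: it maps the
 * controller registers, looks up the IRQ, then prepares/enables the
 * "ahb" and "mod" clocks before issuing the controller reset and
 * registering the interrupt handler. The out_mod_clk_unprepare and
 * out_ahb_clk_unprepare labels unwind the two clocks in reverse order
 * on any failure.
 */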
r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + nfc->regs = devm_ioremap_resource(dev, r); + if (IS_ERR(nfc->regs)) + return PTR_ERR(nfc->regs); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "failed to retrieve irq\n"); + return irq; + } + + nfc->ahb_clk = devm_clk_get(dev, "ahb"); + if (IS_ERR(nfc->ahb_clk)) { + dev_err(dev, "failed to retrieve ahb clk\n"); + return PTR_ERR(nfc->ahb_clk); + } + + ret = clk_prepare_enable(nfc->ahb_clk); + if (ret) + return ret; + + nfc->mod_clk = devm_clk_get(dev, "mod"); + if (IS_ERR(nfc->mod_clk)) { + dev_err(dev, "failed to retrieve mod clk\n"); + ret = PTR_ERR(nfc->mod_clk); + goto out_ahb_clk_unprepare; + } + + ret = clk_prepare_enable(nfc->mod_clk); + if (ret) + goto out_ahb_clk_unprepare; + + ret = sunxi_nfc_rst(nfc); + if (ret) + goto out_mod_clk_unprepare; + + writel(0, nfc->regs + NFC_REG_INT); + ret = devm_request_irq(dev, irq, sunxi_nfc_interrupt, + 0, "sunxi-nand", nfc); + if (ret) + goto out_mod_clk_unprepare; + + platform_set_drvdata(pdev, nfc); + + /* + * TODO: replace these magic values with proper flags as soon as we + * know what they are encoding. + */ + writel(0x100, nfc->regs + NFC_REG_TIMING_CTL); + writel(0x7ff, nfc->regs + NFC_REG_TIMING_CFG); + + ret = sunxi_nand_chips_init(dev, nfc); + if (ret) { + dev_err(dev, "failed to init nand chips\n"); + goto out_mod_clk_unprepare; + } + + return 0; + +out_mod_clk_unprepare: + clk_disable_unprepare(nfc->mod_clk); +out_ahb_clk_unprepare: + clk_disable_unprepare(nfc->ahb_clk); + + return ret; +} + +static int sunxi_nfc_remove(struct platform_device *pdev) +{ + struct sunxi_nfc *nfc = platform_get_drvdata(pdev); + + sunxi_nand_chips_cleanup(nfc); + + return 0; +} + +static const struct of_device_id sunxi_nfc_ids[] = { + { .compatible = "allwinner,sun4i-a10-nand" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, sunxi_nfc_ids); + +static struct platform_driver sunxi_nfc_driver = { + .driver = { + .name = "sunxi_nand", + .of_match_table = sunxi_nfc_ids, + }, + .probe = sunxi_nfc_probe, + .remove = sunxi_nfc_remove, +}; +module_platform_driver(sunxi_nfc_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Boris BREZILLON"); +MODULE_DESCRIPTION("Allwinner NAND Flash Controller driver"); +MODULE_ALIAS("platform:sunxi_nand"); diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c index 2fb07eced2b..39763b94f67 100644 --- a/drivers/mtd/spi-nor/fsl-quadspi.c +++ b/drivers/mtd/spi-nor/fsl-quadspi.c @@ -719,16 +719,10 @@ static int fsl_qspi_read(struct spi_nor *nor, loff_t from, { struct fsl_qspi *q = nor->priv; u8 cmd = nor->read_opcode; - int ret; dev_dbg(q->dev, "cmd [%x],read from (0x%p, 0x%.8x, 0x%.8x),len:%d\n", cmd, q->ahb_base, q->chip_base_addr, (unsigned int)from, len); - /* Wait until the previous command is finished. */ - ret = nor->wait_till_ready(nor); - if (ret) - return ret; - /* Read out the data directly from the AHB buffer.*/ memcpy(buf, q->ahb_base + q->chip_base_addr + from, len); @@ -744,16 +738,6 @@ static int fsl_qspi_erase(struct spi_nor *nor, loff_t offs) dev_dbg(nor->dev, "%dKiB at 0x%08x:0x%08x\n", nor->mtd->erasesize / 1024, q->chip_base_addr, (u32)offs); - /* Wait until finished previous write command. */ - ret = nor->wait_till_ready(nor); - if (ret) - return ret; - - /* Send write enable, then erase commands. 
*/ - ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0); - if (ret) - return ret; - ret = fsl_qspi_runcmd(q, nor->erase_opcode, offs, 0); if (ret) return ret; @@ -849,9 +833,8 @@ static int fsl_qspi_probe(struct platform_device *pdev) ret = clk_prepare_enable(q->clk); if (ret) { - clk_disable_unprepare(q->clk_en); dev_err(dev, "can not enable the qspi clock\n"); - goto map_failed; + goto clk_failed; } /* find the irq */ @@ -905,7 +888,8 @@ static int fsl_qspi_probe(struct platform_device *pdev) nor->prepare = fsl_qspi_prep; nor->unprepare = fsl_qspi_unprep; - if (of_modalias_node(np, modalias, sizeof(modalias)) < 0) + ret = of_modalias_node(np, modalias, sizeof(modalias)); + if (ret < 0) goto map_failed; ret = of_property_read_u32(np, "spi-max-frequency", @@ -964,6 +948,7 @@ last_init_failed: irq_failed: clk_disable_unprepare(q->clk); +clk_failed: clk_disable_unprepare(q->clk_en); map_failed: dev_err(dev, "Freescale QuadSPI probe failed\n"); diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index c51ee52386a..0f8ec3c2d01 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -26,7 +26,38 @@ /* Define max times to check status register before we give up. */ #define MAX_READY_WAIT_JIFFIES (40 * HZ) /* M25P16 specs 40s max chip erase */ -#define JEDEC_MFR(_jedec_id) ((_jedec_id) >> 16) +#define SPI_NOR_MAX_ID_LEN 6 + +struct flash_info { + /* + * This array stores the ID bytes. + * The first three bytes are the JEDIC ID. + * JEDEC ID zero means "no ID" (mostly older chips). + */ + u8 id[SPI_NOR_MAX_ID_LEN]; + u8 id_len; + + /* The size listed here is what works with SPINOR_OP_SE, which isn't + * necessarily called a "sector" by the vendor. + */ + unsigned sector_size; + u16 n_sectors; + + u16 page_size; + u16 addr_width; + + u16 flags; +#define SECT_4K 0x01 /* SPINOR_OP_BE_4K works uniformly */ +#define SPI_NOR_NO_ERASE 0x02 /* No erase command needed */ +#define SST_WRITE 0x04 /* use SST byte programming */ +#define SPI_NOR_NO_FR 0x08 /* Can't do fastread */ +#define SECT_4K_PMC 0x10 /* SPINOR_OP_BE_4K_PMC works uniformly */ +#define SPI_NOR_DUAL_READ 0x20 /* Flash supports Dual Read */ +#define SPI_NOR_QUAD_READ 0x40 /* Flash supports Quad Read */ +#define USE_FSR 0x80 /* use flag status register */ +}; + +#define JEDEC_MFR(info) ((info)->id[0]) static const struct spi_device_id *spi_nor_match_id(const char *name); @@ -98,7 +129,7 @@ static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor) case SPI_NOR_FAST: case SPI_NOR_DUAL: case SPI_NOR_QUAD: - return 1; + return 8; case SPI_NOR_NORMAL: return 0; } @@ -138,13 +169,14 @@ static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd) } /* Enable/disable 4-byte addressing mode. 
*/ -static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable) +static inline int set_4byte(struct spi_nor *nor, struct flash_info *info, + int enable) { int status; bool need_wren = false; u8 cmd; - switch (JEDEC_MFR(jedec_id)) { + switch (JEDEC_MFR(info)) { case CFI_MFR_ST: /* Micron, actually */ /* Some Micron need WREN command; all will accept it */ need_wren = true; @@ -165,81 +197,74 @@ static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable) return nor->write_reg(nor, SPINOR_OP_BRWR, nor->cmd_buf, 1, 0); } } - -static int spi_nor_wait_till_ready(struct spi_nor *nor) +static inline int spi_nor_sr_ready(struct spi_nor *nor) { - unsigned long deadline; - int sr; - - deadline = jiffies + MAX_READY_WAIT_JIFFIES; - - do { - cond_resched(); + int sr = read_sr(nor); + if (sr < 0) + return sr; + else + return !(sr & SR_WIP); +} - sr = read_sr(nor); - if (sr < 0) - break; - else if (!(sr & SR_WIP)) - return 0; - } while (!time_after_eq(jiffies, deadline)); +static inline int spi_nor_fsr_ready(struct spi_nor *nor) +{ + int fsr = read_fsr(nor); + if (fsr < 0) + return fsr; + else + return fsr & FSR_READY; +} - return -ETIMEDOUT; +static int spi_nor_ready(struct spi_nor *nor) +{ + int sr, fsr; + sr = spi_nor_sr_ready(nor); + if (sr < 0) + return sr; + fsr = nor->flags & SNOR_F_USE_FSR ? spi_nor_fsr_ready(nor) : 1; + if (fsr < 0) + return fsr; + return sr && fsr; } -static int spi_nor_wait_till_fsr_ready(struct spi_nor *nor) +/* + * Service routine to read status register until ready, or timeout occurs. + * Returns non-zero if error. + */ +static int spi_nor_wait_till_ready(struct spi_nor *nor) { unsigned long deadline; - int sr; - int fsr; + int timeout = 0, ret; deadline = jiffies + MAX_READY_WAIT_JIFFIES; - do { + while (!timeout) { + if (time_after_eq(jiffies, deadline)) + timeout = 1; + + ret = spi_nor_ready(nor); + if (ret < 0) + return ret; + if (ret) + return 0; + cond_resched(); + } - sr = read_sr(nor); - if (sr < 0) { - break; - } else if (!(sr & SR_WIP)) { - fsr = read_fsr(nor); - if (fsr < 0) - break; - if (fsr & FSR_READY) - return 0; - } - } while (!time_after_eq(jiffies, deadline)); + dev_err(nor->dev, "flash operation timed out\n"); return -ETIMEDOUT; } /* - * Service routine to read status register until ready, or timeout occurs. - * Returns non-zero if error. - */ -static int wait_till_ready(struct spi_nor *nor) -{ - return nor->wait_till_ready(nor); -} - -/* * Erase the whole flash memory * * Returns 0 if successful, non-zero otherwise. */ static int erase_chip(struct spi_nor *nor) { - int ret; - dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd->size >> 10)); - /* Wait until finished previous write command. */ - ret = wait_till_ready(nor); - if (ret) - return ret; - - /* Send write enable, then erase commands. */ - write_enable(nor); - return nor->write_reg(nor, SPINOR_OP_CHIP_ERASE, NULL, 0, 0); } @@ -294,11 +319,17 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr) /* whole-chip erase? */ if (len == mtd->size) { + write_enable(nor); + if (erase_chip(nor)) { ret = -EIO; goto erase_err; } + ret = spi_nor_wait_till_ready(nor); + if (ret) + goto erase_err; + /* REVISIT in some cases we could speed up erasing large regions * by using SPINOR_OP_SE instead of SPINOR_OP_BE_4K. We may have set up * to use "small sector erase", but that's not always optimal. 
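The hunks above invert the readiness protocol: rather than each operation first draining the previous command through nor->wait_till_ready(), the core now brackets every erase itself, issuing WREN before the opcode and polling status after it; spi_nor_ready() also folds in the flag status register when SNOR_F_USE_FSR is set. A minimal sketch of the resulting per-sector sequence, with the caveat that sketch_erase_sector is an illustrative name and not a function added by this patch:

/* Sketch of the erase sequence the refactored core performs per sector. */
static int sketch_erase_sector(struct spi_nor *nor, loff_t addr)
{
	int ret;

	write_enable(nor);		/* WREN before each erase */

	ret = nor->erase(nor, addr);	/* driver sends the erase opcode */
	if (ret)
		return ret;

	/* Poll SR (and FSR too when SNOR_F_USE_FSR is set) until idle */
	return spi_nor_wait_till_ready(nor);
}

Pushing the wait into the core in this way is what allows fsl_qspi_read() and fsl_qspi_erase() to drop their private wait_till_ready calls earlier in this patch.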
@@ -307,6 +338,8 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr) /* "sector"-at-a-time erase */ } else { while (len) { + write_enable(nor); + if (nor->erase(nor, addr)) { ret = -EIO; goto erase_err; @@ -314,9 +347,15 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr) addr += mtd->erasesize; len -= mtd->erasesize; + + ret = spi_nor_wait_till_ready(nor); + if (ret) + goto erase_err; } } + write_disable(nor); + spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_ERASE); instr->state = MTD_ERASE_DONE; @@ -341,11 +380,6 @@ static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) if (ret) return ret; - /* Wait until finished previous command */ - ret = wait_till_ready(nor); - if (ret) - goto err; - status_old = read_sr(nor); if (offset < mtd->size - (mtd->size / 2)) @@ -388,11 +422,6 @@ static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) if (ret) return ret; - /* Wait until finished previous command */ - ret = wait_till_ready(nor); - if (ret) - goto err; - status_old = read_sr(nor); if (offset+len > mtd->size - (mtd->size / 64)) @@ -424,38 +453,34 @@ err: return ret; } -struct flash_info { - /* JEDEC id zero means "no ID" (most older chips); otherwise it has - * a high byte of zero plus three data bytes: the manufacturer id, - * then a two byte device id. - */ - u32 jedec_id; - u16 ext_id; - - /* The size listed here is what works with SPINOR_OP_SE, which isn't - * necessarily called a "sector" by the vendor. - */ - unsigned sector_size; - u16 n_sectors; - - u16 page_size; - u16 addr_width; - - u16 flags; -#define SECT_4K 0x01 /* SPINOR_OP_BE_4K works uniformly */ -#define SPI_NOR_NO_ERASE 0x02 /* No erase command needed */ -#define SST_WRITE 0x04 /* use SST byte programming */ -#define SPI_NOR_NO_FR 0x08 /* Can't do fastread */ -#define SECT_4K_PMC 0x10 /* SPINOR_OP_BE_4K_PMC works uniformly */ -#define SPI_NOR_DUAL_READ 0x20 /* Flash supports Dual Read */ -#define SPI_NOR_QUAD_READ 0x40 /* Flash supports Quad Read */ -#define USE_FSR 0x80 /* use flag status register */ -}; - +/* Used when the "_ext_id" is two bytes at most */ #define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \ ((kernel_ulong_t)&(struct flash_info) { \ - .jedec_id = (_jedec_id), \ - .ext_id = (_ext_id), \ + .id = { \ + ((_jedec_id) >> 16) & 0xff, \ + ((_jedec_id) >> 8) & 0xff, \ + (_jedec_id) & 0xff, \ + ((_ext_id) >> 8) & 0xff, \ + (_ext_id) & 0xff, \ + }, \ + .id_len = (!(_jedec_id) ? 0 : (3 + ((_ext_id) ? 
2 : 0))), \ + .sector_size = (_sector_size), \ + .n_sectors = (_n_sectors), \ + .page_size = 256, \ + .flags = (_flags), \ + }) + +#define INFO6(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \ + ((kernel_ulong_t)&(struct flash_info) { \ + .id = { \ + ((_jedec_id) >> 16) & 0xff, \ + ((_jedec_id) >> 8) & 0xff, \ + (_jedec_id) & 0xff, \ + ((_ext_id) >> 16) & 0xff, \ + ((_ext_id) >> 8) & 0xff, \ + (_ext_id) & 0xff, \ + }, \ + .id_len = 6, \ .sector_size = (_sector_size), \ .n_sectors = (_n_sectors), \ .page_size = 256, \ @@ -507,6 +532,9 @@ static const struct spi_device_id spi_nor_ids[] = { { "mr25h256", CAT25_INFO( 32 * 1024, 1, 256, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) }, { "mr25h10", CAT25_INFO(128 * 1024, 1, 256, 3, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) }, + /* Fujitsu */ + { "mb85rs1mt", INFO(0x047f27, 0, 128 * 1024, 1, SPI_NOR_NO_ERASE) }, + /* GigaDevice */ { "gd25q32", INFO(0xc84016, 0, 64 * 1024, 64, SECT_4K) }, { "gd25q64", INFO(0xc84017, 0, 64 * 1024, 128, SECT_4K) }, @@ -532,6 +560,7 @@ static const struct spi_device_id spi_nor_ids[] = { { "mx66l1g55g", INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) }, /* Micron */ + { "n25q032", INFO(0x20ba16, 0, 64 * 1024, 64, 0) }, { "n25q064", INFO(0x20ba17, 0, 64 * 1024, 128, 0) }, { "n25q128a11", INFO(0x20bb18, 0, 64 * 1024, 256, 0) }, { "n25q128a13", INFO(0x20ba18, 0, 64 * 1024, 256, 0) }, @@ -556,6 +585,7 @@ static const struct spi_device_id spi_nor_ids[] = { { "s70fl01gs", INFO(0x010221, 0x4d00, 256 * 1024, 256, 0) }, { "s25sl12800", INFO(0x012018, 0x0300, 256 * 1024, 64, 0) }, { "s25sl12801", INFO(0x012018, 0x0301, 64 * 1024, 256, 0) }, + { "s25fl128s", INFO6(0x012018, 0x4d0180, 64 * 1024, 256, SPI_NOR_QUAD_READ) }, { "s25fl129p0", INFO(0x012018, 0x4d00, 256 * 1024, 64, 0) }, { "s25fl129p1", INFO(0x012018, 0x4d01, 64 * 1024, 256, 0) }, { "s25sl004a", INFO(0x010212, 0, 64 * 1024, 8, 0) }, @@ -566,6 +596,7 @@ static const struct spi_device_id spi_nor_ids[] = { { "s25fl008k", INFO(0xef4014, 0, 64 * 1024, 16, SECT_4K) }, { "s25fl016k", INFO(0xef4015, 0, 64 * 1024, 32, SECT_4K) }, { "s25fl064k", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) }, + { "s25fl132k", INFO(0x014016, 0, 64 * 1024, 64, 0) }, /* SST -- large erase sizes are "overlays", "sectors" are 4K */ { "sst25vf040b", INFO(0xbf258d, 0, 64 * 1024, 8, SECT_4K | SST_WRITE) }, @@ -577,6 +608,7 @@ static const struct spi_device_id spi_nor_ids[] = { { "sst25wf010", INFO(0xbf2502, 0, 64 * 1024, 2, SECT_4K | SST_WRITE) }, { "sst25wf020", INFO(0xbf2503, 0, 64 * 1024, 4, SECT_4K | SST_WRITE) }, { "sst25wf040", INFO(0xbf2504, 0, 64 * 1024, 8, SECT_4K | SST_WRITE) }, + { "sst25wf080", INFO(0xbf2505, 0, 64 * 1024, 16, SECT_4K | SST_WRITE) }, /* ST Microelectronics -- newer production may have feature updates */ { "m25p05", INFO(0x202010, 0, 32 * 1024, 2, 0) }, @@ -588,7 +620,6 @@ static const struct spi_device_id spi_nor_ids[] = { { "m25p32", INFO(0x202016, 0, 64 * 1024, 64, 0) }, { "m25p64", INFO(0x202017, 0, 64 * 1024, 128, 0) }, { "m25p128", INFO(0x202018, 0, 256 * 1024, 64, 0) }, - { "n25q032", INFO(0x20ba16, 0, 64 * 1024, 64, 0) }, { "m25p05-nonjedec", INFO(0, 0, 32 * 1024, 2, 0) }, { "m25p10-nonjedec", INFO(0, 0, 32 * 1024, 4, 0) }, @@ -643,32 +674,24 @@ static const struct spi_device_id spi_nor_ids[] = { static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor) { int tmp; - u8 id[5]; - u32 jedec; - u16 ext_jedec; + u8 id[SPI_NOR_MAX_ID_LEN]; struct flash_info *info; - tmp = nor->read_reg(nor, SPINOR_OP_RDID, id, 5); + tmp = nor->read_reg(nor, SPINOR_OP_RDID, id, 
SPI_NOR_MAX_ID_LEN); if (tmp < 0) { dev_dbg(nor->dev, " error %d reading JEDEC ID\n", tmp); return ERR_PTR(tmp); } - jedec = id[0]; - jedec = jedec << 8; - jedec |= id[1]; - jedec = jedec << 8; - jedec |= id[2]; - - ext_jedec = id[3] << 8 | id[4]; for (tmp = 0; tmp < ARRAY_SIZE(spi_nor_ids) - 1; tmp++) { info = (void *)spi_nor_ids[tmp].driver_data; - if (info->jedec_id == jedec) { - if (info->ext_id == 0 || info->ext_id == ext_jedec) + if (info->id_len) { + if (!memcmp(info->id, id, info->id_len)) return &spi_nor_ids[tmp]; } } - dev_err(nor->dev, "unrecognized JEDEC id %06x\n", jedec); + dev_err(nor->dev, "unrecognized JEDEC id bytes: %02x, %2x, %2x\n", + id[0], id[1], id[2]); return ERR_PTR(-ENODEV); } @@ -703,11 +726,6 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, if (ret) return ret; - /* Wait until finished previous write command. */ - ret = wait_till_ready(nor); - if (ret) - goto time_out; - write_enable(nor); nor->sst_write_second = false; @@ -719,7 +737,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, /* write one byte. */ nor->write(nor, to, 1, retlen, buf); - ret = wait_till_ready(nor); + ret = spi_nor_wait_till_ready(nor); if (ret) goto time_out; } @@ -731,7 +749,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, /* write two bytes. */ nor->write(nor, to, 2, retlen, buf + actual); - ret = wait_till_ready(nor); + ret = spi_nor_wait_till_ready(nor); if (ret) goto time_out; to += 2; @@ -740,7 +758,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, nor->sst_write_second = false; write_disable(nor); - ret = wait_till_ready(nor); + ret = spi_nor_wait_till_ready(nor); if (ret) goto time_out; @@ -751,7 +769,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, nor->program_opcode = SPINOR_OP_BP; nor->write(nor, to, 1, retlen, buf + actual); - ret = wait_till_ready(nor); + ret = spi_nor_wait_till_ready(nor); if (ret) goto time_out; write_disable(nor); @@ -779,11 +797,6 @@ static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len, if (ret) return ret; - /* Wait until finished previous write command. 
*/ - ret = wait_till_ready(nor); - if (ret) - goto write_err; - write_enable(nor); page_offset = to & (nor->page_size - 1); @@ -802,16 +815,20 @@ static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len, if (page_size > nor->page_size) page_size = nor->page_size; - wait_till_ready(nor); + ret = spi_nor_wait_till_ready(nor); + if (ret) + goto write_err; + write_enable(nor); nor->write(nor, to + i, page_size, retlen, buf + i); } } + ret = spi_nor_wait_till_ready(nor); write_err: spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE); - return 0; + return ret; } static int macronix_quad_enable(struct spi_nor *nor) @@ -824,7 +841,7 @@ static int macronix_quad_enable(struct spi_nor *nor) nor->cmd_buf[0] = val | SR_QUAD_EN_MX; nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1, 0); - if (wait_till_ready(nor)) + if (spi_nor_wait_till_ready(nor)) return 1; ret = read_sr(nor); @@ -874,11 +891,11 @@ static int spansion_quad_enable(struct spi_nor *nor) return 0; } -static int set_quad_mode(struct spi_nor *nor, u32 jedec_id) +static int set_quad_mode(struct spi_nor *nor, struct flash_info *info) { int status; - switch (JEDEC_MFR(jedec_id)) { + switch (JEDEC_MFR(info)) { case CFI_MFR_MACRONIX: status = macronix_quad_enable(nor); if (status) { @@ -904,11 +921,6 @@ static int spi_nor_check(struct spi_nor *nor) return -EINVAL; } - if (!nor->read_id) - nor->read_id = spi_nor_read_id; - if (!nor->wait_till_ready) - nor->wait_till_ready = spi_nor_wait_till_ready; - return 0; } @@ -926,16 +938,24 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) if (ret) return ret; - id = spi_nor_match_id(name); - if (!id) + /* Try to auto-detect if chip name wasn't specified */ + if (!name) + id = spi_nor_read_id(nor); + else + id = spi_nor_match_id(name); + if (IS_ERR_OR_NULL(id)) return -ENOENT; info = (void *)id->driver_data; - if (info->jedec_id) { + /* + * If caller has specified name of flash model that can normally be + * detected using JEDEC, let's verify it. 
+ */ + if (name && info->id_len) { const struct spi_device_id *jid; - jid = nor->read_id(nor); + jid = spi_nor_read_id(nor); if (IS_ERR(jid)) { return PTR_ERR(jid); } else if (jid != id) { @@ -960,9 +980,9 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) * up with the software protection bits set */ - if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ATMEL || - JEDEC_MFR(info->jedec_id) == CFI_MFR_INTEL || - JEDEC_MFR(info->jedec_id) == CFI_MFR_SST) { + if (JEDEC_MFR(info) == CFI_MFR_ATMEL || + JEDEC_MFR(info) == CFI_MFR_INTEL || + JEDEC_MFR(info) == CFI_MFR_SST) { write_enable(nor); write_sr(nor, 0); } @@ -977,7 +997,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) mtd->_read = spi_nor_read; /* nor protection support for STmicro chips */ - if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ST) { + if (JEDEC_MFR(info) == CFI_MFR_ST) { mtd->_lock = spi_nor_lock; mtd->_unlock = spi_nor_unlock; } @@ -988,9 +1008,8 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) else mtd->_write = spi_nor_write; - if ((info->flags & USE_FSR) && - nor->wait_till_ready == spi_nor_wait_till_ready) - nor->wait_till_ready = spi_nor_wait_till_fsr_ready; + if (info->flags & USE_FSR) + nor->flags |= SNOR_F_USE_FSR; #ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS /* prefer "small sector" erase if possible */ @@ -1031,7 +1050,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) /* Quad/Dual-read mode takes precedence over fast/normal */ if (mode == SPI_NOR_QUAD && info->flags & SPI_NOR_QUAD_READ) { - ret = set_quad_mode(nor, info->jedec_id); + ret = set_quad_mode(nor, info); if (ret) { dev_err(dev, "quad mode not supported\n"); return ret; @@ -1067,7 +1086,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) else if (mtd->size > 0x1000000) { /* enable 4-byte addressing if the device exceeds 16MiB */ nor->addr_width = 4; - if (JEDEC_MFR(info->jedec_id) == CFI_MFR_AMD) { + if (JEDEC_MFR(info) == CFI_MFR_AMD) { /* Dedicated 4-byte command set */ switch (nor->flash_read) { case SPI_NOR_QUAD: @@ -1088,7 +1107,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) nor->erase_opcode = SPINOR_OP_SE_4B; mtd->erasesize = info->sector_size; } else - set_4byte(nor, info->jedec_id, 1); + set_4byte(nor, info, 1); } else { nor->addr_width = 3; } diff --git a/drivers/mtd/tests/oobtest.c b/drivers/mtd/tests/oobtest.c index dc4f9602b97..5e061186eab 100644 --- a/drivers/mtd/tests/oobtest.c +++ b/drivers/mtd/tests/oobtest.c @@ -34,8 +34,11 @@ #include "mtd_test.h" static int dev = -EINVAL; +static int bitflip_limit; module_param(dev, int, S_IRUGO); MODULE_PARM_DESC(dev, "MTD device number to use"); +module_param(bitflip_limit, int, S_IRUGO); +MODULE_PARM_DESC(bitflip_limit, "Max. allowed bitflips per page"); static struct mtd_info *mtd; static unsigned char *readbuf; @@ -115,12 +118,36 @@ static int write_whole_device(void) return 0; } +/* + * Display the address, offset and data bytes at comparison failure. + * Return number of bitflips encountered. 
+ */ +static size_t memcmpshow(loff_t addr, const void *cs, const void *ct, size_t count) +{ + const unsigned char *su1, *su2; + int res; + size_t i = 0; + size_t bitflips = 0; + + for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--, i++) { + res = *su1 ^ *su2; + if (res) { + pr_info("error @addr[0x%lx:0x%zx] 0x%x -> 0x%x diff 0x%x\n", + (unsigned long)addr, i, *su1, *su2, res); + bitflips += hweight8(res); + } + } + + return bitflips; +} + static int verify_eraseblock(int ebnum) { int i; struct mtd_oob_ops ops; int err = 0; loff_t addr = (loff_t)ebnum * mtd->erasesize; + size_t bitflips; prandom_bytes_state(&rnd_state, writebuf, use_len_max * pgcnt); for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) { @@ -139,8 +166,11 @@ static int verify_eraseblock(int ebnum) errcnt += 1; return err ? err : -1; } - if (memcmp(readbuf, writebuf + (use_len_max * i) + use_offset, - use_len)) { + + bitflips = memcmpshow(addr, readbuf, + writebuf + (use_len_max * i) + use_offset, + use_len); + if (bitflips > bitflip_limit) { pr_err("error: verify failed at %#llx\n", (long long)addr); errcnt += 1; @@ -148,7 +178,10 @@ static int verify_eraseblock(int ebnum) pr_err("error: too many errors\n"); return -1; } + } else if (bitflips) { + pr_info("ignoring error as within bitflip_limit\n"); } + if (use_offset != 0 || use_len < mtd->ecclayout->oobavail) { int k; @@ -167,9 +200,10 @@ static int verify_eraseblock(int ebnum) errcnt += 1; return err ? err : -1; } - if (memcmp(readbuf + use_offset, - writebuf + (use_len_max * i) + use_offset, - use_len)) { + bitflips = memcmpshow(addr, readbuf + use_offset, + writebuf + (use_len_max * i) + use_offset, + use_len); + if (bitflips > bitflip_limit) { pr_err("error: verify failed at %#llx\n", (long long)addr); errcnt += 1; @@ -177,7 +211,10 @@ static int verify_eraseblock(int ebnum) pr_err("error: too many errors\n"); return -1; } + } else if (bitflips) { + pr_info("ignoring error as within bitflip_limit\n"); } + for (k = 0; k < use_offset; ++k) if (readbuf[k] != 0xff) { pr_err("error: verify 0xff " @@ -216,6 +253,9 @@ static int verify_eraseblock_in_one_go(int ebnum) int err = 0; loff_t addr = (loff_t)ebnum * mtd->erasesize; size_t len = mtd->ecclayout->oobavail * pgcnt; + size_t oobavail = mtd->ecclayout->oobavail; + size_t bitflips; + int i; prandom_bytes_state(&rnd_state, writebuf, len); ops.mode = MTD_OPS_AUTO_OOB; @@ -226,6 +266,8 @@ static int verify_eraseblock_in_one_go(int ebnum) ops.ooboffs = 0; ops.datbuf = NULL; ops.oobbuf = readbuf; + + /* read entire block's OOB at one go */ err = mtd_read_oob(mtd, addr, &ops); if (err || ops.oobretlen != len) { pr_err("error: readoob failed at %#llx\n", @@ -233,13 +275,21 @@ static int verify_eraseblock_in_one_go(int ebnum) errcnt += 1; return err ? 
err : -1; } - if (memcmp(readbuf, writebuf, len)) { - pr_err("error: verify failed at %#llx\n", - (long long)addr); - errcnt += 1; - if (errcnt > 1000) { - pr_err("error: too many errors\n"); - return -1; + + /* verify one page OOB at a time for bitflip per page limit check */ + for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) { + bitflips = memcmpshow(addr, readbuf + (i * oobavail), + writebuf + (i * oobavail), oobavail); + if (bitflips > bitflip_limit) { + pr_err("error: verify failed at %#llx\n", + (long long)addr); + errcnt += 1; + if (errcnt > 1000) { + pr_err("error: too many errors\n"); + return -1; + } + } else if (bitflips) { + pr_info("ignoring error as within bitflip_limit\n"); } } @@ -610,7 +660,8 @@ static int __init mtd_oobtest_init(void) err = mtd_read_oob(mtd, addr, &ops); if (err) goto out; - if (memcmp(readbuf, writebuf, mtd->ecclayout->oobavail * 2)) { + if (memcmpshow(addr, readbuf, writebuf, + mtd->ecclayout->oobavail * 2)) { pr_err("error: verify failed at %#llx\n", (long long)addr); errcnt += 1; diff --git a/drivers/mtd/tests/torturetest.c b/drivers/mtd/tests/torturetest.c index eeab96973cf..b55bc52a134 100644 --- a/drivers/mtd/tests/torturetest.c +++ b/drivers/mtd/tests/torturetest.c @@ -264,7 +264,9 @@ static int __init tort_init(void) int i; void *patt; - mtdtest_erase_good_eraseblocks(mtd, bad_ebs, eb, ebcnt); + err = mtdtest_erase_good_eraseblocks(mtd, bad_ebs, eb, ebcnt); + if (err) + goto out; /* Check if the eraseblocks contain only 0xFF bytes */ if (check) { diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index ef2dd2e4754..a3ecf580963 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -50,6 +50,17 @@ config PWM_ATMEL To compile this driver as a module, choose M here: the module will be called pwm-atmel. +config PWM_ATMEL_HLCDC_PWM + tristate "Atmel HLCDC PWM support" + depends on MFD_ATMEL_HLCDC + help + Generic PWM framework driver for the PWM output of the HLCDC + (Atmel High-end LCD Controller). This PWM output is mainly used + to control the LCD backlight. + + To compile this driver as a module, choose M here: the module + will be called pwm-atmel-hlcdc. + config PWM_ATMEL_TCB tristate "Atmel TC Block PWM support" depends on ATMEL_TCLIB && OF @@ -71,6 +82,15 @@ config PWM_BCM_KONA To compile this driver as a module, choose M here: the module will be called pwm-bcm-kona. +config PWM_BCM2835 + tristate "BCM2835 PWM support" + depends on ARCH_BCM2835 + help + PWM framework driver for BCM2835 controller (Raspberry Pi) + + To compile this driver as a module, choose M here: the module + will be called pwm-bcm2835. + config PWM_BFIN tristate "Blackfin PWM support" depends on BFIN_GPTIMERS @@ -235,7 +255,7 @@ config PWM_ROCKCHIP config PWM_SAMSUNG tristate "Samsung PWM support" - depends on PLAT_SAMSUNG + depends on PLAT_SAMSUNG || ARCH_EXYNOS help Generic PWM framework driver for Samsung. 
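Looping back to the oobtest change above: memcmpshow() treats a mismatch as a bit count rather than a hard failure. XORing the expected byte against the byte read back isolates the flipped bits, and hweight8() tallies them, so a page only fails verification once the per-page sum exceeds bitflip_limit. Below is a self-contained illustration of that counting scheme; count_bitflips is a hypothetical stand-in, and __builtin_popcount substitutes for the kernel's hweight8():

#include <stdint.h>
#include <stdio.h>

/* XOR isolates the flipped bits; popcount tallies them, as hweight8() does. */
static size_t count_bitflips(const uint8_t *want, const uint8_t *got,
			     size_t len)
{
	size_t flips = 0;
	size_t i;

	for (i = 0; i < len; i++)
		flips += __builtin_popcount(want[i] ^ got[i]);

	return flips;
}

int main(void)
{
	const uint8_t want[] = { 0xff, 0x0f };
	const uint8_t got[]  = { 0xfd, 0x0f };	/* 0xff ^ 0xfd = 0x02 */

	printf("%zu bitflips\n", count_bitflips(want, got, sizeof(want)));
	return 0;	/* prints "1 bitflips" */
}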
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile index c458606c375..65259ac1e8d 100644 --- a/drivers/pwm/Makefile +++ b/drivers/pwm/Makefile @@ -2,8 +2,10 @@ obj-$(CONFIG_PWM) += core.o obj-$(CONFIG_PWM_SYSFS) += sysfs.o obj-$(CONFIG_PWM_AB8500) += pwm-ab8500.o obj-$(CONFIG_PWM_ATMEL) += pwm-atmel.o +obj-$(CONFIG_PWM_ATMEL_HLCDC_PWM) += pwm-atmel-hlcdc.o obj-$(CONFIG_PWM_ATMEL_TCB) += pwm-atmel-tcb.o obj-$(CONFIG_PWM_BCM_KONA) += pwm-bcm-kona.o +obj-$(CONFIG_PWM_BCM2835) += pwm-bcm2835.o obj-$(CONFIG_PWM_BFIN) += pwm-bfin.o obj-$(CONFIG_PWM_CLPS711X) += pwm-clps711x.o obj-$(CONFIG_PWM_EP93XX) += pwm-ep93xx.o diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c new file mode 100644 index 00000000000..e7a785fadcd --- /dev/null +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -0,0 +1,299 @@ +/* + * Copyright (C) 2014 Free Electrons + * Copyright (C) 2014 Atmel + * + * Author: Boris BREZILLON <boris.brezillon@free-electrons.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/mfd/atmel-hlcdc.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> +#include <linux/regmap.h> + +#define ATMEL_HLCDC_PWMCVAL_MASK GENMASK(15, 8) +#define ATMEL_HLCDC_PWMCVAL(x) (((x) << 8) & ATMEL_HLCDC_PWMCVAL_MASK) +#define ATMEL_HLCDC_PWMPOL BIT(4) +#define ATMEL_HLCDC_PWMPS_MASK GENMASK(2, 0) +#define ATMEL_HLCDC_PWMPS_MAX 0x6 +#define ATMEL_HLCDC_PWMPS(x) ((x) & ATMEL_HLCDC_PWMPS_MASK) + +struct atmel_hlcdc_pwm_errata { + bool slow_clk_erratum; + bool div1_clk_erratum; +}; + +struct atmel_hlcdc_pwm { + struct pwm_chip chip; + struct atmel_hlcdc *hlcdc; + struct clk *cur_clk; + const struct atmel_hlcdc_pwm_errata *errata; +}; + +static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip) +{ + return container_of(chip, struct atmel_hlcdc_pwm, chip); +} + +static int atmel_hlcdc_pwm_config(struct pwm_chip *c, + struct pwm_device *pwm, + int duty_ns, int period_ns) +{ + struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c); + struct atmel_hlcdc *hlcdc = chip->hlcdc; + struct clk *new_clk = hlcdc->slow_clk; + u64 pwmcval = duty_ns * 256; + unsigned long clk_freq; + u64 clk_period_ns; + u32 pwmcfg; + int pres; + + if (!chip->errata || !chip->errata->slow_clk_erratum) { + clk_freq = clk_get_rate(new_clk); + clk_period_ns = (u64)NSEC_PER_SEC * 256; + do_div(clk_period_ns, clk_freq); + } + + /* Errata: cannot use slow clk on some IP revisions */ + if ((chip->errata && chip->errata->slow_clk_erratum) || + clk_period_ns > period_ns) { + new_clk = hlcdc->sys_clk; + clk_freq = clk_get_rate(new_clk); + clk_period_ns = (u64)NSEC_PER_SEC * 256; + do_div(clk_period_ns, clk_freq); + } + + for (pres = 0; pres <= ATMEL_HLCDC_PWMPS_MAX; pres++) { + /* Errata: cannot divide by 1 on some IP revisions */ + if (!pres && chip->errata && chip->errata->div1_clk_erratum) + continue; + + if ((clk_period_ns << pres) >= period_ns) + break; + } + + if (pres > 
ATMEL_HLCDC_PWMPS_MAX) + return -EINVAL; + + pwmcfg = ATMEL_HLCDC_PWMPS(pres); + + if (new_clk != chip->cur_clk) { + u32 gencfg = 0; + int ret; + + ret = clk_prepare_enable(new_clk); + if (ret) + return ret; + + clk_disable_unprepare(chip->cur_clk); + chip->cur_clk = new_clk; + + if (new_clk == hlcdc->sys_clk) + gencfg = ATMEL_HLCDC_CLKPWMSEL; + + ret = regmap_update_bits(hlcdc->regmap, ATMEL_HLCDC_CFG(0), + ATMEL_HLCDC_CLKPWMSEL, gencfg); + if (ret) + return ret; + } + + do_div(pwmcval, period_ns); + + /* + * The PWM duty cycle is configurable from 0/256 to 255/256 of the + * period cycle. Hence we can't set a duty cycle occupying the + * whole period cycle if we're asked to. + * Set it to 255 if pwmcval is greater than 256. + */ + if (pwmcval > 255) + pwmcval = 255; + + pwmcfg |= ATMEL_HLCDC_PWMCVAL(pwmcval); + + return regmap_update_bits(hlcdc->regmap, ATMEL_HLCDC_CFG(6), + ATMEL_HLCDC_PWMCVAL_MASK | + ATMEL_HLCDC_PWMPS_MASK, + pwmcfg); +} + +static int atmel_hlcdc_pwm_set_polarity(struct pwm_chip *c, + struct pwm_device *pwm, + enum pwm_polarity polarity) +{ + struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c); + struct atmel_hlcdc *hlcdc = chip->hlcdc; + u32 cfg = 0; + + if (polarity == PWM_POLARITY_NORMAL) + cfg = ATMEL_HLCDC_PWMPOL; + + return regmap_update_bits(hlcdc->regmap, ATMEL_HLCDC_CFG(6), + ATMEL_HLCDC_PWMPOL, cfg); +} + +static int atmel_hlcdc_pwm_enable(struct pwm_chip *c, struct pwm_device *pwm) +{ + struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c); + struct atmel_hlcdc *hlcdc = chip->hlcdc; + u32 status; + int ret; + + ret = regmap_write(hlcdc->regmap, ATMEL_HLCDC_EN, ATMEL_HLCDC_PWM); + if (ret) + return ret; + + while (true) { + ret = regmap_read(hlcdc->regmap, ATMEL_HLCDC_SR, &status); + if (ret) + return ret; + + if ((status & ATMEL_HLCDC_PWM) != 0) + break; + + usleep_range(1, 10); + } + + return 0; +} + +static void atmel_hlcdc_pwm_disable(struct pwm_chip *c, + struct pwm_device *pwm) +{ + struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c); + struct atmel_hlcdc *hlcdc = chip->hlcdc; + u32 status; + int ret; + + ret = regmap_write(hlcdc->regmap, ATMEL_HLCDC_DIS, ATMEL_HLCDC_PWM); + if (ret) + return; + + while (true) { + ret = regmap_read(hlcdc->regmap, ATMEL_HLCDC_SR, &status); + if (ret) + return; + + if ((status & ATMEL_HLCDC_PWM) == 0) + break; + + usleep_range(1, 10); + } +} + +static const struct pwm_ops atmel_hlcdc_pwm_ops = { + .config = atmel_hlcdc_pwm_config, + .set_polarity = atmel_hlcdc_pwm_set_polarity, + .enable = atmel_hlcdc_pwm_enable, + .disable = atmel_hlcdc_pwm_disable, + .owner = THIS_MODULE, +}; + +static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_at91sam9x5_errata = { + .slow_clk_erratum = true, +}; + +static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_sama5d3_errata = { + .div1_clk_erratum = true, +}; + +static const struct of_device_id atmel_hlcdc_dt_ids[] = { + { + .compatible = "atmel,at91sam9x5-hlcdc", + .data = &atmel_hlcdc_pwm_at91sam9x5_errata, + }, + { + .compatible = "atmel,sama5d3-hlcdc", + .data = &atmel_hlcdc_pwm_sama5d3_errata, + }, + { /* sentinel */ }, +}; + +static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + struct device *dev = &pdev->dev; + struct atmel_hlcdc_pwm *chip; + struct atmel_hlcdc *hlcdc; + int ret; + + hlcdc = dev_get_drvdata(dev->parent); + + chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + ret = clk_prepare_enable(hlcdc->periph_clk); + if (ret) + return ret; + + match = 
of_match_node(atmel_hlcdc_dt_ids, dev->parent->of_node); + if (match) + chip->errata = match->data; + + chip->hlcdc = hlcdc; + chip->chip.ops = &atmel_hlcdc_pwm_ops; + chip->chip.dev = dev; + chip->chip.base = -1; + chip->chip.npwm = 1; + chip->chip.of_xlate = of_pwm_xlate_with_flags; + chip->chip.of_pwm_n_cells = 3; + chip->chip.can_sleep = 1; + + ret = pwmchip_add(&chip->chip); + if (ret) { + clk_disable_unprepare(hlcdc->periph_clk); + return ret; + } + + platform_set_drvdata(pdev, chip); + + return 0; +} + +static int atmel_hlcdc_pwm_remove(struct platform_device *pdev) +{ + struct atmel_hlcdc_pwm *chip = platform_get_drvdata(pdev); + int ret; + + ret = pwmchip_remove(&chip->chip); + if (ret) + return ret; + + clk_disable_unprepare(chip->hlcdc->periph_clk); + + return 0; +} + +static const struct of_device_id atmel_hlcdc_pwm_dt_ids[] = { + { .compatible = "atmel,hlcdc-pwm" }, + { /* sentinel */ }, +}; + +static struct platform_driver atmel_hlcdc_pwm_driver = { + .driver = { + .name = "atmel-hlcdc-pwm", + .of_match_table = atmel_hlcdc_pwm_dt_ids, + }, + .probe = atmel_hlcdc_pwm_probe, + .remove = atmel_hlcdc_pwm_remove, +}; +module_platform_driver(atmel_hlcdc_pwm_driver); + +MODULE_ALIAS("platform:atmel-hlcdc-pwm"); +MODULE_AUTHOR("Boris Brezillon <boris.brezillon@free-electrons.com>"); +MODULE_DESCRIPTION("Atmel HLCDC PWM driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c new file mode 100644 index 00000000000..b4c7f956b6f --- /dev/null +++ b/drivers/pwm/pwm-bcm2835.c @@ -0,0 +1,205 @@ +/* + * Copyright 2014 Bart Tanghe <bart.tanghe@thomasmore.be> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2. + */ + +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> + +#define PWM_CONTROL 0x000 +#define PWM_CONTROL_SHIFT(x) ((x) * 8) +#define PWM_CONTROL_MASK 0xff +#define PWM_MODE 0x80 /* set timer in PWM mode */ +#define PWM_ENABLE (1 << 0) +#define PWM_POLARITY (1 << 4) + +#define PERIOD(x) (((x) * 0x10) + 0x10) +#define DUTY(x) (((x) * 0x10) + 0x14) + +#define MIN_PERIOD 108 /* 9.2 MHz max. 
PWM clock */ + +struct bcm2835_pwm { + struct pwm_chip chip; + struct device *dev; + unsigned long scaler; + void __iomem *base; + struct clk *clk; +}; + +static inline struct bcm2835_pwm *to_bcm2835_pwm(struct pwm_chip *chip) +{ + return container_of(chip, struct bcm2835_pwm, chip); +} + +static int bcm2835_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); + u32 value; + + value = readl(pc->base + PWM_CONTROL); + value &= ~(PWM_CONTROL_MASK << PWM_CONTROL_SHIFT(pwm->hwpwm)); + value |= (PWM_MODE << PWM_CONTROL_SHIFT(pwm->hwpwm)); + writel(value, pc->base + PWM_CONTROL); + + return 0; +} + +static void bcm2835_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); + u32 value; + + value = readl(pc->base + PWM_CONTROL); + value &= ~(PWM_CONTROL_MASK << PWM_CONTROL_SHIFT(pwm->hwpwm)); + writel(value, pc->base + PWM_CONTROL); +} + +static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, + int duty_ns, int period_ns) +{ + struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); + + if (period_ns <= MIN_PERIOD) { + dev_err(pc->dev, "period %d not supported, minimum %d\n", + period_ns, MIN_PERIOD); + return -EINVAL; + } + + writel(duty_ns / pc->scaler, pc->base + DUTY(pwm->hwpwm)); + writel(period_ns / pc->scaler, pc->base + PERIOD(pwm->hwpwm)); + + return 0; +} + +static int bcm2835_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); + u32 value; + + value = readl(pc->base + PWM_CONTROL); + value |= PWM_ENABLE << PWM_CONTROL_SHIFT(pwm->hwpwm); + writel(value, pc->base + PWM_CONTROL); + + return 0; +} + +static void bcm2835_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); + u32 value; + + value = readl(pc->base + PWM_CONTROL); + value &= ~(PWM_ENABLE << PWM_CONTROL_SHIFT(pwm->hwpwm)); + writel(value, pc->base + PWM_CONTROL); +} + +static int bcm2835_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm, + enum pwm_polarity polarity) +{ + struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); + u32 value; + + value = readl(pc->base + PWM_CONTROL); + + if (polarity == PWM_POLARITY_NORMAL) + value &= ~(PWM_POLARITY << PWM_CONTROL_SHIFT(pwm->hwpwm)); + else + value |= PWM_POLARITY << PWM_CONTROL_SHIFT(pwm->hwpwm); + + writel(value, pc->base + PWM_CONTROL); + + return 0; +} + +static const struct pwm_ops bcm2835_pwm_ops = { + .request = bcm2835_pwm_request, + .free = bcm2835_pwm_free, + .config = bcm2835_pwm_config, + .enable = bcm2835_pwm_enable, + .disable = bcm2835_pwm_disable, + .set_polarity = bcm2835_set_polarity, + .owner = THIS_MODULE, +}; + +static int bcm2835_pwm_probe(struct platform_device *pdev) +{ + struct bcm2835_pwm *pc; + struct resource *res; + int ret; + + pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL); + if (!pc) + return -ENOMEM; + + pc->dev = &pdev->dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + pc->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(pc->base)) + return PTR_ERR(pc->base); + + pc->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(pc->clk)) { + dev_err(&pdev->dev, "clock not found: %ld\n", PTR_ERR(pc->clk)); + return PTR_ERR(pc->clk); + } + + ret = clk_prepare_enable(pc->clk); + if (ret) + return ret; + + pc->scaler = NSEC_PER_SEC / clk_get_rate(pc->clk); + + pc->chip.dev = &pdev->dev; + pc->chip.ops = &bcm2835_pwm_ops; + pc->chip.npwm = 2; + + platform_set_drvdata(pdev, pc); + + ret = 
pwmchip_add(&pc->chip); + if (ret < 0) + goto add_fail; + + return 0; + +add_fail: + clk_disable_unprepare(pc->clk); + return ret; +} + +static int bcm2835_pwm_remove(struct platform_device *pdev) +{ + struct bcm2835_pwm *pc = platform_get_drvdata(pdev); + + clk_disable_unprepare(pc->clk); + + return pwmchip_remove(&pc->chip); +} + +static const struct of_device_id bcm2835_pwm_of_match[] = { + { .compatible = "brcm,bcm2835-pwm", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, bcm2835_pwm_of_match); + +static struct platform_driver bcm2835_pwm_driver = { + .driver = { + .name = "bcm2835-pwm", + .of_match_table = bcm2835_pwm_of_match, + }, + .probe = bcm2835_pwm_probe, + .remove = bcm2835_pwm_remove, +}; +module_platform_driver(bcm2835_pwm_driver); + +MODULE_AUTHOR("Bart Tanghe <bart.tanghe@thomasmore.be>"); +MODULE_DESCRIPTION("Broadcom BCM2835 PWM driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index 0f2cc7ef778..f9dfc8b6407 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -17,6 +17,7 @@ #include <linux/mutex.h> #include <linux/of_address.h> #include <linux/platform_device.h> +#include <linux/pm.h> #include <linux/pwm.h> #include <linux/regmap.h> #include <linux/slab.h> @@ -299,7 +300,7 @@ static int fsl_counter_clock_enable(struct fsl_pwm_chip *fpc) { int ret; - if (fpc->use_count != 0) + if (fpc->use_count++ != 0) return 0; /* select counter clock source */ @@ -316,8 +317,6 @@ static int fsl_counter_clock_enable(struct fsl_pwm_chip *fpc) return ret; } - fpc->use_count++; - return 0; } @@ -399,12 +398,23 @@ static int fsl_pwm_init(struct fsl_pwm_chip *fpc) return 0; } +static bool fsl_pwm_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case FTM_CNT: + return true; + } + return false; +} + static const struct regmap_config fsl_pwm_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, .max_register = FTM_PWMLOAD, + .volatile_reg = fsl_pwm_volatile_reg, + .cache_type = REGCACHE_RBTREE, }; static int fsl_pwm_probe(struct platform_device *pdev) @@ -427,7 +437,7 @@ static int fsl_pwm_probe(struct platform_device *pdev) if (IS_ERR(base)) return PTR_ERR(base); - fpc->regmap = devm_regmap_init_mmio_clk(&pdev->dev, NULL, base, + fpc->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "ftm_sys", base, &fsl_pwm_regmap_config); if (IS_ERR(fpc->regmap)) { dev_err(&pdev->dev, "regmap init failed\n"); @@ -478,6 +488,51 @@ static int fsl_pwm_remove(struct platform_device *pdev) return pwmchip_remove(&fpc->chip); } +#ifdef CONFIG_PM_SLEEP +static int fsl_pwm_suspend(struct device *dev) +{ + struct fsl_pwm_chip *fpc = dev_get_drvdata(dev); + u32 val; + + regcache_cache_only(fpc->regmap, true); + regcache_mark_dirty(fpc->regmap); + + /* read from cache */ + regmap_read(fpc->regmap, FTM_OUTMASK, &val); + if ((val & 0xFF) != 0xFF) { + clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_CNTEN]); + clk_disable_unprepare(fpc->clk[fpc->cnt_select]); + clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_SYS]); + } + + return 0; +} + +static int fsl_pwm_resume(struct device *dev) +{ + struct fsl_pwm_chip *fpc = dev_get_drvdata(dev); + u32 val; + + /* read from cache */ + regmap_read(fpc->regmap, FTM_OUTMASK, &val); + if ((val & 0xFF) != 0xFF) { + clk_prepare_enable(fpc->clk[FSL_PWM_CLK_SYS]); + clk_prepare_enable(fpc->clk[fpc->cnt_select]); + clk_prepare_enable(fpc->clk[FSL_PWM_CLK_CNTEN]); + } + + /* restore all registers from cache */ + regcache_cache_only(fpc->regmap, false); + regcache_sync(fpc->regmap); + +
return 0; +} +#endif + +static const struct dev_pm_ops fsl_pwm_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(fsl_pwm_suspend, fsl_pwm_resume) +}; + static const struct of_device_id fsl_pwm_dt_ids[] = { { .compatible = "fsl,vf610-ftm-pwm", }, { /* sentinel */ } @@ -488,6 +543,7 @@ static struct platform_driver fsl_pwm_driver = { .driver = { .name = "fsl-ftm-pwm", .of_match_table = fsl_pwm_dt_ids, + .pm = &fsl_pwm_pm_ops, }, .probe = fsl_pwm_probe, .remove = fsl_pwm_remove, diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index f554d25b439..af40db0df58 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -112,6 +112,18 @@ config CPU_THERMAL If you want this support, you should say Y here. +config CLOCK_THERMAL + bool "Generic clock cooling support" + depends on COMMON_CLK + depends on PM_OPP + help + This entry implements the generic clock cooling mechanism through + frequency clipping. Typically used to cool off co-processors. The + device that is configured to use this cooling mechanism will be + controlled to reduce clock frequency whenever temperature is high. + + If you want this support, you should say Y here. + config THERMAL_EMULATION bool "Thermal emulation mode support" help @@ -143,6 +155,16 @@ config SPEAR_THERMAL Enable this to plug the SPEAr thermal sensor driver into the Linux thermal framework. +config ROCKCHIP_THERMAL + tristate "Rockchip thermal driver" + depends on ARCH_ROCKCHIP + depends on RESET_CONTROLLER + help + Rockchip thermal driver provides support for Temperature sensor + ADC (TS-ADC) found on Rockchip SoCs. It supports one critical + trip point. Cpufreq is used as the cooling device and will throttle + CPUs when the Temperature crosses the passive trip point. + config RCAR_THERMAL tristate "Renesas R-Car thermal driver" depends on ARCH_SHMOBILE || COMPILE_TEST @@ -185,6 +207,16 @@ config ARMADA_THERMAL Enable this option if you want to have support for thermal management controller present in Armada 370 and Armada XP SoC. +config TEGRA_SOCTHERM + tristate "Tegra SOCTHERM thermal management" + depends on ARCH_TEGRA + help + Enable this option for integrated thermal management support on NVIDIA + Tegra124 systems-on-chip. The driver supports four thermal zones + (CPU, GPU, MEM, PLLX). Cooling devices can be bound to the thermal + zones to manage temperatures. This option is also required for the + emergency thermal reset (thermtrip) feature to function. 
+ config DB8500_CPUFREQ_COOLING tristate "DB8500 cpufreq cooling" depends on ARCH_U8500 diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 39c4fe87da2..fa0dc486790 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -18,8 +18,12 @@ thermal_sys-$(CONFIG_THERMAL_GOV_USER_SPACE) += user_space.o # cpufreq cooling thermal_sys-$(CONFIG_CPU_THERMAL) += cpu_cooling.o +# clock cooling +thermal_sys-$(CONFIG_CLOCK_THERMAL) += clock_cooling.o + # platform thermal drivers obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o +obj-$(CONFIG_ROCKCHIP_THERMAL) += rockchip_thermal.o obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o obj-$(CONFIG_KIRKWOOD_THERMAL) += kirkwood_thermal.o obj-y += samsung/ @@ -34,3 +38,4 @@ obj-$(CONFIG_INTEL_SOC_DTS_THERMAL) += intel_soc_dts_thermal.o obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/ obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/ obj-$(CONFIG_ST_THERMAL) += st/ +obj-$(CONFIG_TEGRA_SOCTHERM) += tegra_soctherm.o diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c index eaaf59c98ba..c2556cf5186 100644 --- a/drivers/thermal/armada_thermal.c +++ b/drivers/thermal/armada_thermal.c @@ -35,10 +35,6 @@ #define PMU_TDC0_OTF_CAL_MASK (0x1 << 30) #define PMU_TDC0_START_CAL_MASK (0x1 << 25) -#define A375_Z1_CAL_RESET_LSB 0x8011e214 -#define A375_Z1_CAL_RESET_MSB 0x30a88019 -#define A375_Z1_WORKAROUND_BIT BIT(9) - #define A375_UNIT_CONTROL_SHIFT 27 #define A375_UNIT_CONTROL_MASK 0x7 #define A375_READOUT_INVERT BIT(15) @@ -124,24 +120,12 @@ static void armada375_init_sensor(struct platform_device *pdev, struct armada_thermal_priv *priv) { unsigned long reg; - bool quirk_needed = - !!of_device_is_compatible(pdev->dev.of_node, - "marvell,armada375-z1-thermal"); - - if (quirk_needed) { - /* Ensure these registers have the default (reset) values */ - writel(A375_Z1_CAL_RESET_LSB, priv->control); - writel(A375_Z1_CAL_RESET_MSB, priv->control + 0x4); - } reg = readl(priv->control + 4); reg &= ~(A375_UNIT_CONTROL_MASK << A375_UNIT_CONTROL_SHIFT); reg &= ~A375_READOUT_INVERT; reg &= ~A375_HW_RESETn; - if (quirk_needed) - reg |= A375_Z1_WORKAROUND_BIT; - writel(reg, priv->control + 4); mdelay(20); @@ -260,10 +244,6 @@ static const struct of_device_id armada_thermal_id_table[] = { .data = &armada375_data, }, { - .compatible = "marvell,armada375-z1-thermal", - .data = &armada375_data, - }, - { .compatible = "marvell,armada380-thermal", .data = &armada380_data, }, diff --git a/drivers/thermal/clock_cooling.c b/drivers/thermal/clock_cooling.c new file mode 100644 index 00000000000..1b4ff0f4c71 --- /dev/null +++ b/drivers/thermal/clock_cooling.c @@ -0,0 +1,485 @@ +/* + * drivers/thermal/clock_cooling.c + * + * Copyright (C) 2014 Eduardo Valentin <edubezval@gmail.com> + * + * Copyright (C) 2013 Texas Instruments Inc. + * Contact: Eduardo Valentin <eduardo.valentin@ti.com> + * + * Highly based on cpu_cooling.c. + * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com) + * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/clk.h> +#include <linux/cpufreq.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/idr.h> +#include <linux/mutex.h> +#include <linux/pm_opp.h> +#include <linux/slab.h> +#include <linux/thermal.h> +#include <linux/clock_cooling.h> + +/** + * struct clock_cooling_device - data for cooling device with clock + * @id: unique integer value corresponding to each clock_cooling_device + * registered. + * @dev: struct device pointer to the device being used to cool off using + * clock frequencies. + * @cdev: thermal_cooling_device pointer to keep track of the + * registered cooling device. + * @clk_rate_change_nb: reference to notifier block used to receive clock + * rate changes. + * @freq_table: frequency table used to keep track of available frequencies. + * @clock_state: integer value representing the current state of clock + * cooling devices. + * @clock_val: integer value representing the absolute value of the clipped + * frequency. + * @clk: struct clk reference used to enforce clock limits. + * @lock: mutex lock to protect this struct. + * + * This structure is required for keeping information of each + * clock_cooling_device registered. In order to prevent corruption of this + * structure, a mutex @lock is used. + */ +struct clock_cooling_device { + int id; + struct device *dev; + struct thermal_cooling_device *cdev; + struct notifier_block clk_rate_change_nb; + struct cpufreq_frequency_table *freq_table; + unsigned long clock_state; + unsigned long clock_val; + struct clk *clk; + struct mutex lock; /* lock to protect the content of this struct */ +}; +#define to_clock_cooling_device(x) \ + container_of(x, struct clock_cooling_device, clk_rate_change_nb) +static DEFINE_IDR(clock_idr); +static DEFINE_MUTEX(cooling_clock_lock); + +/** + * clock_cooling_get_idr - function to get a unique id. + * @id: int * value generated by this function. + * + * This function will populate @id with a unique + * id, using the idr API. + * + * Return: 0 on success, an error code on failure. + */ +static int clock_cooling_get_idr(int *id) +{ + int ret; + + mutex_lock(&cooling_clock_lock); + ret = idr_alloc(&clock_idr, NULL, 0, 0, GFP_KERNEL); + mutex_unlock(&cooling_clock_lock); + if (unlikely(ret < 0)) + return ret; + *id = ret; + + return 0; +} + +/** + * release_idr - function to free the unique id. + * @id: int value representing the unique id. + */ +static void release_idr(int id) +{ + mutex_lock(&cooling_clock_lock); + idr_remove(&clock_idr, id); + mutex_unlock(&cooling_clock_lock); +} + +/* The code below defines the functions used to operate a clock as a cooling device */ + +enum clock_cooling_property { + GET_LEVEL, + GET_FREQ, + GET_MAXL, +}; + +/** + * clock_cooling_get_property - fetch a property of interest for a given clock cooling device. + * @ccdev: clock cooling device reference + * @input: query parameter + * @output: query return + * @property: type of query (frequency, level, max level) + * + * This is the common function to + * 1. get maximum clock cooling states + * 2. translate frequency to cooling state + * 3. translate cooling state to frequency + * Note that the code may not be in good shape + * but it is written in this way in order to: + * a) reduce duplicate code as most of the code can be shared. + * b) make sure the logic is consistent when translating between + * cooling states and frequencies. + * + * Return: 0 on success, -EINVAL when invalid parameters are passed.
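+ * + * Example (hypothetical, assuming a descending three-entry frequency table + * of 996000, 792000 and 396000): GET_MAXL yields max level 2, GET_LEVEL + * with input 792000 yields level 1, and GET_FREQ with input 2 yields 396000.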
+ */ +static int clock_cooling_get_property(struct clock_cooling_device *ccdev, + unsigned long input, + unsigned long *output, + enum clock_cooling_property property) +{ + int i; + unsigned long max_level = 0, level = 0; + unsigned int freq = CPUFREQ_ENTRY_INVALID; + int descend = -1; + struct cpufreq_frequency_table *pos, *table = ccdev->freq_table; + + if (!output) + return -EINVAL; + + if (!table) + return -EINVAL; + + cpufreq_for_each_valid_entry(pos, table) { + /* ignore duplicate entry */ + if (freq == pos->frequency) + continue; + + /* get the frequency order */ + if (freq != CPUFREQ_ENTRY_INVALID && descend == -1) + descend = freq > pos->frequency; + + freq = pos->frequency; + max_level++; + } + + /* No valid cpu frequency entry */ + if (max_level == 0) + return -EINVAL; + + /* max_level is an index, not a counter */ + max_level--; + + /* get max level */ + if (property == GET_MAXL) { + *output = max_level; + return 0; + } + + if (property == GET_FREQ) + level = descend ? input : (max_level - input); + + i = 0; + cpufreq_for_each_valid_entry(pos, table) { + /* ignore duplicate entry */ + if (freq == pos->frequency) + continue; + + /* now we have a valid frequency entry */ + freq = pos->frequency; + + if (property == GET_LEVEL && (unsigned int)input == freq) { + /* get level by frequency */ + *output = descend ? i : (max_level - i); + return 0; + } + if (property == GET_FREQ && level == i) { + /* get frequency by level */ + *output = freq; + return 0; + } + i++; + } + + return -EINVAL; +} + +/** + * clock_cooling_get_level - return the cooling level of a given clock cooling device. + * @cdev: reference to a thermal cooling device used as clock cooling device + * @freq: the frequency of interest + * + * This function will match the cooling level corresponding to the + * requested @freq and return it. + * + * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID + * otherwise. + */ +unsigned long clock_cooling_get_level(struct thermal_cooling_device *cdev, + unsigned long freq) +{ + struct clock_cooling_device *ccdev = cdev->devdata; + unsigned long val; + + if (clock_cooling_get_property(ccdev, (unsigned long)freq, &val, + GET_LEVEL)) + return THERMAL_CSTATE_INVALID; + + return val; +} +EXPORT_SYMBOL_GPL(clock_cooling_get_level); + +/** + * clock_cooling_get_frequency - get the absolute value of frequency from level. + * @ccdev: clock cooling device reference + * @level: cooling level + * + * This function matches a cooling level with a frequency. Given a cooling + * level, which equals a cooling state of the cooling device, it will return + * the corresponding frequency. + * e.g. level=0 --> 1st max freq, level=1 --> 2nd max freq, etc. + * + * Return: 0 on error, the corresponding frequency otherwise. + */ +static unsigned long +clock_cooling_get_frequency(struct clock_cooling_device *ccdev, + unsigned long level) +{ + int ret = 0; + unsigned long freq; + + ret = clock_cooling_get_property(ccdev, level, &freq, GET_FREQ); + if (ret) + return 0; + + return freq; +} + +/** + * clock_cooling_apply - function to apply frequency clipping. + * @ccdev: clock_cooling_device pointer containing frequency clipping data. + * @cooling_state: value of the cooling state. + * + * Function used to make sure the clock layer is aware of current thermal + * limits. The limits are applied by updating the clock rate in case it is + * higher than the corresponding frequency based on the requested cooling_state.
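+ * + * For example (hypothetical rates): if the requested state maps to 792 MHz + * and the clock currently runs at 996 MHz, the rate is clipped down to + * 792 MHz; a clock already running at or below the limit is left untouched.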
+ * + * Return: 0 on success, an error code otherwise (-EINVAL in case wrong + * cooling state). + */ +static int clock_cooling_apply(struct clock_cooling_device *ccdev, + unsigned long cooling_state) +{ + unsigned long clip_freq, cur_freq; + int ret = 0; + + /* Here we write the clipping */ + /* Check if the old cooling action is same as new cooling action */ + if (ccdev->clock_state == cooling_state) + return 0; + + clip_freq = clock_cooling_get_frequency(ccdev, cooling_state); + if (!clip_freq) + return -EINVAL; + + cur_freq = clk_get_rate(ccdev->clk); + + mutex_lock(&ccdev->lock); + ccdev->clock_state = cooling_state; + ccdev->clock_val = clip_freq; + /* enforce clock level */ + if (cur_freq > clip_freq) + ret = clk_set_rate(ccdev->clk, clip_freq); + mutex_unlock(&ccdev->lock); + + return ret; +} + +/** + * clock_cooling_clock_notifier - notifier callback on clock rate changes. + * @nb: struct notifier_block * with callback info. + * @event: value showing clock event for which this function invoked. + * @data: callback-specific data + * + * Callback to hijack the notification on clock transition. + * Every time there is a clock change, we intercept all pre change events + * and block the transition in case the new rate infringes thermal limits. + * + * Return: NOTIFY_DONE (success) or NOTIFY_BAD (new_rate > thermal limit). + */ +static int clock_cooling_clock_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct clk_notifier_data *ndata = data; + struct clock_cooling_device *ccdev = to_clock_cooling_device(nb); + + switch (event) { + case PRE_RATE_CHANGE: + /* + * checks on current state + * TODO: current method is not best we can find as it + * allows possibly voltage transitions, in case DVFS + * layer is also hijacking clock pre notifications. + */ + if (ndata->new_rate > ccdev->clock_val) + return NOTIFY_BAD; + /* fall through */ + case POST_RATE_CHANGE: + case ABORT_RATE_CHANGE: + default: + return NOTIFY_DONE; + } +} + +/* clock cooling device thermal callback functions are defined below */ + +/** + * clock_cooling_get_max_state - callback function to get the max cooling state. + * @cdev: thermal cooling device pointer. + * @state: fill this variable with the max cooling state. + * + * Callback for the thermal cooling device to return the clock + * max cooling state. + * + * Return: 0 on success, an error code otherwise. + */ +static int clock_cooling_get_max_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + struct clock_cooling_device *ccdev = cdev->devdata; + unsigned long count = 0; + int ret; + + ret = clock_cooling_get_property(ccdev, 0, &count, GET_MAXL); + if (!ret) + *state = count; + + return ret; +} + +/** + * clock_cooling_get_cur_state - function to get the current cooling state. + * @cdev: thermal cooling device pointer. + * @state: fill this variable with the current cooling state. + * + * Callback for the thermal cooling device to return the clock + * current cooling state. + * + * Return: 0 (success) + */ +static int clock_cooling_get_cur_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + struct clock_cooling_device *ccdev = cdev->devdata; + + *state = ccdev->clock_state; + + return 0; +} + +/** + * clock_cooling_set_cur_state - function to set the current cooling state. + * @cdev: thermal cooling device pointer. + * @state: set this variable to the current cooling state. + * + * Callback for the thermal cooling device to change the clock cooling + * current cooling state. 
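+ * It simply delegates to clock_cooling_apply(), which performs the actual + * clock rate clipping.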
+ * + * Return: 0 on success, an error code otherwise. + */ +static int clock_cooling_set_cur_state(struct thermal_cooling_device *cdev, + unsigned long state) +{ + struct clock_cooling_device *clock_device = cdev->devdata; + + return clock_cooling_apply(clock_device, state); +} + +/* Bind clock callbacks to thermal cooling device ops */ +static struct thermal_cooling_device_ops const clock_cooling_ops = { + .get_max_state = clock_cooling_get_max_state, + .get_cur_state = clock_cooling_get_cur_state, + .set_cur_state = clock_cooling_set_cur_state, +}; + +/** + * clock_cooling_register - function to create clock cooling device. + * @dev: struct device pointer to the device used as clock cooling device. + * @clock_name: string containing the clock used as cooling mechanism. + * + * This interface function registers the clock cooling device with the name + * "thermal-clock-%x". The cooling device is based on clock frequencies. + * The struct device is assumed to be capable of DVFS transitions. + * The OPP layer is used to fetch and fill the available frequencies for + * the referred device. The ordered frequency table is used to control + * the clock cooling device cooling states and to limit clock transitions + * based on the cooling state requested by the thermal framework. + * + * Return: a valid struct thermal_cooling_device pointer on success, + * on failure, it returns a corresponding ERR_PTR(). + */ +struct thermal_cooling_device * +clock_cooling_register(struct device *dev, const char *clock_name) +{ + struct thermal_cooling_device *cdev; + struct clock_cooling_device *ccdev = NULL; + char dev_name[THERMAL_NAME_LENGTH]; + int ret = 0; + + ccdev = devm_kzalloc(dev, sizeof(*ccdev), GFP_KERNEL); + if (!ccdev) + return ERR_PTR(-ENOMEM); + + ccdev->dev = dev; + ccdev->clk = devm_clk_get(dev, clock_name); + if (IS_ERR(ccdev->clk)) + return ERR_CAST(ccdev->clk); + + ret = clock_cooling_get_idr(&ccdev->id); + if (ret) + return ERR_PTR(-EINVAL); + + snprintf(dev_name, sizeof(dev_name), "thermal-clock-%d", ccdev->id); + + cdev = thermal_cooling_device_register(dev_name, ccdev, + &clock_cooling_ops); + if (IS_ERR(cdev)) { + release_idr(ccdev->id); + return ERR_PTR(-EINVAL); + } + ccdev->cdev = cdev; + ccdev->clk_rate_change_nb.notifier_call = clock_cooling_clock_notifier; + + /* Assuming someone has already filled the opp table for this device */ + ret = dev_pm_opp_init_cpufreq_table(dev, &ccdev->freq_table); + if (ret) { + release_idr(ccdev->id); + return ERR_PTR(ret); + } + ccdev->clock_state = 0; + ccdev->clock_val = clock_cooling_get_frequency(ccdev, 0); + + clk_notifier_register(ccdev->clk, &ccdev->clk_rate_change_nb); + + return cdev; +} +EXPORT_SYMBOL_GPL(clock_cooling_register); + +/** + * clock_cooling_unregister - function to remove clock cooling device. + * @cdev: thermal cooling device pointer. + * + * This interface function unregisters the "thermal-clock-%x" cooling device. 
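+ * + * A typical user pairs this with clock_cooling_register(): register from + * probe() once the device's OPP table has been populated, and unregister + * from the matching remove() path.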
+ */ +void clock_cooling_unregister(struct thermal_cooling_device *cdev) +{ + struct clock_cooling_device *ccdev; + + if (!cdev) + return; + + ccdev = cdev->devdata; + + clk_notifier_unregister(ccdev->clk, &ccdev->clk_rate_change_nb); + dev_pm_opp_free_cpufreq_table(ccdev->dev, &ccdev->freq_table); + + thermal_cooling_device_unregister(ccdev->cdev); + release_idr(ccdev->id); +} +EXPORT_SYMBOL_GPL(clock_cooling_unregister); diff --git a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c index 0d8db808f0a..e4e61b3fb11 100644 --- a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c +++ b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c @@ -131,6 +131,8 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp, pr_warn("Failed to get target ACPI device\n"); } + result = 0; + *trtp = trts; /* don't count bad entries */ *trt_count -= nr_bad_entries; @@ -317,21 +319,21 @@ static long acpi_thermal_rel_ioctl(struct file *f, unsigned int cmd, { int ret = 0; unsigned long length = 0; - unsigned long count = 0; + int count = 0; char __user *arg = (void __user *)__arg; struct trt *trts; struct art *arts; switch (cmd) { case ACPI_THERMAL_GET_TRT_COUNT: - ret = acpi_parse_trt(acpi_thermal_rel_handle, (int *)&count, + ret = acpi_parse_trt(acpi_thermal_rel_handle, &count, &trts, false); kfree(trts); if (!ret) return put_user(count, (unsigned long __user *)__arg); return ret; case ACPI_THERMAL_GET_TRT_LEN: - ret = acpi_parse_trt(acpi_thermal_rel_handle, (int *)&count, + ret = acpi_parse_trt(acpi_thermal_rel_handle, &count, &trts, false); kfree(trts); length = count * sizeof(union trt_object); @@ -341,14 +343,14 @@ static long acpi_thermal_rel_ioctl(struct file *f, unsigned int cmd, case ACPI_THERMAL_GET_TRT: return fill_trt(arg); case ACPI_THERMAL_GET_ART_COUNT: - ret = acpi_parse_art(acpi_thermal_rel_handle, (int *)&count, + ret = acpi_parse_art(acpi_thermal_rel_handle, &count, &arts, false); kfree(arts); if (!ret) return put_user(count, (unsigned long __user *)__arg); return ret; case ACPI_THERMAL_GET_ART_LEN: - ret = acpi_parse_art(acpi_thermal_rel_handle, (int *)&count, + ret = acpi_parse_art(acpi_thermal_rel_handle, &count, &arts, false); kfree(arts); length = count * sizeof(union art_object); diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c index edc1cce117b..dcb306ea14a 100644 --- a/drivers/thermal/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/int340x_thermal/int3400_thermal.c @@ -43,6 +43,74 @@ struct int3400_thermal_priv { struct trt *trts; u8 uuid_bitmap; int rel_misc_dev_res; + int current_uuid_index; +}; + +static ssize_t available_uuids_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct int3400_thermal_priv *priv = platform_get_drvdata(pdev); + int i; + int length = 0; + + for (i = 0; i < INT3400_THERMAL_MAXIMUM_UUID; i++) { + if (priv->uuid_bitmap & (1 << i)) + if (PAGE_SIZE - length > 0) + length += snprintf(&buf[length], + PAGE_SIZE - length, + "%s\n", + int3400_thermal_uuids[i]); + } + + return length; +} + +static ssize_t current_uuid_show(struct device *dev, + struct device_attribute *devattr, char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct int3400_thermal_priv *priv = platform_get_drvdata(pdev); + + if (priv->uuid_bitmap & (1 << priv->current_uuid_index)) + return sprintf(buf, "%s\n", + 
int3400_thermal_uuids[priv->current_uuid_index]); + else + return sprintf(buf, "INVALID\n"); +} + +static ssize_t current_uuid_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct platform_device *pdev = to_platform_device(dev); + struct int3400_thermal_priv *priv = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < INT3400_THERMAL_MAXIMUM_UUID; ++i) { + if ((priv->uuid_bitmap & (1 << i)) && + !(strncmp(buf, int3400_thermal_uuids[i], + sizeof(int3400_thermal_uuids[i]) - 1))) { + priv->current_uuid_index = i; + return count; + } + } + + return -EINVAL; +} + +static DEVICE_ATTR(current_uuid, 0644, current_uuid_show, current_uuid_store); +static DEVICE_ATTR_RO(available_uuids); +static struct attribute *uuid_attrs[] = { + &dev_attr_available_uuids.attr, + &dev_attr_current_uuid.attr, + NULL +}; + +static struct attribute_group uuid_attribute_group = { + .attrs = uuid_attrs, + .name = "uuids" }; static int int3400_thermal_get_uuids(struct int3400_thermal_priv *priv) @@ -160,9 +228,9 @@ static int int3400_thermal_set_mode(struct thermal_zone_device *thermal, if (enable != priv->mode) { priv->mode = enable; - /* currently, only PASSIVE COOLING is supported */ result = int3400_thermal_run_osc(priv->adev->handle, - INT3400_THERMAL_PASSIVE_1, enable); + priv->current_uuid_index, + enable); } return result; } @@ -223,7 +291,14 @@ static int int3400_thermal_probe(struct platform_device *pdev) priv->rel_misc_dev_res = acpi_thermal_rel_misc_device_add( priv->adev->handle); + result = sysfs_create_group(&pdev->dev.kobj, &uuid_attribute_group); + if (result) + goto free_zone; + return 0; + +free_zone: + thermal_zone_device_unregister(priv->thermal); free_trt: kfree(priv->trts); free_art: @@ -240,6 +315,7 @@ static int int3400_thermal_remove(struct platform_device *pdev) if (!priv->rel_misc_dev_res) acpi_thermal_rel_misc_device_remove(priv->adev->handle); + sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group); thermal_zone_device_unregister(priv->thermal); kfree(priv->trts); kfree(priv->arts); diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c index 6e9fb62eb81..1bfa6a69e77 100644 --- a/drivers/thermal/int340x_thermal/int3403_thermal.c +++ b/drivers/thermal/int340x_thermal/int3403_thermal.c @@ -293,8 +293,7 @@ static int int3403_sensor_add(struct int3403_priv *priv) return 0; err_free_obj: - if (obj->tzone) - thermal_zone_device_unregister(obj->tzone); + thermal_zone_device_unregister(obj->tzone); return result; } @@ -471,7 +470,6 @@ static struct platform_driver int3403_driver = { .remove = int3403_remove, .driver = { .name = "int3403 thermal", - .owner = THIS_MODULE, .acpi_match_table = int3403_device_ids, }, }; diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index 95cb7fc20e1..b46c706e1ca 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -689,6 +689,7 @@ static const struct x86_cpu_id intel_powerclamp_ids[] = { { X86_VENDOR_INTEL, 6, 0x3f}, { X86_VENDOR_INTEL, 6, 0x45}, { X86_VENDOR_INTEL, 6, 0x46}, + { X86_VENDOR_INTEL, 6, 0x4c}, {} }; MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); diff --git a/drivers/thermal/intel_soc_dts_thermal.c b/drivers/thermal/intel_soc_dts_thermal.c index a6a0a18ec0a..5580f5b24eb 100644 --- a/drivers/thermal/intel_soc_dts_thermal.c +++ b/drivers/thermal/intel_soc_dts_thermal.c @@ -360,6 +360,9 @@ static void proc_thermal_interrupt(void) u32 sticky_out; int status; u32 
ptmc_out; + unsigned long flags; + + spin_lock_irqsave(&intr_notify_lock, flags); /* Clear APIC interrupt */ status = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ, @@ -378,21 +381,20 @@ static void proc_thermal_interrupt(void) /* reset sticky bit */ status = iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE, SOC_DTS_OFFSET_PTTSS, sticky_out); + spin_unlock_irqrestore(&intr_notify_lock, flags); + for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) { pr_debug("TZD update for zone %d\n", i); thermal_zone_device_update(soc_dts[i]->tzone); } - } + } else + spin_unlock_irqrestore(&intr_notify_lock, flags); } static irqreturn_t soc_irq_thread_fn(int irq, void *dev_data) { - unsigned long flags; - - spin_lock_irqsave(&intr_notify_lock, flags); proc_thermal_interrupt(); - spin_unlock_irqrestore(&intr_notify_lock, flags); pr_debug("proc_thermal_interrupt\n"); return IRQ_HANDLED; diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index 62143ba3100..e145b66df44 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -30,27 +30,13 @@ #include <linux/err.h> #include <linux/export.h> #include <linux/string.h> +#include <linux/thermal.h> #include "thermal_core.h" /*** Private data structures to represent thermal device tree data ***/ /** - * struct __thermal_trip - representation of a point in temperature domain - * @np: pointer to struct device_node that this trip point was created from - * @temperature: temperature value in miliCelsius - * @hysteresis: relative hysteresis in miliCelsius - * @type: trip point type - */ - -struct __thermal_trip { - struct device_node *np; - unsigned long int temperature; - unsigned long int hysteresis; - enum thermal_trip_type type; -}; - -/** * struct __thermal_bind_param - a match between trip and cooling device * @cooling_device: a pointer to identify the referred cooling device * @trip_id: the trip point index @@ -77,8 +63,7 @@ struct __thermal_bind_params { * @num_tbps: number of thermal bind params * @tbps: an array of thermal bind params (0..num_tbps - 1) * @sensor_data: sensor private data used while reading temperature and trend - * @get_temp: sensor callback to read temperature - * @get_trend: sensor callback to read temperature trend + * @ops: set of callbacks to handle the thermal zone based on DT */ struct __thermal_zone { @@ -88,7 +73,7 @@ struct __thermal_zone { /* trip data */ int ntrips; - struct __thermal_trip *trips; + struct thermal_trip *trips; /* cooling binding data */ int num_tbps; @@ -96,8 +81,7 @@ struct __thermal_zone { /* sensor interface */ void *sensor_data; - int (*get_temp)(void *, long *); - int (*get_trend)(void *, long *); + const struct thermal_zone_of_device_ops *ops; }; /*** DT thermal zone device callbacks ***/ @@ -107,10 +91,96 @@ static int of_thermal_get_temp(struct thermal_zone_device *tz, { struct __thermal_zone *data = tz->devdata; - if (!data->get_temp) + if (!data->ops->get_temp) return -EINVAL; - return data->get_temp(data->sensor_data, temp); + return data->ops->get_temp(data->sensor_data, temp); +} + +/** + * of_thermal_get_ntrips - function to export number of available trip + * points. 
+ * @tz: pointer to a thermal zone + * + * This function is a globally visible wrapper to get number of trip points + * stored in the local struct __thermal_zone + * + * Return: number of available trip points, -ENODEV when data not available + */ +int of_thermal_get_ntrips(struct thermal_zone_device *tz) +{ + struct __thermal_zone *data = tz->devdata; + + if (!data || IS_ERR(data)) + return -ENODEV; + + return data->ntrips; +} +EXPORT_SYMBOL_GPL(of_thermal_get_ntrips); + +/** + * of_thermal_is_trip_valid - function to check if trip point is valid + * + * @tz: pointer to a thermal zone + * @trip: trip point to evaluate + * + * This function is responsible for checking if passed trip point is valid + * + * Return: true if trip point is valid, false otherwise + */ +bool of_thermal_is_trip_valid(struct thermal_zone_device *tz, int trip) +{ + struct __thermal_zone *data = tz->devdata; + + if (!data || trip >= data->ntrips || trip < 0) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(of_thermal_is_trip_valid); + +/** + * of_thermal_get_trip_points - function to get access to a globally exported + * trip points + * + * @tz: pointer to a thermal zone + * + * This function provides a pointer to trip points table + * + * Return: pointer to trip points table, NULL otherwise + */ +const struct thermal_trip * const +of_thermal_get_trip_points(struct thermal_zone_device *tz) +{ + struct __thermal_zone *data = tz->devdata; + + if (!data) + return NULL; + + return data->trips; +} +EXPORT_SYMBOL_GPL(of_thermal_get_trip_points); + +/** + * of_thermal_set_emul_temp - function to set emulated temperature + * + * @tz: pointer to a thermal zone + * @temp: temperature to set + * + * This function gives the ability to set emulated value of temperature, + * which is handy for debugging + * + * Return: zero on success, error code otherwise + */ +static int of_thermal_set_emul_temp(struct thermal_zone_device *tz, + unsigned long temp) +{ + struct __thermal_zone *data = tz->devdata; + + if (!data->ops || !data->ops->set_emul_temp) + return -EINVAL; + + return data->ops->set_emul_temp(data->sensor_data, temp); } static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip, @@ -120,10 +190,10 @@ static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip, long dev_trend; int r; - if (!data->get_trend) + if (!data->ops->get_trend) return -EINVAL; - r = data->get_trend(data->sensor_data, &dev_trend); + r = data->ops->get_trend(data->sensor_data, &dev_trend); if (r) return r; @@ -324,8 +394,7 @@ static struct thermal_zone_device_ops of_thermal_ops = { static struct thermal_zone_device * thermal_zone_of_add_sensor(struct device_node *zone, struct device_node *sensor, void *data, - int (*get_temp)(void *, long *), - int (*get_trend)(void *, long *)) + const struct thermal_zone_of_device_ops *ops) { struct thermal_zone_device *tzd; struct __thermal_zone *tz; @@ -336,13 +405,16 @@ thermal_zone_of_add_sensor(struct device_node *zone, tz = tzd->devdata; + if (!ops) + return ERR_PTR(-EINVAL); + mutex_lock(&tzd->lock); - tz->get_temp = get_temp; - tz->get_trend = get_trend; + tz->ops = ops; tz->sensor_data = data; tzd->ops->get_temp = of_thermal_get_temp; tzd->ops->get_trend = of_thermal_get_trend; + tzd->ops->set_emul_temp = of_thermal_set_emul_temp; mutex_unlock(&tzd->lock); return tzd; @@ -356,8 +428,7 @@ thermal_zone_of_add_sensor(struct device_node *zone, * than one sensors * @data: a private pointer (owned by the caller) that will be passed * back, when a temperature reading is needed. 
- * @get_temp: a pointer to a function that reads the sensor temperature. - * @get_trend: a pointer to a function that reads the sensor temperature trend. + * @ops: struct thermal_zone_of_device_ops *. Must contain at least .get_temp. * * This function will search the list of thermal zones described in device * tree and look for the zone that refers to the sensor device pointed by * @@ -382,9 +453,8 @@ thermal_zone_of_add_sensor * check the return value with help of IS_ERR() helper. */ struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int sensor_id, - void *data, int (*get_temp)(void *, long *), - int (*get_trend)(void *, long *)) +thermal_zone_of_sensor_register(struct device *dev, int sensor_id, void *data, + const struct thermal_zone_of_device_ops *ops) { struct device_node *np, *child, *sensor_np; struct thermal_zone_device *tzd = ERR_PTR(-ENODEV); @@ -426,9 +496,7 @@ thermal_zone_of_sensor_register(struct device *dev, int sensor_id, if (sensor_specs.np == sensor_np && id == sensor_id) { tzd = thermal_zone_of_add_sensor(child, sensor_np, - data, - get_temp, - get_trend); + data, ops); of_node_put(sensor_specs.np); of_node_put(child); goto exit; @@ -475,9 +543,9 @@ void thermal_zone_of_sensor_unregister(struct device *dev, mutex_lock(&tzd->lock); tzd->ops->get_temp = NULL; tzd->ops->get_trend = NULL; + tzd->ops->set_emul_temp = NULL; - tz->get_temp = NULL; - tz->get_trend = NULL; + tz->ops = NULL; tz->sensor_data = NULL; mutex_unlock(&tzd->lock); } @@ -501,7 +569,7 @@ EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_unregister); */ static int thermal_of_populate_bind_params(struct device_node *np, struct __thermal_bind_params *__tbp, - struct __thermal_trip *trips, + struct thermal_trip *trips, int ntrips) { struct of_phandle_args cooling_spec; @@ -604,7 +672,7 @@ static int thermal_of_get_trip_type(struct device_node *np, * Return: 0 on success, proper error code otherwise */ static int thermal_of_populate_trip(struct device_node *np, - struct __thermal_trip *trip) + struct thermal_trip *trip) { int prop; int ret; diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c new file mode 100644 index 00000000000..1bcddfc60e9 --- /dev/null +++ b/drivers/thermal/rockchip_thermal.c @@ -0,0 +1,693 @@ +/* + * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/platform_device.h> +#include <linux/reset.h> +#include <linux/thermal.h> + +/** + * If the temperature stays high over a period of time, the resulting TSHUT + * signal either goes to the CRU module, letting it reset the entire chip, + * or to the PMIC via a GPIO. + */ +enum tshut_mode { + TSHUT_MODE_CRU = 0, + TSHUT_MODE_GPIO, +}; + +/** + * The polarity of the system's Temperature Sensor thermal shutdown (tshut) + * signal; bit 8 selects the tshut polarity.
+ * 0: low active, 1: high active + */ +enum tshut_polarity { + TSHUT_LOW_ACTIVE = 0, + TSHUT_HIGH_ACTIVE, +}; + +/** + * The system has three Temperature Sensors. channel 0 is reserved, + * channel 1 is for CPU, and channel 2 is for GPU. + */ +enum sensor_id { + SENSOR_CPU = 1, + SENSOR_GPU, +}; + +struct rockchip_tsadc_chip { + /* The hardware-controlled tshut property */ + long tshut_temp; + enum tshut_mode tshut_mode; + enum tshut_polarity tshut_polarity; + + /* Chip-wide methods */ + void (*initialize)(void __iomem *reg, enum tshut_polarity p); + void (*irq_ack)(void __iomem *reg); + void (*control)(void __iomem *reg, bool on); + + /* Per-sensor methods */ + int (*get_temp)(int chn, void __iomem *reg, long *temp); + void (*set_tshut_temp)(int chn, void __iomem *reg, long temp); + void (*set_tshut_mode)(int chn, void __iomem *reg, enum tshut_mode m); +}; + +struct rockchip_thermal_sensor { + struct rockchip_thermal_data *thermal; + struct thermal_zone_device *tzd; + enum sensor_id id; +}; + +#define NUM_SENSORS 2 /* Ignore unused sensor 0 */ + +struct rockchip_thermal_data { + const struct rockchip_tsadc_chip *chip; + struct platform_device *pdev; + struct reset_control *reset; + + struct rockchip_thermal_sensor sensors[NUM_SENSORS]; + + struct clk *clk; + struct clk *pclk; + + void __iomem *regs; + + long tshut_temp; + enum tshut_mode tshut_mode; + enum tshut_polarity tshut_polarity; +}; + +/* TSADC V2 Sensor info define: */ +#define TSADCV2_AUTO_CON 0x04 +#define TSADCV2_INT_EN 0x08 +#define TSADCV2_INT_PD 0x0c +#define TSADCV2_DATA(chn) (0x20 + (chn) * 0x04) +#define TSADCV2_COMP_SHUT(chn) (0x40 + (chn) * 0x04) +#define TSADCV2_HIGHT_INT_DEBOUNCE 0x60 +#define TSADCV2_HIGHT_TSHUT_DEBOUNCE 0x64 +#define TSADCV2_AUTO_PERIOD 0x68 +#define TSADCV2_AUTO_PERIOD_HT 0x6c + +#define TSADCV2_AUTO_EN BIT(0) +#define TSADCV2_AUTO_DISABLE ~BIT(0) +#define TSADCV2_AUTO_SRC_EN(chn) BIT(4 + (chn)) +#define TSADCV2_AUTO_TSHUT_POLARITY_HIGH BIT(8) +#define TSADCV2_AUTO_TSHUT_POLARITY_LOW ~BIT(8) + +#define TSADCV2_INT_SRC_EN(chn) BIT(chn) +#define TSADCV2_SHUT_2GPIO_SRC_EN(chn) BIT(4 + (chn)) +#define TSADCV2_SHUT_2CRU_SRC_EN(chn) BIT(8 + (chn)) + +#define TSADCV2_INT_PD_CLEAR ~BIT(8) + +#define TSADCV2_DATA_MASK 0xfff +#define TSADCV2_HIGHT_INT_DEBOUNCE_COUNT 4 +#define TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT 4 +#define TSADCV2_AUTO_PERIOD_TIME 250 /* msec */ +#define TSADCV2_AUTO_PERIOD_HT_TIME 50 /* msec */ + +struct tsadc_table { + unsigned long code; + long temp; +}; + +static const struct tsadc_table v2_code_table[] = { + {TSADCV2_DATA_MASK, -40000}, + {3800, -40000}, + {3792, -35000}, + {3783, -30000}, + {3774, -25000}, + {3765, -20000}, + {3756, -15000}, + {3747, -10000}, + {3737, -5000}, + {3728, 0}, + {3718, 5000}, + {3708, 10000}, + {3698, 15000}, + {3688, 20000}, + {3678, 25000}, + {3667, 30000}, + {3656, 35000}, + {3645, 40000}, + {3634, 45000}, + {3623, 50000}, + {3611, 55000}, + {3600, 60000}, + {3588, 65000}, + {3575, 70000}, + {3563, 75000}, + {3550, 80000}, + {3537, 85000}, + {3524, 90000}, + {3510, 95000}, + {3496, 100000}, + {3482, 105000}, + {3467, 110000}, + {3452, 115000}, + {3437, 120000}, + {3421, 125000}, + {0, 125000}, +}; + +static u32 rk_tsadcv2_temp_to_code(long temp) +{ + int high, low, mid; + + low = 0; + high = ARRAY_SIZE(v2_code_table) - 1; + mid = (high + low) / 2; + + if (temp < v2_code_table[low].temp || temp > v2_code_table[high].temp) + return 0; + + while (low <= high) { + if (temp == v2_code_table[mid].temp) + return v2_code_table[mid].code; + else if (temp < 
v2_code_table[mid].temp) + high = mid - 1; + else + low = mid + 1; + mid = (low + high) / 2; + } + + return 0; +} + +static long rk_tsadcv2_code_to_temp(u32 code) +{ + int high, low, mid; + + low = 0; + high = ARRAY_SIZE(v2_code_table) - 1; + mid = (high + low) / 2; + + if (code > v2_code_table[low].code || code < v2_code_table[high].code) + return 125000; /* No code available, return max temperature */ + + while (low <= high) { + if (code >= v2_code_table[mid].code && code < + v2_code_table[mid - 1].code) + return v2_code_table[mid].temp; + else if (code < v2_code_table[mid].code) + low = mid + 1; + else + high = mid - 1; + mid = (low + high) / 2; + } + + return 125000; +} + +/** + * rk_tsadcv2_initialize - initialize TSADC Controller + * (1) Set TSADCV2_AUTO_PERIOD: configure the interval between two + * consecutive TSADC accesses in normal operation. + * (2) Set TSADCV2_AUTO_PERIOD_HT: configure the interval between two + * consecutive TSADC accesses after the temperature is higher + * than COMP_SHUT or COMP_INT. + * (3) Set TSADCV2_HIGHT_INT_DEBOUNCE and TSADCV2_HIGHT_TSHUT_DEBOUNCE: + * if the temperature is higher than COMP_INT or COMP_SHUT for + * "debounce" consecutive samples, the TSADC controller will generate + * an interrupt or a TSHUT event. + */ +static void rk_tsadcv2_initialize(void __iomem *regs, + enum tshut_polarity tshut_polarity) +{ + if (tshut_polarity == TSHUT_HIGH_ACTIVE) + writel_relaxed(0 | (TSADCV2_AUTO_TSHUT_POLARITY_HIGH), + regs + TSADCV2_AUTO_CON); + else + writel_relaxed(0 | (TSADCV2_AUTO_TSHUT_POLARITY_LOW), + regs + TSADCV2_AUTO_CON); + + writel_relaxed(TSADCV2_AUTO_PERIOD_TIME, regs + TSADCV2_AUTO_PERIOD); + writel_relaxed(TSADCV2_HIGHT_INT_DEBOUNCE_COUNT, + regs + TSADCV2_HIGHT_INT_DEBOUNCE); + writel_relaxed(TSADCV2_AUTO_PERIOD_HT_TIME, + regs + TSADCV2_AUTO_PERIOD_HT); + writel_relaxed(TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT, + regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE); +} + +static void rk_tsadcv2_irq_ack(void __iomem *regs) +{ + u32 val; + + val = readl_relaxed(regs + TSADCV2_INT_PD); + writel_relaxed(val & TSADCV2_INT_PD_CLEAR, regs + TSADCV2_INT_PD); +} + +static void rk_tsadcv2_control(void __iomem *regs, bool enable) +{ + u32 val; + + val = readl_relaxed(regs + TSADCV2_AUTO_CON); + if (enable) + val |= TSADCV2_AUTO_EN; + else + val &= ~TSADCV2_AUTO_EN; + + writel_relaxed(val, regs + TSADCV2_AUTO_CON); +} + +static int rk_tsadcv2_get_temp(int chn, void __iomem *regs, long *temp) +{ + u32 val; + + /* the A/D value of the channel's last conversion needs some time */ + val = readl_relaxed(regs + TSADCV2_DATA(chn)); + if (val == 0) + return -EAGAIN; + + *temp = rk_tsadcv2_code_to_temp(val); + + return 0; +} + +static void rk_tsadcv2_tshut_temp(int chn, void __iomem *regs, long temp) +{ + u32 tshut_value, val; + + tshut_value = rk_tsadcv2_temp_to_code(temp); + writel_relaxed(tshut_value, regs + TSADCV2_COMP_SHUT(chn)); + + /* enable the channel source so that TSHUT becomes valid */ + val = readl_relaxed(regs + TSADCV2_AUTO_CON); + writel_relaxed(val | TSADCV2_AUTO_SRC_EN(chn), regs + TSADCV2_AUTO_CON); +} + +static void rk_tsadcv2_tshut_mode(int chn, void __iomem *regs, + enum tshut_mode mode) +{ + u32 val; + + val = readl_relaxed(regs + TSADCV2_INT_EN); + if (mode == TSHUT_MODE_GPIO) { + val &= ~TSADCV2_SHUT_2CRU_SRC_EN(chn); + val |= TSADCV2_SHUT_2GPIO_SRC_EN(chn); + } else { + val &= ~TSADCV2_SHUT_2GPIO_SRC_EN(chn); + val |= TSADCV2_SHUT_2CRU_SRC_EN(chn); + } + + writel_relaxed(val, regs + TSADCV2_INT_EN); +} + +static const struct rockchip_tsadc_chip rk3288_tsadc_data = { + .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO 
give PMIC */ + .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */ + .tshut_temp = 95000, + + .initialize = rk_tsadcv2_initialize, + .irq_ack = rk_tsadcv2_irq_ack, + .control = rk_tsadcv2_control, + .get_temp = rk_tsadcv2_get_temp, + .set_tshut_temp = rk_tsadcv2_tshut_temp, + .set_tshut_mode = rk_tsadcv2_tshut_mode, +}; + +static const struct of_device_id of_rockchip_thermal_match[] = { + { + .compatible = "rockchip,rk3288-tsadc", + .data = (void *)&rk3288_tsadc_data, + }, + { /* end */ }, +}; +MODULE_DEVICE_TABLE(of, of_rockchip_thermal_match); + +static void +rockchip_thermal_toggle_sensor(struct rockchip_thermal_sensor *sensor, bool on) +{ + struct thermal_zone_device *tzd = sensor->tzd; + + tzd->ops->set_mode(tzd, + on ? THERMAL_DEVICE_ENABLED : THERMAL_DEVICE_DISABLED); +} + +static irqreturn_t rockchip_thermal_alarm_irq_thread(int irq, void *dev) +{ + struct rockchip_thermal_data *thermal = dev; + int i; + + dev_dbg(&thermal->pdev->dev, "thermal alarm\n"); + + thermal->chip->irq_ack(thermal->regs); + + for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) + thermal_zone_device_update(thermal->sensors[i].tzd); + + return IRQ_HANDLED; +} + +static int rockchip_thermal_get_temp(void *_sensor, long *out_temp) +{ + struct rockchip_thermal_sensor *sensor = _sensor; + struct rockchip_thermal_data *thermal = sensor->thermal; + const struct rockchip_tsadc_chip *tsadc = sensor->thermal->chip; + int retval; + + retval = tsadc->get_temp(sensor->id, thermal->regs, out_temp); + dev_dbg(&thermal->pdev->dev, "sensor %d - temp: %ld, retval: %d\n", + sensor->id, *out_temp, retval); + + return retval; +} + +static const struct thermal_zone_of_device_ops rockchip_of_thermal_ops = { + .get_temp = rockchip_thermal_get_temp, +}; + +static int rockchip_configure_from_dt(struct device *dev, + struct device_node *np, + struct rockchip_thermal_data *thermal) +{ + u32 shut_temp, tshut_mode, tshut_polarity; + + if (of_property_read_u32(np, "rockchip,hw-tshut-temp", &shut_temp)) { + dev_warn(dev, + "Missing tshut temp property, using default %ld\n", + thermal->chip->tshut_temp); + thermal->tshut_temp = thermal->chip->tshut_temp; + } else { + thermal->tshut_temp = shut_temp; + } + + if (thermal->tshut_temp > INT_MAX) { + dev_err(dev, "Invalid tshut temperature specified: %ld\n", + thermal->tshut_temp); + return -ERANGE; + } + + if (of_property_read_u32(np, "rockchip,hw-tshut-mode", &tshut_mode)) { + dev_warn(dev, + "Missing tshut mode property, using default (%s)\n", + thermal->chip->tshut_mode == TSHUT_MODE_GPIO ? + "gpio" : "cru"); + thermal->tshut_mode = thermal->chip->tshut_mode; + } else { + thermal->tshut_mode = tshut_mode; + } + + if (thermal->tshut_mode > 1) { + dev_err(dev, "Invalid tshut mode specified: %d\n", + thermal->tshut_mode); + return -EINVAL; + } + + if (of_property_read_u32(np, "rockchip,hw-tshut-polarity", + &tshut_polarity)) { + dev_warn(dev, + "Missing tshut-polarity property, using default (%s)\n", + thermal->chip->tshut_polarity == TSHUT_LOW_ACTIVE ? 
+ "low" : "high"); + thermal->tshut_polarity = thermal->chip->tshut_polarity; + } else { + thermal->tshut_polarity = tshut_polarity; + } + + if (thermal->tshut_polarity > 1) { + dev_err(dev, "Invalid tshut-polarity specified: %d\n", + thermal->tshut_polarity); + return -EINVAL; + } + + return 0; +} + +static int +rockchip_thermal_register_sensor(struct platform_device *pdev, + struct rockchip_thermal_data *thermal, + struct rockchip_thermal_sensor *sensor, + enum sensor_id id) +{ + const struct rockchip_tsadc_chip *tsadc = thermal->chip; + int error; + + tsadc->set_tshut_mode(id, thermal->regs, thermal->tshut_mode); + tsadc->set_tshut_temp(id, thermal->regs, thermal->tshut_temp); + + sensor->thermal = thermal; + sensor->id = id; + sensor->tzd = thermal_zone_of_sensor_register(&pdev->dev, id, sensor, + &rockchip_of_thermal_ops); + if (IS_ERR(sensor->tzd)) { + error = PTR_ERR(sensor->tzd); + dev_err(&pdev->dev, "failed to register sensor %d: %d\n", + id, error); + return error; + } + + return 0; +} + +/* + * Reset TSADC Controller, reset all tsadc registers. + */ +static void rockchip_thermal_reset_controller(struct reset_control *reset) +{ + reset_control_assert(reset); + usleep_range(10, 20); + reset_control_deassert(reset); +} + +static int rockchip_thermal_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct rockchip_thermal_data *thermal; + const struct of_device_id *match; + struct resource *res; + int irq; + int i; + int error; + + match = of_match_node(of_rockchip_thermal_match, np); + if (!match) + return -ENXIO; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "no irq resource?\n"); + return -EINVAL; + } + + thermal = devm_kzalloc(&pdev->dev, sizeof(struct rockchip_thermal_data), + GFP_KERNEL); + if (!thermal) + return -ENOMEM; + + thermal->pdev = pdev; + + thermal->chip = (const struct rockchip_tsadc_chip *)match->data; + if (!thermal->chip) + return -EINVAL; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + thermal->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(thermal->regs)) + return PTR_ERR(thermal->regs); + + thermal->reset = devm_reset_control_get(&pdev->dev, "tsadc-apb"); + if (IS_ERR(thermal->reset)) { + error = PTR_ERR(thermal->reset); + dev_err(&pdev->dev, "failed to get tsadc reset: %d\n", error); + return error; + } + + thermal->clk = devm_clk_get(&pdev->dev, "tsadc"); + if (IS_ERR(thermal->clk)) { + error = PTR_ERR(thermal->clk); + dev_err(&pdev->dev, "failed to get tsadc clock: %d\n", error); + return error; + } + + thermal->pclk = devm_clk_get(&pdev->dev, "apb_pclk"); + if (IS_ERR(thermal->pclk)) { + error = PTR_ERR(thermal->clk); + dev_err(&pdev->dev, "failed to get apb_pclk clock: %d\n", + error); + return error; + } + + error = clk_prepare_enable(thermal->clk); + if (error) { + dev_err(&pdev->dev, "failed to enable converter clock: %d\n", + error); + return error; + } + + error = clk_prepare_enable(thermal->pclk); + if (error) { + dev_err(&pdev->dev, "failed to enable pclk: %d\n", error); + goto err_disable_clk; + } + + rockchip_thermal_reset_controller(thermal->reset); + + error = rockchip_configure_from_dt(&pdev->dev, np, thermal); + if (error) { + dev_err(&pdev->dev, "failed to parse device tree data: %d\n", + error); + goto err_disable_pclk; + } + + thermal->chip->initialize(thermal->regs, thermal->tshut_polarity); + + error = rockchip_thermal_register_sensor(pdev, thermal, + &thermal->sensors[0], + SENSOR_CPU); + if (error) { + dev_err(&pdev->dev, + "failed to 
register CPU thermal sensor: %d\n", error); + goto err_disable_pclk; + } + + error = rockchip_thermal_register_sensor(pdev, thermal, + &thermal->sensors[1], + SENSOR_GPU); + if (error) { + dev_err(&pdev->dev, + "failed to register GPU thermal sensor: %d\n", error); + goto err_unregister_cpu_sensor; + } + + error = devm_request_threaded_irq(&pdev->dev, irq, NULL, + &rockchip_thermal_alarm_irq_thread, + IRQF_ONESHOT, + "rockchip_thermal", thermal); + if (error) { + dev_err(&pdev->dev, + "failed to request tsadc irq: %d\n", error); + goto err_unregister_gpu_sensor; + } + + thermal->chip->control(thermal->regs, true); + + for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) + rockchip_thermal_toggle_sensor(&thermal->sensors[i], true); + + platform_set_drvdata(pdev, thermal); + + return 0; + +err_unregister_gpu_sensor: + thermal_zone_of_sensor_unregister(&pdev->dev, thermal->sensors[1].tzd); +err_unregister_cpu_sensor: + thermal_zone_of_sensor_unregister(&pdev->dev, thermal->sensors[0].tzd); +err_disable_pclk: + clk_disable_unprepare(thermal->pclk); +err_disable_clk: + clk_disable_unprepare(thermal->clk); + + return error; +} + +static int rockchip_thermal_remove(struct platform_device *pdev) +{ + struct rockchip_thermal_data *thermal = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) { + struct rockchip_thermal_sensor *sensor = &thermal->sensors[i]; + + rockchip_thermal_toggle_sensor(sensor, false); + thermal_zone_of_sensor_unregister(&pdev->dev, sensor->tzd); + } + + thermal->chip->control(thermal->regs, false); + + clk_disable_unprepare(thermal->pclk); + clk_disable_unprepare(thermal->clk); + + return 0; +} + +static int __maybe_unused rockchip_thermal_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct rockchip_thermal_data *thermal = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) + rockchip_thermal_toggle_sensor(&thermal->sensors[i], false); + + thermal->chip->control(thermal->regs, false); + + clk_disable(thermal->pclk); + clk_disable(thermal->clk); + + return 0; +} + +static int __maybe_unused rockchip_thermal_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct rockchip_thermal_data *thermal = platform_get_drvdata(pdev); + int i; + int error; + + error = clk_enable(thermal->clk); + if (error) + return error; + + error = clk_enable(thermal->pclk); + if (error) + return error; + + rockchip_thermal_reset_controller(thermal->reset); + + thermal->chip->initialize(thermal->regs, thermal->tshut_polarity); + + for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) { + enum sensor_id id = thermal->sensors[i].id; + + thermal->chip->set_tshut_mode(id, thermal->regs, + thermal->tshut_mode); + thermal->chip->set_tshut_temp(id, thermal->regs, + thermal->tshut_temp); + } + + thermal->chip->control(thermal->regs, true); + + for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) + rockchip_thermal_toggle_sensor(&thermal->sensors[i], true); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(rockchip_thermal_pm_ops, + rockchip_thermal_suspend, rockchip_thermal_resume); + +static struct platform_driver rockchip_thermal_driver = { + .driver = { + .name = "rockchip-thermal", + .owner = THIS_MODULE, + .pm = &rockchip_thermal_pm_ops, + .of_match_table = of_rockchip_thermal_match, + }, + .probe = rockchip_thermal_probe, + .remove = rockchip_thermal_remove, +}; + +module_platform_driver(rockchip_thermal_driver); + +MODULE_DESCRIPTION("ROCKCHIP THERMAL 
Driver"); +MODULE_AUTHOR("Rockchip, Inc."); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:rockchip-thermal"); diff --git a/drivers/thermal/samsung/exynos_thermal_common.h b/drivers/thermal/samsung/exynos_thermal_common.h index 158f5aa8dc5..cd4471925cd 100644 --- a/drivers/thermal/samsung/exynos_thermal_common.h +++ b/drivers/thermal/samsung/exynos_thermal_common.h @@ -27,7 +27,6 @@ #define SENSOR_NAME_LEN 16 #define MAX_TRIP_COUNT 8 #define MAX_COOLING_DEVICE 4 -#define MAX_TRIMINFO_CTRL_REG 2 #define ACTIVE_INTERVAL 500 #define IDLE_INTERVAL 10000 diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index 1e7d0736e86..d44d91d681d 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -33,7 +33,87 @@ #include "exynos_thermal_common.h" #include "exynos_tmu.h" -#include "exynos_tmu_data.h" + +/* Exynos generic registers */ +#define EXYNOS_TMU_REG_TRIMINFO 0x0 +#define EXYNOS_TMU_REG_CONTROL 0x20 +#define EXYNOS_TMU_REG_STATUS 0x28 +#define EXYNOS_TMU_REG_CURRENT_TEMP 0x40 +#define EXYNOS_TMU_REG_INTEN 0x70 +#define EXYNOS_TMU_REG_INTSTAT 0x74 +#define EXYNOS_TMU_REG_INTCLEAR 0x78 + +#define EXYNOS_TMU_TEMP_MASK 0xff +#define EXYNOS_TMU_REF_VOLTAGE_SHIFT 24 +#define EXYNOS_TMU_REF_VOLTAGE_MASK 0x1f +#define EXYNOS_TMU_BUF_SLOPE_SEL_MASK 0xf +#define EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT 8 +#define EXYNOS_TMU_CORE_EN_SHIFT 0 + +/* Exynos3250 specific registers */ +#define EXYNOS_TMU_TRIMINFO_CON1 0x10 + +/* Exynos4210 specific registers */ +#define EXYNOS4210_TMU_REG_THRESHOLD_TEMP 0x44 +#define EXYNOS4210_TMU_REG_TRIG_LEVEL0 0x50 + +/* Exynos5250, Exynos4412, Exynos3250 specific registers */ +#define EXYNOS_TMU_TRIMINFO_CON2 0x14 +#define EXYNOS_THD_TEMP_RISE 0x50 +#define EXYNOS_THD_TEMP_FALL 0x54 +#define EXYNOS_EMUL_CON 0x80 + +#define EXYNOS_TRIMINFO_RELOAD_ENABLE 1 +#define EXYNOS_TRIMINFO_25_SHIFT 0 +#define EXYNOS_TRIMINFO_85_SHIFT 8 +#define EXYNOS_TMU_TRIP_MODE_SHIFT 13 +#define EXYNOS_TMU_TRIP_MODE_MASK 0x7 +#define EXYNOS_TMU_THERM_TRIP_EN_SHIFT 12 + +#define EXYNOS_TMU_INTEN_RISE0_SHIFT 0 +#define EXYNOS_TMU_INTEN_RISE1_SHIFT 4 +#define EXYNOS_TMU_INTEN_RISE2_SHIFT 8 +#define EXYNOS_TMU_INTEN_RISE3_SHIFT 12 +#define EXYNOS_TMU_INTEN_FALL0_SHIFT 16 + +#define EXYNOS_EMUL_TIME 0x57F0 +#define EXYNOS_EMUL_TIME_MASK 0xffff +#define EXYNOS_EMUL_TIME_SHIFT 16 +#define EXYNOS_EMUL_DATA_SHIFT 8 +#define EXYNOS_EMUL_DATA_MASK 0xFF +#define EXYNOS_EMUL_ENABLE 0x1 + +/* Exynos5260 specific */ +#define EXYNOS5260_TMU_REG_INTEN 0xC0 +#define EXYNOS5260_TMU_REG_INTSTAT 0xC4 +#define EXYNOS5260_TMU_REG_INTCLEAR 0xC8 +#define EXYNOS5260_EMUL_CON 0x100 + +/* Exynos4412 specific */ +#define EXYNOS4412_MUX_ADDR_VALUE 6 +#define EXYNOS4412_MUX_ADDR_SHIFT 20 + +/*exynos5440 specific registers*/ +#define EXYNOS5440_TMU_S0_7_TRIM 0x000 +#define EXYNOS5440_TMU_S0_7_CTRL 0x020 +#define EXYNOS5440_TMU_S0_7_DEBUG 0x040 +#define EXYNOS5440_TMU_S0_7_TEMP 0x0f0 +#define EXYNOS5440_TMU_S0_7_TH0 0x110 +#define EXYNOS5440_TMU_S0_7_TH1 0x130 +#define EXYNOS5440_TMU_S0_7_TH2 0x150 +#define EXYNOS5440_TMU_S0_7_IRQEN 0x210 +#define EXYNOS5440_TMU_S0_7_IRQ 0x230 +/* exynos5440 common registers */ +#define EXYNOS5440_TMU_IRQ_STATUS 0x000 +#define EXYNOS5440_TMU_PMIN 0x004 + +#define EXYNOS5440_TMU_INTEN_RISE0_SHIFT 0 +#define EXYNOS5440_TMU_INTEN_RISE1_SHIFT 1 +#define EXYNOS5440_TMU_INTEN_RISE2_SHIFT 2 +#define EXYNOS5440_TMU_INTEN_RISE3_SHIFT 3 +#define EXYNOS5440_TMU_INTEN_FALL0_SHIFT 4 +#define EXYNOS5440_TMU_TH_RISE4_SHIFT 24 +#define 
EXYNOS5440_EFUSE_SWAP_OFFSET 8 /** * struct exynos_tmu_data : A structure to hold the private data of the TMU @@ -52,6 +132,11 @@ * @temp_error2: fused value of the second point trim. * @regulator: pointer to the TMU regulator structure. * @reg_conf: pointer to structure to register with core thermal. + * @tmu_initialize: SoC specific TMU initialization method + * @tmu_control: SoC specific TMU control method + * @tmu_read: SoC specific TMU temperature read method + * @tmu_set_emulation: SoC specific TMU emulation setting method + * @tmu_clear_irqs: SoC specific TMU interrupts clearing method */ struct exynos_tmu_data { int id; @@ -66,6 +151,12 @@ struct exynos_tmu_data { u8 temp_error1, temp_error2; struct regulator *regulator; struct thermal_sensor_conf *reg_conf; + int (*tmu_initialize)(struct platform_device *pdev); + void (*tmu_control)(struct platform_device *pdev, bool on); + int (*tmu_read)(struct exynos_tmu_data *data); + void (*tmu_set_emulation)(struct exynos_tmu_data *data, + unsigned long temp); + void (*tmu_clear_irqs)(struct exynos_tmu_data *data); }; /* @@ -122,83 +213,10 @@ static int code_to_temp(struct exynos_tmu_data *data, u8 temp_code) return temp; } -static void exynos_tmu_clear_irqs(struct exynos_tmu_data *data) -{ - const struct exynos_tmu_registers *reg = data->pdata->registers; - unsigned int val_irq; - - val_irq = readl(data->base + reg->tmu_intstat); - /* - * Clear the interrupts. Please note that the documentation for - * Exynos3250, Exynos4412, Exynos5250 and Exynos5260 incorrectly - * states that INTCLEAR register has a different placing of bits - * responsible for FALL IRQs than INTSTAT register. Exynos5420 - * and Exynos5440 documentation is correct (Exynos4210 doesn't - * support FALL IRQs at all). - */ - writel(val_irq, data->base + reg->tmu_intclear); -} - -static int exynos_tmu_initialize(struct platform_device *pdev) +static void sanitize_temp_error(struct exynos_tmu_data *data, u32 trim_info) { - struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct exynos_tmu_platform_data *pdata = data->pdata; - const struct exynos_tmu_registers *reg = pdata->registers; - unsigned int status, trim_info = 0, con, ctrl; - unsigned int rising_threshold = 0, falling_threshold = 0; - int ret = 0, threshold_code, i; - - mutex_lock(&data->lock); - clk_enable(data->clk); - if (!IS_ERR(data->clk_sec)) - clk_enable(data->clk_sec); - if (TMU_SUPPORTS(pdata, READY_STATUS)) { - status = readb(data->base + reg->tmu_status); - if (!status) { - ret = -EBUSY; - goto out; - } - } - - if (TMU_SUPPORTS(pdata, TRIM_RELOAD)) { - for (i = 0; i < reg->triminfo_ctrl_count; i++) { - if (pdata->triminfo_reload[i]) { - ctrl = readl(data->base + - reg->triminfo_ctrl[i]); - ctrl |= pdata->triminfo_reload[i]; - writel(ctrl, data->base + - reg->triminfo_ctrl[i]); - } - } - } - - /* Save trimming info in order to perform calibration */ - if (data->soc == SOC_ARCH_EXYNOS5440) { - /* - * For exynos5440 soc triminfo value is swapped between TMU0 and - * TMU2, so the below logic is needed. 
- */ - switch (data->id) { - case 0: - trim_info = readl(data->base + - EXYNOS5440_EFUSE_SWAP_OFFSET + reg->triminfo_data); - break; - case 1: - trim_info = readl(data->base + reg->triminfo_data); - break; - case 2: - trim_info = readl(data->base - - EXYNOS5440_EFUSE_SWAP_OFFSET + reg->triminfo_data); - } - } else { - /* On exynos5420 the triminfo register is in the shared space */ - if (data->soc == SOC_ARCH_EXYNOS5420_TRIMINFO) - trim_info = readl(data->base_second + - reg->triminfo_data); - else - trim_info = readl(data->base + reg->triminfo_data); - } data->temp_error1 = trim_info & EXYNOS_TMU_TEMP_MASK; data->temp_error2 = ((trim_info >> EXYNOS_TRIMINFO_85_SHIFT) & EXYNOS_TMU_TEMP_MASK); @@ -212,69 +230,37 @@ static int exynos_tmu_initialize(struct platform_device *pdev) data->temp_error2 = (pdata->efuse_value >> EXYNOS_TRIMINFO_85_SHIFT) & EXYNOS_TMU_TEMP_MASK; +} - rising_threshold = readl(data->base + reg->threshold_th0); +static u32 get_th_reg(struct exynos_tmu_data *data, u32 threshold, bool falling) +{ + struct exynos_tmu_platform_data *pdata = data->pdata; + int i; - if (data->soc == SOC_ARCH_EXYNOS4210) { - /* Write temperature code for threshold */ - threshold_code = temp_to_code(data, pdata->threshold); - writeb(threshold_code, - data->base + reg->threshold_temp); - for (i = 0; i < pdata->non_hw_trigger_levels; i++) - writeb(pdata->trigger_levels[i], data->base + - reg->threshold_th0 + i * sizeof(reg->threshold_th0)); + for (i = 0; i < pdata->non_hw_trigger_levels; i++) { + u8 temp = pdata->trigger_levels[i]; - exynos_tmu_clear_irqs(data); - } else { - /* Write temperature code for rising and falling threshold */ - for (i = 0; i < pdata->non_hw_trigger_levels; i++) { - threshold_code = temp_to_code(data, - pdata->trigger_levels[i]); - rising_threshold &= ~(0xff << 8 * i); - rising_threshold |= threshold_code << 8 * i; - if (pdata->threshold_falling) { - threshold_code = temp_to_code(data, - pdata->trigger_levels[i] - - pdata->threshold_falling); - falling_threshold |= threshold_code << 8 * i; - } - } + if (falling) + temp -= pdata->threshold_falling; + else + threshold &= ~(0xff << 8 * i); - writel(rising_threshold, - data->base + reg->threshold_th0); - writel(falling_threshold, - data->base + reg->threshold_th1); - - exynos_tmu_clear_irqs(data); - - /* if last threshold limit is also present */ - i = pdata->max_trigger_level - 1; - if (pdata->trigger_levels[i] && - (pdata->trigger_type[i] == HW_TRIP)) { - threshold_code = temp_to_code(data, - pdata->trigger_levels[i]); - if (i == EXYNOS_MAX_TRIGGER_PER_REG - 1) { - /* 1-4 level to be assigned in th0 reg */ - rising_threshold &= ~(0xff << 8 * i); - rising_threshold |= threshold_code << 8 * i; - writel(rising_threshold, - data->base + reg->threshold_th0); - } else if (i == EXYNOS_MAX_TRIGGER_PER_REG) { - /* 5th level to be assigned in th2 reg */ - rising_threshold = - threshold_code << reg->threshold_th3_l0_shift; - writel(rising_threshold, - data->base + reg->threshold_th2); - } - con = readl(data->base + reg->tmu_ctrl); - con |= (1 << reg->therm_trip_en_shift); - writel(con, data->base + reg->tmu_ctrl); - } + threshold |= temp_to_code(data, temp) << 8 * i; } - /*Clear the PMIN in the common TMU register*/ - if (reg->tmu_pmin && !data->id) - writel(0, data->base_second + reg->tmu_pmin); -out: + + return threshold; +} + +static int exynos_tmu_initialize(struct platform_device *pdev) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + int ret; + + mutex_lock(&data->lock); + clk_enable(data->clk); + if 
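+	/*
+	 * Editorial sketch, not part of the original patch: the 25C and 85C
+	 * fuse codes cached by sanitize_temp_error() feed the existing
+	 * two-point trimming helpers, which interpolate roughly as
+	 *
+	 *	temp = (temp_code - data->temp_error1) *
+	 *		(pdata->second_point_trim - pdata->first_point_trim) /
+	 *		(data->temp_error2 - data->temp_error1) +
+	 *		pdata->first_point_trim;
+	 *
+	 * in code_to_temp(), with temp_to_code() doing the inverse.
+	 */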
(!IS_ERR(data->clk_sec)) + clk_enable(data->clk_sec); + ret = data->tmu_initialize(pdev); clk_disable(data->clk); mutex_unlock(&data->lock); if (!IS_ERR(data->clk_sec)) @@ -283,20 +269,13 @@ out: return ret; } -static void exynos_tmu_control(struct platform_device *pdev, bool on) +static u32 get_con_reg(struct exynos_tmu_data *data, u32 con) { - struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct exynos_tmu_platform_data *pdata = data->pdata; - const struct exynos_tmu_registers *reg = pdata->registers; - unsigned int con, interrupt_en; - mutex_lock(&data->lock); - clk_enable(data->clk); - - con = readl(data->base + reg->tmu_ctrl); - - if (pdata->test_mux) - con |= (pdata->test_mux << reg->test_mux_addr_shift); + if (data->soc == SOC_ARCH_EXYNOS4412 || + data->soc == SOC_ARCH_EXYNOS3250) + con |= (EXYNOS4412_MUX_ADDR_VALUE << EXYNOS4412_MUX_ADDR_SHIFT); con &= ~(EXYNOS_TMU_REF_VOLTAGE_MASK << EXYNOS_TMU_REF_VOLTAGE_SHIFT); con |= pdata->reference_voltage << EXYNOS_TMU_REF_VOLTAGE_SHIFT; @@ -305,95 +284,287 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on) con |= (pdata->gain << EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT); if (pdata->noise_cancel_mode) { - con &= ~(reg->therm_trip_mode_mask << - reg->therm_trip_mode_shift); - con |= (pdata->noise_cancel_mode << reg->therm_trip_mode_shift); + con &= ~(EXYNOS_TMU_TRIP_MODE_MASK << EXYNOS_TMU_TRIP_MODE_SHIFT); + con |= (pdata->noise_cancel_mode << EXYNOS_TMU_TRIP_MODE_SHIFT); } - if (on) { - con |= (1 << EXYNOS_TMU_CORE_EN_SHIFT); - interrupt_en = - pdata->trigger_enable[3] << reg->inten_rise3_shift | - pdata->trigger_enable[2] << reg->inten_rise2_shift | - pdata->trigger_enable[1] << reg->inten_rise1_shift | - pdata->trigger_enable[0] << reg->inten_rise0_shift; - if (TMU_SUPPORTS(pdata, FALLING_TRIP)) - interrupt_en |= - interrupt_en << reg->inten_fall0_shift; - } else { - con &= ~(1 << EXYNOS_TMU_CORE_EN_SHIFT); - interrupt_en = 0; /* Disable all interrupts */ - } - writel(interrupt_en, data->base + reg->tmu_inten); - writel(con, data->base + reg->tmu_ctrl); + return con; +} + +static void exynos_tmu_control(struct platform_device *pdev, bool on) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + mutex_lock(&data->lock); + clk_enable(data->clk); + data->tmu_control(pdev, on); clk_disable(data->clk); mutex_unlock(&data->lock); } -static int exynos_tmu_read(struct exynos_tmu_data *data) +static int exynos4210_tmu_initialize(struct platform_device *pdev) { + struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct exynos_tmu_platform_data *pdata = data->pdata; - const struct exynos_tmu_registers *reg = pdata->registers; - u8 temp_code; - int temp; + unsigned int status; + int ret = 0, threshold_code, i; - mutex_lock(&data->lock); - clk_enable(data->clk); + status = readb(data->base + EXYNOS_TMU_REG_STATUS); + if (!status) { + ret = -EBUSY; + goto out; + } - temp_code = readb(data->base + reg->tmu_cur_temp); + sanitize_temp_error(data, readl(data->base + EXYNOS_TMU_REG_TRIMINFO)); - if (data->soc == SOC_ARCH_EXYNOS4210) - /* temp_code should range between 75 and 175 */ - if (temp_code < 75 || temp_code > 175) { - temp = -ENODATA; - goto out; + /* Write temperature code for threshold */ + threshold_code = temp_to_code(data, pdata->threshold); + writeb(threshold_code, data->base + EXYNOS4210_TMU_REG_THRESHOLD_TEMP); + + for (i = 0; i < pdata->non_hw_trigger_levels; i++) + writeb(pdata->trigger_levels[i], data->base + + EXYNOS4210_TMU_REG_TRIG_LEVEL0 + i * 4); + + data->tmu_clear_irqs(data); +out: + 
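+	/*
+	 * Editorial note: the trigger_levels[] bytes written above are not
+	 * absolute codes on exynos4210; the hardware adds them to the base
+	 * code programmed into EXYNOS4210_TMU_REG_THRESHOLD_TEMP (e.g. a
+	 * threshold of 80 with offsets {5, 20, 30} trips near 85/100/110
+	 * degrees C). Later SoCs instead write absolute codes into
+	 * EXYNOS_THD_TEMP_RISE/FALL, see exynos4412_tmu_initialize() below.
+	 */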
return ret; +} + +static int exynos4412_tmu_initialize(struct platform_device *pdev) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct exynos_tmu_platform_data *pdata = data->pdata; + unsigned int status, trim_info, con, ctrl, rising_threshold; + int ret = 0, threshold_code, i; + + status = readb(data->base + EXYNOS_TMU_REG_STATUS); + if (!status) { + ret = -EBUSY; + goto out; + } + + if (data->soc == SOC_ARCH_EXYNOS3250 || + data->soc == SOC_ARCH_EXYNOS4412 || + data->soc == SOC_ARCH_EXYNOS5250) { + if (data->soc == SOC_ARCH_EXYNOS3250) { + ctrl = readl(data->base + EXYNOS_TMU_TRIMINFO_CON1); + ctrl |= EXYNOS_TRIMINFO_RELOAD_ENABLE; + writel(ctrl, data->base + EXYNOS_TMU_TRIMINFO_CON1); } + ctrl = readl(data->base + EXYNOS_TMU_TRIMINFO_CON2); + ctrl |= EXYNOS_TRIMINFO_RELOAD_ENABLE; + writel(ctrl, data->base + EXYNOS_TMU_TRIMINFO_CON2); + } - temp = code_to_temp(data, temp_code); + /* On exynos5420 the triminfo register is in the shared space */ + if (data->soc == SOC_ARCH_EXYNOS5420_TRIMINFO) + trim_info = readl(data->base_second + EXYNOS_TMU_REG_TRIMINFO); + else + trim_info = readl(data->base + EXYNOS_TMU_REG_TRIMINFO); + + sanitize_temp_error(data, trim_info); + + /* Write temperature code for rising and falling threshold */ + rising_threshold = readl(data->base + EXYNOS_THD_TEMP_RISE); + rising_threshold = get_th_reg(data, rising_threshold, false); + writel(rising_threshold, data->base + EXYNOS_THD_TEMP_RISE); + writel(get_th_reg(data, 0, true), data->base + EXYNOS_THD_TEMP_FALL); + + data->tmu_clear_irqs(data); + + /* if last threshold limit is also present */ + i = pdata->max_trigger_level - 1; + if (pdata->trigger_levels[i] && pdata->trigger_type[i] == HW_TRIP) { + threshold_code = temp_to_code(data, pdata->trigger_levels[i]); + /* 1-4 level to be assigned in th0 reg */ + rising_threshold &= ~(0xff << 8 * i); + rising_threshold |= threshold_code << 8 * i; + writel(rising_threshold, data->base + EXYNOS_THD_TEMP_RISE); + con = readl(data->base + EXYNOS_TMU_REG_CONTROL); + con |= (1 << EXYNOS_TMU_THERM_TRIP_EN_SHIFT); + writel(con, data->base + EXYNOS_TMU_REG_CONTROL); + } out: - clk_disable(data->clk); - mutex_unlock(&data->lock); + return ret; +} - return temp; +static int exynos5440_tmu_initialize(struct platform_device *pdev) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct exynos_tmu_platform_data *pdata = data->pdata; + unsigned int trim_info = 0, con, rising_threshold; + int ret = 0, threshold_code, i; + + /* + * For exynos5440 soc triminfo value is swapped between TMU0 and + * TMU2, so the below logic is needed. 
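+ * As an illustration, with EXYNOS5440_EFUSE_SWAP_OFFSET == 8 the reads
+ * below resolve to: id 0 -> TRIM + 0x8, id 1 -> TRIM + 0x0, and
+ * id 2 -> TRIM - 0x8, so TMU0 and TMU2 fetch each other's fuse words.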
+ */ + switch (data->id) { + case 0: + trim_info = readl(data->base + EXYNOS5440_EFUSE_SWAP_OFFSET + + EXYNOS5440_TMU_S0_7_TRIM); + break; + case 1: + trim_info = readl(data->base + EXYNOS5440_TMU_S0_7_TRIM); + break; + case 2: + trim_info = readl(data->base - EXYNOS5440_EFUSE_SWAP_OFFSET + + EXYNOS5440_TMU_S0_7_TRIM); + } + sanitize_temp_error(data, trim_info); + + /* Write temperature code for rising and falling threshold */ + rising_threshold = readl(data->base + EXYNOS5440_TMU_S0_7_TH0); + rising_threshold = get_th_reg(data, rising_threshold, false); + writel(rising_threshold, data->base + EXYNOS5440_TMU_S0_7_TH0); + writel(0, data->base + EXYNOS5440_TMU_S0_7_TH1); + + data->tmu_clear_irqs(data); + + /* if last threshold limit is also present */ + i = pdata->max_trigger_level - 1; + if (pdata->trigger_levels[i] && pdata->trigger_type[i] == HW_TRIP) { + threshold_code = temp_to_code(data, pdata->trigger_levels[i]); + /* 5th level to be assigned in th2 reg */ + rising_threshold = + threshold_code << EXYNOS5440_TMU_TH_RISE4_SHIFT; + writel(rising_threshold, data->base + EXYNOS5440_TMU_S0_7_TH2); + con = readl(data->base + EXYNOS5440_TMU_S0_7_CTRL); + con |= (1 << EXYNOS_TMU_THERM_TRIP_EN_SHIFT); + writel(con, data->base + EXYNOS5440_TMU_S0_7_CTRL); + } + /* Clear the PMIN in the common TMU register */ + if (!data->id) + writel(0, data->base_second + EXYNOS5440_TMU_PMIN); + return ret; } -#ifdef CONFIG_THERMAL_EMULATION -static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) +static void exynos4210_tmu_control(struct platform_device *pdev, bool on) { - struct exynos_tmu_data *data = drv_data; + struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct exynos_tmu_platform_data *pdata = data->pdata; - const struct exynos_tmu_registers *reg = pdata->registers; - unsigned int val; - int ret = -EINVAL; + unsigned int con, interrupt_en; - if (!TMU_SUPPORTS(pdata, EMULATION)) - goto out; + con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL)); - if (temp && temp < MCELSIUS) - goto out; + if (on) { + con |= (1 << EXYNOS_TMU_CORE_EN_SHIFT); + interrupt_en = + pdata->trigger_enable[3] << EXYNOS_TMU_INTEN_RISE3_SHIFT | + pdata->trigger_enable[2] << EXYNOS_TMU_INTEN_RISE2_SHIFT | + pdata->trigger_enable[1] << EXYNOS_TMU_INTEN_RISE1_SHIFT | + pdata->trigger_enable[0] << EXYNOS_TMU_INTEN_RISE0_SHIFT; + if (data->soc != SOC_ARCH_EXYNOS4210) + interrupt_en |= + interrupt_en << EXYNOS_TMU_INTEN_FALL0_SHIFT; + } else { + con &= ~(1 << EXYNOS_TMU_CORE_EN_SHIFT); + interrupt_en = 0; /* Disable all interrupts */ + } + writel(interrupt_en, data->base + EXYNOS_TMU_REG_INTEN); + writel(con, data->base + EXYNOS_TMU_REG_CONTROL); +} + +static void exynos5440_tmu_control(struct platform_device *pdev, bool on) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct exynos_tmu_platform_data *pdata = data->pdata; + unsigned int con, interrupt_en; + + con = get_con_reg(data, readl(data->base + EXYNOS5440_TMU_S0_7_CTRL)); + + if (on) { + con |= (1 << EXYNOS_TMU_CORE_EN_SHIFT); + interrupt_en = + pdata->trigger_enable[3] << EXYNOS5440_TMU_INTEN_RISE3_SHIFT | + pdata->trigger_enable[2] << EXYNOS5440_TMU_INTEN_RISE2_SHIFT | + pdata->trigger_enable[1] << EXYNOS5440_TMU_INTEN_RISE1_SHIFT | + pdata->trigger_enable[0] << EXYNOS5440_TMU_INTEN_RISE0_SHIFT; + interrupt_en |= interrupt_en << EXYNOS5440_TMU_INTEN_FALL0_SHIFT; + } else { + con &= ~(1 << EXYNOS_TMU_CORE_EN_SHIFT); + interrupt_en = 0; /* Disable all interrupts */ + } + writel(interrupt_en, data->base + 
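+		/* on exynos5440 the rise0..3 enables sit at bits 0..3 and
+		 * fall0 at bit 4, unlike the generic EXYNOS_TMU_INTEN
+		 * layout whose enables are spaced 4 bits apart with fall0
+		 * at bit 16. */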
EXYNOS5440_TMU_S0_7_IRQEN); + writel(con, data->base + EXYNOS5440_TMU_S0_7_CTRL); +} + +static int exynos_tmu_read(struct exynos_tmu_data *data) +{ + int ret; mutex_lock(&data->lock); clk_enable(data->clk); + ret = data->tmu_read(data); + if (ret >= 0) + ret = code_to_temp(data, ret); + clk_disable(data->clk); + mutex_unlock(&data->lock); - val = readl(data->base + reg->emul_con); + return ret; +} +#ifdef CONFIG_THERMAL_EMULATION +static u32 get_emul_con_reg(struct exynos_tmu_data *data, unsigned int val, + unsigned long temp) +{ if (temp) { temp /= MCELSIUS; - if (TMU_SUPPORTS(pdata, EMUL_TIME)) { - val &= ~(EXYNOS_EMUL_TIME_MASK << reg->emul_time_shift); - val |= (EXYNOS_EMUL_TIME << reg->emul_time_shift); + if (data->soc != SOC_ARCH_EXYNOS5440) { + val &= ~(EXYNOS_EMUL_TIME_MASK << EXYNOS_EMUL_TIME_SHIFT); + val |= (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT); } - val &= ~(EXYNOS_EMUL_DATA_MASK << reg->emul_temp_shift); - val |= (temp_to_code(data, temp) << reg->emul_temp_shift) | + val &= ~(EXYNOS_EMUL_DATA_MASK << EXYNOS_EMUL_DATA_SHIFT); + val |= (temp_to_code(data, temp) << EXYNOS_EMUL_DATA_SHIFT) | EXYNOS_EMUL_ENABLE; } else { val &= ~EXYNOS_EMUL_ENABLE; } - writel(val, data->base + reg->emul_con); + return val; +} + +static void exynos4412_tmu_set_emulation(struct exynos_tmu_data *data, + unsigned long temp) +{ + unsigned int val; + u32 emul_con; + + if (data->soc == SOC_ARCH_EXYNOS5260) + emul_con = EXYNOS5260_EMUL_CON; + else + emul_con = EXYNOS_EMUL_CON; + + val = readl(data->base + emul_con); + val = get_emul_con_reg(data, val, temp); + writel(val, data->base + emul_con); +} + +static void exynos5440_tmu_set_emulation(struct exynos_tmu_data *data, + unsigned long temp) +{ + unsigned int val; + + val = readl(data->base + EXYNOS5440_TMU_S0_7_DEBUG); + val = get_emul_con_reg(data, val, temp); + writel(val, data->base + EXYNOS5440_TMU_S0_7_DEBUG); +} + +static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) +{ + struct exynos_tmu_data *data = drv_data; + int ret = -EINVAL; + + if (data->soc == SOC_ARCH_EXYNOS4210) + goto out; + if (temp && temp < MCELSIUS) + goto out; + + mutex_lock(&data->lock); + clk_enable(data->clk); + data->tmu_set_emulation(data, temp); clk_disable(data->clk); mutex_unlock(&data->lock); return 0; @@ -401,23 +572,41 @@ out: return ret; } #else +#define exynos4412_tmu_set_emulation NULL +#define exynos5440_tmu_set_emulation NULL static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) { return -EINVAL; } #endif/*CONFIG_THERMAL_EMULATION*/ +static int exynos4210_tmu_read(struct exynos_tmu_data *data) +{ + int ret = readb(data->base + EXYNOS_TMU_REG_CURRENT_TEMP); + + /* "temp_code" should range between 75 and 175 */ + return (ret < 75 || ret > 175) ? 
-ENODATA : ret; +} + +static int exynos4412_tmu_read(struct exynos_tmu_data *data) +{ + return readb(data->base + EXYNOS_TMU_REG_CURRENT_TEMP); +} + +static int exynos5440_tmu_read(struct exynos_tmu_data *data) +{ + return readb(data->base + EXYNOS5440_TMU_S0_7_TEMP); +} + static void exynos_tmu_work(struct work_struct *work) { struct exynos_tmu_data *data = container_of(work, struct exynos_tmu_data, irq_work); - struct exynos_tmu_platform_data *pdata = data->pdata; - const struct exynos_tmu_registers *reg = pdata->registers; unsigned int val_type; if (!IS_ERR(data->clk_sec)) clk_enable(data->clk_sec); /* Find which sensor generated this interrupt */ - if (reg->tmu_irqstatus) { - val_type = readl(data->base_second + reg->tmu_irqstatus); + if (data->soc == SOC_ARCH_EXYNOS5440) { + val_type = readl(data->base_second + EXYNOS5440_TMU_IRQ_STATUS); if (!((val_type >> data->id) & 0x1)) goto out; } @@ -429,7 +618,7 @@ static void exynos_tmu_work(struct work_struct *work) clk_enable(data->clk); /* TODO: take action based on particular interrupt */ - exynos_tmu_clear_irqs(data); + data->tmu_clear_irqs(data); clk_disable(data->clk); mutex_unlock(&data->lock); @@ -437,6 +626,40 @@ out: enable_irq(data->irq); } +static void exynos4210_tmu_clear_irqs(struct exynos_tmu_data *data) +{ + unsigned int val_irq; + u32 tmu_intstat, tmu_intclear; + + if (data->soc == SOC_ARCH_EXYNOS5260) { + tmu_intstat = EXYNOS5260_TMU_REG_INTSTAT; + tmu_intclear = EXYNOS5260_TMU_REG_INTCLEAR; + } else { + tmu_intstat = EXYNOS_TMU_REG_INTSTAT; + tmu_intclear = EXYNOS_TMU_REG_INTCLEAR; + } + + val_irq = readl(data->base + tmu_intstat); + /* + * Clear the interrupts. Please note that the documentation for + * Exynos3250, Exynos4412, Exynos5250 and Exynos5260 incorrectly + * states that INTCLEAR register has a different placing of bits + * responsible for FALL IRQs than INTSTAT register. Exynos5420 + * and Exynos5440 documentation is correct (Exynos4210 doesn't + * support FALL IRQs at all). 
+ */ + writel(val_irq, data->base + tmu_intclear); +} + +static void exynos5440_tmu_clear_irqs(struct exynos_tmu_data *data) +{ + unsigned int val_irq; + + val_irq = readl(data->base + EXYNOS5440_TMU_S0_7_IRQ); + /* clear the interrupts */ + writel(val_irq, data->base + EXYNOS5440_TMU_S0_7_IRQ); +} + static irqreturn_t exynos_tmu_irq(int irq, void *id) { struct exynos_tmu_data *data = id; @@ -450,35 +673,35 @@ static irqreturn_t exynos_tmu_irq(int irq, void *id) static const struct of_device_id exynos_tmu_match[] = { { .compatible = "samsung,exynos3250-tmu", - .data = (void *)EXYNOS3250_TMU_DRV_DATA, + .data = &exynos3250_default_tmu_data, }, { .compatible = "samsung,exynos4210-tmu", - .data = (void *)EXYNOS4210_TMU_DRV_DATA, + .data = &exynos4210_default_tmu_data, }, { .compatible = "samsung,exynos4412-tmu", - .data = (void *)EXYNOS4412_TMU_DRV_DATA, + .data = &exynos4412_default_tmu_data, }, { .compatible = "samsung,exynos5250-tmu", - .data = (void *)EXYNOS5250_TMU_DRV_DATA, + .data = &exynos5250_default_tmu_data, }, { .compatible = "samsung,exynos5260-tmu", - .data = (void *)EXYNOS5260_TMU_DRV_DATA, + .data = &exynos5260_default_tmu_data, }, { .compatible = "samsung,exynos5420-tmu", - .data = (void *)EXYNOS5420_TMU_DRV_DATA, + .data = &exynos5420_default_tmu_data, }, { .compatible = "samsung,exynos5420-tmu-ext-triminfo", - .data = (void *)EXYNOS5420_TMU_DRV_DATA, + .data = &exynos5420_default_tmu_data, }, { .compatible = "samsung,exynos5440-tmu", - .data = (void *)EXYNOS5440_TMU_DRV_DATA, + .data = &exynos5440_default_tmu_data, }, {}, }; @@ -553,12 +776,47 @@ static int exynos_map_dt_data(struct platform_device *pdev) dev_err(&pdev->dev, "No platform init data supplied.\n"); return -ENODEV; } + data->pdata = pdata; + data->soc = pdata->type; + + switch (data->soc) { + case SOC_ARCH_EXYNOS4210: + data->tmu_initialize = exynos4210_tmu_initialize; + data->tmu_control = exynos4210_tmu_control; + data->tmu_read = exynos4210_tmu_read; + data->tmu_clear_irqs = exynos4210_tmu_clear_irqs; + break; + case SOC_ARCH_EXYNOS3250: + case SOC_ARCH_EXYNOS4412: + case SOC_ARCH_EXYNOS5250: + case SOC_ARCH_EXYNOS5260: + case SOC_ARCH_EXYNOS5420: + case SOC_ARCH_EXYNOS5420_TRIMINFO: + data->tmu_initialize = exynos4412_tmu_initialize; + data->tmu_control = exynos4210_tmu_control; + data->tmu_read = exynos4412_tmu_read; + data->tmu_set_emulation = exynos4412_tmu_set_emulation; + data->tmu_clear_irqs = exynos4210_tmu_clear_irqs; + break; + case SOC_ARCH_EXYNOS5440: + data->tmu_initialize = exynos5440_tmu_initialize; + data->tmu_control = exynos5440_tmu_control; + data->tmu_read = exynos5440_tmu_read; + data->tmu_set_emulation = exynos5440_tmu_set_emulation; + data->tmu_clear_irqs = exynos5440_tmu_clear_irqs; + break; + default: + dev_err(&pdev->dev, "Platform not supported\n"); + return -EINVAL; + } + /* * Check if the TMU shares some registers and then try to map the * memory of common registers. 
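 * (The second region holds the shared TRIMINFO block for
 * exynos5420-tmu-ext-triminfo and the common IRQ status/PMIN block
 * for exynos5440, as used by the initialize and work paths above.)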
*/ - if (!TMU_SUPPORTS(pdata, ADDRESS_MULTIPLE)) + if (data->soc != SOC_ARCH_EXYNOS5420_TRIMINFO && + data->soc != SOC_ARCH_EXYNOS5440) return 0; if (of_address_to_resource(pdev->dev.of_node, 1, &res)) { @@ -625,20 +883,6 @@ static int exynos_tmu_probe(struct platform_device *pdev) goto err_clk_sec; } - if (pdata->type == SOC_ARCH_EXYNOS3250 || - pdata->type == SOC_ARCH_EXYNOS4210 || - pdata->type == SOC_ARCH_EXYNOS4412 || - pdata->type == SOC_ARCH_EXYNOS5250 || - pdata->type == SOC_ARCH_EXYNOS5260 || - pdata->type == SOC_ARCH_EXYNOS5420_TRIMINFO || - pdata->type == SOC_ARCH_EXYNOS5440) - data->soc = pdata->type; - else { - ret = -EINVAL; - dev_err(&pdev->dev, "Platform not supported\n"); - goto err_clk; - } - ret = exynos_tmu_initialize(pdev); if (ret) { dev_err(&pdev->dev, "Failed to initialize TMU\n"); diff --git a/drivers/thermal/samsung/exynos_tmu.h b/drivers/thermal/samsung/exynos_tmu.h index c58c7663a3f..da3009bff6c 100644 --- a/drivers/thermal/samsung/exynos_tmu.h +++ b/drivers/thermal/samsung/exynos_tmu.h @@ -40,115 +40,12 @@ enum soc_type { SOC_ARCH_EXYNOS4412, SOC_ARCH_EXYNOS5250, SOC_ARCH_EXYNOS5260, + SOC_ARCH_EXYNOS5420, SOC_ARCH_EXYNOS5420_TRIMINFO, SOC_ARCH_EXYNOS5440, }; /** - * EXYNOS TMU supported features. - * TMU_SUPPORT_EMULATION - This features is used to set user defined - * temperature to the TMU controller. - * TMU_SUPPORT_MULTI_INST - This features denotes that the soc - * has many instances of TMU. - * TMU_SUPPORT_TRIM_RELOAD - This features shows that trimming can - * be reloaded. - * TMU_SUPPORT_FALLING_TRIP - This features shows that interrupt can - * be registered for falling trips also. - * TMU_SUPPORT_READY_STATUS - This feature tells that the TMU current - * state(active/idle) can be checked. - * TMU_SUPPORT_EMUL_TIME - This features allows to set next temp emulation - * sample time. - * TMU_SUPPORT_ADDRESS_MULTIPLE - This feature tells that the different TMU - * sensors shares some common registers. - * TMU_SUPPORT - macro to compare the above features with the supplied. - */ -#define TMU_SUPPORT_EMULATION BIT(0) -#define TMU_SUPPORT_MULTI_INST BIT(1) -#define TMU_SUPPORT_TRIM_RELOAD BIT(2) -#define TMU_SUPPORT_FALLING_TRIP BIT(3) -#define TMU_SUPPORT_READY_STATUS BIT(4) -#define TMU_SUPPORT_EMUL_TIME BIT(5) -#define TMU_SUPPORT_ADDRESS_MULTIPLE BIT(6) - -#define TMU_SUPPORTS(a, b) (a->features & TMU_SUPPORT_ ## b) - -/** - * struct exynos_tmu_register - register descriptors to access registers and - * bitfields. The register validity, offsets and bitfield values may vary - * slightly across different exynos SOC's. - * @triminfo_data: register containing 2 pont trimming data - * @triminfo_ctrl: trim info controller register. - * @triminfo_ctrl_count: the number of trim info controller register. - * @tmu_ctrl: TMU main controller register. - * @test_mux_addr_shift: shift bits of test mux address. - * @therm_trip_mode_shift: shift bits of tripping mode in tmu_ctrl register. - * @therm_trip_mode_mask: mask bits of tripping mode in tmu_ctrl register. - * @therm_trip_en_shift: shift bits of tripping enable in tmu_ctrl register. - * @tmu_status: register drescribing the TMU status. - * @tmu_cur_temp: register containing the current temperature of the TMU. - * @threshold_temp: register containing the base threshold level. - * @threshold_th0: Register containing first set of rising levels. - * @threshold_th1: Register containing second set of rising levels. - * @threshold_th2: Register containing third set of rising levels. 
- * @threshold_th3_l0_shift: shift bits of level0 threshold temperature. - * @tmu_inten: register containing the different threshold interrupt - enable bits. - * @inten_rise0_shift: shift bits of rising 0 interrupt bits. - * @inten_rise1_shift: shift bits of rising 1 interrupt bits. - * @inten_rise2_shift: shift bits of rising 2 interrupt bits. - * @inten_rise3_shift: shift bits of rising 3 interrupt bits. - * @inten_fall0_shift: shift bits of falling 0 interrupt bits. - * @tmu_intstat: Register containing the interrupt status values. - * @tmu_intclear: Register for clearing the raised interrupt status. - * @emul_con: TMU emulation controller register. - * @emul_temp_shift: shift bits of emulation temperature. - * @emul_time_shift: shift bits of emulation time. - * @tmu_irqstatus: register to find which TMU generated interrupts. - * @tmu_pmin: register to get/set the Pmin value. - */ -struct exynos_tmu_registers { - u32 triminfo_data; - - u32 triminfo_ctrl[MAX_TRIMINFO_CTRL_REG]; - u32 triminfo_ctrl_count; - - u32 tmu_ctrl; - u32 test_mux_addr_shift; - u32 therm_trip_mode_shift; - u32 therm_trip_mode_mask; - u32 therm_trip_en_shift; - - u32 tmu_status; - - u32 tmu_cur_temp; - - u32 threshold_temp; - - u32 threshold_th0; - u32 threshold_th1; - u32 threshold_th2; - u32 threshold_th3_l0_shift; - - u32 tmu_inten; - u32 inten_rise0_shift; - u32 inten_rise1_shift; - u32 inten_rise2_shift; - u32 inten_rise3_shift; - u32 inten_fall0_shift; - - u32 tmu_intstat; - - u32 tmu_intclear; - - u32 emul_con; - u32 emul_temp_shift; - u32 emul_time_shift; - - u32 tmu_irqstatus; - u32 tmu_pmin; -}; - -/** * struct exynos_tmu_platform_data * @threshold: basic temperature for generating interrupt * 25 <= threshold <= 125 [unit: degree Celsius] @@ -192,16 +89,10 @@ struct exynos_tmu_registers { * @first_point_trim: temp value of the first point trimming * @second_point_trim: temp value of the second point trimming * @default_temp_offset: default temperature offset in case of no trimming - * @test_mux; information if SoC supports test MUX - * @triminfo_reload: reload value to read TRIMINFO register * @cal_type: calibration type for temperature * @freq_clip_table: Table representing frequency reduction percentage. * @freq_tab_count: Count of the above table as frequency reduction may * applicable to only some of the trigger levels. - * @registers: Pointer to structure containing all the TMU controller registers - * and bitfields shifts and masks. - * @features: a bitfield value indicating the features supported in SOC like - * emulation, multi instance etc * * This structure is required for configuration of exynos_tmu driver. 
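 * For concrete instances of this structure see the per-SoC
 * exynos*_default_tmu_data tables in exynos_tmu_data.c later in
 * this patch.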
*/ @@ -223,15 +114,11 @@ struct exynos_tmu_platform_data { u8 first_point_trim; u8 second_point_trim; u8 default_temp_offset; - u8 test_mux; - u8 triminfo_reload[MAX_TRIMINFO_CTRL_REG]; enum calibration_type cal_type; enum soc_type type; struct freq_clip_table freq_tab[4]; unsigned int freq_tab_count; - const struct exynos_tmu_registers *registers; - unsigned int features; }; /** @@ -246,4 +133,12 @@ struct exynos_tmu_init_data { struct exynos_tmu_platform_data tmu_data[]; }; +extern struct exynos_tmu_init_data const exynos3250_default_tmu_data; +extern struct exynos_tmu_init_data const exynos4210_default_tmu_data; +extern struct exynos_tmu_init_data const exynos4412_default_tmu_data; +extern struct exynos_tmu_init_data const exynos5250_default_tmu_data; +extern struct exynos_tmu_init_data const exynos5260_default_tmu_data; +extern struct exynos_tmu_init_data const exynos5420_default_tmu_data; +extern struct exynos_tmu_init_data const exynos5440_default_tmu_data; + #endif /* _EXYNOS_TMU_H */ diff --git a/drivers/thermal/samsung/exynos_tmu_data.c b/drivers/thermal/samsung/exynos_tmu_data.c index 1724f6cdaef..b23910069f6 100644 --- a/drivers/thermal/samsung/exynos_tmu_data.c +++ b/drivers/thermal/samsung/exynos_tmu_data.c @@ -22,24 +22,6 @@ #include "exynos_thermal_common.h" #include "exynos_tmu.h" -#include "exynos_tmu_data.h" - -#if defined(CONFIG_CPU_EXYNOS4210) -static const struct exynos_tmu_registers exynos4210_tmu_registers = { - .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, - .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, - .tmu_status = EXYNOS_TMU_REG_STATUS, - .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, - .threshold_temp = EXYNOS4210_TMU_REG_THRESHOLD_TEMP, - .threshold_th0 = EXYNOS4210_TMU_REG_TRIG_LEVEL0, - .tmu_inten = EXYNOS_TMU_REG_INTEN, - .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, - .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, - .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, - .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT, - .tmu_intstat = EXYNOS_TMU_REG_INTSTAT, - .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR, -}; struct exynos_tmu_init_data const exynos4210_default_tmu_data = { .tmu_data = { @@ -75,40 +57,10 @@ struct exynos_tmu_init_data const exynos4210_default_tmu_data = { }, .freq_tab_count = 2, .type = SOC_ARCH_EXYNOS4210, - .registers = &exynos4210_tmu_registers, - .features = TMU_SUPPORT_READY_STATUS, }, }, .tmu_count = 1, }; -#endif - -#if defined(CONFIG_SOC_EXYNOS3250) -static const struct exynos_tmu_registers exynos3250_tmu_registers = { - .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, - .triminfo_ctrl[0] = EXYNOS_TMU_TRIMINFO_CON1, - .triminfo_ctrl[1] = EXYNOS_TMU_TRIMINFO_CON2, - .triminfo_ctrl_count = 2, - .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, - .test_mux_addr_shift = EXYNOS4412_MUX_ADDR_SHIFT, - .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, - .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, - .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, - .tmu_status = EXYNOS_TMU_REG_STATUS, - .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, - .threshold_th0 = EXYNOS_THD_TEMP_RISE, - .threshold_th1 = EXYNOS_THD_TEMP_FALL, - .tmu_inten = EXYNOS_TMU_REG_INTEN, - .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, - .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, - .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, - .inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT, - .tmu_intstat = EXYNOS_TMU_REG_INTSTAT, - .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR, - .emul_con = EXYNOS_EMUL_CON, - .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, - .emul_time_shift = 
EXYNOS_EMUL_TIME_SHIFT, -}; #define EXYNOS3250_TMU_DATA \ .threshold_falling = 10, \ @@ -144,54 +96,17 @@ static const struct exynos_tmu_registers exynos3250_tmu_registers = { .freq_clip_max = 400 * 1000, \ .temp_level = 95, \ }, \ - .freq_tab_count = 2, \ - .triminfo_reload[0] = EXYNOS_TRIMINFO_RELOAD_ENABLE, \ - .triminfo_reload[1] = EXYNOS_TRIMINFO_RELOAD_ENABLE, \ - .registers = &exynos3250_tmu_registers, \ - .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_TRIM_RELOAD | \ - TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \ - TMU_SUPPORT_EMUL_TIME) -#endif + .freq_tab_count = 2 -#if defined(CONFIG_SOC_EXYNOS3250) struct exynos_tmu_init_data const exynos3250_default_tmu_data = { .tmu_data = { { EXYNOS3250_TMU_DATA, .type = SOC_ARCH_EXYNOS3250, - .test_mux = EXYNOS4412_MUX_ADDR_VALUE, }, }, .tmu_count = 1, }; -#endif - -#if defined(CONFIG_SOC_EXYNOS4412) || defined(CONFIG_SOC_EXYNOS5250) -static const struct exynos_tmu_registers exynos4412_tmu_registers = { - .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, - .triminfo_ctrl[0] = EXYNOS_TMU_TRIMINFO_CON2, - .triminfo_ctrl_count = 1, - .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, - .test_mux_addr_shift = EXYNOS4412_MUX_ADDR_SHIFT, - .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, - .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, - .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, - .tmu_status = EXYNOS_TMU_REG_STATUS, - .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, - .threshold_th0 = EXYNOS_THD_TEMP_RISE, - .threshold_th1 = EXYNOS_THD_TEMP_FALL, - .tmu_inten = EXYNOS_TMU_REG_INTEN, - .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, - .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, - .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, - .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT, - .inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT, - .tmu_intstat = EXYNOS_TMU_REG_INTSTAT, - .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR, - .emul_con = EXYNOS_EMUL_CON, - .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, - .emul_time_shift = EXYNOS_EMUL_TIME_SHIFT, -}; #define EXYNOS4412_TMU_DATA \ .threshold_falling = 10, \ @@ -227,28 +142,18 @@ static const struct exynos_tmu_registers exynos4412_tmu_registers = { .freq_clip_max = 400 * 1000, \ .temp_level = 95, \ }, \ - .freq_tab_count = 2, \ - .triminfo_reload[0] = EXYNOS_TRIMINFO_RELOAD_ENABLE, \ - .registers = &exynos4412_tmu_registers, \ - .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_TRIM_RELOAD | \ - TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \ - TMU_SUPPORT_EMUL_TIME) -#endif + .freq_tab_count = 2 -#if defined(CONFIG_SOC_EXYNOS4412) struct exynos_tmu_init_data const exynos4412_default_tmu_data = { .tmu_data = { { EXYNOS4412_TMU_DATA, .type = SOC_ARCH_EXYNOS4412, - .test_mux = EXYNOS4412_MUX_ADDR_VALUE, }, }, .tmu_count = 1, }; -#endif -#if defined(CONFIG_SOC_EXYNOS5250) struct exynos_tmu_init_data const exynos5250_default_tmu_data = { .tmu_data = { { @@ -258,31 +163,6 @@ struct exynos_tmu_init_data const exynos5250_default_tmu_data = { }, .tmu_count = 1, }; -#endif - -#if defined(CONFIG_SOC_EXYNOS5260) -static const struct exynos_tmu_registers exynos5260_tmu_registers = { - .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, - .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, - .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, - .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, - .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, - .tmu_status = EXYNOS_TMU_REG_STATUS, - .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, - .threshold_th0 = EXYNOS_THD_TEMP_RISE, - .threshold_th1 = EXYNOS_THD_TEMP_FALL, - 
.tmu_inten = EXYNOS5260_TMU_REG_INTEN, - .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, - .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, - .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, - .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT, - .inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT, - .tmu_intstat = EXYNOS5260_TMU_REG_INTSTAT, - .tmu_intclear = EXYNOS5260_TMU_REG_INTCLEAR, - .emul_con = EXYNOS5260_EMUL_CON, - .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, - .emul_time_shift = EXYNOS_EMUL_TIME_SHIFT, -}; #define __EXYNOS5260_TMU_DATA \ .threshold_falling = 10, \ @@ -319,13 +199,10 @@ static const struct exynos_tmu_registers exynos5260_tmu_registers = { .temp_level = 103, \ }, \ .freq_tab_count = 2, \ - .registers = &exynos5260_tmu_registers, \ #define EXYNOS5260_TMU_DATA \ __EXYNOS5260_TMU_DATA \ - .type = SOC_ARCH_EXYNOS5260, \ - .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \ - TMU_SUPPORT_READY_STATUS | TMU_SUPPORT_EMUL_TIME) + .type = SOC_ARCH_EXYNOS5260 struct exynos_tmu_init_data const exynos5260_default_tmu_data = { .tmu_data = { @@ -337,82 +214,14 @@ struct exynos_tmu_init_data const exynos5260_default_tmu_data = { }, .tmu_count = 5, }; -#endif - -#if defined(CONFIG_SOC_EXYNOS5420) -static const struct exynos_tmu_registers exynos5420_tmu_registers = { - .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, - .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, - .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, - .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, - .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, - .tmu_status = EXYNOS_TMU_REG_STATUS, - .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, - .threshold_th0 = EXYNOS_THD_TEMP_RISE, - .threshold_th1 = EXYNOS_THD_TEMP_FALL, - .tmu_inten = EXYNOS_TMU_REG_INTEN, - .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, - .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, - .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, - /* INTEN_RISE3 Not availble in exynos5420 */ - .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT, - .inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT, - .tmu_intstat = EXYNOS_TMU_REG_INTSTAT, - .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR, - .emul_con = EXYNOS_EMUL_CON, - .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, - .emul_time_shift = EXYNOS_EMUL_TIME_SHIFT, -}; - -#define __EXYNOS5420_TMU_DATA \ - .threshold_falling = 10, \ - .trigger_levels[0] = 85, \ - .trigger_levels[1] = 103, \ - .trigger_levels[2] = 110, \ - .trigger_levels[3] = 120, \ - .trigger_enable[0] = true, \ - .trigger_enable[1] = true, \ - .trigger_enable[2] = true, \ - .trigger_enable[3] = false, \ - .trigger_type[0] = THROTTLE_ACTIVE, \ - .trigger_type[1] = THROTTLE_ACTIVE, \ - .trigger_type[2] = SW_TRIP, \ - .trigger_type[3] = HW_TRIP, \ - .max_trigger_level = 4, \ - .non_hw_trigger_levels = 3, \ - .gain = 8, \ - .reference_voltage = 16, \ - .noise_cancel_mode = 4, \ - .cal_type = TYPE_ONE_POINT_TRIMMING, \ - .efuse_value = 55, \ - .min_efuse_value = 40, \ - .max_efuse_value = 100, \ - .first_point_trim = 25, \ - .second_point_trim = 85, \ - .default_temp_offset = 50, \ - .freq_tab[0] = { \ - .freq_clip_max = 800 * 1000, \ - .temp_level = 85, \ - }, \ - .freq_tab[1] = { \ - .freq_clip_max = 200 * 1000, \ - .temp_level = 103, \ - }, \ - .freq_tab_count = 2, \ - .registers = &exynos5420_tmu_registers, \ #define EXYNOS5420_TMU_DATA \ - __EXYNOS5420_TMU_DATA \ - .type = SOC_ARCH_EXYNOS5250, \ - .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \ - TMU_SUPPORT_READY_STATUS | TMU_SUPPORT_EMUL_TIME) + __EXYNOS5260_TMU_DATA \ + 
.type = SOC_ARCH_EXYNOS5420 #define EXYNOS5420_TMU_DATA_SHARED \ - __EXYNOS5420_TMU_DATA \ - .type = SOC_ARCH_EXYNOS5420_TRIMINFO, \ - .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \ - TMU_SUPPORT_READY_STATUS | TMU_SUPPORT_EMUL_TIME | \ - TMU_SUPPORT_ADDRESS_MULTIPLE) + __EXYNOS5260_TMU_DATA \ + .type = SOC_ARCH_EXYNOS5420_TRIMINFO struct exynos_tmu_init_data const exynos5420_default_tmu_data = { .tmu_data = { @@ -424,34 +233,6 @@ struct exynos_tmu_init_data const exynos5420_default_tmu_data = { }, .tmu_count = 5, }; -#endif - -#if defined(CONFIG_SOC_EXYNOS5440) -static const struct exynos_tmu_registers exynos5440_tmu_registers = { - .triminfo_data = EXYNOS5440_TMU_S0_7_TRIM, - .tmu_ctrl = EXYNOS5440_TMU_S0_7_CTRL, - .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, - .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, - .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, - .tmu_status = EXYNOS5440_TMU_S0_7_STATUS, - .tmu_cur_temp = EXYNOS5440_TMU_S0_7_TEMP, - .threshold_th0 = EXYNOS5440_TMU_S0_7_TH0, - .threshold_th1 = EXYNOS5440_TMU_S0_7_TH1, - .threshold_th2 = EXYNOS5440_TMU_S0_7_TH2, - .threshold_th3_l0_shift = EXYNOS5440_TMU_TH_RISE4_SHIFT, - .tmu_inten = EXYNOS5440_TMU_S0_7_IRQEN, - .inten_rise0_shift = EXYNOS5440_TMU_INTEN_RISE0_SHIFT, - .inten_rise1_shift = EXYNOS5440_TMU_INTEN_RISE1_SHIFT, - .inten_rise2_shift = EXYNOS5440_TMU_INTEN_RISE2_SHIFT, - .inten_rise3_shift = EXYNOS5440_TMU_INTEN_RISE3_SHIFT, - .inten_fall0_shift = EXYNOS5440_TMU_INTEN_FALL0_SHIFT, - .tmu_intstat = EXYNOS5440_TMU_S0_7_IRQ, - .tmu_intclear = EXYNOS5440_TMU_S0_7_IRQ, - .tmu_irqstatus = EXYNOS5440_TMU_IRQ_STATUS, - .emul_con = EXYNOS5440_TMU_S0_7_DEBUG, - .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, - .tmu_pmin = EXYNOS5440_TMU_PMIN, -}; #define EXYNOS5440_TMU_DATA \ .trigger_levels[0] = 100, \ @@ -471,10 +252,7 @@ static const struct exynos_tmu_registers exynos5440_tmu_registers = { .first_point_trim = 25, \ .second_point_trim = 70, \ .default_temp_offset = 25, \ - .type = SOC_ARCH_EXYNOS5440, \ - .registers = &exynos5440_tmu_registers, \ - .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \ - TMU_SUPPORT_MULTI_INST | TMU_SUPPORT_ADDRESS_MULTIPLE), + .type = SOC_ARCH_EXYNOS5440 struct exynos_tmu_init_data const exynos5440_default_tmu_data = { .tmu_data = { @@ -484,4 +262,3 @@ struct exynos_tmu_init_data const exynos5440_default_tmu_data = { }, .tmu_count = 3, }; -#endif diff --git a/drivers/thermal/samsung/exynos_tmu_data.h b/drivers/thermal/samsung/exynos_tmu_data.h deleted file mode 100644 index 63de598c9c2..00000000000 --- a/drivers/thermal/samsung/exynos_tmu_data.h +++ /dev/null @@ -1,159 +0,0 @@ -/* - * exynos_tmu_data.h - Samsung EXYNOS tmu data header file - * - * Copyright (C) 2013 Samsung Electronics - * Amit Daniel Kachhap <amit.daniel@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef _EXYNOS_TMU_DATA_H -#define _EXYNOS_TMU_DATA_H - -/* Exynos generic registers */ -#define EXYNOS_TMU_REG_TRIMINFO 0x0 -#define EXYNOS_TMU_REG_CONTROL 0x20 -#define EXYNOS_TMU_REG_STATUS 0x28 -#define EXYNOS_TMU_REG_CURRENT_TEMP 0x40 -#define EXYNOS_TMU_REG_INTEN 0x70 -#define EXYNOS_TMU_REG_INTSTAT 0x74 -#define EXYNOS_TMU_REG_INTCLEAR 0x78 - -#define EXYNOS_TMU_TEMP_MASK 0xff -#define EXYNOS_TMU_REF_VOLTAGE_SHIFT 24 -#define EXYNOS_TMU_REF_VOLTAGE_MASK 0x1f -#define EXYNOS_TMU_BUF_SLOPE_SEL_MASK 0xf -#define EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT 8 -#define EXYNOS_TMU_CORE_EN_SHIFT 0 - -/* Exynos3250 specific registers */ -#define EXYNOS_TMU_TRIMINFO_CON1 0x10 - -/* Exynos4210 specific registers */ -#define EXYNOS4210_TMU_REG_THRESHOLD_TEMP 0x44 -#define EXYNOS4210_TMU_REG_TRIG_LEVEL0 0x50 - -/* Exynos5250, Exynos4412, Exynos3250 specific registers */ -#define EXYNOS_TMU_TRIMINFO_CON2 0x14 -#define EXYNOS_THD_TEMP_RISE 0x50 -#define EXYNOS_THD_TEMP_FALL 0x54 -#define EXYNOS_EMUL_CON 0x80 - -#define EXYNOS_TRIMINFO_RELOAD_ENABLE 1 -#define EXYNOS_TRIMINFO_25_SHIFT 0 -#define EXYNOS_TRIMINFO_85_SHIFT 8 -#define EXYNOS_TMU_TRIP_MODE_SHIFT 13 -#define EXYNOS_TMU_TRIP_MODE_MASK 0x7 -#define EXYNOS_TMU_THERM_TRIP_EN_SHIFT 12 - -#define EXYNOS_TMU_INTEN_RISE0_SHIFT 0 -#define EXYNOS_TMU_INTEN_RISE1_SHIFT 4 -#define EXYNOS_TMU_INTEN_RISE2_SHIFT 8 -#define EXYNOS_TMU_INTEN_RISE3_SHIFT 12 -#define EXYNOS_TMU_INTEN_FALL0_SHIFT 16 - -#define EXYNOS_EMUL_TIME 0x57F0 -#define EXYNOS_EMUL_TIME_MASK 0xffff -#define EXYNOS_EMUL_TIME_SHIFT 16 -#define EXYNOS_EMUL_DATA_SHIFT 8 -#define EXYNOS_EMUL_DATA_MASK 0xFF -#define EXYNOS_EMUL_ENABLE 0x1 - -#define EXYNOS_MAX_TRIGGER_PER_REG 4 - -/* Exynos5260 specific */ -#define EXYNOS5260_TMU_REG_INTEN 0xC0 -#define EXYNOS5260_TMU_REG_INTSTAT 0xC4 -#define EXYNOS5260_TMU_REG_INTCLEAR 0xC8 -#define EXYNOS5260_EMUL_CON 0x100 - -/* Exynos4412 specific */ -#define EXYNOS4412_MUX_ADDR_VALUE 6 -#define EXYNOS4412_MUX_ADDR_SHIFT 20 - -/*exynos5440 specific registers*/ -#define EXYNOS5440_TMU_S0_7_TRIM 0x000 -#define EXYNOS5440_TMU_S0_7_CTRL 0x020 -#define EXYNOS5440_TMU_S0_7_DEBUG 0x040 -#define EXYNOS5440_TMU_S0_7_STATUS 0x060 -#define EXYNOS5440_TMU_S0_7_TEMP 0x0f0 -#define EXYNOS5440_TMU_S0_7_TH0 0x110 -#define EXYNOS5440_TMU_S0_7_TH1 0x130 -#define EXYNOS5440_TMU_S0_7_TH2 0x150 -#define EXYNOS5440_TMU_S0_7_IRQEN 0x210 -#define EXYNOS5440_TMU_S0_7_IRQ 0x230 -/* exynos5440 common registers */ -#define EXYNOS5440_TMU_IRQ_STATUS 0x000 -#define EXYNOS5440_TMU_PMIN 0x004 - -#define EXYNOS5440_TMU_INTEN_RISE0_SHIFT 0 -#define EXYNOS5440_TMU_INTEN_RISE1_SHIFT 1 -#define EXYNOS5440_TMU_INTEN_RISE2_SHIFT 2 -#define EXYNOS5440_TMU_INTEN_RISE3_SHIFT 3 -#define EXYNOS5440_TMU_INTEN_FALL0_SHIFT 4 -#define EXYNOS5440_TMU_TH_RISE4_SHIFT 24 -#define EXYNOS5440_EFUSE_SWAP_OFFSET 8 - -#if defined(CONFIG_SOC_EXYNOS3250) -extern struct exynos_tmu_init_data const exynos3250_default_tmu_data; -#define EXYNOS3250_TMU_DRV_DATA (&exynos3250_default_tmu_data) -#else -#define EXYNOS3250_TMU_DRV_DATA (NULL) -#endif - -#if defined(CONFIG_CPU_EXYNOS4210) -extern struct exynos_tmu_init_data const exynos4210_default_tmu_data; -#define EXYNOS4210_TMU_DRV_DATA (&exynos4210_default_tmu_data) -#else -#define EXYNOS4210_TMU_DRV_DATA (NULL) -#endif - -#if 
defined(CONFIG_SOC_EXYNOS4412) -extern struct exynos_tmu_init_data const exynos4412_default_tmu_data; -#define EXYNOS4412_TMU_DRV_DATA (&exynos4412_default_tmu_data) -#else -#define EXYNOS4412_TMU_DRV_DATA (NULL) -#endif - -#if defined(CONFIG_SOC_EXYNOS5250) -extern struct exynos_tmu_init_data const exynos5250_default_tmu_data; -#define EXYNOS5250_TMU_DRV_DATA (&exynos5250_default_tmu_data) -#else -#define EXYNOS5250_TMU_DRV_DATA (NULL) -#endif - -#if defined(CONFIG_SOC_EXYNOS5260) -extern struct exynos_tmu_init_data const exynos5260_default_tmu_data; -#define EXYNOS5260_TMU_DRV_DATA (&exynos5260_default_tmu_data) -#else -#define EXYNOS5260_TMU_DRV_DATA (NULL) -#endif - -#if defined(CONFIG_SOC_EXYNOS5420) -extern struct exynos_tmu_init_data const exynos5420_default_tmu_data; -#define EXYNOS5420_TMU_DRV_DATA (&exynos5420_default_tmu_data) -#else -#define EXYNOS5420_TMU_DRV_DATA (NULL) -#endif - -#if defined(CONFIG_SOC_EXYNOS5440) -extern struct exynos_tmu_init_data const exynos5440_default_tmu_data; -#define EXYNOS5440_TMU_DRV_DATA (&exynos5440_default_tmu_data) -#else -#define EXYNOS5440_TMU_DRV_DATA (NULL) -#endif - -#endif /*_EXYNOS_TMU_DATA_H*/ diff --git a/drivers/thermal/tegra_soctherm.c b/drivers/thermal/tegra_soctherm.c new file mode 100644 index 00000000000..9197fc05c5c --- /dev/null +++ b/drivers/thermal/tegra_soctherm.c @@ -0,0 +1,476 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * Author: + * Mikko Perttunen <mperttunen@nvidia.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/reset.h> +#include <linux/thermal.h> + +#include <soc/tegra/fuse.h> + +#define SENSOR_CONFIG0 0 +#define SENSOR_CONFIG0_STOP BIT(0) +#define SENSOR_CONFIG0_TALL_SHIFT 8 +#define SENSOR_CONFIG0_TCALC_OVER BIT(4) +#define SENSOR_CONFIG0_OVER BIT(3) +#define SENSOR_CONFIG0_CPTR_OVER BIT(2) + +#define SENSOR_CONFIG1 4 +#define SENSOR_CONFIG1_TSAMPLE_SHIFT 0 +#define SENSOR_CONFIG1_TIDDQ_EN_SHIFT 15 +#define SENSOR_CONFIG1_TEN_COUNT_SHIFT 24 +#define SENSOR_CONFIG1_TEMP_ENABLE BIT(31) + +#define SENSOR_CONFIG2 8 +#define SENSOR_CONFIG2_THERMA_SHIFT 16 +#define SENSOR_CONFIG2_THERMB_SHIFT 0 + +#define SENSOR_PDIV 0x1c0 +#define SENSOR_PDIV_T124 0x8888 +#define SENSOR_HOTSPOT_OFF 0x1c4 +#define SENSOR_HOTSPOT_OFF_T124 0x00060600 +#define SENSOR_TEMP1 0x1c8 +#define SENSOR_TEMP2 0x1cc + +#define SENSOR_TEMP_MASK 0xffff +#define READBACK_VALUE_MASK 0xff00 +#define READBACK_VALUE_SHIFT 8 +#define READBACK_ADD_HALF BIT(7) +#define READBACK_NEGATE BIT(1) + +#define FUSE_TSENSOR8_CALIB 0x180 +#define FUSE_SPARE_REALIGNMENT_REG_0 0x1fc + +#define FUSE_TSENSOR_CALIB_CP_TS_BASE_MASK 0x1fff +#define FUSE_TSENSOR_CALIB_FT_TS_BASE_MASK (0x1fff << 13) +#define FUSE_TSENSOR_CALIB_FT_TS_BASE_SHIFT 13 + +#define FUSE_TSENSOR8_CALIB_CP_TS_BASE_MASK 0x3ff +#define FUSE_TSENSOR8_CALIB_FT_TS_BASE_MASK (0x7ff << 10) +#define FUSE_TSENSOR8_CALIB_FT_TS_BASE_SHIFT 10 + +#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_CP_MASK 0x3f +#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_MASK (0x1f << 21) +#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_SHIFT 21 + +#define NOMINAL_CALIB_FT_T124 105 +#define NOMINAL_CALIB_CP_T124 25 + +struct tegra_tsensor_configuration { + u32 tall, tsample, tiddq_en, ten_count, pdiv, tsample_ate, pdiv_ate; +}; + +struct tegra_tsensor { + const struct tegra_tsensor_configuration *config; + u32 base, calib_fuse_offset; + /* Correction values used to modify values read from calibration fuses */ + s32 fuse_corr_alpha, fuse_corr_beta; +}; + +struct tegra_thermctl_zone { + void __iomem *reg; + unsigned int shift; +}; + +static const struct tegra_tsensor_configuration t124_tsensor_config = { + .tall = 16300, + .tsample = 120, + .tiddq_en = 1, + .ten_count = 1, + .pdiv = 8, + .tsample_ate = 480, + .pdiv_ate = 8 +}; + +static const struct tegra_tsensor t124_tsensors[] = { + { + .config = &t124_tsensor_config, + .base = 0xc0, + .calib_fuse_offset = 0x098, + .fuse_corr_alpha = 1135400, + .fuse_corr_beta = -6266900, + }, + { + .config = &t124_tsensor_config, + .base = 0xe0, + .calib_fuse_offset = 0x084, + .fuse_corr_alpha = 1122220, + .fuse_corr_beta = -5700700, + }, + { + .config = &t124_tsensor_config, + .base = 0x100, + .calib_fuse_offset = 0x088, + .fuse_corr_alpha = 1127000, + .fuse_corr_beta = -6768200, + }, + { + .config = &t124_tsensor_config, + .base = 0x120, + .calib_fuse_offset = 0x12c, + .fuse_corr_alpha = 1110900, + .fuse_corr_beta = -6232000, + }, + { + .config = &t124_tsensor_config, + .base = 0x140, + .calib_fuse_offset = 0x158, + .fuse_corr_alpha = 1122300, + .fuse_corr_beta = -5936400, + }, + { + .config = &t124_tsensor_config, + .base = 0x160, + .calib_fuse_offset = 0x15c, + .fuse_corr_alpha = 1145700, + .fuse_corr_beta = -7124600, + }, + { + .config = &t124_tsensor_config, + .base = 0x180, + .calib_fuse_offset = 0x154, + .fuse_corr_alpha = 
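+		/* ppm-scale linear correction applied in
+		 * calculate_tsensor_calibration() below:
+		 * therma' = therma * alpha / 1000000 and
+		 * thermb' = (thermb * alpha + beta) / 1000000. */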
1120100, + .fuse_corr_beta = -6000500, + }, + { + .config = &t124_tsensor_config, + .base = 0x1a0, + .calib_fuse_offset = 0x160, + .fuse_corr_alpha = 1106500, + .fuse_corr_beta = -6729300, + }, +}; + +struct tegra_soctherm { + struct reset_control *reset; + struct clk *clock_tsensor; + struct clk *clock_soctherm; + void __iomem *regs; + + struct thermal_zone_device *thermctl_tzs[4]; +}; + +struct tsensor_shared_calibration { + u32 base_cp, base_ft; + u32 actual_temp_cp, actual_temp_ft; +}; + +static int calculate_shared_calibration(struct tsensor_shared_calibration *r) +{ + u32 val, shifted_cp, shifted_ft; + int err; + + err = tegra_fuse_readl(FUSE_TSENSOR8_CALIB, &val); + if (err) + return err; + r->base_cp = val & FUSE_TSENSOR8_CALIB_CP_TS_BASE_MASK; + r->base_ft = (val & FUSE_TSENSOR8_CALIB_FT_TS_BASE_MASK) + >> FUSE_TSENSOR8_CALIB_FT_TS_BASE_SHIFT; + val = ((val & FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_MASK) + >> FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_SHIFT); + shifted_ft = sign_extend32(val, 4); + + err = tegra_fuse_readl(FUSE_SPARE_REALIGNMENT_REG_0, &val); + if (err) + return err; + shifted_cp = sign_extend32(val, 5); + + r->actual_temp_cp = 2 * NOMINAL_CALIB_CP_T124 + shifted_cp; + r->actual_temp_ft = 2 * NOMINAL_CALIB_FT_T124 + shifted_ft; + + return 0; +} + +static s64 div64_s64_precise(s64 a, s64 b) +{ + s64 r, al; + + /* Scale up for increased precision division */ + al = a << 16; + + r = div64_s64(al * 2 + 1, 2 * b); + return r >> 16; +} + +static int +calculate_tsensor_calibration(const struct tegra_tsensor *sensor, + const struct tsensor_shared_calibration *shared, + u32 *calib) +{ + u32 val; + s32 actual_tsensor_ft, actual_tsensor_cp, delta_sens, delta_temp, + mult, div; + s16 therma, thermb; + s64 tmp; + int err; + + err = tegra_fuse_readl(sensor->calib_fuse_offset, &val); + if (err) + return err; + + actual_tsensor_cp = (shared->base_cp * 64) + sign_extend32(val, 12); + val = (val & FUSE_TSENSOR_CALIB_FT_TS_BASE_MASK) + >> FUSE_TSENSOR_CALIB_FT_TS_BASE_SHIFT; + actual_tsensor_ft = (shared->base_ft * 32) + sign_extend32(val, 12); + + delta_sens = actual_tsensor_ft - actual_tsensor_cp; + delta_temp = shared->actual_temp_ft - shared->actual_temp_cp; + + mult = sensor->config->pdiv * sensor->config->tsample_ate; + div = sensor->config->tsample * sensor->config->pdiv_ate; + + therma = div64_s64_precise((s64) delta_temp * (1LL << 13) * mult, + (s64) delta_sens * div); + + tmp = (s64)actual_tsensor_ft * shared->actual_temp_cp - + (s64)actual_tsensor_cp * shared->actual_temp_ft; + thermb = div64_s64_precise(tmp, (s64)delta_sens); + + therma = div64_s64_precise((s64)therma * sensor->fuse_corr_alpha, + (s64)1000000LL); + thermb = div64_s64_precise((s64)thermb * sensor->fuse_corr_alpha + + sensor->fuse_corr_beta, (s64)1000000LL); + + *calib = ((u16)therma << SENSOR_CONFIG2_THERMA_SHIFT) | + ((u16)thermb << SENSOR_CONFIG2_THERMB_SHIFT); + + return 0; +} + +static int enable_tsensor(struct tegra_soctherm *tegra, + const struct tegra_tsensor *sensor, + const struct tsensor_shared_calibration *shared) +{ + void __iomem *base = tegra->regs + sensor->base; + unsigned int val; + u32 calib; + int err; + + err = calculate_tsensor_calibration(sensor, shared, &calib); + if (err) + return err; + + val = sensor->config->tall << SENSOR_CONFIG0_TALL_SHIFT; + writel(val, base + SENSOR_CONFIG0); + + val = (sensor->config->tsample - 1) << SENSOR_CONFIG1_TSAMPLE_SHIFT; + val |= sensor->config->tiddq_en << SENSOR_CONFIG1_TIDDQ_EN_SHIFT; + val |= sensor->config->ten_count << SENSOR_CONFIG1_TEN_COUNT_SHIFT; + 
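	/*
	 * Aside on the calibration math above (a worked example, not
	 * driver code): the 2 * NOMINAL_CALIB_* scaling suggests the
	 * calibration temperatures are tracked in half-degree units, and
	 * the fuse fields are sign-extended before use. With a
	 * hypothetical 6-bit CP realignment fuse value of 0x3d:
	 *
	 *   shifted_cp     = sign_extend32(0x3d, 5) = -3
	 *   actual_temp_cp = 2 * NOMINAL_CALIB_CP_T124 + shifted_cp
	 *                  = 2 * 25 - 3 = 47, i.e. 23.5 degrees C
	 */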
val |= SENSOR_CONFIG1_TEMP_ENABLE; + writel(val, base + SENSOR_CONFIG1); + + writel(calib, base + SENSOR_CONFIG2); + + return 0; +} + +/* + * Translate from soctherm readback format to millicelsius. + * The soctherm readback format in bits is as follows: + * TTTTTTTT H______N + * where T's contain the temperature in Celsius, + * H denotes an addition of 0.5 Celsius and N denotes negation + * of the final value. + */ +static long translate_temp(u16 val) +{ + long t; + + t = ((val & READBACK_VALUE_MASK) >> READBACK_VALUE_SHIFT) * 1000; + if (val & READBACK_ADD_HALF) + t += 500; + if (val & READBACK_NEGATE) + t *= -1; + + return t; +} + +static int tegra_thermctl_get_temp(void *data, long *out_temp) +{ + struct tegra_thermctl_zone *zone = data; + u32 val; + + val = (readl(zone->reg) >> zone->shift) & SENSOR_TEMP_MASK; + *out_temp = translate_temp(val); + + return 0; +} + +static const struct thermal_zone_of_device_ops tegra_of_thermal_ops = { + .get_temp = tegra_thermctl_get_temp, +}; + +static const struct of_device_id tegra_soctherm_of_match[] = { + { .compatible = "nvidia,tegra124-soctherm" }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_soctherm_of_match); + +struct thermctl_zone_desc { + unsigned int offset; + unsigned int shift; +}; + +static const struct thermctl_zone_desc t124_thermctl_temp_zones[] = { + { SENSOR_TEMP1, 16 }, + { SENSOR_TEMP2, 16 }, + { SENSOR_TEMP1, 0 }, + { SENSOR_TEMP2, 0 } +}; + +static int tegra_soctherm_probe(struct platform_device *pdev) +{ + struct tegra_soctherm *tegra; + struct thermal_zone_device *tz; + struct tsensor_shared_calibration shared_calib; + struct resource *res; + unsigned int i; + int err; + + const struct tegra_tsensor *tsensors = t124_tsensors; + + tegra = devm_kzalloc(&pdev->dev, sizeof(*tegra), GFP_KERNEL); + if (!tegra) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + tegra->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(tegra->regs)) + return PTR_ERR(tegra->regs); + + tegra->reset = devm_reset_control_get(&pdev->dev, "soctherm"); + if (IS_ERR(tegra->reset)) { + dev_err(&pdev->dev, "can't get soctherm reset\n"); + return PTR_ERR(tegra->reset); + } + + tegra->clock_tsensor = devm_clk_get(&pdev->dev, "tsensor"); + if (IS_ERR(tegra->clock_tsensor)) { + dev_err(&pdev->dev, "can't get tsensor clock\n"); + return PTR_ERR(tegra->clock_tsensor); + } + + tegra->clock_soctherm = devm_clk_get(&pdev->dev, "soctherm"); + if (IS_ERR(tegra->clock_soctherm)) { + dev_err(&pdev->dev, "can't get soctherm clock\n"); + return PTR_ERR(tegra->clock_soctherm); + } + + reset_control_assert(tegra->reset); + + err = clk_prepare_enable(tegra->clock_soctherm); + if (err) + return err; + + err = clk_prepare_enable(tegra->clock_tsensor); + if (err) { + clk_disable_unprepare(tegra->clock_soctherm); + return err; + } + + reset_control_deassert(tegra->reset); + + /* Initialize raw sensors */ + + err = calculate_shared_calibration(&shared_calib); + if (err) + goto disable_clocks; + + for (i = 0; i < ARRAY_SIZE(t124_tsensors); ++i) { + err = enable_tsensor(tegra, tsensors + i, &shared_calib); + if (err) + goto disable_clocks; + } + + writel(SENSOR_PDIV_T124, tegra->regs + SENSOR_PDIV); + writel(SENSOR_HOTSPOT_OFF_T124, tegra->regs + SENSOR_HOTSPOT_OFF); + + /* Initialize thermctl sensors */ + + for (i = 0; i < ARRAY_SIZE(tegra->thermctl_tzs); ++i) { + struct tegra_thermctl_zone *zone = + devm_kzalloc(&pdev->dev, sizeof(*zone), GFP_KERNEL); + if (!zone) { + err = -ENOMEM; + goto unregister_tzs; + } + + zone->reg = tegra->regs + 
t124_thermctl_temp_zones[i].offset; + zone->shift = t124_thermctl_temp_zones[i].shift; + + tz = thermal_zone_of_sensor_register(&pdev->dev, i, zone, + &tegra_of_thermal_ops); + if (IS_ERR(tz)) { + err = PTR_ERR(tz); + dev_err(&pdev->dev, "failed to register sensor: %d\n", + err); + goto unregister_tzs; + } + + tegra->thermctl_tzs[i] = tz; + } + + return 0; + +unregister_tzs: + while (i--) + thermal_zone_of_sensor_unregister(&pdev->dev, + tegra->thermctl_tzs[i]); + +disable_clocks: + clk_disable_unprepare(tegra->clock_tsensor); + clk_disable_unprepare(tegra->clock_soctherm); + + return err; +} + +static int tegra_soctherm_remove(struct platform_device *pdev) +{ + struct tegra_soctherm *tegra = platform_get_drvdata(pdev); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(tegra->thermctl_tzs); ++i) { + thermal_zone_of_sensor_unregister(&pdev->dev, + tegra->thermctl_tzs[i]); + } + + clk_disable_unprepare(tegra->clock_tsensor); + clk_disable_unprepare(tegra->clock_soctherm); + + return 0; +} + +static struct platform_driver tegra_soctherm_driver = { + .probe = tegra_soctherm_probe, + .remove = tegra_soctherm_remove, + .driver = { + .name = "tegra-soctherm", + .of_match_table = tegra_soctherm_of_match, + }, +}; +module_platform_driver(tegra_soctherm_driver); + +MODULE_AUTHOR("Mikko Perttunen <mperttunen@nvidia.com>"); +MODULE_DESCRIPTION("NVIDIA Tegra SOCTHERM thermal management driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 43b90709585..84fdf0792e2 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -368,7 +368,7 @@ static void handle_critical_trips(struct thermal_zone_device *tz, tz->ops->get_trip_temp(tz, trip, &trip_temp); /* If we have not crossed the trip_temp, we do not care. 
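Before leaving the soctherm driver: the readback conversion it defines in translate_temp() can be exercised in isolation. A minimal userspace sketch, assuming a hypothetical raw sensor value; bits 15:8 carry whole degrees Celsius, bit 7 adds half a degree, bit 1 negates, and the result is millicelsius:

#include <stdint.h>
#include <stdio.h>

/* Userspace sketch of the soctherm readback decoding; not driver code. */
static long translate_temp(uint16_t val)
{
	long t = ((val & 0xff00) >> 8) * 1000;	/* READBACK_VALUE_MASK/SHIFT */

	if (val & (1 << 7))			/* READBACK_ADD_HALF */
		t += 500;
	if (val & (1 << 1))			/* READBACK_NEGATE */
		t *= -1;
	return t;
}

int main(void)
{
	printf("%ld\n", translate_temp(0x1980));	/* 25.5 C -> 25500 */
	return 0;
}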
*/ - if (tz->temperature < trip_temp) + if (trip_temp <= 0 || tz->temperature < trip_temp) return; trace_thermal_zone_trip(tz, trip, trip_type); @@ -757,6 +757,7 @@ policy_store(struct device *dev, struct device_attribute *attr, snprintf(name, sizeof(name), "%s", buf); mutex_lock(&thermal_governor_lock); + mutex_lock(&tz->lock); gov = __find_governor(strim(name)); if (!gov) @@ -766,6 +767,7 @@ policy_store(struct device *dev, struct device_attribute *attr, ret = count; exit: + mutex_unlock(&tz->lock); mutex_unlock(&thermal_governor_lock); return ret; } @@ -1835,10 +1837,10 @@ static int __init thermal_init(void) exit_netlink: genetlink_exit(); -unregister_governors: - thermal_unregister_governors(); unregister_class: class_unregister(&thermal_class); +unregister_governors: + thermal_unregister_governors(); error: idr_destroy(&thermal_tz_idr); idr_destroy(&thermal_cdev_idr); diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index d15d243de27..9083e752062 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -89,9 +89,27 @@ static inline void thermal_gov_user_space_unregister(void) {} #ifdef CONFIG_THERMAL_OF int of_parse_thermal_zones(void); void of_thermal_destroy_zones(void); +int of_thermal_get_ntrips(struct thermal_zone_device *); +bool of_thermal_is_trip_valid(struct thermal_zone_device *, int); +const struct thermal_trip * const +of_thermal_get_trip_points(struct thermal_zone_device *); #else static inline int of_parse_thermal_zones(void) { return 0; } static inline void of_thermal_destroy_zones(void) { } +static inline int of_thermal_get_ntrips(struct thermal_zone_device *tz) +{ + return 0; +} +static inline bool of_thermal_is_trip_valid(struct thermal_zone_device *tz, + int trip) +{ + return 0; +} +static inline const struct thermal_trip * const +of_thermal_get_trip_points(struct thermal_zone_device *tz) +{ + return NULL; +} #endif #endif /* __THERMAL_CORE_H__ */ diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c index 9eec26dc044..5fd03865e39 100644 --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -286,6 +286,11 @@ static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal, return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp); } +static const struct thermal_zone_of_device_ops ti_of_thermal_ops = { + .get_temp = __ti_thermal_get_temp, + .get_trend = __ti_thermal_get_trend, +}; + static struct thermal_zone_device_ops ti_thermal_ops = { .get_temp = ti_thermal_get_temp, .get_trend = ti_thermal_get_trend, @@ -333,8 +338,7 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id, /* in case this is specified by DT */ data->ti_thermal = thermal_zone_of_sensor_register(bgp->dev, id, - data, __ti_thermal_get_temp, - __ti_thermal_get_trend); + data, &ti_of_thermal_ops); if (IS_ERR(data->ti_thermal)) { /* Create thermal zone */ data->ti_thermal = thermal_zone_device_register(domain, diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index d8c57636b9c..14e27ab3245 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -16,7 +16,7 @@ config VFIO_SPAPR_EEH menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API - select VFIO_IOMMU_TYPE1 if X86 + select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU) select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES) select 
ANON_INODES diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index c41b01e2b69..c6bb5da2d2a 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -16,3 +16,11 @@ config VFIO_PCI_VGA BIOS and generic video drivers. If you don't know what to do here, say N. + +config VFIO_PCI_MMAP + depends on VFIO_PCI + def_bool y if !S390 + +config VFIO_PCI_INTX + depends on VFIO_PCI + def_bool y if !S390 diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 9558da3f06a..255201f2212 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -215,7 +215,7 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) { u8 pin; pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin); - if (pin) + if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && pin) return 1; } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) { @@ -406,7 +406,8 @@ static long vfio_pci_ioctl(void *device_data, info.flags = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE; - if (pci_resource_flags(pdev, info.index) & + if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) && + pci_resource_flags(pdev, info.index) & IORESOURCE_MEM && info.size >= PAGE_SIZE) info.flags |= VFIO_REGION_INFO_FLAG_MMAP; break; diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 1de3f94aa7d..ff75ca31a19 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -609,6 +609,10 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm) /* Sometimes used by sw, just virtualize */ p_setb(perm, PCI_INTERRUPT_LINE, (u8)ALL_VIRT, (u8)ALL_WRITE); + + /* Virtualize interrupt pin to allow hiding INTx */ + p_setb(perm, PCI_INTERRUPT_PIN, (u8)ALL_VIRT, (u8)NO_WRITE); + return 0; } @@ -1445,6 +1449,9 @@ int vfio_config_init(struct vfio_pci_device *vdev) *(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device); } + if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX)) + vconfig[PCI_INTERRUPT_PIN] = 0; + ret = vfio_cap_init(vdev); if (ret) goto out; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index c9703d4d6f6..50c5f42d7a9 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -28,6 +28,7 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/balloon_compaction.h> +#include <linux/oom.h> /* * Balloon device works in 4K page units. 
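(In numbers, the 4K-unit bookkeeping this comment introduces, as a quick sketch; it assumes VIRTIO_BALLOON_PFN_SHIFT is 12, i.e. a 4K balloon page:)

#include <stdio.h>

int main(void)
{
	/* one kernel page expands to PAGE_SIZE >> 12 balloon pages */
	unsigned long sizes[] = { 4096, 16384, 65536 };
	unsigned i;

	for (i = 0; i < 3; i++)
		printf("PAGE_SIZE %lu -> %lu balloon page(s)\n",
		       sizes[i], sizes[i] >> 12);
	return 0;
}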
So each page is pointed to by @@ -36,6 +37,12 @@ */ #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 +#define OOM_VBALLOON_DEFAULT_PAGES 256 +#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 + +static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES; +module_param(oom_pages, int, S_IRUSR | S_IWUSR); +MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); struct virtio_balloon { @@ -71,6 +78,9 @@ struct virtio_balloon /* Memory statistics */ int need_stats_update; struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; + + /* To register callback in oom notifier call chain */ + struct notifier_block nb; }; static struct virtio_device_id id_table[] = { @@ -168,8 +178,9 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num) } } -static void leak_balloon(struct virtio_balloon *vb, size_t num) +static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) { + unsigned num_freed_pages; struct page *page; struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; @@ -186,6 +197,7 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE; } + num_freed_pages = vb->num_pfns; /* * Note that if * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); @@ -195,6 +207,7 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) tell_host(vb, vb->deflate_vq); mutex_unlock(&vb->balloon_lock); release_pages_by_pfn(vb->pfns, vb->num_pfns); + return num_freed_pages; } static inline void update_stat(struct virtio_balloon *vb, int idx, @@ -287,6 +300,38 @@ static void update_balloon_size(struct virtio_balloon *vb) &actual); } +/* + * virtballoon_oom_notify - release pages when system is under severe + * memory pressure (called from out_of_memory()) + * @self : notifier block struct + * @dummy: not used + * @parm : returned - number of freed pages + * + * The balancing of memory by use of the virtio balloon should not cause + * the termination of processes while there are pages in the balloon. + * If virtio balloon manages to release some memory, it will make the + * system return and retry the allocation that forced the OOM killer + * to run. 
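The notifier contract this kernel-doc describes, reduced to a minimal sketch: the OOM chain hands every callback a pointer to a running total of freed pages through parm, and each notifiee adds its own contribution. The callback below is hypothetical and shows only the API shape used here:

#include <linux/notifier.h>
#include <linux/oom.h>

static int demo_oom_notify(struct notifier_block *self,
			   unsigned long dummy, void *parm)
{
	unsigned long *freed = parm;

	*freed += 0;		/* add however many pages were released */
	return NOTIFY_OK;
}

static struct notifier_block demo_nb = {
	.notifier_call	= demo_oom_notify,
	.priority	= 1,	/* higher priority runs earlier in the chain */
};

/* pair register_oom_notifier(&demo_nb) with unregister_oom_notifier(). */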
+ */ +static int virtballoon_oom_notify(struct notifier_block *self, + unsigned long dummy, void *parm) +{ + struct virtio_balloon *vb; + unsigned long *freed; + unsigned num_freed_pages; + + vb = container_of(self, struct virtio_balloon, nb); + if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) + return NOTIFY_OK; + + freed = parm; + num_freed_pages = leak_balloon(vb, oom_pages); + update_balloon_size(vb); + *freed += num_freed_pages; + + return NOTIFY_OK; +} + static int balloon(void *_vballoon) { struct virtio_balloon *vb = _vballoon; @@ -443,6 +488,12 @@ static int virtballoon_probe(struct virtio_device *vdev) if (err) goto out_free_vb; + vb->nb.notifier_call = virtballoon_oom_notify; + vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; + err = register_oom_notifier(&vb->nb); + if (err < 0) + goto out_oom_notify; + vb->thread = kthread_run(balloon, vb, "vballoon"); if (IS_ERR(vb->thread)) { err = PTR_ERR(vb->thread); @@ -452,6 +503,8 @@ static int virtballoon_probe(struct virtio_device *vdev) return 0; out_del_vqs: + unregister_oom_notifier(&vb->nb); +out_oom_notify: vdev->config->del_vqs(vdev); out_free_vb: kfree(vb); @@ -476,6 +529,7 @@ static void virtballoon_remove(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; + unregister_oom_notifier(&vb->nb); kthread_stop(vb->thread); remove_common(vb); kfree(vb); @@ -515,6 +569,7 @@ static int virtballoon_restore(struct virtio_device *vdev) static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ, + VIRTIO_BALLOON_F_DEFLATE_ON_OOM, }; static struct virtio_driver virtio_balloon_driver = { diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 18c06bbaf13..f5013d92a7e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -192,17 +192,30 @@ static int readpage_nounlock(struct file *filp, struct page *page) struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->client->osdc; int err = 0; + u64 off = page_offset(page); u64 len = PAGE_CACHE_SIZE; - err = ceph_readpage_from_fscache(inode, page); + if (off >= i_size_read(inode)) { + zero_user_segment(page, err, PAGE_CACHE_SIZE); + SetPageUptodate(page); + return 0; + } + /* + * Uptodate inline data should have been added into page cache + * while getting Fcr caps. 
+ */ + if (ci->i_inline_version != CEPH_INLINE_NONE) + return -EINVAL; + + err = ceph_readpage_from_fscache(inode, page); if (err == 0) goto out; dout("readpage inode %p file %p page %p index %lu\n", inode, filp, page, page->index); err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, - (u64) page_offset(page), &len, + off, &len, ci->i_truncate_seq, ci->i_truncate_size, &page, 1, 0); if (err == -ENOENT) @@ -319,7 +332,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) off, len); vino = ceph_vino(inode); req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, - 1, CEPH_OSD_OP_READ, + 0, 1, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, ci->i_truncate_seq, ci->i_truncate_size, false); @@ -384,6 +397,9 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, int rc = 0; int max = 0; + if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE) + return -EINVAL; + rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list, &nr_pages); @@ -673,7 +689,7 @@ static int ceph_writepages_start(struct address_space *mapping, int rc = 0; unsigned wsize = 1 << inode->i_blkbits; struct ceph_osd_request *req = NULL; - int do_sync; + int do_sync = 0; u64 truncate_size, snap_size; u32 truncate_seq; @@ -750,7 +766,6 @@ retry: last_snapc = snapc; while (!done && index <= end) { - int num_ops = do_sync ? 2 : 1; unsigned i; int first; pgoff_t next; @@ -850,7 +865,8 @@ get_more_pages: len = wsize; req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, - offset, &len, num_ops, + offset, &len, 0, + do_sync ? 2 : 1, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, @@ -862,6 +878,9 @@ get_more_pages: break; } + if (do_sync) + osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); + req->r_callback = writepages_finish; req->r_inode = inode; @@ -1204,6 +1223,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; + struct page *pinned_page = NULL; loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT; int want, got, ret; @@ -1215,7 +1235,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) want = CEPH_CAP_FILE_CACHE; while (1) { got = 0; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); + ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, + -1, &got, &pinned_page); if (ret == 0) break; if (ret != -ERESTARTSYS) { @@ -1226,12 +1247,54 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) dout("filemap_fault %p %llu~%zd got cap refs on %s\n", inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got)); - ret = filemap_fault(vma, vmf); + if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || + ci->i_inline_version == CEPH_INLINE_NONE) + ret = filemap_fault(vma, vmf); + else + ret = -EAGAIN; dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret); + if (pinned_page) + page_cache_release(pinned_page); ceph_put_cap_refs(ci, got); + if (ret != -EAGAIN) + return ret; + + /* read inline data */ + if (off >= PAGE_CACHE_SIZE) { + /* does not support inline data > PAGE_SIZE */ + ret = VM_FAULT_SIGBUS; + } else { + int ret1; + struct address_space *mapping = inode->i_mapping; + struct page *page = find_or_create_page(mapping, 0, + mapping_gfp_mask(mapping) & + ~__GFP_FS); + if (!page) { + ret = VM_FAULT_OOM; + goto 
out; + } + ret1 = __ceph_do_getattr(inode, page, + CEPH_STAT_CAP_INLINE_DATA, true); + if (ret1 < 0 || off >= i_size_read(inode)) { + unlock_page(page); + page_cache_release(page); + ret = VM_FAULT_SIGBUS; + goto out; + } + if (ret1 < PAGE_CACHE_SIZE) + zero_user_segment(page, ret1, PAGE_CACHE_SIZE); + else + flush_dcache_page(page); + SetPageUptodate(page); + vmf->page = page; + ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED; + } +out: + dout("filemap_fault %p %llu~%zd read inline data ret %d\n", + inode, off, (size_t)PAGE_CACHE_SIZE, ret); return ret; } @@ -1250,6 +1313,19 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) size_t len; int want, got, ret; + if (ci->i_inline_version != CEPH_INLINE_NONE) { + struct page *locked_page = NULL; + if (off == 0) { + lock_page(page); + locked_page = page; + } + ret = ceph_uninline_data(vma->vm_file, locked_page); + if (locked_page) + unlock_page(locked_page); + if (ret < 0) + return VM_FAULT_SIGBUS; + } + if (off + PAGE_CACHE_SIZE <= size) len = PAGE_CACHE_SIZE; else @@ -1263,7 +1339,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) want = CEPH_CAP_FILE_BUFFER; while (1) { got = 0; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len); + ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len, + &got, NULL); if (ret == 0) break; if (ret != -ERESTARTSYS) { @@ -1297,11 +1374,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = VM_FAULT_SIGBUS; } out: - if (ret != VM_FAULT_LOCKED) { + if (ret != VM_FAULT_LOCKED) unlock_page(page); - } else { + if (ret == VM_FAULT_LOCKED || + ci->i_inline_version != CEPH_INLINE_NONE) { int dirty; spin_lock(&ci->i_ceph_lock); + ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); spin_unlock(&ci->i_ceph_lock); if (dirty) @@ -1315,6 +1394,178 @@ out: return ret; } +void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, + char *data, size_t len) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page; + + if (locked_page) { + page = locked_page; + } else { + if (i_size_read(inode) == 0) + return; + page = find_or_create_page(mapping, 0, + mapping_gfp_mask(mapping) & ~__GFP_FS); + if (!page) + return; + if (PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + return; + } + } + + dout("fill_inline_data %p %llx.%llx len %lu locked_page %p\n", + inode, ceph_vinop(inode), len, locked_page); + + if (len > 0) { + void *kaddr = kmap_atomic(page); + memcpy(kaddr, data, len); + kunmap_atomic(kaddr); + } + + if (page != locked_page) { + if (len < PAGE_CACHE_SIZE) + zero_user_segment(page, len, PAGE_CACHE_SIZE); + else + flush_dcache_page(page); + + SetPageUptodate(page); + unlock_page(page); + page_cache_release(page); + } +} + +int ceph_uninline_data(struct file *filp, struct page *locked_page) +{ + struct inode *inode = file_inode(filp); + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_osd_request *req; + struct page *page = NULL; + u64 len, inline_version; + int err = 0; + bool from_pagecache = false; + + spin_lock(&ci->i_ceph_lock); + inline_version = ci->i_inline_version; + spin_unlock(&ci->i_ceph_lock); + + dout("uninline_data %p %llx.%llx inline_version %llu\n", + inode, ceph_vinop(inode), inline_version); + + if (inline_version == 1 || /* initial version, no data */ + inline_version == CEPH_INLINE_NONE) + goto out; + + if (locked_page) { + page 
= locked_page; + WARN_ON(!PageUptodate(page)); + } else if (ceph_caps_issued(ci) & + (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) { + page = find_get_page(inode->i_mapping, 0); + if (page) { + if (PageUptodate(page)) { + from_pagecache = true; + lock_page(page); + } else { + page_cache_release(page); + page = NULL; + } + } + } + + if (page) { + len = i_size_read(inode); + if (len > PAGE_CACHE_SIZE) + len = PAGE_CACHE_SIZE; + } else { + page = __page_cache_alloc(GFP_NOFS); + if (!page) { + err = -ENOMEM; + goto out; + } + err = __ceph_do_getattr(inode, page, + CEPH_STAT_CAP_INLINE_DATA, true); + if (err < 0) { + /* no inline data */ + if (err == -ENODATA) + err = 0; + goto out; + } + len = err; + } + + req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, + ceph_vino(inode), 0, &len, 0, 1, + CEPH_OSD_OP_CREATE, + CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + ci->i_snap_realm->cached_context, + 0, 0, false); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out; + } + + ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); + err = ceph_osdc_start_request(&fsc->client->osdc, req, false); + if (!err) + err = ceph_osdc_wait_request(&fsc->client->osdc, req); + ceph_osdc_put_request(req); + if (err < 0) + goto out; + + req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, + ceph_vino(inode), 0, &len, 1, 3, + CEPH_OSD_OP_WRITE, + CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + ci->i_snap_realm->cached_context, + ci->i_truncate_seq, ci->i_truncate_size, + false); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out; + } + + osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); + + err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, + "inline_version", &inline_version, + sizeof(inline_version), + CEPH_OSD_CMPXATTR_OP_GT, + CEPH_OSD_CMPXATTR_MODE_U64); + if (err) + goto out_put; + + err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, + "inline_version", &inline_version, + sizeof(inline_version), 0, 0); + if (err) + goto out_put; + + ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); + err = ceph_osdc_start_request(&fsc->client->osdc, req, false); + if (!err) + err = ceph_osdc_wait_request(&fsc->client->osdc, req); +out_put: + ceph_osdc_put_request(req); + if (err == -ECANCELED) + err = 0; +out: + if (page && page != locked_page) { + if (from_pagecache) { + unlock_page(page); + page_cache_release(page); + } else + __free_pages(page, 0); + } + + dout("uninline_data %p %llx.%llx inline_version %llu = %d\n", + inode, ceph_vinop(inode), inline_version, err); + return err; +} + static struct vm_operations_struct ceph_vmops = { .fault = ceph_filemap_fault, .page_mkwrite = ceph_page_mkwrite, diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index cefca661464..b93c631c6c8 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -975,10 +975,12 @@ static int send_cap_msg(struct ceph_mds_session *session, kuid_t uid, kgid_t gid, umode_t mode, u64 xattr_version, struct ceph_buffer *xattrs_buf, - u64 follows) + u64 follows, bool inline_data) { struct ceph_mds_caps *fc; struct ceph_msg *msg; + void *p; + size_t extra_len; dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" " seq %u/%u mseq %u follows %lld size %llu/%llu" @@ -988,7 +990,10 @@ static int send_cap_msg(struct ceph_mds_session *session, seq, issue_seq, mseq, follows, size, max_size, xattr_version, xattrs_buf ? 
(int)xattrs_buf->vec.iov_len : 0); - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false); + /* flock buffer size + inline version + inline data size */ + extra_len = 4 + 8 + 4; + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len, + GFP_NOFS, false); if (!msg) return -ENOMEM; @@ -1020,6 +1025,14 @@ static int send_cap_msg(struct ceph_mds_session *session, fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid)); fc->mode = cpu_to_le32(mode); + p = fc + 1; + /* flock buffer size */ + ceph_encode_32(&p, 0); + /* inline version */ + ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE); + /* inline data size */ + ceph_encode_32(&p, 0); + fc->xattr_version = cpu_to_le64(xattr_version); if (xattrs_buf) { msg->middle = ceph_buffer_get(xattrs_buf); @@ -1126,6 +1139,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, u64 flush_tid = 0; int i; int ret; + bool inline_data; held = cap->issued | cap->implemented; revoking = cap->implemented & ~cap->issued; @@ -1209,13 +1223,15 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, xattr_version = ci->i_xattrs.version; } + inline_data = ci->i_inline_version != CEPH_INLINE_NONE; + spin_unlock(&ci->i_ceph_lock); ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, size, max_size, &mtime, &atime, time_warp_seq, uid, gid, mode, xattr_version, xattr_blob, - follows); + follows, inline_data); if (ret < 0) { dout("error sending cap msg, must requeue %p\n", inode); delayed = 1; @@ -1336,7 +1352,7 @@ retry: capsnap->time_warp_seq, capsnap->uid, capsnap->gid, capsnap->mode, capsnap->xattr_version, capsnap->xattr_blob, - capsnap->follows); + capsnap->follows, capsnap->inline_data); next_follows = capsnap->follows + 1; ceph_put_cap_snap(capsnap); @@ -2057,15 +2073,17 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) * requested from the MDS. 
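Stepping back to the send_cap_msg() change at the top of this hunk: the 16 extra bytes appended to the caps message are a u32 flock buffer length, a u64 inline version, and a u32 inline data length, all little-endian. A standalone sketch of that tail, with hypothetical packing helpers and assuming CEPH_INLINE_NONE is the all-ones u64:

#include <stdint.h>
#include <stdio.h>

static uint8_t *put_le32(uint8_t *p, uint32_t v)
{
	int i;

	for (i = 0; i < 4; i++)
		*p++ = v >> (8 * i);
	return p;
}

static uint8_t *put_le64(uint8_t *p, uint64_t v)
{
	int i;

	for (i = 0; i < 8; i++)
		*p++ = v >> (8 * i);
	return p;
}

int main(void)
{
	uint8_t tail[16], *p = tail;

	p = put_le32(p, 0);		/* flock buffer size */
	p = put_le64(p, (uint64_t)-1);	/* inline version: CEPH_INLINE_NONE */
	p = put_le32(p, 0);		/* inline data size */
	printf("tail is %d bytes\n", (int)(p - tail));	/* 4 + 8 + 4 = 16 */
	return 0;
}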
*/ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, - int *got, loff_t endoff, int *check_max, int *err) + loff_t endoff, int *got, struct page **pinned_page, + int *check_max, int *err) { struct inode *inode = &ci->vfs_inode; int ret = 0; - int have, implemented; + int have, implemented, _got = 0; int file_wanted; dout("get_cap_refs %p need %s want %s\n", inode, ceph_cap_string(need), ceph_cap_string(want)); +again: spin_lock(&ci->i_ceph_lock); /* make sure file is actually open */ @@ -2075,7 +2093,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, ceph_cap_string(need), ceph_cap_string(file_wanted)); *err = -EBADF; ret = 1; - goto out; + goto out_unlock; } /* finish pending truncate */ @@ -2095,7 +2113,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, *check_max = 1; ret = 1; } - goto out; + goto out_unlock; } /* * If a sync write is in progress, we must wait, so that we @@ -2103,7 +2121,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, */ if (__ceph_have_pending_cap_snap(ci)) { dout("get_cap_refs %p cap_snap_pending\n", inode); - goto out; + goto out_unlock; } } @@ -2120,18 +2138,50 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, inode, ceph_cap_string(have), ceph_cap_string(not), ceph_cap_string(revoking)); if ((revoking & not) == 0) { - *got = need | (have & want); - __take_cap_refs(ci, *got); + _got = need | (have & want); + __take_cap_refs(ci, _got); ret = 1; } } else { dout("get_cap_refs %p have %s needed %s\n", inode, ceph_cap_string(have), ceph_cap_string(need)); } -out: +out_unlock: spin_unlock(&ci->i_ceph_lock); + + if (ci->i_inline_version != CEPH_INLINE_NONE && + (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && + i_size_read(inode) > 0) { + int ret1; + struct page *page = find_get_page(inode->i_mapping, 0); + if (page) { + if (PageUptodate(page)) { + *pinned_page = page; + goto out; + } + page_cache_release(page); + } + /* + * drop cap refs first because getattr while holding + * caps refs can cause deadlock. + */ + ceph_put_cap_refs(ci, _got); + _got = 0; + + /* getattr request will bring inline data into page cache */ + ret1 = __ceph_do_getattr(inode, NULL, + CEPH_STAT_CAP_INLINE_DATA, true); + if (ret1 >= 0) { + ret = 0; + goto again; + } + *err = ret1; + ret = 1; + } +out: dout("get_cap_refs %p ret %d got %s\n", inode, - ret, ceph_cap_string(*got)); + ret, ceph_cap_string(_got)); + *got = _got; return ret; } @@ -2168,8 +2218,8 @@ static void check_max_size(struct inode *inode, loff_t endoff) * due to a small max_size, make sure we check_max_size (and possibly * ask the mds) so we don't get hung up indefinitely. 
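The retry just added to try_get_cap_refs(), reduced to its shape: the getattr that pulls inline data into the page cache may block, so the cap references are dropped first and the whole acquisition restarts. A self-contained sketch with hypothetical stubs standing in for the cap and getattr machinery:

#include <stdbool.h>
#include <stdio.h>

static bool have_inline;

static int take_refs(void)	{ return 0; }
static void drop_refs(void)	{ }
static int fetch_inline(void)	{ have_inline = true; return 0; }

static int get_caps_with_inline(void)
{
again:
	if (take_refs())
		return -1;
	if (!have_inline) {
		drop_refs();		/* never block while holding refs */
		if (fetch_inline())
			return -1;
		goto again;		/* inline data now in page cache */
	}
	return 0;
}

int main(void)
{
	printf("%d\n", get_caps_with_inline());
	return 0;
}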
*/ -int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got, - loff_t endoff) +int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, + loff_t endoff, int *got, struct page **pinned_page) { int check_max, ret, err; @@ -2179,8 +2229,8 @@ retry: check_max = 0; err = 0; ret = wait_event_interruptible(ci->i_cap_wq, - try_get_cap_refs(ci, need, want, - got, endoff, + try_get_cap_refs(ci, need, want, endoff, + got, pinned_page, &check_max, &err)); if (err) ret = err; @@ -2383,6 +2433,8 @@ static void invalidate_aliases(struct inode *inode) static void handle_cap_grant(struct ceph_mds_client *mdsc, struct inode *inode, struct ceph_mds_caps *grant, void *snaptrace, int snaptrace_len, + u64 inline_version, + void *inline_data, int inline_len, struct ceph_buffer *xattr_buf, struct ceph_mds_session *session, struct ceph_cap *cap, int issued) @@ -2403,6 +2455,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, bool queue_invalidate = false; bool queue_revalidate = false; bool deleted_inode = false; + bool fill_inline = false; dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", inode, cap, mds, seq, ceph_cap_string(newcaps)); @@ -2576,6 +2629,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, } BUG_ON(cap->issued & ~cap->implemented); + if (inline_version > 0 && inline_version >= ci->i_inline_version) { + ci->i_inline_version = inline_version; + if (ci->i_inline_version != CEPH_INLINE_NONE && + (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO))) + fill_inline = true; + } + spin_unlock(&ci->i_ceph_lock); if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { @@ -2589,6 +2649,9 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, wake = true; } + if (fill_inline) + ceph_fill_inline_data(inode, NULL, inline_data, inline_len); + if (queue_trunc) { ceph_queue_vmtruncate(inode); ceph_queue_revalidate(inode); @@ -2996,11 +3059,12 @@ void ceph_handle_caps(struct ceph_mds_session *session, u64 cap_id; u64 size, max_size; u64 tid; + u64 inline_version = 0; + void *inline_data = NULL; + u32 inline_len = 0; void *snaptrace; size_t snaptrace_len; - void *flock; - void *end; - u32 flock_len; + void *p, *end; dout("handle_caps from mds%d\n", mds); @@ -3021,30 +3085,37 @@ void ceph_handle_caps(struct ceph_mds_session *session, snaptrace = h + 1; snaptrace_len = le32_to_cpu(h->snap_trace_len); + p = snaptrace + snaptrace_len; if (le16_to_cpu(msg->hdr.version) >= 2) { - void *p = snaptrace + snaptrace_len; + u32 flock_len; ceph_decode_32_safe(&p, end, flock_len, bad); if (p + flock_len > end) goto bad; - flock = p; - } else { - flock = NULL; - flock_len = 0; + p += flock_len; } if (le16_to_cpu(msg->hdr.version) >= 3) { if (op == CEPH_CAP_OP_IMPORT) { - void *p = flock + flock_len; if (p + sizeof(*peer) > end) goto bad; peer = p; + p += sizeof(*peer); } else if (op == CEPH_CAP_OP_EXPORT) { /* recorded in unused fields */ peer = (void *)&h->size; } } + if (le16_to_cpu(msg->hdr.version) >= 4) { + ceph_decode_64_safe(&p, end, inline_version, bad); + ceph_decode_32_safe(&p, end, inline_len, bad); + if (p + inline_len > end) + goto bad; + inline_data = p; + p += inline_len; + } + /* lookup ino */ inode = ceph_find_inode(sb, vino); ci = ceph_inode(inode); @@ -3085,6 +3156,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, handle_cap_import(mdsc, inode, h, peer, session, &cap, &issued); handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len, + inline_version, inline_data, inline_len, msg->middle, session, cap, issued); goto done_unlocked; } @@ 
-3105,8 +3177,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_GRANT: __ceph_caps_issued(ci, &issued); issued |= __ceph_caps_dirty(ci); - handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle, - session, cap, issued); + handle_cap_grant(mdsc, inode, h, NULL, 0, + inline_version, inline_data, inline_len, + msg->middle, session, cap, issued); goto done_unlocked; case CEPH_CAP_OP_FLUSH_ACK: @@ -3137,8 +3210,7 @@ flush_cap_releases: done: mutex_unlock(&session->s_mutex); done_unlocked: - if (inode) - iput(inode); + iput(inode); return; bad: diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 681a8537b64..c241603764f 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -183,7 +183,7 @@ more: spin_unlock(&parent->d_lock); /* make sure a dentry wasn't dropped while we didn't have parent lock */ - if (!ceph_dir_is_complete(dir)) { + if (!ceph_dir_is_complete_ordered(dir)) { dout(" lost dir complete on %p; falling back to mds\n", dir); dput(dentry); err = -EAGAIN; @@ -261,10 +261,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) /* always start with . and .. */ if (ctx->pos == 0) { - /* note dir version at start of readdir so we can tell - * if any dentries get dropped */ - fi->dir_release_count = atomic_read(&ci->i_release_count); - dout("readdir off 0 -> '.'\n"); if (!dir_emit(ctx, ".", 1, ceph_translate_ino(inode->i_sb, inode->i_ino), @@ -289,7 +285,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) if ((ctx->pos == 2 || fi->dentry) && !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && ceph_snap(inode) != CEPH_SNAPDIR && - __ceph_dir_is_complete(ci) && + __ceph_dir_is_complete_ordered(ci) && __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { u32 shared_gen = ci->i_shared_gen; spin_unlock(&ci->i_ceph_lock); @@ -312,6 +308,13 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) /* proceed with a normal readdir */ + if (ctx->pos == 2) { + /* note dir version at start of readdir so we can tell + * if any dentries get dropped */ + fi->dir_release_count = atomic_read(&ci->i_release_count); + fi->dir_ordered_count = ci->i_ordered_count; + } + more: /* do we have the correct frag content buffered? */ if (fi->frag != frag || fi->last_readdir == NULL) { @@ -446,8 +449,12 @@ more: */ spin_lock(&ci->i_ceph_lock); if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { - dout(" marking %p complete\n", inode); - __ceph_dir_set_complete(ci, fi->dir_release_count); + if (ci->i_ordered_count == fi->dir_ordered_count) + dout(" marking %p complete and ordered\n", inode); + else + dout(" marking %p complete\n", inode); + __ceph_dir_set_complete(ci, fi->dir_release_count, + fi->dir_ordered_count); } spin_unlock(&ci->i_ceph_lock); @@ -805,7 +812,9 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) acls.pagelist = NULL; } err = ceph_mdsc_do_request(mdsc, dir, req); - if (!err && !req->r_reply_info.head->is_dentry) + if (!err && + !req->r_reply_info.head->is_target && + !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); ceph_mdsc_put_request(req); out: diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 9f8e3572040..ce74b394b49 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -333,6 +333,11 @@ int ceph_release(struct inode *inode, struct file *file) return 0; } +enum { + CHECK_EOF = 1, + READ_INLINE = 2, +}; + /* * Read a range of bytes striped over one or more objects. Iterate over * objects we stripe over. (That's not atomic, but good enough for now.) 
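For intuition, the object-walking arithmetic behind striped reads in miniature: a toy with a single-stripe layout and a hypothetical 4 MB object size (real ceph layouts add stripe_unit/stripe_count on top of this):

#include <stdio.h>

int main(void)
{
	unsigned long long obj_size = 4ULL << 20;
	unsigned long long pos = (6ULL << 20) + 1234;	/* file offset */
	unsigned long long len = 3ULL << 20;		/* bytes to read */

	while (len) {
		unsigned long long objno = pos / obj_size;
		unsigned long long off  = pos % obj_size;
		unsigned long long n    = obj_size - off;

		if (n > len)
			n = len;
		printf("object %llu: off %llu len %llu\n", objno, off, n);
		pos += n;
		len -= n;
	}
	return 0;
}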
@@ -412,7 +417,7 @@ more: ret = read; /* did we bounce off eof? */ if (pos + left > inode->i_size) - *checkeof = 1; + *checkeof = CHECK_EOF; } dout("striped_read returns %d\n", ret); @@ -598,7 +603,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) snapc = ci->i_snap_realm->cached_context; vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, - vino, pos, &len, + vino, pos, &len, 0, 2,/*include a 'startsync' command*/ CEPH_OSD_OP_WRITE, flags, snapc, ci->i_truncate_seq, @@ -609,6 +614,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) break; } + osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); + n = iov_iter_get_pages_alloc(from, &pages, len, &start); if (unlikely(n < 0)) { ret = n; @@ -713,7 +720,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) snapc = ci->i_snap_realm->cached_context; vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, - vino, pos, &len, 1, + vino, pos, &len, 0, 1, CEPH_OSD_OP_WRITE, flags, snapc, ci->i_truncate_seq, ci->i_truncate_size, @@ -803,9 +810,10 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) size_t len = iocb->ki_nbytes; struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); + struct page *pinned_page = NULL; ssize_t ret; int want, got = 0; - int checkeof = 0, read = 0; + int retry_op = 0, read = 0; again: dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", @@ -815,7 +823,7 @@ again: want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; else want = CEPH_CAP_FILE_CACHE; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); + ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page); if (ret < 0) return ret; @@ -827,8 +835,12 @@ again: inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, ceph_cap_string(got)); - /* hmm, this isn't really async... */ - ret = ceph_sync_read(iocb, to, &checkeof); + if (ci->i_inline_version == CEPH_INLINE_NONE) { + /* hmm, this isn't really async... 
*/ + ret = ceph_sync_read(iocb, to, &retry_op); + } else { + retry_op = READ_INLINE; + } } else { dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, @@ -838,13 +850,55 @@ again: } dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); + if (pinned_page) { + page_cache_release(pinned_page); + pinned_page = NULL; + } ceph_put_cap_refs(ci, got); + if (retry_op && ret >= 0) { + int statret; + struct page *page = NULL; + loff_t i_size; + if (retry_op == READ_INLINE) { + page = __page_cache_alloc(GFP_NOFS); + if (!page) + return -ENOMEM; + } - if (checkeof && ret >= 0) { - int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); + statret = __ceph_do_getattr(inode, page, + CEPH_STAT_CAP_INLINE_DATA, !!page); + if (statret < 0) { + __free_page(page); + if (statret == -ENODATA) { + BUG_ON(retry_op != READ_INLINE); + goto again; + } + return statret; + } + + i_size = i_size_read(inode); + if (retry_op == READ_INLINE) { + /* does not support inline data > PAGE_SIZE */ + if (i_size > PAGE_CACHE_SIZE) { + ret = -EIO; + } else if (iocb->ki_pos < i_size) { + loff_t end = min_t(loff_t, i_size, + iocb->ki_pos + len); + if (statret < end) + zero_user_segment(page, statret, end); + ret = copy_page_to_iter(page, + iocb->ki_pos & ~PAGE_MASK, + end - iocb->ki_pos, to); + iocb->ki_pos += ret; + } else { + ret = 0; + } + __free_pages(page, 0); + return ret; + } /* hit EOF or hole? */ - if (statret == 0 && iocb->ki_pos < inode->i_size && + if (retry_op == CHECK_EOF && iocb->ki_pos < i_size && ret < len) { dout("sync_read hit hole, ppos %lld < size %lld" ", reading more\n", iocb->ki_pos, @@ -852,7 +906,7 @@ again: read += ret; len -= ret; - checkeof = 0; + retry_op = 0; goto again; } } @@ -909,6 +963,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; + if (ci->i_inline_version != CEPH_INLINE_NONE) { + err = ceph_uninline_data(file, NULL); + if (err < 0) + goto out; + } + retry_snap: if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { err = -ENOSPC; @@ -922,7 +982,8 @@ retry_snap: else want = CEPH_CAP_FILE_BUFFER; got = 0; - err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count); + err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count, + &got, NULL); if (err < 0) goto out; @@ -969,6 +1030,7 @@ retry_snap: if (written >= 0) { int dirty; spin_lock(&ci->i_ceph_lock); + ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); spin_unlock(&ci->i_ceph_lock); if (dirty) @@ -1111,7 +1173,7 @@ static int ceph_zero_partial_object(struct inode *inode, req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), offset, length, - 1, op, + 0, 1, op, CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, NULL, 0, 0, false); @@ -1214,6 +1276,12 @@ static long ceph_fallocate(struct file *file, int mode, goto unlock; } + if (ci->i_inline_version != CEPH_INLINE_NONE) { + ret = ceph_uninline_data(file, NULL); + if (ret < 0) + goto unlock; + } + size = i_size_read(inode); if (!(mode & FALLOC_FL_KEEP_SIZE)) endoff = offset + length; @@ -1223,7 +1291,7 @@ static long ceph_fallocate(struct file *file, int mode, else want = CEPH_CAP_FILE_BUFFER; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); + ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL); if (ret < 0) goto unlock; @@ -1240,6 +1308,7 @@ static long ceph_fallocate(struct file *file, int mode, if (!ret) 
{ spin_lock(&ci->i_ceph_lock); + ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); spin_unlock(&ci->i_ceph_lock); if (dirty) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index a5593d51d03..f61a74115be 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -387,8 +387,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb) spin_lock_init(&ci->i_ceph_lock); ci->i_version = 0; + ci->i_inline_version = 0; ci->i_time_warp_seq = 0; ci->i_ceph_flags = 0; + ci->i_ordered_count = 0; atomic_set(&ci->i_release_count, 1); atomic_set(&ci->i_complete_count, 0); ci->i_symlink = NULL; @@ -657,7 +659,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, * Populate an inode based on info from mds. May be called on new or * existing inodes. */ -static int fill_inode(struct inode *inode, +static int fill_inode(struct inode *inode, struct page *locked_page, struct ceph_mds_reply_info_in *iinfo, struct ceph_mds_reply_dirfrag *dirinfo, struct ceph_mds_session *session, @@ -675,6 +677,7 @@ static int fill_inode(struct inode *inode, bool wake = false; bool queue_trunc = false; bool new_version = false; + bool fill_inline = false; dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", inode, ceph_vinop(inode), le64_to_cpu(info->version), @@ -845,7 +848,8 @@ static int fill_inode(struct inode *inode, (issued & CEPH_CAP_FILE_EXCL) == 0 && !__ceph_dir_is_complete(ci)) { dout(" marking %p complete (empty)\n", inode); - __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); + __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count), + ci->i_ordered_count); } /* were we issued a capability? */ @@ -873,8 +877,23 @@ static int fill_inode(struct inode *inode, ceph_vinop(inode)); __ceph_get_fmode(ci, cap_fmode); } + + if (iinfo->inline_version > 0 && + iinfo->inline_version >= ci->i_inline_version) { + int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; + ci->i_inline_version = iinfo->inline_version; + if (ci->i_inline_version != CEPH_INLINE_NONE && + (locked_page || + (le32_to_cpu(info->cap.caps) & cache_caps))) + fill_inline = true; + } + spin_unlock(&ci->i_ceph_lock); + if (fill_inline) + ceph_fill_inline_data(inode, locked_page, + iinfo->inline_data, iinfo->inline_len); + if (wake) wake_up_all(&ci->i_cap_wq); @@ -1062,7 +1081,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, struct inode *dir = req->r_locked_dir; if (dir) { - err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, + err = fill_inode(dir, NULL, + &rinfo->diri, rinfo->dirfrag, session, req->r_request_started, -1, &req->r_caps_reservation); if (err < 0) @@ -1132,7 +1152,7 @@ retry_lookup: } req->r_target_inode = in; - err = fill_inode(in, &rinfo->targeti, NULL, + err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, session, req->r_request_started, (!req->r_aborted && rinfo->head->result == 0) ? 
req->r_fmode : -1, @@ -1204,8 +1224,8 @@ retry_lookup: ceph_invalidate_dentry_lease(dn); /* d_move screws up sibling dentries' offsets */ - ceph_dir_clear_complete(dir); - ceph_dir_clear_complete(olddir); + ceph_dir_clear_ordered(dir); + ceph_dir_clear_ordered(olddir); dout("dn %p gets new offset %lld\n", req->r_old_dentry, ceph_dentry(req->r_old_dentry)->offset); @@ -1217,6 +1237,7 @@ retry_lookup: if (!rinfo->head->is_target) { dout("fill_trace null dentry\n"); if (dn->d_inode) { + ceph_dir_clear_ordered(dir); dout("d_delete %p\n", dn); d_delete(dn); } else { @@ -1233,7 +1254,7 @@ retry_lookup: /* attach proper inode */ if (!dn->d_inode) { - ceph_dir_clear_complete(dir); + ceph_dir_clear_ordered(dir); ihold(in); dn = splice_dentry(dn, in, &have_lease); if (IS_ERR(dn)) { @@ -1263,7 +1284,7 @@ retry_lookup: BUG_ON(!dir); BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR); dout(" linking snapped dir %p to dn %p\n", in, dn); - ceph_dir_clear_complete(dir); + ceph_dir_clear_ordered(dir); ihold(in); dn = splice_dentry(dn, in, NULL); if (IS_ERR(dn)) { @@ -1300,7 +1321,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, dout("new_inode badness got %d\n", err); continue; } - rc = fill_inode(in, &rinfo->dir_in[i], NULL, session, + rc = fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session, req->r_request_started, -1, &req->r_caps_reservation); if (rc < 0) { @@ -1416,7 +1437,7 @@ retry_lookup: } } - if (fill_inode(in, &rinfo->dir_in[i], NULL, session, + if (fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session, req->r_request_started, -1, &req->r_caps_reservation) < 0) { pr_err("fill_inode badness on %p\n", in); @@ -1899,7 +1920,8 @@ out_put: * Verify that we have a lease on the given mask. If not, * do a getattr against an mds. */ -int ceph_do_getattr(struct inode *inode, int mask, bool force) +int __ceph_do_getattr(struct inode *inode, struct page *locked_page, + int mask, bool force) { struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -1911,7 +1933,8 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force) return 0; } - dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode); + dout("do_getattr inode %p mask %s mode 0%o\n", + inode, ceph_cap_string(mask), inode->i_mode); if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) return 0; @@ -1922,7 +1945,19 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force) ihold(inode); req->r_num_caps = 1; req->r_args.getattr.mask = cpu_to_le32(mask); + req->r_locked_page = locked_page; err = ceph_mdsc_do_request(mdsc, NULL, req); + if (locked_page && err == 0) { + u64 inline_version = req->r_reply_info.targeti.inline_version; + if (inline_version == 0) { + /* the reply is supposed to contain inline data */ + err = -EINVAL; + } else if (inline_version == CEPH_INLINE_NONE) { + err = -ENODATA; + } else { + err = req->r_reply_info.targeti.inline_len; + } + } ceph_mdsc_put_request(req); dout("do_getattr result=%d\n", err); return err; diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index fbc39c47bac..c35c5c614e3 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -9,6 +9,8 @@ #include <linux/ceph/pagelist.h> static u64 lock_secret; +static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, + struct ceph_mds_request *req); static inline u64 secure_addr(void *addr) { @@ -40,6 +42,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, u64 length = 0; u64 owner; + if (operation != 
CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK) + wait = 0; + req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); @@ -68,6 +73,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, req->r_args.filelock_change.length = cpu_to_le64(length); req->r_args.filelock_change.wait = wait; + if (wait) + req->r_wait_for_completion = ceph_lock_wait_for_completion; + err = ceph_mdsc_do_request(mdsc, inode, req); if (operation == CEPH_MDS_OP_GETFILELOCK) { @@ -96,6 +104,52 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, return err; } +static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, + struct ceph_mds_request *req) +{ + struct ceph_mds_request *intr_req; + struct inode *inode = req->r_inode; + int err, lock_type; + + BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK); + if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL) + lock_type = CEPH_LOCK_FCNTL_INTR; + else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK) + lock_type = CEPH_LOCK_FLOCK_INTR; + else + BUG_ON(1); + BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK); + + err = wait_for_completion_interruptible(&req->r_completion); + if (!err) + return 0; + + dout("ceph_lock_wait_for_completion: request %llu was interrupted\n", + req->r_tid); + + intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK, + USE_AUTH_MDS); + if (IS_ERR(intr_req)) + return PTR_ERR(intr_req); + + intr_req->r_inode = inode; + ihold(inode); + intr_req->r_num_caps = 1; + + intr_req->r_args.filelock_change = req->r_args.filelock_change; + intr_req->r_args.filelock_change.rule = lock_type; + intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK; + + err = ceph_mdsc_do_request(mdsc, inode, intr_req); + ceph_mdsc_put_request(intr_req); + + if (err && err != -ERESTARTSYS) + return err; + + wait_for_completion(&req->r_completion); + return 0; +} + /** * Attempt to set an fcntl lock. * For now, this just goes away to the server. Later it may be more awesome. 
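The interruptible-wait flow added in ceph_lock_wait_for_completion() above, boiled down: wait interruptibly; if a signal arrives, fire a FCNTL_INTR/FLOCK_INTR counter-request that makes the MDS abort the blocked lock, then wait (no longer interruptibly) for the original request's reply, which is now guaranteed to come. A sketch with a hypothetical stand-in for the counter-request:

#include <linux/completion.h>
#include <linux/errno.h>

/* Hypothetical stand-in for sending the *_INTR unlock request. */
static int send_interrupt_request(void)
{
	return 0;
}

static int wait_for_lock_reply(struct completion *done)
{
	int err = wait_for_completion_interruptible(done);

	if (!err)
		return 0;			/* completed normally */

	err = send_interrupt_request();		/* abort the server-side wait */
	if (err && err != -ERESTARTSYS)
		return err;

	wait_for_completion(done);		/* reply now guaranteed */
	return 0;
}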
@@ -143,11 +197,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) err); } } - - } else if (err == -ERESTARTSYS) { - dout("undoing lock\n"); - ceph_lock_message(CEPH_LOCK_FCNTL, op, file, - CEPH_LOCK_UNLOCK, 0, fl); } return err; } @@ -186,11 +235,6 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) file, CEPH_LOCK_UNLOCK, 0, fl); dout("got %d on flock_lock_file_wait, undid lock", err); } - } else if (err == -ERESTARTSYS) { - dout("undoing lock\n"); - ceph_lock_message(CEPH_LOCK_FLOCK, - CEPH_MDS_OP_SETFILELOCK, - file, CEPH_LOCK_UNLOCK, 0, fl); } return err; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a92d3f5c6c1..d2171f4a698 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -89,6 +89,16 @@ static int parse_reply_info_in(void **p, void *end, ceph_decode_need(p, end, info->xattr_len, bad); info->xattr_data = *p; *p += info->xattr_len; + + if (features & CEPH_FEATURE_MDS_INLINE_DATA) { + ceph_decode_64_safe(p, end, info->inline_version, bad); + ceph_decode_32_safe(p, end, info->inline_len, bad); + ceph_decode_need(p, end, info->inline_len, bad); + info->inline_data = *p; + *p += info->inline_len; + } else + info->inline_version = CEPH_INLINE_NONE; + return 0; bad: return err; @@ -524,8 +534,7 @@ void ceph_mdsc_release_request(struct kref *kref) } if (req->r_locked_dir) ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); - if (req->r_target_inode) - iput(req->r_target_inode); + iput(req->r_target_inode); if (req->r_dentry) dput(req->r_dentry); if (req->r_old_dentry) @@ -861,8 +870,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 /* * Serialize client metadata into waiting buffer space, using * the format that userspace expects for map<string, string> + * + * ClientSession messages with metadata are v2 */ - msg->hdr.version = 2; /* ClientSession messages with metadata are v2 */ + msg->hdr.version = cpu_to_le16(2); + msg->hdr.compat_version = cpu_to_le16(1); /* The write pointer, following the session_head structure */ p = msg->front.iov_base + sizeof(*h); @@ -1066,8 +1078,7 @@ out: session->s_cap_iterator = NULL; spin_unlock(&session->s_cap_lock); - if (last_inode) - iput(last_inode); + iput(last_inode); if (old_cap) ceph_put_cap(session->s_mdsc, old_cap); @@ -1874,7 +1885,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, goto out_free2; } - msg->hdr.version = 2; + msg->hdr.version = cpu_to_le16(2); msg->hdr.tid = cpu_to_le64(req->r_tid); head = msg->front.iov_base; @@ -2208,6 +2219,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, &req->r_completion, req->r_timeout); if (err == 0) err = -EIO; + } else if (req->r_wait_for_completion) { + err = req->r_wait_for_completion(mdsc, req); } else { err = wait_for_completion_killable(&req->r_completion); } @@ -3744,6 +3757,20 @@ static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con, return msg; } +static int sign_message(struct ceph_connection *con, struct ceph_msg *msg) +{ + struct ceph_mds_session *s = con->private; + struct ceph_auth_handshake *auth = &s->s_auth; + return ceph_auth_sign_message(auth, msg); +} + +static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg) +{ + struct ceph_mds_session *s = con->private; + struct ceph_auth_handshake *auth = &s->s_auth; + return ceph_auth_check_message_signature(auth, msg); +} + static const struct ceph_connection_operations mds_con_ops = { .get = con_get, .put = con_put, @@ -3753,6 +3780,8 @@ static const 
struct ceph_connection_operations mds_con_ops = { .invalidate_authorizer = invalidate_authorizer, .peer_reset = peer_reset, .alloc_msg = mds_alloc_msg, + .sign_message = sign_message, + .check_message_signature = check_message_signature, }; /* eof */ diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 3288359353e..e2817d00f7d 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -41,6 +41,9 @@ struct ceph_mds_reply_info_in { char *symlink; u32 xattr_len; char *xattr_data; + u64 inline_version; + u32 inline_len; + char *inline_data; }; /* @@ -166,6 +169,11 @@ struct ceph_mds_client; */ typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc, struct ceph_mds_request *req); +/* + * wait for request completion callback + */ +typedef int (*ceph_mds_request_wait_callback_t) (struct ceph_mds_client *mdsc, + struct ceph_mds_request *req); /* * an in-flight mds request @@ -215,6 +223,7 @@ struct ceph_mds_request { int r_request_release_offset; struct ceph_msg *r_reply; struct ceph_mds_reply_info_parsed r_reply_info; + struct page *r_locked_page; int r_err; bool r_aborted; @@ -239,6 +248,7 @@ struct ceph_mds_request { struct completion r_completion; struct completion r_safe_completion; ceph_mds_request_callback_t r_callback; + ceph_mds_request_wait_callback_t r_wait_for_completion; struct list_head r_unsafe_item; /* per-session unsafe list item */ bool r_got_unsafe, r_got_safe, r_got_result; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index f01645a2775..ce35fbd4ba5 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -288,6 +288,9 @@ static int cmpu64_rev(const void *a, const void *b) return 0; } + +static struct ceph_snap_context *empty_snapc; + /* * build the snap context for a given realm. */ @@ -328,6 +331,12 @@ static int build_snap_context(struct ceph_snap_realm *realm) return 0; } + if (num == 0 && realm->seq == empty_snapc->seq) { + ceph_get_snap_context(empty_snapc); + snapc = empty_snapc; + goto done; + } + /* alloc new snap context */ err = -ENOMEM; if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) @@ -365,8 +374,8 @@ static int build_snap_context(struct ceph_snap_realm *realm) realm->ino, realm, snapc, snapc->seq, (unsigned int) snapc->num_snaps); - if (realm->cached_context) - ceph_put_snap_context(realm->cached_context); +done: + ceph_put_snap_context(realm->cached_context); realm->cached_context = snapc; return 0; @@ -466,6 +475,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) cap_snap. lucky us. */ dout("queue_cap_snap %p already pending\n", inode); kfree(capsnap); + } else if (ci->i_snap_realm->cached_context == empty_snapc) { + dout("queue_cap_snap %p empty snapc\n", inode); + kfree(capsnap); } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) { struct ceph_snap_context *snapc = ci->i_head_snapc; @@ -504,6 +516,8 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) capsnap->xattr_version = 0; } + capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE; + /* dirty page count moved from _head to this cap_snap; all subsequent writes page dirties occur _after_ this snapshot. 
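Not part of the patch, for context: the empty_snapc hunks above rely on the shared snap-context refcounting helpers from net/ceph/snapshot.c. A minimal sketch of that contract, with hypothetical caller names:

        static struct ceph_snap_context *shared_snapc;

        static int __init example_snapc_init(void)
        {
                /* ceph_create_snap_context() returns the object with one ref */
                shared_snapc = ceph_create_snap_context(0, GFP_NOFS);
                if (!shared_snapc)
                        return -ENOMEM;
                shared_snapc->seq = 1;
                return 0;
        }

        static struct ceph_snap_context *example_snapc_borrow(void)
        {
                /* each holder takes its own reference before stashing it */
                return ceph_get_snap_context(shared_snapc);
        }

        static void example_snapc_release(struct ceph_snap_context *snapc)
        {
                /* NULL-safe; frees the context on the last put */
                ceph_put_snap_context(snapc);
        }

The same NULL-safety is what lets the done: label above call ceph_put_snap_context(realm->cached_context) without the old NULL check.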
*/ @@ -590,15 +604,13 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) if (!inode) continue; spin_unlock(&realm->inodes_with_caps_lock); - if (lastinode) - iput(lastinode); + iput(lastinode); lastinode = inode; ceph_queue_cap_snap(ci); spin_lock(&realm->inodes_with_caps_lock); } spin_unlock(&realm->inodes_with_caps_lock); - if (lastinode) - iput(lastinode); + iput(lastinode); list_for_each_entry(child, &realm->children, child_item) { dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n", @@ -928,5 +940,16 @@ out: return; } +int __init ceph_snap_init(void) +{ + empty_snapc = ceph_create_snap_context(0, GFP_NOFS); + if (!empty_snapc) + return -ENOMEM; + empty_snapc->seq = 1; + return 0; +} - +void ceph_snap_exit(void) +{ + ceph_put_snap_context(empty_snapc); +} diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f6e12377335..50f06cddc94 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -515,7 +515,8 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, struct ceph_fs_client *fsc; const u64 supported_features = CEPH_FEATURE_FLOCK | - CEPH_FEATURE_DIRLAYOUTHASH; + CEPH_FEATURE_DIRLAYOUTHASH | + CEPH_FEATURE_MDS_INLINE_DATA; const u64 required_features = 0; int page_count; size_t size; @@ -1017,9 +1018,6 @@ static struct file_system_type ceph_fs_type = { }; MODULE_ALIAS_FS("ceph"); -#define _STRINGIFY(x) #x -#define STRINGIFY(x) _STRINGIFY(x) - static int __init init_ceph(void) { int ret = init_caches(); @@ -1028,15 +1026,20 @@ static int __init init_ceph(void) ceph_flock_init(); ceph_xattr_init(); + ret = ceph_snap_init(); + if (ret) + goto out_xattr; ret = register_filesystem(&ceph_fs_type); if (ret) - goto out_icache; + goto out_snap; pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); return 0; -out_icache: +out_snap: + ceph_snap_exit(); +out_xattr: ceph_xattr_exit(); destroy_caches(); out: @@ -1047,6 +1050,7 @@ static void __exit exit_ceph(void) { dout("exit_ceph\n"); unregister_filesystem(&ceph_fs_type); + ceph_snap_exit(); ceph_xattr_exit(); destroy_caches(); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b82f507979b..e1aa32d0759 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -161,6 +161,7 @@ struct ceph_cap_snap { u64 time_warp_seq; int writing; /* a sync write is still in progress */ int dirty_pages; /* dirty pages awaiting writeback */ + bool inline_data; }; static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) @@ -253,9 +254,11 @@ struct ceph_inode_info { spinlock_t i_ceph_lock; u64 i_version; + u64 i_inline_version; u32 i_time_warp_seq; unsigned i_ceph_flags; + int i_ordered_count; atomic_t i_release_count; atomic_t i_complete_count; @@ -434,14 +437,19 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, /* * Ceph inode. 
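Not in the patch, for context: "complete" and "ordered" are generation checks. Bumping i_release_count invalidates a cached directory listing outright, while the new i_ordered_count only invalidates its ordering, so dcache readdir can still trust contents whose order may have changed. A sketch of the readdir-side handshake, assuming the dir_release_count/dir_ordered_count fields added to ceph_file_info below:

        /* before walking the directory */
        fi->dir_release_count = atomic_read(&ci->i_release_count);
        fi->dir_ordered_count = ci->i_ordered_count;

        /* ... emit every entry ... */

        /*
         * afterwards: "complete" sticks only if no release happened in
         * between, and CEPH_I_DIR_ORDERED only if nothing bumped
         * i_ordered_count (e.g. a create or rename) meanwhile
         */
        __ceph_dir_set_complete(ci, fi->dir_release_count,
                                fi->dir_ordered_count);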
*/ -#define CEPH_I_NODELAY 4 /* do not delay cap release */ -#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ -#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ +#define CEPH_I_DIR_ORDERED 1 /* dentries in dir are ordered */ +#define CEPH_I_NODELAY 4 /* do not delay cap release */ +#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ +#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, - int release_count) + int release_count, int ordered_count) { atomic_set(&ci->i_complete_count, release_count); + if (ci->i_ordered_count == ordered_count) + ci->i_ceph_flags |= CEPH_I_DIR_ORDERED; + else + ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED; } static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci) @@ -455,16 +463,35 @@ static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci) atomic_read(&ci->i_release_count); } +static inline bool __ceph_dir_is_complete_ordered(struct ceph_inode_info *ci) +{ + return __ceph_dir_is_complete(ci) && + (ci->i_ceph_flags & CEPH_I_DIR_ORDERED); +} + static inline void ceph_dir_clear_complete(struct inode *inode) { __ceph_dir_clear_complete(ceph_inode(inode)); } -static inline bool ceph_dir_is_complete(struct inode *inode) +static inline void ceph_dir_clear_ordered(struct inode *inode) { - return __ceph_dir_is_complete(ceph_inode(inode)); + struct ceph_inode_info *ci = ceph_inode(inode); + spin_lock(&ci->i_ceph_lock); + ci->i_ordered_count++; + ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED; + spin_unlock(&ci->i_ceph_lock); } +static inline bool ceph_dir_is_complete_ordered(struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + bool ret; + spin_lock(&ci->i_ceph_lock); + ret = __ceph_dir_is_complete_ordered(ci); + spin_unlock(&ci->i_ceph_lock); + return ret; +} /* find a specific frag @f */ extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, @@ -580,6 +607,7 @@ struct ceph_file_info { char *last_name; /* last entry in previous chunk */ struct dentry *dentry; /* next dentry (for dcache readdir) */ int dir_release_count; + int dir_ordered_count; /* used for -o dirstat read() on directory thing */ char *dir_info; @@ -673,6 +701,8 @@ extern void ceph_queue_cap_snap(struct ceph_inode_info *ci); extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap); extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc); +extern int ceph_snap_init(void); +extern void ceph_snap_exit(void); /* * a cap_snap is "pending" if it is still awaiting an in-progress @@ -715,7 +745,12 @@ extern void ceph_queue_vmtruncate(struct inode *inode); extern void ceph_queue_invalidate(struct inode *inode); extern void ceph_queue_writeback(struct inode *inode); -extern int ceph_do_getattr(struct inode *inode, int mask, bool force); +extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page, + int mask, bool force); +static inline int ceph_do_getattr(struct inode *inode, int mask, bool force) +{ + return __ceph_do_getattr(inode, NULL, mask, force); +} extern int ceph_permission(struct inode *inode, int mask); extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, @@ -830,7 +865,7 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn, int mds, int drop, int unless); extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, - int *got, loff_t endoff); + loff_t endoff, 
int *got, struct page **pinned_page); /* for counting open files by mode */ static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode) @@ -852,7 +887,9 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode, int *opened); extern int ceph_release(struct inode *inode, struct file *filp); - +extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, + char *data, size_t len); +int ceph_uninline_data(struct file *filp, struct page *locked_page); /* dir.c */ extern const struct file_operations ceph_dir_fops; extern const struct inode_operations ceph_dir_iops; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 678b0d2bbbc..5a492caf34c 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -854,7 +854,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, struct ceph_pagelist *pagelist = NULL; int err; - if (value) { + if (size > 0) { /* copy value into pagelist */ pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); if (!pagelist) @@ -864,7 +864,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, err = ceph_pagelist_append(pagelist, value, size); if (err) goto out; - } else { + } else if (!value) { flags |= CEPH_XATTR_REMOVE; } @@ -1001,6 +1001,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name, if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) return generic_setxattr(dentry, name, value, size, flags); + if (size == 0) + value = ""; /* empty EA, do not remove */ + return __ceph_setxattr(dentry, name, value, size, flags); } diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 966ace8b243..28d0c7abba1 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -415,7 +415,7 @@ err_unlock: err_region: unregister_chrdev_region(devt, 1); err: - fuse_conn_kill(fc); + fuse_abort_conn(fc); goto out; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ca887314aba..ba1107977f2 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -511,6 +511,35 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) } EXPORT_SYMBOL_GPL(fuse_request_send); +ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) +{ + struct fuse_req *req; + ssize_t ret; + + req = fuse_get_req(fc, 0); + if (IS_ERR(req)) + return PTR_ERR(req); + + req->in.h.opcode = args->in.h.opcode; + req->in.h.nodeid = args->in.h.nodeid; + req->in.numargs = args->in.numargs; + memcpy(req->in.args, args->in.args, + args->in.numargs * sizeof(struct fuse_in_arg)); + req->out.argvar = args->out.argvar; + req->out.numargs = args->out.numargs; + memcpy(req->out.args, args->out.args, + args->out.numargs * sizeof(struct fuse_arg)); + fuse_request_send(fc, req); + ret = req->out.h.error; + if (!ret && args->out.argvar) { + BUG_ON(args->out.numargs != 1); + ret = req->out.args[0].size; + } + fuse_put_request(fc, req); + + return ret; +} + static void fuse_request_send_nowait_locked(struct fuse_conn *fc, struct fuse_req *req) { diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index df562cc8776..252b8a5de8b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -145,22 +145,22 @@ static void fuse_invalidate_entry(struct dentry *entry) fuse_invalidate_entry_cache(entry); } -static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req, +static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, u64 nodeid, struct qstr *name, struct fuse_entry_out *outarg) { memset(outarg, 0, sizeof(struct fuse_entry_out)); - req->in.h.opcode = FUSE_LOOKUP; - req->in.h.nodeid = 
nodeid; - req->in.numargs = 1; - req->in.args[0].size = name->len + 1; - req->in.args[0].value = name->name; - req->out.numargs = 1; + args->in.h.opcode = FUSE_LOOKUP; + args->in.h.nodeid = nodeid; + args->in.numargs = 1; + args->in.args[0].size = name->len + 1; + args->in.args[0].value = name->name; + args->out.numargs = 1; if (fc->minor < 9) - req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; else - req->out.args[0].size = sizeof(struct fuse_entry_out); - req->out.args[0].value = outarg; + args->out.args[0].size = sizeof(struct fuse_entry_out); + args->out.args[0].value = outarg; } u64 fuse_get_attr_version(struct fuse_conn *fc) @@ -200,9 +200,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) goto invalid; else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || (flags & LOOKUP_REVAL)) { - int err; struct fuse_entry_out outarg; - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_forget_link *forget; u64 attr_version; @@ -215,31 +214,23 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) goto out; fc = get_fuse_conn(inode); - req = fuse_get_req_nopages(fc); - ret = PTR_ERR(req); - if (IS_ERR(req)) - goto out; forget = fuse_alloc_forget(); - if (!forget) { - fuse_put_request(fc, req); - ret = -ENOMEM; + ret = -ENOMEM; + if (!forget) goto out; - } attr_version = fuse_get_attr_version(fc); parent = dget_parent(entry); - fuse_lookup_init(fc, req, get_node_id(parent->d_inode), + fuse_lookup_init(fc, &args, get_node_id(parent->d_inode), &entry->d_name, &outarg); - fuse_request_send(fc, req); + ret = fuse_simple_request(fc, &args); dput(parent); - err = req->out.h.error; - fuse_put_request(fc, req); /* Zero nodeid is same as -ENOENT */ - if (!err && !outarg.nodeid) - err = -ENOENT; - if (!err) { + if (!ret && !outarg.nodeid) + ret = -ENOENT; + if (!ret) { fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode)) { fuse_queue_forget(fc, forget, outarg.nodeid, 1); @@ -250,7 +241,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) spin_unlock(&fc->lock); } kfree(forget); - if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) + if (ret == -ENOMEM) + goto out; + if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) goto invalid; fuse_change_attributes(inode, &outarg.attr, @@ -296,7 +289,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, struct fuse_entry_out *outarg, struct inode **inode) { struct fuse_conn *fc = get_fuse_conn_super(sb); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_forget_link *forget; u64 attr_version; int err; @@ -306,24 +299,16 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, if (name->len > FUSE_NAME_MAX) goto out; - req = fuse_get_req_nopages(fc); - err = PTR_ERR(req); - if (IS_ERR(req)) - goto out; forget = fuse_alloc_forget(); err = -ENOMEM; - if (!forget) { - fuse_put_request(fc, req); + if (!forget) goto out; - } attr_version = fuse_get_attr_version(fc); - fuse_lookup_init(fc, req, nodeid, name, outarg); - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + fuse_lookup_init(fc, &args, nodeid, name, outarg); + err = fuse_simple_request(fc, &args); /* Zero nodeid is same as -ENOENT, but with valid timeout */ if (err || !outarg->nodeid) goto out_put_forget; @@ -405,7 +390,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int err; struct inode *inode; struct fuse_conn *fc = 
get_fuse_conn(dir); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_forget_link *forget; struct fuse_create_in inarg; struct fuse_open_out outopen; @@ -420,15 +405,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, if (!forget) goto out_err; - req = fuse_get_req_nopages(fc); - err = PTR_ERR(req); - if (IS_ERR(req)) - goto out_put_forget_req; - err = -ENOMEM; ff = fuse_file_alloc(fc); if (!ff) - goto out_put_request; + goto out_put_forget_req; if (!fc->dont_mask) mode &= ~current_umask(); @@ -439,24 +419,23 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, inarg.flags = flags; inarg.mode = mode; inarg.umask = current_umask(); - req->in.h.opcode = FUSE_CREATE; - req->in.h.nodeid = get_node_id(dir); - req->in.numargs = 2; - req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) : + args.in.h.opcode = FUSE_CREATE; + args.in.h.nodeid = get_node_id(dir); + args.in.numargs = 2; + args.in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) : sizeof(inarg); - req->in.args[0].value = &inarg; - req->in.args[1].size = entry->d_name.len + 1; - req->in.args[1].value = entry->d_name.name; - req->out.numargs = 2; + args.in.args[0].value = &inarg; + args.in.args[1].size = entry->d_name.len + 1; + args.in.args[1].value = entry->d_name.name; + args.out.numargs = 2; if (fc->minor < 9) - req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + args.out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; else - req->out.args[0].size = sizeof(outentry); - req->out.args[0].value = &outentry; - req->out.args[1].size = sizeof(outopen); - req->out.args[1].value = &outopen; - fuse_request_send(fc, req); - err = req->out.h.error; + args.out.args[0].size = sizeof(outentry); + args.out.args[0].value = &outentry; + args.out.args[1].size = sizeof(outopen); + args.out.args[1].value = &outopen; + err = fuse_simple_request(fc, &args); if (err) goto out_free_ff; @@ -464,7 +443,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid)) goto out_free_ff; - fuse_put_request(fc, req); ff->fh = outopen.fh; ff->nodeid = outentry.nodeid; ff->open_flags = outopen.open_flags; @@ -492,8 +470,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, out_free_ff: fuse_file_free(ff); -out_put_request: - fuse_put_request(fc, req); out_put_forget_req: kfree(forget); out_err: @@ -547,7 +523,7 @@ no_open: /* * Code shared between mknod, mkdir, symlink and link */ -static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, +static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, struct inode *dir, struct dentry *entry, umode_t mode) { @@ -557,22 +533,18 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, struct fuse_forget_link *forget; forget = fuse_alloc_forget(); - if (!forget) { - fuse_put_request(fc, req); + if (!forget) return -ENOMEM; - } memset(&outarg, 0, sizeof(outarg)); - req->in.h.nodeid = get_node_id(dir); - req->out.numargs = 1; + args->in.h.nodeid = get_node_id(dir); + args->out.numargs = 1; if (fc->minor < 9) - req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; else - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args->out.args[0].size = sizeof(outarg); + args->out.args[0].value = &outarg; + err = fuse_simple_request(fc, args); if (err) goto 
out_put_forget_req; @@ -609,9 +581,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, { struct fuse_mknod_in inarg; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + FUSE_ARGS(args); if (!fc->dont_mask) mode &= ~current_umask(); @@ -620,14 +590,14 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, inarg.mode = mode; inarg.rdev = new_encode_dev(rdev); inarg.umask = current_umask(); - req->in.h.opcode = FUSE_MKNOD; - req->in.numargs = 2; - req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE : + args.in.h.opcode = FUSE_MKNOD; + args.in.numargs = 2; + args.in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE : sizeof(inarg); - req->in.args[0].value = &inarg; - req->in.args[1].size = entry->d_name.len + 1; - req->in.args[1].value = entry->d_name.name; - return create_new_entry(fc, req, dir, entry, mode); + args.in.args[0].value = &inarg; + args.in.args[1].size = entry->d_name.len + 1; + args.in.args[1].value = entry->d_name.name; + return create_new_entry(fc, &args, dir, entry, mode); } static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, @@ -640,9 +610,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) { struct fuse_mkdir_in inarg; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + FUSE_ARGS(args); if (!fc->dont_mask) mode &= ~current_umask(); @@ -650,13 +618,13 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; inarg.umask = current_umask(); - req->in.h.opcode = FUSE_MKDIR; - req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->in.args[1].size = entry->d_name.len + 1; - req->in.args[1].value = entry->d_name.name; - return create_new_entry(fc, req, dir, entry, S_IFDIR); + args.in.h.opcode = FUSE_MKDIR; + args.in.numargs = 2; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.in.args[1].size = entry->d_name.len + 1; + args.in.args[1].value = entry->d_name.name; + return create_new_entry(fc, &args, dir, entry, S_IFDIR); } static int fuse_symlink(struct inode *dir, struct dentry *entry, @@ -664,17 +632,15 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, { struct fuse_conn *fc = get_fuse_conn(dir); unsigned len = strlen(link) + 1; - struct fuse_req *req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + FUSE_ARGS(args); - req->in.h.opcode = FUSE_SYMLINK; - req->in.numargs = 2; - req->in.args[0].size = entry->d_name.len + 1; - req->in.args[0].value = entry->d_name.name; - req->in.args[1].size = len; - req->in.args[1].value = link; - return create_new_entry(fc, req, dir, entry, S_IFLNK); + args.in.h.opcode = FUSE_SYMLINK; + args.in.numargs = 2; + args.in.args[0].size = entry->d_name.len + 1; + args.in.args[0].value = entry->d_name.name; + args.in.args[1].size = len; + args.in.args[1].value = link; + return create_new_entry(fc, &args, dir, entry, S_IFLNK); } static inline void fuse_update_ctime(struct inode *inode) @@ -689,18 +655,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) { int err; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - - req->in.h.opcode = FUSE_UNLINK; - 
req->in.h.nodeid = get_node_id(dir); - req->in.numargs = 1; - req->in.args[0].size = entry->d_name.len + 1; - req->in.args[0].value = entry->d_name.name; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + FUSE_ARGS(args); + + args.in.h.opcode = FUSE_UNLINK; + args.in.h.nodeid = get_node_id(dir); + args.in.numargs = 1; + args.in.args[0].size = entry->d_name.len + 1; + args.in.args[0].value = entry->d_name.name; + err = fuse_simple_request(fc, &args); if (!err) { struct inode *inode = entry->d_inode; struct fuse_inode *fi = get_fuse_inode(inode); @@ -729,18 +691,14 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) { int err; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - - req->in.h.opcode = FUSE_RMDIR; - req->in.h.nodeid = get_node_id(dir); - req->in.numargs = 1; - req->in.args[0].size = entry->d_name.len + 1; - req->in.args[0].value = entry->d_name.name; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + FUSE_ARGS(args); + + args.in.h.opcode = FUSE_RMDIR; + args.in.h.nodeid = get_node_id(dir); + args.in.numargs = 1; + args.in.args[0].size = entry->d_name.len + 1; + args.in.args[0].value = entry->d_name.name; + err = fuse_simple_request(fc, &args); if (!err) { clear_nlink(entry->d_inode); fuse_invalidate_attr(dir); @@ -757,27 +715,21 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, int err; struct fuse_rename2_in inarg; struct fuse_conn *fc = get_fuse_conn(olddir); - struct fuse_req *req; - - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + FUSE_ARGS(args); memset(&inarg, 0, argsize); inarg.newdir = get_node_id(newdir); inarg.flags = flags; - req->in.h.opcode = opcode; - req->in.h.nodeid = get_node_id(olddir); - req->in.numargs = 3; - req->in.args[0].size = argsize; - req->in.args[0].value = &inarg; - req->in.args[1].size = oldent->d_name.len + 1; - req->in.args[1].value = oldent->d_name.name; - req->in.args[2].size = newent->d_name.len + 1; - req->in.args[2].value = newent->d_name.name; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = opcode; + args.in.h.nodeid = get_node_id(olddir); + args.in.numargs = 3; + args.in.args[0].size = argsize; + args.in.args[0].value = &inarg; + args.in.args[1].size = oldent->d_name.len + 1; + args.in.args[1].value = oldent->d_name.name; + args.in.args[2].size = newent->d_name.len + 1; + args.in.args[2].value = newent->d_name.name; + err = fuse_simple_request(fc, &args); if (!err) { /* ctime changes */ fuse_invalidate_attr(oldent->d_inode); @@ -849,19 +801,17 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, struct fuse_link_in inarg; struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + FUSE_ARGS(args); memset(&inarg, 0, sizeof(inarg)); inarg.oldnodeid = get_node_id(inode); - req->in.h.opcode = FUSE_LINK; - req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->in.args[1].size = newent->d_name.len + 1; - req->in.args[1].value = newent->d_name.name; - err = create_new_entry(fc, req, newdir, newent, inode->i_mode); + args.in.h.opcode = FUSE_LINK; + args.in.numargs = 2; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.in.args[1].size = newent->d_name.len + 1; 
+ args.in.args[1].value = newent->d_name.name; + err = create_new_entry(fc, &args, newdir, newent, inode->i_mode); /* Contrary to "normal" filesystems it can happen that link makes two "logical" inodes point to the same "physical" inode. We invalidate the attributes of the old one, so it @@ -929,13 +879,9 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, struct fuse_getattr_in inarg; struct fuse_attr_out outarg; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); u64 attr_version; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - attr_version = fuse_get_attr_version(fc); memset(&inarg, 0, sizeof(inarg)); @@ -947,20 +893,18 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, inarg.getattr_flags |= FUSE_GETATTR_FH; inarg.fh = ff->fh; } - req->in.h.opcode = FUSE_GETATTR; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->out.numargs = 1; + args.in.h.opcode = FUSE_GETATTR; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.out.numargs = 1; if (fc->minor < 9) - req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; + args.out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; else - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.out.args[0].size = sizeof(outarg); + args.out.args[0].value = &outarg; + err = fuse_simple_request(fc, &args); if (!err) { if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); @@ -1102,7 +1046,7 @@ int fuse_allow_current_process(struct fuse_conn *fc) static int fuse_access(struct inode *inode, int mask) { struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_access_in inarg; int err; @@ -1111,20 +1055,14 @@ static int fuse_access(struct inode *inode, int mask) if (fc->no_access) return 0; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - memset(&inarg, 0, sizeof(inarg)); inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); - req->in.h.opcode = FUSE_ACCESS; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = FUSE_ACCESS; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_access = 1; err = 0; @@ -1445,31 +1383,27 @@ static char *read_link(struct dentry *dentry) { struct inode *inode = dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_req_nopages(fc); + FUSE_ARGS(args); char *link; - - if (IS_ERR(req)) - return ERR_CAST(req); + ssize_t ret; link = (char *) __get_free_page(GFP_KERNEL); - if (!link) { - link = ERR_PTR(-ENOMEM); - goto out; - } - req->in.h.opcode = FUSE_READLINK; - req->in.h.nodeid = get_node_id(inode); - req->out.argvar = 1; - req->out.numargs = 1; - req->out.args[0].size = PAGE_SIZE - 1; - req->out.args[0].value = link; - fuse_request_send(fc, req); - if (req->out.h.error) { + if (!link) + return ERR_PTR(-ENOMEM); + + args.in.h.opcode = FUSE_READLINK; + args.in.h.nodeid = 
get_node_id(inode); + args.out.argvar = 1; + args.out.numargs = 1; + args.out.args[0].size = PAGE_SIZE - 1; + args.out.args[0].value = link; + ret = fuse_simple_request(fc, &args); + if (ret < 0) { free_page((unsigned long) link); - link = ERR_PTR(req->out.h.error); - } else - link[req->out.args[0].size] = '\0'; - out: - fuse_put_request(fc, req); + link = ERR_PTR(ret); + } else { + link[ret] = '\0'; + } fuse_invalidate_atime(inode); return link; } @@ -1629,22 +1563,22 @@ void fuse_release_nowrite(struct inode *inode) spin_unlock(&fc->lock); } -static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req, +static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args, struct inode *inode, struct fuse_setattr_in *inarg_p, struct fuse_attr_out *outarg_p) { - req->in.h.opcode = FUSE_SETATTR; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(*inarg_p); - req->in.args[0].value = inarg_p; - req->out.numargs = 1; + args->in.h.opcode = FUSE_SETATTR; + args->in.h.nodeid = get_node_id(inode); + args->in.numargs = 1; + args->in.args[0].size = sizeof(*inarg_p); + args->in.args[0].value = inarg_p; + args->out.numargs = 1; if (fc->minor < 9) - req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; + args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; else - req->out.args[0].size = sizeof(*outarg_p); - req->out.args[0].value = outarg_p; + args->out.args[0].size = sizeof(*outarg_p); + args->out.args[0].value = outarg_p; } /* @@ -1653,14 +1587,9 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req, int fuse_flush_times(struct inode *inode, struct fuse_file *ff) { struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_setattr_in inarg; struct fuse_attr_out outarg; - int err; - - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); @@ -1677,12 +1606,9 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff) inarg.valid |= FATTR_FH; inarg.fh = ff->fh; } - fuse_setattr_fill(fc, req, inode, &inarg, &outarg); - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); - return err; + return fuse_simple_request(fc, &args); } /* @@ -1698,7 +1624,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_setattr_in inarg; struct fuse_attr_out outarg; bool is_truncate = false; @@ -1723,10 +1649,6 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, if (attr->ia_valid & ATTR_SIZE) is_truncate = true; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - if (is_truncate) { fuse_set_nowrite(inode); set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); @@ -1747,10 +1669,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, inarg.valid |= FATTR_LOCKOWNER; inarg.lock_owner = fuse_lock_owner_id(fc, current->files); } - fuse_setattr_fill(fc, req, inode, &inarg, &outarg); - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); + err = fuse_simple_request(fc, &args); if (err) { if (err == -EINTR) fuse_invalidate_attr(inode); @@ -1837,32 +1757,26 @@ static int fuse_setxattr(struct dentry *entry, const char *name, { struct inode *inode = 
entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_setxattr_in inarg; int err; if (fc->no_setxattr) return -EOPNOTSUPP; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - memset(&inarg, 0, sizeof(inarg)); inarg.size = size; inarg.flags = flags; - req->in.h.opcode = FUSE_SETXATTR; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 3; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->in.args[1].size = strlen(name) + 1; - req->in.args[1].value = name; - req->in.args[2].size = size; - req->in.args[2].value = value; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = FUSE_SETXATTR; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 3; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.in.args[1].size = strlen(name) + 1; + args.in.args[1].value = name; + args.in.args[2].size = size; + args.in.args[2].value = value; + err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_setxattr = 1; err = -EOPNOTSUPP; @@ -1879,7 +1793,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name, { struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_getxattr_in inarg; struct fuse_getxattr_out outarg; ssize_t ret; @@ -1887,40 +1801,32 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name, if (fc->no_getxattr) return -EOPNOTSUPP; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - memset(&inarg, 0, sizeof(inarg)); inarg.size = size; - req->in.h.opcode = FUSE_GETXATTR; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->in.args[1].size = strlen(name) + 1; - req->in.args[1].value = name; + args.in.h.opcode = FUSE_GETXATTR; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 2; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.in.args[1].size = strlen(name) + 1; + args.in.args[1].value = name; /* This is really two different operations rolled into one */ - req->out.numargs = 1; + args.out.numargs = 1; if (size) { - req->out.argvar = 1; - req->out.args[0].size = size; - req->out.args[0].value = value; + args.out.argvar = 1; + args.out.args[0].size = size; + args.out.args[0].value = value; } else { - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; + args.out.args[0].size = sizeof(outarg); + args.out.args[0].value = &outarg; } - fuse_request_send(fc, req); - ret = req->out.h.error; - if (!ret) - ret = size ? 
req->out.args[0].size : outarg.size; - else { - if (ret == -ENOSYS) { - fc->no_getxattr = 1; - ret = -EOPNOTSUPP; - } + ret = fuse_simple_request(fc, &args); + if (!ret && !size) + ret = outarg.size; + if (ret == -ENOSYS) { + fc->no_getxattr = 1; + ret = -EOPNOTSUPP; } - fuse_put_request(fc, req); return ret; } @@ -1928,7 +1834,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) { struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_getxattr_in inarg; struct fuse_getxattr_out outarg; ssize_t ret; @@ -1939,38 +1845,30 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) if (fc->no_listxattr) return -EOPNOTSUPP; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - memset(&inarg, 0, sizeof(inarg)); inarg.size = size; - req->in.h.opcode = FUSE_LISTXATTR; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; + args.in.h.opcode = FUSE_LISTXATTR; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; /* This is really two different operations rolled into one */ - req->out.numargs = 1; + args.out.numargs = 1; if (size) { - req->out.argvar = 1; - req->out.args[0].size = size; - req->out.args[0].value = list; + args.out.argvar = 1; + args.out.args[0].size = size; + args.out.args[0].value = list; } else { - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; + args.out.args[0].size = sizeof(outarg); + args.out.args[0].value = &outarg; } - fuse_request_send(fc, req); - ret = req->out.h.error; - if (!ret) - ret = size ? 
req->out.args[0].size : outarg.size; - else { - if (ret == -ENOSYS) { - fc->no_listxattr = 1; - ret = -EOPNOTSUPP; - } + ret = fuse_simple_request(fc, &args); + if (!ret && !size) + ret = outarg.size; + if (ret == -ENOSYS) { + fc->no_listxattr = 1; + ret = -EOPNOTSUPP; } - fuse_put_request(fc, req); return ret; } @@ -1978,24 +1876,18 @@ static int fuse_removexattr(struct dentry *entry, const char *name) { struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); int err; if (fc->no_removexattr) return -EOPNOTSUPP; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - - req->in.h.opcode = FUSE_REMOVEXATTR; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = strlen(name) + 1; - req->in.args[0].value = name; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = FUSE_REMOVEXATTR; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 1; + args.in.args[0].size = strlen(name) + 1; + args.in.args[0].value = name; + err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_removexattr = 1; err = -EOPNOTSUPP; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index bf50259012a..760b2c55219 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -24,30 +24,22 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, int opcode, struct fuse_open_out *outargp) { struct fuse_open_in inarg; - struct fuse_req *req; - int err; - - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + FUSE_ARGS(args); memset(&inarg, 0, sizeof(inarg)); inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); if (!fc->atomic_o_trunc) inarg.flags &= ~O_TRUNC; - req->in.h.opcode = opcode; - req->in.h.nodeid = nodeid; - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->out.numargs = 1; - req->out.args[0].size = sizeof(*outargp); - req->out.args[0].value = outargp; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = opcode; + args.in.h.nodeid = nodeid; + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.out.numargs = 1; + args.out.args[0].size = sizeof(*outargp); + args.out.args[0].value = outargp; - return err; + return fuse_simple_request(fc, &args); } struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) @@ -89,37 +81,9 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff) return ff; } -static void fuse_release_async(struct work_struct *work) -{ - struct fuse_req *req; - struct fuse_conn *fc; - struct path path; - - req = container_of(work, struct fuse_req, misc.release.work); - path = req->misc.release.path; - fc = get_fuse_conn(path.dentry->d_inode); - - fuse_put_request(fc, req); - path_put(&path); -} - static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) { - if (fc->destroy_req) { - /* - * If this is a fuseblk mount, then it's possible that - * releasing the path will result in releasing the - * super block and sending the DESTROY request. If - * the server is single threaded, this would hang. - * For this reason do the path_put() in a separate - * thread. 
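For context (condensing the hunks below, not new behavior): the rewrite pins a bare inode reference instead of a path, so completing a RELEASE can no longer drop the last reference to the vfsmount, tear down the super block, and send DESTROY from the server's own context; the worker-thread detour described above therefore becomes unnecessary:

        /* when the release request is set up */
        req->misc.release.inode = igrab(file_inode(file)); /* may be NULL */

        /* when the request completes, from any context */
        static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
        {
                iput(req->misc.release.inode);  /* iput(NULL) is a no-op */
        }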
- */ - atomic_inc(&req->count); - INIT_WORK(&req->misc.release.work, fuse_release_async); - schedule_work(&req->misc.release.work); - } else { - path_put(&req->misc.release.path); - } + iput(req->misc.release.inode); } static void fuse_file_put(struct fuse_file *ff, bool sync) @@ -133,12 +97,12 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) * implement 'open' */ req->background = 0; - path_put(&req->misc.release.path); + iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else if (sync) { req->background = 0; fuse_request_send(ff->fc, req); - path_put(&req->misc.release.path); + iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else { req->end = fuse_release_end; @@ -297,9 +261,8 @@ void fuse_release_common(struct file *file, int opcode) inarg->lock_owner = fuse_lock_owner_id(ff->fc, (fl_owner_t) file); } - /* Hold vfsmount and dentry until release is finished */ - path_get(&file->f_path); - req->misc.release.path = file->f_path; + /* Hold inode until release is finished */ + req->misc.release.inode = igrab(file_inode(file)); /* * Normally this will send the RELEASE request, however if @@ -480,7 +443,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, struct inode *inode = file->f_mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_file *ff = file->private_data; - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_fsync_in inarg; int err; @@ -506,23 +469,15 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) goto out; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) { - err = PTR_ERR(req); - goto out; - } - memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; inarg.fsync_flags = datasync ? 1 : 0; - req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = isdir ? 
FUSE_FSYNCDIR : FUSE_FSYNC; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { if (isdir) fc->no_fsyncdir = 1; @@ -2156,49 +2111,44 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl, return 0; } -static void fuse_lk_fill(struct fuse_req *req, struct file *file, +static void fuse_lk_fill(struct fuse_args *args, struct file *file, const struct file_lock *fl, int opcode, pid_t pid, - int flock) + int flock, struct fuse_lk_in *inarg) { struct inode *inode = file_inode(file); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_file *ff = file->private_data; - struct fuse_lk_in *arg = &req->misc.lk_in; - - arg->fh = ff->fh; - arg->owner = fuse_lock_owner_id(fc, fl->fl_owner); - arg->lk.start = fl->fl_start; - arg->lk.end = fl->fl_end; - arg->lk.type = fl->fl_type; - arg->lk.pid = pid; + + memset(inarg, 0, sizeof(*inarg)); + inarg->fh = ff->fh; + inarg->owner = fuse_lock_owner_id(fc, fl->fl_owner); + inarg->lk.start = fl->fl_start; + inarg->lk.end = fl->fl_end; + inarg->lk.type = fl->fl_type; + inarg->lk.pid = pid; if (flock) - arg->lk_flags |= FUSE_LK_FLOCK; - req->in.h.opcode = opcode; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(*arg); - req->in.args[0].value = arg; + inarg->lk_flags |= FUSE_LK_FLOCK; + args->in.h.opcode = opcode; + args->in.h.nodeid = get_node_id(inode); + args->in.numargs = 1; + args->in.args[0].size = sizeof(*inarg); + args->in.args[0].value = inarg; } static int fuse_getlk(struct file *file, struct file_lock *fl) { struct inode *inode = file_inode(file); struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); + struct fuse_lk_in inarg; struct fuse_lk_out outarg; int err; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - - fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0); - req->out.numargs = 1; - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg); + args.out.numargs = 1; + args.out.args[0].size = sizeof(outarg); + args.out.args[0].value = &outarg; + err = fuse_simple_request(fc, &args); if (!err) err = convert_fuse_file_lock(&outarg.lk, fl); @@ -2209,7 +2159,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) { struct inode *inode = file_inode(file); struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); + struct fuse_lk_in inarg; int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK; pid_t pid = fl->fl_type != F_UNLCK ? 
current->tgid : 0; int err; @@ -2223,17 +2174,13 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) if (fl->fl_flags & FL_CLOSE) return 0; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + fuse_lk_fill(&args, file, fl, opcode, pid, flock, &inarg); + err = fuse_simple_request(fc, &args); - fuse_lk_fill(req, file, fl, opcode, pid, flock); - fuse_request_send(fc, req); - err = req->out.h.error; /* locking is restartable */ if (err == -EINTR) err = -ERESTARTSYS; - fuse_put_request(fc, req); + return err; } @@ -2283,7 +2230,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_bmap_in inarg; struct fuse_bmap_out outarg; int err; @@ -2291,24 +2238,18 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) if (!inode->i_sb->s_bdev || fc->no_bmap) return 0; - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return 0; - memset(&inarg, 0, sizeof(inarg)); inarg.block = block; inarg.blocksize = inode->i_sb->s_blocksize; - req->in.h.opcode = FUSE_BMAP; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->out.numargs = 1; - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = FUSE_BMAP; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.out.numargs = 1; + args.out.args[0].size = sizeof(outarg); + args.out.args[0].value = &outarg; + err = fuse_simple_request(fc, &args); if (err == -ENOSYS) fc->no_bmap = 1; @@ -2776,7 +2717,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) struct fuse_conn *fc = ff->fc; struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; struct fuse_poll_out outarg; - struct fuse_req *req; + FUSE_ARGS(args); int err; if (fc->no_poll) @@ -2794,21 +2735,15 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) fuse_register_polled_file(fc, ff); } - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return POLLERR; - - req->in.h.opcode = FUSE_POLL; - req->in.h.nodeid = ff->nodeid; - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->out.numargs = 1; - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.in.h.opcode = FUSE_POLL; + args.in.h.nodeid = ff->nodeid; + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + args.out.numargs = 1; + args.out.args[0].size = sizeof(outarg); + args.out.args[0].value = &outarg; + err = fuse_simple_request(fc, &args); if (!err) return outarg.revents; @@ -2949,10 +2884,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, loff_t length) { struct fuse_file *ff = file->private_data; - struct inode *inode = file->f_inode; + struct inode *inode = file_inode(file); struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = ff->fc; - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_fallocate_in inarg = { .fh = ff->fh, .offset = offset, @@ -2985,25 +2920,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if 
(!(mode & FALLOC_FL_KEEP_SIZE)) set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) { - err = PTR_ERR(req); - goto out; - } - - req->in.h.opcode = FUSE_FALLOCATE; - req->in.h.nodeid = ff->nodeid; - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - fuse_request_send(fc, req); - err = req->out.h.error; + args.in.h.opcode = FUSE_FALLOCATE; + args.in.h.nodeid = ff->nodeid; + args.in.numargs = 1; + args.in.args[0].size = sizeof(inarg); + args.in.args[0].value = &inarg; + err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_fallocate = 1; err = -EOPNOTSUPP; } - fuse_put_request(fc, req); - if (err) goto out; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e8e47a6ab51..e0fc6725d1d 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -213,7 +213,7 @@ struct fuse_out { unsigned numargs; /** Array of arguments */ - struct fuse_arg args[3]; + struct fuse_arg args[2]; }; /** FUSE page descriptor */ @@ -222,6 +222,25 @@ struct fuse_page_desc { unsigned int offset; }; +struct fuse_args { + struct { + struct { + uint32_t opcode; + uint64_t nodeid; + } h; + unsigned numargs; + struct fuse_in_arg args[3]; + + } in; + struct { + unsigned argvar:1; + unsigned numargs; + struct fuse_arg args[2]; + } out; +}; + +#define FUSE_ARGS(args) struct fuse_args args = {} + /** The request state */ enum fuse_req_state { FUSE_REQ_INIT = 0, @@ -305,11 +324,8 @@ struct fuse_req { /** Data for asynchronous requests */ union { struct { - union { - struct fuse_release_in in; - struct work_struct work; - }; - struct path path; + struct fuse_release_in in; + struct inode *inode; } release; struct fuse_init_in init_in; struct fuse_init_out init_out; @@ -324,7 +340,6 @@ struct fuse_req { struct fuse_req *next; } write; struct fuse_notify_retrieve_in retrieve_in; - struct fuse_lk_in lk_in; } misc; /** page vector */ @@ -754,15 +769,6 @@ struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc, void __fuse_get_request(struct fuse_req *req); /** - * Get a request, may fail with -ENOMEM, - * useful for callers who doesn't use req->pages[] - */ -static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc) -{ - return fuse_get_req(fc, 0); -} - -/** * Gets a requests for a file operation, always succeeds */ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, @@ -780,6 +786,11 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req); /** + * Simple request sending that does request allocation and freeing + */ +ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args); + +/** * Send a request in the background */ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req); @@ -804,8 +815,6 @@ void fuse_invalidate_atime(struct inode *inode); */ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); -void fuse_conn_kill(struct fuse_conn *fc); - /** * Initialize fuse_conn */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 03246cd9d47..6749109f255 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -376,28 +376,13 @@ static void fuse_bdi_destroy(struct fuse_conn *fc) bdi_destroy(&fc->bdi); } -void fuse_conn_kill(struct fuse_conn *fc) -{ - spin_lock(&fc->lock); - fc->connected = 0; - fc->blocked = 0; - fc->initialized = 1; - spin_unlock(&fc->lock); - /* Flush all readers on this fs */ - kill_fasync(&fc->fasync, SIGIO, POLL_IN); - wake_up_all(&fc->waitq); - 
wake_up_all(&fc->blocked_waitq); - wake_up_all(&fc->reserved_req_waitq); -} -EXPORT_SYMBOL_GPL(fuse_conn_kill); - static void fuse_put_super(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); fuse_send_destroy(fc); - fuse_conn_kill(fc); + fuse_abort_conn(fc); mutex_lock(&fuse_mutex); list_del(&fc->entry); fuse_ctl_remove_conn(fc); @@ -425,7 +410,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct fuse_conn *fc = get_fuse_conn_super(sb); - struct fuse_req *req; + FUSE_ARGS(args); struct fuse_statfs_out outarg; int err; @@ -434,23 +419,17 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - memset(&outarg, 0, sizeof(outarg)); - req->in.numargs = 0; - req->in.h.opcode = FUSE_STATFS; - req->in.h.nodeid = get_node_id(dentry->d_inode); - req->out.numargs = 1; - req->out.args[0].size = + args.in.numargs = 0; + args.in.h.opcode = FUSE_STATFS; + args.in.h.nodeid = get_node_id(dentry->d_inode); + args.out.numargs = 1; + args.out.args[0].size = fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg); - req->out.args[0].value = &outarg; - fuse_request_send(fc, req); - err = req->out.h.error; + args.out.args[0].value = &outarg; + err = fuse_simple_request(fc, &args); if (!err) convert_fuse_statfs(buf, &outarg.st); - fuse_put_request(fc, req); return err; } diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 386303dca38..dddbde4f56f 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -224,7 +224,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw)); - /* If a node has zero dsize, we only have to keep if it if it might be the + /* If a node has zero dsize, we only have to keep it if it might be the node with highest version -- i.e. the one which will end up as f->metadata. Note that such nodes won't be REF_UNCHECKED since there are no data to check anyway. 
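That rule, roughly in code (kill_tn, keep_tn and highest_known_version are hypothetical stand-ins for the real tree logic further down in jffs2_add_tn_to_tree()):

        if (tn->fn->size == 0 && tn->version < highest_known_version)
                kill_tn(c, tn); /* can never become f->metadata: drop it */
        else
                keep_tn(c, tn); /* might still win as metadata: insert it */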
*/ diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c index c522d098bb4..bc5385471a6 100644 --- a/fs/jffs2/summary.c +++ b/fs/jffs2/summary.c @@ -844,6 +844,7 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock /* Write out summary information - called from jffs2_do_reserve_space */ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c) + __must_hold(&c->erase_completion_block) { int datasize, infosize, padsize; struct jffs2_eraseblock *jeb; diff --git a/fs/namespace.c b/fs/namespace.c index 30df6e7dd80..cd1e9681a0c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -963,7 +963,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, } /* Don't allow unprivileged users to reveal what is under a mount */ - if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) + if ((flag & CL_UNPRIVILEGED) && + (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire))) mnt->mnt.mnt_flags |= MNT_LOCKED; atomic_inc(&sb->s_active); @@ -1369,6 +1370,8 @@ void umount_tree(struct mount *mnt, int how) } if (last) { last->mnt_hash.next = unmounted.first; + if (unmounted.first) + unmounted.first->pprev = &last->mnt_hash.next; unmounted.first = tmp_list.first; unmounted.first->pprev = &unmounted.first; } @@ -1544,6 +1547,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) goto dput_and_out; if (mnt->mnt.mnt_flags & MNT_LOCKED) goto dput_and_out; + retval = -EPERM; + if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) + goto dput_and_out; retval = do_umount(mnt, flags); dput_and_out: @@ -1606,7 +1612,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, if (IS_ERR(q)) return q; - q->mnt.mnt_flags &= ~MNT_LOCKED; q->mnt_mountpoint = mnt->mnt_mountpoint; p = mnt; @@ -2097,7 +2102,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags, } if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && !(mnt_flags & MNT_NODEV)) { - return -EPERM; + /* Was the nodev implicitly added in mount? 
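Illustration of the effect (not in the patch): in a user namespace, mounting a filesystem without FS_USERNS_DEV_MOUNT makes the kernel force MNT_NODEV, so a later remount that did not spell out "nodev" used to trip the MNT_LOCK_NODEV check even though the user never cleared anything. Roughly, from userspace:

        #include <sys/mount.h>

        /* inside a new user+mount namespace; MNT_NODEV is added implicitly */
        mount("none", "/tmp/m", "tmpfs", 0, "");

        /*
         * previously failed with EPERM; with this hunk the implicit nodev
         * is quietly re-added and the remount succeeds
         */
        mount("none", "/tmp/m", "tmpfs", MS_REMOUNT, "");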
*/ + if ((mnt->mnt_ns->user_ns != &init_user_ns) && + !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) { + mnt_flags |= MNT_NODEV; + } else { + return -EPERM; + } } if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && !(mnt_flags & MNT_NOSUID)) { @@ -2958,6 +2969,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); touch_mnt_namespace(current->nsproxy->mnt_ns); + /* A moved mount should not expire automatically */ + list_del_init(&new_mnt->mnt_expire); unlock_mount_hash(); chroot_fs_refs(&root, &new); put_mountpoint(root_mp); @@ -3002,6 +3015,7 @@ static void __init init_mount_tree(void) root.mnt = mnt; root.dentry = mnt->mnt_root; + mnt->mnt_flags |= MNT_LOCKED; set_fs_pwd(current->fs, &root); set_fs_root(current->fs, &root); diff --git a/fs/pnode.c b/fs/pnode.c index aae331a5d03..260ac8f898a 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -242,6 +242,7 @@ static int propagate_one(struct mount *m) child = copy_tree(last_source, last_source->mnt.mnt_root, type); if (IS_ERR(child)) return PTR_ERR(child); + child->mnt.mnt_flags &= ~MNT_LOCKED; mnt_set_mountpoint(m, mp, child); last_dest = m; last_source = child; diff --git a/fs/proc/base.c b/fs/proc/base.c index 590aeda5af1..3f3d7aeb071 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2464,6 +2464,57 @@ static const struct file_operations proc_projid_map_operations = { .llseek = seq_lseek, .release = proc_id_map_release, }; + +static int proc_setgroups_open(struct inode *inode, struct file *file) +{ + struct user_namespace *ns = NULL; + struct task_struct *task; + int ret; + + ret = -ESRCH; + task = get_proc_task(inode); + if (task) { + rcu_read_lock(); + ns = get_user_ns(task_cred_xxx(task, user_ns)); + rcu_read_unlock(); + put_task_struct(task); + } + if (!ns) + goto err; + + if (file->f_mode & FMODE_WRITE) { + ret = -EACCES; + if (!ns_capable(ns, CAP_SYS_ADMIN)) + goto err_put_ns; + } + + ret = single_open(file, &proc_setgroups_show, ns); + if (ret) + goto err_put_ns; + + return 0; +err_put_ns: + put_user_ns(ns); +err: + return ret; +} + +static int proc_setgroups_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + int ret = single_release(inode, file); + put_user_ns(ns); + return ret; +} + +static const struct file_operations proc_setgroups_operations = { + .open = proc_setgroups_open, + .write = proc_setgroups_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_setgroups_release, +}; #endif /* CONFIG_USER_NS */ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, @@ -2572,6 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), + REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif #ifdef CONFIG_CHECKPOINT_RESTORE REG("timers", S_IRUGO, proc_timers_operations), @@ -2916,6 +2968,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), + REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif }; diff --git a/include/dt-bindings/thermal/tegra124-soctherm.h b/include/dt-bindings/thermal/tegra124-soctherm.h new file mode 100644 index 
00000000000..85aaf66690f --- /dev/null +++ b/include/dt-bindings/thermal/tegra124-soctherm.h @@ -0,0 +1,13 @@ +/* + * This header provides constants for binding nvidia,tegra124-soctherm. + */ + +#ifndef _DT_BINDINGS_THERMAL_TEGRA124_SOCTHERM_H +#define _DT_BINDINGS_THERMAL_TEGRA124_SOCTHERM_H + +#define TEGRA124_SOCTHERM_SENSOR_CPU 0 +#define TEGRA124_SOCTHERM_SENSOR_MEM 1 +#define TEGRA124_SOCTHERM_SENSOR_GPU 2 +#define TEGRA124_SOCTHERM_SENSOR_PLLX 3 + +#endif diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index ad9db6045b2..b3f45a57834 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -60,7 +60,8 @@ struct arch_timer_cpu { #ifdef CONFIG_KVM_ARM_TIMER int kvm_timer_hyp_init(void); -int kvm_timer_init(struct kvm *kvm); +void kvm_timer_enable(struct kvm *kvm); +void kvm_timer_init(struct kvm *kvm); void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, const struct kvm_irq_level *irq); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); @@ -77,11 +78,8 @@ static inline int kvm_timer_hyp_init(void) return 0; }; -static inline int kvm_timer_init(struct kvm *kvm) -{ - return 0; -} - +static inline void kvm_timer_enable(struct kvm *kvm) {} +static inline void kvm_timer_init(struct kvm *kvm) {} static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, const struct kvm_irq_level *irq) {} static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 206dcc3b3f7..ac4888dc86b 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -274,7 +274,7 @@ struct kvm_exit_mmio; #ifdef CONFIG_KVM_ARM_VGIC int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); -int kvm_vgic_init(struct kvm *kvm); +int kvm_vgic_map_resources(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); void kvm_vgic_destroy(struct kvm *kvm); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); @@ -287,7 +287,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) -#define vgic_initialized(k) ((k)->arch.vgic.ready) +#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) +#define vgic_ready(k) ((k)->arch.vgic.ready) int vgic_v2_probe(struct device_node *vgic_node, const struct vgic_ops **ops, @@ -321,7 +322,7 @@ static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, return -ENXIO; } -static inline int kvm_vgic_init(struct kvm *kvm) +static inline int kvm_vgic_map_resources(struct kvm *kvm) { return 0; } @@ -373,6 +374,11 @@ static inline bool vgic_initialized(struct kvm *kvm) { return true; } + +static inline bool vgic_ready(struct kvm *kvm) +{ + return true; +} #endif #endif diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 5f338684413..260d78b587c 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -13,6 +13,7 @@ struct ceph_auth_client; struct ceph_authorizer; +struct ceph_msg; struct ceph_auth_handshake { struct ceph_authorizer *authorizer; @@ -20,6 +21,10 @@ struct ceph_auth_handshake { size_t authorizer_buf_len; void *authorizer_reply_buf; size_t authorizer_reply_buf_len; + int (*sign_message)(struct ceph_auth_handshake *auth, + struct ceph_msg *msg); + int (*check_message_signature)(struct ceph_auth_handshake *auth, + struct ceph_msg *msg); }; struct ceph_auth_client_ops { @@ -66,6 +71,11 @@ struct ceph_auth_client_ops { void (*reset)(struct ceph_auth_client *ac); void 
(*destroy)(struct ceph_auth_client *ac); + + int (*sign_message)(struct ceph_auth_handshake *auth, + struct ceph_msg *msg); + int (*check_message_signature)(struct ceph_auth_handshake *auth, + struct ceph_msg *msg); }; struct ceph_auth_client { @@ -113,4 +123,20 @@ extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type); +static inline int ceph_auth_sign_message(struct ceph_auth_handshake *auth, + struct ceph_msg *msg) +{ + if (auth->sign_message) + return auth->sign_message(auth, msg); + return 0; +} + +static inline +int ceph_auth_check_message_signature(struct ceph_auth_handshake *auth, + struct ceph_msg *msg) +{ + if (auth->check_message_signature) + return auth->check_message_signature(auth, msg); + return 0; +} #endif diff --git a/include/linux/ceph/buffer.h b/include/linux/ceph/buffer.h index 07ad423cc37..07ca15e7610 100644 --- a/include/linux/ceph/buffer.h +++ b/include/linux/ceph/buffer.h @@ -10,8 +10,7 @@ /* * a simple reference counted buffer. * - * use kmalloc for small sizes (<= one page), vmalloc for larger - * sizes. + * use kmalloc for smaller sizes, vmalloc for larger sizes. */ struct ceph_buffer { struct kref kref; diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index d12659ce550..71e05bbf8ce 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -84,6 +84,7 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_PGPOOL3 | \ CEPH_FEATURE_OSDENC | \ CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_MSG_AUTH | \ CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_REPLY_CREATE_INODE | \ CEPH_FEATURE_OSDHASHPSPOOL | \ diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 3c97d5e9b95..c0dadaac26e 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -522,8 +522,11 @@ struct ceph_mds_reply_dirfrag { __le32 dist[]; } __attribute__ ((packed)); -#define CEPH_LOCK_FCNTL 1 -#define CEPH_LOCK_FLOCK 2 +#define CEPH_LOCK_FCNTL 1 +#define CEPH_LOCK_FLOCK 2 +#define CEPH_LOCK_FCNTL_INTR 3 +#define CEPH_LOCK_FLOCK_INTR 4 + #define CEPH_LOCK_SHARED 1 #define CEPH_LOCK_EXCL 2 @@ -549,6 +552,7 @@ struct ceph_filelock { int ceph_flags_to_mode(int flags); +#define CEPH_INLINE_NONE ((__u64)-1) /* capability bits */ #define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */ @@ -613,6 +617,8 @@ int ceph_flags_to_mode(int flags); CEPH_CAP_LINK_SHARED | \ CEPH_CAP_FILE_SHARED | \ CEPH_CAP_XATTR_SHARED) +#define CEPH_STAT_CAP_INLINE_DATA (CEPH_CAP_FILE_SHARED | \ + CEPH_CAP_FILE_RD) #define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \ CEPH_CAP_LINK_SHARED | \ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 07bc359b88a..8b11a79ca1c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -29,6 +29,7 @@ #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ #define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ +#define CEPH_OPT_NOMSGAUTH (1<<4) /* not require cephx message signature */ #define CEPH_OPT_DEFAULT (0) @@ -184,7 +185,6 @@ extern bool libceph_compatible(void *data); extern const char *ceph_msg_type_name(int type); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); extern void *ceph_kvmalloc(size_t size, gfp_t flags); -extern void ceph_kvfree(const void *ptr); extern struct ceph_options 
*ceph_parse_options(char *options, const char *dev_name, const char *dev_name_end, diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 40ae58e3e9d..d9d396c1650 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -42,6 +42,10 @@ struct ceph_connection_operations { struct ceph_msg * (*alloc_msg) (struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); + int (*sign_message) (struct ceph_connection *con, struct ceph_msg *msg); + + int (*check_message_signature) (struct ceph_connection *con, + struct ceph_msg *msg); }; /* use format string %s%d */ @@ -142,7 +146,10 @@ struct ceph_msg_data_cursor { */ struct ceph_msg { struct ceph_msg_header hdr; /* header */ - struct ceph_msg_footer footer; /* footer */ + union { + struct ceph_msg_footer footer; /* footer */ + struct ceph_msg_footer_old old_footer; /* old format footer */ + }; struct kvec front; /* unaligned blobs of message */ struct ceph_buffer *middle; diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 3d94a73b5f3..1c1887206ff 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -152,7 +152,8 @@ struct ceph_msg_header { receiver: mask against ~PAGE_MASK */ struct ceph_entity_name src; - __le32 reserved; + __le16 compat_version; + __le16 reserved; __le32 crc; /* header crc32c */ } __attribute__ ((packed)); @@ -164,13 +165,21 @@ struct ceph_msg_header { /* * follows data payload */ +struct ceph_msg_footer_old { + __le32 front_crc, middle_crc, data_crc; + __u8 flags; +} __attribute__ ((packed)); + struct ceph_msg_footer { __le32 front_crc, middle_crc, data_crc; + // sig holds the 64 bits of the digital signature for the message PLR + __le64 sig; __u8 flags; } __attribute__ ((packed)); #define CEPH_MSG_FOOTER_COMPLETE (1<<0) /* msg wasn't aborted */ #define CEPH_MSG_FOOTER_NOCRC (1<<1) /* no data crc */ +#define CEPH_MSG_FOOTER_SIGNED (1<<2) /* msg was signed */ #endif diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 03aeb27fcc6..5d86416d35f 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -87,6 +87,13 @@ struct ceph_osd_req_op { struct ceph_osd_data osd_data; } extent; struct { + __le32 name_len; + __le32 value_len; + __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ + __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ + struct ceph_osd_data osd_data; + } xattr; + struct { const char *class_name; const char *method_name; struct ceph_osd_data request_info; @@ -295,6 +302,9 @@ extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *class, const char *method); +extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, + u16 opcode, const char *name, const void *value, + size_t size, u8 cmp_op, u8 cmp_mode); extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u64 cookie, u64 version, int flag); @@ -318,7 +328,8 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, struct ceph_file_layout *layout, struct ceph_vino vino, u64 offset, u64 *len, - int num_ops, int opcode, int flags, + unsigned int which, int num_ops, + int opcode, int flags, struct ceph_snap_context *snapc, u32 truncate_seq, u64 truncate_size, bool use_mempool); diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h index 5f871d84ddc..13d71fe18b0 100644 --- 
a/include/linux/ceph/pagelist.h +++ b/include/linux/ceph/pagelist.h @@ -1,8 +1,10 @@ #ifndef __FS_CEPH_PAGELIST_H #define __FS_CEPH_PAGELIST_H -#include <linux/list.h> +#include <asm/byteorder.h> #include <linux/atomic.h> +#include <linux/list.h> +#include <linux/types.h> struct ceph_pagelist { struct list_head head; diff --git a/include/linux/clock_cooling.h b/include/linux/clock_cooling.h new file mode 100644 index 00000000000..4d1019d56f7 --- /dev/null +++ b/include/linux/clock_cooling.h @@ -0,0 +1,65 @@ +/* + * linux/include/linux/clock_cooling.h + * + * Copyright (C) 2014 Eduardo Valentin <edubezval@gmail.com> + * + * Copyright (C) 2013 Texas Instruments Inc. + * Contact: Eduardo Valentin <eduardo.valentin@ti.com> + * + * Highly based on cpu_cooling.c. + * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com) + * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef __CPU_COOLING_H__ +#define __CPU_COOLING_H__ + +#include <linux/of.h> +#include <linux/thermal.h> +#include <linux/cpumask.h> + +#ifdef CONFIG_CLOCK_THERMAL +/** + * clock_cooling_register - function to create clock cooling device. + * @dev: struct device pointer to the device used as clock cooling device. + * @clock_name: string containing the clock used as cooling mechanism. + */ +struct thermal_cooling_device * +clock_cooling_register(struct device *dev, const char *clock_name); + +/** + * clock_cooling_unregister - function to remove clock cooling device. + * @cdev: thermal cooling device pointer. + */ +void clock_cooling_unregister(struct thermal_cooling_device *cdev); + +unsigned long clock_cooling_get_level(struct thermal_cooling_device *cdev, + unsigned long freq); +#else /* !CONFIG_CLOCK_THERMAL */ +static inline struct thermal_cooling_device * +clock_cooling_register(struct device *dev, const char *clock_name) +{ + return NULL; +} +static inline +void clock_cooling_unregister(struct thermal_cooling_device *cdev) +{ +} +static inline +unsigned long clock_cooling_get_level(struct thermal_cooling_device *cdev, + unsigned long freq) +{ + return THERMAL_CSTATE_INVALID; +} +#endif /* CONFIG_CLOCK_THERMAL */ + +#endif /* __CPU_COOLING_H__ */ diff --git a/include/linux/cred.h b/include/linux/cred.h index b2d0820837c..2fb2ca2127e 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -68,6 +68,7 @@ extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern void set_groups(struct cred *, struct group_info *); extern int groups_search(const struct group_info *, kgid_t); +extern bool may_setgroups(void); /* access the groups "array" with this macro */ #define GROUP_AT(gi, i) \ diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index 84d60cb841b..bf0321eabbd 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -29,7 +29,16 @@ #include <linux/of_platform.h> #include <linux/interrupt.h> -#define FSL_IFC_BANK_COUNT 4 +/* + * The actual number of banks implemented depends on the IFC version + * - IFC version 1.0 implements 4 banks. 
+ * - IFC version 1.1 onward implements 8 banks. + */ +#define FSL_IFC_BANK_COUNT 8 + +#define FSL_IFC_VERSION_MASK 0x0F0F0000 +#define FSL_IFC_VERSION_1_0_0 0x01000000 +#define FSL_IFC_VERSION_1_1_0 0x01010000 /* * CSPR - Chip Select Property Register @@ -776,23 +785,23 @@ struct fsl_ifc_regs { __be32 cspr; u32 res2; } cspr_cs[FSL_IFC_BANK_COUNT]; - u32 res3[0x19]; + u32 res3[0xd]; struct { __be32 amask; u32 res4[0x2]; } amask_cs[FSL_IFC_BANK_COUNT]; - u32 res5[0x18]; + u32 res5[0xc]; struct { __be32 csor; __be32 csor_ext; u32 res6; } csor_cs[FSL_IFC_BANK_COUNT]; - u32 res7[0x18]; + u32 res7[0xc]; struct { __be32 ftim[4]; u32 res8[0x8]; } ftim_cs[FSL_IFC_BANK_COUNT]; - u32 res9[0x60]; + u32 res9[0x30]; __be32 rb_stat; u32 res10[0x2]; __be32 ifc_gcr; @@ -827,6 +836,8 @@ struct fsl_ifc_ctrl { int nand_irq; spinlock_t lock; void *nand; + int version; + int banks; u32 nand_stat; wait_queue_head_t nand_wait; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a6059bdf7b0..26f106022c8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -43,6 +43,7 @@ * include/linux/kvm_h. */ #define KVM_MEMSLOT_INVALID (1UL << 16) +#define KVM_MEMSLOT_INCOHERENT (1UL << 17) /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 @@ -353,6 +354,8 @@ struct kvm_memslots { struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM]; /* The mapping table from slot id to the index in memslots[]. */ short id_to_index[KVM_MEM_SLOTS_NUM]; + atomic_t lru_slot; + int used_slots; }; struct kvm { @@ -395,7 +398,6 @@ struct kvm { * Update side is protected by irq_lock. */ struct kvm_irq_routing_table __rcu *irq_routing; - struct hlist_head mask_notifier_list; #endif #ifdef CONFIG_HAVE_KVM_IRQFD struct hlist_head irq_ack_notifier_list; @@ -447,6 +449,14 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); +#ifdef __KVM_HAVE_IOAPIC +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); +#else +static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) +{ +} +#endif + #ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd_init(void); void kvm_irqfd_exit(void); @@ -711,44 +721,6 @@ struct kvm_irq_ack_notifier { void (*irq_acked)(struct kvm_irq_ack_notifier *kian); }; -struct kvm_assigned_dev_kernel { - struct kvm_irq_ack_notifier ack_notifier; - struct list_head list; - int assigned_dev_id; - int host_segnr; - int host_busnr; - int host_devfn; - unsigned int entries_nr; - int host_irq; - bool host_irq_disabled; - bool pci_2_3; - struct msix_entry *host_msix_entries; - int guest_irq; - struct msix_entry *guest_msix_entries; - unsigned long irq_requested_type; - int irq_source_id; - int flags; - struct pci_dev *dev; - struct kvm *kvm; - spinlock_t intx_lock; - spinlock_t intx_mask_lock; - char irq_name[32]; - struct pci_saved_state *pci_saved_state; -}; - -struct kvm_irq_mask_notifier { - void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); - int irq; - struct hlist_node link; -}; - -void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn); -void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn); -void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, - bool mask); - int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *entries, int gsi); int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); @@ -770,12 +742,6 @@ void 
kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); -int kvm_iommu_map_guest(struct kvm *kvm); -int kvm_iommu_unmap_guest(struct kvm *kvm); -int kvm_assign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev); -int kvm_deassign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev); #else static inline int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) @@ -787,11 +753,6 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot) { } - -static inline int kvm_iommu_unmap_guest(struct kvm *kvm) -{ - return 0; -} #endif static inline void kvm_guest_enter(void) @@ -832,12 +793,28 @@ static inline void kvm_guest_exit(void) static inline struct kvm_memory_slot * search_memslots(struct kvm_memslots *slots, gfn_t gfn) { - struct kvm_memory_slot *memslot; + int start = 0, end = slots->used_slots; + int slot = atomic_read(&slots->lru_slot); + struct kvm_memory_slot *memslots = slots->memslots; + + if (gfn >= memslots[slot].base_gfn && + gfn < memslots[slot].base_gfn + memslots[slot].npages) + return &memslots[slot]; - kvm_for_each_memslot(memslot, slots) - if (gfn >= memslot->base_gfn && - gfn < memslot->base_gfn + memslot->npages) - return memslot; + while (start < end) { + slot = start + (end - start) / 2; + + if (gfn >= memslots[slot].base_gfn) + end = slot; + else + start = slot + 1; + } + + if (gfn >= memslots[start].base_gfn && + gfn < memslots[start].base_gfn + memslots[start].npages) { + atomic_set(&slots->lru_slot, start); + return &memslots[start]; + } return NULL; } @@ -1011,25 +988,6 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; } #endif -#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT - -long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, - unsigned long arg); - -void kvm_free_all_assigned_devices(struct kvm *kvm); - -#else - -static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, - unsigned long arg) -{ - return -ENOTTY; -} - -static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {} - -#endif - static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) { set_bit(req, &vcpu->requests); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b606bb689a3..931da7e917c 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -54,33 +54,6 @@ typedef u64 hfn_t; typedef hfn_t pfn_t; -union kvm_ioapic_redirect_entry { - u64 bits; - struct { - u8 vector; - u8 delivery_mode:3; - u8 dest_mode:1; - u8 delivery_status:1; - u8 polarity:1; - u8 remote_irr:1; - u8 trig_mode:1; - u8 mask:1; - u8 reserve:7; - u8 reserved[4]; - u8 dest_id; - } fields; -}; - -struct kvm_lapic_irq { - u32 vector; - u32 delivery_mode; - u32 dest_mode; - u32 level; - u32 trig_mode; - u32 shorthand; - u32 dest_id; -}; - struct gfn_to_hva_cache { u64 generation; gpa_t gpa; diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index e4d451e4600..3d4ea7eb2b6 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -455,8 +455,21 @@ struct nand_hw_control { * be provided if an hardware ECC is available * @calculate: function for ECC calculation or readback from ECC hardware * @correct: function for ECC correction, matching to ECC generator (sw/hw) - * @read_page_raw: function to read a raw page without ECC - * 
@write_page_raw: function to write a raw page without ECC + * @read_page_raw: function to read a raw page without ECC. This function + * should hide the specific layout used by the ECC + * controller and always return contiguous in-band and + * out-of-band data even if they're not stored + * contiguously on the NAND chip (e.g. + * NAND_ECC_HW_SYNDROME interleaves in-band and + * out-of-band data). + * @write_page_raw: function to write a raw page without ECC. This function + * should hide the specific layout used by the ECC + * controller and consider the passed data as contiguous + * in-band and out-of-band data. ECC controller is + * responsible for doing the appropriate transformations + * to adapt to its specific layout (e.g. + * NAND_ECC_HW_SYNDROME interleaves in-band and + * out-of-band data). * @read_page: function to read a page according to the ECC generator * requirements; returns maximum number of bitflips corrected in * any single ECC step, 0 if bitflips uncorrectable, -EIO hw error @@ -723,6 +736,7 @@ struct nand_chip { #define NAND_MFR_EON 0x92 #define NAND_MFR_SANDISK 0x45 #define NAND_MFR_INTEL 0x89 +#define NAND_MFR_ATO 0x9b /* The maximum expected count of bytes in the NAND ID sequence */ #define NAND_MAX_ID_LEN 8 diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 046a0a2e4c4..63aeccf9ddc 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -116,6 +116,10 @@ enum spi_nor_ops { SPI_NOR_OPS_UNLOCK, }; +enum spi_nor_option_flags { + SNOR_F_USE_FSR = BIT(0), +}; + /** * struct spi_nor - Structure for defining a the SPI NOR layer * @mtd: point to a mtd_info structure @@ -129,6 +133,7 @@ enum spi_nor_ops { * @program_opcode: the program opcode * @flash_read: the mode of the read * @sst_write_second: used by the SST write operation + * @flags: flag options for the current SPI-NOR (SNOR_F_*) * @cfg: used by the read_xfer/write_xfer * @cmd_buf: used by the write_reg * @prepare: [OPTIONAL] do some preparations for the @@ -139,9 +144,6 @@ enum spi_nor_ops { * @write_xfer: [OPTIONAL] the writefundamental primitive * @read_reg: [DRIVER-SPECIFIC] read out the register * @write_reg: [DRIVER-SPECIFIC] write data to the register - * @read_id: [REPLACEABLE] read out the ID data, and find - * the proper spi_device_id - * @wait_till_ready: [REPLACEABLE] wait till the NOR becomes ready * @read: [DRIVER-SPECIFIC] read data from the SPI NOR * @write: [DRIVER-SPECIFIC] write data to the SPI NOR * @erase: [DRIVER-SPECIFIC] erase a sector of the SPI NOR @@ -160,6 +162,7 @@ struct spi_nor { u8 program_opcode; enum read_mode flash_read; bool sst_write_second; + u32 flags; struct spi_nor_xfer_cfg cfg; u8 cmd_buf[SPI_NOR_MAX_CMD_SIZE]; @@ -172,8 +175,6 @@ struct spi_nor { int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len); int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len, int write_enable); - const struct spi_device_id *(*read_id)(struct spi_nor *nor); - int (*wait_till_ready)(struct spi_nor *nor); int (*read)(struct spi_nor *nor, loff_t from, size_t len, size_t *retlen, u_char *read_buf); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index ef90838b36a..c611a02fbc5 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -29,10 +29,10 @@ #include <linux/idr.h> #include <linux/device.h> #include <linux/workqueue.h> +#include <uapi/linux/thermal.h> #define THERMAL_TRIPS_NONE -1 #define THERMAL_MAX_TRIPS 12 -#define THERMAL_NAME_LENGTH 20 /* invalid cooling state */ #define 
THERMAL_CSTATE_INVALID -1UL @@ -49,11 +49,6 @@ #define MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, off) (((t) / 100) + (off)) #define MILLICELSIUS_TO_DECI_KELVIN(t) MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, 2732) -/* Adding event notification support elements */ -#define THERMAL_GENL_FAMILY_NAME "thermal_event" -#define THERMAL_GENL_VERSION 0x01 -#define THERMAL_GENL_MCAST_GROUP_NAME "thermal_mc_grp" - /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) #define DEFAULT_THERMAL_GOVERNOR "step_wise" @@ -86,30 +81,6 @@ enum thermal_trend { THERMAL_TREND_DROP_FULL, /* apply lowest cooling action */ }; -/* Events supported by Thermal Netlink */ -enum events { - THERMAL_AUX0, - THERMAL_AUX1, - THERMAL_CRITICAL, - THERMAL_DEV_FAULT, -}; - -/* attributes of thermal_genl_family */ -enum { - THERMAL_GENL_ATTR_UNSPEC, - THERMAL_GENL_ATTR_EVENT, - __THERMAL_GENL_ATTR_MAX, -}; -#define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) - -/* commands supported by the thermal_genl_family */ -enum { - THERMAL_GENL_CMD_UNSPEC, - THERMAL_GENL_CMD_EVENT, - __THERMAL_GENL_CMD_MAX, -}; -#define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1) - struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); @@ -289,19 +260,49 @@ struct thermal_genl_event { enum events event; }; +/** + * struct thermal_zone_of_device_ops - callbacks for handling DT based zones + * + * Mandatory: + * @get_temp: a pointer to a function that reads the sensor temperature. + * + * Optional: + * @get_trend: a pointer to a function that reads the sensor temperature trend. + * @set_emul_temp: a pointer to a function that sets sensor emulated + * temperature. + */ +struct thermal_zone_of_device_ops { + int (*get_temp)(void *, long *); + int (*get_trend)(void *, long *); + int (*set_emul_temp)(void *, unsigned long); +}; + +/** + * struct thermal_trip - representation of a point in temperature domain + * @np: pointer to struct device_node that this trip point was created from + * @temperature: temperature value in millicelsius + * @hysteresis: relative hysteresis in millicelsius + * @type: trip point type + */ + +struct thermal_trip { + struct device_node *np; + unsigned long int temperature; + unsigned long int hysteresis; + enum thermal_trip_type type; +}; + /* Function declarations */ #ifdef CONFIG_THERMAL_OF struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int id, - void *data, int (*get_temp)(void *, long *), - int (*get_trend)(void *, long *)); +thermal_zone_of_sensor_register(struct device *dev, int id, void *data, + const struct thermal_zone_of_device_ops *ops); void thermal_zone_of_sensor_unregister(struct device *dev, struct thermal_zone_device *tz); #else static inline struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int id, - void *data, int (*get_temp)(void *, long *), - int (*get_trend)(void *, long *)) +thermal_zone_of_sensor_register(struct device *dev, int id, void *data, + const struct thermal_zone_of_device_ops *ops) { return NULL; } diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4cf06c140e2..8297e5b341d 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -18,6 +18,10 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */ } extent[UID_GID_MAP_MAX_EXTENTS]; }; +#define USERNS_SETGROUPS_ALLOWED 1UL + +#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED + struct user_namespace { struct uid_gid_map uid_map; struct 
uid_gid_map gid_map; @@ -28,6 +32,7 @@ struct user_namespace { kuid_t owner; kgid_t group; struct ns_common ns; + unsigned long flags; /* Register of per-UID persistent keyrings for this namespace */ #ifdef CONFIG_PERSISTENT_KEYRINGS @@ -64,6 +69,9 @@ extern const struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); +extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); +extern int proc_setgroups_show(struct seq_file *m, void *v); +extern bool userns_may_setgroups(const struct user_namespace *ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) @@ -88,6 +96,10 @@ static inline void put_user_ns(struct user_namespace *ns) { } +static inline bool userns_may_setgroups(const struct user_namespace *ns) +{ + return true; +} #endif #endif /* _LINUX_USER_H */ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index d09e0938fd6..28f0e65b9a1 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -81,7 +81,7 @@ void *virtqueue_get_used(struct virtqueue *vq); /** * virtio_device - representation of a device using virtio * @index: unique position on the virtio bus - * @failed: saved value for CONFIG_S_FAILED bit (for restore) + * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore) * @config_enabled: configuration change reporting enabled * @config_change_pending: configuration change reported while disabled * @config_lock: protects configuration change reporting diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 259d31fc907..00b100023c4 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -387,6 +387,7 @@ header-y += tcp.h header-y += tcp_metrics.h header-y += telephony.h header-y += termios.h +header-y += thermal.h header-y += time.h header-y += times.h header-y += timex.h diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 60768822b14..a37fd1224f3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -647,11 +647,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ -#define KVM_CAP_DEVICE_ASSIGNMENT 17 #define KVM_CAP_IOMMU 18 -#ifdef __KVM_HAVE_MSI -#define KVM_CAP_DEVICE_MSI 20 -#endif /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 #define KVM_CAP_USER_NMI 22 @@ -663,10 +659,6 @@ struct kvm_ppc_smmu_info { #endif #define KVM_CAP_IRQ_ROUTING 25 #define KVM_CAP_IRQ_INJECT_STATUS 26 -#define KVM_CAP_DEVICE_DEASSIGNMENT 27 -#ifdef __KVM_HAVE_MSIX -#define KVM_CAP_DEVICE_MSIX 28 -#endif #define KVM_CAP_ASSIGN_DEV_IRQ 29 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 @@ -1107,9 +1099,6 @@ struct kvm_s390_ucas_mapping { #define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64) #define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64) #define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce) -/* IA64 stack access */ -#define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *) -#define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *) /* Available with KVM_CAP_VCPU_EVENTS */ #define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) 
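These command numbers follow the usual _IOR/_IOW encoding against the KVMIO magic, which is why the removed IA64 stack ioctls simply leave 0x9a/0x9b unused rather than renumbering later commands. A rough userspace sketch of the events pair (assumes a vcpu_fd obtained via KVM_CREATE_VCPU and that KVM_CAP_VCPU_EVENTS was checked first):

    struct kvm_vcpu_events events;

    if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)  /* 0x9f, read */
            err(1, "KVM_GET_VCPU_EVENTS");
    /* ... inspect or adjust pending exception/NMI state ... */
    if (ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events) < 0)  /* 0xa0, write */
            err(1, "KVM_SET_VCPU_EVENTS");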
#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h new file mode 100644 index 00000000000..ac553585598 --- /dev/null +++ b/include/uapi/linux/thermal.h @@ -0,0 +1,35 @@ +#ifndef _UAPI_LINUX_THERMAL_H +#define _UAPI_LINUX_THERMAL_H + +#define THERMAL_NAME_LENGTH 20 + +/* Adding event notification support elements */ +#define THERMAL_GENL_FAMILY_NAME "thermal_event" +#define THERMAL_GENL_VERSION 0x01 +#define THERMAL_GENL_MCAST_GROUP_NAME "thermal_mc_grp" + +/* Events supported by Thermal Netlink */ +enum events { + THERMAL_AUX0, + THERMAL_AUX1, + THERMAL_CRITICAL, + THERMAL_DEV_FAULT, +}; + +/* attributes of thermal_genl_family */ +enum { + THERMAL_GENL_ATTR_UNSPEC, + THERMAL_GENL_ATTR_EVENT, + __THERMAL_GENL_ATTR_MAX, +}; +#define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) + +/* commands supported by the thermal_genl_family */ +enum { + THERMAL_GENL_CMD_UNSPEC, + THERMAL_GENL_CMD_EVENT, + __THERMAL_GENL_CMD_MAX, +}; +#define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1) + +#endif /* _UAPI_LINUX_THERMAL_H */ diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 5e26f61b5df..be40f7059e9 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -31,6 +31,7 @@ /* The feature bitmap for virtio balloon */ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ +#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 diff --git a/kernel/groups.c b/kernel/groups.c index 451698f86cf..664411f171b 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -6,6 +6,7 @@ #include <linux/slab.h> #include <linux/security.h> #include <linux/syscalls.h> +#include <linux/user_namespace.h> #include <asm/uaccess.h> /* init to 2 - one for init_task, one to ensure it is never freed */ @@ -213,6 +214,14 @@ out: return i; } +bool may_setgroups(void) +{ + struct user_namespace *user_ns = current_user_ns(); + + return ns_capable(user_ns, CAP_SETGID) && + userns_may_setgroups(user_ns); +} + /* * SMP: Our groups are copy-on-write. We can set them safely * without another task interfering. 
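Together with the /proc/<pid>/setgroups file added in fs/proc/base.c above, may_setgroups() changes the userspace recipe for unprivileged user namespaces: gid_map may only be written once setgroups has been disabled. A self-contained sketch of the expected sequence (error handling trimmed; the single "0 <id> 1" extent matches what new_idmap_permitted() still allows):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sched.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    static void write_file(const char *path, const char *buf)
    {
            int fd = open(path, O_WRONLY);

            if (fd < 0 || write(fd, buf, strlen(buf)) < 0)
                    perror(path);
            if (fd >= 0)
                    close(fd);
    }

    int main(void)
    {
            char map[64];

            if (unshare(CLONE_NEWUSER) < 0) {
                    perror("unshare");
                    return 1;
            }
            snprintf(map, sizeof(map), "0 %d 1", (int)getuid());
            write_file("/proc/self/uid_map", map);

            /* Without this, the unprivileged gid_map write below
             * now fails with EPERM. */
            write_file("/proc/self/setgroups", "deny");

            snprintf(map, sizeof(map), "0 %d 1", (int)getgid());
            write_file("/proc/self/gid_map", map);
            return 0;
    }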
@@ -223,7 +232,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!ns_capable(current_user_ns(), CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/kernel/uid16.c b/kernel/uid16.c index 602e5bbbcef..d58cc4d8f0d 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!ns_capable(current_user_ns(), CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/kernel/user.c b/kernel/user.c index 69b800aebf1..b069ccbfb0b 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -54,6 +54,7 @@ struct user_namespace init_user_ns = { #ifdef CONFIG_USER_NS .ns.ops = &userns_operations, #endif + .flags = USERNS_INIT_FLAGS, #ifdef CONFIG_PERSISTENT_KEYRINGS .persistent_keyring_register_sem = __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem), diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 1491ad00388..4109f832068 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -24,6 +24,7 @@ #include <linux/fs_struct.h> static struct kmem_cache *user_ns_cachep __read_mostly; +static DEFINE_MUTEX(userns_state_mutex); static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, @@ -100,6 +101,11 @@ int create_user_ns(struct cred *new) ns->owner = owner; ns->group = group; + /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ + mutex_lock(&userns_state_mutex); + ns->flags = parent_ns->flags; + mutex_unlock(&userns_state_mutex); + set_cred_user_ns(new, ns); #ifdef CONFIG_PERSISTENT_KEYRINGS @@ -584,9 +590,6 @@ static bool mappings_overlap(struct uid_gid_map *new_map, return false; } - -static DEFINE_MUTEX(id_map_mutex); - static ssize_t map_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int cap_setid, @@ -603,7 +606,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ssize_t ret = -EINVAL; /* - * The id_map_mutex serializes all writes to any given map. + * The userns_state_mutex serializes all writes to any given map. * * Any map is only ever written once. * @@ -621,7 +624,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, * order and smp_rmb() is guaranteed that we don't have crazy * architectures returning stale data. 
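The renamed mutex only serializes writers; readers of an id map take no lock and rely on exactly the publication order this comment describes. Schematically (not the literal kernel code):

    /* writer, under userns_state_mutex: fill the extents, then publish */
    map->extent[0].first       = first;
    map->extent[0].lower_first = lower_first;
    map->extent[0].count       = count;
    smp_wmb();                 /* extents become visible before the count */
    map->nr_extents = nr;

    /* lockless reader: */
    nr = map->nr_extents;
    smp_rmb();                 /* pairs with the writer's smp_wmb() */
    /* extent[0..nr-1] may now be read safely */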
*/ - mutex_lock(&id_map_mutex); + mutex_lock(&userns_state_mutex); ret = -EPERM; /* Only allow one successful write to the map */ @@ -641,7 +644,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, if (!page) goto out; - /* Only allow <= page size writes at the beginning of the file */ + /* Only allow < page size writes at the beginning of the file */ ret = -EINVAL; if ((*ppos != 0) || (count >= PAGE_SIZE)) goto out; @@ -751,7 +754,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, *ppos = count; ret = count; out: - mutex_unlock(&id_map_mutex); + mutex_unlock(&userns_state_mutex); if (page) free_page(page); return ret; @@ -813,16 +816,21 @@ static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { - /* Allow mapping to your own filesystem ids */ - if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { + const struct cred *cred = file->f_cred; + /* Don't allow mappings that would allow anything that wouldn't + * be allowed without the establishment of unprivileged mappings. + */ + if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) && + uid_eq(ns->owner, cred->euid)) { u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { kuid_t uid = make_kuid(ns->parent, id); - if (uid_eq(uid, file->f_cred->fsuid)) + if (uid_eq(uid, cred->euid)) return true; } else if (cap_setid == CAP_SETGID) { kgid_t gid = make_kgid(ns->parent, id); - if (gid_eq(gid, file->f_cred->fsgid)) + if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) && + gid_eq(gid, cred->egid)) return true; } } @@ -842,6 +850,100 @@ static bool new_idmap_permitted(const struct file *file, return false; } +int proc_setgroups_show(struct seq_file *seq, void *v) +{ + struct user_namespace *ns = seq->private; + unsigned long userns_flags = ACCESS_ONCE(ns->flags); + + seq_printf(seq, "%s\n", + (userns_flags & USERNS_SETGROUPS_ALLOWED) ? + "allow" : "deny"); + return 0; +} + +ssize_t proc_setgroups_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + char kbuf[8], *pos; + bool setgroups_allowed; + ssize_t ret; + + /* Only allow a very narrow range of strings to be written */ + ret = -EINVAL; + if ((*ppos != 0) || (count >= sizeof(kbuf))) + goto out; + + /* What was written? */ + ret = -EFAULT; + if (copy_from_user(kbuf, buf, count)) + goto out; + kbuf[count] = '\0'; + pos = kbuf; + + /* What is being requested? */ + ret = -EINVAL; + if (strncmp(pos, "allow", 5) == 0) { + pos += 5; + setgroups_allowed = true; + } + else if (strncmp(pos, "deny", 4) == 0) { + pos += 4; + setgroups_allowed = false; + } + else + goto out; + + /* Verify there is not trailing junk on the line */ + pos = skip_spaces(pos); + if (*pos != '\0') + goto out; + + ret = -EPERM; + mutex_lock(&userns_state_mutex); + if (setgroups_allowed) { + /* Enabling setgroups after setgroups has been disabled + * is not allowed. + */ + if (!(ns->flags & USERNS_SETGROUPS_ALLOWED)) + goto out_unlock; + } else { + /* Permanently disabling setgroups after setgroups has + * been enabled by writing the gid_map is not allowed. 
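The two branches make the switch one-way: "deny" succeeds in a fresh namespace, but re-enabling, or denying once a gid mapping exists, is rejected. A quick userspace check of the first property (fresh user namespace assumed; pwrite() keeps the zero offset the handler insists on):

    int fd = open("/proc/self/setgroups", O_WRONLY);

    if (pwrite(fd, "deny", 4, 0) < 0)     /* fine in a fresh namespace */
            perror("deny");
    if (pwrite(fd, "allow", 5, 0) < 0)    /* now fails with EPERM */
            perror("allow");
    close(fd);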
+ */ + if (ns->gid_map.nr_extents != 0) + goto out_unlock; + ns->flags &= ~USERNS_SETGROUPS_ALLOWED; + } + mutex_unlock(&userns_state_mutex); + + /* Report a successful write */ + *ppos = count; + ret = count; +out: + return ret; +out_unlock: + mutex_unlock(&userns_state_mutex); + goto out; +} + +bool userns_may_setgroups(const struct user_namespace *ns) +{ + bool allowed; + + mutex_lock(&userns_state_mutex); + /* It is not safe to use setgroups until a gid mapping in + * the user namespace has been established. + */ + allowed = ns->gid_map.nr_extents != 0; + /* Is setgroups allowed? */ + allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED); + mutex_unlock(&userns_state_mutex); + + return allowed; +} + static inline struct user_namespace *to_user_ns(struct ns_common *ns) { return container_of(ns, struct user_namespace, ns); diff --git a/mm/memory.c b/mm/memory.c index c3b9097251c..6efe36a998b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -235,9 +235,6 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { - if (!tlb->end) - return; - tlb_flush(tlb); mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end); #ifdef CONFIG_HAVE_RCU_TABLE_FREE @@ -259,6 +256,9 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb) void tlb_flush_mmu(struct mmu_gather *tlb) { + if (!tlb->end) + return; + tlb_flush_mmu_tlbonly(tlb); tlb_flush_mmu_free(tlb); } diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 7e38b729696..15845814a0f 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -8,6 +8,7 @@ #include <linux/ceph/decode.h> #include <linux/ceph/auth.h> +#include <linux/ceph/messenger.h> #include "crypto.h" #include "auth_x.h" @@ -293,6 +294,11 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, dout("build_authorizer for %s %p\n", ceph_entity_type_name(th->service), au); + ceph_crypto_key_destroy(&au->session_key); + ret = ceph_crypto_key_clone(&au->session_key, &th->session_key); + if (ret) + return ret; + maxlen = sizeof(*msg_a) + sizeof(msg_b) + ceph_x_encrypt_buflen(ticket_blob_len); dout(" need len %d\n", maxlen); @@ -302,8 +308,10 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, } if (!au->buf) { au->buf = ceph_buffer_new(maxlen, GFP_NOFS); - if (!au->buf) + if (!au->buf) { + ceph_crypto_key_destroy(&au->session_key); return -ENOMEM; + } } au->service = th->service; au->secret_id = th->secret_id; @@ -329,7 +337,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, get_random_bytes(&au->nonce, sizeof(au->nonce)); msg_b.struct_v = 1; msg_b.nonce = cpu_to_le64(au->nonce); - ret = ceph_x_encrypt(&th->session_key, &msg_b, sizeof(msg_b), + ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b), p, end - p); if (ret < 0) goto out_buf; @@ -560,6 +568,8 @@ static int ceph_x_create_authorizer( auth->authorizer_buf_len = au->buf->vec.iov_len; auth->authorizer_reply_buf = au->reply_buf; auth->authorizer_reply_buf_len = sizeof (au->reply_buf); + auth->sign_message = ac->ops->sign_message; + auth->check_message_signature = ac->ops->check_message_signature; return 0; } @@ -588,17 +598,13 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a, size_t len) { struct ceph_x_authorizer *au = (void *)a; - struct ceph_x_ticket_handler *th; int ret = 0; struct ceph_x_authorize_reply reply; void *preply = &reply; void *p = au->reply_buf; void *end = p + sizeof(au->reply_buf); - th = get_ticket_handler(ac, 
au->service); - if (IS_ERR(th)) - return PTR_ERR(th); - ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply)); + ret = ceph_x_decrypt(&au->session_key, &p, end, &preply, sizeof(reply)); if (ret < 0) return ret; if (ret != sizeof(reply)) @@ -618,6 +624,7 @@ static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac, { struct ceph_x_authorizer *au = (void *)a; + ceph_crypto_key_destroy(&au->session_key); ceph_buffer_put(au->buf); kfree(au); } @@ -663,6 +670,59 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, memset(&th->validity, 0, sizeof(th->validity)); } +static int calcu_signature(struct ceph_x_authorizer *au, + struct ceph_msg *msg, __le64 *sig) +{ + int ret; + char tmp_enc[40]; + __le32 tmp[5] = { + 16u, msg->hdr.crc, msg->footer.front_crc, + msg->footer.middle_crc, msg->footer.data_crc, + }; + ret = ceph_x_encrypt(&au->session_key, &tmp, sizeof(tmp), + tmp_enc, sizeof(tmp_enc)); + if (ret < 0) + return ret; + *sig = *(__le64*)(tmp_enc + 4); + return 0; +} + +static int ceph_x_sign_message(struct ceph_auth_handshake *auth, + struct ceph_msg *msg) +{ + int ret; + if (!auth->authorizer) + return 0; + ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, + msg, &msg->footer.sig); + if (ret < 0) + return ret; + msg->footer.flags |= CEPH_MSG_FOOTER_SIGNED; + return 0; +} + +static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth, + struct ceph_msg *msg) +{ + __le64 sig_check; + int ret; + + if (!auth->authorizer) + return 0; + ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, + msg, &sig_check); + if (ret < 0) + return ret; + if (sig_check == msg->footer.sig) + return 0; + if (msg->footer.flags & CEPH_MSG_FOOTER_SIGNED) + dout("ceph_x_check_message_signature %p has signature %llx " + "expect %llx\n", msg, msg->footer.sig, sig_check); + else + dout("ceph_x_check_message_signature %p sender did not set " + "CEPH_MSG_FOOTER_SIGNED\n", msg); + return -EBADMSG; +} static const struct ceph_auth_client_ops ceph_x_ops = { .name = "x", @@ -677,6 +737,8 @@ static const struct ceph_auth_client_ops ceph_x_ops = { .invalidate_authorizer = ceph_x_invalidate_authorizer, .reset = ceph_x_reset, .destroy = ceph_x_destroy, + .sign_message = ceph_x_sign_message, + .check_message_signature = ceph_x_check_message_signature, }; diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index 65ee72082d9..e8b7c6917d4 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -26,6 +26,7 @@ struct ceph_x_ticket_handler { struct ceph_x_authorizer { + struct ceph_crypto_key session_key; struct ceph_buffer *buf; unsigned int service; u64 nonce; diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c index 621b5f65407..add5f921a0f 100644 --- a/net/ceph/buffer.c +++ b/net/ceph/buffer.c @@ -6,7 +6,7 @@ #include <linux/ceph/buffer.h> #include <linux/ceph/decode.h> -#include <linux/ceph/libceph.h> /* for ceph_kv{malloc,free} */ +#include <linux/ceph/libceph.h> /* for ceph_kvmalloc */ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) { @@ -35,7 +35,7 @@ void ceph_buffer_release(struct kref *kref) struct ceph_buffer *b = container_of(kref, struct ceph_buffer, kref); dout("buffer_release %p\n", b); - ceph_kvfree(b->vec.iov_base); + kvfree(b->vec.iov_base); kfree(b); } EXPORT_SYMBOL(ceph_buffer_release); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 58fbfe134f9..5d5ab67f516 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -184,14 +184,6 @@ void *ceph_kvmalloc(size_t size, gfp_t flags) return 
__vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL); } -void ceph_kvfree(const void *ptr) -{ - if (is_vmalloc_addr(ptr)) - vfree(ptr); - else - kfree(ptr); -} - static int parse_fsid(const char *str, struct ceph_fsid *fsid) { @@ -245,6 +237,8 @@ enum { Opt_noshare, Opt_crc, Opt_nocrc, + Opt_cephx_require_signatures, + Opt_nocephx_require_signatures, }; static match_table_t opt_tokens = { @@ -263,6 +257,8 @@ static match_table_t opt_tokens = { {Opt_noshare, "noshare"}, {Opt_crc, "crc"}, {Opt_nocrc, "nocrc"}, + {Opt_cephx_require_signatures, "cephx_require_signatures"}, + {Opt_nocephx_require_signatures, "nocephx_require_signatures"}, {-1, NULL} }; @@ -461,6 +457,12 @@ ceph_parse_options(char *options, const char *dev_name, case Opt_nocrc: opt->flags |= CEPH_OPT_NOCRC; break; + case Opt_cephx_require_signatures: + opt->flags &= ~CEPH_OPT_NOMSGAUTH; + break; + case Opt_nocephx_require_signatures: + opt->flags |= CEPH_OPT_NOMSGAUTH; + break; default: BUG_ON(token); @@ -504,6 +506,9 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, init_waitqueue_head(&client->auth_wq); client->auth_err = 0; + if (!ceph_test_opt(client, NOMSGAUTH)) + required_features |= CEPH_FEATURE_MSG_AUTH; + client->extra_mon_dispatch = NULL; client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT | supported_features; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 8d1653caffd..33a2f201e46 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1196,8 +1196,18 @@ static void prepare_write_message_footer(struct ceph_connection *con) dout("prepare_write_message_footer %p\n", con); con->out_kvec_is_msg = true; con->out_kvec[v].iov_base = &m->footer; - con->out_kvec[v].iov_len = sizeof(m->footer); - con->out_kvec_bytes += sizeof(m->footer); + if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { + if (con->ops->sign_message) + con->ops->sign_message(con, m); + else + m->footer.sig = 0; + con->out_kvec[v].iov_len = sizeof(m->footer); + con->out_kvec_bytes += sizeof(m->footer); + } else { + m->old_footer.flags = m->footer.flags; + con->out_kvec[v].iov_len = sizeof(m->old_footer); + con->out_kvec_bytes += sizeof(m->old_footer); + } con->out_kvec_left++; con->out_more = m->more_to_follow; con->out_msg_done = true; @@ -2249,6 +2259,7 @@ static int read_partial_message(struct ceph_connection *con) int ret; unsigned int front_len, middle_len, data_len; bool do_datacrc = !con->msgr->nocrc; + bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH); u64 seq; u32 crc; @@ -2361,12 +2372,21 @@ static int read_partial_message(struct ceph_connection *con) } /* footer */ - size = sizeof (m->footer); + if (need_sign) + size = sizeof(m->footer); + else + size = sizeof(m->old_footer); + end += size; ret = read_partial(con, end, size, &m->footer); if (ret <= 0) return ret; + if (!need_sign) { + m->footer.flags = m->old_footer.flags; + m->footer.sig = 0; + } + dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n", m, front_len, m->footer.front_crc, middle_len, m->footer.middle_crc, data_len, m->footer.data_crc); @@ -2390,6 +2410,12 @@ static int read_partial_message(struct ceph_connection *con) return -EBADMSG; } + if (need_sign && con->ops->check_message_signature && + con->ops->check_message_signature(con, m)) { + pr_err("read_partial_message %p signature check failed\n", m); + return -EBADMSG; + } + return 1; /* done! 
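The send and receive paths above gate on the same negotiated feature bit, so the wire rule reduces to a single predicate. A condensed sketch (helper name invented for illustration):

    /* Peers that negotiated CEPH_FEATURE_MSG_AUTH exchange the new
     * footer carrying the 64-bit signature; older peers keep the short
     * footer and the sig field is treated as zero. */
    static size_t wire_footer_size(const struct ceph_connection *con)
    {
            if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
                    return sizeof(struct ceph_msg_footer);     /* crcs, sig, flags */
            return sizeof(struct ceph_msg_footer_old);         /* crcs, flags */
    }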
*/ } @@ -3288,7 +3314,7 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) static void ceph_msg_free(struct ceph_msg *m) { dout("%s %p\n", __func__, m); - ceph_kvfree(m->front.iov_base); + kvfree(m->front.iov_base); kmem_cache_free(ceph_msg_cache, m); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6f164289bde..53299c7b0ca 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -292,6 +292,10 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, ceph_osd_data_release(&op->cls.request_data); ceph_osd_data_release(&op->cls.response_data); break; + case CEPH_OSD_OP_SETXATTR: + case CEPH_OSD_OP_CMPXATTR: + ceph_osd_data_release(&op->xattr.osd_data); + break; default: break; } @@ -476,8 +480,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, size_t payload_len = 0; BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && - opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO && - opcode != CEPH_OSD_OP_TRUNCATE); + opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE); op->extent.offset = offset; op->extent.length = length; @@ -545,6 +548,39 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, } EXPORT_SYMBOL(osd_req_op_cls_init); +int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, + u16 opcode, const char *name, const void *value, + size_t size, u8 cmp_op, u8 cmp_mode) +{ + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_pagelist *pagelist; + size_t payload_len; + + BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR); + + pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); + if (!pagelist) + return -ENOMEM; + + ceph_pagelist_init(pagelist); + + payload_len = strlen(name); + op->xattr.name_len = payload_len; + ceph_pagelist_append(pagelist, name, payload_len); + + op->xattr.value_len = size; + ceph_pagelist_append(pagelist, value, size); + payload_len += size; + + op->xattr.cmp_op = cmp_op; + op->xattr.cmp_mode = cmp_mode; + + ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist); + op->payload_len = payload_len; + return 0; +} +EXPORT_SYMBOL(osd_req_op_xattr_init); + void osd_req_op_watch_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u64 cookie, u64 version, int flag) @@ -626,7 +662,6 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, case CEPH_OSD_OP_READ: case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_ZERO: - case CEPH_OSD_OP_DELETE: case CEPH_OSD_OP_TRUNCATE: if (src->op == CEPH_OSD_OP_WRITE) request_data_len = src->extent.length; @@ -676,6 +711,19 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, dst->alloc_hint.expected_write_size = cpu_to_le64(src->alloc_hint.expected_write_size); break; + case CEPH_OSD_OP_SETXATTR: + case CEPH_OSD_OP_CMPXATTR: + dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); + dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); + dst->xattr.cmp_op = src->xattr.cmp_op; + dst->xattr.cmp_mode = src->xattr.cmp_mode; + osd_data = &src->xattr.osd_data; + ceph_osdc_msg_data_add(req->r_request, osd_data); + request_data_len = osd_data->pagelist->length; + break; + case CEPH_OSD_OP_CREATE: + case CEPH_OSD_OP_DELETE: + break; default: pr_err("unsupported osd opcode %s\n", ceph_osd_op_name(src->op)); @@ -705,7 +753,8 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, struct 
@@ -705,7 +753,8 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
 struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 					       struct ceph_file_layout *layout,
 					       struct ceph_vino vino,
-					       u64 off, u64 *plen, int num_ops,
+					       u64 off, u64 *plen,
+					       unsigned int which, int num_ops,
 					       int opcode, int flags,
 					       struct ceph_snap_context *snapc,
 					       u32 truncate_seq,
@@ -716,13 +765,11 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 	u64 objnum = 0;
 	u64 objoff = 0;
 	u64 objlen = 0;
-	u32 object_size;
-	u64 object_base;
 	int r;
 
 	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
-	       opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
-	       opcode != CEPH_OSD_OP_TRUNCATE);
+	       opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE &&
+	       opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE);
 
 	req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
 				      GFP_NOFS);
@@ -738,29 +785,24 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 		return ERR_PTR(r);
 	}
 
-	object_size = le32_to_cpu(layout->fl_object_size);
-	object_base = off - objoff;
-	if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
-		if (truncate_size <= object_base) {
-			truncate_size = 0;
-		} else {
-			truncate_size -= object_base;
-			if (truncate_size > object_size)
-				truncate_size = object_size;
+	if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) {
+		osd_req_op_init(req, which, opcode);
+	} else {
+		u32 object_size = le32_to_cpu(layout->fl_object_size);
+		u32 object_base = off - objoff;
+		if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
+			if (truncate_size <= object_base) {
+				truncate_size = 0;
+			} else {
+				truncate_size -= object_base;
+				if (truncate_size > object_size)
+					truncate_size = object_size;
+			}
 		}
+		osd_req_op_extent_init(req, which, opcode, objoff, objlen,
+				       truncate_size, truncate_seq);
 	}
 
-	osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
-			       truncate_size, truncate_seq);
-
-	/*
-	 * A second op in the ops array means the caller wants to
-	 * also issue a include a 'startsync' command so that the
-	 * osd will flush data quickly.
-	 */
-	if (num_ops > 1)
-		osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
-
 	req->r_base_oloc.pool = ceph_file_layout_pg_pool(*layout);
 
 	snprintf(req->r_base_oid.name, sizeof(req->r_base_oid.name),
@@ -2626,7 +2668,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
 	dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
 	     vino.snap, off, *plen);
-	req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1,
+	req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1,
 				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
 				    NULL, truncate_seq, truncate_size,
 				    false);
@@ -2669,7 +2711,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 	int page_align = off & ~PAGE_MASK;
 
 	BUG_ON(vino.snap != CEPH_NOSNAP);	/* snapshots aren't writeable */
-	req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
+	req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1,
 				    CEPH_OSD_OP_WRITE,
 				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
 				    snapc, truncate_seq, truncate_size,
@@ -2920,6 +2962,20 @@ static int invalidate_authorizer(struct ceph_connection *con)
 	return ceph_monc_validate_auth(&osdc->client->monc);
 }
 
+static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	struct ceph_osd *o = con->private;
+	struct ceph_auth_handshake *auth = &o->o_auth;
+	return ceph_auth_sign_message(auth, msg);
+}
+
+static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	struct ceph_osd *o = con->private;
+	struct ceph_auth_handshake *auth = &o->o_auth;
+	return ceph_auth_check_message_signature(auth, msg);
+}
+
 static const struct ceph_connection_operations osd_con_ops = {
 	.get = get_osd_con,
 	.put = put_osd_con,
@@ -2928,5 +2984,7 @@ static const struct ceph_connection_operations osd_con_ops = {
 	.verify_authorizer_reply = verify_authorizer_reply,
 	.invalidate_authorizer = invalidate_authorizer,
 	.alloc_msg = alloc_msg,
+	.sign_message = sign_message,
+	.check_message_signature = check_message_signature,
 	.fault = osd_reset,
 };
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index b80a93ec1cc..57515bc915c 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -10,7 +10,7 @@ config IMA
 	select CRYPTO_HASH_INFO
 	select TCG_TPM if HAS_IOMEM && !UML
 	select TCG_TIS if TCG_TPM && X86
-	select TCG_IBMVTPM if TCG_TPM && PPC64
+	select TCG_IBMVTPM if TCG_TPM && PPC_PSERIES
 	help
 	  The Trusted Computing Group(TCG) runtime Integrity
 	  Measurement Architecture(IMA) maintains a list of hash
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index db9675db102..7bed4ad7cd7 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -1017,10 +1017,13 @@ static int __init init_encrypted(void)
 	ret = encrypted_shash_alloc();
 	if (ret < 0)
 		return ret;
+	ret = aes_get_sizes();
+	if (ret < 0)
+		goto out;
 	ret = register_key_type(&key_type_encrypted);
 	if (ret < 0)
 		goto out;
-	return aes_get_sizes();
+	return 0;
 out:
 	encrypted_shash_release();
 	return ret;
diff --git a/security/keys/key.c b/security/keys/key.c
index e17ba6aefdc..aee2ec5a18f 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -276,12 +276,10 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 	if (!key)
 		goto no_memory_2;
 
-	if (desc) {
-		key->index_key.desc_len = desclen;
-		key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
-		if (!key->description)
-			goto no_memory_3;
-	}
+	key->index_key.desc_len = desclen;
+	key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
+	if (!key->description)
+		goto no_memory_3;
 
 	atomic_set(&key->usage, 1);
 	init_rwsem(&key->sem);
diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
index 1b3ff2fda4d..517785052f1 100644
--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@@ -6,6 +6,8 @@
 #include <sys/types.h>
 #include <sys/mount.h>
 #include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <fcntl.h>
@@ -32,11 +34,14 @@
 # define CLONE_NEWPID 0x20000000
 #endif
 
+#ifndef MS_REC
+# define MS_REC 16384
+#endif
 #ifndef MS_RELATIME
-#define MS_RELATIME (1 << 21)
+# define MS_RELATIME (1 << 21)
 #endif
 #ifndef MS_STRICTATIME
-#define MS_STRICTATIME (1 << 24)
+# define MS_STRICTATIME (1 << 24)
 #endif
 
 static void die(char *fmt, ...)
@@ -48,17 +53,14 @@ static void die(char *fmt, ...)
 	exit(EXIT_FAILURE);
 }
 
-static void write_file(char *filename, char *fmt, ...)
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
 {
 	char buf[4096];
 	int fd;
 	ssize_t written;
 	int buf_len;
-	va_list ap;
 
-	va_start(ap, fmt);
 	buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
-	va_end(ap);
 	if (buf_len < 0) {
 		die("vsnprintf failed: %s\n", strerror(errno));
 	}
@@ -69,6 +71,8 @@
 	fd = open(filename, O_WRONLY);
 	if (fd < 0) {
+		if ((errno == ENOENT) && enoent_ok)
+			return;
 		die("open of %s failed: %s\n",
 		    filename, strerror(errno));
 	}
@@ -87,6 +91,65 @@
 	}
 }
 
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vmaybe_write_file(true, filename, fmt, ap);
+	va_end(ap);
+
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vmaybe_write_file(false, filename, fmt, ap);
+	va_end(ap);
+
+}
+
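The refactor above is the standard v-variant pattern: each public varargs function owns only va_start()/va_end() and delegates the real work to a single va_list worker. A minimal standalone illustration of the same shape (function names here are made up for the demo):

    #include <stdarg.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* One worker takes the va_list plus the behavioural knob... */
    static void vlog(bool verbose, const char *fmt, va_list ap)
    {
        if (verbose)
            fputs("debug: ", stdout);
        vprintf(fmt, ap);
    }

    /* ...and each thin varargs wrapper owns va_start()/va_end(). */
    static void log_quiet(const char *fmt, ...)
    {
        va_list ap;

        va_start(ap, fmt);
        vlog(false, fmt, ap);
        va_end(ap);
    }

    static void log_verbose(const char *fmt, ...)
    {
        va_list ap;

        va_start(ap, fmt);
        vlog(true, fmt, ap);
        va_end(ap);
    }

    int main(void)
    {
        log_quiet("hello %d\n", 1);
        log_verbose("hello %d\n", 2);
        return 0;
    }

This keeps the formatting logic in one place, which is exactly what lets the patch add maybe_write_file() without duplicating write_file().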
+static int read_mnt_flags(const char *path)
+{
+	int ret;
+	struct statvfs stat;
+	int mnt_flags;
+
+	ret = statvfs(path, &stat);
+	if (ret != 0) {
+		die("statvfs of %s failed: %s\n",
+		    path, strerror(errno));
+	}
+	if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \
+			ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \
+			ST_SYNCHRONOUS | ST_MANDLOCK)) {
+		die("Unrecognized mount flags\n");
+	}
+	mnt_flags = 0;
+	if (stat.f_flag & ST_RDONLY)
+		mnt_flags |= MS_RDONLY;
+	if (stat.f_flag & ST_NOSUID)
+		mnt_flags |= MS_NOSUID;
+	if (stat.f_flag & ST_NODEV)
+		mnt_flags |= MS_NODEV;
+	if (stat.f_flag & ST_NOEXEC)
+		mnt_flags |= MS_NOEXEC;
+	if (stat.f_flag & ST_NOATIME)
+		mnt_flags |= MS_NOATIME;
+	if (stat.f_flag & ST_NODIRATIME)
+		mnt_flags |= MS_NODIRATIME;
+	if (stat.f_flag & ST_RELATIME)
+		mnt_flags |= MS_RELATIME;
+	if (stat.f_flag & ST_SYNCHRONOUS)
+		mnt_flags |= MS_SYNCHRONOUS;
+	if (stat.f_flag & ST_MANDLOCK)
+		mnt_flags |= MS_MANDLOCK;
+
+	return mnt_flags;
+}
+
 static void create_and_enter_userns(void)
 {
 	uid_t uid;
@@ -100,13 +163,10 @@ static void create_and_enter_userns(void)
 		    strerror(errno));
 	}
 
+	maybe_write_file("/proc/self/setgroups", "deny");
 	write_file("/proc/self/uid_map", "0 %d 1", uid);
 	write_file("/proc/self/gid_map", "0 %d 1", gid);
 
-	if (setgroups(0, NULL) != 0) {
-		die("setgroups failed: %s\n",
-		    strerror(errno));
-	}
 	if (setgid(0) != 0) {
 		die ("setgid(0) failed %s\n", strerror(errno));
@@ -118,7 +178,8 @@
 }
 
 static
-bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
+bool test_unpriv_remount(const char *fstype, const char *mount_options,
+			 int mount_flags, int remount_flags, int invalid_flags)
 {
 	pid_t child;
 
@@ -151,9 +212,11 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
 		    strerror(errno));
 	}
 
-	if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) {
-		die("mount of /tmp failed: %s\n",
-		    strerror(errno));
+	if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
+		die("mount of %s with options '%s' on /tmp failed: %s\n",
+		    fstype,
+		    mount_options? mount_options : "",
+		    strerror(errno));
 	}
 
 	create_and_enter_userns();
@@ -181,62 +244,127 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
 static bool test_unpriv_remount_simple(int mount_flags)
 {
-	return test_unpriv_remount(mount_flags, mount_flags, 0);
+	return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0);
 }
 
 static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
 {
-	return test_unpriv_remount(mount_flags, mount_flags, invalid_flags);
+	return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags,
+				   invalid_flags);
+}
+
+static bool test_priv_mount_unpriv_remount(void)
+{
+	pid_t child;
+	int ret;
+	const char *orig_path = "/dev";
+	const char *dest_path = "/tmp";
+	int orig_mnt_flags, remount_mnt_flags;
+
+	child = fork();
+	if (child == -1) {
+		die("fork failed: %s\n",
+		    strerror(errno));
+	}
+	if (child != 0) { /* parent */
+		pid_t pid;
+		int status;
+		pid = waitpid(child, &status, 0);
+		if (pid == -1) {
+			die("waitpid failed: %s\n",
+			    strerror(errno));
+		}
+		if (pid != child) {
+			die("waited for %d got %d\n",
+			    child, pid);
+		}
+		if (!WIFEXITED(status)) {
+			die("child did not terminate cleanly\n");
+		}
+		return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+	}
+
+	orig_mnt_flags = read_mnt_flags(orig_path);
+
+	create_and_enter_userns();
+	ret = unshare(CLONE_NEWNS);
+	if (ret != 0) {
+		die("unshare(CLONE_NEWNS) failed: %s\n",
+		    strerror(errno));
+	}
+
+	ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL);
+	if (ret != 0) {
+		die("recursive bind mount of %s onto %s failed: %s\n",
+		    orig_path, dest_path, strerror(errno));
+	}
+
+	ret = mount(dest_path, dest_path, "none",
+		    MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL);
+	if (ret != 0) {
+		/* system("cat /proc/self/mounts"); */
+		die("remount of /tmp failed: %s\n",
+		    strerror(errno));
+	}
+
+	remount_mnt_flags = read_mnt_flags(dest_path);
+	if (orig_mnt_flags != remount_mnt_flags) {
+		die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
+		    dest_path, orig_path);
+	}
+	exit(EXIT_SUCCESS);
+}
 
 int main(int argc, char **argv)
 {
-	if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) {
+	if (!test_unpriv_remount_simple(MS_RDONLY)) {
 		die("MS_RDONLY malfunctions\n");
 	}
-	if (!test_unpriv_remount_simple(MS_NODEV)) {
+	if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) {
 		die("MS_NODEV malfunctions\n");
 	}
-	if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) {
+	if (!test_unpriv_remount_simple(MS_NOSUID)) {
 		die("MS_NOSUID malfunctions\n");
 	}
-	if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) {
+	if (!test_unpriv_remount_simple(MS_NOEXEC)) {
 		die("MS_NOEXEC malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV,
-				       MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_RELATIME,
+				       MS_NOATIME))
 	{
 		die("MS_RELATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV,
-				       MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_STRICTATIME,
+				       MS_NOATIME))
 	{
 		die("MS_STRICTATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV,
-				       MS_STRICTATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_NOATIME,
+				       MS_STRICTATIME))
 	{
-		die("MS_RELATIME malfunctions\n");
+		die("MS_NOATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV,
-				       MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME,
+				       MS_NOATIME))
 	{
-		die("MS_RELATIME malfunctions\n");
+		die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV,
-				       MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME,
+				       MS_NOATIME))
 	{
-		die("MS_RELATIME malfunctions\n");
+		die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV,
-				       MS_STRICTATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME,
+				       MS_STRICTATIME))
 	{
-		die("MS_RELATIME malfunctions\n");
+		die("MS_NOATIME|MS_NODIRATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV,
-				 MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME))
 	{
 		die("Default atime malfunctions\n");
 	}
+	if (!test_priv_mount_unpriv_remount()) {
+		die("Mount flags unexpectedly changed after remount\n");
+	}
 	return EXIT_SUCCESS;
 }
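The statvfs()-based probe that read_mnt_flags() relies on can be exercised on its own; a minimal sketch (the path queried is arbitrary):

    #include <stdio.h>
    #include <sys/statvfs.h>

    int main(void)
    {
        struct statvfs st;

        /* f_flag reports the ST_* mount flags of the containing mount */
        if (statvfs("/proc", &st) != 0) {
            perror("statvfs");
            return 1;
        }
        printf("nosuid=%d nodev=%d noexec=%d rdonly=%d\n",
               !!(st.f_flag & ST_NOSUID), !!(st.f_flag & ST_NODEV),
               !!(st.f_flag & ST_NOEXEC), !!(st.f_flag & ST_RDONLY));
        return 0;
    }

The test converts these ST_* bits back into MS_* flags so it can compare what a remount produced against what was originally mounted.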
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 22fa819a9b6..1c0772b340d 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -61,12 +61,14 @@ static void timer_disarm(struct arch_timer_cpu *timer)
 
 static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
 {
+	int ret;
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
 	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
-	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-			    timer->irq->irq,
-			    timer->irq->level);
+	ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+				  timer->irq->irq,
+				  timer->irq->level);
+	WARN_ON(ret);
 }
 
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
@@ -307,12 +309,24 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
 	timer_disarm(timer);
 }
 
-int kvm_timer_init(struct kvm *kvm)
+void kvm_timer_enable(struct kvm *kvm)
 {
-	if (timecounter && wqueue) {
-		kvm->arch.timer.cntvoff = kvm_phys_timer_read();
+	if (kvm->arch.timer.enabled)
+		return;
+
+	/*
+	 * There is a potential race here between VCPUs starting for the first
+	 * time, which may be enabling the timer multiple times.  That doesn't
+	 * hurt though, because we're just setting a variable to the same
+	 * value it already had.  The important thing is that all VCPUs have
+	 * the enabled variable set, before entering the guest, if the arch
+	 * timers are enabled.
+	 */
+	if (timecounter && wqueue)
 		kvm->arch.timer.enabled = 1;
-	}
+}
 
-	return 0;
+void kvm_timer_init(struct kvm *kvm)
+{
+	kvm->arch.timer.cntvoff = kvm_phys_timer_read();
 }
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index aacdb59f30d..03affc7bf45 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -91,6 +91,7 @@
 #define ACCESS_WRITE_VALUE	(3 << 1)
 #define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))
 
+static int vgic_init(struct kvm *kvm);
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
@@ -1607,7 +1608,7 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 	}
 }
 
-static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
+static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 				    unsigned int irq_num, bool level)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
@@ -1643,9 +1644,10 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 			vgic_dist_irq_clear_level(vcpu, irq_num);
 			if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
 				vgic_dist_irq_clear_pending(vcpu, irq_num);
-		} else {
-			vgic_dist_irq_clear_pending(vcpu, irq_num);
 		}
+
+		ret = false;
+		goto out;
 	}
 
 	enabled = vgic_irq_is_enabled(vcpu, irq_num);
@@ -1672,7 +1674,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 out:
 	spin_unlock(&dist->lock);
 
-	return ret;
+	return ret ? cpuid : -EINVAL;
 }
 
 /**
@@ -1692,11 +1694,26 @@ out:
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level)
 {
-	if (likely(vgic_initialized(kvm)) &&
-	    vgic_update_irq_pending(kvm, cpuid, irq_num, level))
-		vgic_kick_vcpus(kvm);
+	int ret = 0;
+	int vcpu_id;
 
-	return 0;
+	if (unlikely(!vgic_initialized(kvm))) {
+		mutex_lock(&kvm->lock);
+		ret = vgic_init(kvm);
+		mutex_unlock(&kvm->lock);
+
+		if (ret)
+			goto out;
+	}
+
+	vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
+	if (vcpu_id >= 0) {
+		/* kick the specified vcpu */
+		kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
+	}
+
+out:
+	return ret;
 }
 
 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1726,39 +1743,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 	int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
 
 	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
-	vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
+	vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
 
 	if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
 		kvm_vgic_vcpu_destroy(vcpu);
 		return -ENOMEM;
 	}
 
-	return 0;
-}
-
-/**
- * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
- * @vcpu: pointer to the vcpu struct
- *
- * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
- * this vcpu and enable the VGIC for this VCPU
- */
-static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int i;
-
-	for (i = 0; i < dist->nr_irqs; i++) {
-		if (i < VGIC_NR_PPIS)
-			vgic_bitmap_set_irq_val(&dist->irq_enabled,
-						vcpu->vcpu_id, i, 1);
-		if (i < VGIC_NR_PRIVATE_IRQS)
-			vgic_bitmap_set_irq_val(&dist->irq_cfg,
-						vcpu->vcpu_id, i, VGIC_CFG_EDGE);
-
-		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
-	}
+	memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
 
 	/*
 	 * Store the number of LRs per vcpu, so we don't have to go
@@ -1767,7 +1759,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	 */
 	vgic_cpu->nr_lr = vgic->nr_lr;
 
-	vgic_enable(vcpu);
+	return 0;
 }
 
 void kvm_vgic_destroy(struct kvm *kvm)
@@ -1798,20 +1790,21 @@ void kvm_vgic_destroy(struct kvm *kvm)
 	dist->irq_spi_cpu = NULL;
 	dist->irq_spi_target = NULL;
 	dist->irq_pending_on_cpu = NULL;
+	dist->nr_cpus = 0;
 }
 
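kvm_vgic_inject_irq() above now initializes the distributor lazily, under kvm->lock, before the first injection. The shape of that check-then-init-under-a-lock pattern, reduced to a standalone sketch (the pthread mutex and init function are stand-ins, not KVM API; the real code re-checks inside vgic_init() itself):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static bool initialized;

    static int do_init(void)
    {
        /* allocate per-device state here; return 0 on success */
        return 0;
    }

    static int inject(void)
    {
        int ret = 0;

        if (!initialized) {             /* unlocked fast-path check */
            pthread_mutex_lock(&lock);
            if (!initialized) {         /* re-check under the lock */
                ret = do_init();
                if (ret == 0)
                    initialized = true;
            }
            pthread_mutex_unlock(&lock);
            if (ret)
                return ret;
        }
        puts("IRQ state updated");
        return 0;
    }

    int main(void)
    {
        return inject();
    }

The benign race the timer comment mentions is the same idea: several VCPUs may attempt the initialization, but only the first one does the work.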
 /*
  * Allocate and initialize the various data structures. Must be called
  * with kvm->lock held!
  */
-static int vgic_init_maps(struct kvm *kvm)
+static int vgic_init(struct kvm *kvm)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct kvm_vcpu *vcpu;
 	int nr_cpus, nr_irqs;
-	int ret, i;
+	int ret, i, vcpu_id;
 
-	if (dist->nr_cpus)	/* Already allocated */
+	if (vgic_initialized(kvm))
 		return 0;
 
 	nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
@@ -1859,16 +1852,28 @@ static int vgic_init_maps(struct kvm *kvm)
 	if (ret)
 		goto out;
 
-	kvm_for_each_vcpu(i, vcpu, kvm) {
+	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
+		vgic_set_target_reg(kvm, 0, i);
+
+	kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
 		ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
 		if (ret) {
 			kvm_err("VGIC: Failed to allocate vcpu memory\n");
 			break;
 		}
-	}
 
-	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
-		vgic_set_target_reg(kvm, 0, i);
+		for (i = 0; i < dist->nr_irqs; i++) {
+			if (i < VGIC_NR_PPIS)
+				vgic_bitmap_set_irq_val(&dist->irq_enabled,
+							vcpu->vcpu_id, i, 1);
+			if (i < VGIC_NR_PRIVATE_IRQS)
+				vgic_bitmap_set_irq_val(&dist->irq_cfg,
+							vcpu->vcpu_id, i,
+							VGIC_CFG_EDGE);
+		}
+
+		vgic_enable(vcpu);
+	}
 
 out:
 	if (ret)
@@ -1878,25 +1883,23 @@ out:
 }
 
 /**
- * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
+ * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs
  * @kvm: pointer to the kvm struct
 *
 * Map the virtual CPU interface into the VM before running any VCPUs. We
 * can't do this at creation time, because user space must first set the
- * virtual CPU interface address in the guest physical address space. Also
- * initialize the ITARGETSRn regs to 0 on the emulated distributor.
+ * virtual CPU interface address in the guest physical address space.
 */
-int kvm_vgic_init(struct kvm *kvm)
+int kvm_vgic_map_resources(struct kvm *kvm)
 {
-	struct kvm_vcpu *vcpu;
-	int ret = 0, i;
+	int ret = 0;
 
 	if (!irqchip_in_kernel(kvm))
 		return 0;
 
 	mutex_lock(&kvm->lock);
 
-	if (vgic_initialized(kvm))
+	if (vgic_ready(kvm))
 		goto out;
 
 	if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
@@ -1906,7 +1909,11 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
-	ret = vgic_init_maps(kvm);
+	/*
+	 * Initialize the vgic if this hasn't already been done on demand by
+	 * accessing the vgic state from userspace.
+	 */
+	ret = vgic_init(kvm);
 	if (ret) {
 		kvm_err("Unable to allocate maps\n");
 		goto out;
 	}
@@ -1920,9 +1927,6 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		kvm_vgic_vcpu_init(vcpu);
-
 	kvm->arch.vgic.ready = true;
 out:
 	if (ret)
@@ -2167,7 +2171,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
 
 	mutex_lock(&dev->kvm->lock);
 
-	ret = vgic_init_maps(dev->kvm);
+	ret = vgic_init(dev->kvm);
 	if (ret)
 		goto out;
 
@@ -2289,7 +2293,7 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
 		mutex_lock(&dev->kvm->lock);
 
-		if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
+		if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
 			ret = -EBUSY;
 		else
 			dev->kvm->arch.vgic.nr_irqs = val;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b0fb390943c..148b2392c76 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,9 +36,6 @@
 #include <linux/seqlock.h>
 #include <trace/events/kvm.h>
 
-#ifdef __KVM_HAVE_IOAPIC
-#include "ioapic.h"
-#endif
 #include "iodev.h"
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
@@ -492,9 +489,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
 	mutex_lock(&kvm->irq_lock);
 	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
 	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
 }
 
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@@ -504,9 +499,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 	hlist_del_init_rcu(&kian->link);
 	mutex_unlock(&kvm->irq_lock);
 	synchronize_srcu(&kvm->irq_srcu);
-#ifdef __KVM_HAVE_IOAPIC
 	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
 }
 #endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3cee7b16705..f5283438ee0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -124,15 +124,6 @@ int vcpu_load(struct kvm_vcpu *vcpu)
 
 	if (mutex_lock_killable(&vcpu->mutex))
 		return -EINTR;
-	if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
-		/* The thread running this VCPU changed. */
-		struct pid *oldpid = vcpu->pid;
-		struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
-		rcu_assign_pointer(vcpu->pid, newpid);
-		if (oldpid)
-			synchronize_rcu();
-		put_pid(oldpid);
-	}
 	cpu = get_cpu();
 	preempt_notifier_register(&vcpu->preempt_notifier);
 	kvm_arch_vcpu_load(vcpu, cpu);
@@ -468,9 +459,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	if (r)
 		goto out_err_no_disable;
 
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
-	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
-#endif
 #ifdef CONFIG_HAVE_KVM_IRQFD
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
@@ -668,48 +656,46 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 	return 0;
 }
 
-static int cmp_memslot(const void *slot1, const void *slot2)
-{
-	struct kvm_memory_slot *s1, *s2;
-
-	s1 = (struct kvm_memory_slot *)slot1;
-	s2 = (struct kvm_memory_slot *)slot2;
-
-	if (s1->npages < s2->npages)
-		return 1;
-	if (s1->npages > s2->npages)
-		return -1;
-
-	return 0;
-}
-
 /*
- * Sort the memslots base on its size, so the larger slots
- * will get better fit.
+ * Insert the memslot and re-sort the memslots array based on GFN,
+ * so that a binary search can be used to look up a GFN.
+ * The sorting algorithm takes advantage of the array being initially
+ * sorted and of the changed memslot's position being known.
 */
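The new update_memslots() below keeps the array sorted by base_gfn in descending order, and since only one slot changed, it restores order with a single pass instead of a full sort. The core move on a plain array, as a self-contained sketch (simplified: no slot ids, no empty-slot handling):

    #include <stdio.h>

    /* Re-sort a descending array after gfn[i] changed, by bubbling the
     * changed entry toward its new position; this mirrors the two while
     * loops in update_memslots() below. */
    static void resort(unsigned long *gfn, int n, int i)
    {
        unsigned long v = gfn[i];

        while (i < n - 1 && v <= gfn[i + 1]) {  /* value shrank: shift left */
            gfn[i] = gfn[i + 1];
            i++;
        }
        while (i > 0 && v > gfn[i - 1]) {       /* value grew: shift right */
            gfn[i] = gfn[i - 1];
            i--;
        }
        gfn[i] = v;
    }

    int main(void)
    {
        unsigned long gfn[] = { 900, 500, 100 };

        gfn[2] = 1000;                  /* the last slot moved past the others */
        resort(gfn, 3, 2);
        printf("%lu %lu %lu\n", gfn[0], gfn[1], gfn[2]);  /* 1000 900 500 */
        return 0;
    }

The kernel version additionally maintains the id_to_index[] back-map and the used_slots count on each shift, but the traversal is the same.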
-static void sort_memslots(struct kvm_memslots *slots)
-{
-	int i;
-
-	sort(slots->memslots, KVM_MEM_SLOTS_NUM,
-	     sizeof(struct kvm_memory_slot), cmp_memslot, NULL);
-
-	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-		slots->id_to_index[slots->memslots[i].id] = i;
-}
-
 static void update_memslots(struct kvm_memslots *slots,
 			    struct kvm_memory_slot *new)
 {
-	if (new) {
-		int id = new->id;
-		struct kvm_memory_slot *old = id_to_memslot(slots, id);
-		unsigned long npages = old->npages;
+	int id = new->id;
+	int i = slots->id_to_index[id];
+	struct kvm_memory_slot *mslots = slots->memslots;
 
-		*old = *new;
-		if (new->npages != npages)
-			sort_memslots(slots);
+	WARN_ON(mslots[i].id != id);
+	if (!new->npages) {
+		new->base_gfn = 0;
+		if (mslots[i].npages)
+			slots->used_slots--;
+	} else {
+		if (!mslots[i].npages)
+			slots->used_slots++;
 	}
+
+	while (i < KVM_MEM_SLOTS_NUM - 1 &&
+	       new->base_gfn <= mslots[i + 1].base_gfn) {
+		if (!mslots[i + 1].npages)
+			break;
+		mslots[i] = mslots[i + 1];
+		slots->id_to_index[mslots[i].id] = i;
+		i++;
+	}
+	while (i > 0 &&
+	       new->base_gfn > mslots[i - 1].base_gfn) {
+		mslots[i] = mslots[i - 1];
+		slots->id_to_index[mslots[i].id] = i;
+		i--;
+	}
+
+	mslots[i] = *new;
+	slots->id_to_index[mslots[i].id] = i;
 }
 
 static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
@@ -727,7 +713,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
 }
 
 static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
-		struct kvm_memslots *slots, struct kvm_memory_slot *new)
+		struct kvm_memslots *slots)
 {
 	struct kvm_memslots *old_memslots = kvm->memslots;
 
@@ -738,7 +724,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	WARN_ON(old_memslots->generation & 1);
 	slots->generation = old_memslots->generation + 1;
 
-	update_memslots(slots, new);
 	rcu_assign_pointer(kvm->memslots, slots);
 	synchronize_srcu_expedited(&kvm->srcu);
 
@@ -760,7 +745,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 *
 * Discontiguous memory is allowed, mostly for framebuffers.
 *
- * Must be called holding mmap_sem for write.
+ * Must be called holding kvm->slots_lock for write.
 */
 int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem)
@@ -866,15 +851,16 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		goto out_free;
 	}
 
+	slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
+			GFP_KERNEL);
+	if (!slots)
+		goto out_free;
+
 	if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
-		slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
-				GFP_KERNEL);
-		if (!slots)
-			goto out_free;
 		slot = id_to_memslot(slots, mem->slot);
 		slot->flags |= KVM_MEMSLOT_INVALID;
 
-		old_memslots = install_new_memslots(kvm, slots, NULL);
+		old_memslots = install_new_memslots(kvm, slots);
 
 		/* slot was deleted or moved, clear iommu mapping */
 		kvm_iommu_unmap_pages(kvm, &old);
@@ -886,6 +872,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		 *	- kvm_is_visible_gfn (mmu_check_roots)
 		 */
 		kvm_arch_flush_shadow_memslot(kvm, slot);
+
+		/*
+		 * We can re-use the old_memslots from above, the only difference
+		 * from the currently installed memslots is the invalid flag.  This
+		 * will get overwritten by update_memslots anyway.
+		 */
+		slots = old_memslots;
 	}
 
@@ -893,26 +885,14 @@
 	if (r)
 		goto out_slots;
 
-	r = -ENOMEM;
-	/*
-	 * We can re-use the old_memslots from above, the only difference
-	 * from the currently installed memslots is the invalid flag.  This
This - * will get overwritten by update_memslots anyway. - */ - if (!slots) { - slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), - GFP_KERNEL); - if (!slots) - goto out_free; - } - /* actual memory is freed via old in kvm_free_physmem_slot below */ if (change == KVM_MR_DELETE) { new.dirty_bitmap = NULL; memset(&new.arch, 0, sizeof(new.arch)); } - old_memslots = install_new_memslots(kvm, slots, &new); + update_memslots(slots, &new); + old_memslots = install_new_memslots(kvm, slots); kvm_arch_commit_memory_region(kvm, mem, &old, change); @@ -1799,10 +1779,6 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target) rcu_read_unlock(); if (!task) return ret; - if (task->flags & PF_VCPU) { - put_task_struct(task); - return ret; - } ret = yield_to(task, 1); put_task_struct(task); @@ -2065,6 +2041,15 @@ static long kvm_vcpu_ioctl(struct file *filp, r = -EINVAL; if (arg) goto out; + if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { + /* The thread running this VCPU changed. */ + struct pid *oldpid = vcpu->pid; + struct pid *newpid = get_task_pid(current, PIDTYPE_PID); + rcu_assign_pointer(vcpu->pid, newpid); + if (oldpid) + synchronize_rcu(); + put_pid(oldpid); + } r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); trace_kvm_userspace_exit(vcpu->run->exit_reason, r); break; @@ -2599,8 +2584,6 @@ static long kvm_vm_ioctl(struct file *filp, break; default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); - if (r == -ENOTTY) - r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); } out: return r; |