-rw-r--r-- | Documentation/virtual/kvm/api.txt | 27
-rw-r--r-- | Documentation/virtual/kvm/devices/s390_flic.txt | 45
-rw-r--r-- | arch/mips/include/asm/kvm_host.h | 417
-rw-r--r-- | arch/mips/kvm/kvm_mips_emul.c | 40
-rw-r--r-- | arch/s390/include/asm/kvm_host.h | 32
-rw-r--r-- | arch/s390/include/uapi/asm/kvm.h | 22
-rw-r--r-- | arch/s390/kvm/Kconfig | 2
-rw-r--r-- | arch/s390/kvm/Makefile | 2
-rw-r--r-- | arch/s390/kvm/interrupt.c | 294
-rw-r--r-- | arch/s390/kvm/irq.h | 22
-rw-r--r-- | arch/s390/kvm/kvm-s390.c | 42
-rw-r--r-- | arch/s390/kvm/kvm-s390.h | 2
-rw-r--r-- | arch/x86/kvm/cpuid.c | 30
-rw-r--r-- | arch/x86/kvm/svm.c | 6
-rw-r--r-- | arch/x86/kvm/vmx.c | 22
-rw-r--r-- | arch/x86/kvm/x86.c | 21
-rw-r--r-- | arch/x86/kvm/x86.h | 2
-rw-r--r-- | include/linux/kvm_host.h | 13
-rw-r--r-- | include/uapi/linux/kvm.h | 16
-rw-r--r-- | virt/kvm/eventfd.c | 8
-rw-r--r-- | virt/kvm/ioapic.c | 107
21 files changed, 878 insertions, 294 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 4714f282a43..2cb1640a90a 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -586,8 +586,8 @@ struct kvm_fpu { 4.24 KVM_CREATE_IRQCHIP -Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64, ARM, arm64 +Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390) +Architectures: x86, ia64, ARM, arm64, s390 Type: vm ioctl Parameters: none Returns: 0 on success, -1 on error @@ -596,7 +596,10 @@ Creates an interrupt controller model in the kernel. On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is -created. +created. On s390, a dummy irq routing table is created. + +Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled +before KVM_CREATE_IRQCHIP can be used. 4.25 KVM_IRQ_LINE @@ -932,9 +935,9 @@ documentation when it pops into existence). 4.37 KVM_ENABLE_CAP -Capability: KVM_CAP_ENABLE_CAP +Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM Architectures: ppc, s390 -Type: vcpu ioctl +Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM) Parameters: struct kvm_enable_cap (in) Returns: 0 on success; -1 on error @@ -965,6 +968,8 @@ function properly, this is the place to put them. __u8 pad[64]; }; +The vcpu ioctl should be used for vcpu-specific capabilities, the vm ioctl +for vm-wide capabilities. 4.38 KVM_GET_MP_STATE @@ -1334,7 +1339,7 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. 4.52 KVM_SET_GSI_ROUTING Capability: KVM_CAP_IRQ_ROUTING -Architectures: x86 ia64 +Architectures: x86 ia64 s390 Type: vm ioctl Parameters: struct kvm_irq_routing (in) Returns: 0 on success, -1 on error @@ -1357,6 +1362,7 @@ struct kvm_irq_routing_entry { union { struct kvm_irq_routing_irqchip irqchip; struct kvm_irq_routing_msi msi; + struct kvm_irq_routing_s390_adapter adapter; __u32 pad[8]; } u; }; @@ -1364,6 +1370,7 @@ struct kvm_irq_routing_entry { /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 +#define KVM_IRQ_ROUTING_S390_ADAPTER 3 No flags are specified so far, the corresponding field must be set to zero. @@ -1379,6 +1386,14 @@ struct kvm_irq_routing_msi { __u32 pad; }; +struct kvm_irq_routing_s390_adapter { + __u64 ind_addr; + __u64 summary_addr; + __u64 ind_offset; + __u32 summary_offset; + __u32 adapter_id; +}; + 4.53 KVM_ASSIGN_SET_MSIX_NR diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt index 410fa673e5b..4ceef53164b 100644 --- a/Documentation/virtual/kvm/devices/s390_flic.txt +++ b/Documentation/virtual/kvm/devices/s390_flic.txt @@ -12,6 +12,7 @@ FLIC provides support to - inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS) - purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS) - enable/disable for the guest transparent async page faults +- register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*) Groups: KVM_DEV_FLIC_ENQUEUE @@ -44,3 +45,47 @@ Groups: Disables async page faults for the guest and waits until already pending async page faults are done. This is necessary to trigger a completion interrupt for every init interrupt before migrating the interrupt list. + + KVM_DEV_FLIC_ADAPTER_REGISTER + Register an I/O adapter interrupt source. 
Takes a kvm_s390_io_adapter + describing the adapter + to register: + +struct kvm_s390_io_adapter { + __u32 id; + __u8 isc; + __u8 maskable; + __u8 swap; + __u8 pad; +}; + + id contains the unique id for the adapter, isc the I/O interruption subclass + to use, maskable whether this adapter may be masked (interrupts turned off) + and swap whether the indicators need to be byte swapped. + + + KVM_DEV_FLIC_ADAPTER_MODIFY + Modifies attributes of an existing I/O adapter interrupt source. Takes + a kvm_s390_io_adapter_req specifying the adapter and the operation: + +struct kvm_s390_io_adapter_req { + __u32 id; + __u8 type; + __u8 mask; + __u16 pad0; + __u64 addr; +}; + + id specifies the adapter and type the operation. The supported operations + are: + + KVM_S390_IO_ADAPTER_MASK + mask or unmask the adapter, as specified in mask + + KVM_S390_IO_ADAPTER_MAP + perform a gmap translation for the guest address provided in addr, + pin a userspace page for the translated address and add it to the + list of mappings + + KVM_S390_IO_ADAPTER_UNMAP + release a userspace page for the translated address specified in addr + from the list of mappings diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index a995fce8779..060aaa6348d 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -30,16 +30,16 @@ /* Special address that contains the comm page, used for reducing # of traps */ -#define KVM_GUEST_COMMPAGE_ADDR 0x0 +#define KVM_GUEST_COMMPAGE_ADDR 0x0 #define KVM_GUEST_KERNEL_MODE(vcpu) ((kvm_read_c0_guest_status(vcpu->arch.cop0) & (ST0_EXL | ST0_ERL)) || \ ((kvm_read_c0_guest_status(vcpu->arch.cop0) & KSU_USER) == 0)) -#define KVM_GUEST_KUSEG 0x00000000UL -#define KVM_GUEST_KSEG0 0x40000000UL -#define KVM_GUEST_KSEG23 0x60000000UL -#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0x60000000) -#define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff) +#define KVM_GUEST_KUSEG 0x00000000UL +#define KVM_GUEST_KSEG0 0x40000000UL +#define KVM_GUEST_KSEG23 0x60000000UL +#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0x60000000) +#define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff) #define KVM_GUEST_CKSEG0ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG0) #define KVM_GUEST_CKSEG1ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG1) @@ -52,17 +52,17 @@ #define KVM_GUEST_KSEG1ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG1) #define KVM_GUEST_KSEG23ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG23) -#define KVM_INVALID_PAGE 0xdeadbeef -#define KVM_INVALID_INST 0xdeadbeef -#define KVM_INVALID_ADDR 0xdeadbeef +#define KVM_INVALID_PAGE 0xdeadbeef +#define KVM_INVALID_INST 0xdeadbeef +#define KVM_INVALID_ADDR 0xdeadbeef -#define KVM_MALTA_GUEST_RTC_ADDR 0xb8000070UL +#define KVM_MALTA_GUEST_RTC_ADDR 0xb8000070UL -#define GUEST_TICKS_PER_JIFFY (40000000/HZ) -#define MS_TO_NS(x) (x * 1E6L) +#define GUEST_TICKS_PER_JIFFY (40000000/HZ) +#define MS_TO_NS(x) (x * 1E6L) -#define CAUSEB_DC 27 -#define CAUSEF_DC (_ULCAST_(1) << 27) +#define CAUSEB_DC 27 +#define CAUSEF_DC (_ULCAST_(1) << 27) struct kvm; struct kvm_run; @@ -126,8 +126,8 @@ struct kvm_arch { int commpage_tlb; }; -#define N_MIPS_COPROC_REGS 32 -#define N_MIPS_COPROC_SEL 8 +#define N_MIPS_COPROC_REGS 32 +#define N_MIPS_COPROC_SEL 8 struct mips_coproc { unsigned long reg[N_MIPS_COPROC_REGS][N_MIPS_COPROC_SEL]; @@ -139,124 +139,124 @@ struct mips_coproc { /* * Coprocessor 0 register names */ -#define MIPS_CP0_TLB_INDEX 0 -#define MIPS_CP0_TLB_RANDOM 1 -#define MIPS_CP0_TLB_LOW 2 -#define
MIPS_CP0_TLB_LO0 2 -#define MIPS_CP0_TLB_LO1 3 -#define MIPS_CP0_TLB_CONTEXT 4 -#define MIPS_CP0_TLB_PG_MASK 5 -#define MIPS_CP0_TLB_WIRED 6 -#define MIPS_CP0_HWRENA 7 -#define MIPS_CP0_BAD_VADDR 8 -#define MIPS_CP0_COUNT 9 -#define MIPS_CP0_TLB_HI 10 -#define MIPS_CP0_COMPARE 11 -#define MIPS_CP0_STATUS 12 -#define MIPS_CP0_CAUSE 13 -#define MIPS_CP0_EXC_PC 14 -#define MIPS_CP0_PRID 15 -#define MIPS_CP0_CONFIG 16 -#define MIPS_CP0_LLADDR 17 -#define MIPS_CP0_WATCH_LO 18 -#define MIPS_CP0_WATCH_HI 19 -#define MIPS_CP0_TLB_XCONTEXT 20 -#define MIPS_CP0_ECC 26 -#define MIPS_CP0_CACHE_ERR 27 -#define MIPS_CP0_TAG_LO 28 -#define MIPS_CP0_TAG_HI 29 -#define MIPS_CP0_ERROR_PC 30 -#define MIPS_CP0_DEBUG 23 -#define MIPS_CP0_DEPC 24 -#define MIPS_CP0_PERFCNT 25 -#define MIPS_CP0_ERRCTL 26 -#define MIPS_CP0_DATA_LO 28 -#define MIPS_CP0_DATA_HI 29 -#define MIPS_CP0_DESAVE 31 - -#define MIPS_CP0_CONFIG_SEL 0 -#define MIPS_CP0_CONFIG1_SEL 1 -#define MIPS_CP0_CONFIG2_SEL 2 -#define MIPS_CP0_CONFIG3_SEL 3 +#define MIPS_CP0_TLB_INDEX 0 +#define MIPS_CP0_TLB_RANDOM 1 +#define MIPS_CP0_TLB_LOW 2 +#define MIPS_CP0_TLB_LO0 2 +#define MIPS_CP0_TLB_LO1 3 +#define MIPS_CP0_TLB_CONTEXT 4 +#define MIPS_CP0_TLB_PG_MASK 5 +#define MIPS_CP0_TLB_WIRED 6 +#define MIPS_CP0_HWRENA 7 +#define MIPS_CP0_BAD_VADDR 8 +#define MIPS_CP0_COUNT 9 +#define MIPS_CP0_TLB_HI 10 +#define MIPS_CP0_COMPARE 11 +#define MIPS_CP0_STATUS 12 +#define MIPS_CP0_CAUSE 13 +#define MIPS_CP0_EXC_PC 14 +#define MIPS_CP0_PRID 15 +#define MIPS_CP0_CONFIG 16 +#define MIPS_CP0_LLADDR 17 +#define MIPS_CP0_WATCH_LO 18 +#define MIPS_CP0_WATCH_HI 19 +#define MIPS_CP0_TLB_XCONTEXT 20 +#define MIPS_CP0_ECC 26 +#define MIPS_CP0_CACHE_ERR 27 +#define MIPS_CP0_TAG_LO 28 +#define MIPS_CP0_TAG_HI 29 +#define MIPS_CP0_ERROR_PC 30 +#define MIPS_CP0_DEBUG 23 +#define MIPS_CP0_DEPC 24 +#define MIPS_CP0_PERFCNT 25 +#define MIPS_CP0_ERRCTL 26 +#define MIPS_CP0_DATA_LO 28 +#define MIPS_CP0_DATA_HI 29 +#define MIPS_CP0_DESAVE 31 + +#define MIPS_CP0_CONFIG_SEL 0 +#define MIPS_CP0_CONFIG1_SEL 1 +#define MIPS_CP0_CONFIG2_SEL 2 +#define MIPS_CP0_CONFIG3_SEL 3 /* Config0 register bits */ -#define CP0C0_M 31 -#define CP0C0_K23 28 -#define CP0C0_KU 25 -#define CP0C0_MDU 20 -#define CP0C0_MM 17 -#define CP0C0_BM 16 -#define CP0C0_BE 15 -#define CP0C0_AT 13 -#define CP0C0_AR 10 -#define CP0C0_MT 7 -#define CP0C0_VI 3 -#define CP0C0_K0 0 +#define CP0C0_M 31 +#define CP0C0_K23 28 +#define CP0C0_KU 25 +#define CP0C0_MDU 20 +#define CP0C0_MM 17 +#define CP0C0_BM 16 +#define CP0C0_BE 15 +#define CP0C0_AT 13 +#define CP0C0_AR 10 +#define CP0C0_MT 7 +#define CP0C0_VI 3 +#define CP0C0_K0 0 /* Config1 register bits */ -#define CP0C1_M 31 -#define CP0C1_MMU 25 -#define CP0C1_IS 22 -#define CP0C1_IL 19 -#define CP0C1_IA 16 -#define CP0C1_DS 13 -#define CP0C1_DL 10 -#define CP0C1_DA 7 -#define CP0C1_C2 6 -#define CP0C1_MD 5 -#define CP0C1_PC 4 -#define CP0C1_WR 3 -#define CP0C1_CA 2 -#define CP0C1_EP 1 -#define CP0C1_FP 0 +#define CP0C1_M 31 +#define CP0C1_MMU 25 +#define CP0C1_IS 22 +#define CP0C1_IL 19 +#define CP0C1_IA 16 +#define CP0C1_DS 13 +#define CP0C1_DL 10 +#define CP0C1_DA 7 +#define CP0C1_C2 6 +#define CP0C1_MD 5 +#define CP0C1_PC 4 +#define CP0C1_WR 3 +#define CP0C1_CA 2 +#define CP0C1_EP 1 +#define CP0C1_FP 0 /* Config2 Register bits */ -#define CP0C2_M 31 -#define CP0C2_TU 28 -#define CP0C2_TS 24 -#define CP0C2_TL 20 -#define CP0C2_TA 16 -#define CP0C2_SU 12 -#define CP0C2_SS 8 -#define CP0C2_SL 4 -#define CP0C2_SA 0 +#define CP0C2_M 31 +#define CP0C2_TU 28 +#define CP0C2_TS 
24 +#define CP0C2_TL 20 +#define CP0C2_TA 16 +#define CP0C2_SU 12 +#define CP0C2_SS 8 +#define CP0C2_SL 4 +#define CP0C2_SA 0 /* Config3 Register bits */ -#define CP0C3_M 31 -#define CP0C3_ISA_ON_EXC 16 -#define CP0C3_ULRI 13 -#define CP0C3_DSPP 10 -#define CP0C3_LPA 7 -#define CP0C3_VEIC 6 -#define CP0C3_VInt 5 -#define CP0C3_SP 4 -#define CP0C3_MT 2 -#define CP0C3_SM 1 -#define CP0C3_TL 0 +#define CP0C3_M 31 +#define CP0C3_ISA_ON_EXC 16 +#define CP0C3_ULRI 13 +#define CP0C3_DSPP 10 +#define CP0C3_LPA 7 +#define CP0C3_VEIC 6 +#define CP0C3_VInt 5 +#define CP0C3_SP 4 +#define CP0C3_MT 2 +#define CP0C3_SM 1 +#define CP0C3_TL 0 /* Have config1, Cacheable, noncoherent, write-back, write allocate*/ -#define MIPS_CONFIG0 \ +#define MIPS_CONFIG0 \ ((1 << CP0C0_M) | (0x3 << CP0C0_K0)) /* Have config2, no coprocessor2 attached, no MDMX support attached, no performance counters, watch registers present, no code compression, EJTAG present, no FPU, no watch registers */ -#define MIPS_CONFIG1 \ -((1 << CP0C1_M) | \ - (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) | \ - (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) | \ +#define MIPS_CONFIG1 \ +((1 << CP0C1_M) | \ + (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) | \ + (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) | \ (0 << CP0C1_FP)) /* Have config3, no tertiary/secondary caches implemented */ -#define MIPS_CONFIG2 \ +#define MIPS_CONFIG2 \ ((1 << CP0C2_M)) /* No config4, no DSP ASE, no large physaddr (PABITS), no external interrupt controller, no vectored interrupts, no 1kb pages, no SmartMIPS ASE, no trace logic */ -#define MIPS_CONFIG3 \ -((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) | \ - (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) | \ +#define MIPS_CONFIG3 \ +((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) | \ + (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) | \ (0 << CP0C3_SM) | (0 << CP0C3_TL)) /* MMU types, the first four entries have the same layout as the @@ -274,36 +274,36 @@ enum mips_mmu_types { /* * Trap codes */ -#define T_INT 0 /* Interrupt pending */ -#define T_TLB_MOD 1 /* TLB modified fault */ -#define T_TLB_LD_MISS 2 /* TLB miss on load or ifetch */ -#define T_TLB_ST_MISS 3 /* TLB miss on a store */ -#define T_ADDR_ERR_LD 4 /* Address error on a load or ifetch */ -#define T_ADDR_ERR_ST 5 /* Address error on a store */ -#define T_BUS_ERR_IFETCH 6 /* Bus error on an ifetch */ -#define T_BUS_ERR_LD_ST 7 /* Bus error on a load or store */ -#define T_SYSCALL 8 /* System call */ -#define T_BREAK 9 /* Breakpoint */ -#define T_RES_INST 10 /* Reserved instruction exception */ -#define T_COP_UNUSABLE 11 /* Coprocessor unusable */ -#define T_OVFLOW 12 /* Arithmetic overflow */ +#define T_INT 0 /* Interrupt pending */ +#define T_TLB_MOD 1 /* TLB modified fault */ +#define T_TLB_LD_MISS 2 /* TLB miss on load or ifetch */ +#define T_TLB_ST_MISS 3 /* TLB miss on a store */ +#define T_ADDR_ERR_LD 4 /* Address error on a load or ifetch */ +#define T_ADDR_ERR_ST 5 /* Address error on a store */ +#define T_BUS_ERR_IFETCH 6 /* Bus error on an ifetch */ +#define T_BUS_ERR_LD_ST 7 /* Bus error on a load or store */ +#define T_SYSCALL 8 /* System call */ +#define T_BREAK 9 /* Breakpoint */ +#define T_RES_INST 10 /* Reserved instruction exception */ +#define T_COP_UNUSABLE 11 /* Coprocessor unusable */ +#define T_OVFLOW 12 /* Arithmetic overflow */ /* * Trap definitions added for r4000 port. 
*/ -#define T_TRAP 13 /* Trap instruction */ -#define T_VCEI 14 /* Virtual coherency exception */ -#define T_FPE 15 /* Floating point exception */ -#define T_WATCH 23 /* Watch address reference */ -#define T_VCED 31 /* Virtual coherency data */ +#define T_TRAP 13 /* Trap instruction */ +#define T_VCEI 14 /* Virtual coherency exception */ +#define T_FPE 15 /* Floating point exception */ +#define T_WATCH 23 /* Watch address reference */ +#define T_VCED 31 /* Virtual coherency data */ /* Resume Flags */ -#define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */ -#define RESUME_FLAG_HOST (1<<1) /* Resume host? */ +#define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */ +#define RESUME_FLAG_HOST (1<<1) /* Resume host? */ -#define RESUME_GUEST 0 -#define RESUME_GUEST_DR RESUME_FLAG_DR -#define RESUME_HOST RESUME_FLAG_HOST +#define RESUME_GUEST 0 +#define RESUME_GUEST_DR RESUME_FLAG_DR +#define RESUME_HOST RESUME_FLAG_HOST enum emulation_result { EMULATE_DONE, /* no further processing */ @@ -313,24 +313,27 @@ enum emulation_result { EMULATE_PRIV_FAIL, }; -#define MIPS3_PG_G 0x00000001 /* Global; ignore ASID if in lo0 & lo1 */ -#define MIPS3_PG_V 0x00000002 /* Valid */ -#define MIPS3_PG_NV 0x00000000 -#define MIPS3_PG_D 0x00000004 /* Dirty */ +#define MIPS3_PG_G 0x00000001 /* Global; ignore ASID if in lo0 & lo1 */ +#define MIPS3_PG_V 0x00000002 /* Valid */ +#define MIPS3_PG_NV 0x00000000 +#define MIPS3_PG_D 0x00000004 /* Dirty */ #define mips3_paddr_to_tlbpfn(x) \ - (((unsigned long)(x) >> MIPS3_PG_SHIFT) & MIPS3_PG_FRAME) + (((unsigned long)(x) >> MIPS3_PG_SHIFT) & MIPS3_PG_FRAME) #define mips3_tlbpfn_to_paddr(x) \ - ((unsigned long)((x) & MIPS3_PG_FRAME) << MIPS3_PG_SHIFT) + ((unsigned long)((x) & MIPS3_PG_FRAME) << MIPS3_PG_SHIFT) -#define MIPS3_PG_SHIFT 6 -#define MIPS3_PG_FRAME 0x3fffffc0 +#define MIPS3_PG_SHIFT 6 +#define MIPS3_PG_FRAME 0x3fffffc0 -#define VPN2_MASK 0xffffe000 -#define TLB_IS_GLOBAL(x) (((x).tlb_lo0 & MIPS3_PG_G) && ((x).tlb_lo1 & MIPS3_PG_G)) -#define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK) -#define TLB_ASID(x) ((x).tlb_hi & ASID_MASK) -#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) ? ((x).tlb_lo1 & MIPS3_PG_V) : ((x).tlb_lo0 & MIPS3_PG_V)) +#define VPN2_MASK 0xffffe000 +#define TLB_IS_GLOBAL(x) (((x).tlb_lo0 & MIPS3_PG_G) && \ + ((x).tlb_lo1 & MIPS3_PG_G)) +#define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK) +#define TLB_ASID(x) ((x).tlb_hi & ASID_MASK) +#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) \ + ? 
((x).tlb_lo1 & MIPS3_PG_V) \ + : ((x).tlb_lo0 & MIPS3_PG_V)) struct kvm_mips_tlb { long tlb_mask; @@ -339,7 +342,7 @@ struct kvm_mips_tlb { long tlb_lo1; }; -#define KVM_MIPS_GUEST_TLB_SIZE 64 +#define KVM_MIPS_GUEST_TLB_SIZE 64 struct kvm_vcpu_arch { void *host_ebase, *guest_ebase; unsigned long host_stack; @@ -400,65 +403,67 @@ struct kvm_vcpu_arch { }; -#define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0]) -#define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val) -#define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0]) -#define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0]) -#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0]) -#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val)) -#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2]) -#define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0]) -#define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val)) -#define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0]) -#define kvm_write_c0_guest_wired(cop0, val) (cop0->reg[MIPS_CP0_TLB_WIRED][0] = (val)) -#define kvm_read_c0_guest_badvaddr(cop0) (cop0->reg[MIPS_CP0_BAD_VADDR][0]) -#define kvm_write_c0_guest_badvaddr(cop0, val) (cop0->reg[MIPS_CP0_BAD_VADDR][0] = (val)) -#define kvm_read_c0_guest_count(cop0) (cop0->reg[MIPS_CP0_COUNT][0]) -#define kvm_write_c0_guest_count(cop0, val) (cop0->reg[MIPS_CP0_COUNT][0] = (val)) -#define kvm_read_c0_guest_entryhi(cop0) (cop0->reg[MIPS_CP0_TLB_HI][0]) -#define kvm_write_c0_guest_entryhi(cop0, val) (cop0->reg[MIPS_CP0_TLB_HI][0] = (val)) -#define kvm_read_c0_guest_compare(cop0) (cop0->reg[MIPS_CP0_COMPARE][0]) -#define kvm_write_c0_guest_compare(cop0, val) (cop0->reg[MIPS_CP0_COMPARE][0] = (val)) -#define kvm_read_c0_guest_status(cop0) (cop0->reg[MIPS_CP0_STATUS][0]) -#define kvm_write_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] = (val)) -#define kvm_read_c0_guest_intctl(cop0) (cop0->reg[MIPS_CP0_STATUS][1]) -#define kvm_write_c0_guest_intctl(cop0, val) (cop0->reg[MIPS_CP0_STATUS][1] = (val)) -#define kvm_read_c0_guest_cause(cop0) (cop0->reg[MIPS_CP0_CAUSE][0]) -#define kvm_write_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] = (val)) -#define kvm_read_c0_guest_epc(cop0) (cop0->reg[MIPS_CP0_EXC_PC][0]) -#define kvm_write_c0_guest_epc(cop0, val) (cop0->reg[MIPS_CP0_EXC_PC][0] = (val)) -#define kvm_read_c0_guest_prid(cop0) (cop0->reg[MIPS_CP0_PRID][0]) -#define kvm_write_c0_guest_prid(cop0, val) (cop0->reg[MIPS_CP0_PRID][0] = (val)) -#define kvm_read_c0_guest_ebase(cop0) (cop0->reg[MIPS_CP0_PRID][1]) -#define kvm_write_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] = (val)) -#define kvm_read_c0_guest_config(cop0) (cop0->reg[MIPS_CP0_CONFIG][0]) -#define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1]) -#define kvm_read_c0_guest_config2(cop0) (cop0->reg[MIPS_CP0_CONFIG][2]) -#define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3]) -#define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7]) -#define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val)) -#define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val)) -#define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val)) -#define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val)) -#define 
kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val)) -#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0]) -#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val)) - -#define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val)) -#define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val)) -#define kvm_set_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] |= (val)) -#define kvm_clear_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val)) -#define kvm_change_c0_guest_cause(cop0, change, val) \ -{ \ - kvm_clear_c0_guest_cause(cop0, change); \ - kvm_set_c0_guest_cause(cop0, ((val) & (change))); \ +#define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0]) +#define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val) +#define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0]) +#define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0]) +#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0]) +#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val)) +#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2]) +#define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0]) +#define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val)) +#define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0]) +#define kvm_write_c0_guest_wired(cop0, val) (cop0->reg[MIPS_CP0_TLB_WIRED][0] = (val)) +#define kvm_read_c0_guest_hwrena(cop0) (cop0->reg[MIPS_CP0_HWRENA][0]) +#define kvm_write_c0_guest_hwrena(cop0, val) (cop0->reg[MIPS_CP0_HWRENA][0] = (val)) +#define kvm_read_c0_guest_badvaddr(cop0) (cop0->reg[MIPS_CP0_BAD_VADDR][0]) +#define kvm_write_c0_guest_badvaddr(cop0, val) (cop0->reg[MIPS_CP0_BAD_VADDR][0] = (val)) +#define kvm_read_c0_guest_count(cop0) (cop0->reg[MIPS_CP0_COUNT][0]) +#define kvm_write_c0_guest_count(cop0, val) (cop0->reg[MIPS_CP0_COUNT][0] = (val)) +#define kvm_read_c0_guest_entryhi(cop0) (cop0->reg[MIPS_CP0_TLB_HI][0]) +#define kvm_write_c0_guest_entryhi(cop0, val) (cop0->reg[MIPS_CP0_TLB_HI][0] = (val)) +#define kvm_read_c0_guest_compare(cop0) (cop0->reg[MIPS_CP0_COMPARE][0]) +#define kvm_write_c0_guest_compare(cop0, val) (cop0->reg[MIPS_CP0_COMPARE][0] = (val)) +#define kvm_read_c0_guest_status(cop0) (cop0->reg[MIPS_CP0_STATUS][0]) +#define kvm_write_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] = (val)) +#define kvm_read_c0_guest_intctl(cop0) (cop0->reg[MIPS_CP0_STATUS][1]) +#define kvm_write_c0_guest_intctl(cop0, val) (cop0->reg[MIPS_CP0_STATUS][1] = (val)) +#define kvm_read_c0_guest_cause(cop0) (cop0->reg[MIPS_CP0_CAUSE][0]) +#define kvm_write_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] = (val)) +#define kvm_read_c0_guest_epc(cop0) (cop0->reg[MIPS_CP0_EXC_PC][0]) +#define kvm_write_c0_guest_epc(cop0, val) (cop0->reg[MIPS_CP0_EXC_PC][0] = (val)) +#define kvm_read_c0_guest_prid(cop0) (cop0->reg[MIPS_CP0_PRID][0]) +#define kvm_write_c0_guest_prid(cop0, val) (cop0->reg[MIPS_CP0_PRID][0] = (val)) +#define kvm_read_c0_guest_ebase(cop0) (cop0->reg[MIPS_CP0_PRID][1]) +#define kvm_write_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] = (val)) +#define kvm_read_c0_guest_config(cop0) (cop0->reg[MIPS_CP0_CONFIG][0]) +#define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1]) +#define kvm_read_c0_guest_config2(cop0) 
(cop0->reg[MIPS_CP0_CONFIG][2]) +#define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3]) +#define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7]) +#define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val)) +#define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val)) +#define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val)) +#define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val)) +#define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val)) +#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0]) +#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val)) + +#define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val)) +#define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val)) +#define kvm_set_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] |= (val)) +#define kvm_clear_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val)) +#define kvm_change_c0_guest_cause(cop0, change, val) \ +{ \ + kvm_clear_c0_guest_cause(cop0, change); \ + kvm_set_c0_guest_cause(cop0, ((val) & (change))); \ } -#define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val)) -#define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val)) -#define kvm_change_c0_guest_ebase(cop0, change, val) \ -{ \ - kvm_clear_c0_guest_ebase(cop0, change); \ - kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \ +#define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val)) +#define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val)) +#define kvm_change_c0_guest_ebase(cop0, change, val) \ +{ \ + kvm_clear_c0_guest_ebase(cop0, change); \ + kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \ } diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c index 4b6274b47f3..e3fec99941a 100644 --- a/arch/mips/kvm/kvm_mips_emul.c +++ b/arch/mips/kvm/kvm_mips_emul.c @@ -436,13 +436,6 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, sel = inst & 0x7; co_bit = (inst >> 25) & 1; - /* Verify that the register is valid */ - if (rd > MIPS_CP0_DESAVE) { - printk("Invalid rd: %d\n", rd); - er = EMULATE_FAIL; - goto done; - } - if (co_bit) { op = (inst) & 0xff; @@ -1542,8 +1535,15 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, } if ((inst & OPCODE) == SPEC3 && (inst & FUNC) == RDHWR) { + int usermode = !KVM_GUEST_KERNEL_MODE(vcpu); int rd = (inst & RD) >> 11; int rt = (inst & RT) >> 16; + /* If usermode, check RDHWR rd is allowed by guest HWREna */ + if (usermode && !(kvm_read_c0_guest_hwrena(cop0) & BIT(rd))) { + kvm_debug("RDHWR %#x disallowed by HWREna @ %p\n", + rd, opc); + goto emulate_ri; + } switch (rd) { case 0: /* CPU number */ arch->gprs[rt] = 0; @@ -1567,31 +1567,27 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, } break; case 29: -#if 1 arch->gprs[rt] = kvm_read_c0_guest_userlocal(cop0); -#else - /* UserLocal not implemented */ - er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); -#endif break; default: - printk("RDHWR not supported\n"); - er = EMULATE_FAIL; - break; + kvm_debug("RDHWR %#x not supported @ %p\n", rd, opc); + goto emulate_ri; } } else { - printk("Emulate RI not supported @ %p: %#x\n", opc, inst); - er = EMULATE_FAIL; + kvm_debug("Emulate RI not supported @ %p: %#x\n", opc, inst); + goto emulate_ri; } + return 
EMULATE_DONE; + +emulate_ri: /* - * Rollback PC only if emulation was unsuccessful + * Rollback PC (if in branch delay slot then the PC already points to + * branch target), and pass the RI exception to the guest OS. */ - if (er == EMULATE_FAIL) { - vcpu->arch.pc = curr_pc; - } - return er; + vcpu->arch.pc = curr_pc; + return kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); } enum emulation_result diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index c36cd35e03f..68897fc6595 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -19,10 +19,19 @@ #include <linux/kvm.h> #include <asm/debug.h> #include <asm/cpu.h> +#include <asm/isc.h> #define KVM_MAX_VCPUS 64 #define KVM_USER_MEM_SLOTS 32 +/* + * These seem to be used for allocating ->chip in the routing table, + * which we don't use. 4096 is an out-of-thin-air value. If we need + * to look at ->chip later on, we'll need to revisit this. + */ +#define KVM_NR_IRQCHIPS 1 +#define KVM_IRQCHIP_NUM_PINS 4096 + struct sca_entry { atomic_t scn; __u32 reserved; @@ -244,6 +253,27 @@ struct kvm_vm_stat { struct kvm_arch_memory_slot { }; +struct s390_map_info { + struct list_head list; + __u64 guest_addr; + __u64 addr; + struct page *page; +}; + +struct s390_io_adapter { + unsigned int id; + int isc; + bool maskable; + bool masked; + bool swap; + struct rw_semaphore maps_lock; + struct list_head maps; + atomic_t nr_maps; +}; + +#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) +#define MAX_S390_ADAPTER_MAPS 256 + struct kvm_arch{ struct sca_block *sca; debug_info_t *dbf; @@ -251,6 +281,8 @@ struct kvm_arch{ struct kvm_device *flic; struct gmap *gmap; int css_support; + int use_irqchip; + struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; }; #define KVM_HVA_ERR_BAD (-1UL) diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 2f0ade24f96..c003c6a73b1 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -22,6 +22,8 @@ #define KVM_DEV_FLIC_CLEAR_IRQS 3 #define KVM_DEV_FLIC_APF_ENABLE 4 #define KVM_DEV_FLIC_APF_DISABLE_WAIT 5 +#define KVM_DEV_FLIC_ADAPTER_REGISTER 6 +#define KVM_DEV_FLIC_ADAPTER_MODIFY 7 /* * We can have up to 4*64k pending subchannels + 8 adapter interrupts, * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. @@ -32,6 +34,26 @@ #define KVM_S390_MAX_FLOAT_IRQS 266250 #define KVM_S390_FLIC_MAX_BUFFER 0x2000000 +struct kvm_s390_io_adapter { + __u32 id; + __u8 isc; + __u8 maskable; + __u8 swap; + __u8 pad; +}; + +#define KVM_S390_IO_ADAPTER_MASK 1 +#define KVM_S390_IO_ADAPTER_MAP 2 +#define KVM_S390_IO_ADAPTER_UNMAP 3 + +struct kvm_s390_io_adapter_req { + __u32 id; + __u8 type; + __u8 mask; + __u16 pad0; + __u64 addr; +}; + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { /* general purpose regs for s390 */ diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index c8bacbcd2e5..10d529ac982 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -25,6 +25,8 @@ config KVM select HAVE_KVM_EVENTFD select KVM_ASYNC_PF select KVM_ASYNC_PF_SYNC + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index a47d2c355f6..d3adb37e93a 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -7,7 +7,7 @@ # as published by the Free Software Foundation. 
KVM := ../../../virt/kvm -common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 05bffd74961..200a8f9390b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1,7 +1,7 @@ /* * handling kvm guest interrupts * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008,2014 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -13,6 +13,7 @@ #include <linux/interrupt.h> #include <linux/kvm_host.h> #include <linux/hrtimer.h> +#include <linux/mmu_context.h> #include <linux/signal.h> #include <linux/slab.h> #include <asm/asm-offsets.h> @@ -1068,6 +1069,171 @@ static int enqueue_floating_irq(struct kvm_device *dev, return r; } +static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id) +{ + if (id >= MAX_S390_IO_ADAPTERS) + return NULL; + return kvm->arch.adapters[id]; +} + +static int register_io_adapter(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct s390_io_adapter *adapter; + struct kvm_s390_io_adapter adapter_info; + + if (copy_from_user(&adapter_info, + (void __user *)attr->addr, sizeof(adapter_info))) + return -EFAULT; + + if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) || + (dev->kvm->arch.adapters[adapter_info.id] != NULL)) + return -EINVAL; + + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) + return -ENOMEM; + + INIT_LIST_HEAD(&adapter->maps); + init_rwsem(&adapter->maps_lock); + atomic_set(&adapter->nr_maps, 0); + adapter->id = adapter_info.id; + adapter->isc = adapter_info.isc; + adapter->maskable = adapter_info.maskable; + adapter->masked = false; + adapter->swap = adapter_info.swap; + dev->kvm->arch.adapters[adapter->id] = adapter; + + return 0; +} + +int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked) +{ + int ret; + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + + if (!adapter || !adapter->maskable) + return -EINVAL; + ret = adapter->masked; + adapter->masked = masked; + return ret; +} + +static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr) +{ + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + struct s390_map_info *map; + int ret; + + if (!adapter || !addr) + return -EINVAL; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + ret = -ENOMEM; + goto out; + } + INIT_LIST_HEAD(&map->list); + map->guest_addr = addr; + map->addr = gmap_translate(addr, kvm->arch.gmap); + if (map->addr == -EFAULT) { + ret = -EFAULT; + goto out; + } + ret = get_user_pages_fast(map->addr, 1, 1, &map->page); + if (ret < 0) + goto out; + BUG_ON(ret != 1); + down_write(&adapter->maps_lock); + if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) { + list_add_tail(&map->list, &adapter->maps); + ret = 0; + } else { + put_page(map->page); + ret = -EINVAL; + } + up_write(&adapter->maps_lock); +out: + if (ret) + kfree(map); + return ret; +} + +static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr) +{ + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + struct s390_map_info *map, *tmp; + int found = 0; + + if (!adapter || !addr) + return -EINVAL; + + down_write(&adapter->maps_lock); + list_for_each_entry_safe(map, tmp, &adapter->maps, list) { + if (map->guest_addr == addr) { + found = 1; + 
atomic_dec(&adapter->nr_maps); + list_del(&map->list); + put_page(map->page); + kfree(map); + break; + } + } + up_write(&adapter->maps_lock); + + return found ? 0 : -EINVAL; +} + +void kvm_s390_destroy_adapters(struct kvm *kvm) +{ + int i; + struct s390_map_info *map, *tmp; + + for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) { + if (!kvm->arch.adapters[i]) + continue; + list_for_each_entry_safe(map, tmp, + &kvm->arch.adapters[i]->maps, list) { + list_del(&map->list); + put_page(map->page); + kfree(map); + } + kfree(kvm->arch.adapters[i]); + } +} + +static int modify_io_adapter(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct kvm_s390_io_adapter_req req; + struct s390_io_adapter *adapter; + int ret; + + if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) + return -EFAULT; + + adapter = get_io_adapter(dev->kvm, req.id); + if (!adapter) + return -EINVAL; + switch (req.type) { + case KVM_S390_IO_ADAPTER_MASK: + ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask); + if (ret > 0) + ret = 0; + break; + case KVM_S390_IO_ADAPTER_MAP: + ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr); + break; + case KVM_S390_IO_ADAPTER_UNMAP: + ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr); + break; + default: + ret = -EINVAL; + } + + return ret; +} + static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r = 0; @@ -1096,6 +1262,12 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) kvm_for_each_vcpu(i, vcpu, dev->kvm) kvm_clear_async_pf_completion_queue(vcpu); break; + case KVM_DEV_FLIC_ADAPTER_REGISTER: + r = register_io_adapter(dev, attr); + break; + case KVM_DEV_FLIC_ADAPTER_MODIFY: + r = modify_io_adapter(dev, attr); + break; default: r = -EINVAL; } @@ -1127,3 +1299,123 @@ struct kvm_device_ops kvm_flic_ops = { .create = flic_create, .destroy = flic_destroy, }; + +static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap) +{ + unsigned long bit; + + bit = bit_nr + (addr % PAGE_SIZE) * 8; + + return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit; +} + +static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter, + u64 addr) +{ + struct s390_map_info *map; + + if (!adapter) + return NULL; + + list_for_each_entry(map, &adapter->maps, list) { + if (map->guest_addr == addr) + return map; + } + return NULL; +} + +static int adapter_indicators_set(struct kvm *kvm, + struct s390_io_adapter *adapter, + struct kvm_s390_adapter_int *adapter_int) +{ + unsigned long bit; + int summary_set, idx; + struct s390_map_info *info; + void *map; + + info = get_map_info(adapter, adapter_int->ind_addr); + if (!info) + return -1; + map = page_address(info->page); + bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap); + set_bit(bit, map); + idx = srcu_read_lock(&kvm->srcu); + mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT); + set_page_dirty_lock(info->page); + info = get_map_info(adapter, adapter_int->summary_addr); + if (!info) { + srcu_read_unlock(&kvm->srcu, idx); + return -1; + } + map = page_address(info->page); + bit = get_ind_bit(info->addr, adapter_int->summary_offset, + adapter->swap); + summary_set = test_and_set_bit(bit, map); + mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT); + set_page_dirty_lock(info->page); + srcu_read_unlock(&kvm->srcu, idx); + return summary_set ? 
0 : 1; +} + +/* + * < 0 - not injected due to error + * = 0 - coalesced, summary indicator already active + * > 0 - injected interrupt + */ +static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + int ret; + struct s390_io_adapter *adapter; + + /* We're only interested in the 0->1 transition. */ + if (!level) + return 0; + adapter = get_io_adapter(kvm, e->adapter.adapter_id); + if (!adapter) + return -1; + down_read(&adapter->maps_lock); + ret = adapter_indicators_set(kvm, adapter, &e->adapter); + up_read(&adapter->maps_lock); + if ((ret > 0) && !adapter->masked) { + struct kvm_s390_interrupt s390int = { + .type = KVM_S390_INT_IO(1, 0, 0, 0), + .parm = 0, + .parm64 = (adapter->isc << 27) | 0x80000000, + }; + ret = kvm_s390_inject_vm(kvm, &s390int); + if (ret == 0) + ret = 1; + } + return ret; +} + +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int ret; + + switch (ue->type) { + case KVM_IRQ_ROUTING_S390_ADAPTER: + e->set = set_adapter_int; + e->adapter.summary_addr = ue->u.adapter.summary_addr; + e->adapter.ind_addr = ue->u.adapter.ind_addr; + e->adapter.summary_offset = ue->u.adapter.summary_offset; + e->adapter.ind_offset = ue->u.adapter.ind_offset; + e->adapter.adapter_id = ue->u.adapter.adapter_id; + ret = 0; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, + int irq_source_id, int level, bool line_status) +{ + return -EINVAL; +} diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h new file mode 100644 index 00000000000..d98e4159643 --- /dev/null +++ b/arch/s390/kvm/irq.h @@ -0,0 +1,22 @@ +/* + * s390 irqchip routines + * + * Copyright IBM Corp. 2014 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + */ +#ifndef __KVM_IRQ_H +#define __KVM_IRQ_H + +#include <linux/kvm_host.h> + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + return 1; +} + +#endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 83b79447de5..6e1b990e427 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -159,6 +159,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_S390_CSS_SUPPORT: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: + case KVM_CAP_ENABLE_CAP_VM: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -187,6 +188,25 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, return 0; } +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_S390_IRQCHIP: + kvm->arch.use_irqchip = 1; + r = 0; + break; + default: + r = -EINVAL; + break; + } + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -204,6 +224,26 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_s390_inject_vm(kvm, &s390int); break; } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + break; + r = kvm_vm_ioctl_enable_cap(kvm, &cap); + break; + } + case KVM_CREATE_IRQCHIP: { + struct kvm_irq_routing_entry routing; + + r = -EINVAL; + if (kvm->arch.use_irqchip) { + /* Set up dummy routing. 
*/ + memset(&routing, 0, sizeof(routing)); + kvm_set_irq_routing(kvm, &routing, 0, 0); + r = 0; + } + break; + } default: r = -ENOTTY; } @@ -265,6 +305,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) } kvm->arch.css_support = 0; + kvm->arch.use_irqchip = 0; return 0; out_nogmap: @@ -324,6 +365,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) debug_unregister(kvm->arch.dbf); if (!kvm_is_ucontrol(kvm)) gmap_free(kvm->arch.gmap); + kvm_s390_destroy_adapters(kvm); } /* Section: vcpu related */ diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 6311170843b..660e79f8f8e 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -137,6 +137,7 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); +int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); /* implemented in priv.c */ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); @@ -163,5 +164,6 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); /* implemented in interrupt.c */ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int psw_extint_disabled(struct kvm_vcpu *vcpu); +void kvm_s390_destroy_adapters(struct kvm *kvm); #endif diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index ddc8a7e165d..64fae65730f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -43,6 +43,16 @@ static u32 xstate_required_size(u64 xstate_bv) return ret; } +u64 kvm_supported_xcr0(void) +{ + u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0; + + if (!kvm_x86_ops->mpx_supported()) + xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR); + + return xcr0; +} + void kvm_update_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -73,7 +83,7 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu) } else { vcpu->arch.guest_supported_xcr0 = (best->eax | ((u64)best->edx << 32)) & - host_xcr0 & KVM_SUPPORTED_XCR0; + kvm_supported_xcr0(); vcpu->arch.guest_xstate_size = best->ebx = xstate_required_size(vcpu->arch.xcr0); } @@ -210,13 +220,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->flags = 0; } -static bool supported_xcr0_bit(unsigned bit) -{ - u64 mask = ((u64)1 << bit); - - return mask & KVM_SUPPORTED_XCR0 & host_xcr0; -} - #define F(x) bit(X86_FEATURE_##x) static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, @@ -256,8 +259,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, #endif unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; - unsigned f_mpx = kvm_x86_ops->mpx_supported ? - (kvm_x86_ops->mpx_supported() ? F(MPX) : 0) : 0; + unsigned f_mpx = kvm_x86_ops->mpx_supported() ? 
F(MPX) : 0; /* cpuid 1.edx */ const u32 kvm_supported_word0_x86_features = @@ -439,16 +441,18 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, } case 0xd: { int idx, i; + u64 supported = kvm_supported_xcr0(); - entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0; - entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32; + entry->eax &= supported; + entry->edx &= supported >> 32; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; for (idx = 1, i = 1; idx < 64; ++idx) { + u64 mask = ((u64)1 << idx); if (*nent >= maxnent) goto out; do_cpuid_1_ent(&entry[i], function, idx); - if (entry[i].eax == 0 || !supported_xcr0_bit(idx)) + if (entry[i].eax == 0 || !(supported & mask)) continue; entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a449c3d76cb..2136cb6ab13 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4089,6 +4089,11 @@ static bool svm_invpcid_supported(void) return false; } +static bool svm_mpx_supported(void) +{ + return false; +} + static bool svm_has_wbinvd_exit(void) { return true; @@ -4371,6 +4376,7 @@ static struct kvm_x86_ops svm_x86_ops = { .rdtscp_supported = svm_rdtscp_supported, .invpcid_supported = svm_invpcid_supported, + .mpx_supported = svm_mpx_supported, .set_supported_cpuid = svm_set_supported_cpuid, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f4e5aeda5ed..1320e0f8e61 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -206,6 +206,7 @@ struct __packed vmcs12 { u64 guest_pdptr1; u64 guest_pdptr2; u64 guest_pdptr3; + u64 guest_bndcfgs; u64 host_ia32_pat; u64 host_ia32_efer; u64 host_ia32_perf_global_ctrl; @@ -541,6 +542,7 @@ static const unsigned long shadow_read_write_fields[] = { GUEST_CS_LIMIT, GUEST_CS_BASE, GUEST_ES_BASE, + GUEST_BNDCFGS, CR0_GUEST_HOST_MASK, CR0_READ_SHADOW, CR4_READ_SHADOW, @@ -596,6 +598,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD64(GUEST_PDPTR1, guest_pdptr1), FIELD64(GUEST_PDPTR2, guest_pdptr2), FIELD64(GUEST_PDPTR3, guest_pdptr3), + FIELD64(GUEST_BNDCFGS, guest_bndcfgs), FIELD64(HOST_IA32_PAT, host_ia32_pat), FIELD64(HOST_IA32_EFER, host_ia32_efer), FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), @@ -726,6 +729,7 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); static u64 construct_eptp(unsigned long root_hpa); static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); +static bool vmx_mpx_supported(void); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -736,6 +740,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); +static bool vmx_mpx_supported(void); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -2287,6 +2292,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; + if (vmx_mpx_supported()) + nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, @@ -2300,6 +2307,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) VM_ENTRY_LOAD_IA32_PAT; nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | 
VM_ENTRY_LOAD_IA32_EFER); + if (vmx_mpx_supported()) + nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, @@ -2493,6 +2502,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) data = vmcs_readl(GUEST_SYSENTER_ESP); break; case MSR_IA32_BNDCFGS: + if (!vmx_mpx_supported()) + return 1; data = vmcs_read64(GUEST_BNDCFGS); break; case MSR_IA32_FEATURE_CONTROL: @@ -2564,6 +2575,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_BNDCFGS: + if (!vmx_mpx_supported()) + return 1; vmcs_write64(GUEST_BNDCFGS, data); break; case MSR_IA32_TSC: @@ -7866,6 +7879,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) set_cr4_guest_host_mask(vmx); + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); @@ -8351,6 +8367,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); + if (vmx_mpx_supported()) + vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); /* update exit information fields: */ @@ -8460,6 +8478,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); + /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */ + if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS) + vmcs_write64(GUEST_BNDCFGS, 0); + if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); vcpu->arch.pat = vmcs12->host_ia32_pat; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a37da6b0165..aa986959f23 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3084,9 +3084,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility * with old userspace. */ - if (xstate_bv & ~KVM_SUPPORTED_XCR0) - return -EINVAL; - if (xstate_bv & ~host_xcr0) + if (xstate_bv & ~kvm_supported_xcr0()) return -EINVAL; memcpy(&vcpu->arch.guest_fpu.state->xsave, guest_xsave->region, vcpu->arch.guest_xstate_size); @@ -3939,6 +3937,23 @@ static void kvm_init_msr_list(void) for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) { if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) continue; + + /* + * Even MSRs that are valid in the host may not be exposed + * to the guests in some cases. We could work around this + * in VMX with the generic MSR save/load machinery, but it + * is not really worthwhile since it will really only + * happen with nested virtualization. 
+ */ + switch (msrs_to_save[i]) { + case MSR_IA32_BNDCFGS: + if (!kvm_x86_ops->mpx_supported()) + continue; + break; + default: + break; + } + if (j < i) msrs_to_save[j] = msrs_to_save[i]; j++; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 392ecbff003..8c97bac9a89 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -126,6 +126,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | XSTATE_BNDREGS | XSTATE_BNDCSR) extern u64 host_xcr0; +extern u64 kvm_supported_xcr0(void); + extern unsigned int min_timer_period_us; extern struct static_key kvm_no_apic_vcpu; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9816b68b085..7d21cf9f438 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -297,6 +297,14 @@ static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memsl return ALIGN(memslot->npages, BITS_PER_LONG) / 8; } +struct kvm_s390_adapter_int { + u64 ind_addr; + u64 summary_addr; + u64 ind_offset; + u32 summary_offset; + u32 adapter_id; +}; + struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; @@ -309,6 +317,7 @@ struct kvm_kernel_irq_routing_entry { unsigned pin; } irqchip; struct msi_msg msi; + struct kvm_s390_adapter_int adapter; }; struct hlist_node link; }; @@ -913,7 +922,11 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_S390 +#define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that... +#else #define KVM_MAX_IRQ_ROUTES 1024 +#endif int kvm_setup_default_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a7518be31d5..a8f4ee5d2e8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -741,6 +741,8 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_EXT_EMUL_CPUID 95 #define KVM_CAP_HYPERV_TIME 96 #define KVM_CAP_IOAPIC_POLARITY_IGNORED 97 +#define KVM_CAP_ENABLE_CAP_VM 98 +#define KVM_CAP_S390_IRQCHIP 99 #ifdef KVM_CAP_IRQ_ROUTING @@ -756,9 +758,18 @@ struct kvm_irq_routing_msi { __u32 pad; }; +struct kvm_irq_routing_s390_adapter { + __u64 ind_addr; + __u64 summary_addr; + __u64 ind_offset; + __u32 summary_offset; + __u32 adapter_id; +}; + /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 +#define KVM_IRQ_ROUTING_S390_ADAPTER 3 struct kvm_irq_routing_entry { __u32 gsi; @@ -768,6 +779,7 @@ struct kvm_irq_routing_entry { union { struct kvm_irq_routing_irqchip irqchip; struct kvm_irq_routing_msi msi; + struct kvm_irq_routing_s390_adapter adapter; __u32 pad[8]; } u; }; @@ -1076,6 +1088,10 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_DEBUGREGS */ #define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) #define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) +/* + * vcpu version available with KVM_ENABLE_CAP + * vm version available with KVM_CAP_ENABLE_CAP_VM + */ #define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) /* Available with KVM_CAP_XSAVE */ #define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index abe4d6043b3..29c2a04e036 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -391,19 +391,19 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) lockdep_is_held(&kvm->irqfds.lock)); irqfd_update(kvm, irqfd, irq_rt); - events = f.file->f_op->poll(f.file, &irqfd->pt); - list_add_tail(&irqfd->list, &kvm->irqfds.items); + 
spin_unlock_irq(&kvm->irqfds.lock); + /* * Check if there was an event already pending on the eventfd * before we registered, and trigger it as if we didn't miss it. */ + events = f.file->f_op->poll(f.file, &irqfd->pt); + if (events & POLLIN) schedule_work(&irqfd->inject); - spin_unlock_irq(&kvm->irqfds.lock); - /* * do not drop the file until the irqfd is fully initialized, otherwise * we might race against the POLLHUP diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 1539d3757a0..d4b601547f1 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -50,7 +50,7 @@ #else #define ioapic_debug(fmt, arg...) #endif -static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq, +static int ioapic_service(struct kvm_ioapic *vioapic, int irq, bool line_status); static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, @@ -163,23 +163,67 @@ static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic) return false; } -static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx, - bool line_status) +static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, + int irq_level, bool line_status) { - union kvm_ioapic_redirect_entry *pent; - int injected = -1; + union kvm_ioapic_redirect_entry entry; + u32 mask = 1 << irq; + u32 old_irr; + int edge, ret; - pent = &ioapic->redirtbl[idx]; + entry = ioapic->redirtbl[irq]; + edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); - if (!pent->fields.mask) { - injected = ioapic_deliver(ioapic, idx, line_status); - if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) - pent->fields.remote_irr = 1; + if (!irq_level) { + ioapic->irr &= ~mask; + ret = 1; + goto out; + } + + /* + * Return 0 for coalesced interrupts; for edge-triggered interrupts, + * this only happens if a previous edge has not been delivered due + * to masking. For level interrupts, the remote_irr field tells + * us if the interrupt is waiting for an EOI. + * + * RTC is special: it is edge-triggered, but userspace likes to know + * if it has been already ack-ed via EOI because coalesced RTC + * interrupts lead to time drift in Windows guests. So we track + * EOI manually for the RTC interrupt.
+ */ + if (irq == RTC_GSI && line_status && + rtc_irq_check_coalesced(ioapic)) { + ret = 0; + goto out; } - return injected; + old_irr = ioapic->irr; + ioapic->irr |= mask; + if ((edge && old_irr == ioapic->irr) || + (!edge && entry.fields.remote_irr)) { + ret = 0; + goto out; + } + + ret = ioapic_service(ioapic, irq, line_status); + +out: + trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); + return ret; +} + +static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) +{ + u32 idx; + + rtc_irq_eoi_tracking_reset(ioapic); + for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS) + ioapic_set_irq(ioapic, idx, 1, true); + + kvm_rtc_eoi_tracking_restore_all(ioapic); } + static void update_handled_vectors(struct kvm_ioapic *ioapic) { DECLARE_BITMAP(handled_vectors, 256); @@ -282,12 +326,15 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) } } -static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status) +static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) { union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; struct kvm_lapic_irq irqe; int ret; + if (entry->fields.mask) + return -1; + ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " "vector=%x trig_mode=%x\n", entry->fields.dest_id, entry->fields.dest_mode, @@ -302,6 +349,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status) irqe.level = 1; irqe.shorthand = 0; + if (irqe.trig_mode == IOAPIC_EDGE_TRIG) + ioapic->irr &= ~(1 << irq); + if (irq == RTC_GSI && line_status) { BUG_ON(ioapic->rtc_status.pending_eoi != 0); ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, @@ -310,44 +360,24 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status) } else ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); + if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG) + entry->fields.remote_irr = 1; + return ret; } int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, int level, bool line_status) { - u32 old_irr; - u32 mask = 1 << irq; - union kvm_ioapic_redirect_entry entry; int ret, irq_level; BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS); spin_lock(&ioapic->lock); - old_irr = ioapic->irr; irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], irq_source_id, level); - entry = ioapic->redirtbl[irq]; - if (!irq_level) { - ioapic->irr &= ~mask; - ret = 1; - } else { - int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); + ret = ioapic_set_irq(ioapic, irq, irq_level, line_status); - if (irq == RTC_GSI && line_status && - rtc_irq_check_coalesced(ioapic)) { - ret = 0; /* coalesced */ - goto out; - } - ioapic->irr |= mask; - if ((edge && old_irr != ioapic->irr) || - (!edge && !entry.fields.remote_irr)) - ret = ioapic_service(ioapic, irq, line_status); - else - ret = 0; /* report coalesced interrupt */ - } -out: - trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); spin_unlock(&ioapic->lock); return ret; @@ -393,7 +423,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); ent->fields.remote_irr = 0; - if (!ent->fields.mask && (ioapic->irr & (1 << i))) + if (ioapic->irr & (1 << i)) ioapic_service(ioapic, i, false); } } @@ -594,9 +624,10 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) spin_lock(&ioapic->lock); memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); + ioapic->irr = 0; update_handled_vectors(ioapic); kvm_vcpu_request_scan_ioapic(kvm); - kvm_rtc_eoi_tracking_restore_all(ioapic); 
+ kvm_ioapic_inject_all(ioapic, state->irr); spin_unlock(&ioapic->lock); return 0; }
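
Usage sketches (editor's addition, not part of the commit). The examples below illustrate the interfaces this patch adds; descriptor names such as vm_fd and flic_fd are placeholders for handles obtained through the usual /dev/kvm, KVM_CREATE_VM and KVM_CREATE_DEVICE sequence, and all numeric values are made up.

Enabling the s390 irqchip: as the api.txt hunk states, KVM_CREATE_IRQCHIP on s390 only works once the vm-wide KVM_CAP_S390_IRQCHIP capability has been enabled, so a minimal setup looks like this:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Enable the vm capability first; kvm_arch_vm_ioctl() above returns
 * -EINVAL for KVM_CREATE_IRQCHIP until kvm->arch.use_irqchip is set. */
static int setup_s390_irqchip(int vm_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_S390_IRQCHIP;	/* vm ioctl, see KVM_CAP_ENABLE_CAP_VM */
	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
		return -1;

	/* On s390 this only installs the dummy routing table. */
	return ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0);
}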
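Registering an adapter interrupt source with the FLIC and pinning its indicator page uses the new KVM_DEV_FLIC_ADAPTER_* attribute groups. A sketch, assuming flic_fd refers to a FLIC created with KVM_CREATE_DEVICE (KVM_DEV_TYPE_FLIC) and ind_gaddr is a guest address inside the indicator area:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int register_and_map_adapter(int flic_fd, uint32_t id, uint64_t ind_gaddr)
{
	struct kvm_s390_io_adapter adapter = {
		.id = id,
		.isc = 3,		/* example I/O interruption subclass */
		.maskable = 1,
	};
	struct kvm_s390_io_adapter_req req = {
		.id = id,
		.type = KVM_S390_IO_ADAPTER_MAP,
		.addr = ind_gaddr,	/* gmap-translated and pinned by the kernel */
	};
	struct kvm_device_attr attr = {
		.group = KVM_DEV_FLIC_ADAPTER_REGISTER,
		.addr = (uint64_t)(uintptr_t)&adapter,
	};

	if (ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
		return -1;

	attr.group = KVM_DEV_FLIC_ADAPTER_MODIFY;
	attr.addr = (uint64_t)(uintptr_t)&req;
	return ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &attr);
}

Note that MAP requests count against MAX_S390_ADAPTER_MAPS per adapter, and each mapped page stays pinned until it is unmapped or the VM is destroyed.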
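Routing a GSI to that adapter uses the new KVM_IRQ_ROUTING_S390_ADAPTER entry type from the KVM_SET_GSI_ROUTING hunk; an eventfd attached to the GSI with KVM_IRQFD then ends up in set_adapter_int():

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int route_adapter_gsi(int vm_fd, uint32_t gsi, uint32_t adapter_id,
			     uint64_t ind_gaddr, uint64_t summary_gaddr)
{
	/* kvm_irq_routing ends in a flexible array, so embed one entry. */
	struct {
		struct kvm_irq_routing hdr;
		struct kvm_irq_routing_entry ent;
	} routing;

	memset(&routing, 0, sizeof(routing));
	routing.hdr.nr = 1;
	routing.ent.gsi = gsi;
	routing.ent.type = KVM_IRQ_ROUTING_S390_ADAPTER;
	routing.ent.u.adapter.ind_addr = ind_gaddr;
	routing.ent.u.adapter.summary_addr = summary_gaddr;
	routing.ent.u.adapter.ind_offset = 0;		/* bit in the indicator area */
	routing.ent.u.adapter.summary_offset = 7;	/* bit in the summary byte */
	routing.ent.u.adapter.adapter_id = adapter_id;

	return ioctl(vm_fd, KVM_SET_GSI_ROUTING, &routing);
}

Both ind_addr and summary_addr must have been mapped via KVM_S390_IO_ADAPTER_MAP beforehand; otherwise adapter_indicators_set() cannot find them and the injection fails.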
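The indicator-bit arithmetic in get_ind_bit() rewards a worked example: the byte offset of the pinned address within its page is scaled to bits, the requested bit number is added, and for byte-swapped (big-endian) indicators the resulting index is mirrored inside a 64-bit word. A self-contained sketch, with values picked purely for illustration:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define BITS_PER_LONG	64

/* Mirrors get_ind_bit() from arch/s390/kvm/interrupt.c above. */
static unsigned long ind_bit(unsigned long addr, unsigned long bit_nr, int swap)
{
	unsigned long bit = bit_nr + (addr % PAGE_SIZE) * 8;

	return swap ? bit ^ (BITS_PER_LONG - 1) : bit;
}

int main(void)
{
	/* Indicator byte at offset 0x10 into the page, bit 3 requested. */
	printf("%lu\n", ind_bit(0x10, 3, 0));	/* 16 * 8 + 3 = 131 */
	printf("%lu\n", ind_bit(0x10, 3, 1));	/* 131 ^ 63 = 188 */
	return 0;
}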
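On the MIPS side, the kvm_mips_handle_ri() hunk changes guest-visible behaviour: user-mode RDHWR emulation is now gated on the guest's HWREna register, and any disallowed or unsupported case is forwarded to the guest as a Reserved Instruction exception instead of failing emulation. For context, this is the kind of guest user code affected (the usual UserLocal/TLS read; illustration only, not from the patch):

/* Guest user-mode read of hardware register 29 (UserLocal), as done by
 * libc for TLS. With this patch, KVM emulates it in user mode only if
 * bit 29 of the guest HWREna is set; otherwise the guest kernel gets
 * to handle the RI exception itself, matching real hardware. */
static inline unsigned long read_userlocal(void)
{
	unsigned long tls;

	__asm__ __volatile__(
		"	.set	push\n"
		"	.set	mips32r2\n"
		"	rdhwr	%0, $29\n"
		"	.set	pop\n"
		: "=r" (tls));
	return tls;
}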