From 344d9588a9df06182684168be4f1408b55c7da3e Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 14 Oct 2010 11:22:50 +0200 Subject: KVM: Add PV MSR to enable asynchronous page faults delivery. Guest enables async PF vcpu functionality using this MSR. Reviewed-by: Rik van Riel Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- Documentation/kvm/cpuid.txt | 3 +++ Documentation/kvm/msr.txt | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'Documentation/kvm') diff --git a/Documentation/kvm/cpuid.txt b/Documentation/kvm/cpuid.txt index 14a12ea92b7..882068538c9 100644 --- a/Documentation/kvm/cpuid.txt +++ b/Documentation/kvm/cpuid.txt @@ -36,6 +36,9 @@ KVM_FEATURE_MMU_OP || 2 || deprecated. KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs || || 0x4b564d00 and 0x4b564d01 ------------------------------------------------------------------------------ +KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by + || || writing to msr 0x4b564d02 +------------------------------------------------------------------------------ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side || || per-cpu warps are expected in || || kvmclock. diff --git a/Documentation/kvm/msr.txt b/Documentation/kvm/msr.txt index 8ddcfe84c09..e67b4a8783d 100644 --- a/Documentation/kvm/msr.txt +++ b/Documentation/kvm/msr.txt @@ -3,7 +3,6 @@ Glauber Costa , Red Hat Inc, 2010 ===================================================== KVM makes use of some custom MSRs to service some requests. -At present, this facility is only used by kvmclock. Custom MSRs have a range reserved for them, that goes from 0x4b564d00 to 0x4b564dff. There are MSRs outside this area, @@ -151,3 +150,37 @@ MSR_KVM_SYSTEM_TIME: 0x12 return PRESENT; } else return NON_PRESENT; + +MSR_KVM_ASYNC_PF_EN: 0x4b564d02 + data: Bits 63-6 hold 64-byte aligned physical address of a + 64 byte memory area which must be in guest RAM and must be + zeroed. Bits 5-1 are reserved and should be zero. Bit 0 is 1 + when asynchronous page faults are enabled on the vcpu 0 when + disabled. + + First 4 byte of 64 byte memory location will be written to by + the hypervisor at the time of asynchronous page fault (APF) + injection to indicate type of asynchronous page fault. Value + of 1 means that the page referred to by the page fault is not + present. Value 2 means that the page is now available. Disabling + interrupt inhibits APFs. Guest must not enable interrupt + before the reason is read, or it may be overwritten by another + APF. Since APF uses the same exception vector as regular page + fault guest must reset the reason to 0 before it does + something that can generate normal page fault. If during page + fault APF reason is 0 it means that this is regular page + fault. + + During delivery of type 1 APF cr2 contains a token that will + be used to notify a guest when missing page becomes + available. When page becomes available type 2 APF is sent with + cr2 set to the token associated with the page. There is special + kind of token 0xffffffff which tells vcpu that it should wake + up all processes waiting for APFs and no individual type 2 APFs + will be sent. + + If APF is disabled while there are outstanding APFs, they will + not be delivered. + + Currently type 2 APF will be always delivered on the same vcpu as + type 1 was, but guest should not rely on that. -- cgit v1.2.3-70-g09d2 From 6adba527420651b6cacaf392541c09fb108711a2 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 14 Oct 2010 11:22:55 +0200 Subject: KVM: Let host know whether the guest can handle async PF in non-userspace context. If guest can detect that it runs in non-preemptable context it can handle async PFs at any time, so let host know that it can send async PF even if guest cpu is not in userspace. Acked-by: Rik van Riel Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- Documentation/kvm/msr.txt | 5 +++-- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/asm/kvm_para.h | 1 + arch/x86/kernel/kvm.c | 3 +++ arch/x86/kvm/x86.c | 5 +++-- 5 files changed, 11 insertions(+), 4 deletions(-) (limited to 'Documentation/kvm') diff --git a/Documentation/kvm/msr.txt b/Documentation/kvm/msr.txt index e67b4a8783d..d079aed27e0 100644 --- a/Documentation/kvm/msr.txt +++ b/Documentation/kvm/msr.txt @@ -154,9 +154,10 @@ MSR_KVM_SYSTEM_TIME: 0x12 MSR_KVM_ASYNC_PF_EN: 0x4b564d02 data: Bits 63-6 hold 64-byte aligned physical address of a 64 byte memory area which must be in guest RAM and must be - zeroed. Bits 5-1 are reserved and should be zero. Bit 0 is 1 + zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1 when asynchronous page faults are enabled on the vcpu 0 when - disabled. + disabled. Bit 2 is 1 if asynchronous page faults can be injected + when vcpu is in cpl == 0. First 4 byte of 64 byte memory location will be written to by the hypervisor at the time of asynchronous page fault (APF) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 167375cc49f..b2ea42870e4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -422,6 +422,7 @@ struct kvm_vcpu_arch { struct gfn_to_hva_cache data; u64 msr_val; u32 id; + bool send_user_only; } apf; }; diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index fbfd3679bc1..d3a1a4805ab 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -38,6 +38,7 @@ #define KVM_MAX_MMU_OP_BATCH 32 #define KVM_ASYNC_PF_ENABLED (1 << 0) +#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 47ea93e6b0d..91b3d650898 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -449,6 +449,9 @@ void __cpuinit kvm_guest_cpu_init(void) if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { u64 pa = __pa(&__get_cpu_var(apf_reason)); +#ifdef CONFIG_PREEMPT + pa |= KVM_ASYNC_PF_SEND_ALWAYS; +#endif wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED); __get_cpu_var(apf_reason).enabled = 1; printk(KERN_INFO"KVM setup async PF for cpu %d\n", diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ac4c368afd4..fff70b50725 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1429,8 +1429,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) { gpa_t gpa = data & ~0x3f; - /* Bits 1:5 are resrved, Should be zero */ - if (data & 0x3e) + /* Bits 2:5 are resrved, Should be zero */ + if (data & 0x3c) return 1; vcpu->arch.apf.msr_val = data; @@ -1444,6 +1444,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa)) return 1; + vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); kvm_async_pf_wakeup_all(vcpu); return 0; } -- cgit v1.2.3-70-g09d2 From 49f481720438bbd0138218b0bcb494c3512a454f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 16 Nov 2010 22:30:07 +0100 Subject: KVM: Document device assigment API Adds API documentation for KVM_[DE]ASSIGN_PCI_DEVICE, KVM_[DE]ASSIGN_DEV_IRQ, KVM_SET_GSI_ROUTING, KVM_ASSIGN_SET_MSIX_NR, and KVM_ASSIGN_SET_MSIX_ENTRY. Acked-by: Alex Williamson Acked-by: Michael S. Tsirkin Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- Documentation/kvm/api.txt | 178 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) (limited to 'Documentation/kvm') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index b336266bea5..e1a92977833 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -1085,6 +1085,184 @@ of 4 instructions that make up a hypercall. If any additional field gets added to this structure later on, a bit for that additional piece of information will be set in the flags bitmap. +4.47 KVM_ASSIGN_PCI_DEVICE + +Capability: KVM_CAP_DEVICE_ASSIGNMENT +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_pci_dev (in) +Returns: 0 on success, -1 on error + +Assigns a host PCI device to the VM. + +struct kvm_assigned_pci_dev { + __u32 assigned_dev_id; + __u32 busnr; + __u32 devfn; + __u32 flags; + __u32 segnr; + union { + __u32 reserved[11]; + }; +}; + +The PCI device is specified by the triple segnr, busnr, and devfn. +Identification in succeeding service requests is done via assigned_dev_id. The +following flags are specified: + +/* Depends on KVM_CAP_IOMMU */ +#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + +4.48 KVM_DEASSIGN_PCI_DEVICE + +Capability: KVM_CAP_DEVICE_DEASSIGNMENT +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_pci_dev (in) +Returns: 0 on success, -1 on error + +Ends PCI device assignment, releasing all associated resources. + +See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is +used in kvm_assigned_pci_dev to identify the device. + +4.49 KVM_ASSIGN_DEV_IRQ + +Capability: KVM_CAP_ASSIGN_DEV_IRQ +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_irq (in) +Returns: 0 on success, -1 on error + +Assigns an IRQ to a passed-through device. + +struct kvm_assigned_irq { + __u32 assigned_dev_id; + __u32 host_irq; + __u32 guest_irq; + __u32 flags; + union { + struct { + __u32 addr_lo; + __u32 addr_hi; + __u32 data; + } guest_msi; + __u32 reserved[12]; + }; +}; + +The following flags are defined: + +#define KVM_DEV_IRQ_HOST_INTX (1 << 0) +#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) + +#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) +#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) + +It is not valid to specify multiple types per host or guest IRQ. However, the +IRQ type of host and guest can differ or can even be null. + +4.50 KVM_DEASSIGN_DEV_IRQ + +Capability: KVM_CAP_ASSIGN_DEV_IRQ +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_irq (in) +Returns: 0 on success, -1 on error + +Ends an IRQ assignment to a passed-through device. + +See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified +by assigned_dev_id, flags must correspond to the IRQ type specified on +KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. + +4.51 KVM_SET_GSI_ROUTING + +Capability: KVM_CAP_IRQ_ROUTING +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_irq_routing (in) +Returns: 0 on success, -1 on error + +Sets the GSI routing table entries, overwriting any previously set entries. + +struct kvm_irq_routing { + __u32 nr; + __u32 flags; + struct kvm_irq_routing_entry entries[0]; +}; + +No flags are specified so far, the corresponding field must be set to zero. + +struct kvm_irq_routing_entry { + __u32 gsi; + __u32 type; + __u32 flags; + __u32 pad; + union { + struct kvm_irq_routing_irqchip irqchip; + struct kvm_irq_routing_msi msi; + __u32 pad[8]; + } u; +}; + +/* gsi routing entry types */ +#define KVM_IRQ_ROUTING_IRQCHIP 1 +#define KVM_IRQ_ROUTING_MSI 2 + +No flags are specified so far, the corresponding field must be set to zero. + +struct kvm_irq_routing_irqchip { + __u32 irqchip; + __u32 pin; +}; + +struct kvm_irq_routing_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 pad; +}; + +4.52 KVM_ASSIGN_SET_MSIX_NR + +Capability: KVM_CAP_DEVICE_MSIX +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_msix_nr (in) +Returns: 0 on success, -1 on error + +Set the number of MSI-X interrupts for an assigned device. This service can +only be called once in the lifetime of an assigned device. + +struct kvm_assigned_msix_nr { + __u32 assigned_dev_id; + __u16 entry_nr; + __u16 padding; +}; + +#define KVM_MAX_MSIX_PER_DEV 256 + +4.53 KVM_ASSIGN_SET_MSIX_ENTRY + +Capability: KVM_CAP_DEVICE_MSIX +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_msix_entry (in) +Returns: 0 on success, -1 on error + +Specifies the routing of an MSI-X assigned device interrupt to a GSI. Setting +the GSI vector to zero means disabling the interrupt. + +struct kvm_assigned_msix_entry { + __u32 assigned_dev_id; + __u32 gsi; + __u16 entry; /* The index of entry in the MSI-X table */ + __u16 padding[3]; +}; + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by -- cgit v1.2.3-70-g09d2