summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/kvm/arm_vgic.h112
-rw-r--r--include/linux/kvm_host.h31
-rw-r--r--include/linux/kvm_types.h14
-rw-r--r--include/linux/mm.h1
-rw-r--r--include/linux/mmu_notifier.h24
-rw-r--r--include/trace/events/kvm.h36
-rw-r--r--include/uapi/linux/kvm.h28
7 files changed, 180 insertions, 66 deletions
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 35b0c121bb6..2f2aac8448a 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -25,26 +25,25 @@
#include <linux/spinlock.h>
#include <linux/types.h>
-#define VGIC_NR_IRQS 256
+#define VGIC_NR_IRQS_LEGACY 256
#define VGIC_NR_SGIS 16
#define VGIC_NR_PPIS 16
#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
-#define VGIC_MAX_CPUS KVM_MAX_VCPUS
#define VGIC_V2_MAX_LRS (1 << 6)
#define VGIC_V3_MAX_LRS 16
+#define VGIC_MAX_IRQS 1024
/* Sanity checks... */
-#if (VGIC_MAX_CPUS > 8)
+#if (KVM_MAX_VCPUS > 8)
#error Invalid number of CPU interfaces
#endif
-#if (VGIC_NR_IRQS & 31)
+#if (VGIC_NR_IRQS_LEGACY & 31)
#error "VGIC_NR_IRQS must be a multiple of 32"
#endif
-#if (VGIC_NR_IRQS > 1024)
+#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS)
#error "VGIC_NR_IRQS must be <= 1024"
#endif
@@ -54,19 +53,33 @@
* - a bunch of shared interrupts (SPI)
*/
struct vgic_bitmap {
- union {
- u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
- DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
- } percpu[VGIC_MAX_CPUS];
- union {
- u32 reg[VGIC_NR_SHARED_IRQS / 32];
- DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
- } shared;
+ /*
+ * - One UL per VCPU for private interrupts (assumes UL is at
+ * least 32 bits)
+ * - As many UL as necessary for shared interrupts.
+ *
+ * The private interrupts are accessed via the "private"
+ * field, one UL per vcpu (the state for vcpu n is in
+ * private[n]). The shared interrupts are accessed via the
+ * "shared" pointer (IRQn state is at bit n-32 in the bitmap).
+ */
+ unsigned long *private;
+ unsigned long *shared;
};
struct vgic_bytemap {
- u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
- u32 shared[VGIC_NR_SHARED_IRQS / 4];
+ /*
+ * - 8 u32 per VCPU for private interrupts
+ * - As many u32 as necessary for shared interrupts.
+ *
+ * The private interrupts are accessed via the "private"
+ * field, (the state for vcpu n is in private[n*8] to
+ * private[n*8 + 7]). The shared interrupts are accessed via
+ * the "shared" pointer (IRQn state is at byte (n-32)%4 of the
+ * shared[(n-32)/4] word).
+ */
+ u32 *private;
+ u32 *shared;
};
struct kvm_vcpu;
@@ -127,6 +140,9 @@ struct vgic_dist {
bool in_kernel;
bool ready;
+ int nr_cpus;
+ int nr_irqs;
+
/* Virtual control interface mapping */
void __iomem *vctrl_base;
@@ -140,11 +156,25 @@ struct vgic_dist {
/* Interrupt enabled (one bit per IRQ) */
struct vgic_bitmap irq_enabled;
- /* Interrupt 'pin' level */
- struct vgic_bitmap irq_state;
+ /* Level-triggered interrupt external input is asserted */
+ struct vgic_bitmap irq_level;
- /* Level-triggered interrupt in progress */
- struct vgic_bitmap irq_active;
+ /*
+ * Interrupt state is pending on the distributor
+ */
+ struct vgic_bitmap irq_pending;
+
+ /*
+ * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered
+ * interrupts. Essentially holds the state of the flip-flop in
+ * Figure 4-10 on page 4-101 in ARM IHI 0048B.b.
+ * Once set, it is only cleared for level-triggered interrupts on
+ * guest ACKs (when we queue it) or writes to GICD_ICPENDRn.
+ */
+ struct vgic_bitmap irq_soft_pend;
+
+ /* Level-triggered interrupt queued on VCPU interface */
+ struct vgic_bitmap irq_queued;
/* Interrupt priority. Not used yet. */
struct vgic_bytemap irq_priority;
@@ -152,15 +182,36 @@ struct vgic_dist {
/* Level/edge triggered */
struct vgic_bitmap irq_cfg;
- /* Source CPU per SGI and target CPU */
- u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS];
-
- /* Target CPU for each IRQ */
- u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS];
- struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS];
+ /*
+ * Source CPU per SGI and target CPU:
+ *
+ * Each byte represent a SGI observable on a VCPU, each bit of
+ * this byte indicating if the corresponding VCPU has
+ * generated this interrupt. This is a GICv2 feature only.
+ *
+ * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are
+ * the SGIs observable on VCPUn.
+ */
+ u8 *irq_sgi_sources;
+
+ /*
+ * Target CPU for each SPI:
+ *
+ * Array of available SPI, each byte indicating the target
+ * VCPU for SPI. IRQn (n >=32) is at irq_spi_cpu[n-32].
+ */
+ u8 *irq_spi_cpu;
+
+ /*
+ * Reverse lookup of irq_spi_cpu for faster compute pending:
+ *
+ * Array of bitmaps, one per VCPU, describing if IRQn is
+ * routed to a particular VCPU.
+ */
+ struct vgic_bitmap *irq_spi_target;
/* Bitmap indicating which CPU has something pending */
- unsigned long irq_pending_on_cpu;
+ unsigned long *irq_pending_on_cpu;
#endif
};
@@ -190,11 +241,11 @@ struct vgic_v3_cpu_if {
struct vgic_cpu {
#ifdef CONFIG_KVM_ARM_VGIC
/* per IRQ to LR mapping */
- u8 vgic_irq_lr_map[VGIC_NR_IRQS];
+ u8 *vgic_irq_lr_map;
/* Pending interrupts on this VCPU */
DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
- DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS);
+ unsigned long *pending_shared;
/* Bitmap of used/free list registers */
DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS);
@@ -225,7 +276,8 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
int kvm_vgic_hyp_init(void);
int kvm_vgic_init(struct kvm *kvm);
int kvm_vgic_create(struct kvm *kvm);
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_vgic_destroy(struct kvm *kvm);
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a4c33b34fe3..28be31f4925 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -136,12 +136,11 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
#define KVM_REQ_ENABLE_IBS 23
#define KVM_REQ_DISABLE_IBS 24
+#define KVM_REQ_APIC_PAGE_RELOAD 25
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
-struct kvm;
-struct kvm_vcpu;
extern struct kmem_cache *kvm_vcpu_cache;
extern spinlock_t kvm_lock;
@@ -200,6 +199,17 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif
+/*
+ * Carry out a gup that requires IO. Allow the mm to relinquish the mmap
+ * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL
+ * controls whether we retry the gup one more time to completion in that case.
+ * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp
+ * handler.
+ */
+int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long addr, bool write_fault,
+ struct page **pagep);
+
enum {
OUTSIDE_GUEST_MODE,
IN_GUEST_MODE,
@@ -325,8 +335,6 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
};
-struct kvm_irq_routing_table;
-
#ifndef KVM_PRIVATE_MEM_SLOTS
#define KVM_PRIVATE_MEM_SLOTS 0
#endif
@@ -528,6 +536,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
+ bool *writable);
void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
void kvm_set_page_accessed(struct page *page);
@@ -579,6 +589,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_reload_remote_mmus(struct kvm *kvm);
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
void kvm_make_scan_ioapic_request(struct kvm *kvm);
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
@@ -624,6 +635,8 @@ void kvm_arch_exit(void);
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
+
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
@@ -632,8 +645,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
-int kvm_arch_hardware_enable(void *garbage);
-void kvm_arch_hardware_disable(void *garbage);
+int kvm_arch_hardware_enable(void);
+void kvm_arch_hardware_disable(void);
int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);
@@ -1034,8 +1047,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
extern bool kvm_rebooting;
-struct kvm_device_ops;
-
struct kvm_device {
struct kvm_device_ops *ops;
struct kvm *kvm;
@@ -1068,12 +1079,10 @@ struct kvm_device_ops {
void kvm_device_get(struct kvm_device *dev);
void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_xics_ops;
-extern struct kvm_device_ops kvm_vfio_ops;
-extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
-extern struct kvm_device_ops kvm_flic_ops;
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index b0bcce0ddc9..b606bb689a3 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -17,6 +17,20 @@
#ifndef __KVM_TYPES_H__
#define __KVM_TYPES_H__
+struct kvm;
+struct kvm_async_pf;
+struct kvm_device_ops;
+struct kvm_interrupt;
+struct kvm_irq_routing_table;
+struct kvm_memory_slot;
+struct kvm_one_reg;
+struct kvm_run;
+struct kvm_userspace_memory_region;
+struct kvm_vcpu;
+struct kvm_vcpu_init;
+
+enum kvm_mr_change;
+
#include <asm/types.h>
/*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8981cc882ed..0f4196a0bc2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1985,6 +1985,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
+#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 27288692241..88787bb4b3b 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -57,10 +57,13 @@ struct mmu_notifier_ops {
* pte. This way the VM will provide proper aging to the
* accesses to the page through the secondary MMUs and not
* only to the ones through the Linux pte.
+ * Start-end is necessary in case the secondary MMU is mapping the page
+ * at a smaller granularity than the primary MMU.
*/
int (*clear_flush_young)(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long address);
+ unsigned long start,
+ unsigned long end);
/*
* test_young is called to check the young/accessed bitflag in
@@ -175,7 +178,8 @@ extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
- unsigned long address);
+ unsigned long start,
+ unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
@@ -194,10 +198,11 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
}
static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
- unsigned long address)
+ unsigned long start,
+ unsigned long end)
{
if (mm_has_notifiers(mm))
- return __mmu_notifier_clear_flush_young(mm, address);
+ return __mmu_notifier_clear_flush_young(mm, start, end);
return 0;
}
@@ -255,7 +260,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
unsigned long ___address = __address; \
__young = ptep_clear_flush_young(___vma, ___address, __ptep); \
__young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \
- ___address); \
+ ___address, \
+ ___address + \
+ PAGE_SIZE); \
__young; \
})
@@ -266,7 +273,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
unsigned long ___address = __address; \
__young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \
__young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \
- ___address); \
+ ___address, \
+ ___address + \
+ PMD_SIZE); \
__young; \
})
@@ -301,7 +310,8 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
}
static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
- unsigned long address)
+ unsigned long start,
+ unsigned long end)
{
return 0;
}
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 908925ace77..6edf1f2028c 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq,
__entry->coalesced ? " (coalesced)" : "")
);
+TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
+ TP_PROTO(__u64 e),
+ TP_ARGS(e),
+
+ TP_STRUCT__entry(
+ __field( __u64, e )
+ ),
+
+ TP_fast_assign(
+ __entry->e = e;
+ ),
+
+ TP_printk("dst %x vec=%u (%s|%s|%s%s)",
+ (u8)(__entry->e >> 56), (u8)__entry->e,
+ __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
+ (__entry->e & (1<<11)) ? "logical" : "physical",
+ (__entry->e & (1<<15)) ? "level" : "edge",
+ (__entry->e & (1<<16)) ? "|masked" : "")
+);
+
TRACE_EVENT(kvm_msi_set_irq,
TP_PROTO(__u64 address, __u64 data),
TP_ARGS(address, data),
@@ -205,24 +225,26 @@ TRACE_EVENT(kvm_fpu,
);
TRACE_EVENT(kvm_age_page,
- TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref),
- TP_ARGS(hva, slot, ref),
+ TP_PROTO(ulong gfn, int level, struct kvm_memory_slot *slot, int ref),
+ TP_ARGS(gfn, level, slot, ref),
TP_STRUCT__entry(
__field( u64, hva )
__field( u64, gfn )
+ __field( u8, level )
__field( u8, referenced )
),
TP_fast_assign(
- __entry->hva = hva;
- __entry->gfn =
- slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT);
+ __entry->gfn = gfn;
+ __entry->level = level;
+ __entry->hva = ((gfn - slot->base_gfn) <<
+ PAGE_SHIFT) + slot->userspace_addr;
__entry->referenced = ref;
),
- TP_printk("hva %llx gfn %llx %s",
- __entry->hva, __entry->gfn,
+ TP_printk("hva %llx gfn %llx level %u %s",
+ __entry->hva, __entry->gfn, __entry->level,
__entry->referenced ? "YOUNG" : "OLD")
);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cf3a2ff440e..60768822b14 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -654,9 +654,7 @@ struct kvm_ppc_smmu_info {
#endif
/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
-#ifdef __KVM_HAVE_USER_NMI
#define KVM_CAP_USER_NMI 22
-#endif
#ifdef __KVM_HAVE_GUEST_DEBUG
#define KVM_CAP_SET_GUEST_DEBUG 23
#endif
@@ -738,9 +736,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_PPC_GET_SMMU_INFO 78
#define KVM_CAP_S390_COW 79
#define KVM_CAP_PPC_ALLOC_HTAB 80
-#ifdef __KVM_HAVE_READONLY_MEM
#define KVM_CAP_READONLY_MEM 81
-#endif
#define KVM_CAP_IRQFD_RESAMPLE 82
#define KVM_CAP_PPC_BOOKE_WATCHDOG 83
#define KVM_CAP_PPC_HTAB_FD 84
@@ -947,15 +943,25 @@ struct kvm_device_attr {
__u64 addr; /* userspace address of attr data */
};
-#define KVM_DEV_TYPE_FSL_MPIC_20 1
-#define KVM_DEV_TYPE_FSL_MPIC_42 2
-#define KVM_DEV_TYPE_XICS 3
-#define KVM_DEV_TYPE_VFIO 4
#define KVM_DEV_VFIO_GROUP 1
#define KVM_DEV_VFIO_GROUP_ADD 1
#define KVM_DEV_VFIO_GROUP_DEL 2
-#define KVM_DEV_TYPE_ARM_VGIC_V2 5
-#define KVM_DEV_TYPE_FLIC 6
+
+enum kvm_device_type {
+ KVM_DEV_TYPE_FSL_MPIC_20 = 1,
+#define KVM_DEV_TYPE_FSL_MPIC_20 KVM_DEV_TYPE_FSL_MPIC_20
+ KVM_DEV_TYPE_FSL_MPIC_42,
+#define KVM_DEV_TYPE_FSL_MPIC_42 KVM_DEV_TYPE_FSL_MPIC_42
+ KVM_DEV_TYPE_XICS,
+#define KVM_DEV_TYPE_XICS KVM_DEV_TYPE_XICS
+ KVM_DEV_TYPE_VFIO,
+#define KVM_DEV_TYPE_VFIO KVM_DEV_TYPE_VFIO
+ KVM_DEV_TYPE_ARM_VGIC_V2,
+#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2
+ KVM_DEV_TYPE_FLIC,
+#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC
+ KVM_DEV_TYPE_MAX,
+};
/*
* ioctls for VM fds
@@ -1093,7 +1099,7 @@ struct kvm_s390_ucas_mapping {
#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
-/* Available with KVM_CAP_NMI */
+/* Available with KVM_CAP_USER_NMI */
#define KVM_NMI _IO(KVMIO, 0x9a)
/* Available with KVM_CAP_SET_GUEST_DEBUG */
#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug)