diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/kvm/arm_vgic.h | 112 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 31 | ||||
-rw-r--r-- | include/linux/kvm_types.h | 14 | ||||
-rw-r--r-- | include/linux/mm.h | 1 | ||||
-rw-r--r-- | include/linux/mmu_notifier.h | 24 | ||||
-rw-r--r-- | include/trace/events/kvm.h | 36 | ||||
-rw-r--r-- | include/uapi/linux/kvm.h | 28 |
7 files changed, 180 insertions, 66 deletions
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 35b0c121bb6..2f2aac8448a 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -25,26 +25,25 @@ #include <linux/spinlock.h> #include <linux/types.h> -#define VGIC_NR_IRQS 256 +#define VGIC_NR_IRQS_LEGACY 256 #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) -#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) -#define VGIC_MAX_CPUS KVM_MAX_VCPUS #define VGIC_V2_MAX_LRS (1 << 6) #define VGIC_V3_MAX_LRS 16 +#define VGIC_MAX_IRQS 1024 /* Sanity checks... */ -#if (VGIC_MAX_CPUS > 8) +#if (KVM_MAX_VCPUS > 8) #error Invalid number of CPU interfaces #endif -#if (VGIC_NR_IRQS & 31) +#if (VGIC_NR_IRQS_LEGACY & 31) #error "VGIC_NR_IRQS must be a multiple of 32" #endif -#if (VGIC_NR_IRQS > 1024) +#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS) #error "VGIC_NR_IRQS must be <= 1024" #endif @@ -54,19 +53,33 @@ * - a bunch of shared interrupts (SPI) */ struct vgic_bitmap { - union { - u32 reg[VGIC_NR_PRIVATE_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS); - } percpu[VGIC_MAX_CPUS]; - union { - u32 reg[VGIC_NR_SHARED_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS); - } shared; + /* + * - One UL per VCPU for private interrupts (assumes UL is at + * least 32 bits) + * - As many UL as necessary for shared interrupts. + * + * The private interrupts are accessed via the "private" + * field, one UL per vcpu (the state for vcpu n is in + * private[n]). The shared interrupts are accessed via the + * "shared" pointer (IRQn state is at bit n-32 in the bitmap). + */ + unsigned long *private; + unsigned long *shared; }; struct vgic_bytemap { - u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4]; - u32 shared[VGIC_NR_SHARED_IRQS / 4]; + /* + * - 8 u32 per VCPU for private interrupts + * - As many u32 as necessary for shared interrupts. + * + * The private interrupts are accessed via the "private" + * field, (the state for vcpu n is in private[n*8] to + * private[n*8 + 7]). The shared interrupts are accessed via + * the "shared" pointer (IRQn state is at byte (n-32)%4 of the + * shared[(n-32)/4] word). + */ + u32 *private; + u32 *shared; }; struct kvm_vcpu; @@ -127,6 +140,9 @@ struct vgic_dist { bool in_kernel; bool ready; + int nr_cpus; + int nr_irqs; + /* Virtual control interface mapping */ void __iomem *vctrl_base; @@ -140,11 +156,25 @@ struct vgic_dist { /* Interrupt enabled (one bit per IRQ) */ struct vgic_bitmap irq_enabled; - /* Interrupt 'pin' level */ - struct vgic_bitmap irq_state; + /* Level-triggered interrupt external input is asserted */ + struct vgic_bitmap irq_level; - /* Level-triggered interrupt in progress */ - struct vgic_bitmap irq_active; + /* + * Interrupt state is pending on the distributor + */ + struct vgic_bitmap irq_pending; + + /* + * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered + * interrupts. Essentially holds the state of the flip-flop in + * Figure 4-10 on page 4-101 in ARM IHI 0048B.b. + * Once set, it is only cleared for level-triggered interrupts on + * guest ACKs (when we queue it) or writes to GICD_ICPENDRn. + */ + struct vgic_bitmap irq_soft_pend; + + /* Level-triggered interrupt queued on VCPU interface */ + struct vgic_bitmap irq_queued; /* Interrupt priority. Not used yet. */ struct vgic_bytemap irq_priority; @@ -152,15 +182,36 @@ struct vgic_dist { /* Level/edge triggered */ struct vgic_bitmap irq_cfg; - /* Source CPU per SGI and target CPU */ - u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS]; - - /* Target CPU for each IRQ */ - u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS]; - struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS]; + /* + * Source CPU per SGI and target CPU: + * + * Each byte represent a SGI observable on a VCPU, each bit of + * this byte indicating if the corresponding VCPU has + * generated this interrupt. This is a GICv2 feature only. + * + * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are + * the SGIs observable on VCPUn. + */ + u8 *irq_sgi_sources; + + /* + * Target CPU for each SPI: + * + * Array of available SPI, each byte indicating the target + * VCPU for SPI. IRQn (n >=32) is at irq_spi_cpu[n-32]. + */ + u8 *irq_spi_cpu; + + /* + * Reverse lookup of irq_spi_cpu for faster compute pending: + * + * Array of bitmaps, one per VCPU, describing if IRQn is + * routed to a particular VCPU. + */ + struct vgic_bitmap *irq_spi_target; /* Bitmap indicating which CPU has something pending */ - unsigned long irq_pending_on_cpu; + unsigned long *irq_pending_on_cpu; #endif }; @@ -190,11 +241,11 @@ struct vgic_v3_cpu_if { struct vgic_cpu { #ifdef CONFIG_KVM_ARM_VGIC /* per IRQ to LR mapping */ - u8 vgic_irq_lr_map[VGIC_NR_IRQS]; + u8 *vgic_irq_lr_map; /* Pending interrupts on this VCPU */ DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); - DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); + unsigned long *pending_shared; /* Bitmap of used/free list registers */ DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); @@ -225,7 +276,8 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_vgic_destroy(struct kvm *kvm); +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a4c33b34fe3..28be31f4925 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -136,12 +136,11 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 #define KVM_REQ_ENABLE_IBS 23 #define KVM_REQ_DISABLE_IBS 24 +#define KVM_REQ_APIC_PAGE_RELOAD 25 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -struct kvm; -struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; extern spinlock_t kvm_lock; @@ -200,6 +199,17 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif +/* + * Carry out a gup that requires IO. Allow the mm to relinquish the mmap + * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL + * controls whether we retry the gup one more time to completion in that case. + * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp + * handler. + */ +int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, + unsigned long addr, bool write_fault, + struct page **pagep); + enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, @@ -325,8 +335,6 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -struct kvm_irq_routing_table; - #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 #endif @@ -528,6 +536,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, + bool *writable); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); @@ -579,6 +589,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); void kvm_make_mclock_inprogress_request(struct kvm *kvm); void kvm_make_scan_ioapic_request(struct kvm *kvm); +bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@ -624,6 +635,8 @@ void kvm_arch_exit(void); int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); + void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); @@ -632,8 +645,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_hardware_enable(void *garbage); -void kvm_arch_hardware_disable(void *garbage); +int kvm_arch_hardware_enable(void); +void kvm_arch_hardware_disable(void); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); @@ -1034,8 +1047,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) extern bool kvm_rebooting; -struct kvm_device_ops; - struct kvm_device { struct kvm_device_ops *ops; struct kvm *kvm; @@ -1068,12 +1079,10 @@ struct kvm_device_ops { void kvm_device_get(struct kvm_device *dev); void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); +int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; -extern struct kvm_device_ops kvm_vfio_ops; -extern struct kvm_device_ops kvm_arm_vgic_v2_ops; -extern struct kvm_device_ops kvm_flic_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b0bcce0ddc9..b606bb689a3 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -17,6 +17,20 @@ #ifndef __KVM_TYPES_H__ #define __KVM_TYPES_H__ +struct kvm; +struct kvm_async_pf; +struct kvm_device_ops; +struct kvm_interrupt; +struct kvm_irq_routing_table; +struct kvm_memory_slot; +struct kvm_one_reg; +struct kvm_run; +struct kvm_userspace_memory_region; +struct kvm_vcpu; +struct kvm_vcpu_init; + +enum kvm_mr_change; + #include <asm/types.h> /* diff --git a/include/linux/mm.h b/include/linux/mm.h index 8981cc882ed..0f4196a0bc2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1985,6 +1985,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ +#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 27288692241..88787bb4b3b 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -57,10 +57,13 @@ struct mmu_notifier_ops { * pte. This way the VM will provide proper aging to the * accesses to the page through the secondary MMUs and not * only to the ones through the Linux pte. + * Start-end is necessary in case the secondary MMU is mapping the page + * at a smaller granularity than the primary MMU. */ int (*clear_flush_young)(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address); + unsigned long start, + unsigned long end); /* * test_young is called to check the young/accessed bitflag in @@ -175,7 +178,8 @@ extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address); + unsigned long start, + unsigned long end); extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, @@ -194,10 +198,11 @@ static inline void mmu_notifier_release(struct mm_struct *mm) } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { if (mm_has_notifiers(mm)) - return __mmu_notifier_clear_flush_young(mm, address); + return __mmu_notifier_clear_flush_young(mm, start, end); return 0; } @@ -255,7 +260,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) unsigned long ___address = __address; \ __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address); \ + ___address, \ + ___address + \ + PAGE_SIZE); \ __young; \ }) @@ -266,7 +273,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) unsigned long ___address = __address; \ __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address); \ + ___address, \ + ___address + \ + PMD_SIZE); \ __young; \ }) @@ -301,7 +310,8 @@ static inline void mmu_notifier_release(struct mm_struct *mm) } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { return 0; } diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 908925ace77..6edf1f2028c 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq, __entry->coalesced ? " (coalesced)" : "") ); +TRACE_EVENT(kvm_ioapic_delayed_eoi_inj, + TP_PROTO(__u64 e), + TP_ARGS(e), + + TP_STRUCT__entry( + __field( __u64, e ) + ), + + TP_fast_assign( + __entry->e = e; + ), + + TP_printk("dst %x vec=%u (%s|%s|%s%s)", + (u8)(__entry->e >> 56), (u8)__entry->e, + __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), + (__entry->e & (1<<11)) ? "logical" : "physical", + (__entry->e & (1<<15)) ? "level" : "edge", + (__entry->e & (1<<16)) ? "|masked" : "") +); + TRACE_EVENT(kvm_msi_set_irq, TP_PROTO(__u64 address, __u64 data), TP_ARGS(address, data), @@ -205,24 +225,26 @@ TRACE_EVENT(kvm_fpu, ); TRACE_EVENT(kvm_age_page, - TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref), - TP_ARGS(hva, slot, ref), + TP_PROTO(ulong gfn, int level, struct kvm_memory_slot *slot, int ref), + TP_ARGS(gfn, level, slot, ref), TP_STRUCT__entry( __field( u64, hva ) __field( u64, gfn ) + __field( u8, level ) __field( u8, referenced ) ), TP_fast_assign( - __entry->hva = hva; - __entry->gfn = - slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT); + __entry->gfn = gfn; + __entry->level = level; + __entry->hva = ((gfn - slot->base_gfn) << + PAGE_SHIFT) + slot->userspace_addr; __entry->referenced = ref; ), - TP_printk("hva %llx gfn %llx %s", - __entry->hva, __entry->gfn, + TP_printk("hva %llx gfn %llx level %u %s", + __entry->hva, __entry->gfn, __entry->level, __entry->referenced ? "YOUNG" : "OLD") ); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index cf3a2ff440e..60768822b14 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -654,9 +654,7 @@ struct kvm_ppc_smmu_info { #endif /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 -#ifdef __KVM_HAVE_USER_NMI #define KVM_CAP_USER_NMI 22 -#endif #ifdef __KVM_HAVE_GUEST_DEBUG #define KVM_CAP_SET_GUEST_DEBUG 23 #endif @@ -738,9 +736,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_GET_SMMU_INFO 78 #define KVM_CAP_S390_COW 79 #define KVM_CAP_PPC_ALLOC_HTAB 80 -#ifdef __KVM_HAVE_READONLY_MEM #define KVM_CAP_READONLY_MEM 81 -#endif #define KVM_CAP_IRQFD_RESAMPLE 82 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 @@ -947,15 +943,25 @@ struct kvm_device_attr { __u64 addr; /* userspace address of attr data */ }; -#define KVM_DEV_TYPE_FSL_MPIC_20 1 -#define KVM_DEV_TYPE_FSL_MPIC_42 2 -#define KVM_DEV_TYPE_XICS 3 -#define KVM_DEV_TYPE_VFIO 4 #define KVM_DEV_VFIO_GROUP 1 #define KVM_DEV_VFIO_GROUP_ADD 1 #define KVM_DEV_VFIO_GROUP_DEL 2 -#define KVM_DEV_TYPE_ARM_VGIC_V2 5 -#define KVM_DEV_TYPE_FLIC 6 + +enum kvm_device_type { + KVM_DEV_TYPE_FSL_MPIC_20 = 1, +#define KVM_DEV_TYPE_FSL_MPIC_20 KVM_DEV_TYPE_FSL_MPIC_20 + KVM_DEV_TYPE_FSL_MPIC_42, +#define KVM_DEV_TYPE_FSL_MPIC_42 KVM_DEV_TYPE_FSL_MPIC_42 + KVM_DEV_TYPE_XICS, +#define KVM_DEV_TYPE_XICS KVM_DEV_TYPE_XICS + KVM_DEV_TYPE_VFIO, +#define KVM_DEV_TYPE_VFIO KVM_DEV_TYPE_VFIO + KVM_DEV_TYPE_ARM_VGIC_V2, +#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2 + KVM_DEV_TYPE_FLIC, +#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC + KVM_DEV_TYPE_MAX, +}; /* * ioctls for VM fds @@ -1093,7 +1099,7 @@ struct kvm_s390_ucas_mapping { #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) -/* Available with KVM_CAP_NMI */ +/* Available with KVM_CAP_USER_NMI */ #define KVM_NMI _IO(KVMIO, 0x9a) /* Available with KVM_CAP_SET_GUEST_DEBUG */ #define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) |