author		Linus Torvalds <torvalds@linux-foundation.org>	2014-04-02 14:50:10 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-04-02 14:50:10 -0700
commit		7cbb39d4d4d530dff12f2ff06ed6c85c504ba91a (patch)
tree		82f721591d739eca99817def86ca5b6ebd682fe6 /virt/kvm
parent		64056a94256e7a476de67fbe581dfe5515c56288 (diff)
parent		7227fc0666606b0df2c0d2966a7f4859b01bdf74 (diff)
Merge tag 'kvm-3.15-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"PPC and ARM do not have much going on this time. Most of the cool
stuff, instead, is in s390 and (after a few releases) x86.
ARM has some caching fixes and PPC has transactional memory support in
guests. MIPS has some fixes, with more probably coming in 3.16 as
QEMU will soon get support for MIPS KVM.
For x86 there are optimizations for debug registers, which trigger on
some Windows games, and other important fixes for Windows guests. We
now expose to the guest Broadwell instruction set extensions and also
Intel MPX. There's also a fix/workaround for OS X guests, nested
virtualization features (preemption timer), and a couple kvmclock
refinements.
For s390, the main news is asynchronous page faults, together with
improvements to IRQs (floating irqs and adapter irqs) that speed up
virtio devices"
* tag 'kvm-3.15-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (96 commits)
KVM: PPC: Book3S HV: Save/restore host PMU registers that are new in POWER8
KVM: PPC: Book3S HV: Fix decrementer timeouts with non-zero TB offset
KVM: PPC: Book3S HV: Don't use kvm_memslots() in real mode
KVM: PPC: Book3S HV: Return ENODEV error rather than EIO
KVM: PPC: Book3S: Trim top 4 bits of physical address in RTAS code
KVM: PPC: Book3S HV: Add get/set_one_reg for new TM state
KVM: PPC: Book3S HV: Add transactional memory support
KVM: Specify byte order for KVM_EXIT_MMIO
KVM: vmx: fix MPX detection
KVM: PPC: Book3S HV: Fix KVM hang with CONFIG_KVM_XICS=n
KVM: PPC: Book3S: Introduce hypervisor call H_GET_TCE
KVM: PPC: Book3S HV: Fix incorrect userspace exit on ioeventfd write
KVM: s390: clear local interrupts at cpu initial reset
KVM: s390: Fix possible memory leak in SIGP functions
KVM: s390: fix calculation of idle_mask array size
KVM: s390: randomize sca address
KVM: ioapic: reinject pending interrupts on KVM_SET_IRQCHIP
KVM: Bump KVM_MAX_IRQ_ROUTES for s390
KVM: s390: irq routing for adapter interrupts.
KVM: s390: adapter interrupt sources
...
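
A note on the s390 adapter-interrupt entries above: the new FLIC (floating
interrupt controller) is exposed through KVM's generic device-control API
rather than a dedicated ioctl. The sketch below shows roughly how a
userspace VMM would instantiate it; KVM_CREATE_DEVICE, struct
kvm_create_device and KVM_DEV_TYPE_FLIC are real uAPI, while create_flic()
and its error handling are illustrative only.

/* Hedged sketch: create the s390 FLIC via the KVM device-control API.
 * Needs Linux headers that already define KVM_DEV_TYPE_FLIC (3.15+). */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_flic(int vm_fd)
{
        struct kvm_create_device cd = {
                .type = KVM_DEV_TYPE_FLIC,   /* new device type in this pull */
        };

        /* On success the kernel stores a new device fd in cd.fd; later
         * KVM_SET_DEVICE_ATTR calls against that fd configure adapter
         * interrupts and other FLIC state. */
        if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0) {
                perror("KVM_CREATE_DEVICE(FLIC)");
                return -1;
        }
        return cd.fd;
}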
Diffstat (limited to 'virt/kvm')
-rw-r--r--	virt/kvm/Kconfig	  4
-rw-r--r--	virt/kvm/async_pf.c	 27
-rw-r--r--	virt/kvm/eventfd.c	  8
-rw-r--r--	virt/kvm/ioapic.c	108
-rw-r--r--	virt/kvm/kvm_main.c	 12
5 files changed, 108 insertions(+), 51 deletions(-)
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index fbe1a48bd62..13f2d19793e 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -22,6 +22,10 @@ config KVM_MMIO
 config KVM_ASYNC_PF
        bool
 
+# Toggle to switch between direct notification and batch job
+config KVM_ASYNC_PF_SYNC
+       bool
+
 config HAVE_KVM_MSI
        bool
 
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8631d9c1432..10df100c451 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -28,6 +28,21 @@
 #include "async_pf.h"
 #include <trace/events/kvm.h>
 
+static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
+                                               struct kvm_async_pf *work)
+{
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+       kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
+                                                struct kvm_async_pf *work)
+{
+#ifndef CONFIG_KVM_ASYNC_PF_SYNC
+       kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+
 static struct kmem_cache *async_pf_cache;
 
 int kvm_async_pf_init(void)
@@ -69,6 +84,7 @@ static void async_pf_execute(struct work_struct *work)
        down_read(&mm->mmap_sem);
        get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
        up_read(&mm->mmap_sem);
+       kvm_async_page_present_sync(vcpu, apf);
        unuse_mm(mm);
 
        spin_lock(&vcpu->async_pf.lock);
@@ -97,11 +113,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                        list_entry(vcpu->async_pf.queue.next,
                                   typeof(*work), queue);
                list_del(&work->queue);
+
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+               flush_work(&work->work);
+#else
                if (cancel_work_sync(&work->work)) {
                        mmdrop(work->mm);
                        kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
                        kmem_cache_free(async_pf_cache, work);
                }
+#endif
        }
 
        spin_lock(&vcpu->async_pf.lock);
@@ -130,7 +151,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
                spin_unlock(&vcpu->async_pf.lock);
 
                kvm_arch_async_page_ready(vcpu, work);
-               kvm_arch_async_page_present(vcpu, work);
+               kvm_async_page_present_async(vcpu, work);
 
                list_del(&work->queue);
                vcpu->async_pf.queued--;
@@ -138,7 +159,7 @@
        }
 }
 
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
                       struct kvm_arch_async_pf *arch)
 {
        struct kvm_async_pf *work;
@@ -159,7 +180,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
        work->wakeup_all = false;
        work->vcpu = vcpu;
        work->gva = gva;
-       work->addr = gfn_to_hva(vcpu->kvm, gfn);
+       work->addr = hva;
        work->arch = *arch;
        work->mm = current->mm;
        atomic_inc(&work->mm->mm_count);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index abe4d6043b3..29c2a04e036 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -391,19 +391,19 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
                           lockdep_is_held(&kvm->irqfds.lock));
        irqfd_update(kvm, irqfd, irq_rt);
 
-       events = f.file->f_op->poll(f.file, &irqfd->pt);
-
        list_add_tail(&irqfd->list, &kvm->irqfds.items);
 
+       spin_unlock_irq(&kvm->irqfds.lock);
+
        /*
         * Check if there was an event already pending on the eventfd
         * before we registered, and trigger it as if we didn't miss it.
         */
+       events = f.file->f_op->poll(f.file, &irqfd->pt);
+
        if (events & POLLIN)
                schedule_work(&irqfd->inject);
 
-       spin_unlock_irq(&kvm->irqfds.lock);
-
        /*
         * do not drop the file until the irqfd is fully initialized, otherwise
         * we might race against the POLLHUP
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ce9ed99ad7d..d4b601547f1 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -50,7 +50,7 @@
 #else
 #define ioapic_debug(fmt, arg...)
 #endif
-static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq,
+static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
                bool line_status);
 
 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@@ -163,23 +163,67 @@ static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
        return false;
 }
 
-static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
-               bool line_status)
+static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
+               int irq_level, bool line_status)
 {
-       union kvm_ioapic_redirect_entry *pent;
-       int injected = -1;
+       union kvm_ioapic_redirect_entry entry;
+       u32 mask = 1 << irq;
+       u32 old_irr;
+       int edge, ret;
 
-       pent = &ioapic->redirtbl[idx];
+       entry = ioapic->redirtbl[irq];
+       edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
 
-       if (!pent->fields.mask) {
-               injected = ioapic_deliver(ioapic, idx, line_status);
-               if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
-                       pent->fields.remote_irr = 1;
+       if (!irq_level) {
+               ioapic->irr &= ~mask;
+               ret = 1;
+               goto out;
+       }
+
+       /*
+        * Return 0 for coalesced interrupts; for edge-triggered interrupts,
+        * this only happens if a previous edge has not been delivered due
+        * to masking. For level interrupts, the remote_irr field tells
+        * us if the interrupt is waiting for an EOI.
+        *
+        * RTC is special: it is edge-triggered, but userspace likes to know
+        * if it has been already ack-ed via EOI because coalesced RTC
+        * interrupts lead to time drift in Windows guests. So we track
+        * EOI manually for the RTC interrupt.
+        */
+       if (irq == RTC_GSI && line_status &&
+           rtc_irq_check_coalesced(ioapic)) {
+               ret = 0;
+               goto out;
        }
 
-       return injected;
+       old_irr = ioapic->irr;
+       ioapic->irr |= mask;
+       if ((edge && old_irr == ioapic->irr) ||
+           (!edge && entry.fields.remote_irr)) {
+               ret = 0;
+               goto out;
+       }
+
+       ret = ioapic_service(ioapic, irq, line_status);
+
+out:
+       trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
+       return ret;
+}
+
+static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
+{
+       u32 idx;
+
+       rtc_irq_eoi_tracking_reset(ioapic);
+       for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
+               ioapic_set_irq(ioapic, idx, 1, true);
+
+       kvm_rtc_eoi_tracking_restore_all(ioapic);
 }
 
+
 static void update_handled_vectors(struct kvm_ioapic *ioapic)
 {
        DECLARE_BITMAP(handled_vectors, 256);
@@ -282,12 +326,15 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
        }
 }
 
-static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
+static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 {
        union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
        struct kvm_lapic_irq irqe;
        int ret;
 
+       if (entry->fields.mask)
+               return -1;
+
        ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
                     "vector=%x trig_mode=%x\n",
                     entry->fields.dest_id, entry->fields.dest_mode,
@@ -302,6 +349,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
        irqe.level = 1;
        irqe.shorthand = 0;
 
+       if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
+               ioapic->irr &= ~(1 << irq);
+
        if (irq == RTC_GSI && line_status) {
                BUG_ON(ioapic->rtc_status.pending_eoi != 0);
                ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
@@ -310,45 +360,24 @@
        } else
                ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
 
+       if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
+               entry->fields.remote_irr = 1;
+
        return ret;
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
                       int level, bool line_status)
 {
-       u32 old_irr;
-       u32 mask = 1 << irq;
-       union kvm_ioapic_redirect_entry entry;
        int ret, irq_level;
 
        BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
 
        spin_lock(&ioapic->lock);
-       old_irr = ioapic->irr;
        irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
                                         irq_source_id, level);
-       entry = ioapic->redirtbl[irq];
-       irq_level ^= entry.fields.polarity;
-       if (!irq_level) {
-               ioapic->irr &= ~mask;
-               ret = 1;
-       } else {
-               int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+       ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);
 
-               if (irq == RTC_GSI && line_status &&
-                   rtc_irq_check_coalesced(ioapic)) {
-                       ret = 0; /* coalesced */
-                       goto out;
-               }
-               ioapic->irr |= mask;
-               if ((edge && old_irr != ioapic->irr) ||
-                   (!edge && !entry.fields.remote_irr))
-                       ret = ioapic_service(ioapic, irq, line_status);
-               else
-                       ret = 0; /* report coalesced interrupt */
-       }
-out:
-       trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
        spin_unlock(&ioapic->lock);
 
        return ret;
@@ -394,7 +423,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
 
                ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
                ent->fields.remote_irr = 0;
-               if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+               if (ioapic->irr & (1 << i))
                        ioapic_service(ioapic, i, false);
        }
 }
@@ -595,9 +624,10 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 
        spin_lock(&ioapic->lock);
        memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+       ioapic->irr = 0;
        update_handled_vectors(ioapic);
        kvm_vcpu_request_scan_ioapic(kvm);
-       kvm_rtc_eoi_tracking_restore_all(ioapic);
+       kvm_ioapic_inject_all(ioapic, state->irr);
        spin_unlock(&ioapic->lock);
        return 0;
 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b5ec7fb986f..56baae8c2f5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -186,12 +186,9 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-       long dirty_count = kvm->tlbs_dirty;
-
-       smp_mb();
        if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                ++kvm->stat.remote_tlb_flush;
-       cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
+       kvm->tlbs_dirty = false;
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
@@ -1804,7 +1801,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
                                continue;
                        if (vcpu == me)
                                continue;
-                       if (waitqueue_active(&vcpu->wq))
+                       if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
                                continue;
                        if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
                                continue;
@@ -2284,6 +2281,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
                ops = &kvm_arm_vgic_v2_ops;
                break;
 #endif
+#ifdef CONFIG_S390
+       case KVM_DEV_TYPE_FLIC:
+               ops = &kvm_flic_ops;
+               break;
+#endif
        default:
                return -ENODEV;
        }
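
To make the coalescing rules in the new ioapic_set_irq()/ioapic_service()
pair above easier to follow, here is a small stand-alone C model of just
that decision logic. It is a sketch, not kernel code: the types, the
service() stub, and the omission of the RTC special case and of locking
are all simplifications.

/* User-space model of the ioapic_set_irq() coalescing decision. */
#include <stdbool.h>
#include <stdio.h>

#define NUM_PINS 24

struct pin {
        bool mask;        /* redirection-entry mask bit */
        bool edge;        /* edge-triggered (vs. level-triggered) */
        bool remote_irr;  /* level IRQ delivered, EOI still pending */
};

struct ioapic_model {
        unsigned int irr;               /* one IRR bit per pin */
        struct pin redirtbl[NUM_PINS];
};

/* Deliver one pin: masked pins deliver nothing (-1), edge pins consume
 * their IRR bit on delivery, level pins set remote_irr until a (not
 * modeled) EOI clears it. */
static int service(struct ioapic_model *io, unsigned int irq)
{
        struct pin *p = &io->redirtbl[irq];

        if (p->mask)
                return -1;
        if (p->edge)
                io->irr &= ~(1u << irq);
        else
                p->remote_irr = true;
        return 1;
}

/* Mirrors ioapic_set_irq(): returns 0 when the interrupt is coalesced --
 * for edge pins, when an earlier undelivered edge still holds the IRR
 * bit; for level pins, while remote_irr says the guest has not EOI'd. */
static int set_irq(struct ioapic_model *io, unsigned int irq, int level)
{
        unsigned int mask = 1u << irq, old_irr;
        struct pin *p = &io->redirtbl[irq];

        if (!level) {                   /* line deasserted: clear IRR */
                io->irr &= ~mask;
                return 1;
        }
        old_irr = io->irr;
        io->irr |= mask;
        if ((p->edge && old_irr == io->irr) ||
            (!p->edge && p->remote_irr))
                return 0;               /* coalesced */
        return service(io, irq);
}

int main(void)
{
        struct ioapic_model io = { 0 };

        io.redirtbl[3].edge = true;
        io.redirtbl[3].mask = true;             /* edge pin, masked */
        printf("masked edge:   %d\n", set_irq(&io, 3, 1)); /* -1: IRR latched */
        printf("repeat edge:   %d\n", set_irq(&io, 3, 1)); /*  0: coalesced  */

        io.redirtbl[5].edge = false;            /* level pin */
        printf("level assert:  %d\n", set_irq(&io, 5, 1)); /*  1: delivered  */
        printf("level re-fire: %d\n", set_irq(&io, 5, 1)); /*  0: no EOI yet */
        return 0;
}

Compiled as a normal user-space program, it prints -1/0/1/0 in the
commented order: a masked edge pin leaves its IRR bit set, so the second
edge coalesces, and a level pin coalesces until the guest EOIs. The kernel
version additionally folds the trace point and the RTC EOI tracking into
the shared out: path; the model keeps only the delivered-vs-coalesced
decision.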