summaryrefslogtreecommitdiffstats
path: root/virt/kvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-04-02 14:50:10 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-02 14:50:10 -0700
commit7cbb39d4d4d530dff12f2ff06ed6c85c504ba91a (patch)
tree82f721591d739eca99817def86ca5b6ebd682fe6 /virt/kvm
parent64056a94256e7a476de67fbe581dfe5515c56288 (diff)
parent7227fc0666606b0df2c0d2966a7f4859b01bdf74 (diff)
Merge tag 'kvm-3.15-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "PPC and ARM do not have much going on this time. Most of the cool stuff, instead, is in s390 and (after a few releases) x86. ARM has some caching fixes and PPC has transactional memory support in guests. MIPS has some fixes, with more probably coming in 3.16 as QEMU will soon get support for MIPS KVM. For x86 there are optimizations for debug registers, which trigger on some Windows games, and other important fixes for Windows guests. We now expose to the guest Broadwell instruction set extensions and also Intel MPX. There's also a fix/workaround for OS X guests, nested virtualization features (preemption timer), and a couple kvmclock refinements. For s390, the main news is asynchronous page faults, together with improvements to IRQs (floating irqs and adapter irqs) that speed up virtio devices" * tag 'kvm-3.15-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (96 commits) KVM: PPC: Book3S HV: Save/restore host PMU registers that are new in POWER8 KVM: PPC: Book3S HV: Fix decrementer timeouts with non-zero TB offset KVM: PPC: Book3S HV: Don't use kvm_memslots() in real mode KVM: PPC: Book3S HV: Return ENODEV error rather than EIO KVM: PPC: Book3S: Trim top 4 bits of physical address in RTAS code KVM: PPC: Book3S HV: Add get/set_one_reg for new TM state KVM: PPC: Book3S HV: Add transactional memory support KVM: Specify byte order for KVM_EXIT_MMIO KVM: vmx: fix MPX detection KVM: PPC: Book3S HV: Fix KVM hang with CONFIG_KVM_XICS=n KVM: PPC: Book3S: Introduce hypervisor call H_GET_TCE KVM: PPC: Book3S HV: Fix incorrect userspace exit on ioeventfd write KVM: s390: clear local interrupts at cpu initial reset KVM: s390: Fix possible memory leak in SIGP functions KVM: s390: fix calculation of idle_mask array size KVM: s390: randomize sca address KVM: ioapic: reinject pending interrupts on KVM_SET_IRQCHIP KVM: Bump KVM_MAX_IRQ_ROUTES for s390 KVM: s390: irq routing for adapter interrupts. KVM: s390: adapter interrupt sources ...
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/Kconfig4
-rw-r--r--virt/kvm/async_pf.c27
-rw-r--r--virt/kvm/eventfd.c8
-rw-r--r--virt/kvm/ioapic.c108
-rw-r--r--virt/kvm/kvm_main.c12
5 files changed, 108 insertions, 51 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index fbe1a48bd62..13f2d19793e 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -22,6 +22,10 @@ config KVM_MMIO
config KVM_ASYNC_PF
bool
+# Toggle to switch between direct notification and batch job
+config KVM_ASYNC_PF_SYNC
+ bool
+
config HAVE_KVM_MSI
bool
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8631d9c1432..10df100c451 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -28,6 +28,21 @@
#include "async_pf.h"
#include <trace/events/kvm.h>
+static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+ kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+#ifndef CONFIG_KVM_ASYNC_PF_SYNC
+ kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+
static struct kmem_cache *async_pf_cache;
int kvm_async_pf_init(void)
@@ -69,6 +84,7 @@ static void async_pf_execute(struct work_struct *work)
down_read(&mm->mmap_sem);
get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
up_read(&mm->mmap_sem);
+ kvm_async_page_present_sync(vcpu, apf);
unuse_mm(mm);
spin_lock(&vcpu->async_pf.lock);
@@ -97,11 +113,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
list_entry(vcpu->async_pf.queue.next,
typeof(*work), queue);
list_del(&work->queue);
+
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+ flush_work(&work->work);
+#else
if (cancel_work_sync(&work->work)) {
mmdrop(work->mm);
kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
kmem_cache_free(async_pf_cache, work);
}
+#endif
}
spin_lock(&vcpu->async_pf.lock);
@@ -130,7 +151,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->async_pf.lock);
kvm_arch_async_page_ready(vcpu, work);
- kvm_arch_async_page_present(vcpu, work);
+ kvm_async_page_present_async(vcpu, work);
list_del(&work->queue);
vcpu->async_pf.queued--;
@@ -138,7 +159,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
}
}
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
struct kvm_arch_async_pf *arch)
{
struct kvm_async_pf *work;
@@ -159,7 +180,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
work->wakeup_all = false;
work->vcpu = vcpu;
work->gva = gva;
- work->addr = gfn_to_hva(vcpu->kvm, gfn);
+ work->addr = hva;
work->arch = *arch;
work->mm = current->mm;
atomic_inc(&work->mm->mm_count);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index abe4d6043b3..29c2a04e036 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -391,19 +391,19 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
lockdep_is_held(&kvm->irqfds.lock));
irqfd_update(kvm, irqfd, irq_rt);
- events = f.file->f_op->poll(f.file, &irqfd->pt);
-
list_add_tail(&irqfd->list, &kvm->irqfds.items);
+ spin_unlock_irq(&kvm->irqfds.lock);
+
/*
* Check if there was an event already pending on the eventfd
* before we registered, and trigger it as if we didn't miss it.
*/
+ events = f.file->f_op->poll(f.file, &irqfd->pt);
+
if (events & POLLIN)
schedule_work(&irqfd->inject);
- spin_unlock_irq(&kvm->irqfds.lock);
-
/*
* do not drop the file until the irqfd is fully initialized, otherwise
* we might race against the POLLHUP
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ce9ed99ad7d..d4b601547f1 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -50,7 +50,7 @@
#else
#define ioapic_debug(fmt, arg...)
#endif
-static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq,
+static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
bool line_status);
static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@@ -163,23 +163,67 @@ static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
return false;
}
-static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
- bool line_status)
+static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
+ int irq_level, bool line_status)
{
- union kvm_ioapic_redirect_entry *pent;
- int injected = -1;
+ union kvm_ioapic_redirect_entry entry;
+ u32 mask = 1 << irq;
+ u32 old_irr;
+ int edge, ret;
- pent = &ioapic->redirtbl[idx];
+ entry = ioapic->redirtbl[irq];
+ edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
- if (!pent->fields.mask) {
- injected = ioapic_deliver(ioapic, idx, line_status);
- if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
- pent->fields.remote_irr = 1;
+ if (!irq_level) {
+ ioapic->irr &= ~mask;
+ ret = 1;
+ goto out;
+ }
+
+ /*
+ * Return 0 for coalesced interrupts; for edge-triggered interrupts,
+ * this only happens if a previous edge has not been delivered due
+ * do masking. For level interrupts, the remote_irr field tells
+ * us if the interrupt is waiting for an EOI.
+ *
+ * RTC is special: it is edge-triggered, but userspace likes to know
+ * if it has been already ack-ed via EOI because coalesced RTC
+ * interrupts lead to time drift in Windows guests. So we track
+ * EOI manually for the RTC interrupt.
+ */
+ if (irq == RTC_GSI && line_status &&
+ rtc_irq_check_coalesced(ioapic)) {
+ ret = 0;
+ goto out;
}
- return injected;
+ old_irr = ioapic->irr;
+ ioapic->irr |= mask;
+ if ((edge && old_irr == ioapic->irr) ||
+ (!edge && entry.fields.remote_irr)) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = ioapic_service(ioapic, irq, line_status);
+
+out:
+ trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
+ return ret;
+}
+
+static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
+{
+ u32 idx;
+
+ rtc_irq_eoi_tracking_reset(ioapic);
+ for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
+ ioapic_set_irq(ioapic, idx, 1, true);
+
+ kvm_rtc_eoi_tracking_restore_all(ioapic);
}
+
static void update_handled_vectors(struct kvm_ioapic *ioapic)
{
DECLARE_BITMAP(handled_vectors, 256);
@@ -282,12 +326,15 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
}
}
-static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
+static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
{
union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
struct kvm_lapic_irq irqe;
int ret;
+ if (entry->fields.mask)
+ return -1;
+
ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
"vector=%x trig_mode=%x\n",
entry->fields.dest_id, entry->fields.dest_mode,
@@ -302,6 +349,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
irqe.level = 1;
irqe.shorthand = 0;
+ if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
+ ioapic->irr &= ~(1 << irq);
+
if (irq == RTC_GSI && line_status) {
BUG_ON(ioapic->rtc_status.pending_eoi != 0);
ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
@@ -310,45 +360,24 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
} else
ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
+ if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
+ entry->fields.remote_irr = 1;
+
return ret;
}
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
int level, bool line_status)
{
- u32 old_irr;
- u32 mask = 1 << irq;
- union kvm_ioapic_redirect_entry entry;
int ret, irq_level;
BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
spin_lock(&ioapic->lock);
- old_irr = ioapic->irr;
irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
irq_source_id, level);
- entry = ioapic->redirtbl[irq];
- irq_level ^= entry.fields.polarity;
- if (!irq_level) {
- ioapic->irr &= ~mask;
- ret = 1;
- } else {
- int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+ ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);
- if (irq == RTC_GSI && line_status &&
- rtc_irq_check_coalesced(ioapic)) {
- ret = 0; /* coalesced */
- goto out;
- }
- ioapic->irr |= mask;
- if ((edge && old_irr != ioapic->irr) ||
- (!edge && !entry.fields.remote_irr))
- ret = ioapic_service(ioapic, irq, line_status);
- else
- ret = 0; /* report coalesced interrupt */
- }
-out:
- trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
spin_unlock(&ioapic->lock);
return ret;
@@ -394,7 +423,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
- if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+ if (ioapic->irr & (1 << i))
ioapic_service(ioapic, i, false);
}
}
@@ -595,9 +624,10 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
spin_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+ ioapic->irr = 0;
update_handled_vectors(ioapic);
kvm_vcpu_request_scan_ioapic(kvm);
- kvm_rtc_eoi_tracking_restore_all(ioapic);
+ kvm_ioapic_inject_all(ioapic, state->irr);
spin_unlock(&ioapic->lock);
return 0;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b5ec7fb986f..56baae8c2f5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -186,12 +186,9 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
- long dirty_count = kvm->tlbs_dirty;
-
- smp_mb();
if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.remote_tlb_flush;
- cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
+ kvm->tlbs_dirty = false;
}
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
@@ -1804,7 +1801,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
continue;
if (vcpu == me)
continue;
- if (waitqueue_active(&vcpu->wq))
+ if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;
@@ -2284,6 +2281,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
ops = &kvm_arm_vgic_v2_ops;
break;
#endif
+#ifdef CONFIG_S390
+ case KVM_DEV_TYPE_FLIC:
+ ops = &kvm_flic_ops;
+ break;
+#endif
default:
return -ENODEV;
}