diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/Kconfig | 3 | ||||
-rw-r--r-- | virt/kvm/async_pf.c | 190 | ||||
-rw-r--r-- | virt/kvm/async_pf.h | 36 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 48 |
4 files changed, 266 insertions, 11 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 7f1178f6b83..f63ccb0a598 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -15,3 +15,6 @@ config KVM_APIC_ARCHITECTURE config KVM_MMIO bool + +config KVM_ASYNC_PF + bool diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c new file mode 100644 index 00000000000..857d63431cb --- /dev/null +++ b/virt/kvm/async_pf.c @@ -0,0 +1,190 @@ +/* + * kvm asynchronous fault support + * + * Copyright 2010 Red Hat, Inc. + * + * Author: + * Gleb Natapov <gleb@redhat.com> + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/kvm_host.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/mmu_context.h> + +#include "async_pf.h" +#include <trace/events/kvm.h> + +static struct kmem_cache *async_pf_cache; + +int kvm_async_pf_init(void) +{ + async_pf_cache = KMEM_CACHE(kvm_async_pf, 0); + + if (!async_pf_cache) + return -ENOMEM; + + return 0; +} + +void kvm_async_pf_deinit(void) +{ + if (async_pf_cache) + kmem_cache_destroy(async_pf_cache); + async_pf_cache = NULL; +} + +void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu) +{ + INIT_LIST_HEAD(&vcpu->async_pf.done); + INIT_LIST_HEAD(&vcpu->async_pf.queue); + spin_lock_init(&vcpu->async_pf.lock); +} + +static void async_pf_execute(struct work_struct *work) +{ + struct page *page = NULL; + struct kvm_async_pf *apf = + container_of(work, struct kvm_async_pf, work); + struct mm_struct *mm = apf->mm; + struct kvm_vcpu *vcpu = apf->vcpu; + unsigned long addr = apf->addr; + gva_t gva = apf->gva; + + might_sleep(); + + use_mm(mm); + down_read(&mm->mmap_sem); + get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL); + up_read(&mm->mmap_sem); + unuse_mm(mm); + + spin_lock(&vcpu->async_pf.lock); + list_add_tail(&apf->link, &vcpu->async_pf.done); + apf->page = page; + apf->done = true; + spin_unlock(&vcpu->async_pf.lock); + + /* + * apf may be freed by kvm_check_async_pf_completion() after + * this point + */ + + trace_kvm_async_pf_completed(addr, page, gva); + + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + + mmdrop(mm); + kvm_put_kvm(vcpu->kvm); +} + +void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) +{ + /* cancel outstanding work queue item */ + while (!list_empty(&vcpu->async_pf.queue)) { + struct kvm_async_pf *work = + list_entry(vcpu->async_pf.queue.next, + typeof(*work), queue); + cancel_work_sync(&work->work); + list_del(&work->queue); + if (!work->done) /* work was canceled */ + kmem_cache_free(async_pf_cache, work); + } + + spin_lock(&vcpu->async_pf.lock); + while (!list_empty(&vcpu->async_pf.done)) { + struct kvm_async_pf *work = + list_entry(vcpu->async_pf.done.next, + typeof(*work), link); + list_del(&work->link); + if (work->page) + put_page(work->page); + kmem_cache_free(async_pf_cache, work); + } + spin_unlock(&vcpu->async_pf.lock); + + vcpu->async_pf.queued = 0; +} + +void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) +{ + struct kvm_async_pf *work; + + if (list_empty_careful(&vcpu->async_pf.done)) + return; + + spin_lock(&vcpu->async_pf.lock); + work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link); + list_del(&work->link); + spin_unlock(&vcpu->async_pf.lock); + + kvm_arch_async_page_present(vcpu, work); + + list_del(&work->queue); + vcpu->async_pf.queued--; + if (work->page) + put_page(work->page); + kmem_cache_free(async_pf_cache, work); +} + +int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, + struct kvm_arch_async_pf *arch) +{ + struct kvm_async_pf *work; + + if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU) + return 0; + + /* setup delayed work */ + + /* + * do alloc nowait since if we are going to sleep anyway we + * may as well sleep faulting in page + */ + work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT); + if (!work) + return 0; + + work->page = NULL; + work->done = false; + work->vcpu = vcpu; + work->gva = gva; + work->addr = gfn_to_hva(vcpu->kvm, gfn); + work->arch = *arch; + work->mm = current->mm; + atomic_inc(&work->mm->mm_count); + kvm_get_kvm(work->vcpu->kvm); + + /* this can't really happen otherwise gfn_to_pfn_async + would succeed */ + if (unlikely(kvm_is_error_hva(work->addr))) + goto retry_sync; + + INIT_WORK(&work->work, async_pf_execute); + if (!schedule_work(&work->work)) + goto retry_sync; + + list_add_tail(&work->queue, &vcpu->async_pf.queue); + vcpu->async_pf.queued++; + kvm_arch_async_page_not_present(vcpu, work); + return 1; +retry_sync: + kvm_put_kvm(work->vcpu->kvm); + mmdrop(work->mm); + kmem_cache_free(async_pf_cache, work); + return 0; +} diff --git a/virt/kvm/async_pf.h b/virt/kvm/async_pf.h new file mode 100644 index 00000000000..e7ef6447cb8 --- /dev/null +++ b/virt/kvm/async_pf.h @@ -0,0 +1,36 @@ +/* + * kvm asynchronous fault support + * + * Copyright 2010 Red Hat, Inc. + * + * Author: + * Gleb Natapov <gleb@redhat.com> + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __KVM_ASYNC_PF_H__ +#define __KVM_ASYNC_PF_H__ + +#ifdef CONFIG_KVM_ASYNC_PF +int kvm_async_pf_init(void); +void kvm_async_pf_deinit(void); +void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu); +#else +#define kvm_async_pf_init() (0) +#define kvm_async_pf_deinit() do{}while(0) +#define kvm_async_pf_vcpu_init(C) do{}while(0) +#endif + +#endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5225052aebc..75fd590c021 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -55,6 +55,7 @@ #include <asm-generic/bitops/le.h> #include "coalesced_mmio.h" +#include "async_pf.h" #define CREATE_TRACE_POINTS #include <trace/events/kvm.h> @@ -186,6 +187,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->kvm = kvm; vcpu->vcpu_id = id; init_waitqueue_head(&vcpu->wq); + kvm_async_pf_vcpu_init(vcpu); page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) { @@ -946,15 +948,20 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_hva); -static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic) +static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, + bool *async) { struct page *page[1]; - int npages; + int npages = 0; pfn_t pfn; - if (atomic) + /* we can do it either atomically or asynchronously, not both */ + BUG_ON(atomic && async); + + if (atomic || async) npages = __get_user_pages_fast(addr, 1, 1, page); - else { + + if (unlikely(npages != 1) && !atomic) { might_sleep(); npages = get_user_pages_fast(addr, 1, 1, page); } @@ -976,6 +983,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic) if (vma == NULL || addr < vma->vm_start || !(vma->vm_flags & VM_PFNMAP)) { + if (async && !(vma->vm_flags & VM_PFNMAP) && + (vma->vm_flags & VM_WRITE)) + *async = true; up_read(¤t->mm->mmap_sem); return_fault_page: get_page(fault_page); @@ -993,32 +1003,41 @@ return_fault_page: pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr) { - return hva_to_pfn(kvm, addr, true); + return hva_to_pfn(kvm, addr, true, NULL); } EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); -static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic) +static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async) { unsigned long addr; + if (async) + *async = false; + addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) { get_page(bad_page); return page_to_pfn(bad_page); } - return hva_to_pfn(kvm, addr, atomic); + return hva_to_pfn(kvm, addr, atomic, async); } pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) { - return __gfn_to_pfn(kvm, gfn, true); + return __gfn_to_pfn(kvm, gfn, true, NULL); } EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic); +pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async) +{ + return __gfn_to_pfn(kvm, gfn, false, async); +} +EXPORT_SYMBOL_GPL(gfn_to_pfn_async); + pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) { - return __gfn_to_pfn(kvm, gfn, false); + return __gfn_to_pfn(kvm, gfn, false, NULL); } EXPORT_SYMBOL_GPL(gfn_to_pfn); @@ -1026,7 +1045,7 @@ pfn_t gfn_to_pfn_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn) { unsigned long addr = gfn_to_hva_memslot(slot, gfn); - return hva_to_pfn(kvm, addr, false); + return hva_to_pfn(kvm, addr, false, NULL); } int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, @@ -2336,6 +2355,10 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, goto out_free_5; } + r = kvm_async_pf_init(); + if (r) + goto out_free; + kvm_chardev_ops.owner = module; kvm_vm_fops.owner = module; kvm_vcpu_fops.owner = module; @@ -2343,7 +2366,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, r = misc_register(&kvm_dev); if (r) { printk(KERN_ERR "kvm: misc device register failed\n"); - goto out_free; + goto out_unreg; } kvm_preempt_ops.sched_in = kvm_sched_in; @@ -2353,6 +2376,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, return 0; +out_unreg: + kvm_async_pf_deinit(); out_free: kmem_cache_destroy(kvm_vcpu_cache); out_free_5: @@ -2385,6 +2410,7 @@ void kvm_exit(void) kvm_exit_debug(); misc_deregister(&kvm_dev); kmem_cache_destroy(kvm_vcpu_cache); + kvm_async_pf_deinit(); sysdev_unregister(&kvm_sysdev); sysdev_class_unregister(&kvm_sysdev_class); unregister_reboot_notifier(&kvm_reboot_notifier); |