Diffstat (limited to 'virt/kvm')
-rw-r--r--  virt/kvm/assigned-dev.c    |  15
-rw-r--r--  virt/kvm/coalesced_mmio.c  |   7
-rw-r--r--  virt/kvm/eventfd.c         |   1
-rw-r--r--  virt/kvm/ioapic.c          |   6
-rw-r--r--  virt/kvm/iommu.c           | 129
-rw-r--r--  virt/kvm/irq_comm.c        |  15
-rw-r--r--  virt/kvm/kvm_main.c        | 169
7 files changed, 239 insertions, 103 deletions
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 02ff2b19dbe..7c98928b09d 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -1,7 +1,7 @@
 /*
  * Kernel-based Virtual Machine - device assignment support
  *
- * Copyright (C) 2006-9 Red Hat, Inc
+ * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
  *
  * This work is licensed under the terms of the GNU GPL, version 2. See
  * the COPYING file in the top-level directory.
@@ -58,12 +58,10 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
 static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev;
-	struct kvm *kvm;
 	int i;
 
 	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
 				    interrupt_work);
-	kvm = assigned_dev->kvm;
 
 	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
@@ -316,12 +314,16 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
 				 kvm_assigned_dev_intr, 0,
 				 "kvm_assigned_msix_device",
 				 (void *)dev);
-		/* FIXME: free requested_irq's on failure */
 		if (r)
-			return r;
+			goto err;
 	}
 
 	return 0;
+err:
+	for (i -= 1; i >= 0; i--)
+		free_irq(dev->host_msix_entries[i].vector, (void *)dev);
+	pci_disable_msix(dev->dev);
+	return r;
 }
 
 #endif
@@ -444,9 +446,6 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 	struct kvm_assigned_dev_kernel *match;
 	unsigned long host_irq_type, guest_irq_type;
 
-	if (!capable(CAP_SYS_RAWIO))
-		return -EPERM;
-
 	if (!irqchip_in_kernel(kvm))
 		return r;
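A note on the assigned-dev.c hunk above: it replaces the old FIXME with a standard unwind path. If request_irq() fails for MSI-X entry i, every vector requested so far is freed and MSI-X is disabled before the error is returned. A minimal userspace sketch of the same unwind idiom, with hypothetical acquire()/release() helpers standing in for request_irq()/free_irq():

#include <stdio.h>

/* Hypothetical stand-ins for request_irq()/free_irq(). */
static int acquire(int i) { return i == 3 ? -1 : 0; /* 4th vector fails */ }
static void release(int i) { printf("released vector %d\n", i); }

/* Acquire n resources; on failure, unwind the ones already acquired
 * instead of leaking them (the old FIXME). */
static int acquire_all(int n)
{
	int i, r = 0;

	for (i = 0; i < n; i++) {
		r = acquire(i);
		if (r)
			goto err;
	}
	return 0;
err:
	for (i -= 1; i >= 0; i--)	/* entry i failed; start unwinding at i - 1 */
		release(i);
	return r;
}

int main(void)
{
	acquire_all(5);
	return 0;
}

The "for (i -= 1; i >= 0; i--)" shape mirrors the patch: the vector that failed was never requested, so the unwind starts one below it.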
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 36e25802964..fc8487564d1 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -2,6 +2,7 @@
  * KVM coalesced MMIO
  *
  * Copyright (c) 2008 Bull S.A.S.
+ * Copyright 2009 Red Hat, Inc. and/or its affiliates.
  *
  *  Author: Laurent Vivier <Laurent.Vivier@bull.net>
  *
@@ -120,8 +121,10 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
 	return ret;
 
 out_free_dev:
+	kvm->coalesced_mmio_dev = NULL;
 	kfree(dev);
 out_free_page:
+	kvm->coalesced_mmio_ring = NULL;
 	__free_page(page);
 out_err:
 	return ret;
@@ -139,7 +142,7 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 	struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
 
 	if (dev == NULL)
-		return -EINVAL;
+		return -ENXIO;
 
 	mutex_lock(&kvm->slots_lock);
 	if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
@@ -162,7 +165,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
 	struct kvm_coalesced_mmio_zone *z;
 
 	if (dev == NULL)
-		return -EINVAL;
+		return -ENXIO;
 
 	mutex_lock(&kvm->slots_lock);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b81f0ebbaaa..66cf65b510b 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -2,6 +2,7 @@
  * kvm eventfd support - use eventfd objects to signal various KVM events
  *
  * Copyright 2009 Novell.  All Rights Reserved.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  *
  * Author:
  *	Gregory Haskins <ghaskins@novell.com>
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 7c79c1d76d0..0b9df8303dc 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -1,5 +1,6 @@
 /*
  *  Copyright (C) 2001  MandrakeSoft S.A.
+ *  Copyright 2010 Red Hat, Inc. and/or its affiliates.
  *
  *    MandrakeSoft S.A.
  *    43, rue d'Aboukir
@@ -151,7 +152,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 		update_handled_vectors(ioapic);
 		mask_after = e->fields.mask;
 		if (mask_before != mask_after)
-			kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
+			kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
 		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
 		    && ioapic->irr & (1 << index))
 			ioapic_service(ioapic, index);
@@ -192,12 +193,13 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 {
-	u32 old_irr = ioapic->irr;
+	u32 old_irr;
 	u32 mask = 1 << irq;
 	union kvm_ioapic_redirect_entry entry;
 	int ret = 1;
 
 	spin_lock(&ioapic->lock);
+	old_irr = ioapic->irr;
 	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
 		entry = ioapic->redirtbl[irq];
 		level ^= entry.fields.polarity;
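The kvm_ioapic_set_irq() hunk is a subtle race fix: old_irr was previously snapshotted from ioapic->irr before spin_lock(&ioapic->lock) was taken, so a concurrent writer could make the later comparison against ioapic->irr operate on a stale value. The safe shape, sketched with a pthread mutex standing in for the ioapic spinlock (a hedged illustration, not KVM code):

#include <pthread.h>
#include <stdio.h>

struct ioapic_like {
	pthread_mutex_t lock;
	unsigned int irr;
};

/* Snapshot shared state only after the lock is held; reading irr
 * before taking the lock (the old code's shape) can race with a
 * concurrent writer and base the "changed" decision on stale data. */
static int set_irq(struct ioapic_like *io, unsigned int mask)
{
	unsigned int old_irr;
	int changed;

	pthread_mutex_lock(&io->lock);
	old_irr = io->irr;		/* moved under the lock, as in the fix */
	io->irr |= mask;
	changed = (old_irr != io->irr);
	pthread_mutex_unlock(&io->lock);

	return changed;
}

int main(void)
{
	struct ioapic_like io = { PTHREAD_MUTEX_INITIALIZER, 0 };

	printf("%d %d\n", set_irq(&io, 1), set_irq(&io, 1));
	return 0;
}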
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 80fd3ad3b2d..62a9caf0563 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -16,6 +16,8 @@
  *
  * Copyright (C) 2006-2008 Intel Corporation
  * Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ *
  * Author: Allen M. Kay <allen.m.kay@intel.com>
  * Author: Weidong Han <weidong.han@intel.com>
  * Author: Ben-Ami Yassour <benami@il.ibm.com>
@@ -32,12 +34,30 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
 static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages);
 
+static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
+			   gfn_t gfn, unsigned long size)
+{
+	gfn_t end_gfn;
+	pfn_t pfn;
+
+	pfn     = gfn_to_pfn_memslot(kvm, slot, gfn);
+	end_gfn = gfn + (size >> PAGE_SHIFT);
+	gfn    += 1;
+
+	if (is_error_pfn(pfn))
+		return pfn;
+
+	while (gfn < end_gfn)
+		gfn_to_pfn_memslot(kvm, slot, gfn++);
+
+	return pfn;
+}
+
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
-	gfn_t gfn = slot->base_gfn;
-	unsigned long npages = slot->npages;
+	gfn_t gfn, end_gfn;
 	pfn_t pfn;
-	int i, r = 0;
+	int r = 0;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
 	int flags;
 
@@ -45,46 +65,79 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 	if (!domain)
 		return 0;
 
+	gfn     = slot->base_gfn;
+	end_gfn = gfn + slot->npages;
+
 	flags = IOMMU_READ | IOMMU_WRITE;
 	if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
 		flags |= IOMMU_CACHE;
 
-	for (i = 0; i < npages; i++) {
-		/* check if already mapped */
-		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
+	while (gfn < end_gfn) {
+		unsigned long page_size;
+
+		/* Check if already mapped */
+		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
+			gfn += 1;
+			continue;
+		}
+
+		/* Get the page size we could use to map */
+		page_size = kvm_host_page_size(kvm, gfn);
+
+		/* Make sure the page_size does not exceed the memslot */
+		while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
+			page_size >>= 1;
+
+		/* Make sure gfn is aligned to the page size we want to map */
+		while ((gfn << PAGE_SHIFT) & (page_size - 1))
+			page_size >>= 1;
+
+		/*
+		 * Pin all pages we are about to map in memory. This is
+		 * important because we unmap and unpin in 4kb steps later.
+		 */
+		pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
+		if (is_error_pfn(pfn)) {
+			gfn += 1;
 			continue;
+		}
 
-		pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
-		r = iommu_map_range(domain,
-				    gfn_to_gpa(gfn),
-				    pfn_to_hpa(pfn),
-				    PAGE_SIZE, flags);
+		/* Map into IO address space */
+		r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
+			      get_order(page_size), flags);
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
-			       "iommu failed to map pfn=%lx\n", pfn);
+			       "iommu failed to map pfn=%llx\n", pfn);
 			goto unmap_pages;
 		}
-		gfn++;
+
+		gfn += page_size >> PAGE_SHIFT;
 	}
+
 	return 0;
 
 unmap_pages:
-	kvm_iommu_put_pages(kvm, slot->base_gfn, i);
+	kvm_iommu_put_pages(kvm, slot->base_gfn, gfn);
 	return r;
 }
 
 static int kvm_iommu_map_memslots(struct kvm *kvm)
 {
-	int i, r = 0;
+	int i, idx, r = 0;
 	struct kvm_memslots *slots;
 
-	slots = rcu_dereference(kvm->memslots);
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = kvm_memslots(kvm);
 
 	for (i = 0; i < slots->nmemslots; i++) {
 		r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
 		if (r)
 			break;
 	}
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	return r;
 }
@@ -189,40 +242,62 @@ out_unmap:
 	return r;
 }
 
+static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
+{
+	unsigned long i;
+
+	for (i = 0; i < npages; ++i)
+		kvm_release_pfn_clean(pfn + i);
+}
+
 static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages)
 {
-	gfn_t gfn = base_gfn;
+	struct iommu_domain *domain;
+	gfn_t end_gfn, gfn;
 	pfn_t pfn;
-	struct iommu_domain *domain = kvm->arch.iommu_domain;
-	unsigned long i;
 	u64 phys;
 
+	domain  = kvm->arch.iommu_domain;
+	end_gfn = base_gfn + npages;
+	gfn     = base_gfn;
+
 	/* check if iommu exists and in use */
 	if (!domain)
 		return;
 
-	for (i = 0; i < npages; i++) {
+	while (gfn < end_gfn) {
+		unsigned long unmap_pages;
+		int order;
+
+		/* Get physical address */
 		phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
-		pfn = phys >> PAGE_SHIFT;
-		kvm_release_pfn_clean(pfn);
-		gfn++;
-	}
+		pfn  = phys >> PAGE_SHIFT;
+
+		/* Unmap address from IO address space */
+		order       = iommu_unmap(domain, gfn_to_gpa(gfn), 0);
+		unmap_pages = 1ULL << order;
 
-	iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages);
+		/* Unpin all pages we just unmapped to not leak any memory */
+		kvm_unpin_pages(kvm, pfn, unmap_pages);
+
+		gfn += unmap_pages;
+	}
 }
 
 static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 {
-	int i;
+	int i, idx;
 	struct kvm_memslots *slots;
 
-	slots = rcu_dereference(kvm->memslots);
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = kvm_memslots(kvm);
 
 	for (i = 0; i < slots->nmemslots; i++) {
 		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
 				    slots->memslots[i].npages);
 	}
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	return 0;
 }
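The new mapping loop in kvm_iommu_map_pages() starts from the host page size backing a gfn and halves it until (a) the mapping fits inside the memslot and (b) the gfn is aligned to the chunk being mapped; only then is the whole chunk pinned and handed to iommu_map(). The trimming arithmetic is easy to check standalone (assuming the usual 4KB PAGE_SHIFT of 12; trim() is a hypothetical extraction of the two while loops):

#include <stdio.h>

#define PAGE_SHIFT 12

/* Trim page_size so the mapping stays inside [gfn, end_gfn) and gfn
 * is aligned to the chunk we intend to map - the same two while
 * loops added to kvm_iommu_map_pages(). */
static unsigned long trim(unsigned long long gfn, unsigned long long end_gfn,
			  unsigned long page_size)
{
	/* Make sure the page_size does not exceed the memslot */
	while (gfn + (page_size >> PAGE_SHIFT) > end_gfn)
		page_size >>= 1;

	/* Make sure gfn is aligned to the page size we want to map */
	while ((gfn << PAGE_SHIFT) & (page_size - 1))
		page_size >>= 1;

	return page_size;
}

int main(void)
{
	/* 2MB host page, but gfn 0x201 is only 4KB aligned -> 4KB chunk */
	printf("%lu\n", trim(0x201, 0x400, 2UL << 20));
	/* gfn 0x200 is 2MB aligned and fits in the slot -> full 2MB chunk */
	printf("%lu\n", trim(0x200, 0x400, 2UL << 20));
	return 0;
}

The first call trims all the way down to 4096 because of the misaligned gfn; the second keeps the whole 2097152-byte chunk.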
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a0e88809e45..369e38010ad 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -17,6 +17,7 @@
  * Authors:
  *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
  *
+ * Copyright 2010 Red Hat, Inc. and/or its affilates.
  */
 
 #include <linux/kvm_host.h>
@@ -99,7 +100,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 			if (r < 0)
 				r = 0;
 			r += kvm_apic_set_irq(vcpu, irq);
-		} else {
+		} else if (kvm_lapic_enabled(vcpu)) {
 			if (!lowest)
 				lowest = vcpu;
 			else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
@@ -278,15 +279,19 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 	synchronize_rcu();
 }
 
-void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+			     bool mask)
 {
 	struct kvm_irq_mask_notifier *kimn;
 	struct hlist_node *n;
+	int gsi;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
-		if (kimn->irq == irq)
-			kimn->func(kimn, mask);
+	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
+			if (kimn->irq == gsi)
+				kimn->func(kimn, mask);
 	rcu_read_unlock();
 }
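kvm_fire_mask_notifiers() now takes an (irqchip, pin) pair and resolves it to a GSI through the irq routing table under RCU, firing notifiers only for pins that are actually routed; -1 acts as the "unrouted" sentinel. A toy version of that lookup-then-dispatch shape (the chip[][] table and notify() are invented for illustration):

#include <stdio.h>

#define NR_CHIPS 2
#define NR_PINS  24

static int chip[NR_CHIPS][NR_PINS];	/* invented routing table: GSI or -1 */

static void notify(int gsi, int mask)
{
	printf("gsi %d %smasked\n", gsi, mask ? "" : "un");
}

static void fire_mask_notifiers(unsigned irqchip, unsigned pin, int mask)
{
	int gsi = chip[irqchip][pin];

	if (gsi != -1)		/* unrouted pins fire nothing */
		notify(gsi, mask);
}

int main(void)
{
	for (int c = 0; c < NR_CHIPS; c++)	/* every pin starts unrouted */
		for (int p = 0; p < NR_PINS; p++)
			chip[c][p] = -1;
	chip[0][4] = 12;			/* route chip 0, pin 4 -> GSI 12 */

	fire_mask_notifiers(0, 4, 1);		/* routed: fires */
	fire_mask_notifiers(1, 0, 1);		/* unrouted: silent */
	return 0;
}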
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c82ae249263..b78b794c103 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5,6 +5,7 @@
  * machines without emulation or binary translation.
  *
  * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affilates.
  *
  * Authors:
  *   Avi Kivity   <avi@qumranet.com>
@@ -92,6 +93,12 @@ static bool kvm_rebooting;
 
 static bool largepages_enabled = true;
 
+static struct page *hwpoison_page;
+static pfn_t hwpoison_pfn;
+
+static struct page *fault_page;
+static pfn_t fault_pfn;
+
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn)) {
@@ -141,7 +148,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 	raw_spin_lock(&kvm->requests_lock);
 	me = smp_processor_id();
 	kvm_for_each_vcpu(i, vcpu, kvm) {
-		if (test_and_set_bit(req, &vcpu->requests))
+		if (kvm_make_check_request(req, vcpu))
 			continue;
 		cpu = vcpu->cpu;
 		if (cpus != NULL && cpu != -1 && cpu != me)
@@ -422,9 +429,6 @@ static struct kvm *kvm_create_vm(void)
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	kvm_coalesced_mmio_init(kvm);
-#endif
 out:
 	return kvm;
 
@@ -560,11 +564,16 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
 	npages = mem->memory_size >> PAGE_SHIFT;
 
+	r = -EINVAL;
+	if (npages > KVM_MEM_MAX_NR_PAGES)
+		goto out;
+
 	if (!npages)
 		mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
 
 	new = old = *memslot;
 
+	new.id = mem->slot;
 	new.base_gfn = base_gfn;
 	new.npages = npages;
 	new.flags = mem->flags;
@@ -595,7 +604,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	/* Allocate if a slot is being created */
#ifndef CONFIG_S390
 	if (npages && !new.rmap) {
-		new.rmap = vmalloc(npages * sizeof(struct page *));
+		new.rmap = vmalloc(npages * sizeof(*new.rmap));
 
 		if (!new.rmap)
 			goto out_free;
@@ -620,9 +629,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		if (new.lpage_info[i])
 			continue;
 
-		lpages = 1 + (base_gfn + npages - 1) /
-			     KVM_PAGES_PER_HPAGE(level);
-		lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level);
+		lpages = 1 + ((base_gfn + npages - 1)
+			     >> KVM_HPAGE_GFN_SHIFT(level));
+		lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level);
 
 		new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i]));
 
@@ -632,9 +641,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		memset(new.lpage_info[i], 0,
 		       lpages * sizeof(*new.lpage_info[i]));
 
-		if (base_gfn % KVM_PAGES_PER_HPAGE(level))
+		if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
 			new.lpage_info[i][0].write_count = 1;
-		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level))
+		if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
 			new.lpage_info[i][lpages - 1].write_count = 1;
 		ugfn = new.userspace_addr >> PAGE_SHIFT;
 		/*
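The lpage_info hunks above swap "/" and "%" by KVM_PAGES_PER_HPAGE(level) for ">>" and "& (n - 1)"; that rewrite is valid only because the huge-page sizes are powers of two. A quick standalone check of the equivalence (512 pages per 2MB huge page assumed):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long x, n = 512;	/* 4KB pages per 2MB huge page */
	unsigned int shift = 9;		/* log2(512) */

	for (x = 0; x < 1000000; x++) {
		assert((x % n) == (x & (n - 1)));	/* mask replaces modulo */
		assert((x / n) == (x >> shift));	/* shift replaces divide */
	}
	printf("shift/mask forms agree for all tested values\n");
	return 0;
}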
@@ -809,16 +818,28 @@ EXPORT_SYMBOL_GPL(kvm_disable_largepages);
 
 int is_error_page(struct page *page)
 {
-	return page == bad_page;
+	return page == bad_page || page == hwpoison_page || page == fault_page;
 }
 EXPORT_SYMBOL_GPL(is_error_page);
 
 int is_error_pfn(pfn_t pfn)
 {
-	return pfn == bad_pfn;
+	return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn;
 }
 EXPORT_SYMBOL_GPL(is_error_pfn);
 
+int is_hwpoison_pfn(pfn_t pfn)
+{
+	return pfn == hwpoison_pfn;
+}
+EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
+
+int is_fault_pfn(pfn_t pfn)
+{
+	return pfn == fault_pfn;
+}
+EXPORT_SYMBOL_GPL(is_fault_pfn);
+
 static inline unsigned long bad_hva(void)
 {
 	return PAGE_OFFSET;
@@ -830,10 +851,10 @@ int kvm_is_error_hva(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 
-struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+	struct kvm_memslots *slots = kvm_memslots(kvm);
 
 	for (i = 0; i < slots->nmemslots; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -844,20 +865,13 @@ struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 	}
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
-
-struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
-{
-	gfn = unalias_gfn(kvm, gfn);
-	return gfn_to_memslot_unaliased(kvm, gfn);
-}
+EXPORT_SYMBOL_GPL(gfn_to_memslot);
 
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+	struct kvm_memslots *slots = kvm_memslots(kvm);
 
-	gfn = unalias_gfn_instantiation(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
@@ -899,10 +913,9 @@ out:
 int memslot_id(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+	struct kvm_memslots *slots = kvm_memslots(kvm);
 	struct kvm_memory_slot *memslot = NULL;
 
-	gfn = unalias_gfn(kvm, gfn);
 	for (i = 0; i < slots->nmemslots; ++i) {
 		memslot = &slots->memslots[i];
 
@@ -914,15 +927,19 @@ int memslot_id(struct kvm *kvm, gfn_t gfn)
 	return memslot - slots->memslots;
 }
 
+static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
+}
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
 
-	gfn = unalias_gfn_instantiation(kvm, gfn);
-	slot = gfn_to_memslot_unaliased(kvm, gfn);
+	slot = gfn_to_memslot(kvm, gfn);
 	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
-	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+	return gfn_to_hva_memslot(slot, gfn);
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
@@ -940,13 +957,19 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
 		struct vm_area_struct *vma;
 
 		down_read(&current->mm->mmap_sem);
+		if (is_hwpoison_address(addr)) {
+			up_read(&current->mm->mmap_sem);
+			get_page(hwpoison_page);
+			return page_to_pfn(hwpoison_page);
+		}
+
 		vma = find_vma(current->mm, addr);
 
 		if (vma == NULL || addr < vma->vm_start ||
 		    !(vma->vm_flags & VM_PFNMAP)) {
 			up_read(&current->mm->mmap_sem);
-			get_page(bad_page);
-			return page_to_pfn(bad_page);
+			get_page(fault_page);
+			return page_to_pfn(fault_page);
 		}
 
 		pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
@@ -972,11 +995,6 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
-static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
-{
-	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
-}
-
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
 			 struct kvm_memory_slot *slot, gfn_t gfn)
 {
@@ -1186,17 +1204,11 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *memslot;
 
-	gfn = unalias_gfn(kvm, gfn);
-	memslot = gfn_to_memslot_unaliased(kvm, gfn);
+	memslot = gfn_to_memslot(kvm, gfn);
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
-		unsigned long *p = memslot->dirty_bitmap +
-					rel_gfn / BITS_PER_LONG;
-		int offset = rel_gfn % BITS_PER_LONG;
-
-		/* avoid RMW */
-		if (!generic_test_le_bit(offset, p))
-			generic___set_le_bit(offset, p);
+		generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
 	}
 }
 
@@ -1211,7 +1223,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
 		if (kvm_arch_vcpu_runnable(vcpu)) {
-			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
+			kvm_make_request(KVM_REQ_UNHALT, vcpu);
 			break;
 		}
 		if (kvm_cpu_has_pending_timer(vcpu))
@@ -1382,6 +1394,18 @@ static long kvm_vcpu_ioctl(struct file *filp,
 
 	if (vcpu->kvm->mm != current->mm)
 		return -EIO;
+
+#if defined(CONFIG_S390) || defined(CONFIG_PPC)
+	/*
+	 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
+	 * so vcpu_load() would break it.
+	 */
+	if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT)
+		return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
+#endif
+
+
+	vcpu_load(vcpu);
 	switch (ioctl) {
 	case KVM_RUN:
 		r = -EINVAL;
@@ -1524,7 +1548,7 @@ out_free2:
 				goto out;
 			p = &sigset;
 		}
-		r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
+		r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
 		break;
 	}
 	case KVM_GET_FPU: {
@@ -1559,6 +1583,7 @@ out_free2:
 		r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
 	}
 out:
+	vcpu_put(vcpu);
 	kfree(fpu);
 	kfree(kvm_sregs);
 	return r;
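The kvm_vcpu_ioctl() hunks hoist vcpu_load() out of the individual cases into a single call before the switch, with vcpu_put() at the shared out: label; the S390/PPC interrupt ioctls, which are asynchronous to vcpu execution, return early before the load. A sketch of that bracket-with-exemption structure, with a pthread mutex playing the role of the vcpu mutex (names are illustrative, not the KVM API):

#include <pthread.h>
#include <stdio.h>

struct vcpu {
	pthread_mutex_t mutex;	/* stands in for vcpu_load()/vcpu_put() */
};

#define IOCTL_INTERRUPT 1	/* async: must not take the vcpu mutex */
#define IOCTL_RUN	2

static long arch_ioctl(struct vcpu *v, int ioctl)
{
	(void)v; (void)ioctl;
	return 0;
}

static long vcpu_ioctl(struct vcpu *v, int ioctl)
{
	long r;

	/* Special case: asynchronous to vcpu execution, so taking the
	 * vcpu mutex here could block behind a running vcpu. */
	if (ioctl == IOCTL_INTERRUPT)
		return arch_ioctl(v, ioctl);

	pthread_mutex_lock(&v->mutex);		/* vcpu_load() */
	switch (ioctl) {
	case IOCTL_RUN:
		r = 0;	/* ... dispatch ... */
		break;
	default:
		r = arch_ioctl(v, ioctl);
	}
	pthread_mutex_unlock(&v->mutex);	/* vcpu_put() at out: */
	return r;
}

int main(void)
{
	struct vcpu v = { PTHREAD_MUTEX_INITIALIZER };

	printf("%ld %ld\n", vcpu_ioctl(&v, IOCTL_INTERRUPT),
	       vcpu_ioctl(&v, IOCTL_RUN));
	return 0;
}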
@@ -1609,7 +1634,6 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = -EFAULT;
 		if (copy_from_user(&zone, argp, sizeof zone))
 			goto out;
-		r = -ENXIO;
 		r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
 		if (r)
 			goto out;
@@ -1621,7 +1645,6 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = -EFAULT;
 		if (copy_from_user(&zone, argp, sizeof zone))
 			goto out;
-		r = -ENXIO;
 		r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
 		if (r)
 			goto out;
@@ -1755,12 +1778,19 @@ static struct file_operations kvm_vm_fops = {
 
 static int kvm_dev_ioctl_create_vm(void)
 {
-	int fd;
+	int fd, r;
 	struct kvm *kvm;
 
 	kvm = kvm_create_vm();
 	if (IS_ERR(kvm))
 		return PTR_ERR(kvm);
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	r = kvm_coalesced_mmio_init(kvm);
+	if (r < 0) {
+		kvm_put_kvm(kvm);
+		return r;
+	}
+#endif
 	fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
 	if (fd < 0)
 		kvm_put_kvm(kvm);
@@ -1928,11 +1958,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 		       cpu);
 		hardware_disable(NULL);
 		break;
-	case CPU_UP_CANCELED:
-		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
-		       cpu);
-		smp_call_function_single(cpu, hardware_disable, NULL, 1);
-		break;
 	case CPU_ONLINE:
 		printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
 		       cpu);
@@ -1991,7 +2016,9 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val)
 {
 	int i;
-	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
+	struct kvm_io_bus *bus;
+
+	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
 			return 0;
@@ -2003,8 +2030,9 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		    int len, void *val)
 {
 	int i;
-	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
+	struct kvm_io_bus *bus;
 
+	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
 			return 0;
@@ -2179,7 +2207,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
 	kvm_arch_vcpu_put(vcpu);
 }
 
-int kvm_init(void *opaque, unsigned int vcpu_size,
+int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	     struct module *module)
 {
 	int r;
@@ -2198,6 +2226,24 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
 	bad_pfn = page_to_pfn(bad_page);
 
+	hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+	if (hwpoison_page == NULL) {
+		r = -ENOMEM;
+		goto out_free_0;
+	}
+
+	hwpoison_pfn = page_to_pfn(hwpoison_page);
+
+	fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+	if (fault_page == NULL) {
+		r = -ENOMEM;
+		goto out_free_0;
+	}
+
+	fault_pfn = page_to_pfn(fault_page);
+
 	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
 		r = -ENOMEM;
 		goto out_free_0;
@@ -2229,8 +2275,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 		goto out_free_4;
 
 	/* A kmem cache lets us meet the alignment requirements of fx_save. */
-	kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
-					   __alignof__(struct kvm_vcpu),
+	if (!vcpu_align)
+		vcpu_align = __alignof__(struct kvm_vcpu);
+	kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
 					   0, NULL);
 	if (!kvm_vcpu_cache) {
 		r = -ENOMEM;
@@ -2269,6 +2316,10 @@ out_free_1:
 out_free_0a:
 	free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
+	if (fault_page)
+		__free_page(fault_page);
+	if (hwpoison_page)
+		__free_page(hwpoison_page);
 	__free_page(bad_page);
 out:
 	kvm_arch_exit();
@@ -2279,7 +2330,6 @@ EXPORT_SYMBOL_GPL(kvm_init);
 
 void kvm_exit(void)
 {
-	tracepoint_synchronize_unregister();
 	kvm_exit_debug();
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
@@ -2291,6 +2341,7 @@ void kvm_exit(void)
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
 	free_cpumask_var(cpus_hardware_enabled);
+	__free_page(hwpoison_page);
 	__free_page(bad_page);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
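Finally, kvm_init() grows a vcpu_align parameter so an architecture can request stricter alignment for the vcpu kmem cache (x86 needs it for the fx_save area); passing 0 falls back to __alignof__(struct kvm_vcpu). The same defaulting pattern in standalone C11, using aligned_alloc() in place of the kmem cache (vcpu_alloc() is a hypothetical analogue, not KVM code):

#include <stdalign.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct vcpu { char state[64]; };

/* Mirror of the kvm_init() change: align == 0 falls back to the type's
 * natural alignment; callers with stricter needs pass an explicit value. */
static void *vcpu_alloc(size_t size, size_t align)
{
	if (!align)
		align = alignof(struct vcpu);
	size = (size + align - 1) & ~(align - 1);  /* aligned_alloc wants a multiple */
	return aligned_alloc(align, size);
}

int main(void)
{
	void *v = vcpu_alloc(sizeof(struct vcpu), 64);

	printf("%p 64-byte aligned: %d\n", v, (int)(((uintptr_t)v & 63) == 0));
	free(v);
	return 0;
}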