From e32c8f2c0720fb21c6f4a5f6ccbebdadc878f707 Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Mon, 14 Jul 2008 14:00:00 +0200 Subject: KVM: kvmtrace: Remove use of bit fields in kvm trace structure This patch fixes kvmtrace use on big endian systems. When using bit fields the compiler will lay data out in the wrong order expected when laid down into a file. This fixes it by using one variable instead of using bit fields. Signed-off-by: Jerone Young Signed-off-by: Christian Ehrhardt Signed-off-by: Avi Kivity --- virt/kvm/kvm_trace.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c index 58141f31ea8..9acb78b3cc9 100644 --- a/virt/kvm/kvm_trace.c +++ b/virt/kvm/kvm_trace.c @@ -54,12 +54,13 @@ static void kvm_add_trace(void *probe_private, void *call_data, struct kvm_trace *kt = kvm_trace; struct kvm_trace_rec rec; struct kvm_vcpu *vcpu; - int i, extra, size; + int i, size; + u32 extra; if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) return; - rec.event = va_arg(*args, u32); + rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32)); vcpu = va_arg(*args, struct kvm_vcpu *); rec.pid = current->tgid; rec.vcpu_id = vcpu->vcpu_id; @@ -67,21 +68,21 @@ static void kvm_add_trace(void *probe_private, void *call_data, extra = va_arg(*args, u32); WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); - rec.extra_u32 = extra; - rec.cycle_in = p->cycle_in; + rec.rec_val |= TRACE_REC_TCS(p->cycle_in) + | TRACE_REC_NUM_DATA_ARGS(extra); - if (rec.cycle_in) { + if (p->cycle_in) { rec.u.cycle.cycle_u64 = get_cycles(); - for (i = 0; i < rec.extra_u32; i++) + for (i = 0; i < extra; i++) rec.u.cycle.extra_u32[i] = va_arg(*args, u32); } else { - for (i = 0; i < rec.extra_u32; i++) + for (i = 0; i < extra; i++) rec.u.nocycle.extra_u32[i] = va_arg(*args, u32); } - size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32)); + size = calc_rec_size(p->cycle_in, extra * sizeof(u32)); relay_write(kt->rchan, &rec, size); } -- cgit v1.2.3-70-g09d2 From 3f7f95c65ef6a89472a28da1b9436eaeee288831 Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Mon, 14 Jul 2008 14:00:01 +0200 Subject: KVM: kvmtrace: replace get_cycles with ktime_get v3 The current kvmtrace code uses get_cycles() while the interpretation would be easier using using nanoseconds. ktime_get() should give at least the same accuracy as get_cycles on all architectures (even better on 32bit archs) but at a better unit (e.g. comparable between hosts with different frequencies. [avi: avoid ktime_t in public header] Signed-off-by: Christian Ehrhardt Acked-by: Christian Borntraeger Signed-off-by: Avi Kivity --- include/linux/kvm.h | 6 +++--- virt/kvm/kvm_trace.c | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'virt/kvm') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 8a16b083df2..5d08f11bb27 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -322,12 +322,12 @@ struct kvm_trace_rec { __u32 vcpu_id; union { struct { - __u64 cycle_u64; + __u64 timestamp; __u32 extra_u32[KVM_TRC_EXTRA_MAX]; - } __attribute__((packed)) cycle; + } __attribute__((packed)) timestamp; struct { __u32 extra_u32[KVM_TRC_EXTRA_MAX]; - } nocycle; + } notimestamp; } u; }; diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c index 9acb78b3cc9..41dcc845f78 100644 --- a/virt/kvm/kvm_trace.c +++ b/virt/kvm/kvm_trace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -35,16 +36,16 @@ static struct kvm_trace *kvm_trace; struct kvm_trace_probe { const char *name; const char *format; - u32 cycle_in; + u32 timestamp_in; marker_probe_func *probe_func; }; -static inline int calc_rec_size(int cycle, int extra) +static inline int calc_rec_size(int timestamp, int extra) { int rec_size = KVM_TRC_HEAD_SIZE; rec_size += extra; - return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; + return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; } static void kvm_add_trace(void *probe_private, void *call_data, @@ -69,20 +70,20 @@ static void kvm_add_trace(void *probe_private, void *call_data, WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); - rec.rec_val |= TRACE_REC_TCS(p->cycle_in) + rec.rec_val |= TRACE_REC_TCS(p->timestamp_in) | TRACE_REC_NUM_DATA_ARGS(extra); - if (p->cycle_in) { - rec.u.cycle.cycle_u64 = get_cycles(); + if (p->timestamp_in) { + rec.u.timestamp.timestamp = ktime_to_ns(ktime_get()); for (i = 0; i < extra; i++) - rec.u.cycle.extra_u32[i] = va_arg(*args, u32); + rec.u.timestamp.extra_u32[i] = va_arg(*args, u32); } else { for (i = 0; i < extra; i++) - rec.u.nocycle.extra_u32[i] = va_arg(*args, u32); + rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32); } - size = calc_rec_size(p->cycle_in, extra * sizeof(u32)); + size = calc_rec_size(p->timestamp_in, extra * sizeof(u32)); relay_write(kt->rchan, &rec, size); } -- cgit v1.2.3-70-g09d2 From f52447261bc8c21dfd4635196e32d2da1352f589 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Sat, 26 Jul 2008 17:01:00 -0300 Subject: KVM: irq ack notification Based on a patch from: Ben-Ami Yassour which was based on a patch from: Amit Shah Notify IRQ acking on PIC/APIC emulation. The previous patch missed two things: - Edge triggered interrupts on IOAPIC - PIC reset with IRR/ISR set should be equivalent to ack (LAPIC probably needs something similar). Signed-off-by: Marcelo Tosatti CC: Amit Shah CC: Ben-Ami Yassour Signed-off-by: Avi Kivity --- arch/x86/kvm/i8259.c | 12 +++++++++++- arch/x86/kvm/irq.c | 2 +- arch/x86/kvm/irq.h | 3 ++- arch/x86/kvm/lapic.c | 7 +++++-- virt/kvm/ioapic.c | 20 +++++++++++++------- virt/kvm/ioapic.h | 3 ++- 6 files changed, 34 insertions(+), 13 deletions(-) (limited to 'virt/kvm') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 55e179ad98e..de704995b81 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -159,9 +159,10 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq) s->irr &= ~(1 << irq); } -int kvm_pic_read_irq(struct kvm_pic *s) +int kvm_pic_read_irq(struct kvm *kvm) { int irq, irq2, intno; + struct kvm_pic *s = pic_irqchip(kvm); irq = pic_get_irq(&s->pics[0]); if (irq >= 0) { @@ -187,12 +188,21 @@ int kvm_pic_read_irq(struct kvm_pic *s) intno = s->pics[0].irq_base + irq; } pic_update_irq(s); + kvm_notify_acked_irq(kvm, irq); return intno; } void kvm_pic_reset(struct kvm_kpic_state *s) { + int irq; + struct kvm *kvm = s->pics_state->irq_request_opaque; + + for (irq = 0; irq < PIC_NUM_PINS; irq++) { + if (!(s->imr & (1 << irq)) && (s->irr & (1 << irq) || + s->isr & (1 << irq))) + kvm_notify_acked_irq(kvm, irq); + } s->last_irr = 0; s->irr = 0; s->imr = 0; diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 90911958d85..3c508afaa28 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -72,7 +72,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) if (kvm_apic_accept_pic_intr(v)) { s = pic_irqchip(v->kvm); s->output = 0; /* PIC */ - vector = kvm_pic_read_irq(s); + vector = kvm_pic_read_irq(v->kvm); } } return vector; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 95fe718e3ab..479a3d2d561 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -63,11 +63,12 @@ struct kvm_pic { void *irq_request_opaque; int output; /* intr from master PIC */ struct kvm_io_device dev; + void (*ack_notifier)(void *opaque, int irq); }; struct kvm_pic *kvm_create_pic(struct kvm *kvm); void kvm_pic_set_irq(void *opaque, int irq, int level); -int kvm_pic_read_irq(struct kvm_pic *s); +int kvm_pic_read_irq(struct kvm *kvm); void kvm_pic_update_irq(struct kvm_pic *s); static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9fde0ac2426..be94f93a73f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -439,7 +439,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, static void apic_set_eoi(struct kvm_lapic *apic) { int vector = apic_find_highest_isr(apic); - + int trigger_mode; /* * Not every write EOI will has corresponding ISR, * one example is when Kernel check timer on setup_IO_APIC @@ -451,7 +451,10 @@ static void apic_set_eoi(struct kvm_lapic *apic) apic_update_ppr(apic); if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR)) - kvm_ioapic_update_eoi(apic->vcpu->kvm, vector); + trigger_mode = IOAPIC_LEVEL_TRIG; + else + trigger_mode = IOAPIC_EDGE_TRIG; + kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); } static void apic_send_ipi(struct kvm_lapic *apic) diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index c0d22870ee9..515cd7ce761 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -39,6 +39,7 @@ #include "ioapic.h" #include "lapic.h" +#include "irq.h" #if 0 #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) @@ -285,26 +286,31 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) } } -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi) +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi, + int trigger_mode) { union ioapic_redir_entry *ent; ent = &ioapic->redirtbl[gsi]; - ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); - ent->fields.remote_irr = 0; - if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) - ioapic_service(ioapic, gsi); + kvm_notify_acked_irq(ioapic->kvm, gsi); + + if (trigger_mode == IOAPIC_LEVEL_TRIG) { + ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); + ent->fields.remote_irr = 0; + if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) + ioapic_service(ioapic, gsi); + } } -void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) +void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; int i; for (i = 0; i < IOAPIC_NUM_PINS; i++) if (ioapic->redirtbl[i].fields.vector == vector) - __kvm_ioapic_update_eoi(ioapic, i); + __kvm_ioapic_update_eoi(ioapic, i, trigger_mode); } static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr, diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 7f16675fe78..b52732f493c 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -58,6 +58,7 @@ struct kvm_ioapic { } redirtbl[IOAPIC_NUM_PINS]; struct kvm_io_device dev; struct kvm *kvm; + void (*ack_notifier)(void *opaque, int irq); }; #ifdef DEBUG @@ -87,7 +88,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm) struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, unsigned long bitmap); -void kvm_ioapic_update_eoi(struct kvm *kvm, int vector); +void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); -- cgit v1.2.3-70-g09d2 From cbff90a7caa49507d399c9a55ba4a411e840bfb4 Mon Sep 17 00:00:00 2001 From: Ben-Ami Yassour Date: Mon, 28 Jul 2008 19:26:24 +0300 Subject: KVM: direct mmio pfn check Userspace may specify memory slots that are backed by mmio pages rather than normal RAM. In some cases it is not enough to identify these mmio pages by pfn_valid(). This patch adds checking the PageReserved as well. Signed-off-by: Ben-Ami Yassour Signed-off-by: Muli Ben-Yehuda Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7dd9b0b85e4..5eb96c7c8d7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -76,6 +76,14 @@ static inline int valid_vcpu(int n) return likely(n >= 0 && n < KVM_MAX_VCPUS); } +static inline int is_mmio_pfn(pfn_t pfn) +{ + if (pfn_valid(pfn)) + return PageReserved(pfn_to_page(pfn)); + + return true; +} + /* * Switches to specified vcpu, until a matching vcpu_put() */ @@ -740,7 +748,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) } pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - BUG_ON(pfn_valid(pfn)); + BUG_ON(!is_mmio_pfn(pfn)); } else pfn = page_to_pfn(page[0]); @@ -754,10 +762,10 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) pfn_t pfn; pfn = gfn_to_pfn(kvm, gfn); - if (pfn_valid(pfn)) + if (!is_mmio_pfn(pfn)) return pfn_to_page(pfn); - WARN_ON(!pfn_valid(pfn)); + WARN_ON(is_mmio_pfn(pfn)); get_page(bad_page); return bad_page; @@ -773,7 +781,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean); void kvm_release_pfn_clean(pfn_t pfn) { - if (pfn_valid(pfn)) + if (!is_mmio_pfn(pfn)) put_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); @@ -799,7 +807,7 @@ EXPORT_SYMBOL_GPL(kvm_set_page_dirty); void kvm_set_pfn_dirty(pfn_t pfn) { - if (pfn_valid(pfn)) { + if (!is_mmio_pfn(pfn)) { struct page *page = pfn_to_page(pfn); if (!PageReserved(page)) SetPageDirty(page); @@ -809,14 +817,14 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); void kvm_set_pfn_accessed(pfn_t pfn) { - if (pfn_valid(pfn)) + if (!is_mmio_pfn(pfn)) mark_page_accessed(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); void kvm_get_pfn(pfn_t pfn) { - if (pfn_valid(pfn)) + if (!is_mmio_pfn(pfn)) get_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_get_pfn); -- cgit v1.2.3-70-g09d2 From fa3795a7308df099f0f2c9e5ca2c20a5ff65bdc4 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 11 Aug 2008 10:01:46 -0700 Subject: KVM: Reduce stack usage in kvm_vcpu_ioctl() Signed-off-by: Dave Hansen Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 46 ++++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5eb96c7c8d7..0309571fcb2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1126,6 +1126,8 @@ static long kvm_vcpu_ioctl(struct file *filp, struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; int r; + struct kvm_fpu *fpu = NULL; + struct kvm_sregs *kvm_sregs = NULL; if (vcpu->kvm->mm != current->mm) return -EIO; @@ -1173,25 +1175,28 @@ out_free2: break; } case KVM_GET_SREGS: { - struct kvm_sregs kvm_sregs; - - memset(&kvm_sregs, 0, sizeof kvm_sregs); - r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs); + kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); + r = -ENOMEM; + if (!kvm_sregs) + goto out; + r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); if (r) goto out; r = -EFAULT; - if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs)) + if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) goto out; r = 0; break; } case KVM_SET_SREGS: { - struct kvm_sregs kvm_sregs; - + kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); + r = -ENOMEM; + if (!kvm_sregs) + goto out; r = -EFAULT; - if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs)) + if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) goto out; - r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs); + r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); if (r) goto out; r = 0; @@ -1272,25 +1277,28 @@ out_free2: break; } case KVM_GET_FPU: { - struct kvm_fpu fpu; - - memset(&fpu, 0, sizeof fpu); - r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, &fpu); + fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); + r = -ENOMEM; + if (!fpu) + goto out; + r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); if (r) goto out; r = -EFAULT; - if (copy_to_user(argp, &fpu, sizeof fpu)) + if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu))) goto out; r = 0; break; } case KVM_SET_FPU: { - struct kvm_fpu fpu; - + fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); + r = -ENOMEM; + if (!fpu) + goto out; r = -EFAULT; - if (copy_from_user(&fpu, argp, sizeof fpu)) + if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) goto out; - r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, &fpu); + r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); if (r) goto out; r = 0; @@ -1300,6 +1308,8 @@ out_free2: r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); } out: + kfree(fpu); + kfree(kvm_sregs); return r; } -- cgit v1.2.3-70-g09d2 From 26815a648e1ec2b338a63a2bc301dcf449b93e5a Mon Sep 17 00:00:00 2001 From: Xiantao Zhang Date: Tue, 19 Aug 2008 20:48:03 +0800 Subject: KVM: ia64: add a dummy irq ack notification Before enabling notify_acked_irq for ia64, leave the related APIs as nop-op first. Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- arch/ia64/kvm/irq.h | 32 ++++++++++++++++++++++++++++++++ virt/kvm/ioapic.c | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 arch/ia64/kvm/irq.h (limited to 'virt/kvm') diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h new file mode 100644 index 00000000000..f2e6545debf --- /dev/null +++ b/arch/ia64/kvm/irq.h @@ -0,0 +1,32 @@ +/* + * irq.h: In-kernel interrupt controller related definitions + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Authors: + * Xiantao Zhang + * + */ + +#ifndef __IRQ_H +#define __IRQ_H + +struct kvm; + +static inline void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi) +{ +} + +#endif diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 515cd7ce761..53772bb4632 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -386,7 +386,7 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, break; #ifdef CONFIG_IA64 case IOAPIC_REG_EOI: - kvm_ioapic_update_eoi(ioapic->kvm, data); + kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG); break; #endif -- cgit v1.2.3-70-g09d2 From d657c7335b97d746aa6123c56504b46c20e37df3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 26 Aug 2008 17:31:31 +0300 Subject: KVM: Don't call get_user_pages(.force = 1) This is esoteric and only needed to break COW on MAP_SHARED mappings. Since KVM no longer does these sorts of mappings, breaking COW on them is no longer necessary. Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0309571fcb2..de3b029f6ad 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -734,7 +734,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) return page_to_pfn(bad_page); } - npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, + npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, NULL); if (unlikely(npages != 1)) { -- cgit v1.2.3-70-g09d2 From d76901750ab9f71091d33ef3d2b5909d8a9a4ad4 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 8 Sep 2008 15:23:48 -0300 Subject: KVM: x86: do not execute halted vcpus Offline or uninitialized vcpu's can be executed if requested to perform userspace work. Follow Avi's suggestion to handle halted vcpu's in the main loop, simplifying kvm_emulate_halt(). Introduce a new vcpu->requests bit to indicate events that promote state from halted to running. Also standardize vcpu wake sites. Signed-off-by: Marcelo Tosatti redhat.com> Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 5 +-- arch/x86/kvm/lapic.c | 16 ++------ arch/x86/kvm/x86.c | 100 ++++++++++++++++++++++++++--------------------- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 10 ++--- 5 files changed, 67 insertions(+), 65 deletions(-) (limited to 'virt/kvm') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 4cb443026ec..634132a9a51 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -200,10 +200,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps) if (!atomic_inc_and_test(&pt->pending)) set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); - if (vcpu0 && waitqueue_active(&vcpu0->wq)) { - vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; + + if (vcpu0 && waitqueue_active(&vcpu0->wq)) wake_up_interruptible(&vcpu0->wq); - } pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); pt->scheduled = ktime_to_ns(pt->timer.expires); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index be94f93a73f..fd00f698692 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -339,13 +339,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, } else apic_clear_vector(vector, apic->regs + APIC_TMR); - if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) - kvm_vcpu_kick(vcpu); - else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) { - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - if (waitqueue_active(&vcpu->wq)) - wake_up_interruptible(&vcpu->wq); - } + kvm_vcpu_kick(vcpu); result = (orig_irr == 0); break; @@ -384,8 +378,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { vcpu->arch.sipi_vector = vector; vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; - if (waitqueue_active(&vcpu->wq)) - wake_up_interruptible(&vcpu->wq); + kvm_vcpu_kick(vcpu); } break; @@ -950,10 +943,9 @@ static int __apic_timer_fn(struct kvm_lapic *apic) if(!atomic_inc_and_test(&apic->timer.pending)) set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); - if (waitqueue_active(q)) { - apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + if (waitqueue_active(q)) wake_up_interruptible(q); - } + if (apic_lvtt_period(apic)) { result = 1; apic->timer.dev.expires = ktime_add_ns( diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3f3cb7107c0..bf98d40b21e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2798,11 +2798,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) KVMTRACE_0D(HLT, vcpu, handler); if (irqchip_in_kernel(vcpu->kvm)) { vcpu->arch.mp_state = KVM_MP_STATE_HALTED; - up_read(&vcpu->kvm->slots_lock); - kvm_vcpu_block(vcpu); - down_read(&vcpu->kvm->slots_lock); - if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) - return -EINTR; return 1; } else { vcpu->run->exit_reason = KVM_EXIT_HLT; @@ -3097,24 +3092,10 @@ static void vapic_exit(struct kvm_vcpu *vcpu) up_read(&vcpu->kvm->slots_lock); } -static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; - if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { - pr_debug("vcpu %d received sipi with vector # %x\n", - vcpu->vcpu_id, vcpu->arch.sipi_vector); - kvm_lapic_reset(vcpu); - r = kvm_x86_ops->vcpu_reset(vcpu); - if (r) - return r; - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - } - - down_read(&vcpu->kvm->slots_lock); - vapic_enter(vcpu); - -again: if (vcpu->requests) if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) kvm_mmu_unload(vcpu); @@ -3151,22 +3132,13 @@ again: local_irq_disable(); - if (vcpu->requests || need_resched()) { + if (vcpu->requests || need_resched() || signal_pending(current)) { local_irq_enable(); preempt_enable(); r = 1; goto out; } - if (signal_pending(current)) { - local_irq_enable(); - preempt_enable(); - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.signal_exits; - goto out; - } - if (vcpu->guest_debug.enabled) kvm_x86_ops->guest_debug_pre(vcpu); @@ -3227,26 +3199,63 @@ again: kvm_lapic_sync_from_vapic(vcpu); r = kvm_x86_ops->handle_exit(kvm_run, vcpu); +out: + return r; +} - if (r > 0) { - if (dm_request_for_irq_injection(vcpu, kvm_run)) { - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.request_irq_exits; - goto out; - } - if (!need_resched()) - goto again; +static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { + printk("vcpu %d received sipi with vector # %x\n", + vcpu->vcpu_id, vcpu->arch.sipi_vector); + kvm_lapic_reset(vcpu); + r = kvm_x86_ops->vcpu_reset(vcpu); + if (r) + return r; + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; } -out: - up_read(&vcpu->kvm->slots_lock); - if (r > 0) { - kvm_resched(vcpu); - down_read(&vcpu->kvm->slots_lock); - goto again; + down_read(&vcpu->kvm->slots_lock); + vapic_enter(vcpu); + + r = 1; + while (r > 0) { + if (kvm_arch_vcpu_runnable(vcpu)) + r = vcpu_enter_guest(vcpu, kvm_run); + else { + up_read(&vcpu->kvm->slots_lock); + kvm_vcpu_block(vcpu); + down_read(&vcpu->kvm->slots_lock); + if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) + if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) + vcpu->arch.mp_state = + KVM_MP_STATE_RUNNABLE; + if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) + r = -EINTR; + } + + if (r > 0) { + if (dm_request_for_irq_injection(vcpu, kvm_run)) { + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.request_irq_exits; + } + if (signal_pending(current)) { + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.signal_exits; + } + if (need_resched()) { + up_read(&vcpu->kvm->slots_lock); + kvm_resched(vcpu); + down_read(&vcpu->kvm->slots_lock); + } + } } + up_read(&vcpu->kvm->slots_lock); post_kvm_run_save(vcpu, kvm_run); vapic_exit(vcpu); @@ -3266,6 +3275,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { kvm_vcpu_block(vcpu); + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); r = -EAGAIN; goto out; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a18aaad2ab7..4b036430ea2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -34,6 +34,7 @@ #define KVM_REQ_MMU_RELOAD 3 #define KVM_REQ_TRIPLE_FAULT 4 #define KVM_REQ_PENDING_TIMER 5 +#define KVM_REQ_UNHALT 6 struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index de3b029f6ad..63e661be040 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -980,12 +980,12 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) for (;;) { prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); - if (kvm_cpu_has_interrupt(vcpu)) - break; - if (kvm_cpu_has_pending_timer(vcpu)) - break; - if (kvm_arch_vcpu_runnable(vcpu)) + if (kvm_cpu_has_interrupt(vcpu) || + kvm_cpu_has_pending_timer(vcpu) || + kvm_arch_vcpu_runnable(vcpu)) { + set_bit(KVM_REQ_UNHALT, &vcpu->requests); break; + } if (signal_pending(current)) break; -- cgit v1.2.3-70-g09d2 From 62c476c7c7f25a5b245b9902a935636e6316e58c Mon Sep 17 00:00:00 2001 From: Ben-Ami Yassour Date: Sun, 14 Sep 2008 03:48:28 +0300 Subject: KVM: Device Assignment with VT-d Based on a patch by: Kay, Allen M This patch enables PCI device assignment based on VT-d support. When a device is assigned to the guest, the guest memory is pinned and the mapping is updated in the VT-d IOMMU. [Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present and also control enable/disable from userspace] Signed-off-by: Kay, Allen M Signed-off-by: Weidong Han Signed-off-by: Ben-Ami Yassour Signed-off-by: Amit Shah Acked-by: Mark Gross Signed-off-by: Avi Kivity --- arch/x86/kvm/Makefile | 3 + arch/x86/kvm/vtd.c | 198 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 14 ++++ include/asm-x86/kvm_host.h | 23 +----- include/linux/kvm.h | 3 + include/linux/kvm_host.h | 52 ++++++++++++ virt/kvm/kvm_main.c | 9 ++- 7 files changed, 281 insertions(+), 21 deletions(-) create mode 100644 arch/x86/kvm/vtd.c (limited to 'virt/kvm') diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d0e940bb6f4..3072b17447a 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ i8254.o +ifeq ($(CONFIG_DMAR),y) +kvm-objs += vtd.o +endif obj-$(CONFIG_KVM) += kvm.o kvm-intel-objs = vmx.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c new file mode 100644 index 00000000000..667bf3fb64b --- /dev/null +++ b/arch/x86/kvm/vtd.c @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Copyright IBM Corporation, 2008 + * Author: Allen M. Kay + * Author: Weidong Han + * Author: Ben-Ami Yassour + */ + +#include +#include +#include +#include +#include + +static int kvm_iommu_unmap_memslots(struct kvm *kvm); +static void kvm_iommu_put_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages); + +int kvm_iommu_map_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + gfn_t gfn = base_gfn; + pfn_t pfn; + int i, r; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + r = -EINVAL; + for (i = 0; i < npages; i++) { + /* check if already mapped */ + pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, + gfn_to_gpa(gfn)); + if (pfn && !is_mmio_pfn(pfn)) + continue; + + pfn = gfn_to_pfn(kvm, gfn); + if (!is_mmio_pfn(pfn)) { + r = intel_iommu_page_mapping(domain, + gfn_to_gpa(gfn), + pfn_to_hpa(pfn), + PAGE_SIZE, + DMA_PTE_READ | + DMA_PTE_WRITE); + if (r) { + printk(KERN_DEBUG "kvm_iommu_map_pages:" + "iommu failed to map pfn=%lx\n", pfn); + goto unmap_pages; + } + } else { + printk(KERN_DEBUG "kvm_iommu_map_page:" + "invalid pfn=%lx\n", pfn); + goto unmap_pages; + } + gfn++; + } + return 0; + +unmap_pages: + kvm_iommu_put_pages(kvm, base_gfn, i); + return r; +} + +static int kvm_iommu_map_memslots(struct kvm *kvm) +{ + int i, r; + + down_read(&kvm->slots_lock); + for (i = 0; i < kvm->nmemslots; i++) { + r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, + kvm->memslots[i].npages); + if (r) + break; + } + up_read(&kvm->slots_lock); + return r; +} + +int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + struct pci_dev *pdev = NULL; + int r; + + if (!intel_iommu_found()) { + printk(KERN_ERR "%s: intel iommu not found\n", __func__); + return -ENODEV; + } + + printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n", + assigned_dev->host_busnr, + PCI_SLOT(assigned_dev->host_devfn), + PCI_FUNC(assigned_dev->host_devfn)); + + pdev = assigned_dev->dev; + + if (pdev == NULL) { + if (kvm->arch.intel_iommu_domain) { + intel_iommu_domain_exit(kvm->arch.intel_iommu_domain); + kvm->arch.intel_iommu_domain = NULL; + } + return -ENODEV; + } + + kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev); + if (!kvm->arch.intel_iommu_domain) + return -ENODEV; + + r = kvm_iommu_map_memslots(kvm); + if (r) + goto out_unmap; + + intel_iommu_detach_dev(kvm->arch.intel_iommu_domain, + pdev->bus->number, pdev->devfn); + + r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain, + pdev); + if (r) { + printk(KERN_ERR "Domain context map for %s failed", + pci_name(pdev)); + goto out_unmap; + } + return 0; + +out_unmap: + kvm_iommu_unmap_memslots(kvm); + return r; +} + +static void kvm_iommu_put_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + gfn_t gfn = base_gfn; + pfn_t pfn; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + int i; + + for (i = 0; i < npages; i++) { + pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, + gfn_to_gpa(gfn)); + kvm_release_pfn_clean(pfn); + gfn++; + } +} + +static int kvm_iommu_unmap_memslots(struct kvm *kvm) +{ + int i; + down_read(&kvm->slots_lock); + for (i = 0; i < kvm->nmemslots; i++) { + kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, + kvm->memslots[i].npages); + } + up_read(&kvm->slots_lock); + + return 0; +} + +int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + struct kvm_assigned_dev_kernel *entry; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) { + printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n", + entry->host_busnr, + PCI_SLOT(entry->host_devfn), + PCI_FUNC(entry->host_devfn)); + + /* detach kvm dmar domain */ + intel_iommu_detach_dev(domain, entry->host_busnr, + entry->host_devfn); + } + kvm_iommu_unmap_memslots(kvm); + intel_iommu_domain_exit(domain); + return 0; +} diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2134f3e0a51..c8a2793626e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, list_add(&match->list, &kvm->arch.assigned_dev_head); + if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { + r = kvm_iommu_map_guest(kvm, match); + if (r) + goto out_list_del; + } + out: mutex_unlock(&kvm->lock); return r; +out_list_del: + list_del(&match->list); + pci_release_regions(dev); out_disable: pci_disable_device(dev); out_put: @@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PV_MMU: r = !tdp_enabled; break; + case KVM_CAP_IOMMU: + r = intel_iommu_found(); + break; default: r = 0; break; @@ -4282,6 +4295,7 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { + kvm_iommu_unmap_guest(kvm); kvm_free_assigned_devices(kvm); kvm_free_pit(kvm); kfree(kvm->arch.vpic); diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 68a3ac13afc..805629c0f15 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -331,26 +331,6 @@ struct kvm_mem_alias { gfn_t target_gfn; }; -struct kvm_irq_ack_notifier { - struct hlist_node link; - unsigned gsi; - void (*irq_acked)(struct kvm_irq_ack_notifier *kian); -}; - -struct kvm_assigned_dev_kernel { - struct kvm_irq_ack_notifier ack_notifier; - struct work_struct interrupt_work; - struct list_head list; - int assigned_dev_id; - int host_busnr; - int host_devfn; - int host_irq; - int guest_irq; - int irq_requested; - struct pci_dev *dev; - struct kvm *kvm; -}; - struct kvm_arch{ int naliases; struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; @@ -364,6 +344,7 @@ struct kvm_arch{ */ struct list_head active_mmu_pages; struct list_head assigned_dev_head; + struct dmar_domain *intel_iommu_domain; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; @@ -514,6 +495,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, gpa_t addr, unsigned long *ret); +int is_mmio_pfn(pfn_t pfn); + extern bool tdp_enabled; enum emulation_result { diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ef4bc6f8977..4269be171fa 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -384,6 +384,7 @@ struct kvm_trace_rec { #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ #define KVM_CAP_DEVICE_ASSIGNMENT 17 +#define KVM_CAP_IOMMU 18 /* * ioctls for VM fds @@ -495,4 +496,6 @@ struct kvm_assigned_irq { __u32 flags; }; +#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4b036430ea2..6252802c3cc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -286,6 +286,53 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); +struct kvm_irq_ack_notifier { + struct hlist_node link; + unsigned gsi; + void (*irq_acked)(struct kvm_irq_ack_notifier *kian); +}; + +struct kvm_assigned_dev_kernel { + struct kvm_irq_ack_notifier ack_notifier; + struct work_struct interrupt_work; + struct list_head list; + int assigned_dev_id; + int host_busnr; + int host_devfn; + int host_irq; + int guest_irq; + int irq_requested; + struct pci_dev *dev; + struct kvm *kvm; +}; + +#ifdef CONFIG_DMAR +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, + unsigned long npages); +int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev); +int kvm_iommu_unmap_guest(struct kvm *kvm); +#else /* CONFIG_DMAR */ +static inline int kvm_iommu_map_pages(struct kvm *kvm, + gfn_t base_gfn, + unsigned long npages) +{ + return 0; +} + +static inline int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) +{ + return -ENODEV; +} + +static inline int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + return 0; +} +#endif /* CONFIG_DMAR */ + static inline void kvm_guest_enter(void) { account_system_vtime(current); @@ -308,6 +355,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn) return (gpa_t)gfn << PAGE_SHIFT; } +static inline hpa_t pfn_to_hpa(pfn_t pfn) +{ + return (hpa_t)pfn << PAGE_SHIFT; +} + static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) { set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 63e661be040..f42d5c2a396 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -76,7 +77,7 @@ static inline int valid_vcpu(int n) return likely(n >= 0 && n < KVM_MAX_VCPUS); } -static inline int is_mmio_pfn(pfn_t pfn) +inline int is_mmio_pfn(pfn_t pfn) { if (pfn_valid(pfn)) return PageReserved(pfn_to_page(pfn)); @@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm, } kvm_free_physmem_slot(&old, &new); + + /* map the pages in iommu page table */ + r = kvm_iommu_map_pages(kvm, base_gfn, npages); + if (r) + goto out; + return 0; out_free: -- cgit v1.2.3-70-g09d2 From 777b3f49d297e387866604093b635e5bc9b9d2a6 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 16 Sep 2008 20:54:46 -0300 Subject: KVM: opencode gfn_to_page in kvm_vm_fault kvm_vm_fault is invoked with mmap_sem held in read mode. Since gfn_to_page will be converted to get_user_pages_fast, which requires this lock NOT to be held, switch to opencoded get_user_pages. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f42d5c2a396..2907d05cfcc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1394,17 +1394,22 @@ out: static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page[1]; + unsigned long addr; + int npages; + gfn_t gfn = vmf->pgoff; struct kvm *kvm = vma->vm_file->private_data; - struct page *page; - if (!kvm_is_visible_gfn(kvm, vmf->pgoff)) + addr = gfn_to_hva(kvm, gfn); + if (kvm_is_error_hva(addr)) return VM_FAULT_SIGBUS; - page = gfn_to_page(kvm, vmf->pgoff); - if (is_error_page(page)) { - kvm_release_page_clean(page); + + npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, + NULL); + if (unlikely(npages != 1)) return VM_FAULT_SIGBUS; - } - vmf->page = page; + + vmf->page = page[0]; return 0; } -- cgit v1.2.3-70-g09d2 From 4c2155ce81c193788082d4b8cdbc26d79edebc58 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 16 Sep 2008 20:54:47 -0300 Subject: KVM: switch to get_user_pages_fast Convert gfn_to_pfn to use get_user_pages_fast, which can do lockless pagetable lookups on x86. Kernel compilation on 4-way guest is 3.7% faster on VMX. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/powerpc/kvm/44x_tlb.c | 2 -- arch/x86/kvm/mmu.c | 23 +++++++++-------------- arch/x86/kvm/paging_tmpl.h | 8 +------- arch/x86/kvm/vmx.c | 4 ---- arch/x86/kvm/x86.c | 6 ------ virt/kvm/kvm_main.c | 10 +++++----- 6 files changed, 15 insertions(+), 38 deletions(-) (limited to 'virt/kvm') diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 7b11fd7be54..2e227a412bc 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -147,9 +147,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, stlbe = &vcpu->arch.shadow_tlb[victim]; /* Get reference to new page. */ - down_read(¤t->mm->mmap_sem); new_page = gfn_to_page(vcpu->kvm, gfn); - up_read(¤t->mm->mmap_sem); if (is_error_page(new_page)) { printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); kvm_release_page_clean(new_page); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index bce3e25ec79..5779a2323e2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -405,16 +405,19 @@ static int host_largepage_backed(struct kvm *kvm, gfn_t gfn) { struct vm_area_struct *vma; unsigned long addr; + int ret = 0; addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) - return 0; + return ret; + down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm, addr); if (vma && is_vm_hugetlb_page(vma)) - return 1; + ret = 1; + up_read(¤t->mm->mmap_sem); - return 0; + return ret; } static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn) @@ -1140,9 +1143,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) if (gpa == UNMAPPED_GVA) return NULL; - down_read(¤t->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); - up_read(¤t->mm->mmap_sem); return page; } @@ -1330,16 +1331,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) pfn_t pfn; unsigned long mmu_seq; - down_read(¤t->mm->mmap_sem); if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { gfn &= ~(KVM_PAGES_PER_HPAGE-1); largepage = 1; } mmu_seq = vcpu->kvm->mmu_notifier_seq; - /* implicit mb(), we'll read before PT lock is unlocked */ + smp_rmb(); pfn = gfn_to_pfn(vcpu->kvm, gfn); - up_read(¤t->mm->mmap_sem); /* mmio */ if (is_error_pfn(pfn)) { @@ -1488,15 +1487,13 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, if (r) return r; - down_read(¤t->mm->mmap_sem); if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { gfn &= ~(KVM_PAGES_PER_HPAGE-1); largepage = 1; } mmu_seq = vcpu->kvm->mmu_notifier_seq; - /* implicit mb(), we'll read before PT lock is unlocked */ + smp_rmb(); pfn = gfn_to_pfn(vcpu->kvm, gfn); - up_read(¤t->mm->mmap_sem); if (is_error_pfn(pfn)) { kvm_release_pfn_clean(pfn); return 1; @@ -1809,15 +1806,13 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, return; gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; - down_read(¤t->mm->mmap_sem); if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) { gfn &= ~(KVM_PAGES_PER_HPAGE-1); vcpu->arch.update_pte.largepage = 1; } vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; - /* implicit mb(), we'll read before PT lock is unlocked */ + smp_rmb(); pfn = gfn_to_pfn(vcpu->kvm, gfn); - up_read(¤t->mm->mmap_sem); if (is_error_pfn(pfn)) { kvm_release_pfn_clean(pfn); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index b671f61be41..6dd08e096e2 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -102,14 +102,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, pt_element_t *table; struct page *page; - down_read(¤t->mm->mmap_sem); page = gfn_to_page(kvm, table_gfn); - up_read(¤t->mm->mmap_sem); table = kmap_atomic(page, KM_USER0); - ret = CMPXCHG(&table[index], orig_pte, new_pte); - kunmap_atomic(table, KM_USER0); kvm_release_page_dirty(page); @@ -418,7 +414,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, return 0; } - down_read(¤t->mm->mmap_sem); if (walker.level == PT_DIRECTORY_LEVEL) { gfn_t large_gfn; large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1); @@ -428,9 +423,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, } } mmu_seq = vcpu->kvm->mmu_notifier_seq; - /* implicit mb(), we'll read before PT lock is unlocked */ + smp_rmb(); pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); - up_read(¤t->mm->mmap_sem); /* mmio */ if (is_error_pfn(pfn)) { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 046a91b5a4b..025bf4011ab 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2010,9 +2010,7 @@ static int alloc_apic_access_page(struct kvm *kvm) if (r) goto out; - down_read(¤t->mm->mmap_sem); kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); - up_read(¤t->mm->mmap_sem); out: up_write(&kvm->slots_lock); return r; @@ -2034,10 +2032,8 @@ static int alloc_identity_pagetable(struct kvm *kvm) if (r) goto out; - down_read(¤t->mm->mmap_sem); kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); - up_read(¤t->mm->mmap_sem); out: up_write(&kvm->slots_lock); return r; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 61eddbeabeb..108f07267e8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -946,10 +946,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) /* ...but clean it before doing the actual write */ vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); - down_read(¤t->mm->mmap_sem); vcpu->arch.time_page = gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); - up_read(¤t->mm->mmap_sem); if (is_error_page(vcpu->arch.time_page)) { kvm_release_page_clean(vcpu->arch.time_page); @@ -2322,9 +2320,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, val = *(u64 *)new; - down_read(¤t->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); - up_read(¤t->mm->mmap_sem); kaddr = kmap_atomic(page, KM_USER0); set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); @@ -3089,9 +3085,7 @@ static void vapic_enter(struct kvm_vcpu *vcpu) if (!apic || !apic->vapic_addr) return; - down_read(¤t->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); - up_read(¤t->mm->mmap_sem); vcpu->arch.apic->vapic_page = page; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2907d05cfcc..cd34f73513d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -723,9 +723,6 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_hva); -/* - * Requires current->mm->mmap_sem to be held - */ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) { struct page *page[1]; @@ -741,20 +738,23 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) return page_to_pfn(bad_page); } - npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, - NULL); + npages = get_user_pages_fast(addr, 1, 1, page); if (unlikely(npages != 1)) { struct vm_area_struct *vma; + down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm, addr); + if (vma == NULL || addr < vma->vm_start || !(vma->vm_flags & VM_PFNMAP)) { + up_read(¤t->mm->mmap_sem); get_page(bad_page); return page_to_pfn(bad_page); } pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + up_read(¤t->mm->mmap_sem); BUG_ON(!is_mmio_pfn(pfn)); } else pfn = page_to_pfn(page[0]); -- cgit v1.2.3-70-g09d2 From 7d8fece678c1abc2ca3e1ceda2277c3538a9161c Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 17 Sep 2008 23:16:59 -0300 Subject: KVM: Don't destroy vcpu in case vcpu_setup fails One of vcpu_setup responsibilities is to do mmu initialization. However, in case we fail in kvm_arch_vcpu_reset, before we get the chance to init mmu. OTOH, vcpu_destroy will attempt to destroy mmu, triggering a bug. Keeping track of whether or not mmu is initialized would unnecessarily complicate things. Rather, we just make return, making sure any needed uninitialization is done before we return, in case we fail. Signed-off-by: Glauber Costa Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cd34f73513d..ef9a121bbd1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1089,12 +1089,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) r = kvm_arch_vcpu_setup(vcpu); if (r) - goto vcpu_destroy; + return r; mutex_lock(&kvm->lock); if (kvm->vcpus[n]) { r = -EEXIST; - mutex_unlock(&kvm->lock); goto vcpu_destroy; } kvm->vcpus[n] = vcpu; @@ -1110,8 +1109,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) unlink: mutex_lock(&kvm->lock); kvm->vcpus[n] = NULL; - mutex_unlock(&kvm->lock); vcpu_destroy: + mutex_unlock(&kvm->lock); kvm_arch_vcpu_destroy(vcpu); return r; } -- cgit v1.2.3-70-g09d2 From 271b05281f7f485a0be8764860687ebb98459b80 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Wed, 24 Sep 2008 13:39:57 +0800 Subject: KVM: Remove useless intel-iommu.h header inclusion Currently "#include " is not needed in virt/kvm/kvm_main.c. Signed-off-by: Weidong Han Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 1 - 1 file changed, 1 deletion(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ef9a121bbd1..6cf042789ad 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3-70-g09d2 From e5fcfc821a467bd0827635db8fd39ab1213987e5 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Thu, 25 Sep 2008 23:32:02 +0800 Subject: KVM: Device Assignment: Map mmio pages into VT-d page table Assigned device could DMA to mmio pages, so also need to map mmio pages into VT-d page table. Signed-off-by: Weidong Han Signed-off-by: Avi Kivity --- arch/x86/kvm/vtd.c | 29 +++++++++++------------------ include/asm-x86/kvm_host.h | 2 -- virt/kvm/kvm_main.c | 2 +- 3 files changed, 12 insertions(+), 21 deletions(-) (limited to 'virt/kvm') diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c index 667bf3fb64b..a770874f3a3 100644 --- a/arch/x86/kvm/vtd.c +++ b/arch/x86/kvm/vtd.c @@ -36,37 +36,30 @@ int kvm_iommu_map_pages(struct kvm *kvm, { gfn_t gfn = base_gfn; pfn_t pfn; - int i, r; + int i, r = 0; struct dmar_domain *domain = kvm->arch.intel_iommu_domain; /* check if iommu exists and in use */ if (!domain) return 0; - r = -EINVAL; for (i = 0; i < npages; i++) { /* check if already mapped */ pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, gfn_to_gpa(gfn)); - if (pfn && !is_mmio_pfn(pfn)) + if (pfn) continue; pfn = gfn_to_pfn(kvm, gfn); - if (!is_mmio_pfn(pfn)) { - r = intel_iommu_page_mapping(domain, - gfn_to_gpa(gfn), - pfn_to_hpa(pfn), - PAGE_SIZE, - DMA_PTE_READ | - DMA_PTE_WRITE); - if (r) { - printk(KERN_DEBUG "kvm_iommu_map_pages:" - "iommu failed to map pfn=%lx\n", pfn); - goto unmap_pages; - } - } else { - printk(KERN_DEBUG "kvm_iommu_map_page:" - "invalid pfn=%lx\n", pfn); + r = intel_iommu_page_mapping(domain, + gfn_to_gpa(gfn), + pfn_to_hpa(pfn), + PAGE_SIZE, + DMA_PTE_READ | + DMA_PTE_WRITE); + if (r) { + printk(KERN_ERR "kvm_iommu_map_pages:" + "iommu failed to map pfn=%lx\n", pfn); goto unmap_pages; } gfn++; diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 0992d721c5f..ca6bbc0bd97 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -502,8 +502,6 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, gpa_t addr, unsigned long *ret); -int is_mmio_pfn(pfn_t pfn); - extern bool tdp_enabled; enum emulation_result { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6cf042789ad..98cd916448a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -76,7 +76,7 @@ static inline int valid_vcpu(int n) return likely(n >= 0 && n < KVM_MAX_VCPUS); } -inline int is_mmio_pfn(pfn_t pfn) +static inline int is_mmio_pfn(pfn_t pfn) { if (pfn_valid(pfn)) return PageReserved(pfn_to_page(pfn)); -- cgit v1.2.3-70-g09d2 From 371c01b28e4049d1fbf60a9631cdad98f7cae030 Mon Sep 17 00:00:00 2001 From: Zhang xiantao Date: Thu, 11 Sep 2008 13:19:32 +0800 Subject: KVM: Device Assignment: Move vtd.c from arch/x86/kvm/ to virt/kvm/ Preparation for kvm/ia64 VT-d support. Signed-off-by: Zhang xiantao Signed-off-by: Avi Kivity --- arch/x86/kvm/Makefile | 6 +- arch/x86/kvm/vtd.c | 191 -------------------------------------------------- virt/kvm/vtd.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 194 insertions(+), 194 deletions(-) delete mode 100644 arch/x86/kvm/vtd.c create mode 100644 virt/kvm/vtd.c (limited to 'virt/kvm') diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 3072b17447a..7dce593b9c6 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -7,14 +7,14 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ ifeq ($(CONFIG_KVM_TRACE),y) common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) endif +ifeq ($(CONFIG_DMAR),y) +common-objs += $(addprefix ../../../virt/kvm/, vtd.o) +endif EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ i8254.o -ifeq ($(CONFIG_DMAR),y) -kvm-objs += vtd.o -endif obj-$(CONFIG_KVM) += kvm.o kvm-intel-objs = vmx.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c deleted file mode 100644 index a770874f3a3..00000000000 --- a/arch/x86/kvm/vtd.c +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2006, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Copyright (C) 2006-2008 Intel Corporation - * Copyright IBM Corporation, 2008 - * Author: Allen M. Kay - * Author: Weidong Han - * Author: Ben-Ami Yassour - */ - -#include -#include -#include -#include -#include - -static int kvm_iommu_unmap_memslots(struct kvm *kvm); -static void kvm_iommu_put_pages(struct kvm *kvm, - gfn_t base_gfn, unsigned long npages); - -int kvm_iommu_map_pages(struct kvm *kvm, - gfn_t base_gfn, unsigned long npages) -{ - gfn_t gfn = base_gfn; - pfn_t pfn; - int i, r = 0; - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - for (i = 0; i < npages; i++) { - /* check if already mapped */ - pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, - gfn_to_gpa(gfn)); - if (pfn) - continue; - - pfn = gfn_to_pfn(kvm, gfn); - r = intel_iommu_page_mapping(domain, - gfn_to_gpa(gfn), - pfn_to_hpa(pfn), - PAGE_SIZE, - DMA_PTE_READ | - DMA_PTE_WRITE); - if (r) { - printk(KERN_ERR "kvm_iommu_map_pages:" - "iommu failed to map pfn=%lx\n", pfn); - goto unmap_pages; - } - gfn++; - } - return 0; - -unmap_pages: - kvm_iommu_put_pages(kvm, base_gfn, i); - return r; -} - -static int kvm_iommu_map_memslots(struct kvm *kvm) -{ - int i, r; - - down_read(&kvm->slots_lock); - for (i = 0; i < kvm->nmemslots; i++) { - r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, - kvm->memslots[i].npages); - if (r) - break; - } - up_read(&kvm->slots_lock); - return r; -} - -int kvm_iommu_map_guest(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - struct pci_dev *pdev = NULL; - int r; - - if (!intel_iommu_found()) { - printk(KERN_ERR "%s: intel iommu not found\n", __func__); - return -ENODEV; - } - - printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n", - assigned_dev->host_busnr, - PCI_SLOT(assigned_dev->host_devfn), - PCI_FUNC(assigned_dev->host_devfn)); - - pdev = assigned_dev->dev; - - if (pdev == NULL) { - if (kvm->arch.intel_iommu_domain) { - intel_iommu_domain_exit(kvm->arch.intel_iommu_domain); - kvm->arch.intel_iommu_domain = NULL; - } - return -ENODEV; - } - - kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev); - if (!kvm->arch.intel_iommu_domain) - return -ENODEV; - - r = kvm_iommu_map_memslots(kvm); - if (r) - goto out_unmap; - - intel_iommu_detach_dev(kvm->arch.intel_iommu_domain, - pdev->bus->number, pdev->devfn); - - r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain, - pdev); - if (r) { - printk(KERN_ERR "Domain context map for %s failed", - pci_name(pdev)); - goto out_unmap; - } - return 0; - -out_unmap: - kvm_iommu_unmap_memslots(kvm); - return r; -} - -static void kvm_iommu_put_pages(struct kvm *kvm, - gfn_t base_gfn, unsigned long npages) -{ - gfn_t gfn = base_gfn; - pfn_t pfn; - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; - int i; - - for (i = 0; i < npages; i++) { - pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, - gfn_to_gpa(gfn)); - kvm_release_pfn_clean(pfn); - gfn++; - } -} - -static int kvm_iommu_unmap_memslots(struct kvm *kvm) -{ - int i; - down_read(&kvm->slots_lock); - for (i = 0; i < kvm->nmemslots; i++) { - kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, - kvm->memslots[i].npages); - } - up_read(&kvm->slots_lock); - - return 0; -} - -int kvm_iommu_unmap_guest(struct kvm *kvm) -{ - struct kvm_assigned_dev_kernel *entry; - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) { - printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n", - entry->host_busnr, - PCI_SLOT(entry->host_devfn), - PCI_FUNC(entry->host_devfn)); - - /* detach kvm dmar domain */ - intel_iommu_detach_dev(domain, entry->host_busnr, - entry->host_devfn); - } - kvm_iommu_unmap_memslots(kvm); - intel_iommu_domain_exit(domain); - return 0; -} diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c new file mode 100644 index 00000000000..a770874f3a3 --- /dev/null +++ b/virt/kvm/vtd.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Copyright IBM Corporation, 2008 + * Author: Allen M. Kay + * Author: Weidong Han + * Author: Ben-Ami Yassour + */ + +#include +#include +#include +#include +#include + +static int kvm_iommu_unmap_memslots(struct kvm *kvm); +static void kvm_iommu_put_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages); + +int kvm_iommu_map_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + gfn_t gfn = base_gfn; + pfn_t pfn; + int i, r = 0; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + for (i = 0; i < npages; i++) { + /* check if already mapped */ + pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, + gfn_to_gpa(gfn)); + if (pfn) + continue; + + pfn = gfn_to_pfn(kvm, gfn); + r = intel_iommu_page_mapping(domain, + gfn_to_gpa(gfn), + pfn_to_hpa(pfn), + PAGE_SIZE, + DMA_PTE_READ | + DMA_PTE_WRITE); + if (r) { + printk(KERN_ERR "kvm_iommu_map_pages:" + "iommu failed to map pfn=%lx\n", pfn); + goto unmap_pages; + } + gfn++; + } + return 0; + +unmap_pages: + kvm_iommu_put_pages(kvm, base_gfn, i); + return r; +} + +static int kvm_iommu_map_memslots(struct kvm *kvm) +{ + int i, r; + + down_read(&kvm->slots_lock); + for (i = 0; i < kvm->nmemslots; i++) { + r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, + kvm->memslots[i].npages); + if (r) + break; + } + up_read(&kvm->slots_lock); + return r; +} + +int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + struct pci_dev *pdev = NULL; + int r; + + if (!intel_iommu_found()) { + printk(KERN_ERR "%s: intel iommu not found\n", __func__); + return -ENODEV; + } + + printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n", + assigned_dev->host_busnr, + PCI_SLOT(assigned_dev->host_devfn), + PCI_FUNC(assigned_dev->host_devfn)); + + pdev = assigned_dev->dev; + + if (pdev == NULL) { + if (kvm->arch.intel_iommu_domain) { + intel_iommu_domain_exit(kvm->arch.intel_iommu_domain); + kvm->arch.intel_iommu_domain = NULL; + } + return -ENODEV; + } + + kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev); + if (!kvm->arch.intel_iommu_domain) + return -ENODEV; + + r = kvm_iommu_map_memslots(kvm); + if (r) + goto out_unmap; + + intel_iommu_detach_dev(kvm->arch.intel_iommu_domain, + pdev->bus->number, pdev->devfn); + + r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain, + pdev); + if (r) { + printk(KERN_ERR "Domain context map for %s failed", + pci_name(pdev)); + goto out_unmap; + } + return 0; + +out_unmap: + kvm_iommu_unmap_memslots(kvm); + return r; +} + +static void kvm_iommu_put_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + gfn_t gfn = base_gfn; + pfn_t pfn; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + int i; + + for (i = 0; i < npages; i++) { + pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, + gfn_to_gpa(gfn)); + kvm_release_pfn_clean(pfn); + gfn++; + } +} + +static int kvm_iommu_unmap_memslots(struct kvm *kvm) +{ + int i; + down_read(&kvm->slots_lock); + for (i = 0; i < kvm->nmemslots; i++) { + kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, + kvm->memslots[i].npages); + } + up_read(&kvm->slots_lock); + + return 0; +} + +int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + struct kvm_assigned_dev_kernel *entry; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) { + printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n", + entry->host_busnr, + PCI_SLOT(entry->host_devfn), + PCI_FUNC(entry->host_devfn)); + + /* detach kvm dmar domain */ + intel_iommu_detach_dev(domain, entry->host_busnr, + entry->host_devfn); + } + kvm_iommu_unmap_memslots(kvm); + intel_iommu_domain_exit(domain); + return 0; +} -- cgit v1.2.3-70-g09d2 From 8a98f6648a2b0756d8f26d6c13332f5526355fec Mon Sep 17 00:00:00 2001 From: Xiantao Zhang Date: Mon, 6 Oct 2008 13:47:38 +0800 Subject: KVM: Move device assignment logic to common code To share with other archs, this patch moves device assignment logic to common parts. Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 255 -------------------------------------------- include/linux/kvm.h | 2 + include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 268 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 269 insertions(+), 257 deletions(-) (limited to 'virt/kvm') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d6d7123d264..f8bde01ba8e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -107,238 +106,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, - int assigned_dev_id) -{ - struct list_head *ptr; - struct kvm_assigned_dev_kernel *match; - - list_for_each(ptr, head) { - match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); - if (match->assigned_dev_id == assigned_dev_id) - return match; - } - return NULL; -} - -static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) -{ - struct kvm_assigned_dev_kernel *assigned_dev; - - assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, - interrupt_work); - - /* This is taken to safely inject irq inside the guest. When - * the interrupt injection (or the ioapic code) uses a - * finer-grained lock, update this - */ - mutex_lock(&assigned_dev->kvm->lock); - kvm_set_irq(assigned_dev->kvm, - assigned_dev->guest_irq, 1); - mutex_unlock(&assigned_dev->kvm->lock); - kvm_put_kvm(assigned_dev->kvm); -} - -/* FIXME: Implement the OR logic needed to make shared interrupts on - * this line behave properly - */ -static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = - (struct kvm_assigned_dev_kernel *) dev_id; - - kvm_get_kvm(assigned_dev->kvm); - schedule_work(&assigned_dev->interrupt_work); - disable_irq_nosync(irq); - return IRQ_HANDLED; -} - -/* Ack the irq line for an assigned device */ -static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) -{ - struct kvm_assigned_dev_kernel *dev; - - if (kian->gsi == -1) - return; - - dev = container_of(kian, struct kvm_assigned_dev_kernel, - ack_notifier); - kvm_set_irq(dev->kvm, dev->guest_irq, 0); - enable_irq(dev->host_irq); -} - -static void kvm_free_assigned_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel - *assigned_dev) -{ - if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) - free_irq(assigned_dev->host_irq, (void *)assigned_dev); - - kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); - - if (cancel_work_sync(&assigned_dev->interrupt_work)) - /* We had pending work. That means we will have to take - * care of kvm_put_kvm. - */ - kvm_put_kvm(kvm); - - pci_release_regions(assigned_dev->dev); - pci_disable_device(assigned_dev->dev); - pci_dev_put(assigned_dev->dev); - - list_del(&assigned_dev->list); - kfree(assigned_dev); -} - -static void kvm_free_all_assigned_devices(struct kvm *kvm) -{ - struct list_head *ptr, *ptr2; - struct kvm_assigned_dev_kernel *assigned_dev; - - list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { - assigned_dev = list_entry(ptr, - struct kvm_assigned_dev_kernel, - list); - - kvm_free_assigned_device(kvm, assigned_dev); - } -} - -static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, - struct kvm_assigned_irq - *assigned_irq) -{ - int r = 0; - struct kvm_assigned_dev_kernel *match; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_irq->assigned_dev_id); - if (!match) { - mutex_unlock(&kvm->lock); - return -EINVAL; - } - - if (match->irq_requested) { - match->guest_irq = assigned_irq->guest_irq; - match->ack_notifier.gsi = assigned_irq->guest_irq; - mutex_unlock(&kvm->lock); - return 0; - } - - INIT_WORK(&match->interrupt_work, - kvm_assigned_dev_interrupt_work_handler); - - if (irqchip_in_kernel(kvm)) { - if (!capable(CAP_SYS_RAWIO)) { - r = -EPERM; - goto out_release; - } - - if (assigned_irq->host_irq) - match->host_irq = assigned_irq->host_irq; - else - match->host_irq = match->dev->irq; - match->guest_irq = assigned_irq->guest_irq; - match->ack_notifier.gsi = assigned_irq->guest_irq; - match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; - kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); - - /* Even though this is PCI, we don't want to use shared - * interrupts. Sharing host devices with guest-assigned devices - * on the same interrupt line is not a happy situation: there - * are going to be long delays in accepting, acking, etc. - */ - if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0, - "kvm_assigned_device", (void *)match)) { - r = -EIO; - goto out_release; - } - } - - match->irq_requested = true; - mutex_unlock(&kvm->lock); - return r; -out_release: - mutex_unlock(&kvm->lock); - kvm_free_assigned_device(kvm, match); - return r; -} - -static int kvm_vm_ioctl_assign_device(struct kvm *kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int r = 0; - struct kvm_assigned_dev_kernel *match; - struct pci_dev *dev; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_dev->assigned_dev_id); - if (match) { - /* device already assigned */ - r = -EINVAL; - goto out; - } - - match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); - if (match == NULL) { - printk(KERN_INFO "%s: Couldn't allocate memory\n", - __func__); - r = -ENOMEM; - goto out; - } - dev = pci_get_bus_and_slot(assigned_dev->busnr, - assigned_dev->devfn); - if (!dev) { - printk(KERN_INFO "%s: host device not found\n", __func__); - r = -EINVAL; - goto out_free; - } - if (pci_enable_device(dev)) { - printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); - r = -EBUSY; - goto out_put; - } - r = pci_request_regions(dev, "kvm_assigned_device"); - if (r) { - printk(KERN_INFO "%s: Could not get access to device regions\n", - __func__); - goto out_disable; - } - match->assigned_dev_id = assigned_dev->assigned_dev_id; - match->host_busnr = assigned_dev->busnr; - match->host_devfn = assigned_dev->devfn; - match->dev = dev; - - match->kvm = kvm; - - list_add(&match->list, &kvm->arch.assigned_dev_head); - - if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { - r = kvm_iommu_map_guest(kvm, match); - if (r) - goto out_list_del; - } - -out: - mutex_unlock(&kvm->lock); - return r; -out_list_del: - list_del(&match->list); - pci_release_regions(dev); -out_disable: - pci_disable_device(dev); -out_put: - pci_dev_put(dev); -out_free: - kfree(match); - mutex_unlock(&kvm->lock); - return r; -} - unsigned long segment_base(u16 selector) { struct descriptor_table gdt; @@ -2030,28 +1797,6 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; break; } - case KVM_ASSIGN_PCI_DEVICE: { - struct kvm_assigned_pci_dev assigned_dev; - - r = -EFAULT; - if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) - goto out; - r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); - if (r) - goto out; - break; - } - case KVM_ASSIGN_IRQ: { - struct kvm_assigned_irq assigned_irq; - - r = -EFAULT; - if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) - goto out; - r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); - if (r) - goto out; - break; - } case KVM_GET_PIT: { r = -EFAULT; if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state))) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 4269be171fa..9acf34a6dfb 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -383,7 +383,9 @@ struct kvm_trace_rec { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ +#ifdef CONFIG_X86 #define KVM_CAP_DEVICE_ASSIGNMENT 17 +#endif #define KVM_CAP_IOMMU 18 /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 73b7c52b949..10c1146cd00 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -281,6 +281,7 @@ void kvm_free_physmem(struct kvm *kvm); struct kvm *kvm_arch_create_vm(void); void kvm_arch_destroy_vm(struct kvm *kvm); +void kvm_free_all_assigned_devices(struct kvm *kvm); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *v); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 98cd916448a..485bcdc1655 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -51,6 +51,12 @@ #include "coalesced_mmio.h" #endif +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +#include +#include +#include "irq.h" +#endif + MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -71,6 +77,240 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, bool kvm_rebooting; +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, + int assigned_dev_id) +{ + struct list_head *ptr; + struct kvm_assigned_dev_kernel *match; + + list_for_each(ptr, head) { + match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); + if (match->assigned_dev_id == assigned_dev_id) + return match; + } + return NULL; +} + +static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) +{ + struct kvm_assigned_dev_kernel *assigned_dev; + + assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, + interrupt_work); + + /* This is taken to safely inject irq inside the guest. When + * the interrupt injection (or the ioapic code) uses a + * finer-grained lock, update this + */ + mutex_lock(&assigned_dev->kvm->lock); + kvm_set_irq(assigned_dev->kvm, + assigned_dev->guest_irq, 1); + mutex_unlock(&assigned_dev->kvm->lock); + kvm_put_kvm(assigned_dev->kvm); +} + +/* FIXME: Implement the OR logic needed to make shared interrupts on + * this line behave properly + */ +static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = + (struct kvm_assigned_dev_kernel *) dev_id; + + kvm_get_kvm(assigned_dev->kvm); + schedule_work(&assigned_dev->interrupt_work); + disable_irq_nosync(irq); + return IRQ_HANDLED; +} + +/* Ack the irq line for an assigned device */ +static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) +{ + struct kvm_assigned_dev_kernel *dev; + + if (kian->gsi == -1) + return; + + dev = container_of(kian, struct kvm_assigned_dev_kernel, + ack_notifier); + kvm_set_irq(dev->kvm, dev->guest_irq, 0); + enable_irq(dev->host_irq); +} + +static void kvm_free_assigned_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) +{ + if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) + free_irq(assigned_dev->host_irq, (void *)assigned_dev); + + kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); + + if (cancel_work_sync(&assigned_dev->interrupt_work)) + /* We had pending work. That means we will have to take + * care of kvm_put_kvm. + */ + kvm_put_kvm(kvm); + + pci_release_regions(assigned_dev->dev); + pci_disable_device(assigned_dev->dev); + pci_dev_put(assigned_dev->dev); + + list_del(&assigned_dev->list); + kfree(assigned_dev); +} + +void kvm_free_all_assigned_devices(struct kvm *kvm) +{ + struct list_head *ptr, *ptr2; + struct kvm_assigned_dev_kernel *assigned_dev; + + list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { + assigned_dev = list_entry(ptr, + struct kvm_assigned_dev_kernel, + list); + + kvm_free_assigned_device(kvm, assigned_dev); + } +} + +static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, + struct kvm_assigned_irq + *assigned_irq) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) { + mutex_unlock(&kvm->lock); + return -EINVAL; + } + + if (match->irq_requested) { + match->guest_irq = assigned_irq->guest_irq; + match->ack_notifier.gsi = assigned_irq->guest_irq; + mutex_unlock(&kvm->lock); + return 0; + } + + INIT_WORK(&match->interrupt_work, + kvm_assigned_dev_interrupt_work_handler); + + if (irqchip_in_kernel(kvm)) { + if (!capable(CAP_SYS_RAWIO)) { + r = -EPERM; + goto out_release; + } + + if (assigned_irq->host_irq) + match->host_irq = assigned_irq->host_irq; + else + match->host_irq = match->dev->irq; + match->guest_irq = assigned_irq->guest_irq; + match->ack_notifier.gsi = assigned_irq->guest_irq; + match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; + kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); + + /* Even though this is PCI, we don't want to use shared + * interrupts. Sharing host devices with guest-assigned devices + * on the same interrupt line is not a happy situation: there + * are going to be long delays in accepting, acking, etc. + */ + if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0, + "kvm_assigned_device", (void *)match)) { + r = -EIO; + goto out_release; + } + } + + match->irq_requested = true; + mutex_unlock(&kvm->lock); + return r; +out_release: + mutex_unlock(&kvm->lock); + kvm_free_assigned_device(kvm, match); + return r; +} + +static int kvm_vm_ioctl_assign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + struct pci_dev *dev; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (match) { + /* device already assigned */ + r = -EINVAL; + goto out; + } + + match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); + if (match == NULL) { + printk(KERN_INFO "%s: Couldn't allocate memory\n", + __func__); + r = -ENOMEM; + goto out; + } + dev = pci_get_bus_and_slot(assigned_dev->busnr, + assigned_dev->devfn); + if (!dev) { + printk(KERN_INFO "%s: host device not found\n", __func__); + r = -EINVAL; + goto out_free; + } + if (pci_enable_device(dev)) { + printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); + r = -EBUSY; + goto out_put; + } + r = pci_request_regions(dev, "kvm_assigned_device"); + if (r) { + printk(KERN_INFO "%s: Could not get access to device regions\n", + __func__); + goto out_disable; + } + match->assigned_dev_id = assigned_dev->assigned_dev_id; + match->host_busnr = assigned_dev->busnr; + match->host_devfn = assigned_dev->devfn; + match->dev = dev; + + match->kvm = kvm; + + list_add(&match->list, &kvm->arch.assigned_dev_head); + + if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { + r = kvm_iommu_map_guest(kvm, match); + if (r) + goto out_list_del; + } + +out: + mutex_unlock(&kvm->lock); + return r; +out_list_del: + list_del(&match->list); + pci_release_regions(dev); +out_disable: + pci_disable_device(dev); +out_put: + pci_dev_put(dev); +out_free: + kfree(match); + mutex_unlock(&kvm->lock); + return r; +} +#endif + static inline int valid_vcpu(int n) { return likely(n >= 0 && n < KVM_MAX_VCPUS); @@ -578,12 +818,12 @@ int __kvm_set_memory_region(struct kvm *kvm, } kvm_free_physmem_slot(&old, &new); - +#ifdef CONFIG_DMAR /* map the pages in iommu page table */ r = kvm_iommu_map_pages(kvm, base_gfn, npages); if (r) goto out; - +#endif return 0; out_free: @@ -1382,6 +1622,30 @@ static long kvm_vm_ioctl(struct file *filp, r = 0; break; } +#endif +#ifdef KVM_CAP_DEVICE_ASSIGNMENT + case KVM_ASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); + if (r) + goto out; + break; + } + case KVM_ASSIGN_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } #endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); -- cgit v1.2.3-70-g09d2 From c77fb9dc7a0383c86eabef30272a763a482403e1 Mon Sep 17 00:00:00 2001 From: Xiantao Zhang Date: Sat, 27 Sep 2008 10:55:40 +0800 Subject: KVM: Change is_mmio_pfn to kvm_is_mmio_pfn, and make it common for all archs Add a kvm_ prefix to avoid polluting kernel's name space. Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 16 ++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'virt/kvm') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 10c1146cd00..b3b7598b4d9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -288,6 +288,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); +int kvm_is_mmio_pfn(pfn_t pfn); + struct kvm_irq_ack_notifier { struct hlist_node link; unsigned gsi; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 485bcdc1655..cf0ab8ed384 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -316,7 +316,7 @@ static inline int valid_vcpu(int n) return likely(n >= 0 && n < KVM_MAX_VCPUS); } -static inline int is_mmio_pfn(pfn_t pfn) +inline int kvm_is_mmio_pfn(pfn_t pfn) { if (pfn_valid(pfn)) return PageReserved(pfn_to_page(pfn)); @@ -994,7 +994,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; up_read(¤t->mm->mmap_sem); - BUG_ON(!is_mmio_pfn(pfn)); + BUG_ON(!kvm_is_mmio_pfn(pfn)); } else pfn = page_to_pfn(page[0]); @@ -1008,10 +1008,10 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) pfn_t pfn; pfn = gfn_to_pfn(kvm, gfn); - if (!is_mmio_pfn(pfn)) + if (!kvm_is_mmio_pfn(pfn)) return pfn_to_page(pfn); - WARN_ON(is_mmio_pfn(pfn)); + WARN_ON(kvm_is_mmio_pfn(pfn)); get_page(bad_page); return bad_page; @@ -1027,7 +1027,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean); void kvm_release_pfn_clean(pfn_t pfn) { - if (!is_mmio_pfn(pfn)) + if (!kvm_is_mmio_pfn(pfn)) put_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); @@ -1053,7 +1053,7 @@ EXPORT_SYMBOL_GPL(kvm_set_page_dirty); void kvm_set_pfn_dirty(pfn_t pfn) { - if (!is_mmio_pfn(pfn)) { + if (!kvm_is_mmio_pfn(pfn)) { struct page *page = pfn_to_page(pfn); if (!PageReserved(page)) SetPageDirty(page); @@ -1063,14 +1063,14 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); void kvm_set_pfn_accessed(pfn_t pfn) { - if (!is_mmio_pfn(pfn)) + if (!kvm_is_mmio_pfn(pfn)) mark_page_accessed(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); void kvm_get_pfn(pfn_t pfn) { - if (!is_mmio_pfn(pfn)) + if (!kvm_is_mmio_pfn(pfn)) get_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_get_pfn); -- cgit v1.2.3-70-g09d2 From 3de42dc094ecd313dc7d551e007a134b52f8663d Mon Sep 17 00:00:00 2001 From: Xiantao Zhang Date: Mon, 6 Oct 2008 13:48:45 +0800 Subject: KVM: Separate irq ack notification out of arch/x86/kvm/irq.c Moving irq ack notification logic as common, and make it shared with ia64 side. Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm_host.h | 4 +++ arch/ia64/kvm/Makefile | 2 +- arch/ia64/kvm/irq.h | 5 ---- arch/x86/kvm/Makefile | 2 +- arch/x86/kvm/irq.c | 33 ---------------------- arch/x86/kvm/irq.h | 8 ------ include/asm-x86/kvm_host.h | 2 ++ include/linux/kvm_host.h | 6 ++++ virt/kvm/irq_comm.c | 60 ++++++++++++++++++++++++++++++++++++++++ 9 files changed, 74 insertions(+), 48 deletions(-) create mode 100644 virt/kvm/irq_comm.c (limited to 'virt/kvm') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 1efe513a994..da579a33db1 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -413,6 +413,10 @@ struct kvm_arch { struct kvm_ioapic *vioapic; struct kvm_vm_stat stat; struct kvm_sal_data rdv_sal_data; + + struct list_head assigned_dev_head; + struct dmar_domain *intel_iommu_domain; + struct hlist_head irq_ack_notifier_list; }; union cpuid3_t { diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index bf22fb9e6dc..3b1a1c156ef 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -44,7 +44,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o) + coalesced_mmio.o irq_comm.o) kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h index f2e6545debf..604329ac3c9 100644 --- a/arch/ia64/kvm/irq.h +++ b/arch/ia64/kvm/irq.h @@ -23,10 +23,5 @@ #ifndef __IRQ_H #define __IRQ_H -struct kvm; - -static inline void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi) -{ -} #endif diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 7dce593b9c6..c02343594b4 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -3,7 +3,7 @@ # common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o) + coalesced_mmio.o irq_comm.o) ifeq ($(CONFIG_KVM_TRACE),y) common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) endif diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 8c1b9c5def7..c019b8edcdb 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -99,36 +99,3 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu) __kvm_migrate_apic_timer(vcpu); __kvm_migrate_pit_timer(vcpu); } - -/* This should be called with the kvm->lock mutex held */ -void kvm_set_irq(struct kvm *kvm, int irq, int level) -{ - /* Not possible to detect if the guest uses the PIC or the - * IOAPIC. So set the bit in both. The guest will ignore - * writes to the unused one. - */ - kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level); - kvm_pic_set_irq(pic_irqchip(kvm), irq, level); -} - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi) -{ - struct kvm_irq_ack_notifier *kian; - struct hlist_node *n; - - hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) - if (kian->gsi == gsi) - kian->irq_acked(kian); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - hlist_del(&kian->link); -} diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 4748532fd1d..f17c8f5bbf3 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -68,7 +68,6 @@ struct kvm_pic { }; struct kvm_pic *kvm_create_pic(struct kvm *kvm); -void kvm_pic_set_irq(void *opaque, int irq, int level); int kvm_pic_read_irq(struct kvm *kvm); void kvm_pic_update_irq(struct kvm_pic *s); void kvm_pic_clear_isr_ack(struct kvm *kvm); @@ -85,13 +84,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm) void kvm_pic_reset(struct kvm_kpic_state *s); -void kvm_set_irq(struct kvm *kvm, int irq, int level); -void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian); -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian); - void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index ca6bbc0bd97..411fb8cfb24 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -565,6 +565,8 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, u32 error_code); +void kvm_pic_set_irq(void *opaque, int irq, int level); + void kvm_inject_nmi(struct kvm_vcpu *vcpu); void fx_init(struct kvm_vcpu *vcpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b3b7598b4d9..3833c48fae3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -309,6 +309,12 @@ struct kvm_assigned_dev_kernel { struct pci_dev *dev; struct kvm *kvm; }; +void kvm_set_irq(struct kvm *kvm, int irq, int level); +void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian); +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian); #ifdef CONFIG_DMAR int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c new file mode 100644 index 00000000000..d0169f5e604 --- /dev/null +++ b/virt/kvm/irq_comm.c @@ -0,0 +1,60 @@ +/* + * irq_comm.c: Common API for in kernel interrupt controller + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * Authors: + * Yaozu (Eddie) Dong + * + */ + +#include +#include "irq.h" + +#include "ioapic.h" + +/* This should be called with the kvm->lock mutex held */ +void kvm_set_irq(struct kvm *kvm, int irq, int level) +{ + /* Not possible to detect if the guest uses the PIC or the + * IOAPIC. So set the bit in both. The guest will ignore + * writes to the unused one. + */ + kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level); +#ifdef CONFIG_X86 + kvm_pic_set_irq(pic_irqchip(kvm), irq, level); +#endif +} + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi) +{ + struct kvm_irq_ack_notifier *kian; + struct hlist_node *n; + + hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) + if (kian->gsi == gsi) + kian->irq_acked(kian); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + hlist_del(&kian->link); +} -- cgit v1.2.3-70-g09d2 From 2f7497719179a9f3270b05434be989d21f9fdc09 Mon Sep 17 00:00:00 2001 From: Xiantao Zhang Date: Sat, 27 Sep 2008 11:46:36 +0800 Subject: KVM: Move irqchip_in_kernel() from ioapic.h to irq.h Moving irqchip_in_kernel() from ioapic.h to irq.h. Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- arch/ia64/kvm/irq.h | 4 ++++ arch/ia64/kvm/kvm-ia64.c | 1 + virt/kvm/ioapic.h | 7 ------- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'virt/kvm') diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h index 604329ac3c9..c6786e8b1bf 100644 --- a/arch/ia64/kvm/irq.h +++ b/arch/ia64/kvm/irq.h @@ -23,5 +23,9 @@ #ifndef __IRQ_H #define __IRQ_H +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + return 1; +} #endif diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 7ad759e3429..a6cf719811b 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -45,6 +45,7 @@ #include "iodev.h" #include "ioapic.h" #include "lapic.h" +#include "irq.h" static unsigned long kvm_vmm_base; static unsigned long kvm_vsa_base; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index b52732f493c..cd7ae7691c9 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -79,13 +79,6 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) return kvm->arch.vioapic; } -#ifdef CONFIG_IA64 -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 1; -} -#endif - struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, unsigned long bitmap); void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); -- cgit v1.2.3-70-g09d2