summaryrefslogtreecommitdiffstats
path: root/virt
diff options
context:
space:
mode:
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/ioapic.c39
-rw-r--r--virt/kvm/ioapic.h2
-rw-r--r--virt/kvm/iommu.c6
-rw-r--r--virt/kvm/irq_comm.c297
-rw-r--r--virt/kvm/kvm_main.c184
5 files changed, 415 insertions, 113 deletions
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 23b81cf242a..c3b99def9cb 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -83,24 +83,28 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
return result;
}
-static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
+static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
{
union ioapic_redir_entry *pent;
+ int injected = -1;
pent = &ioapic->redirtbl[idx];
if (!pent->fields.mask) {
- int injected = ioapic_deliver(ioapic, idx);
+ injected = ioapic_deliver(ioapic, idx);
if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
pent->fields.remote_irr = 1;
}
if (!pent->fields.trig_mode)
ioapic->irr &= ~(1 << idx);
+
+ return injected;
}
static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
unsigned index;
+ bool mask_before, mask_after;
switch (ioapic->ioregsel) {
case IOAPIC_REG_VERSION:
@@ -120,6 +124,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
ioapic_debug("change redir index %x val %x\n", index, val);
if (index >= IOAPIC_NUM_PINS)
return;
+ mask_before = ioapic->redirtbl[index].fields.mask;
if (ioapic->ioregsel & 1) {
ioapic->redirtbl[index].bits &= 0xffffffff;
ioapic->redirtbl[index].bits |= (u64) val << 32;
@@ -128,6 +133,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
ioapic->redirtbl[index].bits |= (u32) val;
ioapic->redirtbl[index].fields.remote_irr = 0;
}
+ mask_after = ioapic->redirtbl[index].fields.mask;
+ if (mask_before != mask_after)
+ kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
if (ioapic->irr & (1 << index))
ioapic_service(ioapic, index);
break;
@@ -202,7 +210,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
u32 deliver_bitmask;
struct kvm_vcpu *vcpu;
- int vcpu_id, r = 0;
+ int vcpu_id, r = -1;
ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
"vector=%x trig_mode=%x\n",
@@ -242,7 +250,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
deliver_bitmask &= ~(1 << vcpu_id);
vcpu = ioapic->kvm->vcpus[vcpu_id];
if (vcpu) {
- r = ioapic_inj_irq(ioapic, vcpu, vector,
+ if (r < 0)
+ r = 0;
+ r += ioapic_inj_irq(ioapic, vcpu, vector,
trig_mode, delivery_mode);
}
}
@@ -253,8 +263,10 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
continue;
deliver_bitmask &= ~(1 << vcpu_id);
vcpu = ioapic->kvm->vcpus[vcpu_id];
- if (vcpu)
+ if (vcpu) {
ioapic_inj_nmi(vcpu);
+ r = 1;
+ }
else
ioapic_debug("NMI to vcpu %d failed\n",
vcpu->vcpu_id);
@@ -268,11 +280,12 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
return r;
}
-void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
+int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
{
u32 old_irr = ioapic->irr;
u32 mask = 1 << irq;
union ioapic_redir_entry entry;
+ int ret = 1;
if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
entry = ioapic->redirtbl[irq];
@@ -283,25 +296,26 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
ioapic->irr |= mask;
if ((!entry.fields.trig_mode && old_irr != ioapic->irr)
|| !entry.fields.remote_irr)
- ioapic_service(ioapic, irq);
+ ret = ioapic_service(ioapic, irq);
}
}
+ return ret;
}
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi,
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
int trigger_mode)
{
union ioapic_redir_entry *ent;
- ent = &ioapic->redirtbl[gsi];
+ ent = &ioapic->redirtbl[pin];
- kvm_notify_acked_irq(ioapic->kvm, gsi);
+ kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
if (trigger_mode == IOAPIC_LEVEL_TRIG) {
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
- if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
- ioapic_service(ioapic, gsi);
+ if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
+ ioapic_service(ioapic, pin);
}
}
@@ -426,3 +440,4 @@ int kvm_ioapic_init(struct kvm *kvm)
kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
return 0;
}
+
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 49c9581d258..a34bd5e6436 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -83,7 +83,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
unsigned long bitmap);
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
int kvm_ioapic_init(struct kvm *kvm);
-void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
+int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
u8 dest_mode);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e9693a29d00..4c403750360 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -73,14 +73,13 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
{
int i, r = 0;
- down_read(&kvm->slots_lock);
for (i = 0; i < kvm->nmemslots; i++) {
r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
kvm->memslots[i].npages);
if (r)
break;
}
- up_read(&kvm->slots_lock);
+
return r;
}
@@ -190,12 +189,11 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
static int kvm_iommu_unmap_memslots(struct kvm *kvm)
{
int i;
- down_read(&kvm->slots_lock);
+
for (i = 0; i < kvm->nmemslots; i++) {
kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
kvm->memslots[i].npages);
}
- up_read(&kvm->slots_lock);
return 0;
}
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index aa5d1e5c497..864ac5483ba 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -20,35 +20,132 @@
*/
#include <linux/kvm_host.h>
+
+#include <asm/msidef.h>
+
#include "irq.h"
#include "ioapic.h"
-/* This should be called with the kvm->lock mutex held */
-void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
+static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int level)
+{
+#ifdef CONFIG_X86
+ return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level);
+#else
+ return -1;
+#endif
+}
+
+static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int level)
+{
+ return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
+}
+
+static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int level)
+{
+ int vcpu_id, r = -1;
+ struct kvm_vcpu *vcpu;
+ struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+ int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK)
+ >> MSI_ADDR_DEST_ID_SHIFT;
+ int vector = (e->msi.data & MSI_DATA_VECTOR_MASK)
+ >> MSI_DATA_VECTOR_SHIFT;
+ int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
+ (unsigned long *)&e->msi.address_lo);
+ int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
+ (unsigned long *)&e->msi.data);
+ int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
+ (unsigned long *)&e->msi.data);
+ u32 deliver_bitmask;
+
+ BUG_ON(!ioapic);
+
+ deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
+ dest_id, dest_mode);
+ /* IOAPIC delivery mode value is the same as MSI here */
+ switch (delivery_mode) {
+ case IOAPIC_LOWEST_PRIORITY:
+ vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
+ deliver_bitmask);
+ if (vcpu != NULL)
+ r = kvm_apic_set_irq(vcpu, vector, trig_mode);
+ else
+ printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
+ break;
+ case IOAPIC_FIXED:
+ for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+ if (!(deliver_bitmask & (1 << vcpu_id)))
+ continue;
+ deliver_bitmask &= ~(1 << vcpu_id);
+ vcpu = ioapic->kvm->vcpus[vcpu_id];
+ if (vcpu) {
+ if (r < 0)
+ r = 0;
+ r += kvm_apic_set_irq(vcpu, vector, trig_mode);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return r;
+}
+
+/* This should be called with the kvm->lock mutex held
+ * Return value:
+ * < 0 Interrupt was ignored (masked or not delivered for other reasons)
+ * = 0 Interrupt was coalesced (previous irq is still pending)
+ * > 0 Number of CPUs interrupt was delivered to
+ */
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
{
- unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
+ struct kvm_kernel_irq_routing_entry *e;
+ unsigned long *irq_state, sig_level;
+ int ret = -1;
+
+ if (irq < KVM_IOAPIC_NUM_PINS) {
+ irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
- /* Logical OR for level trig interrupt */
- if (level)
- set_bit(irq_source_id, irq_state);
- else
- clear_bit(irq_source_id, irq_state);
+ /* Logical OR for level trig interrupt */
+ if (level)
+ set_bit(irq_source_id, irq_state);
+ else
+ clear_bit(irq_source_id, irq_state);
+ sig_level = !!(*irq_state);
+ } else /* Deal with MSI/MSI-X */
+ sig_level = 1;
/* Not possible to detect if the guest uses the PIC or the
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
- kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state));
-#ifdef CONFIG_X86
- kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state));
-#endif
+ list_for_each_entry(e, &kvm->irq_routing, link)
+ if (e->gsi == irq) {
+ int r = e->set(e, kvm, sig_level);
+ if (r < 0)
+ continue;
+
+ ret = r + ((ret < 0) ? 0 : ret);
+ }
+ return ret;
}
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
+ struct kvm_kernel_irq_routing_entry *e;
struct kvm_irq_ack_notifier *kian;
struct hlist_node *n;
+ unsigned gsi = pin;
+
+ list_for_each_entry(e, &kvm->irq_routing, link)
+ if (e->irqchip.irqchip == irqchip &&
+ e->irqchip.pin == pin) {
+ gsi = e->gsi;
+ break;
+ }
hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
if (kian->gsi == gsi)
@@ -99,3 +196,177 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
}
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+ struct kvm_irq_mask_notifier *kimn)
+{
+ kimn->irq = irq;
+ hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+}
+
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+ struct kvm_irq_mask_notifier *kimn)
+{
+ hlist_del(&kimn->link);
+}
+
+void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
+{
+ struct kvm_irq_mask_notifier *kimn;
+ struct hlist_node *n;
+
+ hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
+ if (kimn->irq == irq)
+ kimn->func(kimn, mask);
+}
+
+static void __kvm_free_irq_routing(struct list_head *irq_routing)
+{
+ struct kvm_kernel_irq_routing_entry *e, *n;
+
+ list_for_each_entry_safe(e, n, irq_routing, link)
+ kfree(e);
+}
+
+void kvm_free_irq_routing(struct kvm *kvm)
+{
+ __kvm_free_irq_routing(&kvm->irq_routing);
+}
+
+static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+ const struct kvm_irq_routing_entry *ue)
+{
+ int r = -EINVAL;
+ int delta;
+
+ e->gsi = ue->gsi;
+ switch (ue->type) {
+ case KVM_IRQ_ROUTING_IRQCHIP:
+ delta = 0;
+ switch (ue->u.irqchip.irqchip) {
+ case KVM_IRQCHIP_PIC_MASTER:
+ e->set = kvm_set_pic_irq;
+ break;
+ case KVM_IRQCHIP_PIC_SLAVE:
+ e->set = kvm_set_pic_irq;
+ delta = 8;
+ break;
+ case KVM_IRQCHIP_IOAPIC:
+ e->set = kvm_set_ioapic_irq;
+ break;
+ default:
+ goto out;
+ }
+ e->irqchip.irqchip = ue->u.irqchip.irqchip;
+ e->irqchip.pin = ue->u.irqchip.pin + delta;
+ break;
+ case KVM_IRQ_ROUTING_MSI:
+ e->set = kvm_set_msi;
+ e->msi.address_lo = ue->u.msi.address_lo;
+ e->msi.address_hi = ue->u.msi.address_hi;
+ e->msi.data = ue->u.msi.data;
+ break;
+ default:
+ goto out;
+ }
+ r = 0;
+out:
+ return r;
+}
+
+
+int kvm_set_irq_routing(struct kvm *kvm,
+ const struct kvm_irq_routing_entry *ue,
+ unsigned nr,
+ unsigned flags)
+{
+ struct list_head irq_list = LIST_HEAD_INIT(irq_list);
+ struct list_head tmp = LIST_HEAD_INIT(tmp);
+ struct kvm_kernel_irq_routing_entry *e = NULL;
+ unsigned i;
+ int r;
+
+ for (i = 0; i < nr; ++i) {
+ r = -EINVAL;
+ if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
+ goto out;
+ if (ue->flags)
+ goto out;
+ r = -ENOMEM;
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e)
+ goto out;
+ r = setup_routing_entry(e, ue);
+ if (r)
+ goto out;
+ ++ue;
+ list_add(&e->link, &irq_list);
+ e = NULL;
+ }
+
+ mutex_lock(&kvm->lock);
+ list_splice(&kvm->irq_routing, &tmp);
+ INIT_LIST_HEAD(&kvm->irq_routing);
+ list_splice(&irq_list, &kvm->irq_routing);
+ INIT_LIST_HEAD(&irq_list);
+ list_splice(&tmp, &irq_list);
+ mutex_unlock(&kvm->lock);
+
+ r = 0;
+
+out:
+ kfree(e);
+ __kvm_free_irq_routing(&irq_list);
+ return r;
+}
+
+#define IOAPIC_ROUTING_ENTRY(irq) \
+ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
+ .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
+#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
+
+#ifdef CONFIG_X86
+# define PIC_ROUTING_ENTRY(irq) \
+ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
+ .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 }
+# define ROUTING_ENTRY2(irq) \
+ IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
+#else
+# define ROUTING_ENTRY2(irq) \
+ IOAPIC_ROUTING_ENTRY(irq)
+#endif
+
+static const struct kvm_irq_routing_entry default_routing[] = {
+ ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
+ ROUTING_ENTRY2(2), ROUTING_ENTRY2(3),
+ ROUTING_ENTRY2(4), ROUTING_ENTRY2(5),
+ ROUTING_ENTRY2(6), ROUTING_ENTRY2(7),
+ ROUTING_ENTRY2(8), ROUTING_ENTRY2(9),
+ ROUTING_ENTRY2(10), ROUTING_ENTRY2(11),
+ ROUTING_ENTRY2(12), ROUTING_ENTRY2(13),
+ ROUTING_ENTRY2(14), ROUTING_ENTRY2(15),
+ ROUTING_ENTRY1(16), ROUTING_ENTRY1(17),
+ ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
+ ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
+ ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
+#ifdef CONFIG_IA64
+ ROUTING_ENTRY1(24), ROUTING_ENTRY1(25),
+ ROUTING_ENTRY1(26), ROUTING_ENTRY1(27),
+ ROUTING_ENTRY1(28), ROUTING_ENTRY1(29),
+ ROUTING_ENTRY1(30), ROUTING_ENTRY1(31),
+ ROUTING_ENTRY1(32), ROUTING_ENTRY1(33),
+ ROUTING_ENTRY1(34), ROUTING_ENTRY1(35),
+ ROUTING_ENTRY1(36), ROUTING_ENTRY1(37),
+ ROUTING_ENTRY1(38), ROUTING_ENTRY1(39),
+ ROUTING_ENTRY1(40), ROUTING_ENTRY1(41),
+ ROUTING_ENTRY1(42), ROUTING_ENTRY1(43),
+ ROUTING_ENTRY1(44), ROUTING_ENTRY1(45),
+ ROUTING_ENTRY1(46), ROUTING_ENTRY1(47),
+#endif
+};
+
+int kvm_setup_default_irq_routing(struct kvm *kvm)
+{
+ return kvm_set_irq_routing(kvm, default_routing,
+ ARRAY_SIZE(default_routing), 0);
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3a5a08298aa..605697e9c4d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,10 +47,6 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-#ifdef CONFIG_X86
-#include <asm/msidef.h>
-#endif
-
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
#include "coalesced_mmio.h"
#endif
@@ -85,57 +81,6 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
static bool kvm_rebooting;
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
-
-#ifdef CONFIG_X86
-static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev)
-{
- int vcpu_id;
- struct kvm_vcpu *vcpu;
- struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm);
- int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK)
- >> MSI_ADDR_DEST_ID_SHIFT;
- int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK)
- >> MSI_DATA_VECTOR_SHIFT;
- int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
- (unsigned long *)&dev->guest_msi.address_lo);
- int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
- (unsigned long *)&dev->guest_msi.data);
- int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
- (unsigned long *)&dev->guest_msi.data);
- u32 deliver_bitmask;
-
- BUG_ON(!ioapic);
-
- deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
- dest_id, dest_mode);
- /* IOAPIC delivery mode value is the same as MSI here */
- switch (delivery_mode) {
- case IOAPIC_LOWEST_PRIORITY:
- vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
- deliver_bitmask);
- if (vcpu != NULL)
- kvm_apic_set_irq(vcpu, vector, trig_mode);
- else
- printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
- break;
- case IOAPIC_FIXED:
- for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
- if (!(deliver_bitmask & (1 << vcpu_id)))
- continue;
- deliver_bitmask &= ~(1 << vcpu_id);
- vcpu = ioapic->kvm->vcpus[vcpu_id];
- if (vcpu)
- kvm_apic_set_irq(vcpu, vector, trig_mode);
- }
- break;
- default:
- printk(KERN_INFO "kvm: unsupported MSI delivery mode\n");
- }
-}
-#else
-static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {}
-#endif
-
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
int assigned_dev_id)
{
@@ -162,18 +107,14 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
* finer-grained lock, update this
*/
mutex_lock(&assigned_dev->kvm->lock);
- if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX)
- kvm_set_irq(assigned_dev->kvm,
- assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 1);
- else if (assigned_dev->irq_requested_type &
- KVM_ASSIGNED_DEV_GUEST_MSI) {
- assigned_device_msi_dispatch(assigned_dev);
+ kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+ assigned_dev->guest_irq, 1);
+
+ if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
enable_irq(assigned_dev->host_irq);
assigned_dev->host_irq_disabled = false;
}
mutex_unlock(&assigned_dev->kvm->lock);
- kvm_put_kvm(assigned_dev->kvm);
}
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -181,8 +122,6 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
struct kvm_assigned_dev_kernel *assigned_dev =
(struct kvm_assigned_dev_kernel *) dev_id;
- kvm_get_kvm(assigned_dev->kvm);
-
schedule_work(&assigned_dev->interrupt_work);
disable_irq_nosync(irq);
@@ -213,6 +152,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
}
}
+/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
static void kvm_free_assigned_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
@@ -228,11 +168,24 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
if (!assigned_dev->irq_requested_type)
return;
- if (cancel_work_sync(&assigned_dev->interrupt_work))
- /* We had pending work. That means we will have to take
- * care of kvm_put_kvm.
- */
- kvm_put_kvm(kvm);
+ /*
+ * In kvm_free_device_irq, cancel_work_sync return true if:
+ * 1. work is scheduled, and then cancelled.
+ * 2. work callback is executed.
+ *
+ * The first one ensured that the irq is disabled and no more events
+ * would happen. But for the second one, the irq may be enabled (e.g.
+ * for MSI). So we disable irq here to prevent further events.
+ *
+ * Notice this maybe result in nested disable if the interrupt type is
+ * INTx, but it's OK for we are going to free it.
+ *
+ * If this function is a part of VM destroy, please ensure that till
+ * now, the kvm state is still legal for probably we also have to wait
+ * interrupt_work done.
+ */
+ disable_irq_nosync(assigned_dev->host_irq);
+ cancel_work_sync(&assigned_dev->interrupt_work);
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
@@ -285,8 +238,8 @@ static int assigned_device_update_intx(struct kvm *kvm,
if (irqchip_in_kernel(kvm)) {
if (!msi2intx &&
- adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
- free_irq(adev->host_irq, (void *)kvm);
+ (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) {
+ free_irq(adev->host_irq, (void *)adev);
pci_disable_msi(adev->dev);
}
@@ -320,18 +273,24 @@ static int assigned_device_update_msi(struct kvm *kvm,
{
int r;
+ adev->guest_irq = airq->guest_irq;
if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
/* x86 don't care upper address of guest msi message addr */
adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
- adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
- adev->guest_msi.data = airq->guest_msi.data;
adev->ack_notifier.gsi = -1;
} else if (msi2intx) {
adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
- adev->guest_irq = airq->guest_irq;
adev->ack_notifier.gsi = airq->guest_irq;
+ } else {
+ /*
+ * Guest require to disable device MSI, we disable MSI and
+ * re-enable INTx by default again. Notice it's only for
+ * non-msi2intx.
+ */
+ assigned_device_update_intx(kvm, adev, airq);
+ return 0;
}
if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
@@ -368,6 +327,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
{
int r = 0;
struct kvm_assigned_dev_kernel *match;
+ u32 current_flags = 0, changed_flags;
mutex_lock(&kvm->lock);
@@ -405,8 +365,13 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
}
}
- if ((!msi2intx &&
- (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
+ if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
+ (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
+ current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
+
+ changed_flags = assigned_irq->flags ^ current_flags;
+
+ if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
(msi2intx && match->dev->msi_enabled)) {
#ifdef CONFIG_X86
r = assigned_device_update_msi(kvm, match, assigned_irq);
@@ -455,6 +420,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
+ down_read(&kvm->slots_lock);
mutex_lock(&kvm->lock);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
@@ -516,6 +482,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
out:
mutex_unlock(&kvm->lock);
+ up_read(&kvm->slots_lock);
return r;
out_list_del:
list_del(&match->list);
@@ -527,6 +494,7 @@ out_put:
out_free:
kfree(match);
mutex_unlock(&kvm->lock);
+ up_read(&kvm->slots_lock);
return r;
}
#endif
@@ -549,7 +517,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
goto out;
}
- if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
+ if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
kvm_deassign_device(kvm, match);
kvm_free_assigned_device(kvm, match);
@@ -567,8 +535,10 @@ static inline int valid_vcpu(int n)
inline int kvm_is_mmio_pfn(pfn_t pfn)
{
- if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn));
+ if (pfn_valid(pfn)) {
+ struct page *page = compound_head(pfn_to_page(pfn));
+ return PageReserved(page);
+ }
return true;
}
@@ -789,11 +759,19 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
return young;
}
+static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
+ struct mm_struct *mm)
+{
+ struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ kvm_arch_flush_shadow(kvm);
+}
+
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.invalidate_page = kvm_mmu_notifier_invalidate_page,
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
+ .release = kvm_mmu_notifier_release,
};
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
@@ -806,6 +784,10 @@ static struct kvm *kvm_create_vm(void)
if (IS_ERR(kvm))
goto out;
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+ INIT_LIST_HEAD(&kvm->irq_routing);
+ INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+#endif
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -883,9 +865,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
{
struct mm_struct *mm = kvm->mm;
+ kvm_arch_sync_events(kvm);
spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
+ kvm_free_irq_routing(kvm);
kvm_io_bus_destroy(&kvm->pio_bus);
kvm_io_bus_destroy(&kvm->mmio_bus);
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
@@ -1732,13 +1716,13 @@ out_free2:
r = 0;
break;
}
- case KVM_DEBUG_GUEST: {
- struct kvm_debug_guest dbg;
+ case KVM_SET_GUEST_DEBUG: {
+ struct kvm_guest_debug dbg;
r = -EFAULT;
if (copy_from_user(&dbg, argp, sizeof dbg))
goto out;
- r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg);
+ r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
if (r)
goto out;
r = 0;
@@ -1906,6 +1890,36 @@ static long kvm_vm_ioctl(struct file *filp,
break;
}
#endif
+#ifdef KVM_CAP_IRQ_ROUTING
+ case KVM_SET_GSI_ROUTING: {
+ struct kvm_irq_routing routing;
+ struct kvm_irq_routing __user *urouting;
+ struct kvm_irq_routing_entry *entries;
+
+ r = -EFAULT;
+ if (copy_from_user(&routing, argp, sizeof(routing)))
+ goto out;
+ r = -EINVAL;
+ if (routing.nr >= KVM_MAX_IRQ_ROUTES)
+ goto out;
+ if (routing.flags)
+ goto out;
+ r = -ENOMEM;
+ entries = vmalloc(routing.nr * sizeof(*entries));
+ if (!entries)
+ goto out;
+ r = -EFAULT;
+ urouting = argp;
+ if (copy_from_user(entries, urouting->entries,
+ routing.nr * sizeof(*entries)))
+ goto out_free_irq_routing;
+ r = kvm_set_irq_routing(kvm, entries, routing.nr,
+ routing.flags);
+ out_free_irq_routing:
+ vfree(entries);
+ break;
+ }
+#endif
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
}
@@ -1972,6 +1986,10 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
case KVM_CAP_USER_MEMORY:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
return 1;
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+ case KVM_CAP_IRQ_ROUTING:
+ return KVM_MAX_IRQ_ROUTES;
+#endif
default:
break;
}