diff options
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- | drivers/iommu/Kconfig | 8 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu.c | 8 | ||||
-rw-r--r-- | drivers/iommu/dmar.c | 2 | ||||
-rw-r--r-- | drivers/iommu/intel-iommu.c | 2 | ||||
-rw-r--r-- | drivers/iommu/intel_irq_remapping.c | 48 | ||||
-rw-r--r-- | drivers/iommu/irq_remapping.c | 231 | ||||
-rw-r--r-- | drivers/iommu/irq_remapping.h | 1 | ||||
-rw-r--r-- | drivers/iommu/tegra-smmu.c | 7 |
8 files changed, 278 insertions, 29 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index e39f9dbf297..01068987809 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -65,8 +65,8 @@ config AMD_IOMMU_STATS If unsure, say N. config AMD_IOMMU_V2 - tristate "AMD IOMMU Version 2 driver (EXPERIMENTAL)" - depends on AMD_IOMMU && PROFILING && EXPERIMENTAL + tristate "AMD IOMMU Version 2 driver" + depends on AMD_IOMMU && PROFILING select MMU_NOTIFIER ---help--- This option enables support for the AMD IOMMUv2 features of the IOMMU @@ -119,8 +119,8 @@ config INTEL_IOMMU_FLOPPY_WA 16MiB to make floppy (an ISA device) work. config IRQ_REMAP - bool "Support for Interrupt Remapping (EXPERIMENTAL)" - depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL + bool "Support for Interrupt Remapping" + depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI select DMAR_TABLE ---help--- Supports Interrupt remapping for IO-APIC and MSI devices. diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index c1c74e030a5..d33eaaf783a 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4017,10 +4017,10 @@ static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) index -= count - 1; + cfg->remapped = 1; irte_info = &cfg->irq_2_iommu; irte_info->sub_handle = devid; irte_info->irte_index = index; - irte_info->iommu = (void *)cfg; goto out; } @@ -4127,9 +4127,9 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, index = attr->ioapic_pin; /* Setup IRQ remapping info */ + cfg->remapped = 1; irte_info->sub_handle = devid; irte_info->irte_index = index; - irte_info->iommu = (void *)cfg; /* Setup IRTE for IOMMU */ irte.val = 0; @@ -4288,9 +4288,9 @@ static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, devid = get_device_id(&pdev->dev); irte_info = &cfg->irq_2_iommu; + cfg->remapped = 1; irte_info->sub_handle = devid; irte_info->irte_index = index + offset; - irte_info->iommu = (void *)cfg; return 0; } @@ -4314,9 +4314,9 @@ static int setup_hpet_msi(unsigned int irq, unsigned int id) if (index < 0) return index; + cfg->remapped = 1; irte_info->sub_handle = devid; irte_info->irte_index = index; - irte_info->iommu = (void *)cfg; return 0; } diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 86e2f4a62b9..174bb654453 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -41,6 +41,8 @@ #include <asm/irq_remapping.h> #include <asm/iommu_table.h> +#include "irq_remapping.h" + /* No locks are needed as DMA remapping hardware unit * list is constructed at boot time and hotplug of * these units are not supported by the architecture. diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f1e7b86a7c3..0099667a397 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -46,6 +46,8 @@ #include <asm/cacheflush.h> #include <asm/iommu.h> +#include "irq_remapping.h" + #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index af8904de1d4..f3b8f23b5d8 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -68,6 +68,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) { struct ir_table *table = iommu->ir_table; struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); + struct irq_cfg *cfg = irq_get_chip_data(irq); u16 index, start_index; unsigned int mask = 0; unsigned long flags; @@ -115,6 +116,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) for (i = index; i < index + count; i++) table->base[i].present = 1; + cfg->remapped = 1; irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = 0; @@ -155,6 +157,7 @@ static int map_irq_to_irte_handle(int irq, u16 *sub_handle) static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) { struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); + struct irq_cfg *cfg = irq_get_chip_data(irq); unsigned long flags; if (!irq_iommu) @@ -162,6 +165,7 @@ static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subha raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + cfg->remapped = 1; irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = subhandle; @@ -425,11 +429,22 @@ static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode) /* Enable interrupt-remapping */ iommu->gcmd |= DMA_GCMD_IRE; + iommu->gcmd &= ~DMA_GCMD_CFI; /* Block compatibility-format MSIs */ writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRES), sts); + /* + * With CFI clear in the Global Command register, we should be + * protected from dangerous (i.e. compatibility) interrupts + * regardless of x2apic status. Check just to be sure. + */ + if (sts & DMA_GSTS_CFIS) + WARN(1, KERN_WARNING + "Compatibility-format IRQs enabled despite intr remapping;\n" + "you are vulnerable to IRQ injection.\n"); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); } @@ -526,20 +541,24 @@ static int __init intel_irq_remapping_supported(void) static int __init intel_enable_irq_remapping(void) { struct dmar_drhd_unit *drhd; + bool x2apic_present; int setup = 0; int eim = 0; + x2apic_present = x2apic_supported(); + if (parse_ioapics_under_ir() != 1) { printk(KERN_INFO "Not enable interrupt remapping\n"); - return -1; + goto error; } - if (x2apic_supported()) { + if (x2apic_present) { eim = !dmar_x2apic_optout(); - WARN(!eim, KERN_WARNING - "Your BIOS is broken and requested that x2apic be disabled\n" - "This will leave your machine vulnerable to irq-injection attacks\n" - "Use 'intremap=no_x2apic_optout' to override BIOS request\n"); + if (!eim) + printk(KERN_WARNING + "Your BIOS is broken and requested that x2apic be disabled.\n" + "This will slightly decrease performance.\n" + "Use 'intremap=no_x2apic_optout' to override BIOS request.\n"); } for_each_drhd_unit(drhd) { @@ -578,7 +597,7 @@ static int __init intel_enable_irq_remapping(void) if (eim && !ecap_eim_support(iommu->ecap)) { printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, " " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap); - return -1; + goto error; } } @@ -594,7 +613,7 @@ static int __init intel_enable_irq_remapping(void) printk(KERN_ERR "DRHD %Lx: failed to enable queued, " " invalidation, ecap %Lx, ret %d\n", drhd->reg_base_addr, iommu->ecap, ret); - return -1; + goto error; } } @@ -617,6 +636,14 @@ static int __init intel_enable_irq_remapping(void) goto error; irq_remapping_enabled = 1; + + /* + * VT-d has a different layout for IO-APIC entries when + * interrupt remapping is enabled. So it needs a special routine + * to print IO-APIC entries for debugging purposes too. + */ + x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries; + pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic"); return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; @@ -625,6 +652,11 @@ error: /* * handle error condition gracefully here! */ + + if (x2apic_present) + WARN(1, KERN_WARNING + "Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n"); + return -1; } diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index faf85d6e33f..d56f8c17c5f 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -1,11 +1,18 @@ +#include <linux/seq_file.h> +#include <linux/cpumask.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/cpumask.h> #include <linux/errno.h> #include <linux/msi.h> +#include <linux/irq.h> +#include <linux/pci.h> #include <asm/hw_irq.h> #include <asm/irq_remapping.h> +#include <asm/processor.h> +#include <asm/x86_init.h> +#include <asm/apic.h> #include "irq_remapping.h" @@ -17,6 +24,152 @@ int no_x2apic_optout; static struct irq_remap_ops *remap_ops; +static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); +static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle); +static int set_remapped_irq_affinity(struct irq_data *data, + const struct cpumask *mask, + bool force); + +static bool irq_remapped(struct irq_cfg *cfg) +{ + return (cfg->remapped == 1); +} + +static void irq_remapping_disable_io_apic(void) +{ + /* + * With interrupt-remapping, for now we will use virtual wire A + * mode, as virtual wire B is little complex (need to configure + * both IOAPIC RTE as well as interrupt-remapping table entry). + * As this gets called during crash dump, keep this simple for + * now. + */ + if (cpu_has_apic || apic_from_smp_config()) + disconnect_bsp_APIC(0); +} + +static int do_setup_msi_irqs(struct pci_dev *dev, int nvec) +{ + int node, ret, sub_handle, index = 0; + unsigned int irq; + struct msi_desc *msidesc; + + nvec = __roundup_pow_of_two(nvec); + + WARN_ON(!list_is_singular(&dev->msi_list)); + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + WARN_ON(msidesc->irq); + WARN_ON(msidesc->msi_attrib.multiple); + + node = dev_to_node(&dev->dev); + irq = __create_irqs(get_nr_irqs_gsi(), nvec, node); + if (irq == 0) + return -ENOSPC; + + msidesc->msi_attrib.multiple = ilog2(nvec); + for (sub_handle = 0; sub_handle < nvec; sub_handle++) { + if (!sub_handle) { + index = msi_alloc_remapped_irq(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + ret = msi_setup_remapped_irq(dev, irq + sub_handle, + index, sub_handle); + if (ret < 0) + goto error; + } + ret = setup_msi_irq(dev, msidesc, irq, sub_handle); + if (ret < 0) + goto error; + } + return 0; + +error: + destroy_irqs(irq, nvec); + + /* + * Restore altered MSI descriptor fields and prevent just destroyed + * IRQs from tearing down again in default_teardown_msi_irqs() + */ + msidesc->irq = 0; + msidesc->msi_attrib.multiple = 0; + + return ret; +} + +static int do_setup_msix_irqs(struct pci_dev *dev, int nvec) +{ + int node, ret, sub_handle, index = 0; + struct msi_desc *msidesc; + unsigned int irq; + + node = dev_to_node(&dev->dev); + irq = get_nr_irqs_gsi(); + sub_handle = 0; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + + irq = create_irq_nr(irq, node); + if (irq == 0) + return -1; + + if (sub_handle == 0) + ret = index = msi_alloc_remapped_irq(dev, irq, nvec); + else + ret = msi_setup_remapped_irq(dev, irq, index, sub_handle); + + if (ret < 0) + goto error; + + ret = setup_msi_irq(dev, msidesc, irq, 0); + if (ret < 0) + goto error; + + sub_handle += 1; + irq += 1; + } + + return 0; + +error: + destroy_irq(irq); + return ret; +} + +static int irq_remapping_setup_msi_irqs(struct pci_dev *dev, + int nvec, int type) +{ + if (type == PCI_CAP_ID_MSI) + return do_setup_msi_irqs(dev, nvec); + else + return do_setup_msix_irqs(dev, nvec); +} + +void eoi_ioapic_pin_remapped(int apic, int pin, int vector) +{ + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. + */ + io_apic_eoi(apic, pin); +} + +static void __init irq_remapping_modify_x86_ops(void) +{ + x86_io_apic_ops.disable = irq_remapping_disable_io_apic; + x86_io_apic_ops.set_affinity = set_remapped_irq_affinity; + x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry; + x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped; + x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs; + x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; + x86_msi.compose_msi_msg = compose_remapped_msi_msg; +} + static __init int setup_nointremap(char *str) { disable_irq_remap = 1; @@ -79,15 +232,24 @@ int __init irq_remapping_prepare(void) int __init irq_remapping_enable(void) { + int ret; + if (!remap_ops || !remap_ops->enable) return -ENODEV; - return remap_ops->enable(); + ret = remap_ops->enable(); + + if (irq_remapping_enabled) + irq_remapping_modify_x86_ops(); + + return ret; } void irq_remapping_disable(void) { - if (!remap_ops || !remap_ops->disable) + if (!irq_remapping_enabled || + !remap_ops || + !remap_ops->disable) return; remap_ops->disable(); @@ -95,7 +257,9 @@ void irq_remapping_disable(void) int irq_remapping_reenable(int mode) { - if (!remap_ops || !remap_ops->reenable) + if (!irq_remapping_enabled || + !remap_ops || + !remap_ops->reenable) return 0; return remap_ops->reenable(mode); @@ -103,6 +267,9 @@ int irq_remapping_reenable(int mode) int __init irq_remap_enable_fault_handling(void) { + if (!irq_remapping_enabled) + return 0; + if (!remap_ops || !remap_ops->enable_faulting) return -ENODEV; @@ -133,23 +300,28 @@ int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask, void free_remapped_irq(int irq) { + struct irq_cfg *cfg = irq_get_chip_data(irq); + if (!remap_ops || !remap_ops->free_irq) return; - remap_ops->free_irq(irq); + if (irq_remapped(cfg)) + remap_ops->free_irq(irq); } void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id) { - if (!remap_ops || !remap_ops->compose_msi_msg) - return; + struct irq_cfg *cfg = irq_get_chip_data(irq); - remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); + if (!irq_remapped(cfg)) + native_compose_msi_msg(pdev, irq, dest, msg, hpet_id); + else if (remap_ops && remap_ops->compose_msi_msg) + remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); } -int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) +static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) { if (!remap_ops || !remap_ops->msi_alloc_irq) return -ENODEV; @@ -157,8 +329,8 @@ int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) return remap_ops->msi_alloc_irq(pdev, irq, nvec); } -int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle) +static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle) { if (!remap_ops || !remap_ops->msi_setup_irq) return -ENODEV; @@ -173,3 +345,42 @@ int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) return remap_ops->setup_hpet_msi(irq, id); } + +void panic_if_irq_remap(const char *msg) +{ + if (irq_remapping_enabled) + panic(msg); +} + +static void ir_ack_apic_edge(struct irq_data *data) +{ + ack_APIC_irq(); +} + +static void ir_ack_apic_level(struct irq_data *data) +{ + ack_APIC_irq(); + eoi_ioapic_irq(data->irq, data->chip_data); +} + +static void ir_print_prefix(struct irq_data *data, struct seq_file *p) +{ + seq_printf(p, " IR-%s", data->chip->name); +} + +void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ + chip->irq_print_chip = ir_print_prefix; + chip->irq_ack = ir_ack_apic_edge; + chip->irq_eoi = ir_ack_apic_level; + chip->irq_set_affinity = x86_io_apic_ops.set_affinity; +} + +bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip) +{ + if (!irq_remapped(cfg)) + return false; + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); + irq_remap_modify_chip_defaults(chip); + return true; +} diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index 95363acb583..ecb63767040 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -34,6 +34,7 @@ struct msi_msg; extern int disable_irq_remap; extern int disable_sourceid_checking; extern int no_x2apic_optout; +extern int irq_remapping_enabled; struct irq_remap_ops { /* Check whether Interrupt Remapping is supported */ diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index fc178893789..f08dbcd2f17 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -19,6 +19,7 @@ #define pr_fmt(fmt) "%s(): " fmt, __func__ +#include <linux/err.h> #include <linux/module.h> #include <linux/platform_device.h> #include <linux/spinlock.h> @@ -1176,9 +1177,9 @@ static int tegra_smmu_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, i); if (!res) return -ENODEV; - smmu->regs[i] = devm_request_and_ioremap(&pdev->dev, res); - if (!smmu->regs[i]) - return -EBUSY; + smmu->regs[i] = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(smmu->regs[i])) + return PTR_ERR(smmu->regs[i]); } err = of_get_dma_window(dev->of_node, NULL, 0, NULL, &base, &size); |