diff options
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- | drivers/iommu/Kconfig | 2 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu.c | 545 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_init.c | 261 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_proto.h | 8 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_types.h | 59 | ||||
-rw-r--r-- | drivers/iommu/exynos-iommu.c | 9 | ||||
-rw-r--r-- | drivers/iommu/intel-iommu.c | 30 | ||||
-rw-r--r-- | drivers/iommu/intel_irq_remapping.c | 18 | ||||
-rw-r--r-- | drivers/iommu/irq_remapping.c | 5 | ||||
-rw-r--r-- | drivers/iommu/irq_remapping.h | 6 | ||||
-rw-r--r-- | drivers/iommu/tegra-smmu.c | 17 |
11 files changed, 902 insertions, 58 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 9f69b561f5d..e39f9dbf297 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -42,7 +42,7 @@ config AMD_IOMMU select PCI_PRI select PCI_PASID select IOMMU_API - depends on X86_64 && PCI && ACPI + depends on X86_64 && PCI && ACPI && X86_IO_APIC ---help--- With this option you can enable support for AMD IOMMU hardware in your system. An IOMMU is a hardware component which provides diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 6d1cbdfc9b2..55074cba20e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -31,6 +31,12 @@ #include <linux/amd-iommu.h> #include <linux/notifier.h> #include <linux/export.h> +#include <linux/irq.h> +#include <linux/msi.h> +#include <asm/irq_remapping.h> +#include <asm/io_apic.h> +#include <asm/apic.h> +#include <asm/hw_irq.h> #include <asm/msidef.h> #include <asm/proto.h> #include <asm/iommu.h> @@ -39,6 +45,7 @@ #include "amd_iommu_proto.h" #include "amd_iommu_types.h" +#include "irq_remapping.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) @@ -72,6 +79,9 @@ static DEFINE_SPINLOCK(iommu_pd_list_lock); static LIST_HEAD(dev_data_list); static DEFINE_SPINLOCK(dev_data_list_lock); +LIST_HEAD(ioapic_map); +LIST_HEAD(hpet_map); + /* * Domain for untranslated devices - only allocated * if iommu=pt passed on kernel cmd line. @@ -92,6 +102,8 @@ struct iommu_cmd { u32 data[4]; }; +struct kmem_cache *amd_iommu_irq_cache; + static void update_domain(struct protection_domain *domain); static int __init alloc_passthrough_domain(void); @@ -266,7 +278,7 @@ static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) static int iommu_init_device(struct device *dev) { - struct pci_dev *dma_pdev, *pdev = to_pci_dev(dev); + struct pci_dev *dma_pdev = NULL, *pdev = to_pci_dev(dev); struct iommu_dev_data *dev_data; struct iommu_group *group; u16 alias; @@ -293,11 +305,18 @@ static int iommu_init_device(struct device *dev) dev_data->alias_data = alias_data; dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff); - } else + } + + if (dma_pdev == NULL) dma_pdev = pci_dev_get(pdev); + /* Account for quirked devices */ swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); + /* + * If it's a multifunction device that does not support our + * required ACS flags, add to the same group as function 0. + */ if (dma_pdev->multifunction && !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) swap_pci_ref(&dma_pdev, @@ -305,14 +324,28 @@ static int iommu_init_device(struct device *dev) PCI_DEVFN(PCI_SLOT(dma_pdev->devfn), 0))); + /* + * Devices on the root bus go through the iommu. If that's not us, + * find the next upstream device and test ACS up to the root bus. + * Finding the next device may require skipping virtual buses. + */ while (!pci_is_root_bus(dma_pdev->bus)) { - if (pci_acs_path_enabled(dma_pdev->bus->self, - NULL, REQ_ACS_FLAGS)) + struct pci_bus *bus = dma_pdev->bus; + + while (!bus->self) { + if (!pci_is_root_bus(bus)) + bus = bus->parent; + else + goto root_bus; + } + + if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) break; - swap_pci_ref(&dma_pdev, pci_dev_get(dma_pdev->bus->self)); + swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); } +root_bus: group = iommu_group_get(&dma_pdev->dev); pci_dev_put(dma_pdev); if (!group) { @@ -665,7 +698,7 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) /* * Release iommu->lock because ppr-handling might need to - * re-aquire it + * re-acquire it */ spin_unlock_irqrestore(&iommu->lock, flags); @@ -783,7 +816,7 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); if (s) /* size bit - we flush more than one 4kb page */ cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ + if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; } @@ -878,6 +911,13 @@ static void build_inv_all(struct iommu_cmd *cmd) CMD_SET_TYPE(cmd, CMD_INV_ALL); } +static void build_inv_irt(struct iommu_cmd *cmd, u16 devid) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + CMD_SET_TYPE(cmd, CMD_INV_IRT); +} + /* * Writes the command to the IOMMUs command buffer and informs the * hardware about the new command. @@ -999,12 +1039,32 @@ static void iommu_flush_all(struct amd_iommu *iommu) iommu_completion_wait(iommu); } +static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) +{ + struct iommu_cmd cmd; + + build_inv_irt(&cmd, devid); + + iommu_queue_command(iommu, &cmd); +} + +static void iommu_flush_irt_all(struct amd_iommu *iommu) +{ + u32 devid; + + for (devid = 0; devid <= MAX_DEV_TABLE_ENTRIES; devid++) + iommu_flush_irt(iommu, devid); + + iommu_completion_wait(iommu); +} + void iommu_flush_all_caches(struct amd_iommu *iommu) { if (iommu_feature(iommu, FEATURE_IA)) { iommu_flush_all(iommu); } else { iommu_flush_dte_all(iommu); + iommu_flush_irt_all(iommu); iommu_flush_tlb_all(iommu); } } @@ -2134,7 +2194,7 @@ static bool pci_pri_tlp_required(struct pci_dev *pdev) } /* - * If a device is not yet associated with a domain, this function does + * If a device is not yet associated with a domain, this function * assigns it visible for the hardware */ static int attach_device(struct device *dev, @@ -2384,7 +2444,7 @@ static struct protection_domain *get_domain(struct device *dev) if (domain != NULL) return domain; - /* Device not bount yet - bind it */ + /* Device not bound yet - bind it */ dma_dom = find_protection_domain(devid); if (!dma_dom) dma_dom = amd_iommu_rlookup_table[devid]->default_dom; @@ -2923,7 +2983,7 @@ static void __init prealloc_protection_domains(void) alloc_passthrough_domain(); dev_data->passthrough = true; attach_device(&dev->dev, pt_domain); - pr_info("AMD-Vi: Using passthough domain for device %s\n", + pr_info("AMD-Vi: Using passthrough domain for device %s\n", dev_name(&dev->dev)); } @@ -3295,6 +3355,8 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain, switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: return 1; + case IOMMU_CAP_INTR_REMAP: + return irq_remapping_enabled; } return 0; @@ -3722,3 +3784,466 @@ int amd_iommu_device_info(struct pci_dev *pdev, return 0; } EXPORT_SYMBOL(amd_iommu_device_info); + +#ifdef CONFIG_IRQ_REMAP + +/***************************************************************************** + * + * Interrupt Remapping Implementation + * + *****************************************************************************/ + +union irte { + u32 val; + struct { + u32 valid : 1, + no_fault : 1, + int_type : 3, + rq_eoi : 1, + dm : 1, + rsvd_1 : 1, + destination : 8, + vector : 8, + rsvd_2 : 8; + } fields; +}; + +#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) +#define DTE_IRQ_REMAP_INTCTL (2ULL << 60) +#define DTE_IRQ_TABLE_LEN (8ULL << 1) +#define DTE_IRQ_REMAP_ENABLE 1ULL + +static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) +{ + u64 dte; + + dte = amd_iommu_dev_table[devid].data[2]; + dte &= ~DTE_IRQ_PHYS_ADDR_MASK; + dte |= virt_to_phys(table->table); + dte |= DTE_IRQ_REMAP_INTCTL; + dte |= DTE_IRQ_TABLE_LEN; + dte |= DTE_IRQ_REMAP_ENABLE; + + amd_iommu_dev_table[devid].data[2] = dte; +} + +#define IRTE_ALLOCATED (~1U) + +static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) +{ + struct irq_remap_table *table = NULL; + struct amd_iommu *iommu; + unsigned long flags; + u16 alias; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + goto out_unlock; + + table = irq_lookup_table[devid]; + if (table) + goto out; + + alias = amd_iommu_alias_table[devid]; + table = irq_lookup_table[alias]; + if (table) { + irq_lookup_table[devid] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, devid); + goto out; + } + + /* Nothing there yet, allocate new irq remapping table */ + table = kzalloc(sizeof(*table), GFP_ATOMIC); + if (!table) + goto out; + + if (ioapic) + /* Keep the first 32 indexes free for IOAPIC interrupts */ + table->min_index = 32; + + table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC); + if (!table->table) { + kfree(table); + table = NULL; + goto out; + } + + memset(table->table, 0, MAX_IRQS_PER_TABLE * sizeof(u32)); + + if (ioapic) { + int i; + + for (i = 0; i < 32; ++i) + table->table[i] = IRTE_ALLOCATED; + } + + irq_lookup_table[devid] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, devid); + if (devid != alias) { + irq_lookup_table[alias] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, alias); + } + +out: + iommu_completion_wait(iommu); + +out_unlock: + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return table; +} + +static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) +{ + struct irq_remap_table *table; + unsigned long flags; + int index, c; + + table = get_irq_table(devid, false); + if (!table) + return -ENODEV; + + spin_lock_irqsave(&table->lock, flags); + + /* Scan table for free entries */ + for (c = 0, index = table->min_index; + index < MAX_IRQS_PER_TABLE; + ++index) { + if (table->table[index] == 0) + c += 1; + else + c = 0; + + if (c == count) { + struct irq_2_iommu *irte_info; + + for (; c != 0; --c) + table->table[index - c + 1] = IRTE_ALLOCATED; + + index -= count - 1; + + irte_info = &cfg->irq_2_iommu; + irte_info->sub_handle = devid; + irte_info->irte_index = index; + irte_info->iommu = (void *)cfg; + + goto out; + } + } + + index = -ENOSPC; + +out: + spin_unlock_irqrestore(&table->lock, flags); + + return index; +} + +static int get_irte(u16 devid, int index, union irte *irte) +{ + struct irq_remap_table *table; + unsigned long flags; + + table = get_irq_table(devid, false); + if (!table) + return -ENOMEM; + + spin_lock_irqsave(&table->lock, flags); + irte->val = table->table[index]; + spin_unlock_irqrestore(&table->lock, flags); + + return 0; +} + +static int modify_irte(u16 devid, int index, union irte irte) +{ + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + return -EINVAL; + + table = get_irq_table(devid, false); + if (!table) + return -ENOMEM; + + spin_lock_irqsave(&table->lock, flags); + table->table[index] = irte.val; + spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); + + return 0; +} + +static void free_irte(u16 devid, int index) +{ + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + return; + + table = get_irq_table(devid, false); + if (!table) + return; + + spin_lock_irqsave(&table->lock, flags); + table->table[index] = 0; + spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); +} + +static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) +{ + struct irq_remap_table *table; + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + union irte irte; + int ioapic_id; + int index; + int devid; + int ret; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_iommu; + ioapic_id = mpc_ioapic_id(attr->ioapic); + devid = get_ioapic_devid(ioapic_id); + + if (devid < 0) + return devid; + + table = get_irq_table(devid, true); + if (table == NULL) + return -ENOMEM; + + index = attr->ioapic_pin; + + /* Setup IRQ remapping info */ + irte_info->sub_handle = devid; + irte_info->irte_index = index; + irte_info->iommu = (void *)cfg; + + /* Setup IRTE for IOMMU */ + irte.val = 0; + irte.fields.vector = vector; + irte.fields.int_type = apic->irq_delivery_mode; + irte.fields.destination = destination; + irte.fields.dm = apic->irq_dest_mode; + irte.fields.valid = 1; + + ret = modify_irte(devid, index, irte); + if (ret) + return ret; + + /* Setup IOAPIC entry */ + memset(entry, 0, sizeof(*entry)); + + entry->vector = index; + entry->mask = 0; + entry->trigger = attr->trigger; + entry->polarity = attr->polarity; + + /* + * Mask level triggered irqs. + */ + if (attr->trigger) + entry->mask = 1; + + return 0; +} + +static int set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_2_iommu *irte_info; + unsigned int dest, irq; + struct irq_cfg *cfg; + union irte irte; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -1; + + cfg = data->chip_data; + irq = data->irq; + irte_info = &cfg->irq_2_iommu; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return -EINVAL; + + if (get_irte(irte_info->sub_handle, irte_info->irte_index, &irte)) + return -EBUSY; + + if (assign_irq_vector(irq, cfg, mask)) + return -EBUSY; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("AMD-Vi: Failed to recover vector for irq %d\n", irq); + return err; + } + + irte.fields.vector = cfg->vector; + irte.fields.destination = dest; + + modify_irte(irte_info->sub_handle, irte_info->irte_index, irte); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + cpumask_copy(data->affinity, mask); + + return 0; +} + +static int free_irq(int irq) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_iommu; + + free_irte(irte_info->sub_handle, irte_info->irte_index); + + return 0; +} + +static void compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + union irte irte; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return; + + irte_info = &cfg->irq_2_iommu; + + irte.val = 0; + irte.fields.vector = cfg->vector; + irte.fields.int_type = apic->irq_delivery_mode; + irte.fields.destination = dest; + irte.fields.dm = apic->irq_dest_mode; + irte.fields.valid = 1; + + modify_irte(irte_info->sub_handle, irte_info->irte_index, irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO; + msg->data = irte_info->irte_index; +} + +static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec) +{ + struct irq_cfg *cfg; + int index; + u16 devid; + + if (!pdev) + return -EINVAL; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + devid = get_device_id(&pdev->dev); + index = alloc_irq_index(cfg, devid, nvec); + + return index < 0 ? MAX_IRQS_PER_TABLE : index; +} + +static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, + int index, int offset) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + u16 devid; + + if (!pdev) + return -EINVAL; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + if (index >= MAX_IRQS_PER_TABLE) + return 0; + + devid = get_device_id(&pdev->dev); + irte_info = &cfg->irq_2_iommu; + + irte_info->sub_handle = devid; + irte_info->irte_index = index + offset; + irte_info->iommu = (void *)cfg; + + return 0; +} + +static int setup_hpet_msi(unsigned int irq, unsigned int id) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + int index, devid; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_iommu; + devid = get_hpet_devid(id); + if (devid < 0) + return devid; + + index = alloc_irq_index(cfg, devid, 1); + if (index < 0) + return index; + + irte_info->sub_handle = devid; + irte_info->irte_index = index; + irte_info->iommu = (void *)cfg; + + return 0; +} + +struct irq_remap_ops amd_iommu_irq_ops = { + .supported = amd_iommu_supported, + .prepare = amd_iommu_prepare, + .enable = amd_iommu_enable, + .disable = amd_iommu_disable, + .reenable = amd_iommu_reenable, + .enable_faulting = amd_iommu_enable_faulting, + .setup_ioapic_entry = setup_ioapic_entry, + .set_affinity = set_affinity, + .free_irq = free_irq, + .compose_msi_msg = compose_msi_msg, + .msi_alloc_irq = msi_alloc_irq, + .msi_setup_irq = msi_setup_irq, + .setup_hpet_msi = setup_hpet_msi, +}; +#endif diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 500e7f15f5c..18b0d99bd4d 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -26,16 +26,18 @@ #include <linux/msi.h> #include <linux/amd-iommu.h> #include <linux/export.h> -#include <linux/acpi.h> #include <acpi/acpi.h> #include <asm/pci-direct.h> #include <asm/iommu.h> #include <asm/gart.h> #include <asm/x86_init.h> #include <asm/iommu_table.h> +#include <asm/io_apic.h> +#include <asm/irq_remapping.h> #include "amd_iommu_proto.h" #include "amd_iommu_types.h" +#include "irq_remapping.h" /* * definitions for the ACPI scanning code @@ -55,6 +57,10 @@ #define IVHD_DEV_ALIAS_RANGE 0x43 #define IVHD_DEV_EXT_SELECT 0x46 #define IVHD_DEV_EXT_SELECT_RANGE 0x47 +#define IVHD_DEV_SPECIAL 0x48 + +#define IVHD_SPECIAL_IOAPIC 1 +#define IVHD_SPECIAL_HPET 2 #define IVHD_FLAG_HT_TUN_EN_MASK 0x01 #define IVHD_FLAG_PASSPW_EN_MASK 0x02 @@ -123,6 +129,7 @@ struct ivmd_header { } __attribute__((packed)); bool amd_iommu_dump; +bool amd_iommu_irq_remap __read_mostly; static bool amd_iommu_detected; static bool __initdata amd_iommu_disabled; @@ -178,7 +185,13 @@ u16 *amd_iommu_alias_table; struct amd_iommu **amd_iommu_rlookup_table; /* - * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap + * This table is used to find the irq remapping table for a given device id + * quickly. + */ +struct irq_remap_table **irq_lookup_table; + +/* + * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap * to know which ones are already in use. */ unsigned long *amd_iommu_pd_alloc_bitmap; @@ -478,7 +491,7 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) /**************************************************************************** * - * The following functions belong the the code path which parses the ACPI table + * The following functions belong to the code path which parses the ACPI table * the second time. In this ACPI parsing iteration we allocate IOMMU specific * data structures, initialize the device/alias/rlookup table and also * basically initialize the hardware. @@ -690,8 +703,33 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, set_iommu_for_device(iommu, devid); } +static int add_special_device(u8 type, u8 id, u16 devid) +{ + struct devid_map *entry; + struct list_head *list; + + if (type != IVHD_SPECIAL_IOAPIC && type != IVHD_SPECIAL_HPET) + return -EINVAL; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->id = id; + entry->devid = devid; + + if (type == IVHD_SPECIAL_IOAPIC) + list = &ioapic_map; + else + list = &hpet_map; + + list_add_tail(&entry->list, list); + + return 0; +} + /* - * Reads the device exclusion range from ACPI and initialize IOMMU with + * Reads the device exclusion range from ACPI and initializes the IOMMU with * it */ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) @@ -717,7 +755,7 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) * Takes a pointer to an AMD IOMMU entry in the ACPI table and * initializes the hardware and our data structures with it. */ -static void __init init_iommu_from_acpi(struct amd_iommu *iommu, +static int __init init_iommu_from_acpi(struct amd_iommu *iommu, struct ivhd_header *h) { u8 *p = (u8 *)h; @@ -867,12 +905,43 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, flags, ext_flags); } break; + case IVHD_DEV_SPECIAL: { + u8 handle, type; + const char *var; + u16 devid; + int ret; + + handle = e->ext & 0xff; + devid = (e->ext >> 8) & 0xffff; + type = (e->ext >> 24) & 0xff; + + if (type == IVHD_SPECIAL_IOAPIC) + var = "IOAPIC"; + else if (type == IVHD_SPECIAL_HPET) + var = "HPET"; + else + var = "UNKNOWN"; + + DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n", + var, (int)handle, + PCI_BUS(devid), + PCI_SLOT(devid), + PCI_FUNC(devid)); + + set_dev_entry_from_acpi(iommu, devid, e->flags, 0); + ret = add_special_device(type, handle, devid); + if (ret) + return ret; + break; + } default: break; } p += ivhd_entry_length(p); } + + return 0; } /* Initializes the device->iommu mapping for the driver */ @@ -912,6 +981,8 @@ static void __init free_iommu_all(void) */ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) { + int ret; + spin_lock_init(&iommu->lock); /* Add IOMMU to internal data structures */ @@ -947,7 +1018,16 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->int_enabled = false; - init_iommu_from_acpi(iommu, h); + ret = init_iommu_from_acpi(iommu, h); + if (ret) + return ret; + + /* + * Make sure IOMMU is not considered to translate itself. The IVRS + * table tells us so, but this is a lie! + */ + amd_iommu_rlookup_table[iommu->devid] = NULL; + init_iommu_devices(iommu); return 0; @@ -1111,13 +1191,15 @@ static void print_iommu_info(void) if (iommu->cap & (1 << IOMMU_CAP_EFR)) { pr_info("AMD-Vi: Extended features: "); - for (i = 0; ARRAY_SIZE(feat_str); ++i) { + for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { if (iommu_feature(iommu, (1ULL << i))) pr_cont(" %s", feat_str[i]); } - } pr_cont("\n"); + } } + if (irq_remapping_enabled) + pr_info("AMD-Vi: Interrupt remapping enabled\n"); } static int __init amd_iommu_init_pci(void) @@ -1131,9 +1213,6 @@ static int __init amd_iommu_init_pci(void) break; } - /* Make sure ACS will be enabled */ - pci_request_acs(); - ret = amd_iommu_init_devices(); print_iommu_info(); @@ -1144,7 +1223,7 @@ static int __init amd_iommu_init_pci(void) /**************************************************************************** * * The following functions initialize the MSI interrupts for all IOMMUs - * in the system. Its a bit challenging because there could be multiple + * in the system. It's a bit challenging because there could be multiple * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per * pci_dev. * @@ -1202,7 +1281,7 @@ enable_faults: * * The next functions belong to the third pass of parsing the ACPI * table. In this last pass the memory mapping requirements are - * gathered (like exclusion and unity mapping reanges). + * gathered (like exclusion and unity mapping ranges). * ****************************************************************************/ @@ -1311,7 +1390,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) * Init the device table to not allow DMA access for devices and * suppress all page faults */ -static void init_device_table(void) +static void init_device_table_dma(void) { u32 devid; @@ -1321,6 +1400,27 @@ static void init_device_table(void) } } +static void __init uninit_device_table_dma(void) +{ + u32 devid; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + amd_iommu_dev_table[devid].data[0] = 0ULL; + amd_iommu_dev_table[devid].data[1] = 0ULL; + } +} + +static void init_device_table(void) +{ + u32 devid; + + if (!amd_iommu_irq_remap) + return; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) + set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN); +} + static void iommu_init_flags(struct amd_iommu *iommu) { iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? @@ -1469,10 +1569,14 @@ static struct syscore_ops amd_iommu_syscore_ops = { static void __init free_on_init_error(void) { - amd_iommu_uninit_devices(); + free_pages((unsigned long)irq_lookup_table, + get_order(rlookup_table_size)); - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, - get_order(MAX_DOMAIN_ID/8)); + if (amd_iommu_irq_cache) { + kmem_cache_destroy(amd_iommu_irq_cache); + amd_iommu_irq_cache = NULL; + + } free_pages((unsigned long)amd_iommu_rlookup_table, get_order(rlookup_table_size)); @@ -1485,8 +1589,6 @@ static void __init free_on_init_error(void) free_iommu_all(); - free_unity_maps(); - #ifdef CONFIG_GART_IOMMU /* * We failed to initialize the AMD IOMMU - try fallback to GART @@ -1497,6 +1599,33 @@ static void __init free_on_init_error(void) #endif } +static bool __init check_ioapic_information(void) +{ + int idx; + + for (idx = 0; idx < nr_ioapics; idx++) { + int id = mpc_ioapic_id(idx); + + if (get_ioapic_devid(id) < 0) { + pr_err(FW_BUG "AMD-Vi: IO-APIC[%d] not in IVRS table\n", id); + pr_err("AMD-Vi: Disabling interrupt remapping due to BIOS Bug\n"); + return false; + } + } + + return true; +} + +static void __init free_dma_resources(void) +{ + amd_iommu_uninit_devices(); + + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, + get_order(MAX_DOMAIN_ID/8)); + + free_unity_maps(); +} + /* * This is the hardware init function for AMD IOMMU in the system. * This function is called either from amd_iommu_init or from the interrupt @@ -1583,9 +1712,6 @@ static int __init early_amd_iommu_init(void) if (amd_iommu_pd_alloc_bitmap == NULL) goto out; - /* init the device table */ - init_device_table(); - /* * let all alias entries point to itself */ @@ -1608,10 +1734,35 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + if (amd_iommu_irq_remap) + amd_iommu_irq_remap = check_ioapic_information(); + + if (amd_iommu_irq_remap) { + /* + * Interrupt remapping enabled, create kmem_cache for the + * remapping tables. + */ + amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", + MAX_IRQS_PER_TABLE * sizeof(u32), + IRQ_TABLE_ALIGNMENT, + 0, NULL); + if (!amd_iommu_irq_cache) + goto out; + + irq_lookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(rlookup_table_size)); + if (!irq_lookup_table) + goto out; + } + ret = init_memory_definitions(ivrs_base); if (ret) goto out; + /* init the device table */ + init_device_table(); + out: /* Don't leak any ACPI memory */ early_acpi_os_unmap_memory((char __iomem *)ivrs_base, ivrs_size); @@ -1652,13 +1803,25 @@ static bool detect_ivrs(void) early_acpi_os_unmap_memory((char __iomem *)ivrs_base, ivrs_size); + /* Make sure ACS will be enabled during PCI probe */ + pci_request_acs(); + + if (!disable_irq_remap) + amd_iommu_irq_remap = true; + return true; } static int amd_iommu_init_dma(void) { + struct amd_iommu *iommu; int ret; + init_device_table_dma(); + + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); + if (iommu_pass_through) ret = amd_iommu_init_passthrough(); else @@ -1749,7 +1912,48 @@ static int __init iommu_go_to_state(enum iommu_init_state state) return ret; } +#ifdef CONFIG_IRQ_REMAP +int __init amd_iommu_prepare(void) +{ + return iommu_go_to_state(IOMMU_ACPI_FINISHED); +} +int __init amd_iommu_supported(void) +{ + return amd_iommu_irq_remap ? 1 : 0; +} + +int __init amd_iommu_enable(void) +{ + int ret; + + ret = iommu_go_to_state(IOMMU_ENABLED); + if (ret) + return ret; + + irq_remapping_enabled = 1; + + return 0; +} + +void amd_iommu_disable(void) +{ + amd_iommu_suspend(); +} + +int amd_iommu_reenable(int mode) +{ + amd_iommu_resume(); + + return 0; +} + +int __init amd_iommu_enable_faulting(void) +{ + /* We enable MSI later when PCI is initialized */ + return 0; +} +#endif /* * This is the core init function for AMD IOMMU hardware in the system. @@ -1762,8 +1966,17 @@ static int __init amd_iommu_init(void) ret = iommu_go_to_state(IOMMU_INITIALIZED); if (ret) { - disable_iommus(); - free_on_init_error(); + free_dma_resources(); + if (!irq_remapping_enabled) { + disable_iommus(); + free_on_init_error(); + } else { + struct amd_iommu *iommu; + + uninit_device_table_dma(); + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); + } } return ret; diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 1a7f41c6cc6..c294961bdd3 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -32,6 +32,14 @@ extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); extern void amd_iommu_init_api(void); +/* Needed for interrupt remapping */ +extern int amd_iommu_supported(void); +extern int amd_iommu_prepare(void); +extern int amd_iommu_enable(void); +extern void amd_iommu_disable(void); +extern int amd_iommu_reenable(int); +extern int amd_iommu_enable_faulting(void); + /* IOMMUv2 specific functions */ struct iommu_domain; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index d0dab865a8b..c9aa3d079ff 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -152,6 +152,7 @@ #define CMD_INV_DEV_ENTRY 0x02 #define CMD_INV_IOMMU_PAGES 0x03 #define CMD_INV_IOTLB_PAGES 0x04 +#define CMD_INV_IRT 0x05 #define CMD_COMPLETE_PPR 0x07 #define CMD_INV_ALL 0x08 @@ -175,6 +176,7 @@ #define DEV_ENTRY_EX 0x67 #define DEV_ENTRY_SYSMGT1 0x68 #define DEV_ENTRY_SYSMGT2 0x69 +#define DEV_ENTRY_IRQ_TBL_EN 0x80 #define DEV_ENTRY_INIT_PASS 0xb8 #define DEV_ENTRY_EINT_PASS 0xb9 #define DEV_ENTRY_NMI_PASS 0xba @@ -183,6 +185,8 @@ #define DEV_ENTRY_MODE_MASK 0x07 #define DEV_ENTRY_MODE_SHIFT 0x09 +#define MAX_DEV_TABLE_ENTRIES 0xffff + /* constants to configure the command buffer */ #define CMD_BUFFER_SIZE 8192 #define CMD_BUFFER_UNINITIALIZED 1 @@ -255,7 +259,7 @@ #define PAGE_SIZE_ALIGN(address, pagesize) \ ((address) & ~((pagesize) - 1)) /* - * Creates an IOMMU PTE for an address an a given pagesize + * Creates an IOMMU PTE for an address and a given pagesize * The PTE has no permission bits set * Pagesize is expected to be a power-of-two larger than 4096 */ @@ -334,6 +338,23 @@ extern bool amd_iommu_np_cache; /* Only true if all IOMMUs support device IOTLBs */ extern bool amd_iommu_iotlb_sup; +#define MAX_IRQS_PER_TABLE 256 +#define IRQ_TABLE_ALIGNMENT 128 + +struct irq_remap_table { + spinlock_t lock; + unsigned min_index; + u32 *table; +}; + +extern struct irq_remap_table **irq_lookup_table; + +/* Interrupt remapping feature used? */ +extern bool amd_iommu_irq_remap; + +/* kmem_cache to get tables with 128 byte alignement */ +extern struct kmem_cache *amd_iommu_irq_cache; + /* * Make iterating over all IOMMUs easier */ @@ -404,7 +425,7 @@ struct iommu_dev_data { struct list_head dev_data_list; /* For global dev_data_list */ struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ - atomic_t bind; /* Domain attach reverent count */ + atomic_t bind; /* Domain attach reference count */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ bool passthrough; /* Default for device is pt_domain */ @@ -565,6 +586,16 @@ struct amd_iommu { u32 stored_l2[0x83]; }; +struct devid_map { + struct list_head list; + u8 id; + u16 devid; +}; + +/* Map HPET and IOAPIC ids to the devid used by the IOMMU */ +extern struct list_head ioapic_map; +extern struct list_head hpet_map; + /* * List with all IOMMUs in the system. This list is not locked because it is * only written and read at driver initialization or suspend time @@ -678,6 +709,30 @@ static inline u16 calc_devid(u8 bus, u8 devfn) return (((u16)bus) << 8) | devfn; } +static inline int get_ioapic_devid(int id) +{ + struct devid_map *entry; + + list_for_each_entry(entry, &ioapic_map, list) { + if (entry->id == id) + return entry->devid; + } + + return -EINVAL; +} + +static inline int get_hpet_devid(int id) +{ + struct devid_map *entry; + + list_for_each_entry(entry, &hpet_map, list) { + if (entry->id == id) + return entry->devid; + } + + return -EINVAL; +} + #ifdef CONFIG_AMD_IOMMU_STATS struct __iommu_counter { diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 45350ff5e93..7fe44f83cc3 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -732,9 +732,9 @@ static int exynos_iommu_domain_init(struct iommu_domain *domain) spin_lock_init(&priv->pgtablelock); INIT_LIST_HEAD(&priv->clients); - dom->geometry.aperture_start = 0; - dom->geometry.aperture_end = ~0UL; - dom->geometry.force_aperture = true; + domain->geometry.aperture_start = 0; + domain->geometry.aperture_end = ~0UL; + domain->geometry.force_aperture = true; domain->priv = priv; return 0; @@ -840,8 +840,7 @@ static void exynos_iommu_detach_device(struct iommu_domain *domain, if (__exynos_sysmmu_disable(data)) { dev_dbg(dev, "%s: Detached IOMMU with pgtable %#lx\n", __func__, __pa(priv->pgtable)); - list_del(&data->node); - INIT_LIST_HEAD(&data->node); + list_del_init(&data->node); } else { dev_dbg(dev, "%s: Detaching IOMMU with pgtable %#lx delayed", diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7469b534664..cb9e1146b02 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -589,7 +589,9 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) { int i; - domain->iommu_coherency = 1; + i = find_first_bit(domain->iommu_bmp, g_num_of_iommus); + + domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0; for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) { if (!ecap_coherent(g_iommus[i]->ecap)) { @@ -2008,6 +2010,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) if (!drhd) { printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n", pci_name(pdev)); + free_domain_mem(domain); return NULL; } iommu = drhd->iommu; @@ -4124,8 +4127,13 @@ static int intel_iommu_add_device(struct device *dev) } else dma_pdev = pci_dev_get(pdev); + /* Account for quirked devices */ swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); + /* + * If it's a multifunction device that does not support our + * required ACS flags, add to the same group as function 0. + */ if (dma_pdev->multifunction && !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) swap_pci_ref(&dma_pdev, @@ -4133,14 +4141,28 @@ static int intel_iommu_add_device(struct device *dev) PCI_DEVFN(PCI_SLOT(dma_pdev->devfn), 0))); + /* + * Devices on the root bus go through the iommu. If that's not us, + * find the next upstream device and test ACS up to the root bus. + * Finding the next device may require skipping virtual buses. + */ while (!pci_is_root_bus(dma_pdev->bus)) { - if (pci_acs_path_enabled(dma_pdev->bus->self, - NULL, REQ_ACS_FLAGS)) + struct pci_bus *bus = dma_pdev->bus; + + while (!bus->self) { + if (!pci_is_root_bus(bus)) + bus = bus->parent; + else + goto root_bus; + } + + if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) break; - swap_pci_ref(&dma_pdev, pci_dev_get(dma_pdev->bus->self)); + swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); } +root_bus: group = iommu_group_get(&dma_pdev->dev); pci_dev_put(dma_pdev); if (!group) { diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index e0b18f3ae9a..af8904de1d4 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -736,6 +736,7 @@ int __init parse_ioapics_under_ir(void) { struct dmar_drhd_unit *drhd; int ir_supported = 0; + int ioapic_idx; for_each_drhd_unit(drhd) { struct intel_iommu *iommu = drhd->iommu; @@ -748,13 +749,20 @@ int __init parse_ioapics_under_ir(void) } } - if (ir_supported && ir_ioapic_num != nr_ioapics) { - printk(KERN_WARNING - "Not all IO-APIC's listed under remapping hardware\n"); - return -1; + if (!ir_supported) + return 0; + + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { + int ioapic_id = mpc_ioapic_id(ioapic_idx); + if (!map_ioapic_to_ir(ioapic_id)) { + pr_err(FW_BUG "ioapic %d has no mapping iommu, " + "interrupt remapping will be disabled\n", + ioapic_id); + return -1; + } } - return ir_supported; + return 1; } int __init ir_dev_scope_init(void) diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 151690db692..faf85d6e33f 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -51,6 +51,11 @@ early_param("intremap", setup_irqremap); void __init setup_irq_remapping_ops(void) { remap_ops = &intel_irq_remap_ops; + +#ifdef CONFIG_AMD_IOMMU + if (amd_iommu_irq_ops.prepare() == 0) + remap_ops = &amd_iommu_irq_ops; +#endif } int irq_remapping_supported(void) diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index b12974cc1df..95363acb583 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -82,6 +82,12 @@ struct irq_remap_ops { }; extern struct irq_remap_ops intel_irq_remap_ops; +extern struct irq_remap_ops amd_iommu_irq_ops; + +#else /* CONFIG_IRQ_REMAP */ + +#define irq_remapping_enabled 0 +#define disable_irq_remap 1 #endif /* CONFIG_IRQ_REMAP */ diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index e1d17feb52b..0b4d62e0c64 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -862,14 +862,14 @@ static void smmu_iommu_detach_dev(struct iommu_domain *domain, goto out; } } - dev_err(smmu->dev, "Couldn't find %s\n", dev_name(c->dev)); + dev_err(smmu->dev, "Couldn't find %s\n", dev_name(dev)); out: spin_unlock(&as->client_lock); } static int smmu_iommu_domain_init(struct iommu_domain *domain) { - int i, err = -ENODEV; + int i, err = -EAGAIN; unsigned long flags; struct smmu_as *as; struct smmu_device *smmu = smmu_handle; @@ -877,11 +877,14 @@ static int smmu_iommu_domain_init(struct iommu_domain *domain) /* Look for a free AS with lock held */ for (i = 0; i < smmu->num_as; i++) { as = &smmu->as[i]; - if (!as->pdir_page) { - err = alloc_pdir(as); - if (!err) - goto found; - } + + if (as->pdir_page) + continue; + + err = alloc_pdir(as); + if (!err) + goto found; + if (err != -EAGAIN) break; } |