diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-16 10:11:38 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-16 10:11:38 -0800 |
commit | a79960e576ebca9dbf24489b562689f2be7e9ff0 (patch) | |
tree | b0748839230c2bba1d49ccdd732608d7d1f334cb | |
parent | 661e338f728d101b4839b6b157d44cfcb80e3c5e (diff) | |
parent | cd7bcf32d42b15891620b3f1387a00178b54291a (diff) |
Merge git://git.infradead.org/iommu-2.6
* git://git.infradead.org/iommu-2.6:
  implement early_io{re,un}map for ia64
  Revert "Intel IOMMU: Avoid memory allocation failures in dma map api calls"
  intel-iommu: ignore page table validation in pass through mode
  intel-iommu: Fix oops with intel_iommu=igfx_off
  intel-iommu: Check for an RMRR which ends before it starts.
  intel-iommu: Apply BIOS sanity checks for interrupt remapping too.
  intel-iommu: Detect DMAR in hyperspace at probe time.
  dmar: Fix build failure without NUMA, warn on bogus RHSA tables and don't abort
  iommu: Allocate dma-remapping structures using numa locality info
  intr_remap: Allocate intr-remapping table using numa locality info
  dmar: Allocate queued invalidation structure using numa locality info
  dmar: support for parsing Remapping Hardware Static Affinity structure
-rw-r--r-- | arch/ia64/include/asm/io.h | 2 | ||||
-rw-r--r-- | arch/ia64/mm/ioremap.c | 11 | ||||
-rw-r--r-- | drivers/pci/dmar.c | 110 | ||||
-rw-r--r-- | drivers/pci/intel-iommu.c | 78 | ||||
-rw-r--r-- | drivers/pci/intr_remapping.c | 3 | ||||
-rw-r--r-- | include/linux/intel-iommu.h | 1 |
6 files changed, 154 insertions, 51 deletions
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h index 0d9d16e2d94..cc8335eb311 100644 --- a/arch/ia64/include/asm/io.h +++ b/arch/ia64/include/asm/io.h @@ -424,6 +424,8 @@ __writeq (unsigned long val, volatile void __iomem *addr) extern void __iomem * ioremap(unsigned long offset, unsigned long size); extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size); extern void iounmap (volatile void __iomem *addr); +extern void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size); +extern void early_iounmap (volatile void __iomem *addr, unsigned long size); /* * String version of IO memory access ops: diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c index 2a140627dfd..3dccdd8eb27 100644 --- a/arch/ia64/mm/ioremap.c +++ b/arch/ia64/mm/ioremap.c @@ -22,6 +22,12 @@ __ioremap (unsigned long phys_addr) } void __iomem * +early_ioremap (unsigned long phys_addr, unsigned long size) +{ + return __ioremap(phys_addr); +} + +void __iomem * ioremap (unsigned long phys_addr, unsigned long size) { void __iomem *addr; @@ -102,6 +108,11 @@ ioremap_nocache (unsigned long phys_addr, unsigned long size) EXPORT_SYMBOL(ioremap_nocache); void +early_iounmap (volatile void __iomem *addr, unsigned long size) +{ +} + +void iounmap (volatile void __iomem *addr) { if (REGION_NUMBER(addr) == RGN_GATE) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 6cdc931f7c1..83aae474759 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -339,6 +339,35 @@ found: } #endif +#ifdef CONFIG_ACPI_NUMA +static int __init +dmar_parse_one_rhsa(struct acpi_dmar_header *header) +{ + struct acpi_dmar_rhsa *rhsa; + struct dmar_drhd_unit *drhd; + + rhsa = (struct acpi_dmar_rhsa *)header; + for_each_drhd_unit(drhd) { + if (drhd->reg_base_addr == rhsa->base_address) { + int node = acpi_map_pxm_to_node(rhsa->proximity_domain); + + if (!node_online(node)) + node = -1; + drhd->iommu->node = node; + return 0; + } + } + WARN(1, "Your BIOS is 
broken; RHSA refers to non-existent DMAR unit at %llx\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + drhd->reg_base_addr, + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + + return 0; +} +#endif + static void __init dmar_table_print_dmar_entry(struct acpi_dmar_header *header) { @@ -458,7 +487,9 @@ parse_dmar_table(void) #endif break; case ACPI_DMAR_HARDWARE_AFFINITY: - /* We don't do anything with RHSA (yet?) */ +#ifdef CONFIG_ACPI_NUMA + ret = dmar_parse_one_rhsa(entry_header); +#endif break; default: printk(KERN_WARNING PREFIX @@ -582,6 +613,8 @@ int __init dmar_table_init(void) return 0; } +static int bios_warned; + int __init check_zero_address(void) { struct acpi_table_dmar *dmar; @@ -601,6 +634,9 @@ int __init check_zero_address(void) } if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) { + void __iomem *addr; + u64 cap, ecap; + drhd = (void *)entry_header; if (!drhd->address) { /* Promote an attitude of violence to a BIOS engineer today */ @@ -609,17 +645,40 @@ int __init check_zero_address(void) dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); -#ifdef CONFIG_DMAR - dmar_disabled = 1; -#endif - return 0; + bios_warned = 1; + goto failed; + } + + addr = early_ioremap(drhd->address, VTD_PAGE_SIZE); + if (!addr ) { + printk("IOMMU: can't validate: %llx\n", drhd->address); + goto failed; + } + cap = dmar_readq(addr + DMAR_CAP_REG); + ecap = dmar_readq(addr + DMAR_ECAP_REG); + early_iounmap(addr, VTD_PAGE_SIZE); + if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) { + /* Promote an attitude of violence to a BIOS engineer today */ + WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + drhd->address, + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); 
+ bios_warned = 1; + goto failed; } - break; } entry_header = ((void *)entry_header + entry_header->length); } return 1; + +failed: +#ifdef CONFIG_DMAR + dmar_disabled = 1; +#endif + return 0; } void __init detect_intel_iommu(void) @@ -670,6 +729,18 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) int agaw = 0; int msagaw = 0; + if (!drhd->reg_base_addr) { + if (!bios_warned) { + WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + bios_warned = 1; + } + return -EINVAL; + } + iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) return -ENOMEM; @@ -686,13 +757,16 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) { - /* Promote an attitude of violence to a BIOS engineer today */ - WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n" - "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - drhd->reg_base_addr, - dmi_get_system_info(DMI_BIOS_VENDOR), - dmi_get_system_info(DMI_BIOS_VERSION), - dmi_get_system_info(DMI_PRODUCT_VERSION)); + if (!bios_warned) { + /* Promote an attitude of violence to a BIOS engineer today */ + WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + drhd->reg_base_addr, + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + bios_warned = 1; + } goto err_unmap; } @@ -715,6 +789,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->agaw = agaw; iommu->msagaw = msagaw; + iommu->node = -1; + /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), cap_max_fault_reg_offset(iommu->cap)); @@ -1056,6 +1132,7 @@ static 
void __dmar_enable_qi(struct intel_iommu *iommu) int dmar_enable_qi(struct intel_iommu *iommu) { struct q_inval *qi; + struct page *desc_page; if (!ecap_qis(iommu->ecap)) return -ENOENT; @@ -1072,13 +1149,16 @@ int dmar_enable_qi(struct intel_iommu *iommu) qi = iommu->qi; - qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC)); - if (!qi->desc) { + + desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0); + if (!desc_page) { kfree(qi); iommu->qi = 0; return -ENOMEM; } + qi->desc = page_address(desc_page); + qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC); if (!qi->desc_status) { free_page((unsigned long) qi->desc); diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 8d615942631..e56f9bed6f2 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -277,6 +277,7 @@ static int hw_pass_through = 1; struct dmar_domain { int id; /* domain id */ + int nid; /* node id */ unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/ struct list_head devices; /* all devices' list */ @@ -386,30 +387,14 @@ static struct kmem_cache *iommu_domain_cache; static struct kmem_cache *iommu_devinfo_cache; static struct kmem_cache *iommu_iova_cache; -static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep) +static inline void *alloc_pgtable_page(int node) { - unsigned int flags; - void *vaddr; - - /* trying to avoid low memory issues */ - flags = current->flags & PF_MEMALLOC; - current->flags |= PF_MEMALLOC; - vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC); - current->flags &= (~PF_MEMALLOC | flags); - return vaddr; -} - + struct page *page; + void *vaddr = NULL; -static inline void *alloc_pgtable_page(void) -{ - unsigned int flags; - void *vaddr; - - /* trying to avoid low memory issues */ - flags = current->flags & PF_MEMALLOC; - current->flags |= PF_MEMALLOC; - vaddr = (void *)get_zeroed_page(GFP_ATOMIC); - current->flags &= (~PF_MEMALLOC | flags); + page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0); 
+ if (page) + vaddr = page_address(page); return vaddr; } @@ -420,7 +405,7 @@ static inline void free_pgtable_page(void *vaddr) static inline void *alloc_domain_mem(void) { - return iommu_kmem_cache_alloc(iommu_domain_cache); + return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC); } static void free_domain_mem(void *vaddr) @@ -430,7 +415,7 @@ static void free_domain_mem(void *vaddr) static inline void * alloc_devinfo_mem(void) { - return iommu_kmem_cache_alloc(iommu_devinfo_cache); + return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC); } static inline void free_devinfo_mem(void *vaddr) @@ -440,7 +425,7 @@ static inline void free_devinfo_mem(void *vaddr) struct iova *alloc_iova_mem(void) { - return iommu_kmem_cache_alloc(iommu_iova_cache); + return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC); } void free_iova_mem(struct iova *iova) @@ -589,7 +574,8 @@ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, root = &iommu->root_entry[bus]; context = get_context_addr_from_root(root); if (!context) { - context = (struct context_entry *)alloc_pgtable_page(); + context = (struct context_entry *) + alloc_pgtable_page(iommu->node); if (!context) { spin_unlock_irqrestore(&iommu->lock, flags); return NULL; @@ -732,7 +718,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (!dma_pte_present(pte)) { uint64_t pteval; - tmp_page = alloc_pgtable_page(); + tmp_page = alloc_pgtable_page(domain->nid); if (!tmp_page) return NULL; @@ -868,7 +854,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) struct root_entry *root; unsigned long flags; - root = (struct root_entry *)alloc_pgtable_page(); + root = (struct root_entry *)alloc_pgtable_page(iommu->node); if (!root) return -ENOMEM; @@ -1263,6 +1249,7 @@ static struct dmar_domain *alloc_domain(void) if (!domain) return NULL; + domain->nid = -1; memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); domain->flags = 0; @@ -1420,9 +1407,10 @@ static int 
domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_snooping = 0; domain->iommu_count = 1; + domain->nid = iommu->node; /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(); + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); if (!domain->pgd) return -ENOMEM; __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); @@ -1523,12 +1511,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, /* Skip top levels of page tables for * iommu which has less agaw than default. + * Unnecessary for PT mode. */ - for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) { - spin_unlock_irqrestore(&iommu->lock, flags); - return -ENOMEM; + if (translation != CONTEXT_TT_PASS_THROUGH) { + for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) { + spin_unlock_irqrestore(&iommu->lock, flags); + return -ENOMEM; + } } } } @@ -1577,6 +1568,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, spin_lock_irqsave(&domain->iommu_lock, flags); if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) { domain->iommu_count++; + if (domain->iommu_count == 1) + domain->nid = iommu->node; domain_update_iommu_cap(domain); } spin_unlock_irqrestore(&domain->iommu_lock, flags); @@ -1991,6 +1984,16 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", pci_name(pdev), start, end); + if (end < start) { + WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + ret = -EIO; + goto error; + } + if (end >> agaw_to_width(domain->agaw)) { WARN(1, "Your BIOS is broken; RMRR exceeds permitted 
address width (%d bits)\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", @@ -3228,6 +3231,9 @@ static int device_notifier(struct notifier_block *nb, struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; + if (iommu_no_mapping(dev)) + return 0; + domain = find_domain(pdev); if (!domain) return 0; @@ -3455,6 +3461,7 @@ static struct dmar_domain *iommu_alloc_vm_domain(void) return NULL; domain->id = vm_domid++; + domain->nid = -1; memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; @@ -3481,9 +3488,10 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_coherency = 0; domain->iommu_snooping = 0; domain->max_addr = 0; + domain->nid = -1; /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(); + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); if (!domain->pgd) return -ENOMEM; domain_flush_cache(domain, domain->pgd, PAGE_SIZE); diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 1487bf2be86..8b65a489581 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -590,7 +590,8 @@ static int setup_intr_remapping(struct intel_iommu *iommu, int mode) if (!iommu->ir_table) return -ENOMEM; - pages = alloc_pages(GFP_ATOMIC | __GFP_ZERO, INTR_REMAP_PAGE_ORDER); + pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, + INTR_REMAP_PAGE_ORDER); if (!pages) { printk(KERN_ERR "failed to allocate pages of order %d\n", diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4f0a72a9740..9310c699a37 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -332,6 +332,7 @@ struct intel_iommu { #ifdef CONFIG_INTR_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ #endif + int node; }; static inline void __iommu_flush_cache( |