From ee34b32d8c2950f66038c8975747ef9aec855289 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 2 Oct 2009 11:01:21 -0700 Subject: dmar: support for parsing Remapping Hardware Static Affinity structure Add support for parsing Remapping Hardware Static Affinity (RHSA) structure. This enables identifying the association between remapping hardware units and the corresponding proximity domain. This enables to allocate transalation structures closer to the remapping hardware unit. Signed-off-by: Suresh Siddha Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 22b02c6df85..577956566a0 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -348,6 +348,26 @@ found: } #endif +static int __init +dmar_parse_one_rhsa(struct acpi_dmar_header *header) +{ + struct acpi_dmar_rhsa *rhsa; + struct dmar_drhd_unit *drhd; + + rhsa = (struct acpi_dmar_rhsa *)header; + for_each_drhd_unit(drhd) + if (drhd->reg_base_addr == rhsa->base_address) { + int node = acpi_map_pxm_to_node(rhsa->proximity_domain); + + if (!node_online(node)) + node = -1; + drhd->iommu->node = node; + return 0; + } + + return -ENODEV; +} + static void __init dmar_table_print_dmar_entry(struct acpi_dmar_header *header) { @@ -467,7 +487,7 @@ parse_dmar_table(void) #endif break; case ACPI_DMAR_HARDWARE_AFFINITY: - /* We don't do anything with RHSA (yet?) */ + ret = dmar_parse_one_rhsa(entry_header); break; default: printk(KERN_WARNING PREFIX @@ -677,6 +697,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->agaw = agaw; iommu->msagaw = msagaw; + iommu->node = -1; + /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), cap_max_fault_reg_offset(iommu->cap)); -- cgit v1.2.3-70-g09d2 From 751cafe3aeceb9ff887c97237f6daaf596c9e547 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 2 Oct 2009 11:01:22 -0700 Subject: dmar: Allocate queued invalidation structure using numa locality info Allocate queued invalidation descriptor structures using numa locality info. Signed-off-by: Suresh Siddha Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 577956566a0..63aa52973d4 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -1040,6 +1040,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) int dmar_enable_qi(struct intel_iommu *iommu) { struct q_inval *qi; + struct page *desc_page; if (!ecap_qis(iommu->ecap)) return -ENOENT; @@ -1056,13 +1057,16 @@ int dmar_enable_qi(struct intel_iommu *iommu) qi = iommu->qi; - qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC)); - if (!qi->desc) { + + desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0); + if (!desc_page) { kfree(qi); iommu->qi = 0; return -ENOMEM; } + qi->desc = page_address(desc_page); + qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC); if (!qi->desc_status) { free_page((unsigned long) qi->desc); -- cgit v1.2.3-70-g09d2 From 824cd75bf30cfc52c4b468f3cabf6932fd012654 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 2 Oct 2009 11:01:23 -0700 Subject: intr_remap: Allocate intr-remapping table using numa locality info Allocate intr-remapping table using numa locality info. On platforms having remapping hardware units span different nodes, this enables optimized intr-remapping table entry access by remapping hardware. Signed-off-by: Suresh Siddha Signed-off-by: David Woodhouse --- drivers/pci/intr_remapping.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 0ed78a764de..fccf0e2fcba 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -548,7 +548,8 @@ static int setup_intr_remapping(struct intel_iommu *iommu, int mode) if (!iommu->ir_table) return -ENOMEM; - pages = alloc_pages(GFP_ATOMIC | __GFP_ZERO, INTR_REMAP_PAGE_ORDER); + pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, + INTR_REMAP_PAGE_ORDER); if (!pages) { printk(KERN_ERR "failed to allocate pages of order %d\n", -- cgit v1.2.3-70-g09d2 From 4c923d4714821cf32ff115bb9c91867dff711972 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 2 Oct 2009 11:01:24 -0700 Subject: iommu: Allocate dma-remapping structures using numa locality info Allocate dma-remapping structures using numa locality info. On platforms having remapping hardware units span different nodes, this enables optimized dma-remapping transalation structures access by remapping hardware. Signed-off-by: Suresh Siddha Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index b1e97e68250..c8311364622 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -277,6 +277,7 @@ static int hw_pass_through = 1; struct dmar_domain { int id; /* domain id */ + int nid; /* node id */ unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/ struct list_head devices; /* all devices' list */ @@ -400,15 +401,18 @@ static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep) } -static inline void *alloc_pgtable_page(void) +static inline void *alloc_pgtable_page(int node) { unsigned int flags; - void *vaddr; + struct page *page; + void *vaddr = NULL; /* trying to avoid low memory issues */ flags = current->flags & PF_MEMALLOC; current->flags |= PF_MEMALLOC; - vaddr = (void *)get_zeroed_page(GFP_ATOMIC); + page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0); + if (page) + vaddr = page_address(page); current->flags &= (~PF_MEMALLOC | flags); return vaddr; } @@ -589,7 +593,8 @@ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, root = &iommu->root_entry[bus]; context = get_context_addr_from_root(root); if (!context) { - context = (struct context_entry *)alloc_pgtable_page(); + context = (struct context_entry *) + alloc_pgtable_page(iommu->node); if (!context) { spin_unlock_irqrestore(&iommu->lock, flags); return NULL; @@ -732,7 +737,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (!dma_pte_present(pte)) { uint64_t pteval; - tmp_page = alloc_pgtable_page(); + tmp_page = alloc_pgtable_page(domain->nid); if (!tmp_page) return NULL; @@ -868,7 +873,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) struct root_entry *root; unsigned long flags; - root = (struct root_entry *)alloc_pgtable_page(); + root = (struct root_entry *)alloc_pgtable_page(iommu->node); if (!root) return -ENOMEM; @@ -1263,6 +1268,7 @@ static struct dmar_domain *alloc_domain(void) if (!domain) return NULL; + domain->nid = -1; memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); domain->flags = 0; @@ -1420,9 +1426,10 @@ static int domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_snooping = 0; domain->iommu_count = 1; + domain->nid = iommu->node; /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(); + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); if (!domain->pgd) return -ENOMEM; __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); @@ -1577,6 +1584,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, spin_lock_irqsave(&domain->iommu_lock, flags); if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) { domain->iommu_count++; + if (domain->iommu_count == 1) + domain->nid = iommu->node; domain_update_iommu_cap(domain); } spin_unlock_irqrestore(&domain->iommu_lock, flags); @@ -3416,6 +3425,7 @@ static struct dmar_domain *iommu_alloc_vm_domain(void) return NULL; domain->id = vm_domid++; + domain->nid = -1; memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; @@ -3442,9 +3452,10 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_coherency = 0; domain->iommu_snooping = 0; domain->max_addr = 0; + domain->nid = -1; /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(); + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); if (!domain->pgd) return -ENOMEM; domain_flush_cache(domain, domain->pgd, PAGE_SIZE); -- cgit v1.2.3-70-g09d2 From aa697079ee66315c4b9747a5eb3e48487fb1b8be Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 7 Oct 2009 12:18:00 +0100 Subject: dmar: Fix build failure without NUMA, warn on bogus RHSA tables and don't abort Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 63aa52973d4..0d064d1e840 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -348,6 +348,7 @@ found: } #endif +#ifdef CONFIG_ACPI_NUMA static int __init dmar_parse_one_rhsa(struct acpi_dmar_header *header) { @@ -355,18 +356,26 @@ dmar_parse_one_rhsa(struct acpi_dmar_header *header) struct dmar_drhd_unit *drhd; rhsa = (struct acpi_dmar_rhsa *)header; - for_each_drhd_unit(drhd) + for_each_drhd_unit(drhd) { if (drhd->reg_base_addr == rhsa->base_address) { int node = acpi_map_pxm_to_node(rhsa->proximity_domain); if (!node_online(node)) node = -1; drhd->iommu->node = node; - return 0; + return 0; + } } + WARN(1, "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + drhd->reg_base_addr, + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); - return -ENODEV; + return 0; } +#endif static void __init dmar_table_print_dmar_entry(struct acpi_dmar_header *header) @@ -487,7 +496,9 @@ parse_dmar_table(void) #endif break; case ACPI_DMAR_HARDWARE_AFFINITY: +#ifdef CONFIG_ACPI_NUMA ret = dmar_parse_one_rhsa(entry_header); +#endif break; default: printk(KERN_WARNING PREFIX -- cgit v1.2.3-70-g09d2 From 2c99220810c1c79322034628b993573b088ff2da Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 2 Dec 2009 09:17:13 +0000 Subject: intel-iommu: Detect DMAR in hyperspace at probe time. Many BIOSes will lie to us about the existence of an IOMMU, and claim that there is one at an address which actually returns all 0xFF. We need to detect this early, so that we know we don't have a viable IOMMU and can set up swiotlb before it's too late. Cc: stable@kernel.org Signed-off-by: Chris Wright Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 525a32487ab..56883fc1c7b 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -632,6 +632,9 @@ int __init check_zero_address(void) } if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) { + void __iomem *addr; + u64 cap, ecap; + drhd = (void *)entry_header; if (!drhd->address) { /* Promote an attitude of violence to a BIOS engineer today */ @@ -640,17 +643,38 @@ int __init check_zero_address(void) dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); -#ifdef CONFIG_DMAR - dmar_disabled = 1; -#endif - return 0; + goto failed; + } + + addr = early_ioremap(drhd->address, VTD_PAGE_SIZE); + if (!addr ) { + printk("IOMMU: can't validate: %llx\n", drhd->address); + goto failed; + } + cap = dmar_readq(addr + DMAR_CAP_REG); + ecap = dmar_readq(addr + DMAR_ECAP_REG); + early_iounmap(addr, VTD_PAGE_SIZE); + if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) { + /* Promote an attitude of violence to a BIOS engineer today */ + WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + drhd->address, + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + goto failed; } - break; } entry_header = ((void *)entry_header + entry_header->length); } return 1; + +failed: +#ifdef CONFIG_DMAR + dmar_disabled = 1; +#endif + return 0; } void __init detect_intel_iommu(void) -- cgit v1.2.3-70-g09d2 From 6ecbf01c7ce4c0f4c3bdfa0e64ac6258328fda6c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 2 Dec 2009 09:20:27 +0000 Subject: intel-iommu: Apply BIOS sanity checks for interrupt remapping too. The BIOS errors where an IOMMU is reported either at zero or a bogus address are causing problems even when the IOMMU is disabled -- because interrupt remapping uses the same hardware. Ensure that the checks get applied for the interrupt remapping initialisation too. Cc: stable@kernel.org Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 56883fc1c7b..beeaef84e15 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -613,6 +613,8 @@ int __init dmar_table_init(void) return 0; } +static int bios_warned; + int __init check_zero_address(void) { struct acpi_table_dmar *dmar; @@ -643,6 +645,7 @@ int __init check_zero_address(void) dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + bios_warned = 1; goto failed; } @@ -662,6 +665,7 @@ int __init check_zero_address(void) dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + bios_warned = 1; goto failed; } } @@ -722,6 +726,18 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) int agaw = 0; int msagaw = 0; + if (!drhd->reg_base_addr) { + if (!bios_warned) { + WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + bios_warned = 1; + } + return -EINVAL; + } + iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) return -ENOMEM; @@ -738,13 +754,16 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) { - /* Promote an attitude of violence to a BIOS engineer today */ - WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n" - "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - drhd->reg_base_addr, - dmi_get_system_info(DMI_BIOS_VENDOR), - dmi_get_system_info(DMI_BIOS_VERSION), - dmi_get_system_info(DMI_PRODUCT_VERSION)); + if (!bios_warned) { + /* Promote an attitude of violence to a BIOS engineer today */ + WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + drhd->reg_base_addr, + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + bios_warned = 1; + } goto err_unmap; } -- cgit v1.2.3-70-g09d2 From 5595b528b49a702c0428c0762bab60999648254c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 2 Dec 2009 09:21:55 +0000 Subject: intel-iommu: Check for an RMRR which ends before it starts. Some HP BIOSes report an RMRR region (a region which needs a 1:1 mapping in the IOMMU for a given device) which has an end address lower than its start address. Detect that and warn, rather than triggering the BUG() in dma_pte_clear_range(). Cc: stable@kernel.org Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index cb5cae3e020..83cabdc118d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2000,6 +2000,16 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", pci_name(pdev), start, end); + if (end < start) { + WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + ret = -EIO; + goto error; + } + if (end >> agaw_to_width(domain->agaw)) { WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", -- cgit v1.2.3-70-g09d2 From 44cd613c0e4cd93079ea2a93aa06649d8ca0830a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 2 Dec 2009 10:18:30 +0000 Subject: intel-iommu: Fix oops with intel_iommu=igfx_off The hotplug notifier will call find_domain() to see if the device in question has been assigned an IOMMU domain. However, this should never be called for devices with a "dummy" domain, such as graphics devices when intel_iommu=igfx_off is set and the corresponding IOMMU isn't even initialised. If you do that, it'll oops as it dereferences the (-1) pointer. The notifier function should check iommu_no_mapping() for the device before doing anything else. Cc: stable@kernel.org Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 83cabdc118d..e3e84cac0e9 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3247,6 +3247,9 @@ static int device_notifier(struct notifier_block *nb, struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; + if (iommu_no_mapping(dev)) + return 0; + domain = find_domain(pdev); if (!domain) return 0; -- cgit v1.2.3-70-g09d2 From 1672af1164d3d50ba8908014fd34cc0b58afdc1e Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 2 Dec 2009 12:06:34 -0800 Subject: intel-iommu: ignore page table validation in pass through mode We are seeing a bug when booting w/ iommu=pt with current upstream (bisect blames 19943b0e30b05d42e494ae6fef78156ebc8c637e "intel-iommu: Unify hardware and software passthrough support). The issue is specific to this loop during identity map initialization of each device: domain_context_mapping_one(si_domain, ..., CONTEXT_TT_PASS_THROUGH) ... /* Skip top levels of page tables for * iommu which has less agaw than default. */ for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { pgd = phys_to_virt(dma_pte_addr(pgd)); if (!dma_pte_present(pgd)) { <------ failing here spin_unlock_irqrestore(&iommu->lock, flags); return -ENOMEM; } This box has 2 iommu's in it. The catchall iommu has MGAW == 48, and SAGAW == 4. The other iommu has MGAW == 39, SAGAW == 2. The device that's failing the above pgd test is the only device connected to the non-catchall iommu, which has a smaller address width than the domain default. This test is not necessary since the context is in PT mode and the ASR is ignored. Thanks to Don Dutile for discovering and debugging this one. Cc: stable@kernel.org Signed-off-by: Chris Wright Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index e3e84cac0e9..46607953533 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1530,12 +1530,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, /* Skip top levels of page tables for * iommu which has less agaw than default. + * Unnecessary for PT mode. */ - for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) { - spin_unlock_irqrestore(&iommu->lock, flags); - return -ENOMEM; + if (translation != CONTEXT_TT_PASS_THROUGH) { + for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) { + spin_unlock_irqrestore(&iommu->lock, flags); + return -ENOMEM; + } } } } -- cgit v1.2.3-70-g09d2 From 354bb65e6e0df0aaae0e5b1ea33948d8e0b61418 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 17 Nov 2009 16:21:09 +0900 Subject: Revert "Intel IOMMU: Avoid memory allocation failures in dma map api calls" commit eb3fa7cb51 said Intel IOMMU Intel IOMMU driver needs memory during DMA map calls to setup its internal page tables and for other data structures. As we all know that these DMA map calls are mostly called in the interrupt context or with the spinlock held by the upper level drivers(network/storage drivers), so in order to avoid any memory allocation failure due to low memory issues, this patch makes memory allocation by temporarily setting PF_MEMALLOC flags for the current task before making memory allocation calls. We evaluated mempools as a backup when kmem_cache_alloc() fails and found that mempools are really not useful here because 1) We don't know for sure how much to reserve in advance 2) And mempools are not useful for GFP_ATOMIC case (as we call memory alloc functions with GFP_ATOMIC) (akpm: point 2 is wrong...) The above description doesn't justify to waste system emergency memory at all. Non MM subsystem must not use PF_MEMALLOC. Memory reclaim need few memory, anyone must not prevent it. Otherwise the system cause mysterious hang-up and/or OOM Killer invokation. Plus, akpm already pointed out what we should do. Then, this patch revert it. Cc: Keshavamurthy Anil S Signed-off-by: KOSAKI Motohiro Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 46607953533..4e1dd40f18e 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -387,33 +387,14 @@ static struct kmem_cache *iommu_domain_cache; static struct kmem_cache *iommu_devinfo_cache; static struct kmem_cache *iommu_iova_cache; -static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep) -{ - unsigned int flags; - void *vaddr; - - /* trying to avoid low memory issues */ - flags = current->flags & PF_MEMALLOC; - current->flags |= PF_MEMALLOC; - vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC); - current->flags &= (~PF_MEMALLOC | flags); - return vaddr; -} - - static inline void *alloc_pgtable_page(int node) { - unsigned int flags; struct page *page; void *vaddr = NULL; - /* trying to avoid low memory issues */ - flags = current->flags & PF_MEMALLOC; - current->flags |= PF_MEMALLOC; page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0); if (page) vaddr = page_address(page); - current->flags &= (~PF_MEMALLOC | flags); return vaddr; } @@ -424,7 +405,7 @@ static inline void free_pgtable_page(void *vaddr) static inline void *alloc_domain_mem(void) { - return iommu_kmem_cache_alloc(iommu_domain_cache); + return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC); } static void free_domain_mem(void *vaddr) @@ -434,7 +415,7 @@ static void free_domain_mem(void *vaddr) static inline void * alloc_devinfo_mem(void) { - return iommu_kmem_cache_alloc(iommu_devinfo_cache); + return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC); } static inline void free_devinfo_mem(void *vaddr) @@ -444,7 +425,7 @@ static inline void free_devinfo_mem(void *vaddr) struct iova *alloc_iova_mem(void) { - return iommu_kmem_cache_alloc(iommu_iova_cache); + return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC); } void free_iova_mem(struct iova *iova) -- cgit v1.2.3-70-g09d2