From 937582382c71b75b29fbb92615629494e1a05ac0 Mon Sep 17 00:00:00 2001
From: Weidong Han
Date: Fri, 17 Apr 2009 16:42:14 +0800
Subject: x86, intr-remap: enable interrupt remapping early

Currently, when x2apic is not enabled, interrupt remapping will be
enabled in init_dmars(), where it is too late to remap ioapic
interrupts; that is, ioapic interrupts are really in compatibility
mode, not remappable mode.

This patch always enables interrupt remapping before ioapic setup,
which guarantees that all interrupts will be remapped when interrupt
remapping is enabled. Thus it doesn't need to set the compatibility
interrupt bit.

[ Impact: refactor intr-remap init sequence, enable fuller remap mode ]

Signed-off-by: Suresh Siddha
Signed-off-by: Weidong Han
Acked-by: David Woodhouse
Cc: iommu@lists.linux-foundation.org
Cc: allen.m.kay@intel.com
Cc: fenghua.yu@intel.com
LKML-Reference: <1239957736-6161-4-git-send-email-weidong.han@intel.com>
Signed-off-by: Ingo Molnar
---
 drivers/pci/intel-iommu.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'drivers/pci/intel-iommu.c')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 001b328adf8..9ce8f0764be 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1968,15 +1968,6 @@ static int __init init_dmars(void)
 		}
 	}
 
-#ifdef CONFIG_INTR_REMAP
-	if (!intr_remapping_enabled) {
-		ret = enable_intr_remapping(0);
-		if (ret)
-			printk(KERN_ERR
-			       "IOMMU: enable interrupt remapping failed\n");
-	}
-#endif
-
 	/*
 	 * For each rmrr
 	 *   for each dev attached to rmrr
-- cgit v1.2.3-70-g09d2

From 4ed0d3e6c64cfd9ba4ceb2099b10d1cf8ece4320 Mon Sep 17 00:00:00 2001
From: Fenghua Yu
Date: Fri, 24 Apr 2009 17:30:20 -0700
Subject: Intel IOMMU Pass Through Support

The patch adds the kernel parameter iommu=pt to set up pass-through
mode in the context mapping entry. This disables DMAR in the Linux
kernel, but KVM still runs on VT-d and interrupt remapping still
works. In this mode, the kernel uses swiotlb for the DMA API functions
while the other VT-d facilities remain enabled for KVM. KVM always
uses multi-level translation page tables in VT-d. By default,
pass-through mode is disabled in the kernel.

This is useful when people don't want to enable VT-d DMAR in the
kernel but still want to use KVM and interrupt remapping, for reasons
such as DMAR performance concerns or debugging.

Signed-off-by: Fenghua Yu
Acked-by: Weidong Han
Signed-off-by: David Woodhouse
---
 Documentation/kernel-parameters.txt |   1 +
 arch/ia64/include/asm/iommu.h       |   1 +
 arch/ia64/kernel/pci-swiotlb.c      |   2 +-
 arch/x86/include/asm/iommu.h        |   1 +
 arch/x86/kernel/pci-dma.c           |   6 ++
 arch/x86/kernel/pci-swiotlb.c       |   3 +-
 drivers/pci/dmar.c                  |  11 ++-
 drivers/pci/intel-iommu.c           | 180 ++++++++++++++++++++++++++----------
 include/linux/dma_remapping.h       |   8 ++
 include/linux/intel-iommu.h         |   2 +
 10 files changed, 165 insertions(+), 50 deletions(-)

(limited to 'drivers/pci/intel-iommu.c')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 600cdd72900..fa4faeb7597 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -965,6 +965,7 @@ and is between 256 and 4096 characters.
It is defined in the file nomerge forcesac soft + pt [x86, IA64] io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h index 0490794fe4a..37d41ca5645 100644 --- a/arch/ia64/include/asm/iommu.h +++ b/arch/ia64/include/asm/iommu.h @@ -9,6 +9,7 @@ extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); extern int force_iommu, no_iommu; extern int iommu_detected; +extern int iommu_pass_through; extern void iommu_dma_init(void); extern void machvec_init(const char *name); diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 285aae8431c..223abb13410 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -46,7 +46,7 @@ void __init swiotlb_dma_init(void) void __init pci_swiotlb_init(void) { - if (!iommu_detected) { + if (!iommu_detected || iommu_pass_through) { #ifdef CONFIG_IA64_GENERIC swiotlb = 1; printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index af326a2975b..fd6d21bbee6 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -6,6 +6,7 @@ extern void no_iommu_init(void); extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; +extern int iommu_pass_through; /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 745579bc825..8cad0d85424 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -160,6 +160,8 @@ again: return page_address(page); } +extern int iommu_pass_through; + /* * See for the iommu kernel parameter * documentation. 
@@ -209,6 +211,10 @@ static __init int iommu_setup(char *p) #ifdef CONFIG_SWIOTLB if (!strncmp(p, "soft", 4)) swiotlb = 1; + if (!strncmp(p, "pt", 2)) { + iommu_pass_through = 1; + return 1; + } #endif gart_parse_options(p); diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 221a3853e26..3a0c51e0ba6 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -71,7 +71,8 @@ void __init pci_swiotlb_init(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 - if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) + if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) || + iommu_pass_through) swiotlb = 1; #endif if (swiotlb_force) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index fa3a11365ec..d3d86b749ee 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -515,6 +515,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) u32 ver; static int iommu_allocated = 0; int agaw = 0; + int msagaw = 0; iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -535,12 +536,20 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) agaw = iommu_calculate_agaw(iommu); if (agaw < 0) { printk(KERN_ERR - "Cannot get a valid agaw for iommu (seq_id = %d)\n", + "Cannot get a valid agaw for iommu (seq_id = %d)\n", + iommu->seq_id); + goto error; + } + msagaw = iommu_calculate_max_sagaw(iommu); + if (msagaw < 0) { + printk(KERN_ERR + "Cannot get a valid max agaw for iommu (seq_id = %d)\n", iommu->seq_id); goto error; } #endif iommu->agaw = agaw; + iommu->msagaw = msagaw; /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 001b328adf8..13121821db7 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -53,6 +53,8 @@ #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 +#define MAX_AGAW_WIDTH 64 + #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) @@ -127,8 +129,6 @@ static inline void context_set_fault_enable(struct context_entry *context) context->lo &= (((u64)-1) << 2) | 1; } -#define CONTEXT_TT_MULTI_LEVEL 0 - static inline void context_set_translation_type(struct context_entry *context, unsigned long value) { @@ -288,6 +288,7 @@ int dmar_disabled = 1; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; +int iommu_pass_through; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -397,17 +398,13 @@ void free_iova_mem(struct iova *iova) static inline int width_to_agaw(int width); -/* calculate agaw for each iommu. - * "SAGAW" may be different across iommus, use a default agaw, and - * get a supported less agaw for iommus that don't support the default agaw. - */ -int iommu_calculate_agaw(struct intel_iommu *iommu) +static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) { unsigned long sagaw; int agaw = -1; sagaw = cap_sagaw(iommu->cap); - for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); + for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) { if (test_bit(agaw, &sagaw)) break; @@ -416,6 +413,24 @@ int iommu_calculate_agaw(struct intel_iommu *iommu) return agaw; } +/* + * Calculate max SAGAW for each iommu. + */ +int iommu_calculate_max_sagaw(struct intel_iommu *iommu) +{ + return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH); +} + +/* + * calculate agaw for each iommu. 
+ * "SAGAW" may be different across iommus, use a default agaw, and + * get a supported less agaw for iommus that don't support the default agaw. + */ +int iommu_calculate_agaw(struct intel_iommu *iommu) +{ + return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); +} + /* in native case, each domain is related to only one iommu */ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) { @@ -1321,8 +1336,8 @@ static void domain_exit(struct dmar_domain *domain) free_domain_mem(domain); } -static int domain_context_mapping_one(struct dmar_domain *domain, - int segment, u8 bus, u8 devfn) +static int domain_context_mapping_one(struct dmar_domain *domain, int segment, + u8 bus, u8 devfn, int translation) { struct context_entry *context; unsigned long flags; @@ -1335,7 +1350,10 @@ static int domain_context_mapping_one(struct dmar_domain *domain, pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + BUG_ON(!domain->pgd); + BUG_ON(translation != CONTEXT_TT_PASS_THROUGH && + translation != CONTEXT_TT_MULTI_LEVEL); iommu = device_to_iommu(segment, bus, devfn); if (!iommu) @@ -1395,9 +1413,18 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } context_set_domain_id(context, id); - context_set_address_width(context, iommu->agaw); - context_set_address_root(context, virt_to_phys(pgd)); - context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL); + + /* + * In pass through mode, AW must be programmed to indicate the largest + * AGAW value supported by hardware. And ASR is ignored by hardware. + */ + if (likely(translation == CONTEXT_TT_MULTI_LEVEL)) { + context_set_address_width(context, iommu->agaw); + context_set_address_root(context, virt_to_phys(pgd)); + } else + context_set_address_width(context, iommu->msagaw); + + context_set_translation_type(context, translation); context_set_fault_enable(context); context_set_present(context); domain_flush_cache(domain, context, sizeof(*context)); @@ -1422,13 +1449,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } static int -domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) +domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, + int translation) { int ret; struct pci_dev *tmp, *parent; ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus), - pdev->bus->number, pdev->devfn); + pdev->bus->number, pdev->devfn, + translation); if (ret) return ret; @@ -1442,7 +1471,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) ret = domain_context_mapping_one(domain, pci_domain_nr(parent->bus), parent->bus->number, - parent->devfn); + parent->devfn, translation); if (ret) return ret; parent = parent->bus->self; @@ -1450,12 +1479,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ return domain_context_mapping_one(domain, pci_domain_nr(tmp->subordinate), - tmp->subordinate->number, 0); + tmp->subordinate->number, 0, + translation); else /* this is a legacy PCI bridge */ return domain_context_mapping_one(domain, pci_domain_nr(tmp->bus), tmp->bus->number, - tmp->devfn); + tmp->devfn, + translation); } static int domain_context_mapped(struct pci_dev *pdev) @@ -1752,7 +1783,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, goto error; /* context entry init */ - ret = domain_context_mapping(domain, pdev); + ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); if (!ret) return 0; 
error: @@ -1853,6 +1884,23 @@ static inline void iommu_prepare_isa(void) } #endif /* !CONFIG_DMAR_FLPY_WA */ +/* Initialize each context entry as pass through.*/ +static int __init init_context_pass_through(void) +{ + struct pci_dev *pdev = NULL; + struct dmar_domain *domain; + int ret; + + for_each_pci_dev(pdev) { + domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + ret = domain_context_mapping(domain, pdev, + CONTEXT_TT_PASS_THROUGH); + if (ret) + return ret; + } + return 0; +} + static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; @@ -1860,6 +1908,7 @@ static int __init init_dmars(void) struct pci_dev *pdev; struct intel_iommu *iommu; int i, ret; + int pass_through = 1; /* * for each drhd @@ -1913,7 +1962,15 @@ static int __init init_dmars(void) printk(KERN_ERR "IOMMU: allocate root entry failed\n"); goto error; } + if (!ecap_pass_through(iommu->ecap)) + pass_through = 0; } + if (iommu_pass_through) + if (!pass_through) { + printk(KERN_INFO + "Pass Through is not supported by hardware.\n"); + iommu_pass_through = 0; + } /* * Start from the sane iommu hardware state. @@ -1976,37 +2033,57 @@ static int __init init_dmars(void) "IOMMU: enable interrupt remapping failed\n"); } #endif + /* + * If pass through is set and enabled, context entries of all pci + * devices are intialized by pass through translation type. + */ + if (iommu_pass_through) { + ret = init_context_pass_through(); + if (ret) { + printk(KERN_ERR "IOMMU: Pass through init failed.\n"); + iommu_pass_through = 0; + } + } /* - * For each rmrr - * for each dev attached to rmrr - * do - * locate drhd for dev, alloc domain for dev - * allocate free domain - * allocate page table entries for rmrr - * if context not allocated for bus - * allocate and init context - * set present in root table for this bus - * init context with domain, translation etc - * endfor - * endfor + * If pass through is not set or not enabled, setup context entries for + * identity mappings for rmrr, gfx, and isa. */ - for_each_rmrr_units(rmrr) { - for (i = 0; i < rmrr->devices_cnt; i++) { - pdev = rmrr->devices[i]; - /* some BIOS lists non-exist devices in DMAR table */ - if (!pdev) - continue; - ret = iommu_prepare_rmrr_dev(rmrr, pdev); - if (ret) - printk(KERN_ERR + if (!iommu_pass_through) { + /* + * For each rmrr + * for each dev attached to rmrr + * do + * locate drhd for dev, alloc domain for dev + * allocate free domain + * allocate page table entries for rmrr + * if context not allocated for bus + * allocate and init context + * set present in root table for this bus + * init context with domain, translation etc + * endfor + * endfor + */ + for_each_rmrr_units(rmrr) { + for (i = 0; i < rmrr->devices_cnt; i++) { + pdev = rmrr->devices[i]; + /* + * some BIOS lists non-exist devices in DMAR + * table. + */ + if (!pdev) + continue; + ret = iommu_prepare_rmrr_dev(rmrr, pdev); + if (ret) + printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); + } } - } - iommu_prepare_gfx_mapping(); + iommu_prepare_gfx_mapping(); - iommu_prepare_isa(); + iommu_prepare_isa(); + } /* * for each drhd @@ -2117,7 +2194,8 @@ get_valid_domain_for_dev(struct pci_dev *pdev) /* make sure context mapping is ok */ if (unlikely(!domain_context_mapped(pdev))) { - ret = domain_context_mapping(domain, pdev); + ret = domain_context_mapping(domain, pdev, + CONTEXT_TT_MULTI_LEVEL); if (ret) { printk(KERN_ERR "Domain context map for %s failed", @@ -2786,7 +2864,7 @@ int __init intel_iommu_init(void) * Check the need for DMA-remapping initialization now. 
* Above initialization will also be used by Interrupt-remapping. */ - if (no_iommu || swiotlb || dmar_disabled) + if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled) return -ENODEV; iommu_init_mempool(); @@ -2806,7 +2884,15 @@ int __init intel_iommu_init(void) init_timer(&unmap_timer); force_iommu = 1; - dma_ops = &intel_dma_ops; + + if (!iommu_pass_through) { + printk(KERN_INFO + "Multi-level page-table translation for DMAR.\n"); + dma_ops = &intel_dma_ops; + } else + printk(KERN_INFO + "DMAR: Pass through translation for DMAR.\n"); + init_iommu_sysfs(); register_iommu(&intel_iommu_ops); @@ -3146,7 +3232,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EFAULT; } - ret = domain_context_mapping(dmar_domain, pdev); + ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); if (ret) return ret; diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 1a455f1f86d..e0a03aff63d 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -13,6 +13,9 @@ #define DMA_PTE_WRITE (2) #define DMA_PTE_SNP (1 << 11) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define CONTEXT_TT_PASS_THROUGH 2 + struct intel_iommu; struct dmar_domain; struct root_entry; @@ -21,11 +24,16 @@ extern void free_dmar_iommu(struct intel_iommu *iommu); #ifdef CONFIG_DMAR extern int iommu_calculate_agaw(struct intel_iommu *iommu); +extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { return 0; } +static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) +{ + return 0; +} #endif extern int dmar_disabled; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index aa8c5317123..7246971a7fe 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -120,6 +120,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) #define ecap_coherent(e) ((e) & 0x1) #define ecap_qis(e) ((e) & 0x2) +#define ecap_pass_through(e) ((e >> 6) & 0x1) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) #define ecap_max_handle_mask(e) ((e >> 20) & 0xf) @@ -302,6 +303,7 @@ struct intel_iommu { spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ int agaw; /* agaw of this iommu */ + int msagaw; /* max sagaw of this iommu */ unsigned int irq; unsigned char name[13]; /* Device Name */ -- cgit v1.2.3-70-g09d2 From aed5d5f4c5ea5da01a774e42cff08c4b4fa59072 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 30 Apr 2009 17:57:11 -0700 Subject: Fix !CONFIG_DMAR build failure introduced by Intel IOMMU Pass Through Support This updated patch should fix the compiling errors and remove the extern iommu_pass_through from drivers/pci/intel-iommu.c file. Signed-off-by: Fenghua Yu Signed-off-by: David Woodhouse --- arch/ia64/kernel/pci-dma.c | 2 ++ arch/x86/kernel/pci-dma.c | 4 ++-- drivers/pci/intel-iommu.c | 1 - 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index eb987386f69..ecdde25d0d1 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -32,6 +32,8 @@ int force_iommu __read_mostly = 1; int force_iommu __read_mostly; #endif +int iommu_pass_through; + /* Dummy device used for NULL arguments (normally ISA). 
Better would be probably a smaller DMA mask, but this is bug-to-bug compatible to i386. */ diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 8cad0d85424..049005e8217 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -32,6 +32,8 @@ int no_iommu __read_mostly; /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly = 0; +int iommu_pass_through; + dma_addr_t bad_dma_address __read_mostly = 0; EXPORT_SYMBOL(bad_dma_address); @@ -160,8 +162,6 @@ again: return page_address(page); } -extern int iommu_pass_through; - /* * See for the iommu kernel parameter * documentation. diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 13121821db7..d3edd6aa82c 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -288,7 +288,6 @@ int dmar_disabled = 1; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; -int iommu_pass_through; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); -- cgit v1.2.3-70-g09d2 From fa3b6dcd5298db2e7b63c17795c9e5570d3df8d9 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 8 May 2009 10:33:38 +0800 Subject: VT-d: fix invalid domain id for KVM context flush The domain->id is a sequence number associated with the KVM guest and should not be used for the context flush. This patch replaces the domain->id with a proper id value for both bare metal and KVM. Signed-off-by: Yu Zhao Acked-by: Weidong Han Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index d3edd6aa82c..d6f4ee50924 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1429,7 +1429,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, domain_flush_cache(domain, context, sizeof(*context)); /* it's a non-present to present mapping */ - if (iommu->flush.flush_context(iommu, domain->id, + if (iommu->flush.flush_context(iommu, id, (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL, 1)) iommu_flush_write_buffer(iommu); -- cgit v1.2.3-70-g09d2 From 4c25a2c1b90bf785fc2e2f0f0c74a80b3e070d39 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 10 May 2009 17:16:06 +0100 Subject: intel-iommu: Clean up handling of "caching mode" vs. context flushing. It really doesn't make a lot of sense to have some of the logic to handle caching vs. non-caching mode duplicated in qi_flush_context() and __iommu_flush_context(), while the return value indicates whether the caller should take other action which depends on the same thing. Especially since qi_flush_context() thought it was returning something entirely different anyway. This patch makes qi_flush_context() and __iommu_flush_context() both return void, removes the 'non_present_entry_flush' argument and makes the only call site which _set_ that argument to 1 do the right thing. 
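As an illustration of the contract change, here is a condensed
before/after sketch of that one call site, in
domain_context_mapping_one() (distilled from the diff below, not a
complete quotation):

	/* Before: the flush returned 1 when the caller still had to do
	 * a write-buffer flush (i.e. when hardware doesn't cache
	 * non-present entries) -- easy to misread: */
	if (iommu->flush.flush_context(iommu, id,
				       (((u16)bus) << 8) | devfn,
				       DMA_CCMD_MASK_NOBIT,
				       DMA_CCMD_DEVICE_INVL, 1))
		iommu_flush_write_buffer(iommu);

	/* After: the flush returns void and the caller tests the
	 * capability itself.  Hardware that caches non-present entries
	 * does so in domain 0, so that is the domain to flush: */
	if (cap_caching_mode(iommu->cap))
		iommu->flush.flush_context(iommu, 0,
					   (((u16)bus) << 8) | devfn,
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
	else
		iommu_flush_write_buffer(iommu);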
Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 13 +++--------- drivers/pci/intel-iommu.c | 52 ++++++++++++++++++--------------------------- include/linux/intel-iommu.h | 8 +++---- 3 files changed, 28 insertions(+), 45 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index d3d86b749ee..10a071ba323 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -723,23 +723,16 @@ void qi_global_iec(struct intel_iommu *iommu) qi_submit_sync(&desc, iommu); } -int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, - u64 type, int non_present_entry_flush) +void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, + u64 type) { struct qi_desc desc; - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) | QI_CC_GRAN(type) | QI_CC_TYPE; desc.high = 0; - return qi_submit_sync(&desc, iommu); + qi_submit_sync(&desc, iommu); } int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index d6f4ee50924..9f5d9151edc 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -857,26 +857,13 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu) } /* return value determine if we need a write buffer flush */ -static int __iommu_flush_context(struct intel_iommu *iommu, - u16 did, u16 source_id, u8 function_mask, u64 type, - int non_present_entry_flush) +static void __iommu_flush_context(struct intel_iommu *iommu, + u16 did, u16 source_id, u8 function_mask, + u64 type) { u64 val = 0; unsigned long flag; - /* - * In the non-present entry flush case, if hardware doesn't cache - * non-present entry we do nothing and if hardware cache non-present - * entry, we flush entries of domain 0 (the domain id is used to cache - * any non-present entries) - */ - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - switch (type) { case DMA_CCMD_GLOBAL_INVL: val = DMA_CCMD_GLOBAL_INVL; @@ -901,9 +888,6 @@ static int __iommu_flush_context(struct intel_iommu *iommu, dmar_readq, (!(val & DMA_CCMD_ICC)), val); spin_unlock_irqrestore(&iommu->register_lock, flag); - - /* flush context entry will implicitly flush write buffer */ - return 0; } /* return value determine if we need a write buffer flush */ @@ -1428,14 +1412,21 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, context_set_present(context); domain_flush_cache(domain, context, sizeof(*context)); - /* it's a non-present to present mapping */ - if (iommu->flush.flush_context(iommu, id, - (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, - DMA_CCMD_DEVICE_INVL, 1)) - iommu_flush_write_buffer(iommu); - else + /* + * It's a non-present to present mapping. If hardware doesn't cache + * non-present entry we only need to flush the write-buffer. 
If the + * _does_ cache non-present entries, then it does so in the special + * domain #0, which we have to flush: + */ + if (cap_caching_mode(iommu->cap)) { + iommu->flush.flush_context(iommu, 0, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); - + } else { + iommu_flush_write_buffer(iommu); + } spin_unlock_irqrestore(&iommu->lock, flags); spin_lock_irqsave(&domain->iommu_lock, flags); @@ -1566,7 +1557,7 @@ static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) clear_context_table(iommu, bus, devfn); iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL, 0); + DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, 0); } @@ -2104,8 +2095,7 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); - iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, - 0); + iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, 0); iommu_disable_protect_mem_regions(iommu); @@ -2721,7 +2711,7 @@ static int init_iommu_hw(void) iommu_set_root_entry(iommu); iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL, 0); + DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, 0); iommu_disable_protect_mem_regions(iommu); @@ -2738,7 +2728,7 @@ static void iommu_flush_all(void) for_each_active_iommu(iommu, drhd) { iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL, 0); + DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, 0); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 7246971a7fe..f2b94dafbf3 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -281,8 +281,8 @@ struct ir_table { #endif struct iommu_flush { - int (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, - u64 type, int non_present_entry_flush); + void (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, + u8 fm, u64 type); int (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type, int non_present_entry_flush); }; @@ -339,8 +339,8 @@ extern void dmar_disable_qi(struct intel_iommu *iommu); extern int dmar_reenable_qi(struct intel_iommu *iommu); extern void qi_global_iec(struct intel_iommu *iommu); -extern int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, - u8 fm, u64 type, int non_present_entry_flush); +extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, + u8 fm, u64 type); extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type, int non_present_entry_flush); -- cgit v1.2.3-70-g09d2 From 1f0ef2aa18802a8ce7eb5a5164aaaf4d59073801 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 10 May 2009 19:58:49 +0100 Subject: intel-iommu: Clean up handling of "caching mode" vs. IOTLB flushing. As we just did for context cache flushing, clean up the logic around whether we need to flush the iotlb or just the write-buffer, depending on caching mode. Fix the same bug in qi_flush_iotlb() that qi_flush_context() had -- it isn't supposed to be returning an error; it's supposed to be returning a flag which triggers a write-buffer flush. Remove some superfluous conditional write-buffer flushes which could never have happened because they weren't for non-present-to-present mapping changes anyway. 
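For illustration, this is the pattern the map paths
(__intel_map_single() and intel_map_sg()) converge on after this
change, condensed from the diff below:

	/*
	 * It's a non-present to present mapping.  Only hardware in
	 * caching mode can hold stale non-present IOTLB entries, so
	 * only then is a real IOTLB flush needed; otherwise a
	 * write-buffer flush suffices.
	 */
	if (cap_caching_mode(iommu->cap))
		iommu_flush_iotlb_psi(iommu, 0, start_paddr,
				      size >> VTD_PAGE_SHIFT);
	else
		iommu_flush_write_buffer(iommu);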
Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 14 ++------ drivers/pci/intel-iommu.c | 78 +++++++++++++++++---------------------------- include/linux/intel-iommu.h | 9 +++--- 3 files changed, 37 insertions(+), 64 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 10a071ba323..df6af0d4ec0 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -735,22 +735,14 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, qi_submit_sync(&desc, iommu); } -int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type, - int non_present_entry_flush) +void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type) { u8 dw = 0, dr = 0; struct qi_desc desc; int ih = 0; - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - if (cap_write_drain(iommu->cap)) dw = 1; @@ -762,7 +754,7 @@ int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) | QI_IOTLB_AM(size_order); - return qi_submit_sync(&desc, iommu); + qi_submit_sync(&desc, iommu); } /* diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 9f5d9151edc..f47d04aced8 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -891,27 +891,13 @@ static void __iommu_flush_context(struct intel_iommu *iommu, } /* return value determine if we need a write buffer flush */ -static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, - u64 addr, unsigned int size_order, u64 type, - int non_present_entry_flush) +static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, + u64 addr, unsigned int size_order, u64 type) { int tlb_offset = ecap_iotlb_offset(iommu->ecap); u64 val = 0, val_iva = 0; unsigned long flag; - /* - * In the non-present entry flush case, if hardware doesn't cache - * non-present entry we do nothing and if hardware cache non-present - * entry, we flush entries of domain 0 (the domain id is used to cache - * any non-present entries) - */ - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - switch (type) { case DMA_TLB_GLOBAL_FLUSH: /* global flush doesn't need set IVA_REG */ @@ -959,12 +945,10 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", (unsigned long long)DMA_TLB_IIRG(type), (unsigned long long)DMA_TLB_IAIG(val)); - /* flush iotlb entry will implicitly flush write buffer */ - return 0; } -static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, - u64 addr, unsigned int pages, int non_present_entry_flush) +static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, + u64 addr, unsigned int pages) { unsigned int mask; @@ -974,8 +958,7 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, /* Fallback to domain selective flush if no PSI support */ if (!cap_pgsel_inv(iommu->cap)) return iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH, - non_present_entry_flush); + DMA_TLB_DSI_FLUSH); /* * PSI requires page size to be 2 ^ x, and the base address is naturally @@ -985,11 +968,10 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, /* Fallback to domain selective flush if size is too big */ if (mask > cap_max_amask_val(iommu->cap)) return iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH, 
non_present_entry_flush); + DMA_TLB_DSI_FLUSH); return iommu->flush.flush_iotlb(iommu, did, addr, mask, - DMA_TLB_PSI_FLUSH, - non_present_entry_flush); + DMA_TLB_PSI_FLUSH); } static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) @@ -1423,7 +1405,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH); } else { iommu_flush_write_buffer(iommu); } @@ -1558,8 +1540,7 @@ static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) clear_context_table(iommu, bus, devfn); iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); } static void domain_remove_dev_info(struct dmar_domain *domain) @@ -2096,8 +2077,7 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, - 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); iommu_disable_protect_mem_regions(iommu); ret = iommu_enable_translation(iommu); @@ -2244,10 +2224,11 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, if (ret) goto error; - /* it's a non-present to present mapping */ - ret = iommu_flush_iotlb_psi(iommu, domain->id, - start_paddr, size >> VTD_PAGE_SHIFT, 1); - if (ret) + /* it's a non-present to present mapping. Only flush if caching mode */ + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, 0, start_paddr, + size >> VTD_PAGE_SHIFT); + else iommu_flush_write_buffer(iommu); return start_paddr + ((u64)paddr & (~PAGE_MASK)); @@ -2283,7 +2264,7 @@ static void flush_unmaps(void) if (deferred_flush[i].next) { iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); + DMA_TLB_GLOBAL_FLUSH); for (j = 0; j < deferred_flush[i].next; j++) { __free_iova(&deferred_flush[i].domain[j]->iovad, deferred_flush[i].iova[j]); @@ -2362,9 +2343,8 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + size); if (intel_iommu_strict) { - if (iommu_flush_iotlb_psi(iommu, - domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) - iommu_flush_write_buffer(iommu); + iommu_flush_iotlb_psi(iommu, domain->id, start_addr, + size >> VTD_PAGE_SHIFT); /* free iova */ __free_iova(&domain->iovad, iova); } else { @@ -2455,9 +2435,8 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + size); - if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr, - size >> VTD_PAGE_SHIFT, 0)) - iommu_flush_write_buffer(iommu); + iommu_flush_iotlb_psi(iommu, domain->id, start_addr, + size >> VTD_PAGE_SHIFT); /* free iova */ __free_iova(&domain->iovad, iova); @@ -2549,10 +2528,13 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne offset += size; } - /* it's a non-present to present mapping */ - if (iommu_flush_iotlb_psi(iommu, domain->id, - start_addr, offset >> VTD_PAGE_SHIFT, 1)) + /* it's a non-present to present mapping. 
Only flush if caching mode */ + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, 0, start_addr, + offset >> VTD_PAGE_SHIFT); + else iommu_flush_write_buffer(iommu); + return nelems; } @@ -2711,9 +2693,9 @@ static int init_iommu_hw(void) iommu_set_root_entry(iommu); iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL); + DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); + DMA_TLB_GLOBAL_FLUSH); iommu_disable_protect_mem_regions(iommu); iommu_enable_translation(iommu); } @@ -2728,9 +2710,9 @@ static void iommu_flush_all(void) for_each_active_iommu(iommu, drhd) { iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL); + DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); + DMA_TLB_GLOBAL_FLUSH); } } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index f2b94dafbf3..29e05a034c0 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -283,8 +283,8 @@ struct ir_table { struct iommu_flush { void (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, u64 type); - int (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type, int non_present_entry_flush); + void (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); }; enum { @@ -341,9 +341,8 @@ extern void qi_global_iec(struct intel_iommu *iommu); extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, u64 type); -extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type, - int non_present_entry_flush); +extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -- cgit v1.2.3-70-g09d2 From 462b60f6ccc685f7e8aa04ff430e6b4ffedf629f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 10 May 2009 20:18:18 +0100 Subject: intel-iommu: Fix tiny theoretical race in write-buffer flush. In iommu_flush_write_buffer() we read iommu->gcmd before taking the register_lock, and then we mask in the WBF bit and write it to the register. There is a tiny chance that something else could have _changed_ iommu->gcmd before we take the lock, but after we read it. So we could be undoing that change. Never actually going to have happened in practice, since nothing else changes that register at runtime -- aside from the write-buffer flush it's only ever touched at startup for enabling translation, etc. But worth fixing anyway. 
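For illustration, a condensed sketch of the window being closed
(distilled from the diff below):

	/* Before: iommu->gcmd is sampled before the lock is taken, so
	 * a concurrent update between the read and the write could be
	 * silently undone: */
	val = iommu->gcmd | DMA_GCMD_WBF;
	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(val, iommu->reg + DMAR_GCMD_REG);

	/* After: sample and write under the same lock: */
	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);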
Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f47d04aced8..2e2c7406131 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -844,10 +844,8 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu) if (!rwbf_quirk && !cap_rwbf(iommu->cap)) return; - val = iommu->gcmd | DMA_GCMD_WBF; - spin_lock_irqsave(&iommu->register_lock, flag); - writel(val, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG); /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, -- cgit v1.2.3-70-g09d2 From c416daa98a584596df21ee2c26fac6579ee58f57 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 10 May 2009 20:30:58 +0100 Subject: intel-iommu: Tidy up iommu->gcmd handling Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 6 ++---- drivers/pci/intel-iommu.c | 18 +++++++++--------- drivers/pci/intr_remapping.c | 11 ++++------- 3 files changed, 15 insertions(+), 20 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index df6af0d4ec0..faf77a00caf 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -784,7 +784,6 @@ void dmar_disable_qi(struct intel_iommu *iommu) cpu_relax(); iommu->gcmd &= ~DMA_GCMD_QIE; - writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, @@ -798,7 +797,7 @@ end: */ static void __dmar_enable_qi(struct intel_iommu *iommu) { - u32 cmd, sts; + u32 sts; unsigned long flags; struct q_inval *qi = iommu->qi; @@ -812,9 +811,8 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); - cmd = iommu->gcmd | DMA_GCMD_QIE; iommu->gcmd |= DMA_GCMD_QIE; - writel(cmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 2e2c7406131..bc99b1e47fb 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -819,7 +819,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) static void iommu_set_root_entry(struct intel_iommu *iommu) { void *addr; - u32 cmd, sts; + u32 sts; unsigned long flag; addr = iommu->root_entry; @@ -827,12 +827,11 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) spin_lock_irqsave(&iommu->register_lock, flag); dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); - cmd = iommu->gcmd | DMA_GCMD_SRTP; - writel(cmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG); /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (sts & DMA_GSTS_RTPS), sts); + readl, (sts & DMA_GSTS_RTPS), sts); spin_unlock_irqrestore(&iommu->register_lock, flag); } @@ -844,12 +843,13 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu) if (!rwbf_quirk && !cap_rwbf(iommu->cap)) return; + spin_lock_irqsave(&iommu->register_lock, flag); writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG); /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (!(val & DMA_GSTS_WBFS)), val); + readl, (!(val & DMA_GSTS_WBFS)), val); spin_unlock_irqrestore(&iommu->register_lock, flag); } @@ -995,13 +995,13 @@ static int 
iommu_enable_translation(struct intel_iommu *iommu) unsigned long flags; spin_lock_irqsave(&iommu->register_lock, flags); - writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG); + iommu->gcmd |= DMA_GCMD_TE; + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (sts & DMA_GSTS_TES), sts); + readl, (sts & DMA_GSTS_TES), sts); - iommu->gcmd |= DMA_GCMD_TE; spin_unlock_irqrestore(&iommu->register_lock, flags); return 0; } @@ -1017,7 +1017,7 @@ static int iommu_disable_translation(struct intel_iommu *iommu) /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (!(sts & DMA_GSTS_TES)), sts); + readl, (!(sts & DMA_GSTS_TES)), sts); spin_unlock_irqrestore(&iommu->register_lock, flag); return 0; diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f5e0ea724a6..16695961408 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -404,7 +404,7 @@ int free_irte(int irq) static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) { u64 addr; - u32 cmd, sts; + u32 sts; unsigned long flags; addr = virt_to_phys((void *)iommu->ir_table->base); @@ -415,9 +415,8 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); /* Set interrupt-remapping table pointer */ - cmd = iommu->gcmd | DMA_GCMD_SIRTP; iommu->gcmd |= DMA_GCMD_SIRTP; - writel(cmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRTPS), sts); @@ -427,9 +426,8 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) spin_lock_irqsave(&iommu->register_lock, flags); /* enable comaptiblity format interrupt pass through */ - cmd = iommu->gcmd | DMA_GCMD_CFI; iommu->gcmd |= DMA_GCMD_CFI; - writel(cmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_CFIS), sts); @@ -446,9 +444,8 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) spin_lock_irqsave(&iommu->register_lock, flags); /* Enable interrupt-remapping */ - cmd = iommu->gcmd | DMA_GCMD_IRE; iommu->gcmd |= DMA_GCMD_IRE; - writel(cmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRES), sts); -- cgit v1.2.3-70-g09d2 From 9dd2fe89062c90a964d122b8be5615d6f2203bbe Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 18 May 2009 13:51:36 +0800 Subject: VT-d: cleanup iommu_flush_iotlb_psi and flush_unmaps Make iommu_flush_iotlb_psi() and flush_unmaps() more readable. 
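For illustration, the shape iommu_flush_iotlb_psi() takes after this
cleanup (condensed from the diff below): compute the page-count mask up
front, then fall back to a domain-selective flush either when
page-selective invalidation is unsupported or when the range is too
large for it:

	unsigned int mask = ilog2(__roundup_pow_of_two(pages));

	if (!cap_pgsel_inv(iommu->cap) ||
	    mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
					 DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr, mask,
					 DMA_TLB_PSI_FLUSH);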
Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index bc99b1e47fb..6d7cb84c63e 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -948,28 +948,23 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int pages) { - unsigned int mask; + unsigned int mask = ilog2(__roundup_pow_of_two(pages)); BUG_ON(addr & (~VTD_PAGE_MASK)); BUG_ON(pages == 0); - /* Fallback to domain selective flush if no PSI support */ - if (!cap_pgsel_inv(iommu->cap)) - return iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH); - /* + * Fallback to domain selective flush if no PSI support or the size is + * too big. * PSI requires page size to be 2 ^ x, and the base address is naturally * aligned to the size */ - mask = ilog2(__roundup_pow_of_two(pages)); - /* Fallback to domain selective flush if size is too big */ - if (mask > cap_max_amask_val(iommu->cap)) - return iommu->flush.flush_iotlb(iommu, did, 0, 0, + if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); - - return iommu->flush.flush_iotlb(iommu, did, addr, mask, - DMA_TLB_PSI_FLUSH); + else + iommu->flush.flush_iotlb(iommu, did, addr, mask, + DMA_TLB_PSI_FLUSH); } static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) @@ -2260,15 +2255,16 @@ static void flush_unmaps(void) if (!iommu) continue; - if (deferred_flush[i].next) { - iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH); - for (j = 0; j < deferred_flush[i].next; j++) { - __free_iova(&deferred_flush[i].domain[j]->iovad, - deferred_flush[i].iova[j]); - } - deferred_flush[i].next = 0; + if (!deferred_flush[i].next) + continue; + + iommu->flush.flush_iotlb(iommu, 0, 0, 0, + DMA_TLB_GLOBAL_FLUSH, 0); + for (j = 0; j < deferred_flush[i].next; j++) { + __free_iova(&deferred_flush[i].domain[j]->iovad, + deferred_flush[i].iova[j]); } + deferred_flush[i].next = 0; } list_size = 0; -- cgit v1.2.3-70-g09d2 From 93a23a7271dfb811b3adb72779054c3a24433112 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 18 May 2009 13:51:37 +0800 Subject: VT-d: support the device IOTLB Enable the device IOTLB (i.e. ATS) for both the bare metal and KVM environments. 
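For illustration, the heart of the new device-IOTLB flush, condensed
from iommu_flush_dev_iotlb() in the diff below: every ATS-enabled
device attached to the domain gets an explicit invalidation, addressed
by its PCI requester ID and bounded by its invalidation queue depth:

	sid  = info->bus << 8 | info->devfn;   /* PCI requester ID */
	qdep = pci_ats_queue_depth(info->dev); /* invalidate queue depth */
	qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);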
Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 109 ++++++++++++++++++++++++++++++++++++++---- include/linux/dma_remapping.h | 1 + include/linux/intel-iommu.h | 1 + 3 files changed, 102 insertions(+), 9 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 6d7cb84c63e..c3cdfc90c13 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -252,6 +252,7 @@ struct device_domain_info { u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ }; @@ -945,6 +946,77 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, (unsigned long long)DMA_TLB_IAIG(val)); } +static struct device_domain_info *iommu_support_dev_iotlb( + struct dmar_domain *domain, int segment, u8 bus, u8 devfn) +{ + int found = 0; + unsigned long flags; + struct device_domain_info *info; + struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn); + + if (!ecap_dev_iotlb_support(iommu->ecap)) + return NULL; + + if (!iommu->qi) + return NULL; + + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry(info, &domain->devices, link) + if (info->bus == bus && info->devfn == devfn) { + found = 1; + break; + } + spin_unlock_irqrestore(&device_domain_lock, flags); + + if (!found || !info->dev) + return NULL; + + if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS)) + return NULL; + + if (!dmar_find_matched_atsr_unit(info->dev)) + return NULL; + + info->iommu = iommu; + + return info; +} + +static void iommu_enable_dev_iotlb(struct device_domain_info *info) +{ + if (!info) + return; + + pci_enable_ats(info->dev, VTD_PAGE_SHIFT); +} + +static void iommu_disable_dev_iotlb(struct device_domain_info *info) +{ + if (!info->dev || !pci_ats_enabled(info->dev)) + return; + + pci_disable_ats(info->dev); +} + +static void iommu_flush_dev_iotlb(struct dmar_domain *domain, + u64 addr, unsigned mask) +{ + u16 sid, qdep; + unsigned long flags; + struct device_domain_info *info; + + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry(info, &domain->devices, link) { + if (!info->dev || !pci_ats_enabled(info->dev)) + continue; + + sid = info->bus << 8 | info->devfn; + qdep = pci_ats_queue_depth(info->dev); + qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); + } + spin_unlock_irqrestore(&device_domain_lock, flags); +} + static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int pages) { @@ -965,6 +1037,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, else iommu->flush.flush_iotlb(iommu, did, addr, mask, DMA_TLB_PSI_FLUSH); + if (did) + iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); } static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) @@ -1305,6 +1379,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, unsigned long ndomains; int id; int agaw; + struct device_domain_info *info = NULL; pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); @@ -1372,15 +1447,21 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, context_set_domain_id(context, id); + if (translation != CONTEXT_TT_PASS_THROUGH) { + info = iommu_support_dev_iotlb(domain, segment, bus, devfn); + translation = info ? 
CONTEXT_TT_DEV_IOTLB : + CONTEXT_TT_MULTI_LEVEL; + } /* * In pass through mode, AW must be programmed to indicate the largest * AGAW value supported by hardware. And ASR is ignored by hardware. */ - if (likely(translation == CONTEXT_TT_MULTI_LEVEL)) { - context_set_address_width(context, iommu->agaw); - context_set_address_root(context, virt_to_phys(pgd)); - } else + if (unlikely(translation == CONTEXT_TT_PASS_THROUGH)) context_set_address_width(context, iommu->msagaw); + else { + context_set_address_root(context, virt_to_phys(pgd)); + context_set_address_width(context, iommu->agaw); + } context_set_translation_type(context, translation); context_set_fault_enable(context); @@ -1402,6 +1483,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, } else { iommu_flush_write_buffer(iommu); } + iommu_enable_dev_iotlb(info); spin_unlock_irqrestore(&iommu->lock, flags); spin_lock_irqsave(&domain->iommu_lock, flags); @@ -1552,6 +1634,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain) info->dev->dev.archdata.iommu = NULL; spin_unlock_irqrestore(&device_domain_lock, flags); + iommu_disable_dev_iotlb(info); iommu = device_to_iommu(info->segment, info->bus, info->devfn); iommu_detach_dev(iommu, info->bus, info->devfn); free_devinfo_mem(info); @@ -2259,10 +2342,16 @@ static void flush_unmaps(void) continue; iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); + DMA_TLB_GLOBAL_FLUSH); for (j = 0; j < deferred_flush[i].next; j++) { - __free_iova(&deferred_flush[i].domain[j]->iovad, - deferred_flush[i].iova[j]); + unsigned long mask; + struct iova *iova = deferred_flush[i].iova[j]; + + mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT; + mask = ilog2(mask >> VTD_PAGE_SHIFT); + iommu_flush_dev_iotlb(deferred_flush[i].domain[j], + iova->pfn_lo << PAGE_SHIFT, mask); + __free_iova(&deferred_flush[i].domain[j]->iovad, iova); } deferred_flush[i].next = 0; } @@ -2943,6 +3032,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, info->dev->dev.archdata.iommu = NULL; spin_unlock_irqrestore(&device_domain_lock, flags); + iommu_disable_dev_iotlb(info); iommu_detach_dev(iommu, info->bus, info->devfn); iommu_detach_dependent_devices(iommu, pdev); free_devinfo_mem(info); @@ -2993,6 +3083,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) spin_unlock_irqrestore(&device_domain_lock, flags1); + iommu_disable_dev_iotlb(info); iommu = device_to_iommu(info->segment, info->bus, info->devfn); iommu_detach_dev(iommu, info->bus, info->devfn); iommu_detach_dependent_devices(iommu, info->dev); @@ -3197,11 +3288,11 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EFAULT; } - ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); + ret = vm_domain_add_dev_info(dmar_domain, pdev); if (ret) return ret; - ret = vm_domain_add_dev_info(dmar_domain, pdev); + ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); return ret; } diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index e0a03aff63d..5619f852273 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -14,6 +14,7 @@ #define DMA_PTE_SNP (1 << 11) #define CONTEXT_TT_MULTI_LEVEL 0 +#define CONTEXT_TT_DEV_IOTLB 1 #define CONTEXT_TT_PASS_THROUGH 2 struct intel_iommu; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 40561b224a1..482dc91fd53 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -124,6 +124,7 @@ 
static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define ecap_pass_through(e)	((e >> 6) & 0x1)
 #define ecap_eim_support(e)	((e >> 4) & 0x1)
 #define ecap_ir_support(e)	((e >> 3) & 0x1)
+#define ecap_dev_iotlb_support(e)	(((e) >> 2) & 0x1)
 #define ecap_max_handle_mask(e)	((e >> 20) & 0xf)
 #define ecap_sc_support(e)	((e >> 7) & 0x1) /* Snooping Control */
-- cgit v1.2.3-70-g09d2

From 2c2e2c389d03bb16b8cdf9db3ac615385fac100f Mon Sep 17 00:00:00 2001
From: Fenghua Yu
Date: Fri, 19 Jun 2009 13:47:29 -0700
Subject: IOMMU Identity Mapping Support (drivers/pci/intel_iommu.c)

Identity mapping for the IOMMU defines a single domain that 1:1-maps
all PCI devices to all usable memory. This reduces the map/unmap
overhead in the DMA APIs and improves IOMMU performance. On 10Gb
network cards, Netperf shows no performance degradation compared to
non-IOMMU performance. This method may lose some of the DMA remapping
benefits, such as isolation.

The patch sets up identity mapping for all PCI devices to all usable
memory. In the DMA API, there is no overhead to maintain page tables,
invalidate the IOTLB, flush caches, etc. 32-bit DMA devices don't use
the identity mapping domain; they keep using normal remapping in order
to access memory beyond 4GiB.

When the kernel option iommu=pt is set, pass-through is tried first.
If pass-through succeeds, the IOMMU uses pass-through. If pass-through
is not supported by the hardware, or fails for whatever reason, the
IOMMU falls back to identity mapping.

Signed-off-by: Fenghua Yu
Signed-off-by: David Woodhouse
---
 drivers/pci/intel-iommu.c | 314 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 254 insertions(+), 60 deletions(-)

(limited to 'drivers/pci/intel-iommu.c')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 178853a0744..e53eacd75c8 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -39,6 +39,7 @@
 #include
 #include
 #include
+#include
 #include "pci.h"
 
 #define ROOT_SIZE	VTD_PAGE_SIZE
@@ -217,6 +218,14 @@ static inline bool dma_pte_present(struct dma_pte *pte)
 	return (pte->val & 3) != 0;
 }
 
+/*
+ * This domain is a statically identity mapping domain.
+ *	1. This domain creats a static 1:1 mapping to all usable memory.
+ *	2. It maps to each iommu if successful.
+ *	3. Each iommu mapps to this domain if successful.
+ */
+struct dmar_domain *si_domain;
+
 /* devices under the same p2p bridge are owned in one domain */
 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
 
@@ -225,6 +234,9 @@ static inline bool dma_pte_present(struct dma_pte *pte)
  */
 #define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)
 
+/* si_domain contains mulitple devices */
+#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 2)
+
 struct dmar_domain {
 	int	id;			/* domain id */
 	unsigned long	iommu_bmp;	/* bitmap of iommus this domain uses*/
@@ -435,12 +447,14 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
 }
 
-/* in native case, each domain is related to only one iommu */
+/* This functionin only returns single iommu in a domain */
 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 {
 	int iommu_id;
 
+	/* si_domain and vm domain should not get here.
 	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
+	BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
 
 	iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
 	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
@@ -1189,48 +1203,71 @@ void free_dmar_iommu(struct intel_iommu *iommu)
 	free_context_table(iommu);
 }
 
-static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
+static struct dmar_domain *alloc_domain(void)
 {
-	unsigned long num;
-	unsigned long ndomains;
 	struct dmar_domain *domain;
-	unsigned long flags;
 
 	domain = alloc_domain_mem();
 	if (!domain)
 		return NULL;
 
+	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+	domain->flags = 0;
+
+	return domain;
+}
+
+static int iommu_attach_domain(struct dmar_domain *domain,
+			       struct intel_iommu *iommu)
+{
+	int num;
+	unsigned long ndomains;
+	unsigned long flags;
+
 	ndomains = cap_ndoms(iommu->cap);
+
 	spin_lock_irqsave(&iommu->lock, flags);
+
 	num = find_first_zero_bit(iommu->domain_ids, ndomains);
 	if (num >= ndomains) {
 		spin_unlock_irqrestore(&iommu->lock, flags);
-		free_domain_mem(domain);
 		printk(KERN_ERR "IOMMU: no free domain ids\n");
-		return NULL;
+		return -ENOMEM;
 	}
 
-	set_bit(num, iommu->domain_ids);
 	domain->id = num;
-	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+	set_bit(num, iommu->domain_ids);
 	set_bit(iommu->seq_id, &domain->iommu_bmp);
-	domain->flags = 0;
 	iommu->domains[num] = domain;
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	return domain;
+	return 0;
 }
 
-static void iommu_free_domain(struct dmar_domain *domain)
+static void iommu_detach_domain(struct dmar_domain *domain,
+				struct intel_iommu *iommu)
 {
 	unsigned long flags;
-	struct intel_iommu *iommu;
-
-	iommu = domain_get_iommu(domain);
+	int num, ndomains;
+	int found = 0;
 
 	spin_lock_irqsave(&iommu->lock, flags);
-	clear_bit(domain->id, iommu->domain_ids);
+	ndomains = cap_ndoms(iommu->cap);
+	num = find_first_bit(iommu->domain_ids, ndomains);
+	for (; num < ndomains; ) {
+		if (iommu->domains[num] == domain) {
+			found = 1;
+			break;
+		}
+		num = find_next_bit(iommu->domain_ids,
+				    cap_ndoms(iommu->cap), num+1);
+	}
+
+	if (found) {
+		clear_bit(num, iommu->domain_ids);
+		clear_bit(iommu->seq_id, &domain->iommu_bmp);
+		iommu->domains[num] = NULL;
+	}
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -1350,6 +1387,8 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 
 static void domain_exit(struct dmar_domain *domain)
 {
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu;
 	u64 end;
 
 	/* Domain 0 is reserved, so dont process it */
@@ -1368,7 +1407,10 @@ static void domain_exit(struct dmar_domain *domain)
 	/* free page tables */
 	dma_pte_free_pagetable(domain, 0, end);
 
-	iommu_free_domain(domain);
+	for_each_active_iommu(iommu, drhd)
+		if (test_bit(iommu->seq_id, &domain->iommu_bmp))
+			iommu_detach_domain(domain, iommu);
+
 	free_domain_mem(domain);
 }
 
@@ -1408,7 +1450,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
 	id = domain->id;
 	pgd = domain->pgd;
 
-	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
+	    domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
 		int found = 0;
 
 		/* find an available domain id for this device in iommu */
@@ -1433,6 +1476,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
 		}
 
 		set_bit(num, iommu->domain_ids);
+		set_bit(iommu->seq_id, &domain->iommu_bmp);
 		iommu->domains[num] = domain;
 		id = num;
 	}
@@ -1675,6 +1719,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 	unsigned long flags;
 	int bus = 0, devfn = 0;
 	int segment;
+	int ret;
 
 	domain = find_domain(pdev);
 	if (domain)
@@ -1707,6 +1752,10 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 		}
 	}
 
+	domain = alloc_domain();
+	if (!domain)
+		goto error;
+
 	/* Allocate new domain for the device */
 	drhd = dmar_find_matched_drhd_unit(pdev);
 	if (!drhd) {
@@ -1716,9 +1765,11 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 	}
 	iommu = drhd->iommu;
 
-	domain = iommu_alloc_domain(iommu);
-	if (!domain)
+	ret = iommu_attach_domain(domain, iommu);
+	if (ret) {
+		domain_exit(domain);
 		goto error;
+	}
 
 	if (domain_init(domain, gaw)) {
 		domain_exit(domain);
@@ -1792,6 +1843,8 @@ error:
 	return find_domain(pdev);
 }
 
+static int iommu_identity_mapping;
+
 static int iommu_prepare_identity_map(struct pci_dev *pdev,
 				      unsigned long long start,
 				      unsigned long long end)
@@ -1804,8 +1857,11 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
 	printk(KERN_INFO
 	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
 	       pci_name(pdev), start, end);
 
-	/* page table init */
-	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+	if (iommu_identity_mapping)
+		domain = si_domain;
+	else
+		/* page table init */
+		domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
 	if (!domain)
 		return -ENOMEM;
 
@@ -1952,7 +2008,110 @@ static int __init init_context_pass_through(void)
 	return 0;
 }
 
-static int __init init_dmars(void)
+static int md_domain_init(struct dmar_domain *domain, int guest_width);
+static int si_domain_init(void)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu;
+	int ret = 0;
+
+	si_domain = alloc_domain();
+	if (!si_domain)
+		return -EFAULT;
+
+
+	for_each_active_iommu(iommu, drhd) {
+		ret = iommu_attach_domain(si_domain, iommu);
+		if (ret) {
+			domain_exit(si_domain);
+			return -EFAULT;
+		}
+	}
+
+	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+		domain_exit(si_domain);
+		return -EFAULT;
+	}
+
+	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
+
+	return 0;
+}
+
+static void domain_remove_one_dev_info(struct dmar_domain *domain,
+				       struct pci_dev *pdev);
+static int identity_mapping(struct pci_dev *pdev)
+{
+	struct device_domain_info *info;
+
+	if (likely(!iommu_identity_mapping))
+		return 0;
+
+
+	list_for_each_entry(info, &si_domain->devices, link)
+		if (info->dev == pdev)
+			return 1;
+	return 0;
+}
+
+static int domain_add_dev_info(struct dmar_domain *domain,
+			       struct pci_dev *pdev)
+{
+	struct device_domain_info *info;
+	unsigned long flags;
+
+	info = alloc_devinfo_mem();
+	if (!info)
+		return -ENOMEM;
+
+	info->segment = pci_domain_nr(pdev->bus);
+	info->bus = pdev->bus->number;
+	info->devfn = pdev->devfn;
+	info->dev = pdev;
+	info->domain = domain;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	list_add(&info->link, &domain->devices);
+	list_add(&info->global, &device_domain_list);
+	pdev->dev.archdata.iommu = info;
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+
+	return 0;
+}
+
+static int iommu_prepare_static_identity_mapping(void)
+{
+	int i;
+	struct pci_dev *pdev = NULL;
+	int ret;
+
+	ret = si_domain_init();
+	if (ret)
+		return -EFAULT;
+
+	printk(KERN_INFO "IOMMU: Setting identity map:\n");
+	for_each_pci_dev(pdev) {
+		for (i = 0; i < e820.nr_map; i++) {
+			struct e820entry *ei = &e820.map[i];
+
+			if (ei->type == E820_RAM) {
+				ret = iommu_prepare_identity_map(pdev,
+					ei->addr, ei->addr + ei->size);
+				if (ret) {
+					printk(KERN_INFO "1:1 mapping to one domain failed.\n");
+					return -EFAULT;
+				}
+			}
+		}
+		ret = domain_add_dev_info(si_domain, pdev);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int __init init_dmars(void)
 {
 	struct dmar_drhd_unit *drhd;
 	struct dmar_rmrr_unit *rmrr;
 	struct pci_dev *pdev;
 	struct intel_iommu *iommu;
@@ -1961,6 +2120,13 @@ static int __init init_dmars(void)
 	int i, ret;
 	int pass_through = 1;
 
+	/*
+	 * If pass through cannot be enabled, the iommu tries to use
+	 * identity mapping instead.
+	 */
+	if (iommu_pass_through)
+		iommu_identity_mapping = 1;
+
 	/*
 	 * for each drhd
 	 *   allocate root
@@ -2090,9 +2256,12 @@ static int __init init_dmars(void)
 
 	/*
 	 * If pass through is not set or not enabled, setup context entries for
-	 * identity mappings for rmrr, gfx, and isa.
+	 * identity mappings for rmrr, gfx, and isa, and may fall back to static
+	 * identity mapping if iommu_identity_mapping is set.
 	 */
 	if (!iommu_pass_through) {
+		if (iommu_identity_mapping)
+			iommu_prepare_static_identity_mapping();
 		/*
 		 * For each rmrr
 		 *   for each dev attached to rmrr
@@ -2107,6 +2276,7 @@ static int __init init_dmars(void)
 		 *   endfor
 		 * endfor
 		 */
+		printk(KERN_INFO "IOMMU: Setting RMRR:\n");
 		for_each_rmrr_units(rmrr) {
 			for (i = 0; i < rmrr->devices_cnt; i++) {
 				pdev = rmrr->devices[i];
@@ -2248,6 +2418,52 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
 	return domain;
 }
 
+static int iommu_dummy(struct pci_dev *pdev)
+{
+	return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
+}
+
+/* Check if the pdev needs to go through the non-identity map and unmap process. */
+static int iommu_no_mapping(struct pci_dev *pdev)
+{
+	int found;
+
+	if (!iommu_identity_mapping)
+		return iommu_dummy(pdev);
+
+	found = identity_mapping(pdev);
+	if (found) {
+		if (pdev->dma_mask > DMA_BIT_MASK(32))
+			return 1;
+		else {
+			/*
+			 * The 32 bit DMA device is removed from si_domain
+			 * and falls back to non-identity mapping.
+			 */
+			domain_remove_one_dev_info(si_domain, pdev);
+			printk(KERN_INFO "32bit %s uses non-identity mapping\n",
+			       pci_name(pdev));
+			return 0;
+		}
+	} else {
+		/*
+		 * If a 64 bit DMA device was detached from a vm, the device
+		 * is put into si_domain for identity mapping.
+		 */
+		if (pdev->dma_mask > DMA_BIT_MASK(32)) {
+			int ret;
+			ret = domain_add_dev_info(si_domain, pdev);
+			if (!ret) {
+				printk(KERN_INFO "64bit %s uses identity mapping\n",
+				       pci_name(pdev));
+				return 1;
+			}
+		}
+	}
+
+	return iommu_dummy(pdev);
+}
+
 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 				     size_t size, int dir, u64 dma_mask)
 {
@@ -2260,7 +2476,8 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 	struct intel_iommu *iommu;
 
 	BUG_ON(dir == DMA_NONE);
-	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
+
+	if (iommu_no_mapping(pdev))
 		return paddr;
 
 	domain = get_valid_domain_for_dev(pdev);
@@ -2401,8 +2618,9 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
 	struct iova *iova;
 	struct intel_iommu *iommu;
 
-	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
+	if (iommu_no_mapping(pdev))
 		return;
+
 	domain = find_domain(pdev);
 	BUG_ON(!domain);
 
@@ -2492,7 +2710,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
 	struct scatterlist *sg;
 	struct intel_iommu *iommu;
 
-	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
+	if (iommu_no_mapping(pdev))
 		return;
 
 	domain = find_domain(pdev);
@@ -2553,7 +2771,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
 	struct intel_iommu *iommu;
 
 	BUG_ON(dir == DMA_NONE);
-	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
+	if (iommu_no_mapping(pdev))
 		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
 
 	domain = get_valid_domain_for_dev(pdev);
@@ -2951,31 +3169,6 @@ int __init intel_iommu_init(void)
 	return 0;
 }
 
-static int vm_domain_add_dev_info(struct dmar_domain *domain,
-				  struct pci_dev *pdev)
-{
-	struct device_domain_info *info;
-	unsigned long flags;
-
-	info = alloc_devinfo_mem();
-	if (!info)
-		return -ENOMEM;
-
-	info->segment = pci_domain_nr(pdev->bus);
-	info->bus = pdev->bus->number;
-	info->devfn = pdev->devfn;
-	info->dev = pdev;
-	info->domain = domain;
-
-	spin_lock_irqsave(&device_domain_lock, flags);
-	list_add(&info->link, &domain->devices);
-	list_add(&info->global, &device_domain_list);
-	pdev->dev.archdata.iommu = info;
-	spin_unlock_irqrestore(&device_domain_lock, flags);
-
-	return 0;
-}
-
 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
 					   struct pci_dev *pdev)
 {
@@ -3003,7 +3196,7 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
 	}
 }
 
-static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
+static void domain_remove_one_dev_info(struct dmar_domain *domain,
 					  struct pci_dev *pdev)
 {
 	struct device_domain_info *info;
@@ -3136,7 +3329,7 @@ static struct dmar_domain *iommu_alloc_vm_domain(void)
 	return domain;
 }
 
-static int vm_domain_init(struct dmar_domain *domain, int guest_width)
+static int md_domain_init(struct dmar_domain *domain, int guest_width)
 {
 	int adjust_width;
 
@@ -3227,7 +3420,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain)
 		       "intel_iommu_domain_init: dmar_domain == NULL\n");
 		return -ENOMEM;
 	}
-	if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
 		printk(KERN_ERR
 		       "intel_iommu_domain_init() failed\n");
 		vm_domain_exit(dmar_domain);
@@ -3262,8 +3455,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 
 	old_domain = find_domain(pdev);
 	if (old_domain) {
-		if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
-			vm_domain_remove_one_dev_info(old_domain, pdev);
+		if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
+		    dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
+			domain_remove_one_dev_info(old_domain, pdev);
 		else
 			domain_remove_dev_info(old_domain);
 	}
 
@@ -3285,7 +3479,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 		return -EFAULT;
 	}
 
-	ret = vm_domain_add_dev_info(dmar_domain, pdev);
+	ret = domain_add_dev_info(dmar_domain, pdev);
 	if (ret)
 		return ret;
 
@@ -3299,7 +3493,7 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
 	struct dmar_domain *dmar_domain = domain->priv;
 	struct pci_dev *pdev = to_pci_dev(dev);
 
-	vm_domain_remove_one_dev_info(dmar_domain, pdev);
+	domain_remove_one_dev_info(dmar_domain, pdev);
 }
 
 static int intel_iommu_map_range(struct iommu_domain *domain,
-- cgit v1.2.3-70-g09d2
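The new iommu_no_mapping() above is the policy core of this commit: a device
already in si_domain stays identity-mapped only if its DMA mask covers all of
memory; a 32-bit device is evicted back to DMA remapping (which can still
hand it IOVAs below 4GiB that map anywhere); and a 64-bit device found
outside si_domain, e.g. after being detached from a VM, is re-added to it. A
stand-alone sketch of that decision table, with the driver state reduced to
two illustrative fields (struct dev, in_si_domain and the sample devices are
hypothetical stand-ins for the real device/domain bookkeeping, and
iommu_identity_mapping is assumed to be enabled):

    #include <stdio.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

    struct dev {
        const char *name;
        uint64_t    dma_mask;
        bool        in_si_domain;   /* currently identity-mapped? */
    };

    /* Model of the iommu_no_mapping() policy: returns true when the
     * device bypasses DMA remapping (identity mapping), false when it
     * must go through the map/unmap path. */
    static bool iommu_no_mapping(struct dev *d)
    {
        bool addresses_all_ram = d->dma_mask > DMA_BIT_MASK(32);

        if (d->in_si_domain) {
            if (addresses_all_ram)
                return true;           /* 64-bit: stay identity-mapped */
            d->in_si_domain = false;   /* 32-bit: fall back to remapping */
            return false;
        }
        if (addresses_all_ram) {
            d->in_si_domain = true;    /* 64-bit: adopt identity mapping */
            return true;
        }
        return false;
    }

    int main(void)
    {
        struct dev nic = { "eth0", DMA_BIT_MASK(64), true };
        struct dev hda = { "hda",  DMA_BIT_MASK(32), true };

        printf("%s: %s\n", nic.name,
               iommu_no_mapping(&nic) ? "identity" : "remapped");
        printf("%s: %s\n", hda.name,
               iommu_no_mapping(&hda) ? "identity" : "remapped");
        return 0;
    }

The real function additionally falls back to iommu_dummy() when identity
mapping is disabled, and when adding a device to si_domain fails.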