summary refs log tree commit diff stats
path: root/drivers/pci/intel-iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/pci/intel-iommu.c')
-rw-r--r-- drivers/pci/intel-iommu.c 186
1 files changed, 150 insertions, 36 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 4cb949f0ebd..1fd8bb76570 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/bitmap.h>
+#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
@@ -31,6 +32,7 @@
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
+#include <linux/timer.h>
#include "iova.h"
#include "intel-iommu.h"
#include <asm/proto.h> /* force_iommu in this header in x86-64*/
@@ -51,11 +53,37 @@
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
+
+static void flush_unmaps_timeout(unsigned long data);
+
+DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
+
+static struct intel_iommu *g_iommus;
+
+#define HIGH_WATER_MARK 250
+struct deferred_flush_tables { /* queue of unmaps awaiting an IOTLB flush; deferred_flush[] holds one per g_iommus[] entry */
+ int next; /* number of queued entries, which is also the index of the next free slot */
+ struct iova *iova[HIGH_WATER_MARK]; /* IOVA range to release once the flush has been issued */
+ struct dmar_domain *domain[HIGH_WATER_MARK]; /* domain that owns iova[] at the same index */
+};
+
+static struct deferred_flush_tables *deferred_flush;
+
+/* number of non-ignored DRHD units, i.e. entries in g_iommus[] and deferred_flush[] */
+static int g_num_of_iommus;
+
+static DEFINE_SPINLOCK(async_umap_flush_lock);
+static LIST_HEAD(unmaps_to_do);
+
+static int timer_on;
+static long list_size;
+
static void domain_remove_dev_info(struct dmar_domain *domain);
static int dmar_disabled;
static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac;
+static int intel_iommu_strict;
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
@@ -74,9 +102,13 @@ static int __init intel_iommu_setup(char *str)
printk(KERN_INFO
"Intel-IOMMU: disable GFX device mapping\n");
} else if (!strncmp(str, "forcedac", 8)) {
- printk (KERN_INFO
+ printk(KERN_INFO
"Intel-IOMMU: Forcing DAC for PCI devices\n");
dmar_forcedac = 1;
+ } else if (!strncmp(str, "strict", 6)) {
+ printk(KERN_INFO
+ "Intel-IOMMU: disable batched IOTLB flush\n");
+ intel_iommu_strict = 1;
}
str += strcspn(str, ",");
@@ -966,17 +998,13 @@ static int iommu_init_domains(struct intel_iommu *iommu)
set_bit(0, iommu->domain_ids);
return 0;
}
-
-static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
+static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
+ struct dmar_drhd_unit *drhd)
{
- struct intel_iommu *iommu;
int ret;
int map_size;
u32 ver;
- iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
- if (!iommu)
- return NULL;
iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
if (!iommu->reg) {
printk(KERN_ERR "IOMMU: can't map the region\n");
@@ -1404,7 +1432,7 @@ static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
int index;
while (dev) {
- for (index = 0; index < cnt; index ++)
+ for (index = 0; index < cnt; index++)
if (dev == devices[index])
return 1;
@@ -1669,7 +1697,7 @@ int __init init_dmars(void)
struct dmar_rmrr_unit *rmrr;
struct pci_dev *pdev;
struct intel_iommu *iommu;
- int ret, unit = 0;
+ int i, ret, unit = 0;
/*
* for each drhd
@@ -1680,7 +1708,34 @@ int __init init_dmars(void)
for_each_drhd_unit(drhd) {
if (drhd->ignored)
continue;
- iommu = alloc_iommu(drhd);
+ g_num_of_iommus++;
+ /*
+ * No lock is needed: this counter is only incremented in the
+ * single-threaded kernel __init code path; all other accesses
+ * are read-only.
+ */
+ }
+
+ g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
+ if (!g_iommus) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ deferred_flush = kzalloc(g_num_of_iommus *
+ sizeof(struct deferred_flush_tables), GFP_KERNEL);
+ if (!deferred_flush) {
+ kfree(g_iommus);
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ i = 0;
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+ iommu = alloc_iommu(&g_iommus[i], drhd);
+ i++;
if (!iommu) {
ret = -ENOMEM;
goto error;
@@ -1713,7 +1768,6 @@ int __init init_dmars(void)
* endfor
*/
for_each_rmrr_units(rmrr) {
- int i;
for (i = 0; i < rmrr->devices_cnt; i++) {
pdev = rmrr->devices[i];
/* some BIOS lists non-exist devices in DMAR table */
@@ -1769,6 +1823,7 @@ error:
iommu = drhd->iommu;
free_iommu(iommu);
}
+ kfree(g_iommus);
return ret;
}
@@ -1850,32 +1905,31 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
return domain;
}
-static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
- size_t size, int dir)
+static dma_addr_t
+intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
{
struct pci_dev *pdev = to_pci_dev(hwdev);
- int ret;
struct dmar_domain *domain;
- unsigned long start_addr;
+ unsigned long start_paddr;
struct iova *iova;
int prot = 0;
+ int ret;
BUG_ON(dir == DMA_NONE);
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
- return virt_to_bus(addr);
+ return paddr;
domain = get_valid_domain_for_dev(pdev);
if (!domain)
return 0;
- addr = (void *)virt_to_phys(addr);
- size = aligned_size((u64)addr, size);
+ size = aligned_size((u64)paddr, size);
iova = __intel_alloc_iova(hwdev, domain, size);
if (!iova)
goto error;
- start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
+ start_paddr = iova->pfn_lo << PAGE_SHIFT_4K;
/*
* Check if DMAR supports zero-length reads on write only
@@ -1887,36 +1941,89 @@ static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;
/*
- * addr - (addr + size) might be partial page, we should map the whole
+ * paddr - (paddr + size) might be partial page, we should map the whole
* page. Note: if two part of one page are separately mapped, we
- * might have two guest_addr mapping to the same host addr, but this
+ * might have two guest_addr mapping to the same host paddr, but this
* is not a big problem
*/
- ret = domain_page_mapping(domain, start_addr,
- ((u64)addr) & PAGE_MASK_4K, size, prot);
+ ret = domain_page_mapping(domain, start_paddr,
+ ((u64)paddr) & PAGE_MASK_4K, size, prot);
if (ret)
goto error;
pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
- pci_name(pdev), size, (u64)addr,
- size, (u64)start_addr, dir);
+ pci_name(pdev), size, (u64)paddr,
+ size, (u64)start_paddr, dir);
/* it's a non-present to present mapping */
ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
- start_addr, size >> PAGE_SHIFT_4K, 1);
+ start_paddr, size >> PAGE_SHIFT_4K, 1);
if (ret)
iommu_flush_write_buffer(domain->iommu);
- return (start_addr + ((u64)addr & (~PAGE_MASK_4K)));
+ return (start_paddr + ((u64)paddr & (~PAGE_MASK_4K)));
error:
if (iova)
__free_iova(&domain->iovad, iova);
printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
- pci_name(pdev), size, (u64)addr, dir);
+ pci_name(pdev), size, (u64)paddr, dir);
return 0;
}
+/*
+ * Drain every per-IOMMU deferred-unmap table.
+ *
+ * For each IOMMU that has queued entries, issue one global IOTLB flush and
+ * then release the queued IOVA ranges back to their domains.  Also clears
+ * timer_on and resets the global entry count.
+ *
+ * Both callers visible in this file invoke it with async_umap_flush_lock held.
+ */
+static void flush_unmaps(void)
+{
+	int idx;
+
+	timer_on = 0;
+
+	for (idx = 0; idx < g_num_of_iommus; idx++) {
+		struct deferred_flush_tables *tbl = &deferred_flush[idx];
+		int slot;
+
+		/* nothing queued against this IOMMU, so skip the flush */
+		if (!tbl->next)
+			continue;
+
+		/* a single global flush covers every entry queued below */
+		iommu_flush_iotlb_global(&g_iommus[idx], 0);
+
+		/* IOVA ranges are released only after the flush was issued */
+		for (slot = 0; slot < tbl->next; slot++)
+			__free_iova(&tbl->domain[slot]->iovad, tbl->iova[slot]);
+
+		tbl->next = 0;
+	}
+
+	list_size = 0;
+}
+
+/*
+ * Expiry handler registered for unmap_timer: drains all deferred unmaps
+ * under async_umap_flush_lock.
+ *
+ * @data: timer cookie, not used by this handler.
+ */
+static void flush_unmaps_timeout(unsigned long data)
+{
+	unsigned long irq_state;
+
+	spin_lock_irqsave(&async_umap_flush_lock, irq_state);
+	flush_unmaps();
+	spin_unlock_irqrestore(&async_umap_flush_lock, irq_state);
+}
+
+/*
+ * Queue an unmapped IOVA range for a later, batched IOTLB flush.
+ *
+ * @dom:  domain the range belongs to; dom->iommu must point into g_iommus[].
+ * @iova: range to release once the flush has been issued.
+ *
+ * If the total number of queued entries has reached HIGH_WATER_MARK, the
+ * queues are drained first.  If no flush timer is marked as running, one is
+ * armed to fire 10 ms from now.
+ */
+static void add_unmap(struct dmar_domain *dom, struct iova *iova)
+{
+	struct deferred_flush_tables *tbl;
+	unsigned long irq_state;
+	int table_idx, slot;
+
+	spin_lock_irqsave(&async_umap_flush_lock, irq_state);
+
+	/* list_size counts entries across all IOMMUs, so no table can overflow */
+	if (list_size == HIGH_WATER_MARK)
+		flush_unmaps();
+
+	/* the IOMMU's position within g_iommus[] selects its table */
+	table_idx = dom->iommu - g_iommus;
+	tbl = &deferred_flush[table_idx];
+
+	slot = tbl->next;
+	tbl->domain[slot] = dom;
+	tbl->iova[slot] = iova;
+	tbl->next = slot + 1;
+	list_size++;
+
+	if (!timer_on) {
+		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
+		timer_on = 1;
+	}
+
+	spin_unlock_irqrestore(&async_umap_flush_lock, irq_state);
+}
+
static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
size_t size, int dir)
{
@@ -1944,13 +2051,19 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
dma_pte_clear_range(domain, start_addr, start_addr + size);
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
-
- if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
- size >> PAGE_SHIFT_4K, 0))
- iommu_flush_write_buffer(domain->iommu);
-
- /* free iova */
- __free_iova(&domain->iovad, iova);
+ if (intel_iommu_strict) {
+ if (iommu_flush_iotlb_psi(domain->iommu,
+ domain->id, start_addr, size >> PAGE_SHIFT_4K, 0))
+ iommu_flush_write_buffer(domain->iommu);
+ /* free iova */
+ __free_iova(&domain->iovad, iova);
+ } else {
+ add_unmap(domain, iova);
+ /*
+ * queue up the release of the unmap to save the 1/6th of the
+ * cpu used up by the iotlb flush operation...
+ */
+ }
}
static void * intel_alloc_coherent(struct device *hwdev, size_t size,
@@ -1968,7 +2081,7 @@ static void * intel_alloc_coherent(struct device *hwdev, size_t size,
return NULL;
memset(vaddr, 0, size);
- *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
+ *dma_handle = intel_map_single(hwdev, virt_to_bus(vaddr), size, DMA_BIDIRECTIONAL);
if (*dma_handle)
return vaddr;
free_pages((unsigned long)vaddr, order);
@@ -2289,6 +2402,7 @@ int __init intel_iommu_init(void)
printk(KERN_INFO
"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
+ init_timer(&unmap_timer);
force_iommu = 1;
dma_ops = &intel_dma_ops;
return 0;