12 files changed, 410 insertions, 147 deletions
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index e5fa12b0d21..ca62457e043 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -24,6 +24,7 @@ msm-y := \
 	msm_gem_prime.o \
 	msm_gem_submit.o \
 	msm_gpu.o \
+	msm_iommu.o \
 	msm_ringbuffer.o
 
 msm-$(CONFIG_DRM_MSM_FBDEV) += msm_fbdev.o
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index d9e72a60080..16fe15d4f57 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -426,7 +426,20 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 	if (ret)
 		goto fail;
 
-	return &a3xx_gpu->base.base;
+	if (!gpu->mmu) {
+		/* TODO we think it is possible to configure the GPU to
+		 * restrict access to VRAM carveout.  But the required
+		 * registers are unknown.  For now just bail out and
+		 * limp along with just modesetting.  If it turns out
+		 * to not be possible to restrict access, then we must
+		 * implement a cmdstream validator.
+		 */
+		dev_err(dev->dev, "No memory protection without IOMMU\n");
+		ret = -ENXIO;
+		goto fail;
+	}
+
+	return gpu;
 
 fail:
 	if (a3xx_gpu)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index d7bc51b385b..3f1c7b27e33 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -17,6 +17,7 @@
 
 #include "adreno_gpu.h"
 #include "msm_gem.h"
+#include "msm_mmu.h"
 
 struct adreno_info {
 	struct adreno_rev rev;
@@ -291,6 +292,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs,
 		struct adreno_rev rev)
 {
+	struct msm_mmu *mmu;
 	int i, ret;
 
 	/* identify gpu: */
@@ -338,10 +340,13 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	if (ret)
 		return ret;
 
-	ret = msm_iommu_attach(drm, gpu->base.iommu,
-			iommu_ports, ARRAY_SIZE(iommu_ports));
-	if (ret)
-		return ret;
+	mmu = gpu->base.mmu;
+	if (mmu) {
+		ret = mmu->funcs->attach(mmu, iommu_ports,
+				ARRAY_SIZE(iommu_ports));
+		if (ret)
+			return ret;
+	}
 
 	gpu->memptrs_bo = msm_gem_new(drm, sizeof(*gpu->memptrs),
 			MSM_BO_UNCACHED);
diff --git a/drivers/gpu/drm/msm/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp4/mdp4_kms.c
index 8972ac35a43..bab8cbc6d79 100644
--- a/drivers/gpu/drm/msm/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/mdp4/mdp4_kms.c
@@ -17,6 +17,7 @@
 
 
 #include "msm_drv.h"
+#include "msm_mmu.h"
 #include "mdp4_kms.h"
 
 static struct mdp4_platform_config *mdp4_get_config(struct platform_device *dev);
@@ -260,6 +261,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 	struct mdp4_platform_config *config = mdp4_get_config(pdev);
 	struct mdp4_kms *mdp4_kms;
 	struct msm_kms *kms = NULL;
+	struct msm_mmu *mmu;
 	int ret;
 
 	mdp4_kms = kzalloc(sizeof(*mdp4_kms), GFP_KERNEL);
@@ -322,12 +324,6 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 	clk_set_rate(mdp4_kms->clk, config->max_clk);
 	clk_set_rate(mdp4_kms->lut_clk, config->max_clk);
 
-	if (!config->iommu) {
-		dev_err(dev->dev, "no iommu\n");
-		ret = -ENXIO;
-		goto fail;
-	}
-
 	/* make sure things are off before attaching iommu (bootloader could
 	 * have left things on, in which case we'll start getting faults if
 	 * we don't disable):
@@ -337,12 +333,23 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 	mdp4_write(mdp4_kms, REG_MDP4_DSI_ENABLE, 0);
 	mdelay(16);
 
-	ret = msm_iommu_attach(dev, config->iommu,
-			iommu_ports, ARRAY_SIZE(iommu_ports));
-	if (ret)
-		goto fail;
+	if (config->iommu) {
+		mmu = msm_iommu_new(dev, config->iommu);
+		if (IS_ERR(mmu)) {
+			ret = PTR_ERR(mmu);
+			goto fail;
+		}
+		ret = mmu->funcs->attach(mmu, iommu_ports,
+				ARRAY_SIZE(iommu_ports));
+		if (ret)
+			goto fail;
+	} else {
+		dev_info(dev->dev, "no iommu, fallback to phys "
+				"contig buffers for scanout\n");
+		mmu = NULL;
+	}
 
-	mdp4_kms->id = msm_register_iommu(dev, config->iommu);
+	mdp4_kms->id = msm_register_mmu(dev, mmu);
 	if (mdp4_kms->id < 0) {
 		ret = mdp4_kms->id;
 		dev_err(dev->dev, "failed to register mdp4 iommu: %d\n", ret);
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 28b57eb6f9a..a7f0c65ca65 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -30,50 +30,19 @@ static const struct drm_mode_config_funcs mode_config_funcs = {
 	.output_poll_changed = msm_fb_output_poll_changed,
 };
 
-static int msm_fault_handler(struct iommu_domain *iommu, struct device *dev,
-		unsigned long iova, int flags, void *arg)
-{
-	DBG("*** fault: iova=%08lx, flags=%d", iova, flags);
-	return 0;
-}
-
-int msm_register_iommu(struct drm_device *dev, struct iommu_domain *iommu)
+int msm_register_mmu(struct drm_device *dev, struct msm_mmu *mmu)
 {
 	struct msm_drm_private *priv = dev->dev_private;
-	int idx = priv->num_iommus++;
+	int idx = priv->num_mmus++;
 
-	if (WARN_ON(idx >= ARRAY_SIZE(priv->iommus)))
+	if (WARN_ON(idx >= ARRAY_SIZE(priv->mmus)))
 		return -EINVAL;
 
-	priv->iommus[idx] = iommu;
-
-	iommu_set_fault_handler(iommu, msm_fault_handler, dev);
-
-	/* need to iommu_attach_device() somewhere??  on resume?? */
+	priv->mmus[idx] = mmu;
 
 	return idx;
 }
 
-int msm_iommu_attach(struct drm_device *dev, struct iommu_domain *iommu,
-		const char **names, int cnt)
-{
-	int i, ret;
-
-	for (i = 0; i < cnt; i++) {
-		/* TODO maybe some day msm iommu won't require this hack: */
-		struct device *msm_iommu_get_ctx(const char *ctx_name);
-		struct device *ctx = msm_iommu_get_ctx(names[i]);
-		if (!ctx)
-			continue;
-		ret = iommu_attach_device(iommu, ctx);
-		if (ret) {
-			dev_warn(dev->dev, "could not attach iommu to %s", names[i]);
-			return ret;
-		}
-	}
-	return 0;
-}
-
 #ifdef CONFIG_DRM_MSM_REGISTER_LOGGING
 static bool reglog = false;
 MODULE_PARM_DESC(reglog, "Enable register read/write logging");
@@ -82,6 +51,10 @@ module_param(reglog, bool, 0600);
 #define reglog 0
 #endif
 
+static char *vram;
+MODULE_PARM_DESC(vram, "Configure VRAM size (for devices without IOMMU/GPUMMU");
+module_param(vram, charp, 0);
+
 void __iomem *msm_ioremap(struct platform_device *pdev, const char *name,
 		const char *dbgname)
 {
@@ -161,6 +134,14 @@ static int msm_unload(struct drm_device *dev)
 		mutex_unlock(&dev->struct_mutex);
 	}
 
+	if (priv->vram.paddr) {
+		DEFINE_DMA_ATTRS(attrs);
+		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+		drm_mm_takedown(&priv->vram.mm);
+		dma_free_attrs(dev->dev, priv->vram.size, NULL,
+				priv->vram.paddr, &attrs);
+	}
+
 	dev->dev_private = NULL;
 
 	kfree(priv);
@@ -191,6 +172,41 @@ static int msm_load(struct drm_device *dev, unsigned long flags)
 
 	drm_mode_config_init(dev);
 
+	/* if we have no IOMMU, then we need to use carveout allocator.
+	 * Grab the entire CMA chunk carved out in early startup in
+	 * mach-msm:
+	 */
+	if (!iommu_present(&platform_bus_type)) {
+		DEFINE_DMA_ATTRS(attrs);
+		unsigned long size;
+		void *p;
+
+		DBG("using %s VRAM carveout", vram);
+		size = memparse(vram, NULL);
+		priv->vram.size = size;
+
+		drm_mm_init(&priv->vram.mm, 0, (size >> PAGE_SHIFT) - 1);
+
+		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+
+		/* note that for no-kernel-mapping, the vaddr returned
+		 * is bogus, but non-null if allocation succeeded:
+		 */
+		p = dma_alloc_attrs(dev->dev, size,
+				&priv->vram.paddr, 0, &attrs);
+		if (!p) {
+			dev_err(dev->dev, "failed to allocate VRAM\n");
+			priv->vram.paddr = 0;
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		dev_info(dev->dev, "VRAM: %08x->%08x\n",
+				(uint32_t)priv->vram.paddr,
+				(uint32_t)(priv->vram.paddr + size));
+	}
+
 	kms = mdp4_kms_init(dev);
 	if (IS_ERR(kms)) {
 		/*
@@ -778,6 +794,7 @@ static const struct dev_pm_ops msm_pm_ops = {
 
 static int msm_pdev_probe(struct platform_device *pdev)
 {
+	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
 	return drm_platform_init(&msm_driver, pdev);
 }
 
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index a4c52cfe7fc..1d22d87a49f 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -53,6 +53,7 @@ static inline struct device *msm_iommu_get_ctx(const char *ctx_name)
 
 struct msm_kms;
 struct msm_gpu;
+struct msm_mmu;
 
 #define NUM_DOMAINS 2    /* one for KMS, then one per gpu core (?) */
 
@@ -85,9 +86,9 @@ struct msm_drm_private {
 	/* callbacks deferred until bo is inactive: */
 	struct list_head fence_cbs;
 
-	/* registered IOMMU domains: */
-	unsigned int num_iommus;
-	struct iommu_domain *iommus[NUM_DOMAINS];
+	/* registered MMUs: */
+	unsigned int num_mmus;
+	struct msm_mmu *mmus[NUM_DOMAINS];
 
 	unsigned int num_planes;
 	struct drm_plane *planes[8];
@@ -103,6 +104,16 @@ struct msm_drm_private {
 
 	unsigned int num_connectors;
 	struct drm_connector *connectors[8];
+
+	/* VRAM carveout, used when no IOMMU: */
+	struct {
+		unsigned long size;
+		dma_addr_t paddr;
+		/* NOTE: mm managed at the page level, size is in # of pages
+		 * and position mm_node->start is in # of pages:
+		 */
+		struct drm_mm mm;
+	} vram;
 };
 
 struct msm_format {
@@ -153,9 +164,7 @@ struct msm_kms {
 
 struct msm_kms *mdp4_kms_init(struct drm_device *dev);
 
-int msm_register_iommu(struct drm_device *dev, struct iommu_domain *iommu);
-int msm_iommu_attach(struct drm_device *dev, struct iommu_domain *iommu,
-		const char **names, int cnt);
+int msm_register_mmu(struct drm_device *dev, struct msm_mmu *mmu);
 
 int msm_wait_fence_interruptable(struct drm_device *dev, uint32_t fence,
 		struct timespec *timeout);
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index e587d251c59..d8d60c969ac 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -22,7 +22,45 @@
 #include "msm_drv.h"
 #include "msm_gem.h"
 #include "msm_gpu.h"
+#include "msm_mmu.h"
 
+static dma_addr_t physaddr(struct drm_gem_object *obj)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	struct msm_drm_private *priv = obj->dev->dev_private;
+	return (((dma_addr_t)msm_obj->vram_node->start) << PAGE_SHIFT) +
+			priv->vram.paddr;
+}
+
+/* allocate pages from VRAM carveout, used when no IOMMU: */
+static struct page **get_pages_vram(struct drm_gem_object *obj,
+		int npages)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	struct msm_drm_private *priv = obj->dev->dev_private;
+	dma_addr_t paddr;
+	struct page **p;
+	int ret, i;
+
+	p = drm_malloc_ab(npages, sizeof(struct page *));
+	if (!p)
+		return ERR_PTR(-ENOMEM);
+
+	ret = drm_mm_insert_node(&priv->vram.mm, msm_obj->vram_node,
+			npages, 0, DRM_MM_SEARCH_DEFAULT);
+	if (ret) {
+		drm_free_large(p);
+		return ERR_PTR(ret);
+	}
+
+	paddr = physaddr(obj);
+	for (i = 0; i < npages; i++) {
+		p[i] = phys_to_page(paddr);
+		paddr += PAGE_SIZE;
+	}
+
+	return p;
+}
 
 /* called with dev->struct_mutex held */
 static struct page **get_pages(struct drm_gem_object *obj)
@@ -31,9 +69,14 @@ static struct page **get_pages(struct drm_gem_object *obj)
 
 	if (!msm_obj->pages) {
 		struct drm_device *dev = obj->dev;
-		struct page **p = drm_gem_get_pages(obj, 0);
+		struct page **p;
 		int npages = obj->size >> PAGE_SHIFT;
 
+		if (iommu_present(&platform_bus_type))
+			p = drm_gem_get_pages(obj, 0);
+		else
+			p = get_pages_vram(obj, npages);
+
 		if (IS_ERR(p)) {
 			dev_err(dev->dev, "could not get pages: %ld\n",
 					PTR_ERR(p));
@@ -73,7 +116,11 @@ static void put_pages(struct drm_gem_object *obj)
 		sg_free_table(msm_obj->sgt);
 		kfree(msm_obj->sgt);
 
-		drm_gem_put_pages(obj, msm_obj->pages, true, false);
+		if (iommu_present(&platform_bus_type))
+			drm_gem_put_pages(obj, msm_obj->pages, true, false);
+		else
+			drm_mm_remove_node(msm_obj->vram_node);
+
 		msm_obj->pages = NULL;
 	}
 }
@@ -138,7 +185,6 @@ int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct drm_gem_object *obj = vma->vm_private_data;
-	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	struct drm_device *dev = obj->dev;
 	struct page **pages;
 	unsigned long pfn;
@@ -163,7 +209,7 @@ int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	pgoff = ((unsigned long)vmf->virtual_address -
 			vma->vm_start) >> PAGE_SHIFT;
 
-	pfn = page_to_pfn(msm_obj->pages[pgoff]);
+	pfn = page_to_pfn(pages[pgoff]);
 
 	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
 			pfn, pfn << PAGE_SHIFT);
@@ -219,67 +265,6 @@ uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj)
 	return offset;
 }
 
-/* helpers for dealing w/ iommu: */
-static int map_range(struct iommu_domain *domain, unsigned int iova,
-		struct sg_table *sgt, unsigned int len, int prot)
-{
-	struct scatterlist *sg;
-	unsigned int da = iova;
-	unsigned int i, j;
-	int ret;
-
-	if (!domain || !sgt)
-		return -EINVAL;
-
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		u32 pa = sg_phys(sg) - sg->offset;
-		size_t bytes = sg->length + sg->offset;
-
-		VERB("map[%d]: %08x %08x(%x)", i, iova, pa, bytes);
-
-		ret = iommu_map(domain, da, pa, bytes, prot);
-		if (ret)
-			goto fail;
-
-		da += bytes;
-	}
-
-	return 0;
-
-fail:
-	da = iova;
-
-	for_each_sg(sgt->sgl, sg, i, j) {
-		size_t bytes = sg->length + sg->offset;
-		iommu_unmap(domain, da, bytes);
-		da += bytes;
-	}
-	return ret;
-}
-
-static void unmap_range(struct iommu_domain *domain, unsigned int iova,
-		struct sg_table *sgt, unsigned int len)
-{
-	struct scatterlist *sg;
-	unsigned int da = iova;
-	int i;
-
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		size_t bytes = sg->length + sg->offset;
-		size_t unmapped;
-
-		unmapped = iommu_unmap(domain, da, bytes);
-		if (unmapped < bytes)
-			break;
-
-		VERB("unmap[%d]: %08x(%x)", i, iova, bytes);
-
-		BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE));
-
-		da += bytes;
-	}
-}
-
 /* should be called under struct_mutex.. although it can be called
  * from atomic context without struct_mutex to acquire an extra
  * iova ref if you know one is already held.
@@ -295,15 +280,20 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id,
 
 	if (!msm_obj->domain[id].iova) {
 		struct msm_drm_private *priv = obj->dev->dev_private;
-		uint32_t offset = (uint32_t)mmap_offset(obj);
-		struct page **pages;
-		pages = get_pages(obj);
+		struct msm_mmu *mmu = priv->mmus[id];
+		struct page **pages = get_pages(obj);
+
 		if (IS_ERR(pages))
 			return PTR_ERR(pages);
-		// XXX ideally we would not map buffers writable when not needed...
-		ret = map_range(priv->iommus[id], offset, msm_obj->sgt,
-				obj->size, IOMMU_READ | IOMMU_WRITE);
-		msm_obj->domain[id].iova = offset;
+
+		if (iommu_present(&platform_bus_type)) {
+			uint32_t offset = (uint32_t)mmap_offset(obj);
+			ret = mmu->funcs->map(mmu, offset, msm_obj->sgt,
+					obj->size, IOMMU_READ | IOMMU_WRITE);
+			msm_obj->domain[id].iova = offset;
+		} else {
+			msm_obj->domain[id].iova = physaddr(obj);
+		}
 	}
 
 	if (!ret)
@@ -514,6 +504,7 @@ void msm_gem_describe_objects(struct list_head *list, struct seq_file *m)
 void msm_gem_free_object(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
+	struct msm_drm_private *priv = obj->dev->dev_private;
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	int id;
 
@@ -525,11 +516,10 @@ void msm_gem_free_object(struct drm_gem_object *obj)
 	list_del(&msm_obj->mm_list);
 
 	for (id = 0; id < ARRAY_SIZE(msm_obj->domain); id++) {
-		if (msm_obj->domain[id].iova) {
-			struct msm_drm_private *priv = obj->dev->dev_private;
+		struct msm_mmu *mmu = priv->mmus[id];
+		if (mmu && msm_obj->domain[id].iova) {
 			uint32_t offset = (uint32_t)mmap_offset(obj);
-			unmap_range(priv->iommus[id], offset,
-					msm_obj->sgt, obj->size);
+			mmu->funcs->unmap(mmu, offset, msm_obj->sgt, obj->size);
 		}
 	}
 
@@ -591,6 +581,7 @@ static int msm_gem_new_impl(struct drm_device *dev,
 {
 	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_gem_object *msm_obj;
+	unsigned sz;
 
 	switch (flags & MSM_BO_CACHE_MASK) {
 	case MSM_BO_UNCACHED:
@@ -603,10 +594,17 @@ static int msm_gem_new_impl(struct drm_device *dev,
 		return -EINVAL;
 	}
 
-	msm_obj = kzalloc(sizeof(*msm_obj), GFP_KERNEL);
+	sz = sizeof(*msm_obj);
+	if (!iommu_present(&platform_bus_type))
+		sz += sizeof(struct drm_mm_node);
+
+	msm_obj = kzalloc(sz, GFP_KERNEL);
 	if (!msm_obj)
 		return -ENOMEM;
 
+	if (!iommu_present(&platform_bus_type))
+		msm_obj->vram_node = (void *)&msm_obj[1];
+
 	msm_obj->flags = flags;
 
 	msm_obj->resv = &msm_obj->_resv;
@@ -623,7 +621,7 @@ static int msm_gem_new_impl(struct drm_device *dev,
 struct drm_gem_object *msm_gem_new(struct drm_device *dev,
 		uint32_t size, uint32_t flags)
 {
-	struct drm_gem_object *obj;
+	struct drm_gem_object *obj = NULL;
 	int ret;
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
@@ -634,9 +632,13 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev,
 	if (ret)
 		goto fail;
 
-	ret = drm_gem_object_init(dev, obj, size);
-	if (ret)
-		goto fail;
+	if (iommu_present(&platform_bus_type)) {
+		ret = drm_gem_object_init(dev, obj, size);
+		if (ret)
+			goto fail;
+	} else {
+		drm_gem_private_object_init(dev, obj, size);
+	}
 
 	return obj;
 
@@ -654,6 +656,12 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev,
 	struct drm_gem_object *obj;
 	int ret, npages;
 
+	/* if we don't have IOMMU, don't bother pretending we can import: */
+	if (!iommu_present(&platform_bus_type)) {
+		dev_err(dev->dev, "cannot import without IOMMU\n");
+		return ERR_PTR(-EINVAL);
+	}
+
 	size = PAGE_ALIGN(size);
 
 	ret = msm_gem_new_impl(dev, size, MSM_BO_WC, &obj);
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index f4f23a578d9..3246bb46c4f 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -57,6 +57,11 @@ struct msm_gem_object {
 	/* normally (resv == &_resv) except for imported bo's */
 	struct reservation_object *resv;
 	struct reservation_object _resv;
+
+	/* For physically contiguous buffers.  Used when we don't have
+	 * an IOMMU.
+	 */
+	struct drm_mm_node *vram_node;
 };
 #define to_msm_bo(x) container_of(x, struct msm_gem_object, base)
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 71f105f0d89..4ebce8be489 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -17,6 +17,7 @@
 
 #include "msm_gpu.h"
 #include "msm_gem.h"
+#include "msm_mmu.h"
 
 
 /*
@@ -353,6 +354,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
 		const char *name, const char *ioname, const char *irqname, int ringsz)
 {
+	struct iommu_domain *iommu;
 	int i, ret;
 
 	gpu->dev = drm;
@@ -418,13 +420,14 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	 * and have separate page tables per context.  For now, to keep things
 	 * simple and to get something working, just use a single address space:
 	 */
-	gpu->iommu = iommu_domain_alloc(&platform_bus_type);
-	if (!gpu->iommu) {
-		dev_err(drm->dev, "failed to allocate IOMMU\n");
-		ret = -ENOMEM;
-		goto fail;
+	iommu = iommu_domain_alloc(&platform_bus_type);
+	if (iommu) {
+		dev_info(drm->dev, "%s: using IOMMU\n", name);
+		gpu->mmu = msm_iommu_new(drm, iommu);
+	} else {
+		dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
 	}
-	gpu->id = msm_register_iommu(drm, gpu->iommu);
+	gpu->id = msm_register_mmu(drm, gpu->mmu);
 
 	/* Create ringbuffer: */
 	gpu->rb = msm_ringbuffer_new(gpu, ringsz);
@@ -464,6 +467,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu)
 		msm_ringbuffer_destroy(gpu->rb);
 	}
 
-	if (gpu->iommu)
-		iommu_domain_free(gpu->iommu);
+	if (gpu->mmu)
+		gpu->mmu->funcs->destroy(gpu->mmu);
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 08d08420cbc..458db8c64c2 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -78,7 +78,7 @@ struct msm_gpu {
 	void __iomem *mmio;
 	int irq;
 
-	struct iommu_domain *iommu;
+	struct msm_mmu *mmu;
 	int id;
 
 	/* Power Control: */
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
new file mode 100644
index 00000000000..014a3fd04f6
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "msm_drv.h"
+#include "msm_mmu.h"
+
+struct msm_iommu {
+	struct msm_mmu base;
+	struct iommu_domain *domain;
+};
+#define to_msm_iommu(x) container_of(x, struct msm_iommu, base)
+
+static int msm_fault_handler(struct iommu_domain *iommu, struct device *dev,
+		unsigned long iova, int flags, void *arg)
+{
+	DBG("*** fault: iova=%08lx, flags=%d", iova, flags);
+	return 0;
+}
+
+static int msm_iommu_attach(struct msm_mmu *mmu, const char **names, int cnt)
+{
+	struct drm_device *dev = mmu->dev;
+	struct msm_iommu *iommu = to_msm_iommu(mmu);
+	int i, ret;
+
+	for (i = 0; i < cnt; i++) {
+		struct device *msm_iommu_get_ctx(const char *ctx_name);
+		struct device *ctx = msm_iommu_get_ctx(names[i]);
+		if (!ctx)
+			continue;
+		ret = iommu_attach_device(iommu->domain, ctx);
+		if (ret) {
+			dev_warn(dev->dev, "could not attach iommu to %s", names[i]);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int msm_iommu_map(struct msm_mmu *mmu, uint32_t iova,
+		struct sg_table *sgt, unsigned len, int prot)
+{
+	struct msm_iommu *iommu = to_msm_iommu(mmu);
+	struct iommu_domain *domain = iommu->domain;
+	struct scatterlist *sg;
+	unsigned int da = iova;
+	unsigned int i, j;
+	int ret;
+
+	if (!domain || !sgt)
+		return -EINVAL;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		u32 pa = sg_phys(sg) - sg->offset;
+		size_t bytes = sg->length + sg->offset;
+
+		VERB("map[%d]: %08x %08x(%x)", i, iova, pa, bytes);
+
+		ret = iommu_map(domain, da, pa, bytes, prot);
+		if (ret)
+			goto fail;
+
+		da += bytes;
+	}
+
+	return 0;
+
+fail:
+	da = iova;
+
+	for_each_sg(sgt->sgl, sg, i, j) {
+		size_t bytes = sg->length + sg->offset;
+		iommu_unmap(domain, da, bytes);
+		da += bytes;
+	}
+	return ret;
+}
+
+static int msm_iommu_unmap(struct msm_mmu *mmu, uint32_t iova,
+		struct sg_table *sgt, unsigned len)
+{
+	struct msm_iommu *iommu = to_msm_iommu(mmu);
+	struct iommu_domain *domain = iommu->domain;
+	struct scatterlist *sg;
+	unsigned int da = iova;
+	int i;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		size_t bytes = sg->length + sg->offset;
+		size_t unmapped;
+
+		unmapped = iommu_unmap(domain, da, bytes);
+		if (unmapped < bytes)
+			return unmapped;
+
+		VERB("unmap[%d]: %08x(%x)", i, iova, bytes);
+
+		BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE));
+
+		da += bytes;
+	}
+
+	return 0;
+}
+
+static void msm_iommu_destroy(struct msm_mmu *mmu)
+{
+	struct msm_iommu *iommu = to_msm_iommu(mmu);
+	iommu_domain_free(iommu->domain);
+	kfree(iommu);
+}
+
+static const struct msm_mmu_funcs funcs = {
+		.attach = msm_iommu_attach,
+		.map = msm_iommu_map,
+		.unmap = msm_iommu_unmap,
+		.destroy = msm_iommu_destroy,
+};
+
+struct msm_mmu *msm_iommu_new(struct drm_device *dev, struct iommu_domain *domain)
+{
+	struct msm_iommu *iommu;
+
+	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+	if (!iommu)
+		return ERR_PTR(-ENOMEM);
+
+	iommu->domain = domain;
+	msm_mmu_init(&iommu->base, dev, &funcs);
+	iommu_set_fault_handler(domain, msm_fault_handler, dev);
+
+	return &iommu->base;
+}
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h
new file mode 100644
index 00000000000..030324482b4
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_mmu.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __MSM_MMU_H__
+#define __MSM_MMU_H__
+
+#include <linux/iommu.h>
+
+struct msm_mmu_funcs {
+	int (*attach)(struct msm_mmu *mmu, const char **names, int cnt);
+	int (*map)(struct msm_mmu *mmu, uint32_t iova, struct sg_table *sgt,
+			unsigned len, int prot);
+	int (*unmap)(struct msm_mmu *mmu, uint32_t iova, struct sg_table *sgt,
+			unsigned len);
+	void (*destroy)(struct msm_mmu *mmu);
+};
+
+struct msm_mmu {
+	const struct msm_mmu_funcs *funcs;
+	struct drm_device *dev;
+};
+
+static inline void msm_mmu_init(struct msm_mmu *mmu, struct drm_device *dev,
+		const struct msm_mmu_funcs *funcs)
+{
+	mmu->dev = dev;
+	mmu->funcs = funcs;
+}
+
+struct msm_mmu *msm_iommu_new(struct drm_device *dev, struct iommu_domain *domain);
+struct msm_mmu *msm_gpummu_new(struct drm_device *dev, struct msm_gpu *gpu);
+
+#endif /* __MSM_MMU_H__ */