From 6bcd8d3c782b7b2c98c8f414a6bb43cf6b84e53c Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Thu, 1 Sep 2011 20:18:42 +0000
Subject: vmwgfx: Fix confusion caused by using "fence" in various places

This is needed before we introduce the fence objects.
Otherwise this will be even more confusing. The plan is to use the following:

seqno: A 32-bit sequence number that may be passed in the fifo.
marker: Objects, carrying a seqno, that track fifo submission time. They
are used for fifo lag based throttling.
fence objects: Kernel space objects, possibly accessible from user-space and
carrying a 32-bit seqno together with signaled status.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jakob Bornecrantz <jakob@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c')

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 87e43e0733b..72d95617bc5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -295,18 +295,18 @@ static int vmw_sync_obj_flush(void *sync_obj, void *sync_arg)
 static bool vmw_sync_obj_signaled(void *sync_obj, void *sync_arg)
 {
 	struct vmw_private *dev_priv = (struct vmw_private *)sync_arg;
-	uint32_t sequence = (unsigned long) sync_obj;
+	uint32_t seqno = (unsigned long) sync_obj;
 
-	return vmw_fence_signaled(dev_priv, sequence);
+	return vmw_seqno_passed(dev_priv, seqno);
 }
 
 static int vmw_sync_obj_wait(void *sync_obj, void *sync_arg,
 			     bool lazy, bool interruptible)
 {
 	struct vmw_private *dev_priv = (struct vmw_private *)sync_arg;
-	uint32_t sequence = (unsigned long) sync_obj;
+	uint32_t seqno = (unsigned long) sync_obj;
 
-	return vmw_wait_fence(dev_priv, false, sequence, false, 3*HZ);
+	return vmw_wait_seqno(dev_priv, false, seqno, false, 3*HZ);
 }
 
 struct ttm_bo_driver vmw_bo_driver = {
-- 
cgit v1.2.3-70-g09d2


From ae2a104058e217548215bfe6c6c8a98752139c29 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Thu, 1 Sep 2011 20:18:44 +0000
Subject: vmwgfx: Implement fence objects

Will be needed for queries and drm event-driven throttling.

As a benefit, they help avoid stale user-space fence handles.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jakob Bornecrantz <jakob@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/vmwgfx/Makefile          |   3 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c   |  26 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c      |  28 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h      |  16 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c  | 112 ++++--
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c    | 619 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.h    | 105 ++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_irq.c      |  35 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c |   4 -
 include/drm/vmwgfx_drm.h                 | 149 +++++++-
 10 files changed, 1010 insertions(+), 87 deletions(-)
 create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
 create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_fence.h

(limited to 'drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c')

diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index f41e8b49997..7d8e9d5d498 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -4,6 +4,7 @@ ccflags-y := -Iinclude/drm
 vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
 	    vmwgfx_fb.o vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_buffer.o \
 	    vmwgfx_fifo.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \
-	    vmwgfx_overlay.o vmwgfx_marker.o vmwgfx_gmrid_manager.o
+	    vmwgfx_overlay.o vmwgfx_marker.o vmwgfx_gmrid_manager.o \
+	    vmwgfx_fence.o
 
 obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 72d95617bc5..5d665ce8cbe 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -274,39 +274,39 @@ static int vmw_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 
 static void *vmw_sync_obj_ref(void *sync_obj)
 {
-	return sync_obj;
+
+	return (void *)
+		vmw_fence_obj_reference((struct vmw_fence_obj *) sync_obj);
 }
 
 static void vmw_sync_obj_unref(void **sync_obj)
 {
-	*sync_obj = NULL;
+	vmw_fence_obj_unreference((struct vmw_fence_obj **) sync_obj);
 }
 
 static int vmw_sync_obj_flush(void *sync_obj, void *sync_arg)
 {
-	struct vmw_private *dev_priv = (struct vmw_private *)sync_arg;
-
-	mutex_lock(&dev_priv->hw_mutex);
-	vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC);
-	mutex_unlock(&dev_priv->hw_mutex);
+	vmw_fence_obj_flush((struct vmw_fence_obj *) sync_obj);
 	return 0;
 }
 
 static bool vmw_sync_obj_signaled(void *sync_obj, void *sync_arg)
 {
-	struct vmw_private *dev_priv = (struct vmw_private *)sync_arg;
-	uint32_t seqno = (unsigned long) sync_obj;
+	unsigned long flags = (unsigned long) sync_arg;
+	return	vmw_fence_obj_signaled((struct vmw_fence_obj *) sync_obj,
+				       (uint32_t) flags);
 
-	return vmw_seqno_passed(dev_priv, seqno);
 }
 
 static int vmw_sync_obj_wait(void *sync_obj, void *sync_arg,
 			     bool lazy, bool interruptible)
 {
-	struct vmw_private *dev_priv = (struct vmw_private *)sync_arg;
-	uint32_t seqno = (unsigned long) sync_obj;
+	unsigned long flags = (unsigned long) sync_arg;
 
-	return vmw_wait_seqno(dev_priv, false, seqno, false, 3*HZ);
+	return vmw_fence_obj_wait((struct vmw_fence_obj *) sync_obj,
+				  (uint32_t) flags,
+				  lazy, interruptible,
+				  VMW_FENCE_WAIT_TIMEOUT);
 }
 
 struct ttm_bo_driver vmw_bo_driver = {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 4f65f1e34b8..d4829cbf326 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -82,12 +82,18 @@
 #define DRM_IOCTL_VMW_EXECBUF					\
 	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_EXECBUF,		\
 		struct drm_vmw_execbuf_arg)
+#define DRM_IOCTL_VMW_GET_3D_CAP				\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_GET_3D_CAP,		\
+		 struct drm_vmw_get_3d_cap_arg)
 #define DRM_IOCTL_VMW_FENCE_WAIT				\
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_FENCE_WAIT,		\
 		 struct drm_vmw_fence_wait_arg)
-#define DRM_IOCTL_VMW_GET_3D_CAP				\
-	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_GET_3D_CAP,		\
-		struct drm_vmw_get_3d_cap_arg)
+#define DRM_IOCTL_VMW_FENCE_SIGNALED				\
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_FENCE_SIGNALED,	\
+		 struct drm_vmw_fence_signaled_arg)
+#define DRM_IOCTL_VMW_FENCE_UNREF				\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_FENCE_UNREF,		\
+		 struct drm_vmw_fence_arg)
 
 /**
  * The core DRM version of this macro doesn't account for
@@ -131,7 +137,12 @@ static struct drm_ioctl_desc vmw_ioctls[] = {
 		      DRM_AUTH | DRM_UNLOCKED),
 	VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_wait_ioctl,
+	VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_obj_wait_ioctl,
+		      DRM_AUTH | DRM_UNLOCKED),
+	VMW_IOCTL_DEF(VMW_FENCE_SIGNALED,
+		      vmw_fence_obj_signaled_ioctl,
+		      DRM_AUTH | DRM_UNLOCKED),
+	VMW_IOCTL_DEF(VMW_FENCE_UNREF, vmw_fence_obj_unref_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
 	VMW_IOCTL_DEF(VMW_GET_3D_CAP, vmw_get_cap_3d_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
@@ -198,12 +209,14 @@ static int vmw_request_device(struct vmw_private *dev_priv)
 		DRM_ERROR("Unable to initialize FIFO.\n");
 		return ret;
 	}
+	vmw_fence_fifo_up(dev_priv->fman);
 
 	return 0;
 }
 
 static void vmw_release_device(struct vmw_private *dev_priv)
 {
+	vmw_fence_fifo_down(dev_priv->fman);
 	vmw_fifo_release(dev_priv, &dev_priv->fifo);
 }
 
@@ -434,6 +447,10 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 			goto out_no_device;
 		}
 	}
+
+	dev_priv->fman = vmw_fence_manager_init(dev_priv);
+	if (unlikely(dev_priv->fman == NULL))
+		goto out_no_fman;
 	ret = vmw_kms_init(dev_priv);
 	if (unlikely(ret != 0))
 		goto out_no_kms;
@@ -475,6 +492,8 @@ out_no_fifo:
 	vmw_overlay_close(dev_priv);
 	vmw_kms_close(dev_priv);
 out_no_kms:
+	vmw_fence_manager_takedown(dev_priv->fman);
+out_no_fman:
 	if (dev_priv->stealth)
 		pci_release_region(dev->pdev, 2);
 	else
@@ -518,6 +537,7 @@ static int vmw_driver_unload(struct drm_device *dev)
 	}
 	vmw_kms_close(dev_priv);
 	vmw_overlay_close(dev_priv);
+	vmw_fence_manager_takedown(dev_priv->fman);
 	if (dev_priv->stealth)
 		pci_release_region(dev->pdev, 2);
 	else
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 3018871aaaf..770f0636cee 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -38,6 +38,7 @@
 #include "ttm/ttm_lock.h"
 #include "ttm/ttm_execbuf_util.h"
 #include "ttm/ttm_module.h"
+#include "vmwgfx_fence.h"
 
 #define VMWGFX_DRIVER_DATE "20100927"
 #define VMWGFX_DRIVER_MAJOR 1
@@ -53,6 +54,11 @@
 #define VMW_PL_GMR TTM_PL_PRIV0
 #define VMW_PL_FLAG_GMR TTM_PL_FLAG_PRIV0
 
+#define VMW_RES_CONTEXT ttm_driver_type0
+#define VMW_RES_SURFACE ttm_driver_type1
+#define VMW_RES_STREAM ttm_driver_type2
+#define VMW_RES_FENCE ttm_driver_type3
+
 struct vmw_fpriv {
 	struct drm_master *locked_master;
 	struct ttm_object_file *tfile;
@@ -245,6 +251,7 @@ struct vmw_private {
 	atomic_t fifo_queue_waiters;
 	uint32_t last_read_seqno;
 	spinlock_t irq_lock;
+	struct vmw_fence_manager *fman;
 
 	/*
 	 * Device state
@@ -456,8 +463,6 @@ extern int vmw_irq_postinstall(struct drm_device *dev);
 extern void vmw_irq_uninstall(struct drm_device *dev);
 extern bool vmw_seqno_passed(struct vmw_private *dev_priv,
 				uint32_t seqno);
-extern int vmw_fence_wait_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv);
 extern int vmw_fallback_wait(struct vmw_private *dev_priv,
 			     bool lazy,
 			     bool fifo_idle,
@@ -466,7 +471,8 @@ extern int vmw_fallback_wait(struct vmw_private *dev_priv,
 			     unsigned long timeout);
 extern void vmw_update_seqno(struct vmw_private *dev_priv,
 				struct vmw_fifo_state *fifo_state);
-
+extern void vmw_seqno_waiter_add(struct vmw_private *dev_priv);
+extern void vmw_seqno_waiter_remove(struct vmw_private *dev_priv);
 
 /**
  * Rudimentary fence-like objects currently used only for throttling -
@@ -572,4 +578,8 @@ static inline struct vmw_dma_buffer *vmw_dmabuf_reference(struct vmw_dma_buffer
 	return NULL;
 }
 
+static inline struct ttm_mem_global *vmw_mem_glob(struct vmw_private *dev_priv)
+{
+	return (struct ttm_mem_global *) dev_priv->mem_global_ref.object;
+}
 #endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index be41484735b..d48ee89a519 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -256,7 +256,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 		val_buf = &sw_context->val_bufs[cur_validate_node];
 		val_buf->bo = ttm_bo_reference(bo);
 		val_buf->usage = TTM_USAGE_READWRITE;
-		val_buf->new_sync_obj_arg = (void *) dev_priv;
+		val_buf->new_sync_obj_arg = (void *) DRM_VMW_FENCE_FLAG_EXEC;
 		list_add_tail(&val_buf->head, &sw_context->validate_nodes);
 		++sw_context->cur_val_buf;
 	}
@@ -321,7 +321,6 @@ static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
 	return 0;
 }
 
-
 static int vmw_cmd_dma(struct vmw_private *dev_priv,
 		       struct vmw_sw_context *sw_context,
 		       SVGA3dCmdHeader *header)
@@ -676,6 +675,50 @@ static int vmw_resize_cmd_bounce(struct vmw_sw_context *sw_context,
 	return 0;
 }
 
+/**
+ * vmw_execbuf_fence_commands - create and submit a command stream fence
+ *
+ * Creates a fence object and submits a command stream marker.
+ * If this fails for some reason, we sync the fifo and return NULL.
+ * It is then safe to fence buffers with a NULL pointer.
+ */
+
+int vmw_execbuf_fence_commands(struct drm_file *file_priv,
+			       struct vmw_private *dev_priv,
+			       struct vmw_fence_obj **p_fence,
+			       uint32_t *p_handle)
+{
+	uint32_t sequence;
+	int ret;
+	bool synced = false;
+
+
+	ret = vmw_fifo_send_fence(dev_priv, &sequence);
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Fence submission error. Syncing.\n");
+		synced = true;
+	}
+
+	if (p_handle != NULL)
+		ret = vmw_user_fence_create(file_priv, dev_priv->fman,
+					    sequence,
+					    DRM_VMW_FENCE_FLAG_EXEC,
+					    p_fence, p_handle);
+	else
+		ret = vmw_fence_create(dev_priv->fman, sequence,
+				       DRM_VMW_FENCE_FLAG_EXEC,
+				       p_fence);
+
+	if (unlikely(ret != 0 && !synced)) {
+		(void) vmw_fallback_wait(dev_priv, false, false,
+					 sequence, false,
+					 VMW_FENCE_WAIT_TIMEOUT);
+		*p_fence = NULL;
+	}
+
+	return 0;
+}
+
 int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv)
 {
@@ -686,9 +729,10 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 	int ret;
 	void *user_cmd;
 	void *cmd;
-	uint32_t seqno;
 	struct vmw_sw_context *sw_context = &dev_priv->ctx;
 	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	struct vmw_fence_obj *fence;
+	uint32_t handle;
 
 	ret = ttm_read_lock(&vmaster->lock, true);
 	if (unlikely(ret != 0))
@@ -755,34 +799,60 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 	memcpy(cmd, sw_context->cmd_bounce, arg->command_size);
 	vmw_fifo_commit(dev_priv, arg->command_size);
 
-	ret = vmw_fifo_send_fence(dev_priv, &seqno);
-
-	ttm_eu_fence_buffer_objects(&sw_context->validate_nodes,
-				    (void *)(unsigned long) seqno);
-	vmw_clear_validations(sw_context);
-	mutex_unlock(&dev_priv->cmdbuf_mutex);
-
+	user_fence_rep = (struct drm_vmw_fence_rep __user *)
+		(unsigned long)arg->fence_rep;
+	ret = vmw_execbuf_fence_commands(file_priv, dev_priv,
+					 &fence,
+					 (user_fence_rep) ? &handle : NULL);
 	/*
 	 * This error is harmless, because if fence submission fails,
-	 * vmw_fifo_send_fence will sync.
+	 * vmw_fifo_send_fence will sync. The error will be propagated to
+	 * user-space in @fence_rep
 	 */
 
 	if (ret != 0)
 		DRM_ERROR("Fence submission error. Syncing.\n");
 
-	fence_rep.error = ret;
-	fence_rep.fence_seq = (uint64_t) seqno;
-	fence_rep.pad64 = 0;
+	ttm_eu_fence_buffer_objects(&sw_context->validate_nodes,
+				    (void *) fence);
 
-	user_fence_rep = (struct drm_vmw_fence_rep __user *)
-	    (unsigned long)arg->fence_rep;
+	vmw_clear_validations(sw_context);
+	mutex_unlock(&dev_priv->cmdbuf_mutex);
 
-	/*
-	 * copy_to_user errors will be detected by user space not
-	 * seeing fence_rep::error filled in.
-	 */
+	if (user_fence_rep) {
+		fence_rep.error = ret;
+		fence_rep.handle = handle;
+		fence_rep.seqno = fence->seqno;
+		vmw_update_seqno(dev_priv, &dev_priv->fifo);
+		fence_rep.passed_seqno = dev_priv->last_read_seqno;
+
+		/*
+		 * copy_to_user errors will be detected by user space not
+		 * seeing fence_rep::error filled in. Typically
+		 * user-space would have pre-set that member to -EFAULT.
+		 */
+		ret = copy_to_user(user_fence_rep, &fence_rep,
+				   sizeof(fence_rep));
+
+		/*
+		 * User-space lost the fence object. We need to sync
+		 * and unreference the handle.
+		 */
+		if (unlikely(ret != 0) && (fence_rep.error == 0)) {
+			BUG_ON(fence == NULL);
+
+			ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
+						  handle, TTM_REF_USAGE);
+			DRM_ERROR("Fence copy error. Syncing.\n");
+			(void) vmw_fence_obj_wait(fence,
+						  fence->signal_mask,
+						  false, false,
+						  VMW_FENCE_WAIT_TIMEOUT);
+		}
+	}
 
-	ret = copy_to_user(user_fence_rep, &fence_rep, sizeof(fence_rep));
+	if (likely(fence != NULL))
+		vmw_fence_obj_unreference(&fence);
 
 	vmw_kms_cursor_post_execbuf(dev_priv);
 	ttm_read_unlock(&vmaster->lock);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
new file mode 100644
index 00000000000..5065a140fdf
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -0,0 +1,619 @@
+/**************************************************************************
+ *
+ * Copyright © 2011 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "drmP.h"
+#include "vmwgfx_drv.h"
+
+#define VMW_FENCE_WRAP (1 << 31)
+
+struct vmw_fence_manager {
+	int num_fence_objects;
+	struct vmw_private *dev_priv;
+	spinlock_t lock;
+	u32 next_seqno;
+	struct list_head fence_list;
+	struct work_struct work;
+	u32 user_fence_size;
+	u32 fence_size;
+	bool fifo_down;
+	struct list_head cleanup_list;
+};
+
+struct vmw_user_fence {
+	struct ttm_base_object base;
+	struct vmw_fence_obj fence;
+};
+
+/**
+ * vmw_fence_obj_destroy_locked
+ *
+ */
+
+static void vmw_fence_obj_destroy_locked(struct kref *kref)
+{
+	struct vmw_fence_obj *fence =
+		container_of(kref, struct vmw_fence_obj, kref);
+
+	struct vmw_fence_manager *fman = fence->fman;
+	unsigned int num_fences;
+
+	list_del_init(&fence->head);
+	num_fences = --fman->num_fence_objects;
+	spin_unlock_irq(&fman->lock);
+	if (fence->destroy)
+		fence->destroy(fence);
+	else
+		kfree(fence);
+
+	spin_lock_irq(&fman->lock);
+}
+
+
+/**
+ * Execute cleanup actions of fences recently signaled.
+ * This is done from a workqueue so we don't have to execute
+ * cleanup actions from atomic context.
+ */
+
+static void vmw_fence_work_func(struct work_struct *work)
+{
+	struct vmw_fence_manager *fman =
+		container_of(work, struct vmw_fence_manager, work);
+	struct list_head list;
+	struct vmw_fence_action *action, *next_action;
+
+	do {
+		INIT_LIST_HEAD(&list);
+		spin_lock_irq(&fman->lock);
+		list_splice_init(&fman->cleanup_list, &list);
+		spin_unlock_irq(&fman->lock);
+
+		if (list_empty(&list))
+			return;
+
+		/*
+		 * At this point, only we should be able to manipulate the
+		 * list heads of the actions we have on the private list.
+		 */
+
+		list_for_each_entry_safe(action, next_action, &list, head) {
+			list_del_init(&action->head);
+			action->cleanup(action);
+		}
+	} while (1);
+}
+
+struct vmw_fence_manager *vmw_fence_manager_init(struct vmw_private *dev_priv)
+{
+	struct vmw_fence_manager *fman = kzalloc(sizeof(*fman), GFP_KERNEL);
+
+	if (unlikely(fman == NULL))
+		return NULL;
+
+	fman->dev_priv = dev_priv;
+	spin_lock_init(&fman->lock);
+	INIT_LIST_HEAD(&fman->fence_list);
+	INIT_LIST_HEAD(&fman->cleanup_list);
+	INIT_WORK(&fman->work, &vmw_fence_work_func);
+	fman->fifo_down = true;
+	fman->user_fence_size = ttm_round_pot(sizeof(struct vmw_user_fence));
+	fman->fence_size = ttm_round_pot(sizeof(struct vmw_fence_obj));
+
+	return fman;
+}
+
+void vmw_fence_manager_takedown(struct vmw_fence_manager *fman)
+{
+	unsigned long irq_flags;
+	bool lists_empty;
+
+	(void) cancel_work_sync(&fman->work);
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	lists_empty = list_empty(&fman->fence_list) &&
+		list_empty(&fman->cleanup_list);
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+
+	BUG_ON(!lists_empty);
+	kfree(fman);
+}
+
+static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
+			      struct vmw_fence_obj *fence,
+			      u32 seqno,
+			      uint32_t mask,
+			      void (*destroy) (struct vmw_fence_obj *fence))
+{
+	unsigned long irq_flags;
+	unsigned int num_fences;
+	int ret = 0;
+
+	fence->seqno = seqno;
+	INIT_LIST_HEAD(&fence->seq_passed_actions);
+	fence->fman = fman;
+	fence->signaled = 0;
+	fence->signal_mask = mask;
+	kref_init(&fence->kref);
+	fence->destroy = destroy;
+	init_waitqueue_head(&fence->queue);
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	if (unlikely(fman->fifo_down)) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+	list_add_tail(&fence->head, &fman->fence_list);
+	num_fences = ++fman->num_fence_objects;
+
+out_unlock:
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+	return ret;
+
+}
+
+struct vmw_fence_obj *vmw_fence_obj_reference(struct vmw_fence_obj *fence)
+{
+	kref_get(&fence->kref);
+	return fence;
+}
+
+/**
+ * vmw_fence_obj_unreference
+ *
+ * Note that this function may not be entered with disabled irqs since
+ * it may re-enable them in the destroy function.
+ *
+ */
+void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p)
+{
+	struct vmw_fence_obj *fence = *fence_p;
+	struct vmw_fence_manager *fman = fence->fman;
+
+	*fence_p = NULL;
+	spin_lock_irq(&fman->lock);
+	BUG_ON(atomic_read(&fence->kref.refcount) == 0);
+	kref_put(&fence->kref, vmw_fence_obj_destroy_locked);
+	spin_unlock_irq(&fman->lock);
+}
+
+void vmw_fences_perform_actions(struct vmw_fence_manager *fman,
+				struct list_head *list)
+{
+	struct vmw_fence_action *action, *next_action;
+
+	list_for_each_entry_safe(action, next_action, list, head) {
+		list_del_init(&action->head);
+		if (action->seq_passed != NULL)
+			action->seq_passed(action);
+
+		/*
+		 * Add the cleanup action to the cleanup list so that
+		 * it will be performed by a worker task.
+		 */
+
+		if (action->cleanup != NULL)
+			list_add_tail(&action->head, &fman->cleanup_list);
+	}
+}
+
+void vmw_fences_update(struct vmw_fence_manager *fman, u32 seqno)
+{
+	unsigned long flags;
+	struct vmw_fence_obj *fence, *next_fence;
+	struct list_head action_list;
+
+	spin_lock_irqsave(&fman->lock, flags);
+	list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
+		if (seqno - fence->seqno < VMW_FENCE_WRAP) {
+			list_del_init(&fence->head);
+			fence->signaled |= DRM_VMW_FENCE_FLAG_EXEC;
+			INIT_LIST_HEAD(&action_list);
+			list_splice_init(&fence->seq_passed_actions,
+					 &action_list);
+			vmw_fences_perform_actions(fman, &action_list);
+			wake_up_all(&fence->queue);
+		}
+
+	}
+	if (!list_empty(&fman->cleanup_list))
+		(void) schedule_work(&fman->work);
+	spin_unlock_irqrestore(&fman->lock, flags);
+}
+
+
+bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
+			    uint32_t flags)
+{
+	struct vmw_fence_manager *fman = fence->fman;
+	unsigned long irq_flags;
+	uint32_t signaled;
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	signaled = fence->signaled;
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+
+	flags &= fence->signal_mask;
+	if ((signaled & flags) == flags)
+		return 1;
+
+	if ((signaled & DRM_VMW_FENCE_FLAG_EXEC) == 0) {
+		struct vmw_private *dev_priv = fman->dev_priv;
+		__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+		u32 seqno;
+
+		seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+		vmw_fences_update(fman, seqno);
+	}
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	signaled = fence->signaled;
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+
+	return ((signaled & flags) == flags);
+}
+
+int vmw_fence_obj_wait(struct vmw_fence_obj *fence,
+		       uint32_t flags, bool lazy,
+		       bool interruptible, unsigned long timeout)
+{
+	struct vmw_private *dev_priv = fence->fman->dev_priv;
+	long ret;
+
+	if (likely(vmw_fence_obj_signaled(fence, flags)))
+		return 0;
+
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+	vmw_seqno_waiter_add(dev_priv);
+
+	if (interruptible)
+		ret = wait_event_interruptible_timeout
+			(fence->queue,
+			 vmw_fence_obj_signaled(fence, flags),
+			 timeout);
+	else
+		ret = wait_event_timeout
+			(fence->queue,
+			 vmw_fence_obj_signaled(fence, flags),
+			 timeout);
+
+	vmw_seqno_waiter_remove(dev_priv);
+
+	if (unlikely(ret == 0))
+		ret = -EBUSY;
+	else if (likely(ret > 0))
+		ret = 0;
+
+	return ret;
+}
+
+void vmw_fence_obj_flush(struct vmw_fence_obj *fence)
+{
+	struct vmw_private *dev_priv = fence->fman->dev_priv;
+
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+}
+
+static void vmw_fence_destroy(struct vmw_fence_obj *fence)
+{
+	struct vmw_fence_manager *fman = fence->fman;
+
+	kfree(fence);
+	/*
+	 * Free kernel space accounting.
+	 */
+	ttm_mem_global_free(vmw_mem_glob(fman->dev_priv),
+			    fman->fence_size);
+}
+
+int vmw_fence_create(struct vmw_fence_manager *fman,
+		     uint32_t seqno,
+		     uint32_t mask,
+		     struct vmw_fence_obj **p_fence)
+{
+	struct ttm_mem_global *mem_glob = vmw_mem_glob(fman->dev_priv);
+	struct vmw_fence_obj *fence;
+	int ret;
+
+	ret = ttm_mem_global_alloc(mem_glob, fman->fence_size,
+				   false, false);
+	if (unlikely(ret != 0))
+		return ret;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (unlikely(fence == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_object;
+	}
+
+	ret = vmw_fence_obj_init(fman, fence, seqno, mask,
+				 vmw_fence_destroy);
+	if (unlikely(ret != 0))
+		goto out_err_init;
+
+	*p_fence = fence;
+	return 0;
+
+out_err_init:
+	kfree(fence);
+out_no_object:
+	ttm_mem_global_free(mem_glob, fman->fence_size);
+	return ret;
+}
+
+
+static void vmw_user_fence_destroy(struct vmw_fence_obj *fence)
+{
+	struct vmw_user_fence *ufence =
+		container_of(fence, struct vmw_user_fence, fence);
+	struct vmw_fence_manager *fman = fence->fman;
+
+	kfree(ufence);
+	/*
+	 * Free kernel space accounting.
+	 */
+	ttm_mem_global_free(vmw_mem_glob(fman->dev_priv),
+			    fman->user_fence_size);
+}
+
+static void vmw_user_fence_base_release(struct ttm_base_object **p_base)
+{
+	struct ttm_base_object *base = *p_base;
+	struct vmw_user_fence *ufence =
+		container_of(base, struct vmw_user_fence, base);
+	struct vmw_fence_obj *fence = &ufence->fence;
+
+	*p_base = NULL;
+	vmw_fence_obj_unreference(&fence);
+}
+
+int vmw_user_fence_create(struct drm_file *file_priv,
+			  struct vmw_fence_manager *fman,
+			  uint32_t seqno,
+			  uint32_t mask,
+			  struct vmw_fence_obj **p_fence,
+			  uint32_t *p_handle)
+{
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_user_fence *ufence;
+	struct vmw_fence_obj *tmp;
+	struct ttm_mem_global *mem_glob = vmw_mem_glob(fman->dev_priv);
+	int ret;
+
+	/*
+	 * Kernel memory space accounting, since this object may
+	 * be created by a user-space request.
+	 */
+
+	ret = ttm_mem_global_alloc(mem_glob, fman->user_fence_size,
+				   false, false);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ufence = kzalloc(sizeof(*ufence), GFP_KERNEL);
+	if (unlikely(ufence == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_object;
+	}
+
+	ret = vmw_fence_obj_init(fman, &ufence->fence, seqno,
+				 mask, vmw_user_fence_destroy);
+	if (unlikely(ret != 0)) {
+		kfree(ufence);
+		goto out_no_object;
+	}
+
+	/*
+	 * The base object holds a reference which is freed in
+	 * vmw_user_fence_base_release.
+	 */
+	tmp = vmw_fence_obj_reference(&ufence->fence);
+	ret = ttm_base_object_init(tfile, &ufence->base, false,
+				   VMW_RES_FENCE,
+				   &vmw_user_fence_base_release, NULL);
+
+
+	if (unlikely(ret != 0)) {
+		/*
+		 * Free the base object's reference
+		 */
+		vmw_fence_obj_unreference(&tmp);
+		goto out_err;
+	}
+
+	*p_fence = &ufence->fence;
+	*p_handle = ufence->base.hash.key;
+
+	return 0;
+out_err:
+	tmp = &ufence->fence;
+	vmw_fence_obj_unreference(&tmp);
+out_no_object:
+	ttm_mem_global_free(mem_glob, fman->user_fence_size);
+	return ret;
+}
+
+
+/**
+ * vmw_fence_fifo_down - signal all unsignaled fence objects.
+ */
+
+void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
+{
+	unsigned long irq_flags;
+	struct list_head action_list;
+	int ret;
+
+	/*
+	 * The list may be altered while we traverse it, so always
+	 * restart when we've released the fman->lock.
+	 */
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	fman->fifo_down = true;
+	while (!list_empty(&fman->fence_list)) {
+		struct vmw_fence_obj *fence =
+			list_entry(fman->fence_list.prev, struct vmw_fence_obj,
+				   head);
+		kref_get(&fence->kref);
+		spin_unlock_irq(&fman->lock);
+
+		ret = vmw_fence_obj_wait(fence, fence->signal_mask,
+					 false, false,
+					 VMW_FENCE_WAIT_TIMEOUT);
+
+		if (unlikely(ret != 0)) {
+			list_del_init(&fence->head);
+			fence->signaled |= DRM_VMW_FENCE_FLAG_EXEC;
+			INIT_LIST_HEAD(&action_list);
+			list_splice_init(&fence->seq_passed_actions,
+					 &action_list);
+			vmw_fences_perform_actions(fman, &action_list);
+			wake_up_all(&fence->queue);
+		}
+
+		spin_lock_irq(&fman->lock);
+
+		BUG_ON(!list_empty(&fence->head));
+		kref_put(&fence->kref, vmw_fence_obj_destroy_locked);
+	}
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+}
+
+void vmw_fence_fifo_up(struct vmw_fence_manager *fman)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	fman->fifo_down = false;
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+}
+
+
+int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv)
+{
+	struct drm_vmw_fence_wait_arg *arg =
+	    (struct drm_vmw_fence_wait_arg *)data;
+	unsigned long timeout;
+	struct ttm_base_object *base;
+	struct vmw_fence_obj *fence;
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	int ret;
+	uint64_t wait_timeout = ((uint64_t)arg->timeout_us * HZ);
+
+	/*
+	 * 64-bit division not present on 32-bit systems, so do an
+	 * approximation. (Divide by 1000000).
+	 */
+
+	wait_timeout = (wait_timeout >> 20) + (wait_timeout >> 24) -
+	  (wait_timeout >> 26);
+
+	if (!arg->cookie_valid) {
+		arg->cookie_valid = 1;
+		arg->kernel_cookie = jiffies + wait_timeout;
+	}
+
+	base = ttm_base_object_lookup(tfile, arg->handle);
+	if (unlikely(base == NULL)) {
+		printk(KERN_ERR "Wait invalid fence object handle "
+		       "0x%08lx.\n",
+		       (unsigned long)arg->handle);
+		return -EINVAL;
+	}
+
+	fence = &(container_of(base, struct vmw_user_fence, base)->fence);
+
+	timeout = jiffies;
+	if (time_after_eq(timeout, (unsigned long)arg->kernel_cookie)) {
+		ret = ((vmw_fence_obj_signaled(fence, arg->flags)) ?
+		       0 : -EBUSY);
+		goto out;
+	}
+
+	timeout = (unsigned long)arg->kernel_cookie - timeout;
+
+	ret = vmw_fence_obj_wait(fence, arg->flags, arg->lazy, true, timeout);
+
+out:
+	ttm_base_object_unref(&base);
+
+	/*
+	 * Optionally unref the fence object.
+	 */
+
+	if (ret == 0 && (arg->wait_options & DRM_VMW_WAIT_OPTION_UNREF))
+		return ttm_ref_object_base_unref(tfile, arg->handle,
+						 TTM_REF_USAGE);
+	return ret;
+}
+
+int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv)
+{
+	struct drm_vmw_fence_signaled_arg *arg =
+		(struct drm_vmw_fence_signaled_arg *) data;
+	struct ttm_base_object *base;
+	struct vmw_fence_obj *fence;
+	struct vmw_fence_manager *fman;
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_private *dev_priv = vmw_priv(dev);
+
+	base = ttm_base_object_lookup(tfile, arg->handle);
+	if (unlikely(base == NULL)) {
+		printk(KERN_ERR "Fence signaled invalid fence object handle "
+		       "0x%08lx.\n",
+		       (unsigned long)arg->handle);
+		return -EINVAL;
+	}
+
+	fence = &(container_of(base, struct vmw_user_fence, base)->fence);
+	fman = fence->fman;
+
+	arg->signaled = vmw_fence_obj_signaled(fence, arg->flags);
+	spin_lock_irq(&fman->lock);
+
+	arg->signaled_flags = fence->signaled;
+	arg->passed_seqno = dev_priv->last_read_seqno;
+	spin_unlock_irq(&fman->lock);
+
+	ttm_base_object_unref(&base);
+
+	return 0;
+}
+
+
+int vmw_fence_obj_unref_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv)
+{
+	struct drm_vmw_fence_arg *arg =
+		(struct drm_vmw_fence_arg *) data;
+
+	return ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
+					 arg->handle,
+					 TTM_REF_USAGE);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
new file mode 100644
index 00000000000..93074064aaf
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
@@ -0,0 +1,105 @@
+/**************************************************************************
+ *
+ * Copyright © 2011 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _VMWGFX_FENCE_H_
+#define _VMWGFX_FENCE_H_ /* include guard; closed by the final #endif */
+#define VMW_FENCE_WAIT_TIMEOUT (5*HZ)
+
+struct vmw_private;
+
+struct vmw_fence_manager;
+
+/**
+ * struct vmw_fence_action - List node (@head) carrying a @seq_passed and a
+ * @cleanup callback. NOTE(review): callers are not visible here; confirm use.
+ */
+struct vmw_fence_action {
+	struct list_head head;
+	void (*seq_passed) (struct vmw_fence_action *action);
+	void (*cleanup) (struct vmw_fence_action *action);
+};
+
+struct vmw_fence_obj {
+	struct kref kref;
+	u32 seqno;
+
+	struct vmw_fence_manager *fman;
+	struct list_head head;
+	uint32_t signaled;
+	uint32_t signal_mask;
+	struct list_head seq_passed_actions;
+	void (*destroy)(struct vmw_fence_obj *fence);
+	wait_queue_head_t queue;
+};
+
+extern struct vmw_fence_manager *
+vmw_fence_manager_init(struct vmw_private *dev_priv);
+
+extern void vmw_fence_manager_takedown(struct vmw_fence_manager *fman);
+
+extern void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p);
+
+extern struct vmw_fence_obj *
+vmw_fence_obj_reference(struct vmw_fence_obj *fence);
+
+extern void vmw_fences_update(struct vmw_fence_manager *fman,
+			      u32 sequence);
+
+extern bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
+				   uint32_t flags);
+
+extern int vmw_fence_obj_wait(struct vmw_fence_obj *fence, uint32_t flags,
+			      bool lazy,
+			      bool interruptible, unsigned long timeout);
+
+extern void vmw_fence_obj_flush(struct vmw_fence_obj *fence);
+
+extern int vmw_fence_create(struct vmw_fence_manager *fman,
+			    uint32_t seqno,
+			    uint32_t mask,
+			    struct vmw_fence_obj **p_fence);
+
+extern int vmw_user_fence_create(struct drm_file *file_priv,
+				 struct vmw_fence_manager *fman,
+				 uint32_t sequence,
+				 uint32_t mask,
+				 struct vmw_fence_obj **p_fence,
+				 uint32_t *p_handle);
+
+extern void vmw_fence_fifo_up(struct vmw_fence_manager *fman);
+
+extern void vmw_fence_fifo_down(struct vmw_fence_manager *fman);
+
+extern int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file_priv);
+
+extern int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data,
+					struct drm_file *file_priv);
+
+extern int vmw_fence_obj_unref_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file_priv);
+#endif /* _VMWGFX_FENCE_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index 13dde06b60b..a005292a890 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -40,8 +40,13 @@ irqreturn_t vmw_irq_handler(DRM_IRQ_ARGS)
 	status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
 	spin_unlock(&dev_priv->irq_lock);
 
-	if (status & SVGA_IRQFLAG_ANY_FENCE)
+	if (status & SVGA_IRQFLAG_ANY_FENCE) {
+		__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+		uint32_t seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+
+		vmw_fences_update(dev_priv->fman, seqno);
 		wake_up_all(&dev_priv->fence_queue);
+	}
 	if (status & SVGA_IRQFLAG_FIFO_PROGRESS)
 		wake_up_all(&dev_priv->fifo_queue);
 
@@ -68,12 +73,12 @@ void vmw_update_seqno(struct vmw_private *dev_priv,
 			 struct vmw_fifo_state *fifo_state)
 {
 	__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
-
 	uint32_t seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
 
 	if (dev_priv->last_read_seqno != seqno) {
 		dev_priv->last_read_seqno = seqno;
 		vmw_marker_pull(&fifo_state->marker_queue, seqno);
+		vmw_fences_update(dev_priv->fman, seqno);
 	}
 }
 
@@ -175,7 +180,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
 	return ret;
 }
 
-static void vmw_seqno_waiter_add(struct vmw_private *dev_priv)
+void vmw_seqno_waiter_add(struct vmw_private *dev_priv)
 {
 	mutex_lock(&dev_priv->hw_mutex);
 	if (dev_priv->fence_queue_waiters++ == 0) {
@@ -192,7 +197,7 @@ static void vmw_seqno_waiter_add(struct vmw_private *dev_priv)
 	mutex_unlock(&dev_priv->hw_mutex);
 }
 
-static void vmw_seqno_waiter_remove(struct vmw_private *dev_priv)
+void vmw_seqno_waiter_remove(struct vmw_private *dev_priv)
 {
 	mutex_lock(&dev_priv->hw_mutex);
 	if (--dev_priv->fence_queue_waiters == 0) {
@@ -286,25 +291,3 @@ void vmw_irq_uninstall(struct drm_device *dev)
 	status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
 	outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
 }
-
-#define VMW_FENCE_WAIT_TIMEOUT 3*HZ;
-
-int vmw_fence_wait_ioctl(struct drm_device *dev, void *data,
-			 struct drm_file *file_priv)
-{
-	struct drm_vmw_fence_wait_arg *arg =
-	    (struct drm_vmw_fence_wait_arg *)data;
-	unsigned long timeout;
-
-	if (!arg->cookie_valid) {
-		arg->cookie_valid = 1;
-		arg->kernel_cookie = jiffies + VMW_FENCE_WAIT_TIMEOUT;
-	}
-
-	timeout = jiffies;
-	if (time_after_eq(timeout, (unsigned long)arg->kernel_cookie))
-		return -EBUSY;
-
-	timeout = (unsigned long)arg->kernel_cookie - timeout;
-	return vmw_wait_seqno(vmw_priv(dev), true, arg->seqno, true, timeout);
-}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 4b53803d0fa..c1b6ffd4ce7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -31,10 +31,6 @@
 #include "ttm/ttm_placement.h"
 #include "drmP.h"
 
-#define VMW_RES_CONTEXT ttm_driver_type0
-#define VMW_RES_SURFACE ttm_driver_type1
-#define VMW_RES_STREAM ttm_driver_type2
-
 struct vmw_user_context {
 	struct ttm_base_object base;
 	struct vmw_resource res;
diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h
index c2b3909ac50..763a7a3885a 100644
--- a/include/drm/vmwgfx_drm.h
+++ b/include/drm/vmwgfx_drm.h
@@ -48,8 +48,12 @@
 #define DRM_VMW_UNREF_SURFACE        10
 #define DRM_VMW_REF_SURFACE          11
 #define DRM_VMW_EXECBUF              12
-#define DRM_VMW_FENCE_WAIT           13
-#define DRM_VMW_GET_3D_CAP           14
+#define DRM_VMW_GET_3D_CAP           13
+#define DRM_VMW_FENCE_WAIT           14
+#define DRM_VMW_FENCE_SIGNALED       15
+#define DRM_VMW_FENCE_UNREF          16
+#define DRM_VMW_FENCE_EVENT          17
+
 
 /*************************************************************************/
 /**
@@ -318,14 +322,23 @@ struct drm_vmw_execbuf_arg {
 	uint32_t command_size;
 	uint32_t throttle_us;
 	uint64_t fence_rep;
-	 uint32_t version;
-	 uint32_t flags;
+	uint32_t version;
+	uint32_t flags;
 };
 
 /**
  * struct drm_vmw_fence_rep
  *
- * @fence_seq: Fence seqno associated with a command submission.
+ * @handle: Fence object handle for fence associated with a command submission.
+ * @mask: Fence flags relevant for this fence object.
+ * @seqno: Fence sequence number in fifo. A fence object with a lower
+ * seqno will signal the EXEC flag before a fence object with a higher
+ * seqno. This can be used by user-space to avoid kernel calls to determine
+ * whether a fence has signaled the EXEC flag. Note that @seqno will
+ * wrap at 32-bit.
+ * @passed_seqno: The highest seqno number processed by the hardware
+ * so far. This can be used to mark user-space fence objects as signaled, and
+ * to determine whether a fence seqno might be stale.
  * @error: This member should've been set to -EFAULT on submission.
  * The following actions should be take on completion:
  * error == -EFAULT: Fence communication failed. The host is synchronized.
@@ -339,9 +352,12 @@ struct drm_vmw_execbuf_arg {
  */
 
 struct drm_vmw_fence_rep {
-	uint64_t fence_seq;
-	int32_t error;
+	uint32_t handle;
+	uint32_t mask;
+	uint32_t seqno;
+	uint32_t passed_seqno;
 	uint32_t pad64;
+	int32_t error;
 };
 
 /*************************************************************************/
@@ -430,14 +446,6 @@ struct drm_vmw_unref_dmabuf_arg {
 	uint32_t pad64;
 };
 
-
-struct drm_vmw_fence_wait_arg {
-	uint64_t seqno;
-	uint64_t kernel_cookie;
-	int32_t cookie_valid;
-	int32_t pad64;
-};
-
 /*************************************************************************/
 /**
  * DRM_VMW_CONTROL_STREAM - Control overlays, aka streams.
@@ -559,6 +567,7 @@ struct drm_vmw_stream_arg {
  * Return a single stream that was claimed by this process. Also makes
  * sure that the stream has been stopped.
  */
+
 /*************************************************************************/
 /**
  * DRM_VMW_GET_3D_CAP
@@ -607,4 +616,114 @@ struct drm_vmw_update_layout_arg {
 	uint64_t rects;
 };
 
+
+/*************************************************************************/
+/**
+ * DRM_VMW_FENCE_WAIT
+ *
+ * Waits for a fence object to signal. The wait is interruptible, so that
+ * signals may be delivered during the wait. The wait may time out,
+ * in which case the call returns -EBUSY. If the wait is restarted,
+ * that is restarting without resetting @cookie_valid to zero,
+ * the timeout is computed from the first call.
+ *
+ * The flags argument to the DRM_VMW_FENCE_WAIT ioctl indicates what to wait
+ * on:
+ *
+ * DRM_VMW_FENCE_FLAG_EXEC: All commands ahead of the fence in the command
+ * stream have executed.
+ *
+ * DRM_VMW_FENCE_FLAG_QUERY: All query results resulting from query finish
+ * commands in the buffer given to the EXECBUF ioctl returning the fence
+ * object handle are available to user-space.
+ *
+ * DRM_VMW_WAIT_OPTION_UNREF: If this wait option is given, and the
+ * fence wait ioctl returns 0, the fence object has been unreferenced after
+ * the wait.
+ */
+
+#define DRM_VMW_FENCE_FLAG_EXEC   (1 << 0)
+#define DRM_VMW_FENCE_FLAG_QUERY  (1 << 1)
+
+#define DRM_VMW_WAIT_OPTION_UNREF (1 << 0)
+
+/**
+ * struct drm_vmw_fence_wait_arg
+ *
+ * @handle: Fence object handle as returned by the DRM_VMW_EXECBUF ioctl.
+ * @cookie_valid: Must be reset to 0 on first call. Left alone on restart.
+ * @kernel_cookie: Set to 0 on first call. Left alone on restart.
+ * @timeout_us: Wait timeout in microseconds. 0 for indefinite timeout.
+ * @lazy: Set to 1 if timing is not critical. Allow more than a kernel tick
+ * before returning.
+ * @flags: Fence flags to wait on.
+ * @wait_options: Options that control the behaviour of the wait ioctl.
+ *
+ * Input argument to the DRM_VMW_FENCE_WAIT ioctl.
+ */
+
+struct drm_vmw_fence_wait_arg {
+	uint32_t handle;
+	int32_t  cookie_valid;
+	uint64_t kernel_cookie;
+	uint64_t timeout_us;
+	int32_t lazy;
+	int32_t flags;
+	int32_t wait_options;
+	int32_t pad64;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_FENCE_SIGNALED
+ *
+ * Checks if a fence object is signaled.
+ */
+
+/**
+ * struct drm_vmw_fence_signaled_arg
+ *
+ * @handle: Fence object handle as returned by the DRM_VMW_EXECBUF ioctl.
+ * @flags: Fence object flags input to DRM_VMW_FENCE_SIGNALED ioctl
+ * @signaled: Out: Result of the signaled check on @flags.
+ * @passed_seqno: Out: Highest seqno passed so far. Can be used to signal the
+ * EXEC flag of user-space fence objects.
+ * @signaled_flags: Out: Flags the fence object has signaled.
+ *
+ * Input/Output argument to the DRM_VMW_FENCE_SIGNALED ioctl.
+ */
+
+struct drm_vmw_fence_signaled_arg {
+	 uint32_t handle;
+	 uint32_t flags;
+	 int32_t signaled;
+	 uint32_t passed_seqno;
+	 uint32_t signaled_flags;
+	 uint32_t pad64;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_FENCE_UNREF
+ *
+ * Unreferences a fence object, and causes it to be destroyed if there are no
+ * other references to it.
+ *
+ */
+
+/**
+ * struct drm_vmw_fence_arg
+ *
+ * @handle: Fence object handle as returned by the DRM_VMW_EXECBUF ioctl.
+ *
+ * Input/Output argument to the DRM_VMW_FENCE_UNREF ioctl.
+ */
+
+struct drm_vmw_fence_arg {
+	 uint32_t handle;
+	 uint32_t pad64;
+};
+
+
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From d991ef0395596c4aeabcded322011d3f5fa9e74e Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Tue, 4 Oct 2011 20:13:21 +0200
Subject: vmwgfx: Add dmabuf helper functions for pinning

Signed-off-by: Jakob Bornecrantz <jakob@vmware.com>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/vmwgfx/Makefile         |   2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c  |  33 ++++
 drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c  | 292 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h     |  32 +++-
 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c      |  57 +------
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c     |   4 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c |  66 +++-----
 7 files changed, 379 insertions(+), 107 deletions(-)
 create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c

(limited to 'drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c')

diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index 7d8e9d5d498..e13a118b2ee 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -5,6 +5,6 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
 	    vmwgfx_fb.o vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_buffer.o \
 	    vmwgfx_fifo.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \
 	    vmwgfx_overlay.o vmwgfx_marker.o vmwgfx_gmrid_manager.o \
-	    vmwgfx_fence.o
+	    vmwgfx_fence.o vmwgfx_dmabuf.o
 
 obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 5d665ce8cbe..98a5d7e9054 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -42,6 +42,10 @@ static uint32_t sys_placement_flags = TTM_PL_FLAG_SYSTEM |
 static uint32_t gmr_placement_flags = VMW_PL_FLAG_GMR |
 	TTM_PL_FLAG_CACHED;
 
+static uint32_t gmr_ne_placement_flags = VMW_PL_FLAG_GMR |
+	TTM_PL_FLAG_CACHED |
+	TTM_PL_FLAG_NO_EVICT;
+
 struct ttm_placement vmw_vram_placement = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -65,6 +69,20 @@ struct ttm_placement vmw_vram_gmr_placement = {
 	.busy_placement = &gmr_placement_flags
 };
 
+static uint32_t vram_gmr_ne_placement_flags[] = {
+	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT,
+	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
+};
+
+struct ttm_placement vmw_vram_gmr_ne_placement = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.num_placement = 2,
+	.placement = vram_gmr_ne_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &gmr_ne_placement_flags
+};
+
 struct ttm_placement vmw_vram_sys_placement = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -92,6 +110,21 @@ struct ttm_placement vmw_sys_placement = {
 	.busy_placement = &sys_placement_flags
 };
 
+static uint32_t evictable_placement_flags[] = {
+	TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED,
+	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED,
+	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+};
+
+struct ttm_placement vmw_evictable_placement = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.num_placement = 3,
+	.placement = evictable_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags
+};
+
 struct vmw_ttm_backend {
 	struct ttm_backend backend;
 	struct page **pages;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
new file mode 100644
index 00000000000..5668ad980cb
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
@@ -0,0 +1,292 @@
+/**************************************************************************
+ *
+ * Copyright © 2011 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "ttm/ttm_placement.h"
+
+#include "drmP.h"
+#include "vmwgfx_drv.h"
+
+
+/**
+ * vmw_dmabuf_to_placement - Validate a buffer to placement.
+ *
+ * May only be called by the current master as this function takes
+ * its lock in write mode. The buffer is reserved around the validation.
+ *
+ * Returns the first error from taking the lock, reserving or validating,
+ *  e.g. -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_placement(struct vmw_private *dev_priv,
+			    struct vmw_dma_buffer *buf,
+			    struct ttm_placement *placement,
+			    bool interruptible)
+{
+	struct vmw_master *vmaster = dev_priv->active_master;
+	struct ttm_buffer_object *bo = &buf->base;
+	int ret;
+
+	ret = ttm_write_lock(&vmaster->lock, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
+	if (unlikely(ret != 0))
+		goto err;
+
+	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+
+	ttm_bo_unreserve(bo);
+
+err:
+	ttm_write_unlock(&vmaster->lock);
+	return ret;
+}
+
+/**
+ * vmw_dmabuf_to_vram_or_gmr - Move a buffer to vram or gmr.
+ *
+ * May only be called by the current master as this function takes
+ * its lock in write mode.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Use the NO_EVICT placements (pin the buffer) if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_vram_or_gmr(struct vmw_private *dev_priv,
+			      struct vmw_dma_buffer *buf,
+			      bool pin, bool interruptible)
+{
+	struct vmw_master *vmaster = dev_priv->active_master;
+	struct ttm_buffer_object *bo = &buf->base;
+	struct ttm_placement *placement;
+	int ret;
+
+	ret = ttm_write_lock(&vmaster->lock, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
+	if (unlikely(ret != 0))
+		goto err;
+
+	/**
+	 * Put BO in VRAM if there is space, otherwise as a GMR.
+	 * If there is no space in VRAM and GMR ids are all used up,
+	 * start evicting GMRs to make room. If the DMA buffer can't be
+	 * used as a GMR, this will return -ENOMEM.
+	 */
+
+	if (pin)
+		placement = &vmw_vram_gmr_ne_placement;
+	else
+		placement = &vmw_vram_gmr_placement;
+
+	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+	if (likely(ret == 0) || ret == -ERESTARTSYS)
+		goto err_unreserve;
+
+
+	/**
+	 * If that failed, try VRAM again, this time evicting
+	 * previous contents.
+	 */
+
+	if (pin)
+		placement = &vmw_vram_ne_placement;
+	else
+		placement = &vmw_vram_placement;
+
+	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+
+err_unreserve:
+	ttm_bo_unreserve(bo);
+err:
+	ttm_write_unlock(&vmaster->lock);
+	return ret;
+}
+
+/**
+ * vmw_dmabuf_to_vram - Move a buffer to vram.
+ *
+ * May only be called by the current master as this function takes
+ * its lock in write mode (via vmw_dmabuf_to_placement()).
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer in vram if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_vram(struct vmw_private *dev_priv,
+		       struct vmw_dma_buffer *buf,
+		       bool pin, bool interruptible)
+{
+	struct ttm_placement *placement;
+
+	if (pin)
+		placement = &vmw_vram_ne_placement;
+	else
+		placement = &vmw_vram_placement;
+
+	return vmw_dmabuf_to_placement(dev_priv, buf,
+				       placement,
+				       interruptible);
+}
+
+/**
+ * vmw_dmabuf_to_start_of_vram - Move a buffer to start of vram.
+ *
+ * May only be called by the current master as this function takes
+ * its lock in write mode.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer in vram if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_start_of_vram(struct vmw_private *dev_priv,
+				struct vmw_dma_buffer *buf,
+				bool pin, bool interruptible)
+{
+	struct vmw_master *vmaster = dev_priv->active_master;
+	struct ttm_buffer_object *bo = &buf->base;
+	struct ttm_placement placement;
+	int ret = 0;
+
+	if (pin)
+		placement = vmw_vram_ne_placement;
+	else
+		placement = vmw_vram_placement;
+	placement.lpfn = bo->num_pages;
+
+	ret = ttm_write_lock(&vmaster->lock, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
+	if (unlikely(ret != 0))
+		goto err_unlock;
+
+	/* Is this buffer already in vram but not at the start of it? */
+	if (bo->mem.mem_type == TTM_PL_VRAM &&
+	    bo->mem.start < bo->num_pages &&
+	    bo->mem.start > 0)
+		(void) ttm_bo_validate(bo, &vmw_sys_placement, false,
+				       false, false);
+
+	ret = ttm_bo_validate(bo, &placement, interruptible, false, false);
+
+	/* For some reason we didn't end up at the start of vram */
+	WARN_ON(ret == 0 && bo->offset != 0);
+
+	ttm_bo_unreserve(bo);
+err_unlock:
+	ttm_write_unlock(&vmaster->lock);
+
+	return ret;
+}
+
+/**
+ * vmw_dmabuf_unpin - Unpin the given buffer, does not move the buffer.
+ *
+ * May only be called by the current master as this function takes
+ * its lock in write mode (via vmw_dmabuf_to_placement()).
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to unpin.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_unpin(struct vmw_private *dev_priv,
+		     struct vmw_dma_buffer *buf,
+		     bool interruptible)
+{
+	/*
+	 * We could in theory early out if the buffer is
+	 * unpinned but we need to lock and reserve the buffer
+	 * anyways so we don't gain much by that.
+	 */
+	return vmw_dmabuf_to_placement(dev_priv, buf,
+				       &vmw_evictable_placement,
+				       interruptible);
+}
+
+/**
+ * vmw_dmabuf_to_system - Move a buffer to system memory, without pinning it.
+ *
+ * May only be called by the current master as this function takes
+ * its lock in write mode (via vmw_dmabuf_to_placement()).
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_system(struct vmw_private *dev_priv,
+			 struct vmw_dma_buffer *buf,
+			 bool interruptible)
+{
+	return vmw_dmabuf_to_placement(dev_priv, buf,
+				       &vmw_sys_placement,
+				       interruptible);
+}
+
+void vmw_dmabuf_get_id_offset(struct vmw_dma_buffer *buf,
+			      uint32_t *gmrId, uint32_t *offset)
+{
+	if (buf->base.mem.mem_type == TTM_PL_VRAM) {
+		*gmrId = SVGA_GMR_FRAMEBUFFER;
+		*offset = buf->base.offset;
+	} else {
+		*gmrId = buf->base.mem.start;
+		*offset = 0;
+	}
+}
+
+void vmw_dmabuf_get_guest_ptr(struct vmw_dma_buffer *buf, SVGAGuestPtr *ptr)
+{
+	if (buf->base.mem.mem_type == TTM_PL_VRAM) {
+		ptr->gmrId = SVGA_GMR_FRAMEBUFFER;
+		ptr->offset = buf->base.offset;
+	} else {
+		ptr->gmrId = buf->base.mem.start;
+		ptr->offset = 0;
+	}
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index edd1e8362f3..5acf1f2c498 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -385,10 +385,6 @@ extern uint32_t vmw_dmabuf_validate_node(struct ttm_buffer_object *bo,
 extern void vmw_dmabuf_validate_clear(struct ttm_buffer_object *bo);
 extern int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile,
 				  uint32_t id, struct vmw_dma_buffer **out);
-extern int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
-				       struct vmw_dma_buffer *bo);
-extern int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv,
-				struct vmw_dma_buffer *bo);
 extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file_priv);
 extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data,
@@ -398,6 +394,32 @@ extern int vmw_user_stream_lookup(struct vmw_private *dev_priv,
 				  uint32_t *inout_id,
 				  struct vmw_resource **out);
 
+/**
+ * DMA buffer helper routines - vmwgfx_dmabuf.c
+ */
+extern int vmw_dmabuf_to_placement(struct vmw_private *vmw_priv,
+				   struct vmw_dma_buffer *bo,
+				   struct ttm_placement *placement,
+				   bool interruptible);
+extern int vmw_dmabuf_to_vram(struct vmw_private *dev_priv,
+			      struct vmw_dma_buffer *buf,
+			      bool pin, bool interruptible);
+extern int vmw_dmabuf_to_vram_or_gmr(struct vmw_private *dev_priv,
+				     struct vmw_dma_buffer *buf,
+				     bool pin, bool interruptible);
+extern int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
+				       struct vmw_dma_buffer *bo,
+				       bool pin, bool interruptible);
+extern int vmw_dmabuf_unpin(struct vmw_private *vmw_priv,
+			    struct vmw_dma_buffer *bo,
+			    bool interruptible);
+extern int vmw_dmabuf_to_system(struct vmw_private *vmw_priv,
+				struct vmw_dma_buffer *bo,
+				bool interruptible);
+extern void vmw_dmabuf_get_id_offset(struct vmw_dma_buffer *buf,
+				     uint32_t *gmrId, uint32_t *offset);
+extern void vmw_dmabuf_get_guest_ptr(struct vmw_dma_buffer *buf,
+				     SVGAGuestPtr *ptr);
 
 /**
  * Misc Ioctl functionality - vmwgfx_ioctl.c
@@ -440,7 +462,9 @@ extern struct ttm_placement vmw_vram_placement;
 extern struct ttm_placement vmw_vram_ne_placement;
 extern struct ttm_placement vmw_vram_sys_placement;
 extern struct ttm_placement vmw_vram_gmr_placement;
+extern struct ttm_placement vmw_vram_gmr_ne_placement;
 extern struct ttm_placement vmw_sys_placement;
+extern struct ttm_placement vmw_evictable_placement;
 extern struct ttm_bo_driver vmw_bo_driver;
 extern int vmw_dma_quiescent(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index b1888e801e2..191f1b2a2a2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -592,58 +592,6 @@ int vmw_fb_close(struct vmw_private *vmw_priv)
 	return 0;
 }
 
-int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv,
-			 struct vmw_dma_buffer *vmw_bo)
-{
-	struct ttm_buffer_object *bo = &vmw_bo->base;
-	int ret = 0;
-
-	ret = ttm_bo_reserve(bo, false, false, false, 0);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_bo_validate(bo, &vmw_sys_placement, false, false, false);
-	ttm_bo_unreserve(bo);
-
-	return ret;
-}
-
-int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
-				struct vmw_dma_buffer *vmw_bo)
-{
-	struct ttm_buffer_object *bo = &vmw_bo->base;
-	struct ttm_placement ne_placement = vmw_vram_ne_placement;
-	int ret = 0;
-
-	ne_placement.lpfn = bo->num_pages;
-
-	/* interuptable? */
-	ret = ttm_write_lock(&vmw_priv->active_master->lock, false);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_bo_reserve(bo, false, false, false, 0);
-	if (unlikely(ret != 0))
-		goto err_unlock;
-
-	if (bo->mem.mem_type == TTM_PL_VRAM &&
-	    bo->mem.start < bo->num_pages &&
-	    bo->mem.start > 0)
-		(void) ttm_bo_validate(bo, &vmw_sys_placement, false,
-				       false, false);
-
-	ret = ttm_bo_validate(bo, &ne_placement, false, false, false);
-
-	/* Could probably bug on */
-	WARN_ON(bo->offset != 0);
-
-	ttm_bo_unreserve(bo);
-err_unlock:
-	ttm_write_unlock(&vmw_priv->active_master->lock);
-
-	return ret;
-}
-
 int vmw_fb_off(struct vmw_private *vmw_priv)
 {
 	struct fb_info *info;
@@ -665,7 +613,8 @@ int vmw_fb_off(struct vmw_private *vmw_priv)
 	par->bo_ptr = NULL;
 	ttm_bo_kunmap(&par->map);
 
-	vmw_dmabuf_from_vram(vmw_priv, par->vmw_bo);
+	/* move this to system instead of just unpinning it */
+	vmw_dmabuf_to_system(vmw_priv, par->vmw_bo, false);
 
 	return 0;
 }
@@ -691,7 +640,7 @@ int vmw_fb_on(struct vmw_private *vmw_priv)
 	/* Make sure that all overlays are stoped when we take over */
 	vmw_overlay_stop_all(vmw_priv);
 
-	ret = vmw_dmabuf_to_start_of_vram(vmw_priv, par->vmw_bo);
+	ret = vmw_dmabuf_to_start_of_vram(vmw_priv, par->vmw_bo, true, false);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("could not move buffer to start of VRAM\n");
 		goto err_no_buffer;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index c34866ab352..b3d5120b1f4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -804,7 +804,7 @@ static int vmw_framebuffer_dmabuf_pin(struct vmw_framebuffer *vfb)
 
 	vmw_overlay_pause_all(dev_priv);
 
-	ret = vmw_dmabuf_to_start_of_vram(dev_priv, vfbd->buffer);
+	ret = vmw_dmabuf_to_start_of_vram(dev_priv, vfbd->buffer, true, false);
 
 	vmw_overlay_resume_all(dev_priv);
 
@@ -824,7 +824,7 @@ static int vmw_framebuffer_dmabuf_unpin(struct vmw_framebuffer *vfb)
 		return 0;
 	}
 
-	return vmw_dmabuf_from_vram(dev_priv, vfbd->buffer);
+	return vmw_dmabuf_unpin(dev_priv, vfbd->buffer, false);
 }
 
 static int vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index 07ce02da78a..7a7abcdf102 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -86,48 +86,6 @@ static inline void fill_flush(struct vmw_escape_video_flush *cmd,
 	cmd->flush.streamId = stream_id;
 }
 
-/**
- * Pin or unpin a buffer in vram.
- *
- * @dev_priv:  Driver private.
- * @buf:  DMA buffer to pin or unpin.
- * @pin:  Pin buffer in vram if true.
- * @interruptible:  Use interruptible wait.
- *
- * Takes the current masters ttm lock in read.
- *
- * Returns
- * -ERESTARTSYS if interrupted by a signal.
- */
-static int vmw_dmabuf_pin_in_vram(struct vmw_private *dev_priv,
-				  struct vmw_dma_buffer *buf,
-				  bool pin, bool interruptible)
-{
-	struct ttm_buffer_object *bo = &buf->base;
-	struct ttm_placement *overlay_placement = &vmw_vram_placement;
-	int ret;
-
-	ret = ttm_read_lock(&dev_priv->active_master->lock, interruptible);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
-	if (unlikely(ret != 0))
-		goto err;
-
-	if (pin)
-		overlay_placement = &vmw_vram_ne_placement;
-
-	ret = ttm_bo_validate(bo, overlay_placement, interruptible, false, false);
-
-	ttm_bo_unreserve(bo);
-
-err:
-	ttm_read_unlock(&dev_priv->active_master->lock);
-
-	return ret;
-}
-
 /**
  * Send put command to hw.
  *
@@ -247,6 +205,21 @@ static int vmw_overlay_send_stop(struct vmw_private *dev_priv,
 	return 0;
 }
 
+/**
+ * Move a buffer to vram, and pin it if @pin.
+ *
+ * XXX: This function is here to be changed at a later date.
+ */
+static int vmw_overlay_move_buffer(struct vmw_private *dev_priv,
+				   struct vmw_dma_buffer *buf,
+				   bool pin, bool inter)
+{
+	if (pin)
+		return vmw_dmabuf_to_vram(dev_priv, buf, true, inter);
+	else
+		return vmw_dmabuf_unpin(dev_priv, buf, inter);
+}
+
 /**
  * Stop or pause a stream.
  *
@@ -279,8 +252,8 @@ static int vmw_overlay_stop(struct vmw_private *dev_priv,
 			return ret;
 
 		/* We just remove the NO_EVICT flag so no -ENOMEM */
-		ret = vmw_dmabuf_pin_in_vram(dev_priv, stream->buf, false,
-					     interruptible);
+		ret = vmw_overlay_move_buffer(dev_priv, stream->buf, false,
+					      interruptible);
 		if (interruptible && ret == -ERESTARTSYS)
 			return ret;
 		else
@@ -342,7 +315,7 @@ static int vmw_overlay_update_stream(struct vmw_private *dev_priv,
 	/* We don't start the old stream if we are interrupted.
 	 * Might return -ENOMEM if it can't fit the buffer in vram.
 	 */
-	ret = vmw_dmabuf_pin_in_vram(dev_priv, buf, true, interruptible);
+	ret = vmw_overlay_move_buffer(dev_priv, buf, true, interruptible);
 	if (ret)
 		return ret;
 
@@ -351,7 +324,8 @@ static int vmw_overlay_update_stream(struct vmw_private *dev_priv,
 		/* This one needs to happen no matter what. We only remove
 		 * the NO_EVICT flag so this is safe from -ENOMEM.
 		 */
-		BUG_ON(vmw_dmabuf_pin_in_vram(dev_priv, buf, false, false) != 0);
+		BUG_ON(vmw_overlay_move_buffer(dev_priv, buf, false, false)
+		       != 0);
 		return ret;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 5bb39e818169783ee17ddbbefbd7bd16a4383fec Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Tue, 4 Oct 2011 20:13:33 +0200
Subject: vmwgfx: Handle device surface memory limit

Make surfaces swappable. Make sure we honor the maximum amount of surface
memory the device accepts. This is done by potentially reading back surface
contents not used by the current command submission and storing it
locally in buffer objects.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jakob Bornecrantz <jakob@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c   |  14 +
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c      |  22 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h      |  26 +
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c  |  19 +
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 836 ++++++++++++++++++++++++++++---
 5 files changed, 835 insertions(+), 82 deletions(-)

(limited to 'drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c')

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 98a5d7e9054..5a72ed90823 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -60,6 +60,11 @@ static uint32_t vram_gmr_placement_flags[] = {
 	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
 };
 
+static uint32_t gmr_vram_placement_flags[] = {
+	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED,
+	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
+};
+
 struct ttm_placement vmw_vram_gmr_placement = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -125,6 +130,15 @@ struct ttm_placement vmw_evictable_placement = {
 	.busy_placement = &sys_placement_flags
 };
 
+struct ttm_placement vmw_srf_placement = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.num_placement = 1,
+	.num_busy_placement = 2,
+	.placement = &gmr_placement_flags,
+	.busy_placement = gmr_vram_placement_flags
+};
+
 struct vmw_ttm_backend {
 	struct ttm_backend backend;
 	struct page **pages;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 7b88104144c..a98ee19bd68 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -402,6 +402,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	init_waitqueue_head(&dev_priv->fifo_queue);
 	dev_priv->fence_queue_waiters = 0;
 	atomic_set(&dev_priv->fifo_queue_waiters, 0);
+	INIT_LIST_HEAD(&dev_priv->surface_lru);
+	dev_priv->used_memory_size = 0;
 
 	dev_priv->io_start = pci_resource_start(dev->pdev, 0);
 	dev_priv->vram_start = pci_resource_start(dev->pdev, 1);
@@ -422,6 +424,10 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 
 	dev_priv->capabilities = vmw_read(dev_priv, SVGA_REG_CAPABILITIES);
 
+	dev_priv->vram_size = vmw_read(dev_priv, SVGA_REG_VRAM_SIZE);
+	dev_priv->mmio_size = vmw_read(dev_priv, SVGA_REG_MEM_SIZE);
+	dev_priv->fb_max_width = vmw_read(dev_priv, SVGA_REG_MAX_WIDTH);
+	dev_priv->fb_max_height = vmw_read(dev_priv, SVGA_REG_MAX_HEIGHT);
 	if (dev_priv->capabilities & SVGA_CAP_GMR) {
 		dev_priv->max_gmr_descriptors =
 			vmw_read(dev_priv,
@@ -434,13 +440,15 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 			vmw_read(dev_priv, SVGA_REG_GMRS_MAX_PAGES);
 		dev_priv->memory_size =
 			vmw_read(dev_priv, SVGA_REG_MEMORY_SIZE);
+		dev_priv->memory_size -= dev_priv->vram_size;
+	} else {
+		/*
+		 * An arbitrary limit of 512MiB on surface
+		 * memory. But all HWV8 hardware supports GMR2.
+		 */
+		dev_priv->memory_size = 512*1024*1024;
 	}
 
-	dev_priv->vram_size = vmw_read(dev_priv, SVGA_REG_VRAM_SIZE);
-	dev_priv->mmio_size = vmw_read(dev_priv, SVGA_REG_MEM_SIZE);
-	dev_priv->fb_max_width = vmw_read(dev_priv, SVGA_REG_MAX_WIDTH);
-	dev_priv->fb_max_height = vmw_read(dev_priv, SVGA_REG_MAX_HEIGHT);
-
 	mutex_unlock(&dev_priv->hw_mutex);
 
 	vmw_print_capabilities(dev_priv->capabilities);
@@ -454,8 +462,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	if (dev_priv->capabilities & SVGA_CAP_GMR2) {
 		DRM_INFO("Max number of GMR pages is %u\n",
 			 (unsigned)dev_priv->max_gmr_pages);
-		DRM_INFO("Max dedicated hypervisor graphics memory is %u\n",
-			 (unsigned)dev_priv->memory_size);
+		DRM_INFO("Max dedicated hypervisor surface memory is %u kiB\n",
+			 (unsigned)dev_priv->memory_size / 1024);
 	}
 	DRM_INFO("VRAM at 0x%08x size is %u kiB\n",
 		 dev_priv->vram_start, dev_priv->vram_size / 1024);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 75e6d10281a..ee564f0a4fb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -79,6 +79,7 @@ struct vmw_resource {
 	int id;
 	enum ttm_object_type res_type;
 	bool avail;
+	void (*remove_from_lists) (struct vmw_resource *res);
 	void (*hw_destroy) (struct vmw_resource *res);
 	void (*res_free) (struct vmw_resource *res);
 	struct list_head validate_head;
@@ -99,9 +100,11 @@ struct vmw_cursor_snooper {
 };
 
 struct vmw_framebuffer;
+struct vmw_surface_offset;
 
 struct vmw_surface {
 	struct vmw_resource res;
+	struct list_head lru_head; /* Protected by the resource lock */
 	uint32_t flags;
 	uint32_t format;
 	uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES];
@@ -112,6 +115,9 @@ struct vmw_surface {
 
 	/* TODO so far just a extra pointer */
 	struct vmw_cursor_snooper snooper;
+	struct ttm_buffer_object *backup;
+	struct vmw_surface_offset *offsets;
+	uint32_t backup_size;
 };
 
 struct vmw_marker_queue {
@@ -310,6 +316,16 @@ struct vmw_private {
 	struct ttm_buffer_object *pinned_bo;
 	uint32_t query_cid;
 	bool dummy_query_bo_pinned;
+
+	/*
+	 * Surface swapping. The "surface_lru" list is protected by the
+	 * resource lock in order to be able to destroy a surface and take
+	 * it off the lru atomically. "used_memory_size" is currently
+	 * protected by the cmdbuf mutex for simplicity.
+	 */
+
+	struct list_head surface_lru;
+	uint32_t used_memory_size;
 };
 
 static inline struct vmw_private *vmw_priv(struct drm_device *dev)
@@ -389,6 +405,8 @@ extern int vmw_surface_reference_ioctl(struct drm_device *dev, void *data,
 extern int vmw_surface_check(struct vmw_private *dev_priv,
 			     struct ttm_object_file *tfile,
 			     uint32_t handle, int *id);
+extern int vmw_surface_validate(struct vmw_private *dev_priv,
+				struct vmw_surface *srf);
 extern void vmw_dmabuf_bo_free(struct ttm_buffer_object *bo);
 extern int vmw_dmabuf_init(struct vmw_private *dev_priv,
 			   struct vmw_dma_buffer *vmw_bo,
@@ -412,6 +430,7 @@ extern int vmw_user_stream_lookup(struct vmw_private *dev_priv,
 				  struct ttm_object_file *tfile,
 				  uint32_t *inout_id,
 				  struct vmw_resource **out);
+extern void vmw_resource_unreserve(struct list_head *list);
 
 /**
  * DMA buffer helper routines - vmwgfx_dmabuf.c
@@ -486,6 +505,7 @@ extern struct ttm_placement vmw_vram_gmr_placement;
 extern struct ttm_placement vmw_vram_gmr_ne_placement;
 extern struct ttm_placement vmw_sys_placement;
 extern struct ttm_placement vmw_evictable_placement;
+extern struct ttm_placement vmw_srf_placement;
 extern struct ttm_bo_driver vmw_bo_driver;
 extern int vmw_dma_quiescent(struct drm_device *dev);
 
@@ -508,6 +528,12 @@ extern void
 vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
 			      bool only_on_cid_match, uint32_t cid);
 
+extern int vmw_execbuf_fence_commands(struct drm_file *file_priv,
+				      struct vmw_private *dev_priv,
+				      struct vmw_fence_obj **p_fence,
+				      uint32_t *p_handle);
+
+
 /**
  * IRQs and wating - vmwgfx_irq.c
  */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index dfd7fca6b3f..8a22f9d4a61 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -164,6 +164,14 @@ static int vmw_cmd_sid_check(struct vmw_private *dev_priv,
 		return ret;
 	}
 
+	ret = vmw_surface_validate(dev_priv, srf);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Could not validate surface.\n");
+		vmw_surface_unreference(&srf);
+		return ret;
+	}
+
 	sw_context->last_sid = *sid;
 	sw_context->sid_valid = true;
 	sw_context->sid_translation = srf->res.id;
@@ -257,6 +265,7 @@ static int vmw_cmd_present_check(struct vmw_private *dev_priv,
 		SVGA3dCmdPresent body;
 	} *cmd;
 
+
 	cmd = container_of(header, struct vmw_sid_cmd, header);
 
 	if (unlikely(!sw_context->kernel)) {
@@ -566,6 +575,13 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 		goto out_no_reloc;
 	}
 
+	ret = vmw_surface_validate(dev_priv, srf);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Culd not validate surface.\n");
+		goto out_no_validate;
+	}
+
 	/*
 	 * Patch command stream with device SID.
 	 */
@@ -579,6 +595,8 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 
 	return 0;
 
+out_no_validate:
+	vmw_surface_unreference(&srf);
 out_no_reloc:
 	vmw_dmabuf_unreference(&vmw_bo);
 	return ret;
@@ -882,6 +900,7 @@ static void vmw_clear_validations(struct vmw_sw_context *sw_context)
 	/*
 	 * Drop references to resources held during command submission.
 	 */
+	vmw_resource_unreserve(&sw_context->resource_list);
 	list_for_each_entry_safe(res, res_next, &sw_context->resource_list,
 				 validate_head) {
 		list_del_init(&res->validate_head);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index e0a41818d9d..93a68a61419 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -61,6 +61,12 @@ struct vmw_user_stream {
 	struct vmw_stream stream;
 };
 
+struct vmw_surface_offset {
+	uint32_t face;
+	uint32_t mip;
+	uint32_t bo_offset;
+};
+
 static inline struct vmw_dma_buffer *
 vmw_dma_buffer(struct ttm_buffer_object *bo)
 {
@@ -80,13 +86,36 @@ struct vmw_resource *vmw_resource_reference(struct vmw_resource *res)
 	return res;
 }
 
+
+/**
+ * vmw_resource_release_id - release a resource id to the id manager.
+ *
+ * @res: Pointer to the resource.
+ *
+ * Release the resource id to the resource id manager and set it to -1
+ */
+static void vmw_resource_release_id(struct vmw_resource *res)
+{
+	struct vmw_private *dev_priv = res->dev_priv;
+
+	write_lock(&dev_priv->resource_lock);
+	if (res->id != -1)
+		idr_remove(res->idr, res->id);
+	res->id = -1;
+	write_unlock(&dev_priv->resource_lock);
+}
+
 static void vmw_resource_release(struct kref *kref)
 {
 	struct vmw_resource *res =
 	    container_of(kref, struct vmw_resource, kref);
 	struct vmw_private *dev_priv = res->dev_priv;
+	int id = res->id;
+	struct idr *idr = res->idr;
 
-	idr_remove(res->idr, res->id);
+	res->avail = false;
+	if (res->remove_from_lists != NULL)
+		res->remove_from_lists(res);
 	write_unlock(&dev_priv->resource_lock);
 
 	if (likely(res->hw_destroy != NULL))
@@ -98,6 +127,9 @@ static void vmw_resource_release(struct kref *kref)
 		kfree(res);
 
 	write_lock(&dev_priv->resource_lock);
+
+	if (id != -1)
+		idr_remove(idr, id);
 }
 
 void vmw_resource_unreference(struct vmw_resource **p_res)
@@ -111,34 +143,61 @@ void vmw_resource_unreference(struct vmw_resource **p_res)
 	write_unlock(&dev_priv->resource_lock);
 }
 
+
+/**
+ * vmw_resource_alloc_id - allocate a resource id from the id manager.
+ *
+ * @dev_priv: Pointer to the device private structure.
+ * @res: Pointer to the resource.
+ *
+ * Allocate the lowest free resource id from the id manager, and set
+ * @res->id to that id. Returns 0 on success and -ENOMEM on failure.
+ */
+static int vmw_resource_alloc_id(struct vmw_private *dev_priv,
+				 struct vmw_resource *res)
+{
+	int ret;
+
+	BUG_ON(res->id != -1);
+
+	do {
+		if (unlikely(idr_pre_get(res->idr, GFP_KERNEL) == 0))
+			return -ENOMEM;
+
+		write_lock(&dev_priv->resource_lock);
+		ret = idr_get_new_above(res->idr, res, 1, &res->id);
+		write_unlock(&dev_priv->resource_lock);
+
+	} while (ret == -EAGAIN);
+
+	return ret;
+}
+
+
 static int vmw_resource_init(struct vmw_private *dev_priv,
 			     struct vmw_resource *res,
 			     struct idr *idr,
 			     enum ttm_object_type obj_type,
-			     void (*res_free) (struct vmw_resource *res))
+			     bool delay_id,
+			     void (*res_free) (struct vmw_resource *res),
+			     void (*remove_from_lists)
+			     (struct vmw_resource *res))
 {
-	int ret;
-
 	kref_init(&res->kref);
 	res->hw_destroy = NULL;
 	res->res_free = res_free;
+	res->remove_from_lists = remove_from_lists;
 	res->res_type = obj_type;
 	res->idr = idr;
 	res->avail = false;
 	res->dev_priv = dev_priv;
 	INIT_LIST_HEAD(&res->query_head);
 	INIT_LIST_HEAD(&res->validate_head);
-	do {
-		if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0))
-			return -ENOMEM;
-
-		write_lock(&dev_priv->resource_lock);
-		ret = idr_get_new_above(idr, res, 1, &res->id);
-		write_unlock(&dev_priv->resource_lock);
-
-	} while (ret == -EAGAIN);
-
-	return ret;
+	res->id = -1;
+	if (delay_id)
+		return 0;
+	else
+		return vmw_resource_alloc_id(dev_priv, res);
 }
 
 /**
@@ -227,14 +286,17 @@ static int vmw_context_init(struct vmw_private *dev_priv,
 	} *cmd;
 
 	ret = vmw_resource_init(dev_priv, res, &dev_priv->context_idr,
-				VMW_RES_CONTEXT, res_free);
+				VMW_RES_CONTEXT, false, res_free, NULL);
 
 	if (unlikely(ret != 0)) {
-		if (res_free == NULL)
-			kfree(res);
-		else
-			res_free(res);
-		return ret;
+		DRM_ERROR("Failed to allocate a resource id.\n");
+		goto out_early;
+	}
+
+	if (unlikely(res->id >= SVGA3D_MAX_CONTEXT_IDS)) {
+		DRM_ERROR("Out of hw context ids.\n");
+		vmw_resource_unreference(&res);
+		return -ENOMEM;
 	}
 
 	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
@@ -252,6 +314,13 @@ static int vmw_context_init(struct vmw_private *dev_priv,
 	(void) vmw_3d_resource_inc(dev_priv, false);
 	vmw_resource_activate(res, vmw_hw_context_destroy);
 	return 0;
+
+out_early:
+	if (res_free == NULL)
+		kfree(res);
+	else
+		res_free(res);
+	return ret;
 }
 
 struct vmw_resource *vmw_context_alloc(struct vmw_private *dev_priv)
@@ -387,31 +456,285 @@ int vmw_context_check(struct vmw_private *dev_priv,
 	return ret;
 }
 
+struct vmw_bpp {
+	uint8_t bpp;
+	uint8_t s_bpp;
+};
+
+/*
+ * Size table for the supported SVGA3D surface formats. It consists of
+ * two values. The bpp value and the s_bpp value which is short for
+ * "stride bits per pixel". The values are given in such a way that the
+ * minimum stride for the image is calculated using
+ *
+ * min_stride = w*s_bpp
+ *
+ * and the total memory requirement for the image is
+ *
+ * h*min_stride*bpp/s_bpp
+ *
+ */
+static const struct vmw_bpp vmw_sf_bpp[] = {
+	[SVGA3D_FORMAT_INVALID] = {0, 0},
+	[SVGA3D_X8R8G8B8] = {32, 32},
+	[SVGA3D_A8R8G8B8] = {32, 32},
+	[SVGA3D_R5G6B5] = {16, 16},
+	[SVGA3D_X1R5G5B5] = {16, 16},
+	[SVGA3D_A1R5G5B5] = {16, 16},
+	[SVGA3D_A4R4G4B4] = {16, 16},
+	[SVGA3D_Z_D32] = {32, 32},
+	[SVGA3D_Z_D16] = {16, 16},
+	[SVGA3D_Z_D24S8] = {32, 32},
+	[SVGA3D_Z_D15S1] = {16, 16},
+	[SVGA3D_LUMINANCE8] = {8, 8},
+	[SVGA3D_LUMINANCE4_ALPHA4] = {8, 8},
+	[SVGA3D_LUMINANCE16] = {16, 16},
+	[SVGA3D_LUMINANCE8_ALPHA8] = {16, 16},
+	[SVGA3D_DXT1] = {4, 16},
+	[SVGA3D_DXT2] = {8, 32},
+	[SVGA3D_DXT3] = {8, 32},
+	[SVGA3D_DXT4] = {8, 32},
+	[SVGA3D_DXT5] = {8, 32},
+	[SVGA3D_BUMPU8V8] = {16, 16},
+	[SVGA3D_BUMPL6V5U5] = {16, 16},
+	[SVGA3D_BUMPX8L8V8U8] = {32, 32},
+	[SVGA3D_ARGB_S10E5] = {16, 16},
+	[SVGA3D_ARGB_S23E8] = {32, 32},
+	[SVGA3D_A2R10G10B10] = {32, 32},
+	[SVGA3D_V8U8] = {16, 16},
+	[SVGA3D_Q8W8V8U8] = {32, 32},
+	[SVGA3D_CxV8U8] = {16, 16},
+	[SVGA3D_X8L8V8U8] = {32, 32},
+	[SVGA3D_A2W10V10U10] = {32, 32},
+	[SVGA3D_ALPHA8] = {8, 8},
+	[SVGA3D_R_S10E5] = {16, 16},
+	[SVGA3D_R_S23E8] = {32, 32},
+	[SVGA3D_RG_S10E5] = {16, 16},
+	[SVGA3D_RG_S23E8] = {32, 32},
+	[SVGA3D_BUFFER] = {8, 8},
+	[SVGA3D_Z_D24X8] = {32, 32},
+	[SVGA3D_V16U16] = {32, 32},
+	[SVGA3D_G16R16] = {32, 32},
+	[SVGA3D_A16B16G16R16] = {64,  64},
+	[SVGA3D_UYVY] = {12, 12},
+	[SVGA3D_YUY2] = {12, 12},
+	[SVGA3D_NV12] = {12, 8},
+	[SVGA3D_AYUV] = {32, 32},
+	[SVGA3D_BC4_UNORM] = {4,  16},
+	[SVGA3D_BC5_UNORM] = {8,  32},
+	[SVGA3D_Z_DF16] = {16,  16},
+	[SVGA3D_Z_DF24] = {24,  24},
+	[SVGA3D_Z_D24S8_INT] = {32,  32}
+};
+
 
 /**
  * Surface management.
  */
 
+struct vmw_surface_dma {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdSurfaceDMA body;
+	SVGA3dCopyBox cb;
+	SVGA3dCmdSurfaceDMASuffix suffix;
+};
+
+struct vmw_surface_define {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdDefineSurface body;
+};
+
+struct vmw_surface_destroy {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdDestroySurface body;
+};
+
+
+/**
+ * vmw_surface_dma_size - Compute fifo size for a dma command.
+ *
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * Computes the required size for a surface dma command for backup or
+ * restoration of the surface represented by @srf.
+ */
+static inline uint32_t vmw_surface_dma_size(const struct vmw_surface *srf)
+{
+	return srf->num_sizes * sizeof(struct vmw_surface_dma);
+}
+
+
+/**
+ * vmw_surface_define_size - Compute fifo size for a surface define command.
+ *
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * Computes the required size for a surface define command for the definition
+ * of the surface represented by @srf.
+ */
+static inline uint32_t vmw_surface_define_size(const struct vmw_surface *srf)
+{
+	return sizeof(struct vmw_surface_define) + srf->num_sizes *
+		sizeof(SVGA3dSize);
+}
+
+
+/**
+ * vmw_surface_destroy_size - Compute fifo size for a surface destroy command.
+ *
+ * Computes the required size for a surface destroy command for the destruction
+ * of a hw surface.
+ */
+static inline uint32_t vmw_surface_destroy_size(void)
+{
+	return sizeof(struct vmw_surface_destroy);
+}
+
+/**
+ * vmw_surface_destroy_encode - Encode a surface_destroy command.
+ *
+ * @id: The surface id
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ */
+static void vmw_surface_destroy_encode(uint32_t id,
+				       void *cmd_space)
+{
+	struct vmw_surface_destroy *cmd = (struct vmw_surface_destroy *)
+		cmd_space;
+
+	cmd->header.id = SVGA_3D_CMD_SURFACE_DESTROY;
+	cmd->header.size = sizeof(cmd->body);
+	cmd->body.sid = id;
+}
+
+/**
+ * vmw_surface_define_encode - Encode a surface_define command.
+ *
+ * @srf: Pointer to a struct vmw_surface object.
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ */
+static void vmw_surface_define_encode(const struct vmw_surface *srf,
+				      void *cmd_space)
+{
+	struct vmw_surface_define *cmd = (struct vmw_surface_define *)
+		cmd_space;
+	struct drm_vmw_size *src_size;
+	SVGA3dSize *cmd_size;
+	uint32_t cmd_len;
+	int i;
+
+	cmd_len = sizeof(cmd->body) + srf->num_sizes * sizeof(SVGA3dSize);
+
+	cmd->header.id = SVGA_3D_CMD_SURFACE_DEFINE;
+	cmd->header.size = cmd_len;
+	cmd->body.sid = srf->res.id;
+	cmd->body.surfaceFlags = srf->flags;
+	cmd->body.format = cpu_to_le32(srf->format);
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
+		cmd->body.face[i].numMipLevels = srf->mip_levels[i];
+
+	cmd += 1;
+	cmd_size = (SVGA3dSize *) cmd;
+	src_size = srf->sizes;
+
+	for (i = 0; i < srf->num_sizes; ++i, cmd_size++, src_size++) {
+		cmd_size->width = src_size->width;
+		cmd_size->height = src_size->height;
+		cmd_size->depth = src_size->depth;
+	}
+}
+
+
+/**
+ * vmw_surface_dma_encode - Encode a surface_dma command.
+ *
+ * @srf: Pointer to a struct vmw_surface object.
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ * @ptr: Pointer to an SVGAGuestPtr indicating where the surface contents
+ * should be placed or read from.
+ * @to_surface: Boolean whether to DMA to the surface or from the surface.
+ */
+static void vmw_surface_dma_encode(struct vmw_surface *srf,
+				   void *cmd_space,
+				   const SVGAGuestPtr *ptr,
+				   bool to_surface)
+{
+	uint32_t i;
+	uint32_t bpp = vmw_sf_bpp[srf->format].bpp;
+	uint32_t stride_bpp = vmw_sf_bpp[srf->format].s_bpp;
+	struct vmw_surface_dma *cmd = (struct vmw_surface_dma *)cmd_space;
+
+	for (i = 0; i < srf->num_sizes; ++i) {
+		SVGA3dCmdHeader *header = &cmd->header;
+		SVGA3dCmdSurfaceDMA *body = &cmd->body;
+		SVGA3dCopyBox *cb = &cmd->cb;
+		SVGA3dCmdSurfaceDMASuffix *suffix = &cmd->suffix;
+		const struct vmw_surface_offset *cur_offset = &srf->offsets[i];
+		const struct drm_vmw_size *cur_size = &srf->sizes[i];
+
+		header->id = SVGA_3D_CMD_SURFACE_DMA;
+		header->size = sizeof(*body) + sizeof(*cb) + sizeof(*suffix);
+
+		body->guest.ptr = *ptr;
+		body->guest.ptr.offset += cur_offset->bo_offset;
+		body->guest.pitch = (cur_size->width * stride_bpp + 7) >> 3;
+		body->host.sid = srf->res.id;
+		body->host.face = cur_offset->face;
+		body->host.mipmap = cur_offset->mip;
+		body->transfer = ((to_surface) ?  SVGA3D_WRITE_HOST_VRAM :
+				  SVGA3D_READ_HOST_VRAM);
+		cb->x = 0;
+		cb->y = 0;
+		cb->z = 0;
+		cb->srcx = 0;
+		cb->srcy = 0;
+		cb->srcz = 0;
+		cb->w = cur_size->width;
+		cb->h = cur_size->height;
+		cb->d = cur_size->depth;
+
+		suffix->suffixSize = sizeof(*suffix);
+		suffix->maximumOffset = body->guest.pitch*cur_size->height*
+			cur_size->depth*bpp / stride_bpp;
+		suffix->flags.discard = 0;
+		suffix->flags.unsynchronized = 0;
+		suffix->flags.reserved = 0;
+		++cmd;
+	}
+};
+
+
 static void vmw_hw_surface_destroy(struct vmw_resource *res)
 {
 
 	struct vmw_private *dev_priv = res->dev_priv;
-	struct {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdDestroySurface body;
-	} *cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
+	struct vmw_surface *srf;
+	void *cmd;
 
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Failed reserving FIFO space for surface "
-			  "destruction.\n");
-		return;
-	}
+	if (res->id != -1) {
 
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_SURFACE_DESTROY);
-	cmd->header.size = cpu_to_le32(sizeof(cmd->body));
-	cmd->body.sid = cpu_to_le32(res->id);
+		cmd = vmw_fifo_reserve(dev_priv, vmw_surface_destroy_size());
+		if (unlikely(cmd == NULL)) {
+			DRM_ERROR("Failed reserving FIFO space for surface "
+				  "destruction.\n");
+			return;
+		}
 
-	vmw_fifo_commit(dev_priv, sizeof(*cmd));
+		vmw_surface_destroy_encode(res->id, cmd);
+		vmw_fifo_commit(dev_priv, vmw_surface_destroy_size());
+
+		/*
+		 * TODO: make used_memory_size atomic, or use a separate
+		 * lock, to avoid taking dev_priv::cmdbuf_mutex in the
+		 * destroy path.
+		 */
+
+		mutex_lock(&dev_priv->cmdbuf_mutex);
+		srf = container_of(res, struct vmw_surface, res);
+		dev_priv->used_memory_size -= srf->backup_size;
+		mutex_unlock(&dev_priv->cmdbuf_mutex);
+
+	}
 	vmw_3d_resource_dec(dev_priv, false);
 }
 
@@ -419,70 +742,352 @@ void vmw_surface_res_free(struct vmw_resource *res)
 {
 	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
 
+	if (srf->backup)
+		ttm_bo_unref(&srf->backup);
+	kfree(srf->offsets);
 	kfree(srf->sizes);
 	kfree(srf->snooper.image);
 	kfree(srf);
 }
 
-int vmw_surface_init(struct vmw_private *dev_priv,
-		     struct vmw_surface *srf,
-		     void (*res_free) (struct vmw_resource *res))
+
+/**
+ * vmw_surface_do_validate - make a surface available to the device.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @srf: Pointer to a struct vmw_surface.
+ *
+ * If the surface doesn't have a hw id, allocate one, and optionally
+ * DMA the backed up surface contents to the device.
+ *
+ * Returns -EBUSY if there wasn't sufficient device resources to
+ * complete the validation. Retry after freeing up resources.
+ *
+ * May return other errors if the kernel is out of guest resources.
+ */
+int vmw_surface_do_validate(struct vmw_private *dev_priv,
+			    struct vmw_surface *srf)
 {
-	int ret;
-	struct {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdDefineSurface body;
-	} *cmd;
-	SVGA3dSize *cmd_size;
 	struct vmw_resource *res = &srf->res;
-	struct drm_vmw_size *src_size;
-	size_t submit_size;
-	uint32_t cmd_len;
-	int i;
+	struct list_head val_list;
+	struct ttm_validate_buffer val_buf;
+	uint32_t submit_size;
+	uint8_t *cmd;
+	int ret;
 
-	BUG_ON(res_free == NULL);
-	ret = vmw_resource_init(dev_priv, res, &dev_priv->surface_idr,
-				VMW_RES_SURFACE, res_free);
+	if (likely(res->id != -1))
+		return 0;
+
+	if (unlikely(dev_priv->used_memory_size + srf->backup_size >=
+		     dev_priv->memory_size))
+		return -EBUSY;
+
+	/*
+	 * Reserve- and validate the backup DMA bo.
+	 */
+
+	if (srf->backup) {
+		INIT_LIST_HEAD(&val_list);
+		val_buf.bo = ttm_bo_reference(srf->backup);
+		val_buf.new_sync_obj_arg = (void *)((unsigned long)
+						    DRM_VMW_FENCE_FLAG_EXEC);
+		list_add_tail(&val_buf.head, &val_list);
+		ret = ttm_eu_reserve_buffers(&val_list);
+		if (unlikely(ret != 0))
+			goto out_no_reserve;
+
+		ret = ttm_bo_validate(srf->backup, &vmw_srf_placement,
+				      true, false, false);
+		if (unlikely(ret != 0))
+			goto out_no_validate;
+	}
+
+	/*
+	 * Alloc id for the resource.
+	 */
 
+	ret = vmw_resource_alloc_id(dev_priv, res);
 	if (unlikely(ret != 0)) {
-		res_free(res);
-		return ret;
+		DRM_ERROR("Failed to allocate a surface id.\n");
+		goto out_no_id;
+	}
+	if (unlikely(res->id >= SVGA3D_MAX_SURFACE_IDS)) {
+		ret = -EBUSY;
+		goto out_no_fifo;
 	}
 
-	submit_size = sizeof(*cmd) + srf->num_sizes * sizeof(SVGA3dSize);
-	cmd_len = sizeof(cmd->body) + srf->num_sizes * sizeof(SVGA3dSize);
+
+	/*
+	 * Encode surface define- and dma commands.
+	 */
+
+	submit_size = vmw_surface_define_size(srf);
+	if (srf->backup)
+		submit_size += vmw_surface_dma_size(srf);
 
 	cmd = vmw_fifo_reserve(dev_priv, submit_size);
 	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Fifo reserve failed for create surface.\n");
-		vmw_resource_unreference(&res);
-		return -ENOMEM;
+		DRM_ERROR("Failed reserving FIFO space for surface "
+			  "validation.\n");
+		ret = -ENOMEM;
+		goto out_no_fifo;
 	}
 
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_SURFACE_DEFINE);
-	cmd->header.size = cpu_to_le32(cmd_len);
-	cmd->body.sid = cpu_to_le32(res->id);
-	cmd->body.surfaceFlags = cpu_to_le32(srf->flags);
-	cmd->body.format = cpu_to_le32(srf->format);
-	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
-		cmd->body.face[i].numMipLevels =
-		    cpu_to_le32(srf->mip_levels[i]);
+	vmw_surface_define_encode(srf, cmd);
+	if (srf->backup) {
+		SVGAGuestPtr ptr;
+
+		cmd += vmw_surface_define_size(srf);
+		vmw_bo_get_guest_ptr(srf->backup, &ptr);
+		vmw_surface_dma_encode(srf, cmd, &ptr, true);
 	}
 
-	cmd += 1;
-	cmd_size = (SVGA3dSize *) cmd;
-	src_size = srf->sizes;
+	vmw_fifo_commit(dev_priv, submit_size);
 
-	for (i = 0; i < srf->num_sizes; ++i, cmd_size++, src_size++) {
-		cmd_size->width = cpu_to_le32(src_size->width);
-		cmd_size->height = cpu_to_le32(src_size->height);
-		cmd_size->depth = cpu_to_le32(src_size->depth);
+	/*
+	 * Create a fence object and fence the backup buffer.
+	 */
+
+	if (srf->backup) {
+		struct vmw_fence_obj *fence;
+
+		(void) vmw_execbuf_fence_commands(NULL, dev_priv,
+						  &fence, NULL);
+		ttm_eu_fence_buffer_objects(&val_list, fence);
+		if (likely(fence != NULL))
+			vmw_fence_obj_unreference(&fence);
+		ttm_bo_unref(&val_buf.bo);
+		ttm_bo_unref(&srf->backup);
 	}
 
+	/*
+	 * Surface memory usage accounting.
+	 */
+
+	dev_priv->used_memory_size += srf->backup_size;
+
+	return 0;
+
+out_no_fifo:
+	vmw_resource_release_id(res);
+out_no_id:
+out_no_validate:
+	if (srf->backup)
+		ttm_eu_backoff_reservation(&val_list);
+out_no_reserve:
+	if (srf->backup)
+		ttm_bo_unref(&val_buf.bo);
+	return ret;
+}
+
+/**
+ * vmw_surface_evict - Evict a hw surface.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * DMA the contents of a hw surface to a backup guest buffer object,
+ * and destroy the hw surface, releasing its id.
+ */
+int vmw_surface_evict(struct vmw_private *dev_priv,
+		      struct vmw_surface *srf)
+{
+	struct vmw_resource *res = &srf->res;
+	struct list_head val_list;
+	struct ttm_validate_buffer val_buf;
+	uint32_t submit_size;
+	uint8_t *cmd;
+	int ret;
+	struct vmw_fence_obj *fence;
+	SVGAGuestPtr ptr;
+
+	BUG_ON(res->id == -1);
+
+	/*
+	 * Create a surface backup buffer object.
+	 */
+
+	if (!srf->backup) {
+		ret = ttm_bo_create(&dev_priv->bdev, srf->backup_size,
+				    ttm_bo_type_device,
+				    &vmw_srf_placement, 0, 0, true,
+				    NULL, &srf->backup);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+
+	/*
+	 * Reserve- and validate the backup DMA bo.
+	 */
+
+	INIT_LIST_HEAD(&val_list);
+	val_buf.bo = ttm_bo_reference(srf->backup);
+	val_buf.new_sync_obj_arg = (void *)(unsigned long)
+		DRM_VMW_FENCE_FLAG_EXEC;
+	list_add_tail(&val_buf.head, &val_list);
+	ret = ttm_eu_reserve_buffers(&val_list);
+	if (unlikely(ret != 0))
+		goto out_no_reserve;
+
+	ret = ttm_bo_validate(srf->backup, &vmw_srf_placement,
+			      true, false, false);
+	if (unlikely(ret != 0))
+		goto out_no_validate;
+
+
+	/*
+	 * Encode the dma- and surface destroy commands.
+	 */
+
+	submit_size = vmw_surface_dma_size(srf) + vmw_surface_destroy_size();
+	cmd = vmw_fifo_reserve(dev_priv, submit_size);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed reserving FIFO space for surface "
+			  "eviction.\n");
+		ret = -ENOMEM;
+		goto out_no_fifo;
+	}
+
+	vmw_bo_get_guest_ptr(srf->backup, &ptr);
+	vmw_surface_dma_encode(srf, cmd, &ptr, false);
+	cmd += vmw_surface_dma_size(srf);
+	vmw_surface_destroy_encode(res->id, cmd);
 	vmw_fifo_commit(dev_priv, submit_size);
+
+	/*
+	 * Surface memory usage accounting.
+	 */
+
+	dev_priv->used_memory_size -= srf->backup_size;
+
+	/*
+	 * Create a fence object and fence the DMA buffer.
+	 */
+
+	(void) vmw_execbuf_fence_commands(NULL, dev_priv,
+					  &fence, NULL);
+	ttm_eu_fence_buffer_objects(&val_list, fence);
+	if (likely(fence != NULL))
+		vmw_fence_obj_unreference(&fence);
+	ttm_bo_unref(&val_buf.bo);
+
+	/*
+	 * Release the surface ID.
+	 */
+
+	vmw_resource_release_id(res);
+
+	return 0;
+
+out_no_fifo:
+out_no_validate:
+	if (srf->backup)
+		ttm_eu_backoff_reservation(&val_list);
+out_no_reserve:
+	ttm_bo_unref(&val_buf.bo);
+	ttm_bo_unref(&srf->backup);
+	return ret;
+}
+
+
+/**
+ * vmw_surface_validate - make a surface available to the device, evicting
+ * other surfaces if needed.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @srf: Pointer to a struct vmw_surface.
+ *
+ * Try to validate a surface and if it fails due to limited device resources,
+ * repeatedly try to evict other surfaces until the request can be
+ * accommodated.
+ *
+ * May return errors if out of resources.
+ */
+int vmw_surface_validate(struct vmw_private *dev_priv,
+			 struct vmw_surface *srf)
+{
+	int ret;
+	struct vmw_surface *evict_srf;
+
+	do {
+		write_lock(&dev_priv->resource_lock);
+		list_del_init(&srf->lru_head);
+		write_unlock(&dev_priv->resource_lock);
+
+		ret = vmw_surface_do_validate(dev_priv, srf);
+		if (likely(ret != -EBUSY))
+			break;
+
+		write_lock(&dev_priv->resource_lock);
+		if (list_empty(&dev_priv->surface_lru)) {
+			DRM_ERROR("Out of device memory for surfaces.\n");
+			ret = -EBUSY;
+			write_unlock(&dev_priv->resource_lock);
+			break;
+		}
+
+		evict_srf = vmw_surface_reference
+			(list_first_entry(&dev_priv->surface_lru,
+					  struct vmw_surface,
+					  lru_head));
+		list_del_init(&evict_srf->lru_head);
+
+		write_unlock(&dev_priv->resource_lock);
+		(void) vmw_surface_evict(dev_priv, evict_srf);
+
+		vmw_surface_unreference(&evict_srf);
+
+	} while (1);
+
+	if (unlikely(ret != 0 && srf->res.id != -1)) {
+		write_lock(&dev_priv->resource_lock);
+		list_add_tail(&srf->lru_head, &dev_priv->surface_lru);
+		write_unlock(&dev_priv->resource_lock);
+	}
+
+	return ret;
+}
+
+
+/**
+ * vmw_surface_remove_from_lists - Remove surface resources from lookup lists
+ *
+ * @res: Pointer to a struct vmw_resource embedded in a struct vmw_surface
+ *
+ * As part of the resource destruction, remove the surface from any
+ * lookup lists.
+ */
+static void vmw_surface_remove_from_lists(struct vmw_resource *res)
+{
+	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
+
+	list_del_init(&srf->lru_head);
+}
+
+int vmw_surface_init(struct vmw_private *dev_priv,
+		     struct vmw_surface *srf,
+		     void (*res_free) (struct vmw_resource *res))
+{
+	int ret;
+	struct vmw_resource *res = &srf->res;
+
+	BUG_ON(res_free == NULL);
+	INIT_LIST_HEAD(&srf->lru_head);
+	ret = vmw_resource_init(dev_priv, res, &dev_priv->surface_idr,
+				VMW_RES_SURFACE, true, res_free,
+				vmw_surface_remove_from_lists);
+
+	if (unlikely(ret != 0))
+		res_free(res);
+
+	/*
+	 * The surface won't be visible to hardware until a
+	 * surface validate.
+	 */
+
 	(void) vmw_3d_resource_inc(dev_priv, false);
 	vmw_resource_activate(res, vmw_hw_surface_destroy);
-	return 0;
+	return ret;
 }
 
 static void vmw_user_surface_free(struct vmw_resource *res)
@@ -491,11 +1096,54 @@ static void vmw_user_surface_free(struct vmw_resource *res)
 	struct vmw_user_surface *user_srf =
 	    container_of(srf, struct vmw_user_surface, srf);
 
+	if (srf->backup)
+		ttm_bo_unref(&srf->backup);
+	kfree(srf->offsets);
 	kfree(srf->sizes);
 	kfree(srf->snooper.image);
 	kfree(user_srf);
 }
 
+/**
+ * vmw_resource_unreserve - unreserve resources previously reserved for
+ * command submission.
+ *
+ * @list: list of resources to unreserve, linked through validate_head.
+ *
+ * Only surfaces are acted upon; they are recognized by their res_free
+ * callback (vmw_surface_res_free or vmw_user_surface_free) and moved to
+ * the tail of the device's surface LRU list, so that they can be evicted
+ * if necessary. All other resources are skipped. The device's
+ * resource_lock is write-locked when the first surface is found and
+ * released after the traversal; a list without surfaces takes no lock.
+ */
+void vmw_resource_unreserve(struct list_head *list)
+{
+	struct vmw_resource *res;
+	struct vmw_surface *srf;
+	rwlock_t *lock = NULL;
+
+	list_for_each_entry(res, list, validate_head) {
+		/* Surfaces are identified by their res_free callback. */
+		if (res->res_free != &vmw_surface_res_free &&
+		    res->res_free != &vmw_user_surface_free)
+			continue;
+		/* Lock once, via the first surface's dev_priv. */
+		if (unlikely(lock == NULL)) {
+			lock = &res->dev_priv->resource_lock;
+			write_lock(lock);
+		}
+
+		srf = container_of(res, struct vmw_surface, res);
+		list_del_init(&srf->lru_head);
+		list_add_tail(&srf->lru_head, &res->dev_priv->surface_lru);
+	}
+
+	if (lock != NULL)
+		write_unlock(lock);
+}
+
+
 int vmw_user_surface_lookup_handle(struct vmw_private *dev_priv,
 				   struct ttm_object_file *tfile,
 				   uint32_t handle, struct vmw_surface **out)
@@ -572,7 +1220,12 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
 	struct drm_vmw_size __user *user_sizes;
 	int ret;
-	int i;
+	int i, j;
+	uint32_t cur_bo_offset;
+	struct drm_vmw_size *cur_size;
+	struct vmw_surface_offset *cur_offset;
+	uint32_t stride_bpp;
+	uint32_t bpp;
 
 	if (unlikely(user_srf == NULL))
 		return -ENOMEM;
@@ -583,6 +1236,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	srf->flags = req->flags;
 	srf->format = req->format;
 	srf->scanout = req->scanout;
+	srf->backup = NULL;
+
 	memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels));
 	srf->num_sizes = 0;
 	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
@@ -599,6 +1254,12 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 		ret = -ENOMEM;
 		goto out_err0;
 	}
+	srf->offsets = kmalloc(srf->num_sizes * sizeof(*srf->offsets),
+			       GFP_KERNEL);
+	if (unlikely(srf->offsets == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_offsets;
+	}
 
 	user_sizes = (struct drm_vmw_size __user *)(unsigned long)
 	    req->size_addr;
@@ -610,6 +1271,29 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 		goto out_err1;
 	}
 
+	cur_bo_offset = 0;
+	cur_offset = srf->offsets;
+	cur_size = srf->sizes;
+	/* NOTE(review): is srf->format range-checked before this lookup? */
+	bpp = vmw_sf_bpp[srf->format].bpp;
+	stride_bpp = vmw_sf_bpp[srf->format].s_bpp;
+	/* Record each face/mip image's offset; images are packed in order. */
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
+		for (j = 0; j < srf->mip_levels[i]; ++j) {
+			uint32_t stride =
+				(cur_size->width * stride_bpp + 7) >> 3;
+
+			cur_offset->face = i;
+			cur_offset->mip = j;
+			cur_offset->bo_offset = cur_bo_offset;
+			cur_bo_offset += stride * cur_size->height *
+				cur_size->depth * bpp / stride_bpp;
+			++cur_offset;
+			++cur_size;
+		}
+	}
+	srf->backup_size = cur_bo_offset;
+
 	if (srf->scanout &&
 	    srf->num_sizes == 1 &&
 	    srf->sizes[0].width == 64 &&
@@ -658,6 +1342,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	vmw_resource_unreference(&res);
 	return 0;
 out_err1:
+	kfree(srf->offsets);
+out_no_offsets:
 	kfree(srf->sizes);
 out_err0:
 	kfree(user_srf);
@@ -974,7 +1660,7 @@ static int vmw_stream_init(struct vmw_private *dev_priv,
 	int ret;
 
 	ret = vmw_resource_init(dev_priv, res, &dev_priv->stream_idr,
-				VMW_RES_STREAM, res_free);
+				VMW_RES_STREAM, false, res_free, NULL);
 
 	if (unlikely(ret != 0)) {
 		if (res_free == NULL)
-- 
cgit v1.2.3-70-g09d2