11 files changed, 333 insertions, 134 deletions
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index d6f2d2b882e..825ebe3d89d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -48,6 +48,15 @@ struct ttm_placement vmw_vram_placement = {
 	.busy_placement = &vram_placement_flags
 };
 
+struct ttm_placement vmw_vram_sys_placement = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.num_placement = 1,
+	.placement = &vram_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags
+};
+
 struct ttm_placement vmw_vram_ne_placement = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -172,6 +181,18 @@ static int vmw_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 	return 0;
 }
 
+static void vmw_move_notify(struct ttm_buffer_object *bo,
+		     struct ttm_mem_reg *new_mem)
+{
+	if (new_mem->mem_type != TTM_PL_SYSTEM)
+		vmw_dmabuf_gmr_unbind(bo);
+}
+
+static void vmw_swap_notify(struct ttm_buffer_object *bo)
+{
+	vmw_dmabuf_gmr_unbind(bo);
+}
+
 /**
  * FIXME: We're using the old vmware polling method to sync.
  * Do this with fences instead.
@@ -225,5 +246,7 @@ struct ttm_bo_driver vmw_bo_driver = {
 	.sync_obj_wait = vmw_sync_obj_wait,
 	.sync_obj_flush = vmw_sync_obj_flush,
 	.sync_obj_unref = vmw_sync_obj_unref,
-	.sync_obj_ref = vmw_sync_obj_ref
+	.sync_obj_ref = vmw_sync_obj_ref,
+	.move_notify = vmw_move_notify,
+	.swap_notify = vmw_swap_notify
 };
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 1db1ef30be2..0c9c0811f42 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -147,6 +147,8 @@ static char *vmw_devname = "vmwgfx";
 
 static int vmw_probe(struct pci_dev *, const struct pci_device_id *);
 static void vmw_master_init(struct vmw_master *);
+static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
+			      void *ptr);
 
 static void vmw_print_capabilities(uint32_t capabilities)
 {
@@ -207,6 +209,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 {
 	struct vmw_private *dev_priv;
 	int ret;
+	uint32_t svga_id;
 
 	dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL);
 	if (unlikely(dev_priv == NULL)) {
@@ -217,6 +220,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 
 	dev_priv->dev = dev;
 	dev_priv->vmw_chipset = chipset;
+	dev_priv->last_read_sequence = (uint32_t) -100;
 	mutex_init(&dev_priv->hw_mutex);
 	mutex_init(&dev_priv->cmdbuf_mutex);
 	rwlock_init(&dev_priv->resource_lock);
@@ -236,6 +240,16 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	dev_priv->mmio_start = pci_resource_start(dev->pdev, 2);
 
 	mutex_lock(&dev_priv->hw_mutex);
+
+	vmw_write(dev_priv, SVGA_REG_ID, SVGA_ID_2);
+	svga_id = vmw_read(dev_priv, SVGA_REG_ID);
+	if (svga_id != SVGA_ID_2) {
+		ret = -ENOSYS;
+		DRM_ERROR("Unsuported SVGA ID 0x%x\n", svga_id);
+		mutex_unlock(&dev_priv->hw_mutex);
+		goto out_err0;
+	}
+
 	dev_priv->capabilities = vmw_read(dev_priv, SVGA_REG_CAPABILITIES);
 
 	if (dev_priv->capabilities & SVGA_CAP_GMR) {
@@ -334,22 +348,24 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 		 */
 
 		DRM_INFO("It appears like vesafb is loaded. "
-			 "Ignore above error if any. Entering stealth mode.\n");
+			 "Ignore above error if any.\n");
 		ret = pci_request_region(dev->pdev, 2, "vmwgfx stealth probe");
 		if (unlikely(ret != 0)) {
 			DRM_ERROR("Failed reserving the SVGA MMIO resource.\n");
 			goto out_no_device;
 		}
-		vmw_kms_init(dev_priv);
-		vmw_overlay_init(dev_priv);
-	} else {
-		ret = vmw_request_device(dev_priv);
-		if (unlikely(ret != 0))
-			goto out_no_device;
-		vmw_kms_init(dev_priv);
-		vmw_overlay_init(dev_priv);
-		vmw_fb_init(dev_priv);
 	}
+	ret = vmw_request_device(dev_priv);
+	if (unlikely(ret != 0))
+		goto out_no_device;
+	vmw_kms_init(dev_priv);
+	vmw_overlay_init(dev_priv);
+	vmw_fb_init(dev_priv);
+
+	dev_priv->pm_nb.notifier_call = vmwgfx_pm_notifier;
+	register_pm_notifier(&dev_priv->pm_nb);
+
+	DRM_INFO("%s", vmw_fifo_have_3d(dev_priv) ? "Have 3D\n" : "No 3D\n");
 
 	return 0;
 
@@ -385,17 +401,17 @@ static int vmw_driver_unload(struct drm_device *dev)
 
 	DRM_INFO(VMWGFX_DRIVER_NAME " unload.\n");
 
-	if (!dev_priv->stealth) {
-		vmw_fb_close(dev_priv);
-		vmw_kms_close(dev_priv);
-		vmw_overlay_close(dev_priv);
-		vmw_release_device(dev_priv);
-		pci_release_regions(dev->pdev);
-	} else {
-		vmw_kms_close(dev_priv);
-		vmw_overlay_close(dev_priv);
+	unregister_pm_notifier(&dev_priv->pm_nb);
+
+	vmw_fb_close(dev_priv);
+	vmw_kms_close(dev_priv);
+	vmw_overlay_close(dev_priv);
+	vmw_release_device(dev_priv);
+	if (dev_priv->stealth)
 		pci_release_region(dev->pdev, 2);
-	}
+	else
+		pci_release_regions(dev->pdev);
+
 	if (dev_priv->capabilities & SVGA_CAP_IRQMASK)
 		drm_irq_uninstall(dev_priv->dev);
 	if (dev->devname == vmw_devname)
@@ -564,11 +580,6 @@ static int vmw_master_set(struct drm_device *dev,
 	int ret = 0;
 
 	DRM_INFO("Master set.\n");
-	if (dev_priv->stealth) {
-		ret = vmw_request_device(dev_priv);
-		if (unlikely(ret != 0))
-			return ret;
-	}
 
 	if (active) {
 		BUG_ON(active != &dev_priv->fbdev_master);
@@ -628,18 +639,11 @@ static void vmw_master_drop(struct drm_device *dev,
 
 	ttm_lock_set_kill(&vmaster->lock, true, SIGTERM);
 
-	if (dev_priv->stealth) {
-		ret = ttm_bo_evict_mm(&dev_priv->bdev, TTM_PL_VRAM);
-		if (unlikely(ret != 0))
-			DRM_ERROR("Unable to clean VRAM on master drop.\n");
-		vmw_release_device(dev_priv);
-	}
 	dev_priv->active_master = &dev_priv->fbdev_master;
 	ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
 	ttm_vt_unlock(&dev_priv->fbdev_master.lock);
 
-	if (!dev_priv->stealth)
-		vmw_fb_on(dev_priv);
+	vmw_fb_on(dev_priv);
 }
 
 
@@ -650,6 +654,57 @@ static void vmw_remove(struct pci_dev *pdev)
 	drm_put_dev(dev);
 }
 
+static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
+			      void *ptr)
+{
+	struct vmw_private *dev_priv =
+		container_of(nb, struct vmw_private, pm_nb);
+	struct vmw_master *vmaster = dev_priv->active_master;
+
+	switch (val) {
+	case PM_HIBERNATION_PREPARE:
+	case PM_SUSPEND_PREPARE:
+		ttm_suspend_lock(&vmaster->lock);
+
+		/**
+		 * This empties VRAM and unbinds all GMR bindings.
+		 * Buffer contents is moved to swappable memory.
+		 */
+		ttm_bo_swapout_all(&dev_priv->bdev);
+		break;
+	case PM_POST_HIBERNATION:
+	case PM_POST_SUSPEND:
+		ttm_suspend_unlock(&vmaster->lock);
+		break;
+	case PM_RESTORE_PREPARE:
+		break;
+	case PM_POST_RESTORE:
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/**
+ * These might not be needed with the virtual SVGA device.
+ */
+
+int vmw_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, PCI_D3hot);
+	return 0;
+}
+
+int vmw_pci_resume(struct pci_dev *pdev)
+{
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	return pci_enable_device(pdev);
+}
+
 static struct drm_driver driver = {
 	.driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED |
 	DRIVER_MODESET,
@@ -689,7 +744,9 @@ static struct drm_driver driver = {
 		       .name = VMWGFX_DRIVER_NAME,
 		       .id_table = vmw_pci_id_list,
 		       .probe = vmw_probe,
-		       .remove = vmw_remove
+		       .remove = vmw_remove,
+		       .suspend = vmw_pci_suspend,
+		       .resume = vmw_pci_resume
 		       },
 	.name = VMWGFX_DRIVER_NAME,
 	.desc = VMWGFX_DRIVER_DESC,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index e61bd85b697..356dc935ec1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -32,16 +32,17 @@
 #include "drmP.h"
 #include "vmwgfx_drm.h"
 #include "drm_hashtab.h"
+#include "linux/suspend.h"
 #include "ttm/ttm_bo_driver.h"
 #include "ttm/ttm_object.h"
 #include "ttm/ttm_lock.h"
 #include "ttm/ttm_execbuf_util.h"
 #include "ttm/ttm_module.h"
 
-#define VMWGFX_DRIVER_DATE "20090724"
-#define VMWGFX_DRIVER_MAJOR 0
-#define VMWGFX_DRIVER_MINOR 1
-#define VMWGFX_DRIVER_PATCHLEVEL 2
+#define VMWGFX_DRIVER_DATE "20100209"
+#define VMWGFX_DRIVER_MAJOR 1
+#define VMWGFX_DRIVER_MINOR 0
+#define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
 #define VMWGFX_MAX_RELOCATIONS 2048
@@ -95,6 +96,8 @@ struct vmw_surface {
 	struct drm_vmw_size *sizes;
 	uint32_t num_sizes;
 
+	bool scanout;
+
 	/* TODO so far just a extra pointer */
 	struct vmw_cursor_snooper snooper;
 };
@@ -110,6 +113,7 @@ struct vmw_fifo_state {
 	unsigned long static_buffer_size;
 	bool using_bounce_buffer;
 	uint32_t capabilities;
+	struct mutex fifo_mutex;
 	struct rw_semaphore rwsem;
 };
 
@@ -210,7 +214,7 @@ struct vmw_private {
 	 * Fencing and IRQs.
 	 */
 
-	uint32_t fence_seq;
+	atomic_t fence_seq;
 	wait_queue_head_t fence_queue;
 	wait_queue_head_t fifo_queue;
 	atomic_t fence_queue_waiters;
@@ -258,6 +262,7 @@ struct vmw_private {
 
 	struct vmw_master *active_master;
 	struct vmw_master fbdev_master;
+	struct notifier_block pm_nb;
 };
 
 static inline struct vmw_private *vmw_priv(struct drm_device *dev)
@@ -353,6 +358,7 @@ extern int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
 				       struct vmw_dma_buffer *bo);
 extern int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv,
 				struct vmw_dma_buffer *bo);
+extern void vmw_dmabuf_gmr_unbind(struct ttm_buffer_object *bo);
 extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file_priv);
 extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data,
@@ -386,6 +392,7 @@ extern int vmw_fifo_send_fence(struct vmw_private *dev_priv,
 			       uint32_t *sequence);
 extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason);
 extern int vmw_fifo_mmap(struct file *filp, struct vm_area_struct *vma);
+extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
 
 /**
  * TTM glue - vmwgfx_ttm_glue.c
@@ -401,6 +408,7 @@ extern int vmw_mmap(struct file *filp, struct vm_area_struct *vma);
 
 extern struct ttm_placement vmw_vram_placement;
 extern struct ttm_placement vmw_vram_ne_placement;
+extern struct ttm_placement vmw_vram_sys_placement;
 extern struct ttm_placement vmw_sys_placement;
 extern struct ttm_bo_driver vmw_bo_driver;
 extern int vmw_dma_quiescent(struct drm_device *dev);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 2e92da56740..0897359b3e4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -182,25 +182,19 @@ static int vmw_cmd_present_check(struct vmw_private *dev_priv,
 	return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.sid);
 }
 
-static int vmw_cmd_dma(struct vmw_private *dev_priv,
-		       struct vmw_sw_context *sw_context,
-		       SVGA3dCmdHeader *header)
+static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
+				   struct vmw_sw_context *sw_context,
+				   SVGAGuestPtr *ptr,
+				   struct vmw_dma_buffer **vmw_bo_p)
 {
-	uint32_t handle;
 	struct vmw_dma_buffer *vmw_bo = NULL;
 	struct ttm_buffer_object *bo;
-	struct vmw_surface *srf = NULL;
-	struct vmw_dma_cmd {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdSurfaceDMA dma;
-	} *cmd;
+	uint32_t handle = ptr->gmrId;
 	struct vmw_relocation *reloc;
-	int ret;
 	uint32_t cur_validate_node;
 	struct ttm_validate_buffer *val_buf;
+	int ret;
 
-	cmd = container_of(header, struct vmw_dma_cmd, header);
-	handle = cmd->dma.guest.ptr.gmrId;
 	ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Could not find or use GMR region.\n");
@@ -209,14 +203,14 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 	bo = &vmw_bo->base;
 
 	if (unlikely(sw_context->cur_reloc >= VMWGFX_MAX_RELOCATIONS)) {
-		DRM_ERROR("Max number of DMA commands per submission"
+		DRM_ERROR("Max number relocations per submission"
 			  " exceeded\n");
 		ret = -EINVAL;
 		goto out_no_reloc;
 	}
 
 	reloc = &sw_context->relocs[sw_context->cur_reloc++];
-	reloc->location = &cmd->dma.guest.ptr;
+	reloc->location = ptr;
 
 	cur_validate_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
 	if (unlikely(cur_validate_node >= VMWGFX_MAX_GMRS)) {
@@ -234,7 +228,89 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 		list_add_tail(&val_buf->head, &sw_context->validate_nodes);
 		++sw_context->cur_val_buf;
 	}
+	*vmw_bo_p = vmw_bo;
+	return 0;
+
+out_no_reloc:
+	vmw_dmabuf_unreference(&vmw_bo);
+	vmw_bo_p = NULL;
+	return ret;
+}
+
+static int vmw_cmd_end_query(struct vmw_private *dev_priv,
+			     struct vmw_sw_context *sw_context,
+			     SVGA3dCmdHeader *header)
+{
+	struct vmw_dma_buffer *vmw_bo;
+	struct vmw_query_cmd {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdEndQuery q;
+	} *cmd;
+	int ret;
 
+	cmd = container_of(header, struct vmw_query_cmd, header);
+	ret = vmw_cmd_cid_check(dev_priv, sw_context, header);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = vmw_translate_guest_ptr(dev_priv, sw_context,
+				      &cmd->q.guestResult,
+				      &vmw_bo);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_dmabuf_unreference(&vmw_bo);
+	return 0;
+}
+
+static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
+			      struct vmw_sw_context *sw_context,
+			      SVGA3dCmdHeader *header)
+{
+	struct vmw_dma_buffer *vmw_bo;
+	struct vmw_query_cmd {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdWaitForQuery q;
+	} *cmd;
+	int ret;
+
+	cmd = container_of(header, struct vmw_query_cmd, header);
+	ret = vmw_cmd_cid_check(dev_priv, sw_context, header);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = vmw_translate_guest_ptr(dev_priv, sw_context,
+				      &cmd->q.guestResult,
+				      &vmw_bo);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_dmabuf_unreference(&vmw_bo);
+	return 0;
+}
+
+
+static int vmw_cmd_dma(struct vmw_private *dev_priv,
+		       struct vmw_sw_context *sw_context,
+		       SVGA3dCmdHeader *header)
+{
+	struct vmw_dma_buffer *vmw_bo = NULL;
+	struct ttm_buffer_object *bo;
+	struct vmw_surface *srf = NULL;
+	struct vmw_dma_cmd {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdSurfaceDMA dma;
+	} *cmd;
+	int ret;
+
+	cmd = container_of(header, struct vmw_dma_cmd, header);
+	ret = vmw_translate_guest_ptr(dev_priv, sw_context,
+				      &cmd->dma.guest.ptr,
+				      &vmw_bo);
+	if (unlikely(ret != 0))
+		return ret;
+
+	bo = &vmw_bo->base;
 	ret = vmw_user_surface_lookup_handle(dev_priv, sw_context->tfile,
 					     cmd->dma.host.sid, &srf);
 	if (ret) {
@@ -379,8 +455,8 @@ static vmw_cmd_func vmw_cmd_funcs[SVGA_3D_CMD_MAX] = {
 	VMW_CMD_DEF(SVGA_3D_CMD_DRAW_PRIMITIVES, &vmw_cmd_draw),
 	VMW_CMD_DEF(SVGA_3D_CMD_SETSCISSORRECT, &vmw_cmd_cid_check),
 	VMW_CMD_DEF(SVGA_3D_CMD_BEGIN_QUERY, &vmw_cmd_cid_check),
-	VMW_CMD_DEF(SVGA_3D_CMD_END_QUERY, &vmw_cmd_cid_check),
-	VMW_CMD_DEF(SVGA_3D_CMD_WAIT_FOR_QUERY, &vmw_cmd_cid_check),
+	VMW_CMD_DEF(SVGA_3D_CMD_END_QUERY, &vmw_cmd_end_query),
+	VMW_CMD_DEF(SVGA_3D_CMD_WAIT_FOR_QUERY, &vmw_cmd_wait_query),
 	VMW_CMD_DEF(SVGA_3D_CMD_PRESENT_READBACK, &vmw_cmd_ok),
 	VMW_CMD_DEF(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN,
 		    &vmw_cmd_blt_surf_screen_check)
@@ -490,10 +566,29 @@ static int vmw_validate_single_buffer(struct vmw_private *dev_priv,
 	if (vmw_dmabuf_gmr(bo) != SVGA_GMR_NULL)
 		return 0;
 
+	/**
+	 * Put BO in VRAM, only if there is space.
+	 */
+
+	ret = ttm_bo_validate(bo, &vmw_vram_sys_placement, true, false);
+	if (unlikely(ret == -ERESTARTSYS))
+		return ret;
+
+	/**
+	 * Otherwise, set it up as GMR.
+	 */
+
+	if (vmw_dmabuf_gmr(bo) != SVGA_GMR_NULL)
+		return 0;
+
 	ret = vmw_gmr_bind(dev_priv, bo);
 	if (likely(ret == 0 || ret == -ERESTARTSYS))
 		return ret;
 
+	/**
+	 * If that failed, try VRAM again, this time evicting
+	 * previous contents.
+	 */
 
 	ret = ttm_bo_validate(bo, &vmw_vram_placement, true, false);
 	return ret;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 641dde76ada..a93367041cd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -559,6 +559,9 @@ int vmw_fb_init(struct vmw_private *vmw_priv)
 	info->pixmap.scan_align = 1;
 #endif
 
+	info->aperture_base = vmw_priv->vram_start;
+	info->aperture_size = vmw_priv->vram_size;
+
 	/*
 	 * Dirty & Deferred IO
 	 */
@@ -649,14 +652,6 @@ int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
 	if (unlikely(ret != 0))
 		goto err_unlock;
 
-	if (vmw_bo->gmr_bound) {
-		vmw_gmr_unbind(vmw_priv, vmw_bo->gmr_id);
-		spin_lock(&bo->glob->lru_lock);
-		ida_remove(&vmw_priv->gmr_ida, vmw_bo->gmr_id);
-		spin_unlock(&bo->glob->lru_lock);
-		vmw_bo->gmr_bound = NULL;
-	}
-
 	ret = ttm_bo_validate(bo, &ne_placement, false, false);
 	ttm_bo_unreserve(bo);
 err_unlock:
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 01feb48af33..39d43a01d84 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -29,6 +29,25 @@
 #include "drmP.h"
 #include "ttm/ttm_placement.h"
 
+bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
+{
+	__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+	uint32_t fifo_min, hwversion;
+
+	fifo_min = ioread32(fifo_mem  + SVGA_FIFO_MIN);
+	if (fifo_min <= SVGA_FIFO_3D_HWVERSION * sizeof(unsigned int))
+		return false;
+
+	hwversion = ioread32(fifo_mem + SVGA_FIFO_3D_HWVERSION);
+	if (hwversion == 0)
+		return false;
+
+	if (hwversion < SVGA3D_HWVERSION_WS65_B1)
+		return false;
+
+	return true;
+}
+
 int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 {
 	__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
@@ -55,6 +74,7 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 	fifo->reserved_size = 0;
 	fifo->using_bounce_buffer = false;
 
+	mutex_init(&fifo->fifo_mutex);
 	init_rwsem(&fifo->rwsem);
 
 	/*
@@ -98,8 +118,7 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 		 (unsigned int) min,
 		 (unsigned int) fifo->capabilities);
 
-	dev_priv->fence_seq = (uint32_t) -100;
-	dev_priv->last_read_sequence = (uint32_t) -100;
+	atomic_set(&dev_priv->fence_seq, dev_priv->last_read_sequence);
 	iowrite32(dev_priv->last_read_sequence, fifo_mem + SVGA_FIFO_FENCE);
 
 	return vmw_fifo_send_fence(dev_priv, &dummy);
@@ -265,7 +284,7 @@ void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes)
 	uint32_t reserveable = fifo_state->capabilities & SVGA_FIFO_CAP_RESERVE;
 	int ret;
 
-	down_write(&fifo_state->rwsem);
+	mutex_lock(&fifo_state->fifo_mutex);
 	max = ioread32(fifo_mem + SVGA_FIFO_MAX);
 	min = ioread32(fifo_mem + SVGA_FIFO_MIN);
 	next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD);
@@ -333,7 +352,7 @@ void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes)
 	}
 out_err:
 	fifo_state->reserved_size = 0;
-	up_write(&fifo_state->rwsem);
+	mutex_unlock(&fifo_state->fifo_mutex);
 	return NULL;
 }
 
@@ -408,6 +427,7 @@ void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes)
 
 	}
 
+	down_write(&fifo_state->rwsem);
 	if (fifo_state->using_bounce_buffer || reserveable) {
 		next_cmd += bytes;
 		if (next_cmd >= max)
@@ -419,8 +439,9 @@ void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes)
 	if (reserveable)
 		iowrite32(0, fifo_mem + SVGA_FIFO_RESERVED);
 	mb();
-	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
 	up_write(&fifo_state->rwsem);
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+	mutex_unlock(&fifo_state->fifo_mutex);
 }
 
 int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence)
@@ -433,9 +454,7 @@ int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence)
 
 	fm = vmw_fifo_reserve(dev_priv, bytes);
 	if (unlikely(fm == NULL)) {
-		down_write(&fifo_state->rwsem);
-		*sequence = dev_priv->fence_seq;
-		up_write(&fifo_state->rwsem);
+		*sequence = atomic_read(&dev_priv->fence_seq);
 		ret = -ENOMEM;
 		(void)vmw_fallback_wait(dev_priv, false, true, *sequence,
 					false, 3*HZ);
@@ -443,7 +462,7 @@ int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence)
 	}
 
 	do {
-		*sequence = dev_priv->fence_seq++;
+		*sequence = atomic_add_return(1, &dev_priv->fence_seq);
 	} while (*sequence == 0);
 
 	if (!(fifo_state->capabilities & SVGA_FIFO_CAP_FENCE)) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 5fa6a4ed238..1c7a316454d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -43,11 +43,17 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 		param->value = vmw_overlay_num_free_overlays(dev_priv);
 		break;
 	case DRM_VMW_PARAM_3D:
-		param->value = dev_priv->capabilities & SVGA_CAP_3D ? 1 : 0;
+		param->value = vmw_fifo_have_3d(dev_priv) ? 1 : 0;
 		break;
 	case DRM_VMW_PARAM_FIFO_OFFSET:
 		param->value = dev_priv->mmio_start;
 		break;
+	case DRM_VMW_PARAM_HW_CAPS:
+		param->value = dev_priv->capabilities;
+		break;
+	case DRM_VMW_PARAM_FIFO_CAPS:
+		param->value = dev_priv->fifo.capabilities;
+		break;
 	default:
 		DRM_ERROR("Illegal vmwgfx get param request: %d\n",
 			  param->param);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index d40086fc864..4d7cb539386 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -85,19 +85,12 @@ bool vmw_fence_signaled(struct vmw_private *dev_priv,
 		return true;
 
 	/**
-	 * Below is to signal stale fences that have wrapped.
-	 * First, block fence submission.
-	 */
-
-	down_read(&fifo_state->rwsem);
-
-	/**
 	 * Then check if the sequence is higher than what we've actually
 	 * emitted. Then the fence is stale and signaled.
 	 */
 
-	ret = ((dev_priv->fence_seq - sequence) > VMW_FENCE_WRAP);
-	up_read(&fifo_state->rwsem);
+	ret = ((atomic_read(&dev_priv->fence_seq) - sequence)
+	       > VMW_FENCE_WRAP);
 
 	return ret;
 }
@@ -127,7 +120,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
 
 	if (fifo_idle)
 		down_read(&fifo_state->rwsem);
-	signal_seq = dev_priv->fence_seq;
+	signal_seq = atomic_read(&dev_priv->fence_seq);
 	ret = 0;
 
 	for (;;) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index b1af76e371c..31f9afed0a6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -553,9 +553,7 @@ int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
 	} *cmd;
 	int i, increment = 1;
 
-	if (!num_clips ||
-	    !(dev_priv->fifo.capabilities &
-	      SVGA_FIFO_CAP_SCREEN_OBJECT)) {
+	if (!num_clips) {
 		num_clips = 1;
 		clips = &norect;
 		norect.x1 = norect.y1 = 0;
@@ -574,10 +572,10 @@ int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
 
 	for (i = 0; i < num_clips; i++, clips += increment) {
 		cmd[i].header = cpu_to_le32(SVGA_CMD_UPDATE);
-		cmd[i].body.x = cpu_to_le32(clips[i].x1);
-		cmd[i].body.y = cpu_to_le32(clips[i].y1);
-		cmd[i].body.width = cpu_to_le32(clips[i].x2 - clips[i].x1);
-		cmd[i].body.height = cpu_to_le32(clips[i].y2 - clips[i].y1);
+		cmd[i].body.x = cpu_to_le32(clips->x1);
+		cmd[i].body.y = cpu_to_le32(clips->y1);
+		cmd[i].body.width = cpu_to_le32(clips->x2 - clips->x1);
+		cmd[i].body.height = cpu_to_le32(clips->y2 - clips->y1);
 	}
 
 	vmw_fifo_commit(dev_priv, sizeof(*cmd) * num_clips);
@@ -709,6 +707,9 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 	if (ret)
 		goto try_dmabuf;
 
+	if (!surface->scanout)
+		goto err_not_scanout;
+
 	ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb,
 					      mode_cmd->width, mode_cmd->height);
 
@@ -742,6 +743,13 @@ try_dmabuf:
 	}
 
 	return &vfb->base;
+
+err_not_scanout:
+	DRM_ERROR("surface not marked as scanout\n");
+	/* vmw_user_surface_lookup takes one ref */
+	vmw_surface_unreference(&surface);
+
+	return NULL;
 }
 
 static int vmw_kms_fb_changed(struct drm_device *dev)
@@ -761,10 +769,10 @@ int vmw_kms_init(struct vmw_private *dev_priv)
 
 	drm_mode_config_init(dev);
 	dev->mode_config.funcs = &vmw_kms_funcs;
-	dev->mode_config.min_width = 640;
-	dev->mode_config.min_height = 480;
-	dev->mode_config.max_width = 2048;
-	dev->mode_config.max_height = 2048;
+	dev->mode_config.min_width = 1;
+	dev->mode_config.min_height = 1;
+	dev->mode_config.max_width = dev_priv->fb_max_width;
+	dev->mode_config.max_height = dev_priv->fb_max_height;
 
 	ret = vmw_kms_init_legacy_display_system(dev_priv);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index bb6e6a096d2..5b6eabeb7f5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -104,7 +104,6 @@ static int vmw_dmabuf_pin_in_vram(struct vmw_private *dev_priv,
 				  bool pin, bool interruptible)
 {
 	struct ttm_buffer_object *bo = &buf->base;
-	struct ttm_bo_global *glob = bo->glob;
 	struct ttm_placement *overlay_placement = &vmw_vram_placement;
 	int ret;
 
@@ -116,14 +115,6 @@ static int vmw_dmabuf_pin_in_vram(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		goto err;
 
-	if (buf->gmr_bound) {
-		vmw_gmr_unbind(dev_priv, buf->gmr_id);
-		spin_lock(&glob->lru_lock);
-		ida_remove(&dev_priv->gmr_ida, buf->gmr_id);
-		spin_unlock(&glob->lru_lock);
-		buf->gmr_bound = NULL;
-	}
-
 	if (pin)
 		overlay_placement = &vmw_vram_ne_placement;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index c012d5927f6..f8fbbc67a40 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -574,6 +574,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 
 	srf->flags = req->flags;
 	srf->format = req->format;
+	srf->scanout = req->scanout;
 	memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels));
 	srf->num_sizes = 0;
 	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
@@ -599,6 +600,26 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	if (unlikely(ret != 0))
 		goto out_err1;
 
+	if (srf->scanout &&
+	    srf->num_sizes == 1 &&
+	    srf->sizes[0].width == 64 &&
+	    srf->sizes[0].height == 64 &&
+	    srf->format == SVGA3D_A8R8G8B8) {
+
+		srf->snooper.image = kmalloc(64 * 64 * 4, GFP_KERNEL);
+		/* clear the image */
+		if (srf->snooper.image) {
+			memset(srf->snooper.image, 0x00, 64 * 64 * 4);
+		} else {
+			DRM_ERROR("Failed to allocate cursor_image\n");
+			ret = -ENOMEM;
+			goto out_err1;
+		}
+	} else {
+		srf->snooper.image = NULL;
+	}
+	srf->snooper.crtc = NULL;
+
 	user_srf->base.shareable = false;
 	user_srf->base.tfile = NULL;
 
@@ -622,24 +643,6 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 		return ret;
 	}
 
-	if (srf->flags & (1 << 9) &&
-	    srf->num_sizes == 1 &&
-	    srf->sizes[0].width == 64 &&
-	    srf->sizes[0].height == 64 &&
-	    srf->format == SVGA3D_A8R8G8B8) {
-
-		srf->snooper.image = kmalloc(64 * 64 * 4, GFP_KERNEL);
-		/* clear the image */
-		if (srf->snooper.image)
-			memset(srf->snooper.image, 0x00, 64 * 64 * 4);
-		else
-			DRM_ERROR("Failed to allocate cursor_image\n");
-
-	} else {
-		srf->snooper.image = NULL;
-	}
-	srf->snooper.crtc = NULL;
-
 	rep->sid = user_srf->base.hash.key;
 	if (rep->sid == SVGA3D_INVALID_ID)
 		DRM_ERROR("Created bad Surface ID.\n");
@@ -754,20 +757,29 @@ static size_t vmw_dmabuf_acc_size(struct ttm_bo_global *glob,
 	return bo_user_size + page_array_size;
 }
 
-void vmw_dmabuf_bo_free(struct ttm_buffer_object *bo)
+void vmw_dmabuf_gmr_unbind(struct ttm_buffer_object *bo)
 {
 	struct vmw_dma_buffer *vmw_bo = vmw_dma_buffer(bo);
 	struct ttm_bo_global *glob = bo->glob;
 	struct vmw_private *dev_priv =
 		container_of(bo->bdev, struct vmw_private, bdev);
 
-	ttm_mem_global_free(glob->mem_glob, bo->acc_size);
 	if (vmw_bo->gmr_bound) {
 		vmw_gmr_unbind(dev_priv, vmw_bo->gmr_id);
 		spin_lock(&glob->lru_lock);
 		ida_remove(&dev_priv->gmr_ida, vmw_bo->gmr_id);
 		spin_unlock(&glob->lru_lock);
+		vmw_bo->gmr_bound = false;
 	}
+}
+
+void vmw_dmabuf_bo_free(struct ttm_buffer_object *bo)
+{
+	struct vmw_dma_buffer *vmw_bo = vmw_dma_buffer(bo);
+	struct ttm_bo_global *glob = bo->glob;
+
+	vmw_dmabuf_gmr_unbind(bo);
+	ttm_mem_global_free(glob->mem_glob, bo->acc_size);
 	kfree(vmw_bo);
 }
 
@@ -813,18 +825,10 @@ int vmw_dmabuf_init(struct vmw_private *dev_priv,
 static void vmw_user_dmabuf_destroy(struct ttm_buffer_object *bo)
 {
 	struct vmw_user_dma_buffer *vmw_user_bo = vmw_user_dma_buffer(bo);
-	struct vmw_dma_buffer *vmw_bo = &vmw_user_bo->dma;
 	struct ttm_bo_global *glob = bo->glob;
-	struct vmw_private *dev_priv =
-		container_of(bo->bdev, struct vmw_private, bdev);
 
+	vmw_dmabuf_gmr_unbind(bo);
 	ttm_mem_global_free(glob->mem_glob, bo->acc_size);
-	if (vmw_bo->gmr_bound) {
-		vmw_gmr_unbind(dev_priv, vmw_bo->gmr_id);
-		spin_lock(&glob->lru_lock);
-		ida_remove(&dev_priv->gmr_ida, vmw_bo->gmr_id);
-		spin_unlock(&glob->lru_lock);
-	}
 	kfree(vmw_user_bo);
 }
 
@@ -868,7 +872,7 @@ int vmw_dmabuf_alloc_ioctl(struct drm_device *dev, void *data,
 	}
 
 	ret = vmw_dmabuf_init(dev_priv, &vmw_user_bo->dma, req->size,
-			      &vmw_vram_placement, true,
+			      &vmw_vram_sys_placement, true,
 			      &vmw_user_dmabuf_destroy);
 	if (unlikely(ret != 0))
 		return ret;