1 files changed, 73 insertions, 35 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 703553687b2..50d9e67745a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -29,12 +29,22 @@
 #include "nouveau_drv.h"
 #include "nouveau_dma.h"
 
+void
+nouveau_dma_pre_init(struct nouveau_channel *chan)
+{
+	chan->dma.max  = (chan->pushbuf_bo->bo.mem.size >> 2) - 2;
+	chan->dma.put  = 0;
+	chan->dma.cur  = chan->dma.put;
+	chan->dma.free = chan->dma.max - chan->dma.cur;
+}
+
 int
 nouveau_dma_init(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *m2mf = NULL;
+	struct nouveau_gpuobj *nvsw = NULL;
 	int ret, i;
 
 	/* Create NV_MEMORY_TO_MEMORY_FORMAT for buffer moves */
@@ -47,6 +57,15 @@ nouveau_dma_init(struct nouveau_channel *chan)
 	if (ret)
 		return ret;
 
+	/* Create an NV_SW object for various sync purposes */
+	ret = nouveau_gpuobj_sw_new(chan, NV_SW, &nvsw);
+	if (ret)
+		return ret;
+
+	ret = nouveau_gpuobj_ref_add(dev, chan, NvSw, nvsw, NULL);
+	if (ret)
+		return ret;
+
 	/* NV_MEMORY_TO_MEMORY_FORMAT requires a notifier object */
 	ret = nouveau_notifier_alloc(chan, NvNotify0, 32, &chan->m2mf_ntfy);
 	if (ret)
@@ -64,12 +83,6 @@ nouveau_dma_init(struct nouveau_channel *chan)
 			return ret;
 	}
 
-	/* Initialise DMA vars */
-	chan->dma.max  = (chan->pushbuf_bo->bo.mem.size >> 2) - 2;
-	chan->dma.put  = 0;
-	chan->dma.cur  = chan->dma.put;
-	chan->dma.free = chan->dma.max - chan->dma.cur;
-
 	/* Insert NOPS for NOUVEAU_DMA_SKIPS */
 	ret = RING_SPACE(chan, NOUVEAU_DMA_SKIPS);
 	if (ret)
@@ -87,6 +100,13 @@ nouveau_dma_init(struct nouveau_channel *chan)
 	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
 	OUT_RING(chan, NvNotify0);
 
+	/* Initialise NV_SW */
+	ret = RING_SPACE(chan, 2);
+	if (ret)
+		return ret;
+	BEGIN_RING(chan, NvSubSw, 0, 1);
+	OUT_RING(chan, NvSw);
+
 	/* Sit back and pray the channel works.. */
 	FIRE_RING(chan);
 
@@ -106,47 +126,52 @@ OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords)
 	chan->dma.cur += nr_dwords;
 }
 
-static inline bool
-READ_GET(struct nouveau_channel *chan, uint32_t *get)
+/* Fetch and adjust GPU GET pointer
+ *
+ * Returns:
+ *  value >= 0, the adjusted GET pointer
+ *  -EINVAL if GET pointer currently outside main push buffer
+ *  -EBUSY if timeout exceeded
+ */
+static inline int
+READ_GET(struct nouveau_channel *chan, uint32_t *prev_get, uint32_t *timeout)
 {
 	uint32_t val;
 
 	val = nvchan_rd32(chan, chan->user_get);
-	if (val < chan->pushbuf_base ||
-	    val >= chan->pushbuf_base + chan->pushbuf_bo->bo.mem.size) {
-		/* meaningless to dma_wait() except to know whether the
-		 * GPU has stalled or not
-		 */
-		*get = val;
-		return false;
+
+	/* reset counter as long as GET is still advancing, this is
+	 * to avoid misdetecting a GPU lockup if the GPU happens to
+	 * just be processing an operation that takes a long time
+	 */
+	if (val != *prev_get) {
+		*prev_get = val;
+		*timeout = 0;
+	}
+
+	if ((++*timeout & 0xff) == 0) {
+		DRM_UDELAY(1);
+		if (*timeout > 100000)
+			return -EBUSY;
 	}
 
-	*get = (val - chan->pushbuf_base) >> 2;
-	return true;
+	if (val < chan->pushbuf_base ||
+	    val > chan->pushbuf_base + (chan->dma.max << 2))
+		return -EINVAL;
+
+	return (val - chan->pushbuf_base) >> 2;
 }
 
 int
 nouveau_dma_wait(struct nouveau_channel *chan, int size)
 {
-	uint32_t get, prev_get = 0, cnt = 0;
-	bool get_valid;
+	uint32_t prev_get = 0, cnt = 0;
+	int get;
 
 	while (chan->dma.free < size) {
-		/* reset counter as long as GET is still advancing, this is
-		 * to avoid misdetecting a GPU lockup if the GPU happens to
-		 * just be processing an operation that takes a long time
-		 */
-		get_valid = READ_GET(chan, &get);
-		if (get != prev_get) {
-			prev_get = get;
-			cnt = 0;
-		}
-
-		if ((++cnt & 0xff) == 0) {
-			DRM_UDELAY(1);
-			if (cnt > 100000)
-				return -EBUSY;
-		}
+		get = READ_GET(chan, &prev_get, &cnt);
+		if (unlikely(get == -EBUSY))
+			return -EBUSY;
 
 		/* loop until we have a usable GET pointer.  the value
 		 * we read from the GPU may be outside the main ring if
@@ -157,7 +182,7 @@ nouveau_dma_wait(struct nouveau_channel *chan, int size)
 		 * from the SKIPS area, so the code below doesn't have to deal
 		 * with some fun corner cases.
 		 */
-		if (!get_valid || get < NOUVEAU_DMA_SKIPS)
+		if (unlikely(get == -EINVAL) || get < NOUVEAU_DMA_SKIPS)
 			continue;
 
 		if (get <= chan->dma.cur) {
@@ -183,6 +208,19 @@ nouveau_dma_wait(struct nouveau_channel *chan, int size)
 			 * after processing the currently pending commands.
 			 */
 			OUT_RING(chan, chan->pushbuf_base | 0x20000000);
+
+			/* wait for GET to depart from the skips area.
+			 * prevents writing GET==PUT and causing a race
+			 * condition that causes us to think the GPU is
+			 * idle when it's not.
+			 */
+			do {
+				get = READ_GET(chan, &prev_get, &cnt);
+				if (unlikely(get == -EBUSY))
+					return -EBUSY;
+				if (unlikely(get == -EINVAL))
+					continue;
+			} while (get <= NOUVEAU_DMA_SKIPS);
 			WRITE_PUT(NOUVEAU_DMA_SKIPS);
 
 			/* we're now submitting commands at the start of