Diffstat (limited to 'drivers/gpu/drm/radeon/radeon_cs.c')
-rw-r--r--	drivers/gpu/drm/radeon/radeon_cs.c	104
1 file changed, 81 insertions, 23 deletions
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 435a3d970ab..5cac8327833 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -85,12 +85,6 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			radeon_bo_list_add_object(&p->relocs[i].lobj,
 						  &p->validated);
 
-			if (p->relocs[i].robj->tbo.sync_obj && !(r->flags & RADEON_RELOC_DONT_SYNC)) {
-				struct radeon_fence *fence = p->relocs[i].robj->tbo.sync_obj;
-				if (!radeon_fence_signaled(fence)) {
-					p->sync_to_ring[fence->ring] = true;
-				}
-			}
 		} else
 			p->relocs[i].handle = 0;
 	}
@@ -109,8 +103,13 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
 		p->ring = RADEON_RING_TYPE_GFX_INDEX;
 		break;
 	case RADEON_CS_RING_COMPUTE:
-		/* for now */
-		p->ring = RADEON_RING_TYPE_GFX_INDEX;
+		if (p->rdev->family >= CHIP_TAHITI) {
+			if (p->priority > 0)
+				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
+			else
+				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
+		} else
+			p->ring = RADEON_RING_TYPE_GFX_INDEX;
 		break;
 	}
 	return 0;
@@ -118,11 +117,24 @@
 
 static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
 {
+	bool sync_to_ring[RADEON_NUM_RINGS] = { };
 	int i, r;
 
+	for (i = 0; i < p->nrelocs; i++) {
+		if (!p->relocs[i].robj || !p->relocs[i].robj->tbo.sync_obj)
+			continue;
+
+		if (!(p->relocs[i].flags & RADEON_RELOC_DONT_SYNC)) {
+			struct radeon_fence *fence = p->relocs[i].robj->tbo.sync_obj;
+			if (!radeon_fence_signaled(fence)) {
+				sync_to_ring[fence->ring] = true;
+			}
+		}
+	}
+
 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 		/* no need to sync to our own or unused rings */
-		if (i == p->ring || !p->sync_to_ring[i] || !p->rdev->ring[i].ready)
+		if (i == p->ring || !sync_to_ring[i] || !p->rdev->ring[i].ready)
 			continue;
 
 		if (!p->ib->fence->semaphore) {
@@ -163,6 +175,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 	p->chunk_ib_idx = -1;
 	p->chunk_relocs_idx = -1;
 	p->chunk_flags_idx = -1;
+	p->chunk_const_ib_idx = -1;
 	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
 	if (p->chunks_array == NULL) {
 		return -ENOMEM;
 	}
@@ -201,6 +214,12 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 			if (p->chunks[i].length_dw == 0)
 				return -EINVAL;
 		}
+		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
+			p->chunk_const_ib_idx = i;
+			/* zero length CONST IB isn't useful */
+			if (p->chunks[i].length_dw == 0)
+				return -EINVAL;
+		}
 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
 			p->chunk_flags_idx = i;
 			/* zero length flags aren't useful */
@@ -236,21 +255,19 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 	if ((p->cs_flags & RADEON_CS_USE_VM) &&
 	    !p->rdev->vm_manager.enabled) {
 		DRM_ERROR("VM not active on asic!\n");
-		if (p->chunk_relocs_idx != -1)
-			kfree(p->chunks[p->chunk_relocs_idx].kdata);
-		if (p->chunk_flags_idx != -1)
-			kfree(p->chunks[p->chunk_flags_idx].kdata);
 		return -EINVAL;
 	}
 
-	if (radeon_cs_get_ring(p, ring, priority)) {
-		if (p->chunk_relocs_idx != -1)
-			kfree(p->chunks[p->chunk_relocs_idx].kdata);
-		if (p->chunk_flags_idx != -1)
-			kfree(p->chunks[p->chunk_flags_idx].kdata);
+	/* we only support VM on SI+ */
+	if ((p->rdev->family >= CHIP_TAHITI) &&
+	    ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
+		DRM_ERROR("VM required on SI+!\n");
 		return -EINVAL;
 	}
 
+	if (radeon_cs_get_ring(p, ring, priority))
+		return -EINVAL;
+
 	/* deal with non-vm */
 	if ((p->chunk_ib_idx != -1) &&
@@ -264,11 +281,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 		p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
 		p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
 		if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
-		    p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
-			kfree(p->chunks[p->chunk_ib_idx].kpage[0]);
-			kfree(p->chunks[p->chunk_ib_idx].kpage[1]);
+		    p->chunks[p->chunk_ib_idx].kpage[1] == NULL)
 			return -ENOMEM;
-		}
 		p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
 		p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
 		p->chunks[p->chunk_ib_idx].last_copied_page = -1;
@@ -341,7 +355,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
 		return r;
 	}
 	parser->ib->length_dw = ib_chunk->length_dw;
-	r = radeon_cs_parse(parser);
+	r = radeon_cs_parse(rdev, parser->ring, parser);
 	if (r || parser->parser_error) {
 		DRM_ERROR("Invalid command stream !\n");
 		return r;
@@ -394,6 +408,32 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
 		return 0;
 
+	if ((rdev->family >= CHIP_TAHITI) &&
+	    (parser->chunk_const_ib_idx != -1)) {
+		ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
+		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
+			DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
+			return -EINVAL;
+		}
+		r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
+				  ib_chunk->length_dw * 4);
+		if (r) {
+			DRM_ERROR("Failed to get const ib !\n");
+			return r;
+		}
+		parser->const_ib->is_const_ib = true;
+		parser->const_ib->length_dw = ib_chunk->length_dw;
+		/* Copy the packet into the IB */
+		if (DRM_COPY_FROM_USER(parser->const_ib->ptr, ib_chunk->user_ptr,
+				       ib_chunk->length_dw * 4)) {
+			return -EFAULT;
+		}
+		r = radeon_ring_ib_parse(rdev, parser->ring, parser->const_ib);
+		if (r) {
+			return r;
+		}
+	}
+
 	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
 	if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
 		DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
@@ -429,11 +469,25 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	if (r) {
 		DRM_ERROR("Failed to synchronize rings !\n");
 	}
+
+	if ((rdev->family >= CHIP_TAHITI) &&
+	    (parser->chunk_const_ib_idx != -1)) {
+		parser->const_ib->vm_id = vm->id;
+		/* ib pool is bind at 0 in virtual address space to gpu_addr is the
+		 * offset inside the pool bo
+		 */
+		parser->const_ib->gpu_addr = parser->const_ib->sa_bo.offset;
+		r = radeon_ib_schedule(rdev, parser->const_ib);
+		if (r)
+			goto out;
+	}
+
 	parser->ib->vm_id = vm->id;
 	/* ib pool is bind at 0 in virtual address space to gpu_addr is the
 	 * offset inside the pool bo
 	 */
 	parser->ib->gpu_addr = parser->ib->sa_bo.offset;
+	parser->ib->is_const_ib = false;
 	r = radeon_ib_schedule(rdev, parser->ib);
 out:
 	if (!r) {
@@ -453,6 +507,10 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	int r;
 
 	radeon_mutex_lock(&rdev->cs_mutex);
+	if (!rdev->accel_working) {
+		radeon_mutex_unlock(&rdev->cs_mutex);
+		return -EBUSY;
+	}
 	/* initialize parser */
 	memset(&parser, 0, sizeof(struct radeon_cs_parser));
 	parser.filp = filp;
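
Everything this diff adds is driven from userspace through the existing chunk array of the CS ioctl: the FLAGS chunk now carries a ring and a priority dword that radeon_cs_get_ring() maps onto the compute rings, and SI parts accept an extra RADEON_CHUNK_ID_CONST_IB chunk next to the main IB. Below is a minimal userspace-side sketch of such a submission, assuming the radeon_drm.h and libdrm interfaces of this era; the file descriptor, IB contents and sizes are placeholders, a real submission would also carry a RADEON_CHUNK_ID_RELOCS chunk for the buffers the IBs reference, and all error handling is elided.

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <radeon_drm.h>	/* libdrm's copy of the radeon UAPI header */

/* Hypothetical helper, not part of libdrm: submit one draw IB plus one
 * const IB, with a flags chunk selecting VM mode, the compute ring and
 * a priority. "fd" is an open radeon DRM fd; ib[]/const_ib[] hold the
 * packet dwords. */
static int submit_cs(int fd, uint32_t *ib, uint32_t ib_dw,
		     uint32_t *const_ib, uint32_t const_ib_dw)
{
	uint32_t flags[3] = {
		RADEON_CS_USE_VM,	/* mandatory on SI+, per the check above */
		RADEON_CS_RING_COMPUTE,	/* routed to CP1/CP2 on CHIP_TAHITI+ */
		1,			/* priority > 0 selects CP1 */
	};
	struct drm_radeon_cs_chunk chunks[3];
	uint64_t chunk_ptrs[3];
	struct drm_radeon_cs cs;

	chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
	chunks[0].length_dw = ib_dw;		/* zero length is rejected */
	chunks[0].chunk_data = (uintptr_t)ib;

	chunks[1].chunk_id = RADEON_CHUNK_ID_CONST_IB;
	chunks[1].length_dw = const_ib_dw;	/* zero length is rejected too */
	chunks[1].chunk_data = (uintptr_t)const_ib;

	chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
	chunks[2].length_dw = 3;		/* cs_flags, ring, priority */
	chunks[2].chunk_data = (uintptr_t)flags;

	/* the kernel expects an array of pointers to chunks */
	chunk_ptrs[0] = (uintptr_t)&chunks[0];
	chunk_ptrs[1] = (uintptr_t)&chunks[1];
	chunk_ptrs[2] = (uintptr_t)&chunks[2];

	memset(&cs, 0, sizeof(cs));
	cs.num_chunks = 3;
	cs.chunks = (uintptr_t)chunk_ptrs;

	/* -EBUSY if acceleration is down, -EINVAL for a VM-less CS on SI+ */
	return drmCommandWriteRead(fd, DRM_RADEON_CS, &cs, sizeof(cs));
}

The third flags dword is what the reworked radeon_cs_get_ring() consults: on CHIP_TAHITI and later a priority above zero lands on CAYMAN_RING_TYPE_CP1_INDEX and anything else on CP2, while older parts still fall back to the GFX ring. Leaving RADEON_CS_USE_VM out of the first dword now fails with -EINVAL on SI+, per the "VM required on SI+!" check in radeon_cs_parser_init().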