summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/i915/i915_gem_execbuffer.c
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@kernel.org> 2012-04-14 13:18:27 +0200
committer: Ingo Molnar <mingo@kernel.org> 2012-04-14 13:19:04 +0200
commit: 6ac1ef482d7ae0c690f1640bf6eb818ff9a2d91e (patch)
tree: 021cc9f6b477146fcebe6f3be4752abfa2ba18a9 /drivers/gpu/drm/i915/i915_gem_execbuffer.c
parent: 682968e0c425c60f0dde37977e5beb2b12ddc4cc (diff)
parent: a385ec4f11bdcf81af094c03e2444ee9b7fad2e5 (diff)
Merge branch 'perf/core' into perf/uprobes
Merge in latest upstream (and the latest perf development tree), to prepare for tooling changes, and also to pick up v3.4 MM changes that the uprobes code needs to take care of.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_execbuffer.c 205
1 files changed, 128 insertions, 77 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 65e1f0043f9..f51a696486c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -203,9 +203,9 @@ i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
cd->invalidate_domains |= invalidate_domains;
cd->flush_domains |= flush_domains;
if (flush_domains & I915_GEM_GPU_DOMAINS)
- cd->flush_rings |= obj->ring->id;
+ cd->flush_rings |= intel_ring_flag(obj->ring);
if (invalidate_domains & I915_GEM_GPU_DOMAINS)
- cd->flush_rings |= ring->id;
+ cd->flush_rings |= intel_ring_flag(ring);
}
struct eb_objects {
@@ -287,14 +287,14 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
* exec_object list, so it should have a GTT space bound by now.
*/
if (unlikely(target_offset == 0)) {
- DRM_ERROR("No GTT space found for object %d\n",
+ DRM_DEBUG("No GTT space found for object %d\n",
reloc->target_handle);
return ret;
}
/* Validate that the target is in a valid r/w GPU domain */
if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
- DRM_ERROR("reloc with multiple write domains: "
+ DRM_DEBUG("reloc with multiple write domains: "
"obj %p target %d offset %d "
"read %08x write %08x",
obj, reloc->target_handle,
@@ -303,8 +303,9 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
reloc->write_domain);
return ret;
}
- if (unlikely((reloc->write_domain | reloc->read_domains) & I915_GEM_DOMAIN_CPU)) {
- DRM_ERROR("reloc with read/write CPU domains: "
+ if (unlikely((reloc->write_domain | reloc->read_domains)
+ & ~I915_GEM_GPU_DOMAINS)) {
+ DRM_DEBUG("reloc with read/write non-GPU domains: "
"obj %p target %d offset %d "
"read %08x write %08x",
obj, reloc->target_handle,
@@ -315,7 +316,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
}
if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
reloc->write_domain != target_obj->pending_write_domain)) {
- DRM_ERROR("Write domain conflict: "
+ DRM_DEBUG("Write domain conflict: "
"obj %p target %d offset %d "
"new %08x old %08x\n",
obj, reloc->target_handle,
@@ -336,7 +337,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
/* Check that the relocation address is valid... */
if (unlikely(reloc->offset > obj->base.size - 4)) {
- DRM_ERROR("Relocation beyond object bounds: "
+ DRM_DEBUG("Relocation beyond object bounds: "
"obj %p target %d offset %d size %d.\n",
obj, reloc->target_handle,
(int) reloc->offset,
@@ -344,7 +345,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
return ret;
}
if (unlikely(reloc->offset & 3)) {
- DRM_ERROR("Relocation not 4-byte aligned: "
+ DRM_DEBUG("Relocation not 4-byte aligned: "
"obj %p target %d offset %d.\n",
obj, reloc->target_handle,
(int) reloc->offset);
@@ -461,11 +462,60 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
return ret;
}
+#define __EXEC_OBJECT_HAS_FENCE (1<<31)
+
+static int
+pin_and_fence_object(struct drm_i915_gem_object *obj,
+ struct intel_ring_buffer *ring)
+{
+ struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
+ bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
+ bool need_fence, need_mappable;
+ int ret;
+
+ need_fence =
+ has_fenced_gpu_access &&
+ entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+ obj->tiling_mode != I915_TILING_NONE;
+ need_mappable =
+ entry->relocation_count ? true : need_fence;
+
+ ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
+ if (ret)
+ return ret;
+
+ if (has_fenced_gpu_access) {
+ if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+ if (obj->tiling_mode) {
+ ret = i915_gem_object_get_fence(obj, ring);
+ if (ret)
+ goto err_unpin;
+
+ entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+ i915_gem_object_pin_fence(obj);
+ } else {
+ ret = i915_gem_object_put_fence(obj);
+ if (ret)
+ goto err_unpin;
+ }
+ obj->pending_fenced_gpu_access = true;
+ }
+ }
+
+ entry->offset = obj->gtt_offset;
+ return 0;
+
+err_unpin:
+ i915_gem_object_unpin(obj);
+ return ret;
+}
+
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
struct drm_file *file,
struct list_head *objects)
{
+ drm_i915_private_t *dev_priv = ring->dev->dev_private;
struct drm_i915_gem_object *obj;
int ret, retry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
@@ -518,6 +568,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
list_for_each_entry(obj, objects, exec_list) {
struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
bool need_fence, need_mappable;
+
if (!obj->gtt_space)
continue;
@@ -532,58 +583,55 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
(need_mappable && !obj->map_and_fenceable))
ret = i915_gem_object_unbind(obj);
else
- ret = i915_gem_object_pin(obj,
- entry->alignment,
- need_mappable);
+ ret = pin_and_fence_object(obj, ring);
if (ret)
goto err;
-
- entry++;
}
/* Bind fresh objects */
list_for_each_entry(obj, objects, exec_list) {
- struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
- bool need_fence;
+ if (obj->gtt_space)
+ continue;
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
+ ret = pin_and_fence_object(obj, ring);
+ if (ret) {
+ int ret_ignore;
+
+ /* This can potentially raise a harmless
+ * -EINVAL if we failed to bind in the above
+ * call. It cannot raise -EINTR since we know
+ * that the bo is freshly bound and so will
+ * not need to be flushed or waited upon.
+ */
+ ret_ignore = i915_gem_object_unbind(obj);
+ (void)ret_ignore;
+ WARN_ON(obj->gtt_space);
+ break;
+ }
+ }
- if (!obj->gtt_space) {
- bool need_mappable =
- entry->relocation_count ? true : need_fence;
+ /* Decrement pin count for bound objects */
+ list_for_each_entry(obj, objects, exec_list) {
+ struct drm_i915_gem_exec_object2 *entry;
- ret = i915_gem_object_pin(obj,
- entry->alignment,
- need_mappable);
- if (ret)
- break;
- }
+ if (!obj->gtt_space)
+ continue;
- if (has_fenced_gpu_access) {
- if (need_fence) {
- ret = i915_gem_object_get_fence(obj, ring);
- if (ret)
- break;
- } else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode == I915_TILING_NONE) {
- /* XXX pipelined! */
- ret = i915_gem_object_put_fence(obj);
- if (ret)
- break;
- }
- obj->pending_fenced_gpu_access = need_fence;
+ entry = obj->exec_entry;
+ if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+ i915_gem_object_unpin_fence(obj);
+ entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
}
- entry->offset = obj->gtt_offset;
- }
+ i915_gem_object_unpin(obj);
- /* Decrement pin count for bound objects */
- list_for_each_entry(obj, objects, exec_list) {
- if (obj->gtt_space)
- i915_gem_object_unpin(obj);
+ /* ... and ensure ppgtt mapping exist if needed. */
+ if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
+ i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
+ obj, obj->cache_level);
+
+ obj->has_aliasing_ppgtt_mapping = 1;
+ }
}
if (ret != -ENOSPC || retry > 1)
@@ -600,16 +648,19 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
} while (1);
err:
- obj = list_entry(obj->exec_list.prev,
- struct drm_i915_gem_object,
- exec_list);
- while (objects != &obj->exec_list) {
- if (obj->gtt_space)
- i915_gem_object_unpin(obj);
+ list_for_each_entry_continue_reverse(obj, objects, exec_list) {
+ struct drm_i915_gem_exec_object2 *entry;
+
+ if (!obj->gtt_space)
+ continue;
+
+ entry = obj->exec_entry;
+ if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+ i915_gem_object_unpin_fence(obj);
+ entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
+ }
- obj = list_entry(obj->exec_list.prev,
- struct drm_i915_gem_object,
- exec_list);
+ i915_gem_object_unpin(obj);
}
return ret;
@@ -682,7 +733,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
obj = to_intel_bo(drm_gem_object_lookup(dev, file,
exec[i].handle));
if (&obj->base == NULL) {
- DRM_ERROR("Invalid object handle %d at index %d\n",
+ DRM_DEBUG("Invalid object handle %d at index %d\n",
exec[i].handle, i);
ret = -ENOENT;
goto err;
@@ -1013,7 +1064,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
int ret, mode, i;
if (!i915_gem_check_execbuffer(args)) {
- DRM_ERROR("execbuf with invalid offset/length\n");
+ DRM_DEBUG("execbuf with invalid offset/length\n");
return -EINVAL;
}
@@ -1028,20 +1079,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
break;
case I915_EXEC_BSD:
if (!HAS_BSD(dev)) {
- DRM_ERROR("execbuf with invalid ring (BSD)\n");
+ DRM_DEBUG("execbuf with invalid ring (BSD)\n");
return -EINVAL;
}
ring = &dev_priv->ring[VCS];
break;
case I915_EXEC_BLT:
if (!HAS_BLT(dev)) {
- DRM_ERROR("execbuf with invalid ring (BLT)\n");
+ DRM_DEBUG("execbuf with invalid ring (BLT)\n");
return -EINVAL;
}
ring = &dev_priv->ring[BCS];
break;
default:
- DRM_ERROR("execbuf with unknown ring: %d\n",
+ DRM_DEBUG("execbuf with unknown ring: %d\n",
(int)(args->flags & I915_EXEC_RING_MASK));
return -EINVAL;
}
@@ -1067,18 +1118,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
break;
default:
- DRM_ERROR("execbuf with unknown constants: %d\n", mode);
+ DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
return -EINVAL;
}
if (args->buffer_count < 1) {
- DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
+ DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
return -EINVAL;
}
if (args->num_cliprects != 0) {
if (ring != &dev_priv->ring[RCS]) {
- DRM_ERROR("clip rectangles are only valid with the render ring\n");
+ DRM_DEBUG("clip rectangles are only valid with the render ring\n");
return -EINVAL;
}
@@ -1123,7 +1174,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
obj = to_intel_bo(drm_gem_object_lookup(dev, file,
exec[i].handle));
if (&obj->base == NULL) {
- DRM_ERROR("Invalid object handle %d at index %d\n",
+ DRM_DEBUG("Invalid object handle %d at index %d\n",
exec[i].handle, i);
/* prevent error path from reading uninitialized data */
ret = -ENOENT;
@@ -1131,7 +1182,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
if (!list_empty(&obj->exec_list)) {
- DRM_ERROR("Object %p [handle %d, index %d] appears more than once in object list\n",
+ DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
obj, exec[i].handle, i);
ret = -EINVAL;
goto err;
@@ -1169,7 +1220,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* Set the pending read domains for the batch buffer to COMMAND */
if (batch_obj->base.pending_write_domain) {
- DRM_ERROR("Attempting to use self-modifying batch buffer\n");
+ DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
ret = -EINVAL;
goto err;
}
@@ -1186,7 +1237,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
* so every billion or so execbuffers, we need to stall
* the GPU in order to reset the counters.
*/
- ret = i915_gpu_idle(dev);
+ ret = i915_gpu_idle(dev, true);
if (ret)
goto err;
@@ -1274,7 +1325,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
int ret, i;
if (args->buffer_count < 1) {
- DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
+ DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
return -EINVAL;
}
@@ -1282,7 +1333,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
if (exec_list == NULL || exec2_list == NULL) {
- DRM_ERROR("Failed to allocate exec list for %d buffers\n",
+ DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
args->buffer_count);
drm_free_large(exec_list);
drm_free_large(exec2_list);
@@ -1293,7 +1344,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
(uintptr_t) args->buffers_ptr,
sizeof(*exec_list) * args->buffer_count);
if (ret != 0) {
- DRM_ERROR("copy %d exec entries failed %d\n",
+ DRM_DEBUG("copy %d exec entries failed %d\n",
args->buffer_count, ret);
drm_free_large(exec_list);
drm_free_large(exec2_list);
@@ -1334,7 +1385,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
sizeof(*exec_list) * args->buffer_count);
if (ret) {
ret = -EFAULT;
- DRM_ERROR("failed to copy %d exec entries "
+ DRM_DEBUG("failed to copy %d exec entries "
"back to user (%d)\n",
args->buffer_count, ret);
}
@@ -1354,7 +1405,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
int ret;
if (args->buffer_count < 1) {
- DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
+ DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
return -EINVAL;
}
@@ -1364,7 +1415,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
exec2_list = drm_malloc_ab(sizeof(*exec2_list),
args->buffer_count);
if (exec2_list == NULL) {
- DRM_ERROR("Failed to allocate exec list for %d buffers\n",
+ DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
args->buffer_count);
return -ENOMEM;
}
@@ -1373,7 +1424,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
(uintptr_t) args->buffers_ptr,
sizeof(*exec2_list) * args->buffer_count);
if (ret != 0) {
- DRM_ERROR("copy %d exec entries failed %d\n",
+ DRM_DEBUG("copy %d exec entries failed %d\n",
args->buffer_count, ret);
drm_free_large(exec2_list);
return -EFAULT;
@@ -1388,7 +1439,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
sizeof(*exec2_list) * args->buffer_count);
if (ret) {
ret = -EFAULT;
- DRM_ERROR("failed to copy %d exec entries "
+ DRM_DEBUG("failed to copy %d exec entries "
"back to user (%d)\n",
args->buffer_count, ret);
}