From f343c5f6477354967ee1e331a68a56b9fece2f36 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 5 Jul 2013 14:41:04 -0700 Subject: drm/i915: Getter/setter for object attributes Soon we want to gut a lot of our existing assumptions how many address spaces an object can live in, and in doing so, embed the drm_mm_node in the object (and later the VMA). It's possible in the future we'll want to add more getter/setter methods, but for now this is enough to enable the VMAs. v2: Reworked commit message (Ben) Added comments to the main functions (Ben) sed -i "s/i915_gem_obj_set_color/i915_gem_obj_ggtt_set_color/" drivers/gpu/drm/i915/*.[ch] sed -i "s/i915_gem_obj_bound/i915_gem_obj_ggtt_bound/" drivers/gpu/drm/i915/*.[ch] sed -i "s/i915_gem_obj_size/i915_gem_obj_ggtt_size/" drivers/gpu/drm/i915/*.[ch] sed -i "s/i915_gem_obj_offset/i915_gem_obj_ggtt_offset/" drivers/gpu/drm/i915/*.[ch] (Daniel) v3: Rebased on new reserve_node patch Changed DRM_DEBUG_KMS to actually work (will need fixing later) Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 87a3227e517..5aeb447ead6 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -188,7 +188,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return -ENOENT; target_i915_obj = to_intel_bo(target_obj); - target_offset = target_i915_obj->gtt_offset; + target_offset = i915_gem_obj_ggtt_offset(target_i915_obj); /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and * pipe_control writes because the gpu doesn't properly redirect them @@ -280,7 +280,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return ret; /* Map the page containing the relocation we're going to perform. */ - reloc->offset += obj->gtt_offset; + reloc->offset += i915_gem_obj_ggtt_offset(obj); reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, reloc->offset & PAGE_MASK); reloc_entry = (uint32_t __iomem *) @@ -436,8 +436,8 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, obj->has_aliasing_ppgtt_mapping = 1; } - if (entry->offset != obj->gtt_offset) { - entry->offset = obj->gtt_offset; + if (entry->offset != i915_gem_obj_ggtt_offset(obj)) { + entry->offset = i915_gem_obj_ggtt_offset(obj); *need_reloc = true; } @@ -458,7 +458,7 @@ i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj) { struct drm_i915_gem_exec_object2 *entry; - if (!obj->gtt_space) + if (!i915_gem_obj_ggtt_bound(obj)) return; entry = obj->exec_entry; @@ -530,7 +530,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; bool need_fence, need_mappable; - if (!obj->gtt_space) + if (!i915_gem_obj_ggtt_bound(obj)) continue; need_fence = @@ -539,7 +539,8 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(obj); - if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) || + if ((entry->alignment && + i915_gem_obj_ggtt_offset(obj) & (entry->alignment - 1)) || (need_mappable && !obj->map_and_fenceable)) ret = i915_gem_object_unbind(obj); else @@ -550,7 +551,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, /* Bind fresh objects */ list_for_each_entry(obj, objects, exec_list) { - if (obj->gtt_space) + if (i915_gem_obj_ggtt_bound(obj)) continue; ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs); @@ -1058,7 +1059,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; } - exec_start = batch_obj->gtt_offset + args->batch_start_offset; + exec_start = i915_gem_obj_ggtt_offset(batch_obj) + args->batch_start_offset; exec_len = args->batch_len; if (cliprects) { for (i = 0; i < args->num_cliprects; i++) { -- cgit v1.2.3-70-g09d2 From db1b76ca6a79c774074ae87bee7afc0825a478f5 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 9 Jul 2013 16:51:37 +0200 Subject: drm/i915: don't frob mm.suspended when not using ums In kernel modeset driver mode we're in full control of the chip, always. So there's no need at all to set mm.suspended in i915_gem_idle. Hence move that out into the leavevt ioctl. Since i915_gem_idle doesn't suspend gem any more we can also drop the re-enabling for KMS in the thaw function. Also clean up the handling of mm.suspend at driver load by coalescing all the assignments. Stumbled over while reading through our resume code for unrelated reasons. v2: Shovel mm.suspended into the (newly created) ums dungeon as suggested by Chris Wilson. The plan is that once we've completely stopped relying on the register save/restore code we could shovel even that in there. v3: Improve the locking for the entervt/leavevt ioctls a bit by moving the dev->struct_mutex locking outside of i915_gem_idle. Also don't clear dev_priv->ums.mm_suspended for the kms case, we allocate it with kzalloc. Both suggested by Chris Wilson. Cc: Chris Wilson Reviewed-by: Chris Wilson (v2) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 13 +++------- drivers/gpu/drm/i915/i915_drv.c | 11 +++++--- drivers/gpu/drm/i915/i915_drv.h | 24 ++++++++++-------- drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++------------ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- 5 files changed, 50 insertions(+), 40 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0e221423608..bece9973e1b 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1323,10 +1323,8 @@ static int i915_load_modeset_init(struct drm_device *dev) /* Always safe in the mode setting case. */ /* FIXME: do pre/post-mode set stuff in core KMS code */ dev->vblank_disable_allowed = 1; - if (INTEL_INFO(dev)->num_pipes == 0) { - dev_priv->mm.suspended = 0; + if (INTEL_INFO(dev)->num_pipes == 0) return 0; - } ret = intel_fbdev_init(dev); if (ret) @@ -1352,9 +1350,6 @@ static int i915_load_modeset_init(struct drm_device *dev) drm_kms_helper_poll_init(dev); - /* We're off and running w/KMS */ - dev_priv->mm.suspended = 0; - return 0; cleanup_gem: @@ -1629,9 +1624,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto out_gem_unload; } - /* Start out suspended */ - dev_priv->mm.suspended = 1; - if (HAS_POWER_WELL(dev)) i915_init_power_well(dev); @@ -1641,6 +1633,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) DRM_ERROR("failed to init modeset\n"); goto out_gem_unload; } + } else { + /* Start out suspended in ums mode. */ + dev_priv->ums.mm_suspended = 1; } i915_setup_sysfs(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ed9262c42b7..0485f435eee 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -556,7 +556,11 @@ static int i915_drm_freeze(struct drm_device *dev) /* If KMS is active, we do the leavevt stuff here */ if (drm_core_check_feature(dev, DRIVER_MODESET)) { - int error = i915_gem_idle(dev); + int error; + + mutex_lock(&dev->struct_mutex); + error = i915_gem_idle(dev); + mutex_unlock(&dev->struct_mutex); if (error) { dev_err(&dev->pdev->dev, "GEM idle failed, resume might fail\n"); @@ -661,7 +665,6 @@ static int __i915_drm_thaw(struct drm_device *dev) intel_init_pch_refclk(dev); mutex_lock(&dev->struct_mutex); - dev_priv->mm.suspended = 0; error = i915_gem_init_hw(dev); mutex_unlock(&dev->struct_mutex); @@ -961,11 +964,11 @@ int i915_reset(struct drm_device *dev) * switched away). */ if (drm_core_check_feature(dev, DRIVER_MODESET) || - !dev_priv->mm.suspended) { + !dev_priv->ums.mm_suspended) { struct intel_ring_buffer *ring; int i; - dev_priv->mm.suspended = 0; + dev_priv->ums.mm_suspended = 0; i915_gem_init_swizzling(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 993fd2c8a45..c81ac155f5b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -814,6 +814,18 @@ struct i915_dri1_state { uint32_t counter; }; +struct i915_ums_state { + /** + * Flag if the X Server, and thus DRM, is not currently in + * control of the device. + * + * This is set between LeaveVT and EnterVT. It needs to be + * replaced with a semaphore. It also needs to be + * transitioned away from for kernel modesetting. + */ + int mm_suspended; +}; + struct intel_l3_parity { u32 *remap_info; struct work_struct error_work; @@ -884,16 +896,6 @@ struct i915_gem_mm { */ bool interruptible; - /** - * Flag if the X Server, and thus DRM, is not currently in - * control of the device. - * - * This is set between LeaveVT and EnterVT. It needs to be - * replaced with a semaphore. It also needs to be - * transitioned away from for kernel modesetting. - */ - int suspended; - /** Bit 6 swizzling required for X tiling */ uint32_t bit_6_swizzle_x; /** Bit 6 swizzling required for Y tiling */ @@ -1187,6 +1189,8 @@ typedef struct drm_i915_private { /* Old dri1 support infrastructure, beware the dragons ya fools entering * here! */ struct i915_dri1_state dri1; + /* Old ums support infrastructure, same warning applies. */ + struct i915_ums_state ums; } drm_i915_private_t; /* Iterate over initialised rings */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 339404937ab..20b10a0fa45 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2082,7 +2082,7 @@ int __i915_add_request(struct intel_ring_buffer *ring, trace_i915_gem_request_add(ring, request->seqno); ring->outstanding_lazy_request = 0; - if (!dev_priv->mm.suspended) { + if (!dev_priv->ums.mm_suspended) { if (i915_enable_hangcheck) { mod_timer(&dev_priv->gpu_error.hangcheck_timer, round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); @@ -2398,7 +2398,7 @@ i915_gem_retire_work_handler(struct work_struct *work) idle &= list_empty(&ring->request_list); } - if (!dev_priv->mm.suspended && !idle) + if (!dev_priv->ums.mm_suspended && !idle) queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, round_jiffies_up_relative(HZ)); if (idle) @@ -3992,9 +3992,7 @@ i915_gem_idle(struct drm_device *dev) drm_i915_private_t *dev_priv = dev->dev_private; int ret; - mutex_lock(&dev->struct_mutex); - - if (dev_priv->mm.suspended) { + if (dev_priv->ums.mm_suspended) { mutex_unlock(&dev->struct_mutex); return 0; } @@ -4012,18 +4010,11 @@ i915_gem_idle(struct drm_device *dev) i915_gem_reset_fences(dev); - /* Hack! Don't let anybody do execbuf while we don't control the chip. - * We need to replace this with a semaphore, or something. - * And not confound mm.suspended! - */ - dev_priv->mm.suspended = 1; del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); i915_kernel_lost_context(dev); i915_gem_cleanup_ringbuffer(dev); - mutex_unlock(&dev->struct_mutex); - /* Cancel the retire work handler, which should be idle now. */ cancel_delayed_work_sync(&dev_priv->mm.retire_work); @@ -4233,7 +4224,7 @@ int i915_gem_entervt_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; int ret; if (drm_core_check_feature(dev, DRIVER_MODESET)) @@ -4245,7 +4236,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, } mutex_lock(&dev->struct_mutex); - dev_priv->mm.suspended = 0; + dev_priv->ums.mm_suspended = 0; ret = i915_gem_init_hw(dev); if (ret != 0) { @@ -4265,7 +4256,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, cleanup_ringbuffer: mutex_lock(&dev->struct_mutex); i915_gem_cleanup_ringbuffer(dev); - dev_priv->mm.suspended = 1; + dev_priv->ums.mm_suspended = 1; mutex_unlock(&dev->struct_mutex); return ret; @@ -4275,11 +4266,26 @@ int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + if (drm_core_check_feature(dev, DRIVER_MODESET)) return 0; drm_irq_uninstall(dev); - return i915_gem_idle(dev); + + mutex_lock(&dev->struct_mutex); + ret = i915_gem_idle(dev); + + /* Hack! Don't let anybody do execbuf while we don't control the chip. + * We need to replace this with a semaphore, or something. + * And not confound ums.mm_suspended! + */ + if (ret != 0) + dev_priv->ums.mm_suspended = 1; + mutex_unlock(&dev->struct_mutex); + + return ret; } void @@ -4290,9 +4296,11 @@ i915_gem_lastclose(struct drm_device *dev) if (drm_core_check_feature(dev, DRIVER_MODESET)) return; + mutex_lock(&dev->struct_mutex); ret = i915_gem_idle(dev); if (ret) DRM_ERROR("failed to idle hardware: %d\n", ret); + mutex_unlock(&dev->struct_mutex); } static void diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 5aeb447ead6..64eda4463b7 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -973,7 +973,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) goto pre_mutex_err; - if (dev_priv->mm.suspended) { + if (dev_priv->ums.mm_suspended) { mutex_unlock(&dev->struct_mutex); ret = -EBUSY; goto pre_mutex_err; -- cgit v1.2.3-70-g09d2 From e85209698649be30cb1389966f29107d63f16940 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 Jul 2013 17:22:07 +0300 Subject: drm/i915: Replace open-coding of DEFAULT_CONTEXT_ID The intent of the check is made more clear if we use the proper name for 0 here. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 64eda4463b7..1b58694d7be 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -873,7 +873,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, break; case I915_EXEC_BSD: ring = &dev_priv->ring[VCS]; - if (ctx_id != 0) { + if (ctx_id != DEFAULT_CONTEXT_ID) { DRM_DEBUG("Ring %s doesn't support contexts\n", ring->name); return -EPERM; @@ -881,7 +881,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, break; case I915_EXEC_BLT: ring = &dev_priv->ring[BCS]; - if (ctx_id != 0) { + if (ctx_id != DEFAULT_CONTEXT_ID) { DRM_DEBUG("Ring %s doesn't support contexts\n", ring->name); return -EPERM; @@ -889,7 +889,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, break; case I915_EXEC_VEBOX: ring = &dev_priv->ring[VECS]; - if (ctx_id != 0) { + if (ctx_id != DEFAULT_CONTEXT_ID) { DRM_DEBUG("Ring %s doesn't support contexts\n", ring->name); return -EPERM; -- cgit v1.2.3-70-g09d2 From 0b74b508f78cea96d0d1b47e72cc0ec7959cdc68 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Fri, 19 Jul 2013 13:51:24 +0800 Subject: drm/i915: add prefault_disable module option prefault is stll enabled by default which prevent most of pwrite/pread/reloc from running slow path, in order to verify these slow pathes, prefault need to be disabled. Signed-off-by: Xiong Zhang [danvet: Make checkpatch happy and bikeshed the module option help text a bit.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 5 +++++ drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 12 +++++++----- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 ++++-- 4 files changed, 17 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b178a7ca129..c34086ad818 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -141,6 +141,11 @@ module_param_named(fastboot, i915_fastboot, bool, 0600); MODULE_PARM_DESC(fastboot, "Try to skip unnecessary mode sets at boot time " "(default: false)"); +bool i915_prefault_disable __read_mostly; +module_param_named(prefault_disable, i915_prefault_disable, bool, 0600); +MODULE_PARM_DESC(prefault_disable, + "Disable page prefaulting for pread/pwrite/reloc (default:false). For developers only."); + static struct drm_driver driver; extern int intel_agp_enabled; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 36d1c806e09..fc32d2034f3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1627,6 +1627,7 @@ extern unsigned int i915_preliminary_hw_support __read_mostly; extern int i915_disable_power_well __read_mostly; extern int i915_enable_ips __read_mostly; extern bool i915_fastboot __read_mostly; +extern bool i915_prefault_disable __read_mostly; extern int i915_suspend(struct drm_device *dev, pm_message_t state); extern int i915_resume(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ce043f14eab..acc99d45814 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -465,7 +465,7 @@ i915_gem_shmem_pread(struct drm_device *dev, mutex_unlock(&dev->struct_mutex); - if (!prefaulted) { + if (likely(!i915_prefault_disable) && !prefaulted) { ret = fault_in_multipages_writeable(user_data, remain); /* Userspace is tricking us, but we've already clobbered * its pages with the prefault and promised to write the @@ -860,10 +860,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, args->size)) return -EFAULT; - ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), - args->size); - if (ret) - return -EFAULT; + if (likely(!i915_prefault_disable)) { + ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), + args->size); + if (ret) + return -EFAULT; + } ret = i915_mutex_lock_interruptible(dev); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1b58694d7be..1734825bef3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -759,8 +759,10 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, if (!access_ok(VERIFY_WRITE, ptr, length)) return -EFAULT; - if (fault_in_multipages_readable(ptr, length)) - return -EFAULT; + if (likely(!i915_prefault_disable)) { + if (fault_in_multipages_readable(ptr, length)) + return -EFAULT; + } } return 0; -- cgit v1.2.3-70-g09d2 From de51f04f06d5e4a37f8e5a2b1019eb34140480f0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 21 Jul 2013 17:23:11 +0100 Subject: drm/i915: Replace open-coded offset_in_page() Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1734825bef3..5b6d764e9bb 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -255,7 +255,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, reloc->delta += target_offset; if (use_cpu_reloc(obj)) { - uint32_t page_offset = reloc->offset & ~PAGE_MASK; + uint32_t page_offset = offset_in_page(reloc->offset); char *vaddr; ret = i915_gem_object_set_to_cpu_domain(obj, 1); @@ -284,7 +284,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, reloc->offset & PAGE_MASK); reloc_entry = (uint32_t __iomem *) - (reloc_page + (reloc->offset & ~PAGE_MASK)); + (reloc_page + offset_in_page(reloc->offset)); iowrite32(reloc->delta, reloc_entry); io_mapping_unmap_atomic(reloc_page); } -- cgit v1.2.3-70-g09d2 From c37e22046148971a35a89931aa1f951bb99d5514 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 31 Jul 2013 16:59:58 -0700 Subject: drm/i915: Add VM to pin To verbalize it, one can say, "pin an object into the given address space." The semantics of pinning remain the same otherwise. Certain objects will always have to be bound into the global GTT. Therefore, global GTT is a special case, and keep a special interface around for it (i915_gem_obj_ggtt_pin). v2: s/i915_gem_ggtt_pin/i915_gem_obj_ggtt_pin Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 11 +++++++++++ drivers/gpu/drm/i915/i915_gem.c | 9 +++++---- drivers/gpu/drm/i915/i915_gem_context.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +++- drivers/gpu/drm/i915/intel_overlay.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++++---- 7 files changed, 27 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 79d4fed9d06..a8b51d525f8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1710,6 +1710,7 @@ struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, void i915_gem_vma_destroy(struct i915_vma *vma); int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, uint32_t alignment, bool map_and_fenceable, bool nonblocking); @@ -1895,6 +1896,16 @@ i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj) { return i915_gem_obj_size(obj, obj_to_ggtt(obj)); } + +static inline int __must_check +i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, + uint32_t alignment, + bool map_and_fenceable, + bool nonblocking) +{ + return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment, + map_and_fenceable, nonblocking); +} #undef obj_to_ggtt /* i915_gem_context.c */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c9de97ac1d0..8322dbe3ff1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -592,7 +592,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, char __user *user_data; int page_offset, page_length, ret; - ret = i915_gem_object_pin(obj, 0, true, true); + ret = i915_gem_obj_ggtt_pin(obj, 0, true, true); if (ret) goto out; @@ -1346,7 +1346,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } /* Now bind it into the GTT if needed */ - ret = i915_gem_object_pin(obj, 0, true, false); + ret = i915_gem_obj_ggtt_pin(obj, 0, true, false); if (ret) goto unlock; @@ -3488,7 +3488,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. */ - ret = i915_gem_object_pin(obj, alignment, true, false); + ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false); if (ret) return ret; @@ -3631,6 +3631,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) int i915_gem_object_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, uint32_t alignment, bool map_and_fenceable, bool nonblocking) @@ -3720,7 +3721,7 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, } if (obj->user_pin_count == 0) { - ret = i915_gem_object_pin(obj, args->alignment, true, false); + ret = i915_gem_obj_ggtt_pin(obj, args->alignment, true, false); if (ret) goto out; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 2470206a4d0..d1cb28cbc71 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -214,7 +214,7 @@ static int create_default_context(struct drm_i915_private *dev_priv) * default context. */ dev_priv->ring[RCS].default_context = ctx; - ret = i915_gem_object_pin(ctx->obj, CONTEXT_ALIGN, false, false); + ret = i915_gem_obj_ggtt_pin(ctx->obj, CONTEXT_ALIGN, false, false); if (ret) { DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret); goto err_destroy; @@ -400,7 +400,7 @@ static int do_switch(struct i915_hw_context *to) if (from == to) return 0; - ret = i915_gem_object_pin(to->obj, CONTEXT_ALIGN, false, false); + ret = i915_gem_obj_ggtt_pin(to->obj, CONTEXT_ALIGN, false, false); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 5b6d764e9bb..7addab31783 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -409,7 +409,9 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(obj); - ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false); + /* FIXME: vm plubming */ + ret = i915_gem_object_pin(obj, &dev_priv->gtt.base, entry->alignment, + need_mappable, false); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 9ec5a4e12af..ddfd0aefe0c 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1352,7 +1352,7 @@ void intel_setup_overlay(struct drm_device *dev) } overlay->flip_addr = reg_bo->phys_obj->handle->busaddr; } else { - ret = i915_gem_object_pin(reg_bo, PAGE_SIZE, true, false); + ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, true, false); if (ret) { DRM_ERROR("failed to pin overlay register bo\n"); goto out_free_bo; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index da1b6412161..4c4020631b3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2886,7 +2886,7 @@ intel_alloc_context_page(struct drm_device *dev) return NULL; } - ret = i915_gem_object_pin(ctx, 4096, true, false); + ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false); if (ret) { DRM_ERROR("failed to pin power context: %d\n", ret); goto err_unref; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8527ea05124..74d02a70451 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -501,7 +501,7 @@ init_pipe_control(struct intel_ring_buffer *ring) i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - ret = i915_gem_object_pin(obj, 4096, true, false); + ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false); if (ret) goto err_unref; @@ -1224,7 +1224,7 @@ static int init_status_page(struct intel_ring_buffer *ring) i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - ret = i915_gem_object_pin(obj, 4096, true, false); + ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false); if (ret != 0) { goto err_unref; } @@ -1307,7 +1307,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, ring->obj = obj; - ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false); + ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false); if (ret) goto err_unref; @@ -1828,7 +1828,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) return -ENOMEM; } - ret = i915_gem_object_pin(obj, 0, true, false); + ret = i915_gem_obj_ggtt_pin(obj, 0, true, false); if (ret != 0) { drm_gem_object_unreference(&obj->base); DRM_ERROR("Failed to ping batch bo\n"); -- cgit v1.2.3-70-g09d2 From 28d6a7bfa2560cb94727a68511ed68561e84dcc8 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 31 Jul 2013 17:00:02 -0700 Subject: drm/i915: thread address space through execbuf This represents the first half of hooking up VMs to execbuf. Here we basically pass an address space all around to the different internal functions. It should be much more readable, and have less risk than the second half, which begins switching over to using VMAs instead of an obj,vm. The overall series echoes this style of, "add a VM, then make it smart later" Signed-off-by: Ben Widawsky [danvet: Switch a BUG_ON to WARN_ON.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 77 +++++++++++++++++++----------- 1 file changed, 49 insertions(+), 28 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7addab31783..9939d2ef3ea 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -174,7 +174,8 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_objects *eb, - struct drm_i915_gem_relocation_entry *reloc) + struct drm_i915_gem_relocation_entry *reloc, + struct i915_address_space *vm) { struct drm_device *dev = obj->base.dev; struct drm_gem_object *target_obj; @@ -297,7 +298,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, static int i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, - struct eb_objects *eb) + struct eb_objects *eb, + struct i915_address_space *vm) { #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; @@ -321,7 +323,8 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, do { u64 offset = r->presumed_offset; - ret = i915_gem_execbuffer_relocate_entry(obj, eb, r); + ret = i915_gem_execbuffer_relocate_entry(obj, eb, r, + vm); if (ret) return ret; @@ -344,13 +347,15 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, static int i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj, struct eb_objects *eb, - struct drm_i915_gem_relocation_entry *relocs) + struct drm_i915_gem_relocation_entry *relocs, + struct i915_address_space *vm) { const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; int i, ret; for (i = 0; i < entry->relocation_count; i++) { - ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]); + ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i], + vm); if (ret) return ret; } @@ -359,7 +364,8 @@ i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj, } static int -i915_gem_execbuffer_relocate(struct eb_objects *eb) +i915_gem_execbuffer_relocate(struct eb_objects *eb, + struct i915_address_space *vm) { struct drm_i915_gem_object *obj; int ret = 0; @@ -373,7 +379,7 @@ i915_gem_execbuffer_relocate(struct eb_objects *eb) */ pagefault_disable(); list_for_each_entry(obj, &eb->objects, exec_list) { - ret = i915_gem_execbuffer_relocate_object(obj, eb); + ret = i915_gem_execbuffer_relocate_object(obj, eb, vm); if (ret) break; } @@ -395,6 +401,7 @@ need_reloc_mappable(struct drm_i915_gem_object *obj) static int i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, struct intel_ring_buffer *ring, + struct i915_address_space *vm, bool *need_reloc) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; @@ -409,9 +416,8 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(obj); - /* FIXME: vm plubming */ - ret = i915_gem_object_pin(obj, &dev_priv->gtt.base, entry->alignment, - need_mappable, false); + ret = i915_gem_object_pin(obj, vm, entry->alignment, need_mappable, + false); if (ret) return ret; @@ -438,8 +444,8 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, obj->has_aliasing_ppgtt_mapping = 1; } - if (entry->offset != i915_gem_obj_ggtt_offset(obj)) { - entry->offset = i915_gem_obj_ggtt_offset(obj); + if (entry->offset != i915_gem_obj_offset(obj, vm)) { + entry->offset = i915_gem_obj_offset(obj, vm); *need_reloc = true; } @@ -477,6 +483,7 @@ i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj) static int i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, struct list_head *objects, + struct i915_address_space *vm, bool *need_relocs) { struct drm_i915_gem_object *obj; @@ -531,32 +538,37 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, list_for_each_entry(obj, objects, exec_list) { struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; bool need_fence, need_mappable; + u32 obj_offset; - if (!i915_gem_obj_ggtt_bound(obj)) + if (!i915_gem_obj_bound(obj, vm)) continue; + obj_offset = i915_gem_obj_offset(obj, vm); need_fence = has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(obj); + WARN_ON((need_mappable || need_fence) && + !i915_is_ggtt(vm)); + if ((entry->alignment && - i915_gem_obj_ggtt_offset(obj) & (entry->alignment - 1)) || + obj_offset & (entry->alignment - 1)) || (need_mappable && !obj->map_and_fenceable)) ret = i915_gem_object_unbind(obj); else - ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs); + ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs); if (ret) goto err; } /* Bind fresh objects */ list_for_each_entry(obj, objects, exec_list) { - if (i915_gem_obj_ggtt_bound(obj)) + if (i915_gem_obj_bound(obj, vm)) continue; - ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs); + ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs); if (ret) goto err; } @@ -580,7 +592,8 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, struct drm_file *file, struct intel_ring_buffer *ring, struct eb_objects *eb, - struct drm_i915_gem_exec_object2 *exec) + struct drm_i915_gem_exec_object2 *exec, + struct i915_address_space *vm) { struct drm_i915_gem_relocation_entry *reloc; struct drm_i915_gem_object *obj; @@ -664,14 +677,15 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, goto err; need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; - ret = i915_gem_execbuffer_reserve(ring, &eb->objects, &need_relocs); + ret = i915_gem_execbuffer_reserve(ring, &eb->objects, vm, &need_relocs); if (ret) goto err; list_for_each_entry(obj, &eb->objects, exec_list) { int offset = obj->exec_entry - exec; ret = i915_gem_execbuffer_relocate_object_slow(obj, eb, - reloc + reloc_offset[offset]); + reloc + reloc_offset[offset], + vm); if (ret) goto err; } @@ -772,6 +786,7 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, static void i915_gem_execbuffer_move_to_active(struct list_head *objects, + struct i915_address_space *vm, struct intel_ring_buffer *ring) { struct drm_i915_gem_object *obj; @@ -840,7 +855,8 @@ static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, - struct drm_i915_gem_exec_object2 *exec) + struct drm_i915_gem_exec_object2 *exec, + struct i915_address_space *vm) { drm_i915_private_t *dev_priv = dev->dev_private; struct eb_objects *eb; @@ -1002,17 +1018,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* Move the objects en-masse into the GTT, evicting if necessary. */ need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; - ret = i915_gem_execbuffer_reserve(ring, &eb->objects, &need_relocs); + ret = i915_gem_execbuffer_reserve(ring, &eb->objects, vm, &need_relocs); if (ret) goto err; /* The objects are in their final locations, apply the relocations. */ if (need_relocs) - ret = i915_gem_execbuffer_relocate(eb); + ret = i915_gem_execbuffer_relocate(eb, vm); if (ret) { if (ret == -EFAULT) { ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring, - eb, exec); + eb, exec, vm); BUG_ON(!mutex_is_locked(&dev->struct_mutex)); } if (ret) @@ -1063,7 +1079,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; } - exec_start = i915_gem_obj_ggtt_offset(batch_obj) + args->batch_start_offset; + exec_start = i915_gem_obj_offset(batch_obj, vm) + + args->batch_start_offset; exec_len = args->batch_len; if (cliprects) { for (i = 0; i < args->num_cliprects; i++) { @@ -1088,7 +1105,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags); - i915_gem_execbuffer_move_to_active(&eb->objects, ring); + i915_gem_execbuffer_move_to_active(&eb->objects, vm, ring); i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); err: @@ -1109,6 +1126,7 @@ int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_execbuffer *args = data; struct drm_i915_gem_execbuffer2 exec2; struct drm_i915_gem_exec_object *exec_list = NULL; @@ -1164,7 +1182,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, exec2.flags = I915_EXEC_RENDER; i915_execbuffer2_set_context_id(exec2, 0); - ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list); + ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list, + &dev_priv->gtt.base); if (!ret) { /* Copy the new buffer offsets back to the user's exec list. */ for (i = 0; i < args->buffer_count; i++) @@ -1190,6 +1209,7 @@ int i915_gem_execbuffer2(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list = NULL; int ret; @@ -1220,7 +1240,8 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EFAULT; } - ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list); + ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list, + &dev_priv->gtt.base); if (!ret) { /* Copy the new buffer offsets back to the user's exec list. */ ret = copy_to_user(to_user_ptr(args->buffers_ptr), -- cgit v1.2.3-70-g09d2 From 07fe0b12800d4752d729d4122c01f41f80a5ba5a Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 31 Jul 2013 17:00:10 -0700 Subject: drm/i915: plumb VM into bind/unbind code As alluded to in several patches, and it will be reiterated later... A VMA is an abstraction for a GEM BO bound into an address space. Therefore it stands to reason, that the existing bind, and unbind are the ones which will be the most impacted. This patch implements this, and updates all callers which weren't already updated in the series (because it was too messy). This patch represents the bulk of an earlier, larger patch. I've pulled out a bunch of things by the request of Daniel. The history is preserved for posterity with the email convention of ">" One big change from the original patch aside from a bunch of cropping is I've created an i915_vma_unbind() function. That is because we always have the VMA anyway, and doing an extra lookup is useful. There is a caveat, we retain an i915_gem_object_ggtt_unbind, for the global cases which might not talk in VMAs. > drm/i915: plumb VM into object operations > > This patch was formerly known as: > "drm/i915: Create VMAs (part 3) - plumbing" > > This patch adds a VM argument, bind/unbind, and the object > offset/size/color getters/setters. It preserves the old ggtt helper > functions because things still need, and will continue to need them. > > Some code will still need to be ported over after this. > > v2: Fix purge to pick an object and unbind all vmas > This was doable because of the global bound list change. > > v3: With the commit to actually pin/unpin pages in place, there is no > longer a need to check if unbind succeeded before calling put_pages(). > Make put_pages only BUG() after checking pin count. > > v4: Rebased on top of the new hangcheck work by Mika > plumbed eb_destroy also > Many checkpatch related fixes > > v5: Very large rebase > > v6: > Change BUG_ON to WARN_ON (Daniel) > Rename vm to ggtt in preallocate stolen, since it is always ggtt when > dealing with stolen memory. (Daniel) > list_for_each will short-circuit already (Daniel) > remove superflous space (Daniel) > Use per object list of vmas (Daniel) > Make obj_bound_any() use obj_bound for each vm (Ben) > s/bind_to_gtt/bind_to_vm/ (Ben) > > Fixed up the inactive shrinker. As Daniel noticed the code could > potentially count the same object multiple times. While it's not > possible in the current case, since 1 object can only ever be bound into > 1 address space thus far - we may as well try to get something more > future proof in place now. With a prep patch before this to switch over > to using the bound list + inactive check, we're now able to carry that > forward for every address space an object is bound into. Signed-off-by: Ben Widawsky [danvet: Rebase on top of the loss of "drm/i915: Cleanup more of VMA in destroy".] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem.c | 134 +++++++++++++++++++---------- drivers/gpu/drm/i915/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_tiling.c | 9 +- drivers/gpu/drm/i915/i915_trace.h | 37 ++++---- 7 files changed, 120 insertions(+), 71 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 748af58b0ce..d2935b4fd69 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1800,7 +1800,7 @@ i915_drop_caches_set(void *data, u64 val) if (obj->pin_count) continue; - ret = i915_gem_object_unbind(obj); + ret = i915_gem_object_ggtt_unbind(obj); if (ret) goto unlock; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4f93467fdfd..8205b4b4f2b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1737,7 +1737,8 @@ int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, bool map_and_fenceable, bool nonblocking); void i915_gem_object_unpin(struct drm_i915_gem_object *obj); -int __must_check i915_gem_object_unbind(struct drm_i915_gem_object *obj); +int __must_check i915_vma_unbind(struct i915_vma *vma); +int __must_check i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj); int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); void i915_gem_lastclose(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4ca8f9fad91..db9792c4782 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -38,10 +38,12 @@ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, - unsigned alignment, - bool map_and_fenceable, - bool nonblocking); +static __must_check int +i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + unsigned alignment, + bool map_and_fenceable, + bool nonblocking); static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, @@ -1692,7 +1694,6 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target, bool purgeable_only) { struct drm_i915_gem_object *obj, *next; - struct i915_address_space *vm = &dev_priv->gtt.base; long count = 0; list_for_each_entry_safe(obj, next, @@ -1706,13 +1707,16 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target, } } - list_for_each_entry_safe(obj, next, &vm->inactive_list, mm_list) { + list_for_each_entry_safe(obj, next, &dev_priv->mm.bound_list, + global_list) { + struct i915_vma *vma, *v; if (!i915_gem_object_is_purgeable(obj) && purgeable_only) continue; - if (i915_gem_object_unbind(obj)) - continue; + list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link) + if (i915_vma_unbind(vma)) + break; if (!i915_gem_object_put_pages(obj)) { count += obj->base.size >> PAGE_SHIFT; @@ -2596,17 +2600,13 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) old_write_domain); } -/** - * Unbinds an object from the GTT aperture. - */ -int -i915_gem_object_unbind(struct drm_i915_gem_object *obj) +int i915_vma_unbind(struct i915_vma *vma) { + struct drm_i915_gem_object *obj = vma->obj; drm_i915_private_t *dev_priv = obj->base.dev->dev_private; - struct i915_vma *vma; int ret; - if (!i915_gem_obj_ggtt_bound(obj)) + if (list_empty(&vma->vma_link)) return 0; if (obj->pin_count) @@ -2629,7 +2629,7 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) if (ret) return ret; - trace_i915_gem_object_unbind(obj); + trace_i915_vma_unbind(vma); if (obj->has_global_gtt_mapping) i915_gem_gtt_unbind_object(obj); @@ -2644,7 +2644,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) /* Avoid an unnecessary call to unbind on rebind. */ obj->map_and_fenceable = true; - vma = i915_gem_obj_to_vma(obj, &dev_priv->gtt.base); list_del(&vma->vma_link); drm_mm_remove_node(&vma->node); i915_gem_vma_destroy(vma); @@ -2659,6 +2658,26 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) return 0; } +/** + * Unbinds an object from the global GTT aperture. + */ +int +i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + struct i915_address_space *ggtt = &dev_priv->gtt.base; + + if (!i915_gem_obj_ggtt_bound(obj)); + return 0; + + if (obj->pin_count) + return -EBUSY; + + BUG_ON(obj->pages == NULL); + + return i915_vma_unbind(i915_gem_obj_to_vma(obj, ggtt)); +} + int i915_gpu_idle(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; @@ -3076,18 +3095,18 @@ static void i915_gem_verify_gtt(struct drm_device *dev) * Finds free space in the GTT aperture and binds the object there. */ static int -i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, - unsigned alignment, - bool map_and_fenceable, - bool nonblocking) +i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + unsigned alignment, + bool map_and_fenceable, + bool nonblocking) { struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; - struct i915_address_space *vm = &dev_priv->gtt.base; u32 size, fence_size, fence_alignment, unfenced_alignment; bool mappable, fenceable; - size_t gtt_max = map_and_fenceable ? - dev_priv->gtt.mappable_end : dev_priv->gtt.base.total; + size_t gtt_max = + map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total; struct i915_vma *vma; int ret; @@ -3132,15 +3151,18 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, i915_gem_object_pin_pages(obj); - vma = i915_gem_vma_create(obj, &dev_priv->gtt.base); + /* FIXME: For now we only ever use 1 VMA per object */ + BUG_ON(!i915_is_ggtt(vm)); + WARN_ON(!list_empty(&obj->vma_list)); + + vma = i915_gem_vma_create(obj, vm); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_unpin; } search_free: - ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm, - &vma->node, + ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, size, alignment, obj->cache_level, 0, gtt_max); if (ret) { @@ -3165,18 +3187,25 @@ search_free: list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); list_add_tail(&obj->mm_list, &vm->inactive_list); - list_add(&vma->vma_link, &obj->vma_list); + + /* Keep GGTT vmas first to make debug easier */ + if (i915_is_ggtt(vm)) + list_add(&vma->vma_link, &obj->vma_list); + else + list_add_tail(&vma->vma_link, &obj->vma_list); fenceable = + i915_is_ggtt(vm) && i915_gem_obj_ggtt_size(obj) == fence_size && (i915_gem_obj_ggtt_offset(obj) & (fence_alignment - 1)) == 0; - mappable = i915_gem_obj_ggtt_offset(obj) + obj->base.size <= - dev_priv->gtt.mappable_end; + mappable = + i915_is_ggtt(vm) && + vma->node.start + obj->base.size <= dev_priv->gtt.mappable_end; obj->map_and_fenceable = mappable && fenceable; - trace_i915_gem_object_bind(obj, map_and_fenceable); + trace_i915_vma_bind(vma, map_and_fenceable); i915_gem_verify_gtt(dev); return 0; @@ -3345,7 +3374,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, list_for_each_entry(vma, &obj->vma_list, vma_link) { if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) { - ret = i915_gem_object_unbind(obj); + ret = i915_vma_unbind(vma); if (ret) return ret; @@ -3653,33 +3682,39 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, bool map_and_fenceable, bool nonblocking) { + struct i915_vma *vma; int ret; if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) return -EBUSY; - if (i915_gem_obj_ggtt_bound(obj)) { - if ((alignment && i915_gem_obj_ggtt_offset(obj) & (alignment - 1)) || + WARN_ON(map_and_fenceable && !i915_is_ggtt(vm)); + + vma = i915_gem_obj_to_vma(obj, vm); + + if (vma) { + if ((alignment && + vma->node.start & (alignment - 1)) || (map_and_fenceable && !obj->map_and_fenceable)) { WARN(obj->pin_count, "bo is already pinned with incorrect alignment:" " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," " obj->map_and_fenceable=%d\n", - i915_gem_obj_ggtt_offset(obj), alignment, + i915_gem_obj_offset(obj, vm), alignment, map_and_fenceable, obj->map_and_fenceable); - ret = i915_gem_object_unbind(obj); + ret = i915_vma_unbind(vma); if (ret) return ret; } } - if (!i915_gem_obj_ggtt_bound(obj)) { + if (!i915_gem_obj_bound(obj, vm)) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - ret = i915_gem_object_bind_to_gtt(obj, alignment, - map_and_fenceable, - nonblocking); + ret = i915_gem_object_bind_to_vm(obj, vm, alignment, + map_and_fenceable, + nonblocking); if (ret) return ret; @@ -3975,6 +4010,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; + struct i915_vma *vma, *next; trace_i915_gem_object_destroy(obj); @@ -3982,15 +4018,21 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) i915_gem_detach_phys_object(dev, obj); obj->pin_count = 0; - if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) { - bool was_interruptible; + /* NB: 0 or 1 elements */ + WARN_ON(!list_empty(&obj->vma_list) && + !list_is_singular(&obj->vma_list)); + list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { + int ret = i915_vma_unbind(vma); + if (WARN_ON(ret == -ERESTARTSYS)) { + bool was_interruptible; - was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; + was_interruptible = dev_priv->mm.interruptible; + dev_priv->mm.interruptible = false; - WARN_ON(i915_gem_object_unbind(obj)); + WARN_ON(i915_vma_unbind(vma)); - dev_priv->mm.interruptible = was_interruptible; + dev_priv->mm.interruptible = was_interruptible; + } } /* Stolen objects don't hold a ref, but do hold pin count. Fix that up diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 33d85a4447a..9205a4179b7 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -147,7 +147,7 @@ found: struct drm_i915_gem_object, exec_list); if (ret == 0) - ret = i915_gem_object_unbind(obj); + ret = i915_gem_object_ggtt_unbind(obj); list_del_init(&obj->exec_list); drm_gem_object_unreference(&obj->base); @@ -185,7 +185,7 @@ i915_gem_evict_everything(struct drm_device *dev) /* Having flushed everything, unbind() should never raise an error */ list_for_each_entry_safe(obj, next, &vm->inactive_list, mm_list) if (obj->pin_count == 0) - WARN_ON(i915_gem_object_unbind(obj)); + WARN_ON(i915_gem_object_ggtt_unbind(obj)); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 9939d2ef3ea..17be2e4bae6 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -556,7 +556,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, if ((entry->alignment && obj_offset & (entry->alignment - 1)) || (need_mappable && !obj->map_and_fenceable)) - ret = i915_gem_object_unbind(obj); + ret = i915_vma_unbind(i915_gem_obj_to_vma(obj, vm)); else ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 92a8d279ca3..032e9ef9c89 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -360,17 +360,18 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, obj->map_and_fenceable = !i915_gem_obj_ggtt_bound(obj) || - (i915_gem_obj_ggtt_offset(obj) + obj->base.size <= dev_priv->gtt.mappable_end && + (i915_gem_obj_ggtt_offset(obj) + + obj->base.size <= dev_priv->gtt.mappable_end && i915_gem_object_fence_ok(obj, args->tiling_mode)); /* Rebind if we need a change of alignment */ if (!obj->map_and_fenceable) { - u32 unfenced_alignment = + u32 unfenced_align = i915_gem_get_gtt_alignment(dev, obj->base.size, args->tiling_mode, false); - if (i915_gem_obj_ggtt_offset(obj) & (unfenced_alignment - 1)) - ret = i915_gem_object_unbind(obj); + if (i915_gem_obj_ggtt_offset(obj) & (unfenced_align - 1)) + ret = i915_gem_object_ggtt_unbind(obj); } if (ret == 0) { diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 2933e2ffeaa..e2c5ee6f619 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -33,47 +33,52 @@ TRACE_EVENT(i915_gem_object_create, TP_printk("obj=%p, size=%u", __entry->obj, __entry->size) ); -TRACE_EVENT(i915_gem_object_bind, - TP_PROTO(struct drm_i915_gem_object *obj, bool mappable), - TP_ARGS(obj, mappable), +TRACE_EVENT(i915_vma_bind, + TP_PROTO(struct i915_vma *vma, bool mappable), + TP_ARGS(vma, mappable), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) + __field(struct i915_address_space *, vm) __field(u32, offset) __field(u32, size) __field(bool, mappable) ), TP_fast_assign( - __entry->obj = obj; - __entry->offset = i915_gem_obj_ggtt_offset(obj); - __entry->size = i915_gem_obj_ggtt_size(obj); + __entry->obj = vma->obj; + __entry->vm = vma->vm; + __entry->offset = vma->node.start; + __entry->size = vma->node.size; __entry->mappable = mappable; ), - TP_printk("obj=%p, offset=%08x size=%x%s", + TP_printk("obj=%p, offset=%08x size=%x%s vm=%p", __entry->obj, __entry->offset, __entry->size, - __entry->mappable ? ", mappable" : "") + __entry->mappable ? ", mappable" : "", + __entry->vm) ); -TRACE_EVENT(i915_gem_object_unbind, - TP_PROTO(struct drm_i915_gem_object *obj), - TP_ARGS(obj), +TRACE_EVENT(i915_vma_unbind, + TP_PROTO(struct i915_vma *vma), + TP_ARGS(vma), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) + __field(struct i915_address_space *, vm) __field(u32, offset) __field(u32, size) ), TP_fast_assign( - __entry->obj = obj; - __entry->offset = i915_gem_obj_ggtt_offset(obj); - __entry->size = i915_gem_obj_ggtt_size(obj); + __entry->obj = vma->obj; + __entry->vm = vma->vm; + __entry->offset = vma->node.start; + __entry->size = vma->node.size; ), - TP_printk("obj=%p, offset=%08x size=%x", - __entry->obj, __entry->offset, __entry->size) + TP_printk("obj=%p, offset=%08x size=%x vm=%p", + __entry->obj, __entry->offset, __entry->size, __entry->vm) ); TRACE_EVENT(i915_gem_object_change_domain, -- cgit v1.2.3-70-g09d2 From 9843877d10700d6b64b615e0e8724fc9f6ff6268 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 31 Jul 2013 17:00:12 -0700 Subject: drm/i915: turn bound_ggtt checks to bound_any In some places, we want to know if an object is bound in any address space, and not just the global GTT. This often applies when there is a single global resource (object, pages, etc.) function | reason -------------------------------------------------- i915_gem_object_is_inactive | global object i915_gem_object_put_pages | object's pages 915_gem_object_unpin | global object i915_gem_execbuffer_unreserve_object | temporary until we plumb vma pread/pwrite | see the note below Note: set_to_gtt_domain in pwrite/pread is abused as a wait_rendering call - but that once only worked if the object is bound. We really should replace this with a plain wait_rendering call, which would have the upside that in pread it would be clearer that we actually only wait for oustanding gpu writes. Signed-off-by: Ben Widawsky [danvet: Explain the set_to_gtt_domain in pwrite/pread and volunteer Ben to replace those with wait_rendering calls.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 12 ++++++------ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6c8c6b6b91c..a51731e9233 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -141,7 +141,7 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) static inline bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) { - return i915_gem_obj_ggtt_bound(obj) && !obj->active; + return i915_gem_obj_bound_any(obj) && !obj->active; } int @@ -422,7 +422,7 @@ i915_gem_shmem_pread(struct drm_device *dev, * anyway again before the next pread happens. */ if (obj->cache_level == I915_CACHE_NONE) needs_clflush = 1; - if (i915_gem_obj_ggtt_bound(obj)) { + if (i915_gem_obj_bound_any(obj)) { ret = i915_gem_object_set_to_gtt_domain(obj, false); if (ret) return ret; @@ -739,7 +739,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, * right away and we therefore have to clflush anyway. */ if (obj->cache_level == I915_CACHE_NONE) needs_clflush_after = 1; - if (i915_gem_obj_ggtt_bound(obj)) { + if (i915_gem_obj_bound_any(obj)) { ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) return ret; @@ -1673,7 +1673,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) if (obj->pages_pin_count) return -EBUSY; - BUG_ON(i915_gem_obj_ggtt_bound(obj)); + BUG_ON(i915_gem_obj_bound_any(obj)); /* ->put_pages might need to allocate memory for the bit17 swizzle * array, hence protect them from being reaped by removing them from gtt @@ -3311,7 +3311,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) int ret; /* Not valid to be called on unbound objects. */ - if (!i915_gem_obj_ggtt_bound(obj)) + if (!i915_gem_obj_bound_any(obj)) return -EINVAL; if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) @@ -3735,7 +3735,7 @@ void i915_gem_object_unpin(struct drm_i915_gem_object *obj) { BUG_ON(obj->pin_count == 0); - BUG_ON(!i915_gem_obj_ggtt_bound(obj)); + BUG_ON(!i915_gem_obj_bound_any(obj)); if (--obj->pin_count == 0) obj->pin_mappable = false; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 17be2e4bae6..aa3fa9425ca 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -466,7 +466,7 @@ i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj) { struct drm_i915_gem_exec_object2 *entry; - if (!i915_gem_obj_ggtt_bound(obj)) + if (!i915_gem_obj_bound_any(obj)) return; entry = obj->exec_entry; -- cgit v1.2.3-70-g09d2 From ca191b1313e733e47a9fb37c26b44aa6cdd9b1b1 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 31 Jul 2013 17:00:14 -0700 Subject: drm/i915: mm_list is per VMA formerly: "drm/i915: Create VMAs (part 5) - move mm_list" The mm_list is used for the active/inactive LRUs. Since those LRUs are per address space, the link should be per VMx . Because we'll only ever have 1 VMA before this point, it's not incorrect to defer this change until this point in the patch series, and doing it here makes the change much easier to understand. Shamelessly manipulated out of Daniel: "active/inactive stuff is used by eviction when we run out of address space, so needs to be per-vma and per-address space. Bound/unbound otoh is used by the shrinker which only cares about the amount of memory used and not one bit about in which address space this memory is all used in. Of course to actual kick out an object we need to unbind it from every address space, but for that we have the per-object list of vmas." v2: only bump GGTT LRU in i915_gem_object_set_to_gtt_domain (Chris) v3: Moved earlier in the series v4: Add dropped message from v3 Signed-off-by: Ben Widawsky [danvet: Frob patch to apply and use vma->node.size directly as discused with Ben. Also drop a needles BUG_ON before move_to_inactive, the function itself has the same check.] [danvet 2nd: Rebase on top of the lost "drm/i915: Cleanup more of VMA in destroy", specifically unlink the vma from the mm_list in vma_unbind (to keep it symmetric with bind_to_vm) instead of vma_destroy.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 54 +++++++++++++++++++----------- drivers/gpu/drm/i915/i915_drv.h | 5 +-- drivers/gpu/drm/i915/i915_gem.c | 28 +++++++++------- drivers/gpu/drm/i915/i915_gem_context.c | 3 ++ drivers/gpu/drm/i915/i915_gem_evict.c | 14 ++++---- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 ++ drivers/gpu/drm/i915/i915_gem_stolen.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 37 +++++++++++--------- 8 files changed, 86 insertions(+), 59 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5d52a23d566..a1f4c91fb11 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -149,7 +149,7 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data) struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct i915_address_space *vm = &dev_priv->gtt.base; - struct drm_i915_gem_object *obj; + struct i915_vma *vma; size_t total_obj_size, total_gtt_size; int count, ret; @@ -157,6 +157,7 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data) if (ret) return ret; + /* FIXME: the user of this interface might want more than just GGTT */ switch (list) { case ACTIVE_LIST: seq_puts(m, "Active:\n"); @@ -172,12 +173,12 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data) } total_obj_size = total_gtt_size = count = 0; - list_for_each_entry(obj, head, mm_list) { - seq_puts(m, " "); - describe_obj(m, obj); - seq_putc(m, '\n'); - total_obj_size += obj->base.size; - total_gtt_size += i915_gem_obj_ggtt_size(obj); + list_for_each_entry(vma, head, mm_list) { + seq_printf(m, " "); + describe_obj(m, vma->obj); + seq_printf(m, "\n"); + total_obj_size += vma->obj->base.size; + total_gtt_size += vma->node.size; count++; } mutex_unlock(&dev->struct_mutex); @@ -224,7 +225,18 @@ static int per_file_stats(int id, void *ptr, void *data) return 0; } -static int i915_gem_object_info(struct seq_file *m, void *data) +#define count_vmas(list, member) do { \ + list_for_each_entry(vma, list, member) { \ + size += i915_gem_obj_ggtt_size(vma->obj); \ + ++count; \ + if (vma->obj->map_and_fenceable) { \ + mappable_size += i915_gem_obj_ggtt_size(vma->obj); \ + ++mappable_count; \ + } \ + } \ +} while (0) + +static int i915_gem_object_info(struct seq_file *m, void* data) { struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; @@ -234,6 +246,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) struct drm_i915_gem_object *obj; struct i915_address_space *vm = &dev_priv->gtt.base; struct drm_file *file; + struct i915_vma *vma; int ret; ret = mutex_lock_interruptible(&dev->struct_mutex); @@ -250,12 +263,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data) count, mappable_count, size, mappable_size); size = count = mappable_size = mappable_count = 0; - count_objects(&vm->active_list, mm_list); + count_vmas(&vm->active_list, mm_list); seq_printf(m, " %u [%u] active objects, %zu [%zu] bytes\n", count, mappable_count, size, mappable_size); size = count = mappable_size = mappable_count = 0; - count_objects(&vm->inactive_list, mm_list); + count_vmas(&vm->inactive_list, mm_list); seq_printf(m, " %u [%u] inactive objects, %zu [%zu] bytes\n", count, mappable_count, size, mappable_size); @@ -1774,7 +1787,8 @@ i915_drop_caches_set(void *data, u64 val) struct drm_device *dev = data; struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj, *next; - struct i915_address_space *vm = &dev_priv->gtt.base; + struct i915_address_space *vm; + struct i915_vma *vma, *x; int ret; DRM_DEBUG_DRIVER("Dropping caches: 0x%08llx\n", val); @@ -1795,14 +1809,16 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_retire_requests(dev); if (val & DROP_BOUND) { - list_for_each_entry_safe(obj, next, &vm->inactive_list, - mm_list) { - if (obj->pin_count) - continue; - - ret = i915_gem_object_ggtt_unbind(obj); - if (ret) - goto unlock; + list_for_each_entry(vm, &dev_priv->vm_list, global_link) { + list_for_each_entry_safe(vma, x, &vm->inactive_list, + mm_list) { + if (vma->obj->pin_count) + continue; + + ret = i915_vma_unbind(vma); + if (ret) + goto unlock; + } } } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cb4521d9542..20becc5500b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -558,6 +558,9 @@ struct i915_vma { struct drm_i915_gem_object *obj; struct i915_address_space *vm; + /** This object's place on the active/inactive lists */ + struct list_head mm_list; + struct list_head vma_link; /* Link in the object's VMA list */ }; @@ -1299,9 +1302,7 @@ struct drm_i915_gem_object { struct drm_mm_node *stolen; struct list_head global_list; - /** This object's place on the active/inactive lists */ struct list_head ring_list; - struct list_head mm_list; /** This object's place in the batchbuffer or on the eviction list */ struct list_head exec_list; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5eacc497f17..985a1303555 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1886,7 +1886,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct i915_address_space *vm = &dev_priv->gtt.base; u32 seqno = intel_ring_get_seqno(ring); BUG_ON(ring == NULL); @@ -1902,8 +1901,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, obj->active = 1; } - /* Move from whatever list we were on to the tail of execution. */ - list_move_tail(&obj->mm_list, &vm->active_list); list_move_tail(&obj->ring_list, &ring->active_list); obj->last_read_seqno = seqno; @@ -1925,14 +1922,14 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, static void i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct i915_address_space *vm = &dev_priv->gtt.base; + struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + struct i915_address_space *ggtt_vm = &dev_priv->gtt.base; + struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); BUG_ON(!obj->active); - list_move_tail(&obj->mm_list, &vm->inactive_list); + list_move_tail(&vma->mm_list, &ggtt_vm->inactive_list); list_del_init(&obj->ring_list); obj->ring = NULL; @@ -2640,7 +2637,7 @@ int i915_vma_unbind(struct i915_vma *vma) i915_gem_gtt_finish_object(obj); i915_gem_object_unpin_pages(obj); - list_del(&obj->mm_list); + list_del(&vma->mm_list); /* Avoid an unnecessary call to unbind on rebind. */ if (i915_is_ggtt(vma->vm)) obj->map_and_fenceable = true; @@ -3187,7 +3184,7 @@ search_free: goto err_remove_node; list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); - list_add_tail(&obj->mm_list, &vm->inactive_list); + list_add_tail(&vma->mm_list, &vm->inactive_list); /* Keep GGTT vmas first to make debug easier */ if (i915_is_ggtt(vm)) @@ -3352,9 +3349,14 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) old_write_domain); /* And bump the LRU for this access */ - if (i915_gem_object_is_inactive(obj)) - list_move_tail(&obj->mm_list, - &dev_priv->gtt.base.inactive_list); + if (i915_gem_object_is_inactive(obj)) { + struct i915_vma *vma = i915_gem_obj_to_vma(obj, + &dev_priv->gtt.base); + if (vma) + list_move_tail(&vma->mm_list, + &dev_priv->gtt.base.inactive_list); + + } return 0; } @@ -3927,7 +3929,6 @@ unlock: void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops) { - INIT_LIST_HEAD(&obj->mm_list); INIT_LIST_HEAD(&obj->global_list); INIT_LIST_HEAD(&obj->ring_list); INIT_LIST_HEAD(&obj->exec_list); @@ -4069,6 +4070,7 @@ struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&vma->vma_link); + INIT_LIST_HEAD(&vma->mm_list); vma->vm = vm; vma->obj = obj; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 7273a729a03..403309c2a7d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -436,7 +436,10 @@ static int do_switch(struct i915_hw_context *to) * MI_SET_CONTEXT instead of when the next seqno has completed. */ if (from != NULL) { + struct drm_i915_private *dev_priv = from->obj->base.dev->dev_private; + struct i915_address_space *ggtt = &dev_priv->gtt.base; from->obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; + list_move_tail(&i915_gem_obj_to_vma(from->obj, ggtt)->mm_list, &ggtt->active_list); i915_gem_object_move_to_active(from->obj, ring); /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the * whole damn pipeline, we don't need to explicitly mark the diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 61bf5e20e5e..425939b7d34 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -87,8 +87,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level); /* First see if there is a large enough contiguous idle region... */ - list_for_each_entry(obj, &vm->inactive_list, mm_list) { - struct i915_vma *vma = i915_gem_obj_to_vma(obj, vm); + list_for_each_entry(vma, &vm->inactive_list, mm_list) { if (mark_free(vma, &unwind_list)) goto found; } @@ -97,8 +96,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, goto none; /* Now merge in the soon-to-be-expired objects... */ - list_for_each_entry(obj, &vm->active_list, mm_list) { - struct i915_vma *vma = i915_gem_obj_to_vma(obj, vm); + list_for_each_entry(vma, &vm->active_list, mm_list) { if (mark_free(vma, &unwind_list)) goto found; } @@ -159,7 +157,7 @@ i915_gem_evict_everything(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; struct i915_address_space *vm; - struct drm_i915_gem_object *obj, *next; + struct i915_vma *vma, *next; bool lists_empty = true; int ret; @@ -187,9 +185,9 @@ i915_gem_evict_everything(struct drm_device *dev) /* Having flushed everything, unbind() should never raise an error */ list_for_each_entry(vm, &dev_priv->vm_list, global_link) { - list_for_each_entry_safe(obj, next, &vm->inactive_list, mm_list) - if (obj->pin_count == 0) - WARN_ON(i915_vma_unbind(i915_gem_obj_to_vma(obj, vm))); + list_for_each_entry_safe(vma, next, &vm->inactive_list, mm_list) + if (vma->obj->pin_count == 0) + WARN_ON(i915_vma_unbind(vma)); } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index aa3fa9425ca..8ccc29ac962 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -801,6 +801,8 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, obj->base.read_domains = obj->base.pending_read_domains; obj->fenced_gpu_access = obj->pending_fenced_gpu_access; + /* FIXME: This lookup gets fixed later <-- danvet */ + list_move_tail(&i915_gem_obj_to_vma(obj, vm)->mm_list, &vm->active_list); i915_gem_object_move_to_active(obj, ring); if (obj->base.write_domain) { obj->dirty = 1; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 934840860c6..e68c4b5da46 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -401,7 +401,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, obj->has_global_gtt_mapping = 1; list_add_tail(&obj->global_list, &dev_priv->mm.bound_list); - list_add_tail(&obj->mm_list, &ggtt->inactive_list); + list_add_tail(&vma->mm_list, &ggtt->inactive_list); return obj; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8091485e7e8..fad48b2bb87 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -556,11 +556,11 @@ static void capture_bo(struct drm_i915_error_buffer *err, static u32 capture_active_bo(struct drm_i915_error_buffer *err, int count, struct list_head *head) { - struct drm_i915_gem_object *obj; + struct i915_vma *vma; int i = 0; - list_for_each_entry(obj, head, mm_list) { - capture_bo(err++, obj); + list_for_each_entry(vma, head, mm_list) { + capture_bo(err++, vma->obj); if (++i == count) break; } @@ -622,7 +622,8 @@ static struct drm_i915_error_object * i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, struct intel_ring_buffer *ring) { - struct i915_address_space *vm = &dev_priv->gtt.base; + struct i915_address_space *vm; + struct i915_vma *vma; struct drm_i915_gem_object *obj; u32 seqno; @@ -642,20 +643,23 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, } seqno = ring->get_seqno(ring, false); - list_for_each_entry(obj, &vm->active_list, mm_list) { - if (obj->ring != ring) - continue; + list_for_each_entry(vm, &dev_priv->vm_list, global_link) { + list_for_each_entry(vma, &vm->active_list, mm_list) { + obj = vma->obj; + if (obj->ring != ring) + continue; - if (i915_seqno_passed(seqno, obj->last_read_seqno)) - continue; + if (i915_seqno_passed(seqno, obj->last_read_seqno)) + continue; - if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0) - continue; + if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0) + continue; - /* We need to copy these to an anonymous buffer as the simplest - * method to avoid being overwritten by userspace. - */ - return i915_error_object_create(dev_priv, obj); + /* We need to copy these to an anonymous buffer as the simplest + * method to avoid being overwritten by userspace. + */ + return i915_error_object_create(dev_priv, obj); + } } return NULL; @@ -775,11 +779,12 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error) { struct i915_address_space *vm = &dev_priv->gtt.base; + struct i915_vma *vma; struct drm_i915_gem_object *obj; int i; i = 0; - list_for_each_entry(obj, &vm->active_list, mm_list) + list_for_each_entry(vma, &vm->active_list, mm_list) i++; error->active_bo_count = i; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) -- cgit v1.2.3-70-g09d2 From 2c22569bba8af6c2976d5f9479fe54a53a39966b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 9 Aug 2013 12:26:45 +0100 Subject: drm/i915: Update rules for writing through the LLC with the cpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As mentioned in the previous commit, reads and writes from both the CPU and GPU go through the LLC. This gives us coherency between the CPU and GPU irrespective of the attribute settings either device sets. We can use to avoid having to clflush even uncached memory. Except for the scanout. The scanout resides within another functional block that does not use the LLC but reads directly from main memory. So in order to maintain coherency with the scanout, writes to uncached memory must be flushed. In order to optimize writes elsewhere, we start tracking whether an framebuffer is attached to an object. v2: Use pin_display tracking rather than fb_count (to ensure we flush cursors as well etc) and only force the clflush along explicit writes to the scanout paths (i.e. pin_to_display_plane and pwrite into scanout). v3: Force the flush after hitting the slowpath in pwrite, as after dropping the lock the object's cache domain may be invalidated. (Ville) Based on a patch by Ville Syrjälä. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 58 ++++++++++++++++-------------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 4 files changed, 35 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b5df2308f8e..34d3f2fae8a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1841,7 +1841,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error) } void i915_gem_reset(struct drm_device *dev); -void i915_gem_clflush_object(struct drm_i915_gem_object *obj); +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b98c3b0e5a0..54d76e9392d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -37,7 +37,8 @@ #include static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); -static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); +static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, + bool force); static __must_check int i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -67,6 +68,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev, return HAS_LLC(dev) || level != I915_CACHE_NONE; } +static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + return true; + + return obj->pin_display; +} + static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) { if (obj->tiling_mode) @@ -742,8 +751,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, * write domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ - if (obj->cache_level == I915_CACHE_NONE) - needs_clflush_after = 1; + needs_clflush_after = cpu_write_needs_clflush(obj); if (i915_gem_obj_bound_any(obj)) { ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -833,7 +841,7 @@ out: */ if (!needs_clflush_after && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, obj->pin_display); i915_gem_chipset_flush(dev); } } @@ -911,9 +919,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, goto out; } - if (obj->cache_level == I915_CACHE_NONE && - obj->tiling_mode == I915_TILING_NONE && - obj->base.write_domain != I915_GEM_DOMAIN_CPU) { + if (obj->tiling_mode == I915_TILING_NONE && + obj->base.write_domain != I915_GEM_DOMAIN_CPU && + cpu_write_needs_clflush(obj)) { ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between @@ -1262,8 +1270,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, } /* Pinned buffers may be scanout, so flush the cache */ - if (obj->pin_count) - i915_gem_object_flush_cpu_write_domain(obj); + if (obj->pin_display) + i915_gem_object_flush_cpu_write_domain(obj, true); drm_gem_object_unreference(&obj->base); unlock: @@ -1640,7 +1648,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) * hope for the best. */ WARN_ON(ret != -EIO); - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, true); obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; } @@ -3217,7 +3225,8 @@ err_unpin: } void -i915_gem_clflush_object(struct drm_i915_gem_object *obj) +i915_gem_clflush_object(struct drm_i915_gem_object *obj, + bool force) { /* If we don't have a page list set up, then we're not pinned * to GPU, and we can ignore the cache flush because it'll happen @@ -3241,7 +3250,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj) * snooping behaviour occurs naturally as the result of our domain * tracking. */ - if (obj->cache_level != I915_CACHE_NONE) + if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) return; trace_i915_gem_object_clflush(obj); @@ -3278,14 +3287,15 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) /** Flushes the CPU write domain for the object if it's dirty. */ static void -i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) +i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, + bool force) { uint32_t old_write_domain; if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, force); i915_gem_chipset_flush(obj->base.dev); old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; @@ -3319,7 +3329,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_cpu_write_domain(obj); + i915_gem_object_flush_cpu_write_domain(obj, false); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -3409,7 +3419,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, obj, cache_level); } - if (cache_level == I915_CACHE_NONE) { + list_for_each_entry(vma, &obj->vma_list, vma_link) + vma->node.color = cache_level; + obj->cache_level = cache_level; + + if (cpu_write_needs_clflush(obj)) { u32 old_read_domains, old_write_domain; /* If we're coming from LLC cached, then we haven't @@ -3432,9 +3446,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, old_write_domain); } - list_for_each_entry(vma, &obj->vma_list, vma_link) - vma->node.color = cache_level; - obj->cache_level = cache_level; i915_gem_verify_gtt(dev); return 0; } @@ -3562,7 +3573,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (ret) goto err_unpin_display; - i915_gem_object_flush_cpu_write_domain(obj); + i915_gem_object_flush_cpu_write_domain(obj, true); old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; @@ -3634,8 +3645,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, false); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } @@ -3817,10 +3827,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, obj->user_pin_count++; obj->pin_filp = file; - /* XXX - flush the CPU caches for pinned objects - * as the X server doesn't manage domains yet - */ - i915_gem_object_flush_cpu_write_domain(obj); args->offset = i915_gem_obj_ggtt_offset(obj); out: drm_gem_object_unreference(&obj->base); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 8ccc29ac962..e999578a021 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -716,7 +716,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, return ret; if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, false); flush_domains |= obj->base.write_domain; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 24fb989593f..c9420c280cf 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -487,7 +487,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) dev_priv->gtt.base.total / PAGE_SIZE); list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, obj->pin_display); i915_gem_gtt_bind_object(obj, obj->cache_level); } -- cgit v1.2.3-70-g09d2 From 000433b67e46771a7c08f78574943855a98c53ec Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Aug 2013 14:41:09 +0100 Subject: drm/i915: Only do a chipset flush after a clflush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we skip clflushes more often, return a boolean indicating whether the clflush was actually performed, and only if it was do the chipset flush. (Though on most of the architectures where the clflush will be skipped, the chipset flush is a no-op!) Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 20 +++++++++++--------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +++-- 3 files changed, 15 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 34d3f2fae8a..1af59d72ddc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1841,7 +1841,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error) } void i915_gem_reset(struct drm_device *dev); -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); +bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 54d76e9392d..959dffba0f0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -841,8 +841,8 @@ out: */ if (!needs_clflush_after && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj, obj->pin_display); - i915_gem_chipset_flush(dev); + if (i915_gem_clflush_object(obj, obj->pin_display)) + i915_gem_chipset_flush(dev); } } @@ -3224,7 +3224,7 @@ err_unpin: return ret; } -void +bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force) { @@ -3233,14 +3233,14 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, * again at bind time. */ if (obj->pages == NULL) - return; + return false; /* * Stolen memory is always coherent with the GPU as it is explicitly * marked as wc by the system, or the system is cache-coherent. */ if (obj->stolen) - return; + return false; /* If the GPU is snooping the contents of the CPU cache, * we do not need to manually clear the CPU cache lines. However, @@ -3251,11 +3251,12 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, * tracking. */ if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) - return; + return false; trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->pages); + + return true; } /** Flushes the GTT write domain for the object if it's dirty. */ @@ -3295,8 +3296,9 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj, force); - i915_gem_chipset_flush(obj->base.dev); + if (i915_gem_clflush_object(obj, force)) + i915_gem_chipset_flush(obj->base.dev); + old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e999578a021..7dcf78cf678 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -708,6 +708,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, { struct drm_i915_gem_object *obj; uint32_t flush_domains = 0; + bool flush_chipset = false; int ret; list_for_each_entry(obj, objects, exec_list) { @@ -716,12 +717,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, return ret; if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - i915_gem_clflush_object(obj, false); + flush_chipset |= i915_gem_clflush_object(obj, false); flush_domains |= obj->base.write_domain; } - if (flush_domains & I915_GEM_DOMAIN_CPU) + if (flush_chipset) i915_gem_chipset_flush(ring->dev); if (flush_domains & I915_GEM_DOMAIN_GTT) -- cgit v1.2.3-70-g09d2 From 5032d871f7d300aee10c309ea004eb4f851553fe Mon Sep 17 00:00:00 2001 From: Rafael Barbalho Date: Wed, 21 Aug 2013 17:10:51 +0100 Subject: drm/i915: Cleaning up the relocate entry function As the relocate entry function was getting a bit too big I've moved the code that used to use either the cpu or the gtt to for the relocation into two separate functions. Signed-off-by: Rafael Barbalho Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 88 ++++++++++++++++++------------ 1 file changed, 54 insertions(+), 34 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7dcf78cf678..792c52a235e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -171,6 +171,56 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) obj->cache_level != I915_CACHE_NONE); } +static int +relocate_entry_cpu(struct drm_i915_gem_object *obj, + struct drm_i915_gem_relocation_entry *reloc) +{ + uint32_t page_offset = offset_in_page(reloc->offset); + char *vaddr; + int ret = -EINVAL; + + ret = i915_gem_object_set_to_cpu_domain(obj, 1); + if (ret) + return ret; + + vaddr = kmap_atomic(i915_gem_object_get_page(obj, + reloc->offset >> PAGE_SHIFT)); + *(uint32_t *)(vaddr + page_offset) = reloc->delta; + kunmap_atomic(vaddr); + + return 0; +} + +static int +relocate_entry_gtt(struct drm_i915_gem_object *obj, + struct drm_i915_gem_relocation_entry *reloc) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t __iomem *reloc_entry; + void __iomem *reloc_page; + int ret = -EINVAL; + + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) + return ret; + + ret = i915_gem_object_put_fence(obj); + if (ret) + return ret; + + /* Map the page containing the relocation we're going to perform. */ + reloc->offset += i915_gem_obj_ggtt_offset(obj); + reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, + reloc->offset & PAGE_MASK); + reloc_entry = (uint32_t __iomem *) + (reloc_page + offset_in_page(reloc->offset)); + iowrite32(reloc->delta, reloc_entry); + io_mapping_unmap_atomic(reloc_page); + + return 0; +} + static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_objects *eb, @@ -255,40 +305,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return -EFAULT; reloc->delta += target_offset; - if (use_cpu_reloc(obj)) { - uint32_t page_offset = offset_in_page(reloc->offset); - char *vaddr; - - ret = i915_gem_object_set_to_cpu_domain(obj, 1); - if (ret) - return ret; - - vaddr = kmap_atomic(i915_gem_object_get_page(obj, - reloc->offset >> PAGE_SHIFT)); - *(uint32_t *)(vaddr + page_offset) = reloc->delta; - kunmap_atomic(vaddr); - } else { - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t __iomem *reloc_entry; - void __iomem *reloc_page; - - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ret; - - ret = i915_gem_object_put_fence(obj); - if (ret) - return ret; - - /* Map the page containing the relocation we're going to perform. */ - reloc->offset += i915_gem_obj_ggtt_offset(obj); - reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, - reloc->offset & PAGE_MASK); - reloc_entry = (uint32_t __iomem *) - (reloc_page + offset_in_page(reloc->offset)); - iowrite32(reloc->delta, reloc_entry); - io_mapping_unmap_atomic(reloc_page); - } + if (use_cpu_reloc(obj)) + ret = relocate_entry_cpu(obj, reloc); + else + ret = relocate_entry_gtt(obj, reloc); /* and update the user's relocation entry */ reloc->presumed_offset = target_offset; -- cgit v1.2.3-70-g09d2