-rw-r--r--   drivers/gpu/drm/i915/i915_drv.h          |  10
-rw-r--r--   drivers/gpu/drm/i915/i915_gem.c          | 184
-rw-r--r--   drivers/gpu/drm/i915/i915_gem_gtt.c      |  39
-rw-r--r--   drivers/gpu/drm/i915/intel_display.c     |  20
-rw-r--r--   drivers/gpu/drm/i915/intel_overlay.c     |   6
-rw-r--r--   drivers/gpu/drm/i915/intel_ringbuffer.c  |   6
6 files changed, 200 insertions(+), 65 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f63ee162f12..8a9fd917786 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1190,7 +1190,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
					     uint32_t read_domains,
					     uint32_t write_domain);
-int __must_check i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj);
+int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init_ringbuffer(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 void i915_gem_do_init(struct drm_device *dev,
@@ -1209,7 +1209,8 @@ int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
				   bool write);
 int __must_check
-i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+				     u32 alignment,
				     struct intel_ring_buffer *pipelined);
 int i915_gem_attach_phys_object(struct drm_device *dev,
				 struct drm_i915_gem_object *obj,
@@ -1223,9 +1224,14 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 uint32_t
 i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj);
 
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+				    enum i915_cache_level cache_level);
+
 /* i915_gem_gtt.c */
 void i915_gem_restore_gtt_mappings(struct drm_device *dev);
 int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj);
+void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj,
+				enum i915_cache_level cache_level);
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj);
 
 /* i915_gem_evict.c */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c6389de5316..b29e0f2b780 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2151,6 +2151,30 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
+static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
+{
+	u32 old_write_domain, old_read_domains;
+
+	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
+		return;
+
+	/* Act a barrier for all accesses through the GTT */
+	mb();
+
+	/* Force a pagefault for domain tracking on next user access */
+	i915_gem_release_mmap(obj);
+
+	old_read_domains = obj->base.read_domains;
+	old_write_domain = obj->base.write_domain;
+
+	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
+	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
+
+	trace_i915_gem_object_change_domain(obj,
+					    old_read_domains,
+					    old_write_domain);
+}
+
 /**
  * Unbinds an object from the GTT aperture.
  */
@@ -2167,23 +2191,28 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 		return -EINVAL;
 	}
 
-	/* blow away mappings if mapped through GTT */
-	i915_gem_release_mmap(obj);
-
-	/* Move the object to the CPU domain to ensure that
-	 * any possible CPU writes while it's not in the GTT
-	 * are flushed when we go to remap it. This will
-	 * also ensure that all pending GPU writes are finished
-	 * before we unbind.
-	 */
-	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+	ret = i915_gem_object_finish_gpu(obj);
 	if (ret == -ERESTARTSYS)
 		return ret;
 	/* Continue on if we fail due to EIO, the GPU is hung so we
 	 * should be safe and we need to cleanup or else we might
 	 * cause memory corruption through use-after-free.
 	 */
+
+	i915_gem_object_finish_gtt(obj);
+
+	/* Move the object to the CPU domain to ensure that
+	 * any possible CPU writes while it's not in the GTT
+	 * are flushed when we go to remap it.
+	 */
+	if (ret == 0)
+		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+	if (ret == -ERESTARTSYS)
+		return ret;
 	if (ret) {
+		/* In the event of a disaster, abandon all caches and
+		 * hope for the best.
+		 */
 		i915_gem_clflush_object(obj);
 		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	}
@@ -3005,51 +3034,139 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	return 0;
 }
 
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+				    enum i915_cache_level cache_level)
+{
+	int ret;
+
+	if (obj->cache_level == cache_level)
+		return 0;
+
+	if (obj->pin_count) {
+		DRM_DEBUG("can not change the cache level of pinned objects\n");
+		return -EBUSY;
+	}
+
+	if (obj->gtt_space) {
+		ret = i915_gem_object_finish_gpu(obj);
+		if (ret)
+			return ret;
+
+		i915_gem_object_finish_gtt(obj);
+
+		/* Before SandyBridge, you could not use tiling or fence
+		 * registers with snooped memory, so relinquish any fences
+		 * currently pointing to our region in the aperture.
+		 */
+		if (INTEL_INFO(obj->base.dev)->gen < 6) {
+			ret = i915_gem_object_put_fence(obj);
+			if (ret)
+				return ret;
+		}
+
+		i915_gem_gtt_rebind_object(obj, cache_level);
+	}
+
+	if (cache_level == I915_CACHE_NONE) {
+		u32 old_read_domains, old_write_domain;
+
+		/* If we're coming from LLC cached, then we haven't
+		 * actually been tracking whether the data is in the
+		 * CPU cache or not, since we only allow one bit set
+		 * in obj->write_domain and have been skipping the clflushes.
+		 * Just set it to the CPU cache for now.
+		 */
+		WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
+		WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
+
+		old_read_domains = obj->base.read_domains;
+		old_write_domain = obj->base.write_domain;
+
+		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+
+		trace_i915_gem_object_change_domain(obj,
+						    old_read_domains,
+						    old_write_domain);
+	}
+
+	obj->cache_level = cache_level;
+	return 0;
+}
+
 /*
- * Prepare buffer for display plane. Use uninterruptible for possible flush
- * wait, as in modesetting process we're not supposed to be interrupted.
+ * Prepare buffer for display plane (scanout, cursors, etc).
+ * Can be called from an uninterruptible phase (modesetting) and allows
+ * any flushes to be pipelined (for pageflips).
+ *
+ * For the display plane, we want to be in the GTT but out of any write
+ * domains. So in many ways this looks like set_to_gtt_domain() apart from the
+ * ability to pipeline the waits, pinning and any additional subtleties
+ * that may differentiate the display plane from ordinary buffers.
  */
 int
-i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+				     u32 alignment,
 				     struct intel_ring_buffer *pipelined)
 {
-	uint32_t old_read_domains;
+	u32 old_read_domains, old_write_domain;
 	int ret;
 
-	/* Not valid to be called on unbound objects. */
-	if (obj->gtt_space == NULL)
-		return -EINVAL;
-
 	ret = i915_gem_object_flush_gpu_write_domain(obj);
 	if (ret)
 		return ret;
-
-	/* Currently, we are always called from an non-interruptible context. */
 	if (pipelined != obj->ring) {
 		ret = i915_gem_object_wait_rendering(obj);
 		if (ret)
 			return ret;
 	}
 
+	/* The display engine is not coherent with the LLC cache on gen6.  As
+	 * a result, we make sure that the pinning that is about to occur is
+	 * done with uncached PTEs. This is lowest common denominator for all
+	 * chipsets.
+	 *
+	 * However for gen6+, we could do better by using the GFDT bit instead
+	 * of uncaching, which would allow us to flush all the LLC-cached data
+	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+	 */
+	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+	if (ret)
+		return ret;
+
+	/* As the user may map the buffer once pinned in the display plane
+	 * (e.g. libkms for the bootup splash), we have to ensure that we
+	 * always use map_and_fenceable for all scanout buffers.
+	 */
+	ret = i915_gem_object_pin(obj, alignment, true);
+	if (ret)
+		return ret;
+
 	i915_gem_object_flush_cpu_write_domain(obj);
 
+	old_write_domain = obj->base.write_domain;
 	old_read_domains = obj->base.read_domains;
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
 
 	trace_i915_gem_object_change_domain(obj,
 					    old_read_domains,
-					    obj->base.write_domain);
+					    old_write_domain);
 
 	return 0;
 }
 
 int
-i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
+i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
 {
 	int ret;
 
-	if (!obj->active)
+	if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
 		return 0;
 
 	if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
@@ -3058,6 +3175,9 @@ i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
 			return ret;
 	}
 
+	/* Ensure that we invalidate the GPU's caches and TLBs. */
+	obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
+
 	return i915_gem_object_wait_rendering(obj);
 }
 
@@ -3580,7 +3700,23 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
-	obj->cache_level = I915_CACHE_NONE;
+	if (IS_GEN6(dev)) {
+		/* On Gen6, we can have the GPU use the LLC (the CPU
+		 * cache) for about a 10% performance improvement
+		 * compared to uncached. Graphics requests other than
+		 * display scanout are coherent with the CPU in
+		 * accessing this cache. This means in this mode we
+		 * don't need to clflush on the CPU side, and on the
+		 * GPU side we only need to flush internal caches to
+		 * get data visible to the CPU.
+		 *
+		 * However, we maintain the display planes as UC, and so
+		 * need to rebind when first used as such.
+		 */
+		obj->cache_level = I915_CACHE_LLC;
+	} else
+		obj->cache_level = I915_CACHE_NONE;
+
 	obj->base.driver_private = NULL;
 	obj->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj->mm_list);
 	INIT_LIST_HEAD(&obj->gtt_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e46b645773c..7a709cd8d54 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -59,24 +59,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 			      (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);
 
 	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
-		unsigned int agp_type =
-			cache_level_to_agp_type(dev, obj->cache_level);
-
 		i915_gem_clflush_object(obj);
-
-		if (dev_priv->mm.gtt->needs_dmar) {
-			BUG_ON(!obj->sg_list);
-
-			intel_gtt_insert_sg_entries(obj->sg_list,
-						    obj->num_sg,
-						    obj->gtt_space->start >> PAGE_SHIFT,
-						    agp_type);
-		} else
-			intel_gtt_insert_pages(obj->gtt_space->start
-					       >> PAGE_SHIFT,
-					       obj->base.size >> PAGE_SHIFT,
-					       obj->pages,
-					       agp_type);
+		i915_gem_gtt_rebind_object(obj, obj->cache_level);
 	}
 
 	intel_gtt_chipset_flush();
@@ -110,6 +94,27 @@ int i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
+void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj,
+				enum i915_cache_level cache_level)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned int agp_type = cache_level_to_agp_type(dev, cache_level);
+
+	if (dev_priv->mm.gtt->needs_dmar) {
+		BUG_ON(!obj->sg_list);
+
+		intel_gtt_insert_sg_entries(obj->sg_list,
+					    obj->num_sg,
+					    obj->gtt_space->start >> PAGE_SHIFT,
+					    agp_type);
+	} else
+		intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
+				       obj->base.size >> PAGE_SHIFT,
+				       obj->pages,
+				       agp_type);
+}
+
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
 {
 	intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT,
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index aa43e7be605..86a3ec1469b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1812,14 +1812,10 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
 	}
 
 	dev_priv->mm.interruptible = false;
-	ret = i915_gem_object_pin(obj, alignment, true);
+	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
 	if (ret)
 		goto err_interruptible;
 
-	ret = i915_gem_object_set_to_display_plane(obj, pipelined);
-	if (ret)
-		goto err_unpin;
-
 	/* Install a fence for tiled scan-out. Pre-i965 always needs a
 	 * fence, whereas 965+ only requires a fence if using
 	 * framebuffer compression. For simplicity, we always install
@@ -1971,7 +1967,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 		 * This should only fail upon a hung GPU, in which case we
 		 * can safely continue.
 		 */
-		ret = i915_gem_object_flush_gpu(obj);
+		ret = i915_gem_object_finish_gpu(obj);
 		(void) ret;
 	}
 
@@ -5434,21 +5430,15 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
 			goto fail_locked;
 		}
 
-		ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
-		if (ret) {
-			DRM_ERROR("failed to pin cursor bo\n");
-			goto fail_locked;
-		}
-
-		ret = i915_gem_object_set_to_gtt_domain(obj, 0);
+		ret = i915_gem_object_pin_to_display_plane(obj, 0, NULL);
 		if (ret) {
 			DRM_ERROR("failed to move cursor bo into the GTT\n");
-			goto fail_unpin;
+			goto fail_locked;
 		}
 
 		ret = i915_gem_object_put_fence(obj);
 		if (ret) {
-			DRM_ERROR("failed to move cursor bo into the GTT\n");
+			DRM_ERROR("failed to release fence for cursor");
 			goto fail_unpin;
 		}
 
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index a670c006982..fcf6fcb0b48 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -773,14 +773,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	if (ret != 0)
 		return ret;
 
-	ret = i915_gem_object_pin(new_bo, PAGE_SIZE, true);
+	ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL);
 	if (ret != 0)
 		return ret;
 
-	ret = i915_gem_object_set_to_gtt_domain(new_bo, 0);
-	if (ret != 0)
-		goto out_unpin;
-
 	ret = i915_gem_object_put_fence(new_bo);
 	if (ret)
 		goto out_unpin;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 95c4b142993..e9615685a39 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -236,7 +236,8 @@ init_pipe_control(struct intel_ring_buffer *ring)
 		ret = -ENOMEM;
 		goto err;
 	}
-	obj->cache_level = I915_CACHE_LLC;
+
+	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
 	ret = i915_gem_object_pin(obj, 4096, true);
 	if (ret)
@@ -776,7 +777,8 @@ static int init_status_page(struct intel_ring_buffer *ring)
 		ret = -ENOMEM;
 		goto err;
 	}
-	obj->cache_level = I915_CACHE_LLC;
+
+	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
 	ret = i915_gem_object_pin(obj, 4096, true);
 	if (ret != 0) {