author		Linus Torvalds <torvalds@linux-foundation.org>	2012-12-17 08:26:17 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-17 08:26:17 -0800
commit		3c2e81ef344a90bb0a39d84af6878b4aeff568a2
tree		bd8c8b23466174899d2fe4d35af6e1e838edb068 /drivers/gpu/drm/i915/i915_gem.c
parent		221392c3ad0432e39fd74a349364f66cb0ed78f6
parent		55bde6b1442fed8af67b92d21acce67db454c9f9
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull DRM updates from Dave Airlie:
"This is the one and only next pull for 3.8, we had a regression we
found last week, so I was waiting for that to resolve itself, and I
ended up with some Intel fixes on top as well.
Highlights:
- new driver: nvidia tegra 20/30/hdmi support
- radeon: add support for previously unused DMA engines, more HDMI
regs, eviction speed-ups and fixes
- i915: HSW support enablement, AGP removal on GEN6, seqno wrapping
- exynos: IPP subsystem support (image post proc), HDMI
- nouveau: display class reworking, nv20->40 z compression
- ttm: start of locking fixes, RCU usage for lookups
- core: documentation updates, docbook integration, monotonic clock
usage, move from connector to object properties"
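A quick note on the "seqno wrapping" highlight, since it shows up throughout the i915 diff below: the driver tracks GPU progress with 32-bit sequence numbers, so comparisons must be wrap-safe and the per-ring bookkeeping has to be reset before the counter overflows. The comparison side rests on the usual modular-arithmetic idiom; the helper below mirrors the form of the driver's i915_seqno_passed(), shown here as an illustrative sketch rather than a quote from the tree:

	/* Wrap-safe comparison of 32-bit seqnos: seq1 has "passed" seq2 when
	 * the signed difference is non-negative.  Correct as long as the two
	 * values stay less than 2^31 apart, which the wrap handling in the
	 * diff below guarantees by idling the GPU around the wrap point.
	 */
	static inline bool i915_seqno_passed(uint32_t seq1, uint32_t seq2)
	{
		return (int32_t)(seq1 - seq2) >= 0;
	}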
* 'drm-next' of git://people.freedesktop.org/~airlied/linux: (590 commits)
drm/exynos: add gsc ipp driver
drm/exynos: add rotator ipp driver
drm/exynos: add fimc ipp driver
drm/exynos: add iommu support for ipp
drm/exynos: add ipp subsystem
drm/exynos: support device tree for fimd
radeon: fix regression with eviction since evict caching changes
drm/radeon: add more pedantic checks in the CP DMA checker
drm/radeon: bump version for CS ioctl support for async DMA
drm/radeon: enable the async DMA rings in the CS ioctl
drm/radeon: add VM CS parser support for async DMA on cayman/TN/SI
drm/radeon/kms: add evergreen/cayman CS parser for async DMA (v2)
drm/radeon/kms: add 6xx/7xx CS parser for async DMA (v2)
drm/radeon: fix htile buffer size computation for command stream checker
drm/radeon: fix fence locking in the pageflip callback
drm/radeon: make indirect register access concurrency-safe
drm/radeon: add W|RREG32_IDX for MM_INDEX|DATA based mmio access
drm/exynos: support extended screen coordinate of fimd
drm/exynos: fix x, y coordinates for right bottom pixel
drm/exynos: fix fb offset calculation for plane
...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--	drivers/gpu/drm/i915/i915_gem.c	292
1 file changed, 133 insertions(+), 159 deletions(-)
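Two scheduling-related substitutions recur in the diff: the hangcheck timer moves from "jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)" to "round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)", and the retire work is queued with round_jiffies_up_relative(HZ) instead of a bare HZ. Rounding expiry times up to a common jiffy boundary lets unrelated low-precision timers fire together, so an otherwise idle CPU wakes once rather than several times. A minimal sketch of the pattern (the workqueue and work item here are placeholders, not driver names):

	#include <linux/workqueue.h>
	#include <linux/timer.h>

	static struct delayed_work housekeeping_work;	/* placeholder work item */

	static void schedule_housekeeping(struct workqueue_struct *wq)
	{
		/* Round the ~1 s delay up to the next whole-second jiffy
		 * boundary so this periodic maintenance batches with other
		 * rounded timers instead of waking the CPU on its own. */
		queue_delayed_work(wq, &housekeeping_work,
				   round_jiffies_up_relative(HZ));
	}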
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9b285da4449..742206e4510 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -845,12 +845,12 @@ out:
 		 * domain anymore. */
 		if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 			i915_gem_clflush_object(obj);
-			intel_gtt_chipset_flush();
+			i915_gem_chipset_flush(dev);
 		}
 	}
 
 	if (needs_clflush_after)
-		intel_gtt_chipset_flush();
+		i915_gem_chipset_flush(dev);
 
 	return ret;
 }
@@ -1345,30 +1345,17 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	trace_i915_gem_object_fault(obj, page_offset, true, write);
 
 	/* Now bind it into the GTT if needed */
-	if (!obj->map_and_fenceable) {
-		ret = i915_gem_object_unbind(obj);
-		if (ret)
-			goto unlock;
-	}
-	if (!obj->gtt_space) {
-		ret = i915_gem_object_bind_to_gtt(obj, 0, true, false);
-		if (ret)
-			goto unlock;
-
-		ret = i915_gem_object_set_to_gtt_domain(obj, write);
-		if (ret)
-			goto unlock;
-	}
+	ret = i915_gem_object_pin(obj, 0, true, false);
+	if (ret)
+		goto unlock;
 
-	if (!obj->has_global_gtt_mapping)
-		i915_gem_gtt_bind_object(obj, obj->cache_level);
+	ret = i915_gem_object_set_to_gtt_domain(obj, write);
+	if (ret)
+		goto unpin;
 
 	ret = i915_gem_object_get_fence(obj);
 	if (ret)
-		goto unlock;
-
-	if (i915_gem_object_is_inactive(obj))
-		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+		goto unpin;
 
 	obj->fault_mappable = true;
 
@@ -1377,6 +1364,8 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	/* Finally, remap it using the new GTT offset */
 	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
 
+unpin:
+	i915_gem_object_unpin(obj);
 unlock:
 	mutex_unlock(&dev->struct_mutex);
 out:
@@ -1707,10 +1696,14 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 	if (obj->pages_pin_count)
 		return -EBUSY;
 
+	/* ->put_pages might need to allocate memory for the bit17 swizzle
+	 * array, hence protect them from being reaped by removing them from gtt
+	 * lists early. */
+	list_del(&obj->gtt_list);
+
 	ops->put_pages(obj);
 	obj->pages = NULL;
 
-	list_del(&obj->gtt_list);
 	if (i915_gem_object_is_purgeable(obj))
 		i915_gem_object_truncate(obj);
@@ -1868,11 +1861,11 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 
 void
 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-			       struct intel_ring_buffer *ring,
-			       u32 seqno)
+			       struct intel_ring_buffer *ring)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 seqno = intel_ring_get_seqno(ring);
 
 	BUG_ON(ring == NULL);
 	obj->ring = ring;
@@ -1933,26 +1926,54 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 	WARN_ON(i915_verify_lists(dev));
 }
 
-static u32
-i915_gem_get_seqno(struct drm_device *dev)
+static int
+i915_gem_handle_seqno_wrap(struct drm_device *dev)
 {
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	u32 seqno = dev_priv->next_seqno;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *ring;
+	int ret, i, j;
 
-	/* reserve 0 for non-seqno */
-	if (++dev_priv->next_seqno == 0)
-		dev_priv->next_seqno = 1;
+	/* The hardware uses various monotonic 32-bit counters, if we
+	 * detect that they will wraparound we need to idle the GPU
+	 * and reset those counters.
+	 */
+	ret = 0;
+	for_each_ring(ring, dev_priv, i) {
+		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
+			ret |= ring->sync_seqno[j] != 0;
+	}
+	if (ret == 0)
+		return ret;
+
+	ret = i915_gpu_idle(dev);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests(dev);
+	for_each_ring(ring, dev_priv, i) {
+		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
+			ring->sync_seqno[j] = 0;
+	}
 
-	return seqno;
+	return 0;
 }
 
-u32
-i915_gem_next_request_seqno(struct intel_ring_buffer *ring)
+int
+i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 {
-	if (ring->outstanding_lazy_request == 0)
-		ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev);
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* reserve 0 for non-seqno */
+	if (dev_priv->next_seqno == 0) {
+		int ret = i915_gem_handle_seqno_wrap(dev);
+		if (ret)
+			return ret;
 
-	return ring->outstanding_lazy_request;
+		dev_priv->next_seqno = 1;
+	}
+
+	*seqno = dev_priv->next_seqno++;
+	return 0;
 }
 
 int
@@ -1963,7 +1984,6 @@ i915_add_request(struct intel_ring_buffer *ring,
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_request *request;
 	u32 request_ring_position;
-	u32 seqno;
 	int was_empty;
 	int ret;
 
@@ -1982,7 +2002,6 @@ i915_add_request(struct intel_ring_buffer *ring,
 	if (request == NULL)
 		return -ENOMEM;
 
-	seqno = i915_gem_next_request_seqno(ring);
 
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
@@ -1991,15 +2010,13 @@ i915_add_request(struct intel_ring_buffer *ring,
 	 */
 	request_ring_position = intel_ring_get_tail(ring);
 
-	ret = ring->add_request(ring, &seqno);
+	ret = ring->add_request(ring);
 	if (ret) {
 		kfree(request);
 		return ret;
 	}
 
-	trace_i915_gem_request_add(ring, seqno);
-
-	request->seqno = seqno;
+	request->seqno = intel_ring_get_seqno(ring);
 	request->ring = ring;
 	request->tail = request_ring_position;
 	request->emitted_jiffies = jiffies;
@@ -2017,23 +2034,24 @@ i915_add_request(struct intel_ring_buffer *ring,
 		spin_unlock(&file_priv->mm.lock);
 	}
 
+	trace_i915_gem_request_add(ring, request->seqno);
 	ring->outstanding_lazy_request = 0;
 
 	if (!dev_priv->mm.suspended) {
 		if (i915_enable_hangcheck) {
 			mod_timer(&dev_priv->hangcheck_timer,
-				  jiffies +
-				  msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+				  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
 		}
 		if (was_empty) {
 			queue_delayed_work(dev_priv->wq,
-					   &dev_priv->mm.retire_work, HZ);
+					   &dev_priv->mm.retire_work,
+					   round_jiffies_up_relative(HZ));
 			intel_mark_busy(dev_priv->dev);
 		}
 	}
 
 	if (out_seqno)
-		*out_seqno = seqno;
+		*out_seqno = request->seqno;
 	return 0;
 }
@@ -2131,7 +2149,6 @@ void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 {
 	uint32_t seqno;
-	int i;
 
 	if (list_empty(&ring->request_list))
 		return;
@@ -2140,10 +2157,6 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 
 	seqno = ring->get_seqno(ring, true);
 
-	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
-		if (seqno >= ring->sync_seqno[i])
-			ring->sync_seqno[i] = 0;
-
 	while (!list_empty(&ring->request_list)) {
 		struct drm_i915_gem_request *request;
 
@@ -2218,7 +2231,8 @@ i915_gem_retire_work_handler(struct work_struct *work)
 
 	/* Come back later if the device is busy... */
 	if (!mutex_trylock(&dev->struct_mutex)) {
-		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
+		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
+				   round_jiffies_up_relative(HZ));
 		return;
 	}
 
@@ -2236,7 +2250,8 @@ i915_gem_retire_work_handler(struct work_struct *work)
 	}
 
 	if (!dev_priv->mm.suspended && !idle)
-		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
+		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
+				   round_jiffies_up_relative(HZ));
 	if (idle)
 		intel_mark_idle(dev);
@@ -2386,7 +2401,11 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 
 	ret = to->sync_to(to, from, seqno);
 	if (!ret)
-		from->sync_seqno[idx] = seqno;
+		/* We use last_read_seqno because sync_to()
+		 * might have just caused seqno wrap under
+		 * the radar.
+		 */
+		from->sync_seqno[idx] = obj->last_read_seqno;
 
 	return ret;
 }
@@ -2469,14 +2488,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
-static int i915_ring_idle(struct intel_ring_buffer *ring)
-{
-	if (list_empty(&ring->active_list))
-		return 0;
-
-	return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring));
-}
-
 int i915_gpu_idle(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -2489,7 +2500,7 @@ int i915_gpu_idle(struct drm_device *dev)
 		if (ret)
 			return ret;
 
-		ret = i915_ring_idle(ring);
+		ret = intel_ring_idle(ring);
 		if (ret)
 			return ret;
 	}
@@ -2923,13 +2934,14 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
+	i915_gem_object_pin_pages(obj);
+
 search_free:
 	if (map_and_fenceable)
-		free_space =
-			drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
-							  size, alignment, obj->cache_level,
-							  0, dev_priv->mm.gtt_mappable_end,
-							  false);
+		free_space = drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
+							       size, alignment, obj->cache_level,
+							       0, dev_priv->mm.gtt_mappable_end,
+							       false);
 	else
 		free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space,
 						      size, alignment, obj->cache_level,
@@ -2937,60 +2949,60 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 
 	if (free_space != NULL) {
 		if (map_and_fenceable)
-			obj->gtt_space =
+			free_space =
 				drm_mm_get_block_range_generic(free_space,
 							       size, alignment, obj->cache_level,
 							       0, dev_priv->mm.gtt_mappable_end,
							       false);
 		else
-			obj->gtt_space =
+			free_space =
 				drm_mm_get_block_generic(free_space,
 							 size, alignment, obj->cache_level,
 							 false);
 	}
-	if (obj->gtt_space == NULL) {
+	if (free_space == NULL) {
 		ret = i915_gem_evict_something(dev, size, alignment,
 					       obj->cache_level,
 					       map_and_fenceable,
 					       nonblocking);
-		if (ret)
+		if (ret) {
+			i915_gem_object_unpin_pages(obj);
 			return ret;
+		}
 
 		goto search_free;
 	}
 	if (WARN_ON(!i915_gem_valid_gtt_space(dev,
-					      obj->gtt_space,
+					      free_space,
 					      obj->cache_level))) {
-		drm_mm_put_block(obj->gtt_space);
-		obj->gtt_space = NULL;
+		i915_gem_object_unpin_pages(obj);
+		drm_mm_put_block(free_space);
 		return -EINVAL;
 	}
-
 	ret = i915_gem_gtt_prepare_object(obj);
 	if (ret) {
-		drm_mm_put_block(obj->gtt_space);
-		obj->gtt_space = NULL;
+		i915_gem_object_unpin_pages(obj);
+		drm_mm_put_block(free_space);
 		return ret;
 	}
 
-	if (!dev_priv->mm.aliasing_ppgtt)
-		i915_gem_gtt_bind_object(obj, obj->cache_level);
-
 	list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
 	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
-	obj->gtt_offset = obj->gtt_space->start;
+	obj->gtt_space = free_space;
+	obj->gtt_offset = free_space->start;
 
 	fenceable =
-		obj->gtt_space->size == fence_size &&
-		(obj->gtt_space->start & (fence_alignment - 1)) == 0;
+		free_space->size == fence_size &&
+		(free_space->start & (fence_alignment - 1)) == 0;
 
 	mappable =
 		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
 
 	obj->map_and_fenceable = mappable && fenceable;
 
+	i915_gem_object_unpin_pages(obj);
 	trace_i915_gem_object_bind(obj, map_and_fenceable);
 	i915_gem_verify_gtt(dev);
 	return 0;
@@ -3059,7 +3071,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 		return;
 
 	i915_gem_clflush_object(obj);
-	intel_gtt_chipset_flush();
+	i915_gem_chipset_flush(obj->base.dev);
 	old_write_domain = obj->base.write_domain;
 	obj->base.write_domain = 0;
@@ -3454,11 +3466,16 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
 	}
 
 	if (obj->gtt_space == NULL) {
+		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+
 		ret = i915_gem_object_bind_to_gtt(obj, alignment,
 						  map_and_fenceable,
 						  nonblocking);
 		if (ret)
 			return ret;
+
+		if (!dev_priv->mm.aliasing_ppgtt)
+			i915_gem_gtt_bind_object(obj, obj->cache_level);
 	}
 
 	if (!obj->has_global_gtt_mapping && map_and_fenceable)
@@ -3832,7 +3849,7 @@ void i915_gem_l3_remap(struct drm_device *dev)
 	if (!IS_IVYBRIDGE(dev))
 		return;
 
-	if (!dev_priv->mm.l3_remap_info)
+	if (!dev_priv->l3_parity.remap_info)
 		return;
 
 	misccpctl = I915_READ(GEN7_MISCCPCTL);
@@ -3841,12 +3858,12 @@ void i915_gem_l3_remap(struct drm_device *dev)
 	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
 		u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
 
-		if (remap && remap != dev_priv->mm.l3_remap_info[i/4])
+		if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
 			DRM_DEBUG("0x%x was already programmed to %x\n",
 				  GEN7_L3LOG_BASE + i, remap);
-		if (remap && !dev_priv->mm.l3_remap_info[i/4])
+		if (remap && !dev_priv->l3_parity.remap_info[i/4])
 			DRM_DEBUG_DRIVER("Clearing remapped register\n");
-		I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->mm.l3_remap_info[i/4]);
+		I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
 	}
 
 	/* Make sure all the writes land before disabling dop clock gating */
@@ -3876,68 +3893,6 @@ void i915_gem_init_swizzling(struct drm_device *dev)
 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
 }
 
-void i915_gem_init_ppgtt(struct drm_device *dev)
-{
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	uint32_t pd_offset;
-	struct intel_ring_buffer *ring;
-	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-	uint32_t __iomem *pd_addr;
-	uint32_t pd_entry;
-	int i;
-
-	if (!dev_priv->mm.aliasing_ppgtt)
-		return;
-
-
-	pd_addr = dev_priv->mm.gtt->gtt + ppgtt->pd_offset/sizeof(uint32_t);
-	for (i = 0; i < ppgtt->num_pd_entries; i++) {
-		dma_addr_t pt_addr;
-
-		if (dev_priv->mm.gtt->needs_dmar)
-			pt_addr = ppgtt->pt_dma_addr[i];
-		else
-			pt_addr = page_to_phys(ppgtt->pt_pages[i]);
-
-		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
-		pd_entry |= GEN6_PDE_VALID;
-
-		writel(pd_entry, pd_addr + i);
-	}
-	readl(pd_addr);
-
-	pd_offset = ppgtt->pd_offset;
-	pd_offset /= 64; /* in cachelines, */
-	pd_offset <<= 16;
-
-	if (INTEL_INFO(dev)->gen == 6) {
-		uint32_t ecochk, gab_ctl, ecobits;
-
-		ecobits = I915_READ(GAC_ECO_BITS);
-		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
-
-		gab_ctl = I915_READ(GAB_CTL);
-		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
-
-		ecochk = I915_READ(GAM_ECOCHK);
-		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
-			   ECOCHK_PPGTT_CACHE64B);
-		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-	} else if (INTEL_INFO(dev)->gen >= 7) {
-		I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
-		/* GFX_MODE is per-ring on gen7+ */
-	}
-
-	for_each_ring(ring, dev_priv, i) {
-		if (INTEL_INFO(dev)->gen >= 7)
-			I915_WRITE(RING_MODE_GEN7(ring),
-				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-
-		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
-		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
-	}
-}
-
 static bool
 intel_enable_blt(struct drm_device *dev)
 {
@@ -3960,7 +3915,7 @@ i915_gem_init_hw(struct drm_device *dev)
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
 
-	if (!intel_enable_gtt())
+	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
 		return -EIO;
 
 	if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
@@ -4295,7 +4250,7 @@ void i915_gem_detach_phys_object(struct drm_device *dev,
 			page_cache_release(page);
 		}
 	}
-	intel_gtt_chipset_flush();
+	i915_gem_chipset_flush(dev);
 
 	obj->phys_obj->cur_obj = NULL;
 	obj->phys_obj = NULL;
@@ -4382,7 +4337,7 @@ i915_gem_phys_pwrite(struct drm_device *dev,
 		return -EFAULT;
 	}
 
-	intel_gtt_chipset_flush();
+	i915_gem_chipset_flush(dev);
 	return 0;
 }
@@ -4407,6 +4362,19 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 	spin_unlock(&file_priv->mm.lock);
 }
 
+static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
+{
+	if (!mutex_is_locked(mutex))
+		return false;
+
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
+	return mutex->owner == task;
+#else
+	/* Since UP may be pre-empted, we cannot assume that we own the lock */
+	return false;
+#endif
+}
+
 static int
 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 {
@@ -4417,10 +4385,15 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	struct drm_device *dev = dev_priv->dev;
 	struct drm_i915_gem_object *obj;
 	int nr_to_scan = sc->nr_to_scan;
+	bool unlock = true;
 	int cnt;
 
-	if (!mutex_trylock(&dev->struct_mutex))
-		return 0;
+	if (!mutex_trylock(&dev->struct_mutex)) {
+		if (!mutex_is_locked_by(&dev->struct_mutex, current))
+			return 0;
+
+		unlock = false;
+	}
 
 	if (nr_to_scan) {
 		nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
@@ -4436,6 +4409,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
 
-	mutex_unlock(&dev->struct_mutex);
+	if (unlock)
+		mutex_unlock(&dev->struct_mutex);
 	return cnt;
 }
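Finally, a note on the repeated intel_gtt_chipset_flush() → i915_gem_chipset_flush(dev) substitution: it is part of decoupling GEN6+ parts from the AGP/intel-gtt layer, the same motivation as the new "INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()" check in i915_gem_init_hw() above. The wrapper itself is not shown in this diff (it lives in a header); a sketch consistent with that check, written from context rather than copied from the tree:

	/* Flush chipset write buffers.  Only pre-GEN6 hardware still goes
	 * through the shared intel-gtt/AGP layer; newer generations do not
	 * depend on it, so the call is simply skipped there. */
	static inline void i915_gem_chipset_flush(struct drm_device *dev)
	{
		if (INTEL_INFO(dev)->gen < 6)
			intel_gtt_chipset_flush();
	}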