Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 338
1 file changed, 245 insertions, 93 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9e35dafc580..d9e2208cfe9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -176,7 +176,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 
 	pinned = 0;
 	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
 		if (obj->pin_count)
 			pinned += obj->gtt_space->size;
 	mutex_unlock(&dev->struct_mutex);
@@ -956,7 +956,7 @@ i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
 
 	ret = 0;
 	if (seqno == ring->outstanding_lazy_request)
-		ret = i915_add_request(ring, NULL, NULL);
+		ret = i915_add_request(ring, NULL);
 
 	return ret;
 }
@@ -1087,6 +1087,25 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
 			    interruptible, NULL);
 }
 
+static int
+i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
+				     struct intel_ring_buffer *ring)
+{
+	i915_gem_retire_requests_ring(ring);
+
+	/* Manually manage the write flush as we may have not yet
+	 * retired the buffer.
+	 *
+	 * Note that the last_write_seqno is always the earlier of
+	 * the two (read/write) seqno, so if we haved successfully waited,
+	 * we know we have passed the last write.
+	 */
+	obj->last_write_seqno = 0;
+	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
+
+	return 0;
+}
+
 /**
  * Ensures that all rendering to the object has completed and the object is
  * safe to unbind from the GTT or access from the CPU.
@@ -1107,18 +1126,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	i915_gem_retire_requests_ring(ring);
-
-	/* Manually manage the write flush as we may have not yet
-	 * retired the buffer.
-	 */
-	if (obj->last_write_seqno &&
-	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
-		obj->last_write_seqno = 0;
-		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
-	}
-
-	return 0;
+	return i915_gem_object_wait_rendering__tail(obj, ring);
 }
 
 /* A nonblocking variant of the above wait. This is a highly dangerous routine
@@ -1154,19 +1162,10 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 	mutex_unlock(&dev->struct_mutex);
 	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
 	mutex_lock(&dev->struct_mutex);
+	if (ret)
+		return ret;
 
-	i915_gem_retire_requests_ring(ring);
-
-	/* Manually manage the write flush as we may have not yet
-	 * retired the buffer.
-	 */
-	if (obj->last_write_seqno &&
-	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
-		obj->last_write_seqno = 0;
-		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
-	}
-
-	return ret;
+	return i915_gem_object_wait_rendering__tail(obj, ring);
 }
 
 /**
@@ -1676,7 +1675,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 	/* ->put_pages might need to allocate memory for the bit17 swizzle
 	 * array, hence protect them from being reaped by removing them from gtt
	 * lists early. */
-	list_del(&obj->gtt_list);
+	list_del(&obj->global_list);
 
 	ops->put_pages(obj);
 	obj->pages = NULL;
@@ -1696,7 +1695,7 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
-				 gtt_list) {
+				 global_list) {
 		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
 		    i915_gem_object_put_pages(obj) == 0) {
 			count += obj->base.size >> PAGE_SHIFT;
@@ -1733,7 +1732,8 @@ i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 	i915_gem_evict_everything(dev_priv->dev);
 
-	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list)
+	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
+				 global_list)
 		i915_gem_object_put_pages(obj);
 }
@@ -1867,7 +1867,7 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	if (ret)
 		return ret;
 
-	list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
+	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 	return 0;
 }
@@ -1880,6 +1880,10 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 	u32 seqno = intel_ring_get_seqno(ring);
 
 	BUG_ON(ring == NULL);
+	if (obj->ring != ring && obj->last_write_seqno) {
+		/* Keep the seqno relative to the current ring */
+		obj->last_write_seqno = seqno;
+	}
 	obj->ring = ring;
 
 	/* Add a reference if we're newly entering the active list. */
@@ -2005,17 +2009,18 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 	return 0;
 }
 
-int
-i915_add_request(struct intel_ring_buffer *ring,
-		 struct drm_file *file,
-		 u32 *out_seqno)
+int __i915_add_request(struct intel_ring_buffer *ring,
+		       struct drm_file *file,
+		       struct drm_i915_gem_object *obj,
+		       u32 *out_seqno)
 {
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_request *request;
-	u32 request_ring_position;
+	u32 request_ring_position, request_start;
 	int was_empty;
 	int ret;
 
+	request_start = intel_ring_get_tail(ring);
 	/*
 	 * Emit any outstanding flushes - execbuf can fail to emit the flush
 	 * after having emitted the batchbuffer command. Hence we need to fix
@@ -2047,7 +2052,21 @@ i915_add_request(struct intel_ring_buffer *ring,
 	request->seqno = intel_ring_get_seqno(ring);
 	request->ring = ring;
+	request->head = request_start;
 	request->tail = request_ring_position;
+	request->ctx = ring->last_context;
+	request->batch_obj = obj;
+
+	/* Whilst this request exists, batch_obj will be on the
+	 * active_list, and so will hold the active reference. Only when this
+	 * request is retired will the the batch_obj be moved onto the
+	 * inactive_list and lose its active reference. Hence we do not need
+	 * to explicitly hold another reference here.
+	 */
+
+	if (request->ctx)
+		i915_gem_context_reference(request->ctx);
+
 	request->emitted_jiffies = jiffies;
 	was_empty = list_empty(&ring->request_list);
 	list_add_tail(&request->list, &ring->request_list);
@@ -2100,9 +2119,114 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
+static bool i915_head_inside_object(u32 acthd, struct drm_i915_gem_object *obj)
+{
+	if (acthd >= obj->gtt_offset &&
+	    acthd < obj->gtt_offset + obj->base.size)
+		return true;
+
+	return false;
+}
+
+static bool i915_head_inside_request(const u32 acthd_unmasked,
+				     const u32 request_start,
+				     const u32 request_end)
+{
+	const u32 acthd = acthd_unmasked & HEAD_ADDR;
+
+	if (request_start < request_end) {
+		if (acthd >= request_start && acthd < request_end)
+			return true;
+	} else if (request_start > request_end) {
+		if (acthd >= request_start || acthd < request_end)
+			return true;
+	}
+
+	return false;
+}
+
+static bool i915_request_guilty(struct drm_i915_gem_request *request,
+				const u32 acthd, bool *inside)
+{
+	/* There is a possibility that unmasked head address
+	 * pointing inside the ring, matches the batch_obj address range.
+	 * However this is extremely unlikely.
+	 */
+
+	if (request->batch_obj) {
+		if (i915_head_inside_object(acthd, request->batch_obj)) {
+			*inside = true;
+			return true;
+		}
+	}
+
+	if (i915_head_inside_request(acthd, request->head, request->tail)) {
+		*inside = false;
+		return true;
+	}
+
+	return false;
+}
+
+static void i915_set_reset_status(struct intel_ring_buffer *ring,
+				  struct drm_i915_gem_request *request,
+				  u32 acthd)
+{
+	struct i915_ctx_hang_stats *hs = NULL;
+	bool inside, guilty;
+
+	/* Innocent until proven guilty */
+	guilty = false;
+
+	if (ring->hangcheck.action != wait &&
+	    i915_request_guilty(request, acthd, &inside)) {
+		DRM_ERROR("%s hung %s bo (0x%x ctx %d) at 0x%x\n",
+			  ring->name,
+			  inside ? "inside" : "flushing",
+			  request->batch_obj ?
+			  request->batch_obj->gtt_offset : 0,
+			  request->ctx ? request->ctx->id : 0,
+			  acthd);
+
+		guilty = true;
+	}
+
+	/* If contexts are disabled or this is the default context, use
+	 * file_priv->reset_state
+	 */
+	if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID)
+		hs = &request->ctx->hang_stats;
+	else if (request->file_priv)
+		hs = &request->file_priv->hang_stats;
+
+	if (hs) {
+		if (guilty)
+			hs->batch_active++;
+		else
+			hs->batch_pending++;
+	}
+}
+
+static void i915_gem_free_request(struct drm_i915_gem_request *request)
+{
+	list_del(&request->list);
+	i915_gem_request_remove_from_client(request);
+
+	if (request->ctx)
+		i915_gem_context_unreference(request->ctx);
+
+	kfree(request);
+}
+
 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
 				      struct intel_ring_buffer *ring)
 {
+	u32 completed_seqno;
+	u32 acthd;
+
+	acthd = intel_ring_get_active_head(ring);
+	completed_seqno = ring->get_seqno(ring, false);
+
 	while (!list_empty(&ring->request_list)) {
 		struct drm_i915_gem_request *request;
@@ -2110,9 +2234,10 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
 					   struct drm_i915_gem_request,
 					   list);
 
-		list_del(&request->list);
-		i915_gem_request_remove_from_client(request);
-		kfree(request);
+		if (request->seqno > completed_seqno)
+			i915_set_reset_status(ring, request, acthd);
+
+		i915_gem_free_request(request);
 	}
 
 	while (!list_empty(&ring->active_list)) {
@@ -2133,7 +2258,17 @@ void i915_gem_restore_fences(struct drm_device *dev)
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
-		i915_gem_write_fence(dev, i, reg->obj);
+
+		/*
+		 * Commit delayed tiling changes if we have an object still
+		 * attached to the fence, otherwise just clear the fence.
+		 */
+		if (reg->obj) {
+			i915_gem_object_update_fence(reg->obj, reg,
+						     reg->obj->tiling_mode);
+		} else {
+			i915_gem_write_fence(dev, i, NULL);
+		}
 	}
 }
@@ -2193,9 +2328,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 		 */
 		ring->last_retired_head = request->tail;
 
-		list_del(&request->list);
-		i915_gem_request_remove_from_client(request);
-		kfree(request);
+		i915_gem_free_request(request);
 	}
 
 	/* Move any buffers on the active list that are no longer referenced
@@ -2262,7 +2395,7 @@ i915_gem_retire_work_handler(struct work_struct *work)
 	idle = true;
 	for_each_ring(ring, dev_priv, i) {
 		if (ring->gpu_caches_dirty)
-			i915_add_request(ring, NULL, NULL);
+			i915_add_request(ring, NULL);
 
 		idle &= list_empty(&ring->request_list);
 	}
@@ -2494,9 +2627,10 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 		obj->has_aliasing_ppgtt_mapping = 0;
 	}
 	i915_gem_gtt_finish_object(obj);
+	i915_gem_object_unpin_pages(obj);
 
 	list_del(&obj->mm_list);
-	list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
+	list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 	/* Avoid an unnecessary call to unbind on rebind. */
 	obj->map_and_fenceable = true;
@@ -2533,7 +2667,6 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int fence_reg;
 	int fence_pitch_shift;
-	uint64_t val;
 
 	if (INTEL_INFO(dev)->gen >= 6) {
 		fence_reg = FENCE_REG_SANDYBRIDGE_0;
@@ -2543,8 +2676,23 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
 		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
 	}
 
+	fence_reg += reg * 8;
+
+	/* To w/a incoherency with non-atomic 64-bit register updates,
+	 * we split the 64-bit update into two 32-bit writes. In order
+	 * for a partial fence not to be evaluated between writes, we
+	 * precede the update with write to turn off the fence register,
+	 * and only enable the fence as the last step.
+	 *
+	 * For extra levels of paranoia, we make sure each step lands
+	 * before applying the next step.
+	 */
+	I915_WRITE(fence_reg, 0);
+	POSTING_READ(fence_reg);
+
 	if (obj) {
 		u32 size = obj->gtt_space->size;
+		uint64_t val;
 
 		val = (uint64_t)((obj->gtt_offset + size - 4096) &
 				 0xfffff000) << 32;
@@ -2553,12 +2701,16 @@
 		if (obj->tiling_mode == I915_TILING_Y)
 			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
 		val |= I965_FENCE_REG_VALID;
-	} else
-		val = 0;
 
-	fence_reg += reg * 8;
-	I915_WRITE64(fence_reg, val);
-	POSTING_READ(fence_reg);
+		I915_WRITE(fence_reg + 4, val >> 32);
+		POSTING_READ(fence_reg + 4);
+
+		I915_WRITE(fence_reg + 0, val);
+		POSTING_READ(fence_reg);
+	} else {
+		I915_WRITE(fence_reg + 4, 0);
+		POSTING_READ(fence_reg + 4);
+	}
 }
 
 static void i915_write_fence_reg(struct drm_device *dev, int reg,
@@ -2653,6 +2805,10 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
 	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
 		mb();
 
+	WARN(obj && (!obj->stride || !obj->tiling_mode),
+	     "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
+	     obj->stride, obj->tiling_mode);
+
 	switch (INTEL_INFO(dev)->gen) {
 	case 7:
 	case 6:
@@ -2676,35 +2832,17 @@ static inline int fence_number(struct drm_i915_private *dev_priv,
 	return fence - dev_priv->fence_regs;
 }
 
-static void i915_gem_write_fence__ipi(void *data)
-{
-	wbinvd();
-}
-
 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 					 struct drm_i915_fence_reg *fence,
 					 bool enable)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int fence_reg = fence_number(dev_priv, fence);
-
-	/* In order to fully serialize access to the fenced region and
-	 * the update to the fence register we need to take extreme
-	 * measures on SNB+. In theory, the write to the fence register
-	 * flushes all memory transactions before, and coupled with the
-	 * mb() placed around the register write we serialise all memory
-	 * operations with respect to the changes in the tiler. Yet, on
-	 * SNB+ we need to take a step further and emit an explicit wbinvd()
-	 * on each processor in order to manually flush all memory
-	 * transactions before updating the fence register.
-	 */
-	if (HAS_LLC(obj->base.dev))
-		on_each_cpu(i915_gem_write_fence__ipi, NULL, 1);
-	i915_gem_write_fence(dev, fence_reg, enable ? obj : NULL);
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	int reg = fence_number(dev_priv, fence);
+
+	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
 
 	if (enable) {
-		obj->fence_reg = fence_reg;
+		obj->fence_reg = reg;
 		fence->obj = obj;
 		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
 	} else {
@@ -2712,6 +2850,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 		fence->obj = NULL;
 		list_del_init(&fence->lru_list);
 	}
+	obj->fence_dirty = false;
 }
 
 static int
@@ -2841,7 +2980,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 		return 0;
 
 	i915_gem_object_update_fence(obj, reg, enable);
-	obj->fence_dirty = false;
 
 	return 0;
 }
@@ -2883,7 +3021,7 @@ static void i915_gem_verify_gtt(struct drm_device *dev)
 	struct drm_i915_gem_object *obj;
 	int err = 0;
 
-	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
+	list_for_each_entry(obj, &dev_priv->mm.gtt_list, global_list) {
 		if (obj->gtt_space == NULL) {
 			printk(KERN_ERR "object found on GTT list with no space reserved\n");
 			err++;
@@ -2930,6 +3068,8 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	struct drm_mm_node *node;
 	u32 size, fence_size, fence_alignment, unfenced_alignment;
 	bool mappable, fenceable;
+	size_t gtt_max = map_and_fenceable ?
+		dev_priv->gtt.mappable_end : dev_priv->gtt.total;
 	int ret;
 
 	fence_size = i915_gem_get_gtt_size(dev,
@@ -2956,9 +3096,11 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	/* If the object is bigger than the entire aperture, reject it early
 	 * before evicting everything in a vain attempt to find space.
 	 */
-	if (obj->base.size >
-	    (map_and_fenceable ? dev_priv->gtt.mappable_end : dev_priv->gtt.total)) {
-		DRM_ERROR("Attempting to bind an object larger than the aperture\n");
+	if (obj->base.size > gtt_max) {
+		DRM_ERROR("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n",
+			  obj->base.size,
+			  map_and_fenceable ? "mappable" : "total",
+			  gtt_max);
 		return -E2BIG;
 	}
@@ -2974,14 +3116,10 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 		return -ENOMEM;
 	}
 
- search_free:
-	if (map_and_fenceable)
-		ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node,
-							  size, alignment, obj->cache_level,
-							  0, dev_priv->gtt.mappable_end);
-	else
-		ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node,
-						 size, alignment, obj->cache_level);
+search_free:
+	ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node,
+						  size, alignment,
+						  obj->cache_level, 0, gtt_max);
 	if (ret) {
 		ret = i915_gem_evict_something(dev, size, alignment,
 					       obj->cache_level,
@@ -3007,7 +3145,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 		return ret;
 	}
 
-	list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
+	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
 	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
 	obj->gtt_space = node;
@@ -3022,7 +3160,6 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	obj->map_and_fenceable = mappable && fenceable;
 
-	i915_gem_object_unpin_pages(obj);
 	trace_i915_gem_object_bind(obj, map_and_fenceable);
 	i915_gem_verify_gtt(dev);
 	return 0;
@@ -3722,7 +3859,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
 	INIT_LIST_HEAD(&obj->mm_list);
-	INIT_LIST_HEAD(&obj->gtt_list);
+	INIT_LIST_HEAD(&obj->global_list);
 	INIT_LIST_HEAD(&obj->ring_list);
 	INIT_LIST_HEAD(&obj->exec_list);
@@ -3822,7 +3959,13 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 		dev_priv->mm.interruptible = was_interruptible;
 	}
 
-	obj->pages_pin_count = 0;
+	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
+	 * before progressing. */
+	if (obj->stolen)
+		i915_gem_object_unpin_pages(obj);
+
+	if (WARN_ON(obj->pages_pin_count))
+		obj->pages_pin_count = 0;
 	i915_gem_object_put_pages(obj);
 	i915_gem_object_free_mmap_offset(obj);
 	i915_gem_object_release_stolen(obj);
@@ -3973,12 +4116,21 @@ static int i915_gem_init_rings(struct drm_device *dev)
 			goto cleanup_bsd_ring;
 	}
 
+	if (HAS_VEBOX(dev)) {
+		ret = intel_init_vebox_ring_buffer(dev);
+		if (ret)
+			goto cleanup_blt_ring;
+	}
+
+
 	ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
 	if (ret)
-		goto cleanup_blt_ring;
+		goto cleanup_vebox_ring;
 
 	return 0;
 
+cleanup_vebox_ring:
+	intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
 cleanup_blt_ring:
 	intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
 cleanup_bsd_ring:
@@ -4453,10 +4605,10 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	}
 
 	cnt = 0;
-	list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list)
+	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
		if (obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
-	list_for_each_entry(obj, &dev_priv->mm.inactive_list, gtt_list)
+	list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list)
 		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
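
A note on the request API change above: __i915_add_request() now carries the batch object (and records the request head and the ring's last context), while call sites such as i915_gem_check_olr() and the retire work handler shrink to i915_add_request(ring, NULL). The two-argument form presumably survives as a thin wrapper in i915_drv.h, which is outside this diff; a sketch of what such a wrapper would look like, forwarding with no file and no batch object:

/* Hypothetical compatibility wrapper -- the real definition lives in
 * i915_drv.h and is not part of this diff. It forwards to the new
 * function with file == NULL and batch_obj == NULL.
 */
#define i915_add_request(ring, seqno) \
	__i915_add_request(ring, NULL, NULL, seqno)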
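
The new i915_head_inside_request() treats [request->head, request->tail) as a half-open range of ring offsets that may wrap past the end of the ring buffer, which is why it tests both orderings of start and end. Below is a self-contained sketch of the same interval test, using illustrative names rather than the driver's:

#include <stdbool.h>
#include <stdint.h>

/*
 * Return true if 'offset' lies in the half-open range [start, end) of
 * ring offsets. When start > end the range wraps around the end of the
 * ring buffer; when start == end the range is empty.
 */
bool offset_in_wrapping_range(uint32_t offset, uint32_t start, uint32_t end)
{
	if (start < end)		/* ordinary, contiguous range */
		return offset >= start && offset < end;
	if (start > end)		/* range wraps past the end of the ring */
		return offset >= start || offset < end;
	return false;			/* empty range */
}

For a request that wraps, say start = 0x0ff0 and end = 0x0040, an offset of 0x0010 is inside the range while 0x0100 is not, matching the two-branch check in the patch.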
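
The i965_write_fence_reg() rework replaces the single I915_WRITE64() with a disable-then-two-halves sequence so the hardware can never sample a half-updated fence as valid: the fence is first turned off, the high dword is written, and the low dword (which carries I965_FENCE_REG_VALID) is written last, with a posting read after each step. The sketch below mimics that ordering against a stand-in MMIO interface; mmio_write32()/mmio_read32() and the traced offsets are placeholders for I915_WRITE()/POSTING_READ(), not i915 APIs.

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for I915_WRITE()/POSTING_READ(); they only trace the accesses. */
static void mmio_write32(uint32_t reg, uint32_t val)
{
	printf("write32 0x%04x <- 0x%08x\n", (unsigned)reg, (unsigned)val);
}

static void mmio_read32(uint32_t reg)
{
	/* On real hardware, reading the register back flushes the posted write. */
	printf("read32  0x%04x\n", (unsigned)reg);
}

/*
 * Update a 64-bit fence register using only 32-bit writes, ordered so a
 * partially written fence is never visible as valid:
 *   1. write 0 to the low dword, clearing the valid bit (fence disabled),
 *   2. write the high dword,
 *   3. write the low dword last, which re-enables the fence.
 */
static void write_fence64(uint32_t fence_reg, uint64_t val)
{
	mmio_write32(fence_reg, 0);
	mmio_read32(fence_reg);

	mmio_write32(fence_reg + 4, (uint32_t)(val >> 32));
	mmio_read32(fence_reg + 4);

	mmio_write32(fence_reg, (uint32_t)val);
	mmio_read32(fence_reg);
}

int main(void)
{
	/* Fence 2 lives at byte offset 2 * 8; the value here is arbitrary. */
	write_fence64(2 * 8, 0x00fedcba00012301ull);
	return 0;
}

Writing the valid bit only in the final step means anything observing the register between steps sees a disabled fence rather than a mix of old and new halves, which is exactly the incoherency the comment in the patch is working around.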