Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 430
1 file changed, 208 insertions(+), 222 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ec8a0d7ffa3..fba37e9f775 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -128,9 +128,7 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 		return -ENOMEM;
 
 	ret = drm_gem_handle_create(file_priv, obj, &handle);
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_handle_unreference(obj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_handle_unreference_unlocked(obj);
 
 	if (ret)
 		return ret;
@@ -488,7 +486,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	 */
 	if (args->offset > obj->size || args->size > obj->size ||
 	    args->offset + args->size > obj->size) {
-		drm_gem_object_unreference(obj);
+		drm_gem_object_unreference_unlocked(obj);
 		return -EINVAL;
 	}
 
@@ -501,7 +499,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 						file_priv);
 	}
 
-	drm_gem_object_unreference(obj);
+	drm_gem_object_unreference_unlocked(obj);
 
 	return ret;
 }
@@ -961,7 +959,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 */
 	if (args->offset > obj->size || args->size > obj->size ||
 	    args->offset + args->size > obj->size) {
-		drm_gem_object_unreference(obj);
+		drm_gem_object_unreference_unlocked(obj);
 		return -EINVAL;
 	}
 
@@ -995,7 +993,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		DRM_INFO("pwrite failed %d\n", ret);
 #endif
 
-	drm_gem_object_unreference(obj);
+	drm_gem_object_unreference_unlocked(obj);
 
 	return ret;
 }
@@ -1138,9 +1136,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 		       PROT_READ | PROT_WRITE, MAP_SHARED,
 		       args->offset);
 	up_write(&current->mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(obj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(obj);
 	if (IS_ERR((void *)addr))
 		return addr;
 
@@ -1562,6 +1558,38 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
 	i915_verify_inactive(dev, __FILE__, __LINE__);
 }
 
+static void
+i915_gem_process_flushing_list(struct drm_device *dev,
+			       uint32_t flush_domains, uint32_t seqno)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv, *next;
+
+	list_for_each_entry_safe(obj_priv, next,
+				 &dev_priv->mm.gpu_write_list,
+				 gpu_write_list) {
+		struct drm_gem_object *obj = obj_priv->obj;
+
+		if ((obj->write_domain & flush_domains) ==
+		    obj->write_domain) {
+			uint32_t old_write_domain = obj->write_domain;
+
+			obj->write_domain = 0;
+			list_del_init(&obj_priv->gpu_write_list);
+			i915_gem_object_move_to_active(obj, seqno);
+
+			/* update the fence lru list */
+			if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
+				list_move_tail(&obj_priv->fence_list,
+					       &dev_priv->mm.fence_list);
+
+			trace_i915_gem_object_change_domain(obj,
+							    obj->read_domains,
+							    old_write_domain);
+		}
+	}
+}
+
 /**
  * Creates a new sequence number, emitting a write of it to the status page
  * plus an interrupt, which will trigger i915_user_interrupt_handler.
@@ -1620,29 +1648,8 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
 	/* Associate any objects on the flushing list matching the write
 	 * domain we're flushing with our flush.
 	 */
-	if (flush_domains != 0) {
-		struct drm_i915_gem_object *obj_priv, *next;
-
-		list_for_each_entry_safe(obj_priv, next,
-					 &dev_priv->mm.gpu_write_list,
-					 gpu_write_list) {
-			struct drm_gem_object *obj = obj_priv->obj;
-
-			if ((obj->write_domain & flush_domains) ==
-			    obj->write_domain) {
-				uint32_t old_write_domain = obj->write_domain;
-
-				obj->write_domain = 0;
-				list_del_init(&obj_priv->gpu_write_list);
-				i915_gem_object_move_to_active(obj, seqno);
-
-				trace_i915_gem_object_change_domain(obj,
-						    obj->read_domains,
-						    old_write_domain);
-			}
-		}
-
-	}
+	if (flush_domains != 0)
+		i915_gem_process_flushing_list(dev, flush_domains, seqno);
 
 	if (!dev_priv->mm.suspended) {
 		mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
@@ -1822,7 +1829,7 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno, int interruptible)
 		return -EIO;
 
 	if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
-		if (IS_IRONLAKE(dev))
+		if (HAS_PCH_SPLIT(dev))
 			ier = I915_READ(DEIER) | I915_READ(GTIER);
 		else
 			ier = I915_READ(IER);
@@ -1991,6 +1998,7 @@ int
 i915_gem_object_unbind(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	int ret = 0;
 
@@ -2046,8 +2054,10 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 	}
 
 	/* Remove ourselves from the LRU list if present. */
+	spin_lock(&dev_priv->mm.active_list_lock);
 	if (!list_empty(&obj_priv->list))
 		list_del_init(&obj_priv->list);
+	spin_unlock(&dev_priv->mm.active_list_lock);
 
 	if (i915_gem_object_is_purgeable(obj_priv))
 		i915_gem_object_truncate(obj);
@@ -2085,11 +2095,34 @@ i915_gem_find_inactive_object(struct drm_device *dev, int min_size)
 }
 
 static int
+i915_gpu_idle(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	bool lists_empty;
+	uint32_t seqno;
+
+	spin_lock(&dev_priv->mm.active_list_lock);
+	lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
+		      list_empty(&dev_priv->mm.active_list);
+	spin_unlock(&dev_priv->mm.active_list_lock);
+
+	if (lists_empty)
+		return 0;
+
+	/* Flush everything onto the inactive list. */
+	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
+	if (seqno == 0)
+		return -ENOMEM;
+
+	return i915_wait_request(dev, seqno);
+}
+
+static int
 i915_gem_evict_everything(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
-	uint32_t seqno;
 	bool lists_empty;
 
 	spin_lock(&dev_priv->mm.active_list_lock);
@@ -2102,12 +2135,7 @@ i915_gem_evict_everything(struct drm_device *dev)
 		return -ENOSPC;
 
 	/* Flush everything (on to the inactive lists) and evict */
-	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
-	seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
-	if (seqno == 0)
-		return -ENOMEM;
-
-	ret = i915_wait_request(dev, seqno);
+	ret = i915_gpu_idle(dev);
 	if (ret)
 		return ret;
 
@@ -2265,6 +2293,28 @@ i915_gem_object_get_pages(struct drm_gem_object *obj,
 	return 0;
 }
 
+static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
+{
+	struct drm_gem_object *obj = reg->obj;
+	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	int regnum = obj_priv->fence_reg;
+	uint64_t val;
+
+	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
+		    0xfffff000) << 32;
+	val |= obj_priv->gtt_offset & 0xfffff000;
+	val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
+		SANDYBRIDGE_FENCE_PITCH_SHIFT;
+
+	if (obj_priv->tiling_mode == I915_TILING_Y)
+		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
+	val |= I965_FENCE_REG_VALID;
+
+	I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
+}
+
 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
 {
 	struct drm_gem_object *obj = reg->obj;
@@ -2361,6 +2411,58 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
 	I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
 }
 
+static int i915_find_fence_reg(struct drm_device *dev)
+{
+	struct drm_i915_fence_reg *reg = NULL;
+	struct drm_i915_gem_object *obj_priv = NULL;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_gem_object *obj = NULL;
+	int i, avail, ret;
+
+	/* First try to find a free reg */
+	avail = 0;
+	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
+		reg = &dev_priv->fence_regs[i];
+		if (!reg->obj)
+			return i;
+
+		obj_priv = reg->obj->driver_private;
+		if (!obj_priv->pin_count)
+			avail++;
+	}
+
+	if (avail == 0)
+		return -ENOSPC;
+
+	/* None available, try to steal one or wait for a user to finish */
+	i = I915_FENCE_REG_NONE;
+	list_for_each_entry(obj_priv, &dev_priv->mm.fence_list,
+			    fence_list) {
+		obj = obj_priv->obj;
+
+		if (obj_priv->pin_count)
+			continue;
+
+		/* found one! */
+		i = obj_priv->fence_reg;
+		break;
+	}
+
+	BUG_ON(i == I915_FENCE_REG_NONE);
+
+	/* We only have a reference on obj from the active list. put_fence_reg
+	 * might drop that one, causing a use-after-free in it. So hold a
+	 * private reference to obj like the other callers of put_fence_reg
+	 * (set_tiling ioctl) do. */
+	drm_gem_object_reference(obj);
+	ret = i915_gem_object_put_fence_reg(obj);
+	drm_gem_object_unreference(obj);
+	if (ret != 0)
+		return ret;
+
+	return i;
+}
+
 /**
  * i915_gem_object_get_fence_reg - set up a fence reg for an object
  * @obj: object to map through a fence reg
@@ -2381,8 +2483,7 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	struct drm_i915_fence_reg *reg = NULL;
-	struct drm_i915_gem_object *old_obj_priv = NULL;
-	int i, ret, avail;
+	int ret;
 
 	/* Just update our place in the LRU if our fence is getting used. */
 	if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
@@ -2410,86 +2511,27 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
 		break;
 	}
 
-	/* First try to find a free reg */
-	avail = 0;
-	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
-		reg = &dev_priv->fence_regs[i];
-		if (!reg->obj)
-			break;
-
-		old_obj_priv = reg->obj->driver_private;
-		if (!old_obj_priv->pin_count)
-			avail++;
-	}
-
-	/* None available, try to steal one or wait for a user to finish */
-	if (i == dev_priv->num_fence_regs) {
-		struct drm_gem_object *old_obj = NULL;
-
-		if (avail == 0)
-			return -ENOSPC;
-
-		list_for_each_entry(old_obj_priv, &dev_priv->mm.fence_list,
-				    fence_list) {
-			old_obj = old_obj_priv->obj;
-
-			if (old_obj_priv->pin_count)
-				continue;
-
-			/* Take a reference, as otherwise the wait_rendering
-			 * below may cause the object to get freed out from
-			 * under us.
-			 */
-			drm_gem_object_reference(old_obj);
-
-			/* i915 uses fences for GPU access to tiled buffers */
-			if (IS_I965G(dev) || !old_obj_priv->active)
-				break;
-
-			/* This brings the object to the head of the LRU if it
-			 * had been written to. The only way this should
-			 * result in us waiting longer than the expected
-			 * optimal amount of time is if there was a
-			 * fence-using buffer later that was read-only.
-			 */
-			i915_gem_object_flush_gpu_write_domain(old_obj);
-			ret = i915_gem_object_wait_rendering(old_obj);
-			if (ret != 0) {
-				drm_gem_object_unreference(old_obj);
-				return ret;
-			}
-
-			break;
-		}
-
-		/*
-		 * Zap this virtual mapping so we can set up a fence again
-		 * for this object next time we need it.
-		 */
-		i915_gem_release_mmap(old_obj);
-
-		i = old_obj_priv->fence_reg;
-		reg = &dev_priv->fence_regs[i];
-
-		old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
-		list_del_init(&old_obj_priv->fence_list);
-
-		drm_gem_object_unreference(old_obj);
-	}
+	ret = i915_find_fence_reg(dev);
+	if (ret < 0)
+		return ret;
 
-	obj_priv->fence_reg = i;
+	obj_priv->fence_reg = ret;
+	reg = &dev_priv->fence_regs[obj_priv->fence_reg];
 	list_add_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
 
 	reg->obj = obj;
 
-	if (IS_I965G(dev))
+	if (IS_GEN6(dev))
+		sandybridge_write_fence_reg(reg);
+	else if (IS_I965G(dev))
 		i965_write_fence_reg(reg);
 	else if (IS_I9XX(dev))
 		i915_write_fence_reg(reg);
 	else
 		i830_write_fence_reg(reg);
 
-	trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode);
+	trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
+			obj_priv->tiling_mode);
 
 	return 0;
 }
@@ -2508,9 +2550,12 @@ i915_gem_clear_fence_reg(struct drm_gem_object *obj)
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 
-	if (IS_I965G(dev))
+	if (IS_GEN6(dev)) {
+		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
+			     (obj_priv->fence_reg * 8), 0);
+	} else if (IS_I965G(dev)) {
 		I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
-	else {
+	} else {
 		uint32_t fence_reg;
 
 		if (obj_priv->fence_reg < 8)
@@ -2544,6 +2589,12 @@ i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
 	if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
 		return 0;
 
+	/* If we've changed tiling, GTT-mappings of the object
+	 * need to re-fault to ensure that the correct fence register
+	 * setup is in place.
+	 */
+	i915_gem_release_mmap(obj);
+
 	/* On the i915, GPU access to tiled buffers is via a fence,
 	 * therefore we must wait for any outstanding access to complete
 	 * before clearing the fence.
@@ -2552,12 +2603,12 @@ i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
 		int ret;
 
 		i915_gem_object_flush_gpu_write_domain(obj);
-		i915_gem_object_flush_gtt_write_domain(obj);
 		ret = i915_gem_object_wait_rendering(obj);
 		if (ret != 0)
 			return ret;
 	}
 
+	i915_gem_object_flush_gtt_write_domain(obj);
 	i915_gem_clear_fence_reg (obj);
 
 	return 0;
@@ -2697,7 +2748,6 @@ static void
 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
-	uint32_t seqno;
 	uint32_t old_write_domain;
 
 	if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
@@ -2706,9 +2756,8 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
 	/* Queue the GPU write cache flushing we need. */
 	old_write_domain = obj->write_domain;
 	i915_gem_flush(dev, 0, obj->write_domain);
-	seqno = i915_add_request(dev, NULL, obj->write_domain);
+	(void) i915_add_request(dev, NULL, obj->write_domain);
 	BUG_ON(obj->write_domain);
-	i915_gem_object_move_to_active(obj, seqno);
 
 	trace_i915_gem_object_change_domain(obj,
 					    obj->read_domains,
@@ -3247,7 +3296,8 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 		     obj_priv->tiling_mode != I915_TILING_NONE;
 
 	/* Check fence reg constraints and rebind if necessary */
-	if (need_fence && !i915_obj_fenceable(dev, obj))
+	if (need_fence && !i915_gem_object_fence_offset_ok(obj,
+	    obj_priv->tiling_mode))
 		i915_gem_object_unbind(obj);
 
 	/* Choose the GTT offset for our buffer and put it there. */
@@ -3317,6 +3367,16 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 		}
 
 		/* Validate that the target is in a valid r/w GPU domain */
+		if (reloc->write_domain & (reloc->write_domain - 1)) {
+			DRM_ERROR("reloc with multiple write domains: "
+				  "obj %p target %d offset %d "
+				  "read %08x write %08x",
+				  obj, reloc->target_handle,
+				  (int) reloc->offset,
+				  reloc->read_domains,
+				  reloc->write_domain);
+			return -EINVAL;
+		}
 		if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
 		    reloc->read_domains & I915_GEM_DOMAIN_CPU) {
 			DRM_ERROR("reloc with read/write CPU domains: "
@@ -4445,8 +4505,7 @@ int
 i915_gem_idle(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	uint32_t seqno, cur_seqno, last_seqno;
-	int stuck, ret;
+	int ret;
 
 	mutex_lock(&dev->struct_mutex);
 
@@ -4455,115 +4514,36 @@ i915_gem_idle(struct drm_device *dev)
 		return 0;
 	}
 
-	/* Hack! Don't let anybody do execbuf while we don't control the chip.
-	 * We need to replace this with a semaphore, or something.
-	 */
-	dev_priv->mm.suspended = 1;
-	del_timer(&dev_priv->hangcheck_timer);
-
-	/* Cancel the retire work handler, wait for it to finish if running
-	 */
-	mutex_unlock(&dev->struct_mutex);
-	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
-	mutex_lock(&dev->struct_mutex);
-
-	i915_kernel_lost_context(dev);
-
-	/* Flush the GPU along with all non-CPU write domains
-	 */
-	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
-	seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
-
-	if (seqno == 0) {
+	ret = i915_gpu_idle(dev);
+	if (ret) {
 		mutex_unlock(&dev->struct_mutex);
-		return -ENOMEM;
+		return ret;
 	}
 
-	dev_priv->mm.waiting_gem_seqno = seqno;
-	last_seqno = 0;
-	stuck = 0;
-	for (;;) {
-		cur_seqno = i915_get_gem_seqno(dev);
-		if (i915_seqno_passed(cur_seqno, seqno))
-			break;
-		if (last_seqno == cur_seqno) {
-			if (stuck++ > 100) {
-				DRM_ERROR("hardware wedged\n");
-				atomic_set(&dev_priv->mm.wedged, 1);
-				DRM_WAKEUP(&dev_priv->irq_queue);
-				break;
-			}
+	/* Under UMS, be paranoid and evict. */
+	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
+		ret = i915_gem_evict_from_inactive_list(dev);
+		if (ret) {
+			mutex_unlock(&dev->struct_mutex);
+			return ret;
 		}
-		msleep(10);
-		last_seqno = cur_seqno;
-	}
-	dev_priv->mm.waiting_gem_seqno = 0;
-
-	i915_gem_retire_requests(dev);
-
-	spin_lock(&dev_priv->mm.active_list_lock);
-	if (!atomic_read(&dev_priv->mm.wedged)) {
-		/* Active and flushing should now be empty as we've
-		 * waited for a sequence higher than any pending execbuffer
-		 */
-		WARN_ON(!list_empty(&dev_priv->mm.active_list));
-		WARN_ON(!list_empty(&dev_priv->mm.flushing_list));
-		/* Request should now be empty as we've also waited
-		 * for the last request in the list
-		 */
-		WARN_ON(!list_empty(&dev_priv->mm.request_list));
 	}
 
-	/* Empty the active and flushing lists to inactive. If there's
-	 * anything left at this point, it means that we're wedged and
-	 * nothing good's going to happen by leaving them there. So strip
-	 * the GPU domains and just stuff them onto inactive.
+	/* Hack! Don't let anybody do execbuf while we don't control the chip.
+	 * We need to replace this with a semaphore, or something.
+	 * And not confound mm.suspended!
 	 */
-	while (!list_empty(&dev_priv->mm.active_list)) {
-		struct drm_gem_object *obj;
-		uint32_t old_write_domain;
-
-		obj = list_first_entry(&dev_priv->mm.active_list,
-				       struct drm_i915_gem_object,
-				       list)->obj;
-		old_write_domain = obj->write_domain;
-		obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-		i915_gem_object_move_to_inactive(obj);
-
-		trace_i915_gem_object_change_domain(obj,
-						    obj->read_domains,
-						    old_write_domain);
-	}
-	spin_unlock(&dev_priv->mm.active_list_lock);
-
-	while (!list_empty(&dev_priv->mm.flushing_list)) {
-		struct drm_gem_object *obj;
-		uint32_t old_write_domain;
-
-		obj = list_first_entry(&dev_priv->mm.flushing_list,
-				       struct drm_i915_gem_object,
-				       list)->obj;
-		old_write_domain = obj->write_domain;
-		obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-		i915_gem_object_move_to_inactive(obj);
-
-		trace_i915_gem_object_change_domain(obj,
-						    obj->read_domains,
-						    old_write_domain);
-	}
-
-
-	/* Move all inactive buffers out of the GTT. */
-	ret = i915_gem_evict_from_inactive_list(dev);
-	WARN_ON(!list_empty(&dev_priv->mm.inactive_list));
-	if (ret) {
-		mutex_unlock(&dev->struct_mutex);
-		return ret;
-	}
+	dev_priv->mm.suspended = 1;
+	del_timer(&dev_priv->hangcheck_timer);
 
+	i915_kernel_lost_context(dev);
 	i915_gem_cleanup_ringbuffer(dev);
+
 	mutex_unlock(&dev->struct_mutex);
 
+	/* Cancel the retire work handler, which should be idle now. */
+	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
+
 	return 0;
 }
@@ -4607,8 +4587,13 @@ i915_gem_init_hws(struct drm_device *dev)
 	}
 	dev_priv->hws_obj = obj;
 	memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
-	I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
-	I915_READ(HWS_PGA); /* posting read */
+	if (IS_GEN6(dev)) {
+		I915_WRITE(HWS_PGA_GEN6, dev_priv->status_gfx_addr);
+		I915_READ(HWS_PGA_GEN6); /* posting read */
+	} else {
+		I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
+		I915_READ(HWS_PGA); /* posting read */
+	}
 	DRM_DEBUG_DRIVER("hws offset: 0x%08x\n", dev_priv->status_gfx_addr);
 
 	return 0;
@@ -4850,7 +4835,8 @@ i915_gem_load(struct drm_device *dev)
 	spin_unlock(&shrink_list_lock);
 
 	/* Old X drivers will take 0-2 for front, back, depth buffers */
-	dev_priv->fence_reg_start = 3;
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		dev_priv->fence_reg_start = 3;
 
 	if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
 		dev_priv->num_fence_regs = 16;