From cecc21fea98cd18419e26a46e4e8123f55fead91 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Oct 2012 17:02:56 +0100 Subject: drm/i915: Align the hangcheck wakeup to the nearest second round_jiffies() aligns the wakeup time to the nearest second in order to batch wakeups and reduce system load, which is useful for unimportant coarse timers like our hangcheck. v2: round_jiffies_relative() returns the relative jiffie value, whereas we need the absolute value for the timer. Suggested-by: Arjan van de Ven Signed-off-by: Chris Wilson Cc: Arjan van de Ven Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2f76905ab2b..ca3ab04c537 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2015,8 +2015,7 @@ i915_add_request(struct intel_ring_buffer *ring, if (!dev_priv->mm.suspended) { if (i915_enable_hangcheck) { mod_timer(&dev_priv->hangcheck_timer, - jiffies + - msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); + round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); } if (was_empty) { queue_delayed_work(dev_priv->wq, -- cgit v1.2.3-70-g09d2 From bcb450861653167c4fec10be3d916299c34bb1d8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Oct 2012 17:02:57 +0100 Subject: drm/i915: Align the retire_requests worker to the nearest second By using round_jiffies() we can align the wakeup of our worker to the nearest second in order to batch wakeups and reduce system load, which is useful for unimportant coarse tasks like our retire_requests. v2: round_jiffies_relative() already returns the relative timeout value, so no need to incorrectly perform the subtraction twice. The timer interface still leaves the possibility for the value of jiffies to change be we program the timer. Suggested-by: Arjan van de Ven Signed-off-by: Chris Wilson Cc: Arjan van de Ven Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ca3ab04c537..eb3316bb4c3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2019,7 +2019,8 @@ i915_add_request(struct intel_ring_buffer *ring, } if (was_empty) { queue_delayed_work(dev_priv->wq, - &dev_priv->mm.retire_work, HZ); + &dev_priv->mm.retire_work, + round_jiffies_up_relative(HZ)); intel_mark_busy(dev_priv->dev); } } @@ -2208,7 +2209,8 @@ i915_gem_retire_work_handler(struct work_struct *work) /* Come back later if the device is busy... */ if (!mutex_trylock(&dev->struct_mutex)) { - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); + queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, + round_jiffies_up_relative(HZ)); return; } @@ -2226,7 +2228,8 @@ i915_gem_retire_work_handler(struct work_struct *work) } if (!dev_priv->mm.suspended && !idle) - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); + queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, + round_jiffies_up_relative(HZ)); if (idle) intel_mark_idle(dev); -- cgit v1.2.3-70-g09d2 From a4da4fa4e55a8cea7fe603e7564e72db97b77a89 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 2 Nov 2012 19:55:07 +0100 Subject: drm/i915: extract l3_parity substruct from dev_priv Pretty astonishing how far apart these two members landed ... Especially since I've already removed almost 200 lines in between. Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 10 +++++++--- drivers/gpu/drm/i915/i915_gem.c | 8 ++++---- drivers/gpu/drm/i915/i915_irq.c | 6 +++--- drivers/gpu/drm/i915/i915_sysfs.c | 6 +++--- 4 files changed, 17 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ad3c4c134e2..135b9db5527 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -599,6 +599,11 @@ struct i915_dri1_state { uint32_t counter; }; +struct intel_l3_parity { + u32 *remap_info; + struct work_struct error_work; +}; + typedef struct drm_i915_private { struct drm_device *dev; @@ -760,8 +765,6 @@ typedef struct drm_i915_private { /** PPGTT used for aliasing the PPGTT with the GTT */ struct i915_hw_ppgtt *aliasing_ppgtt; - u32 *l3_remap_info; - struct shrinker inactive_shrinker; /** @@ -865,6 +868,8 @@ typedef struct drm_i915_private { bool mchbar_need_disable; + struct intel_l3_parity l3_parity; + /* gen6+ rps state */ struct intel_gen6_power_mgmt rps; @@ -887,7 +892,6 @@ typedef struct drm_i915_private { struct drm_property *broadcast_rgb_property; struct drm_property *force_audio_property; - struct work_struct parity_error_work; bool hw_contexts_disabled; uint32_t hw_context_size; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7dd103447b4..d8eaebfea93 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3831,7 +3831,7 @@ void i915_gem_l3_remap(struct drm_device *dev) if (!IS_IVYBRIDGE(dev)) return; - if (!dev_priv->mm.l3_remap_info) + if (!dev_priv->l3_parity.remap_info) return; misccpctl = I915_READ(GEN7_MISCCPCTL); @@ -3840,12 +3840,12 @@ void i915_gem_l3_remap(struct drm_device *dev) for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { u32 remap = I915_READ(GEN7_L3LOG_BASE + i); - if (remap && remap != dev_priv->mm.l3_remap_info[i/4]) + if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) DRM_DEBUG("0x%x was already programmed to %x\n", GEN7_L3LOG_BASE + i, remap); - if (remap && !dev_priv->mm.l3_remap_info[i/4]) + if (remap && !dev_priv->l3_parity.remap_info[i/4]) DRM_DEBUG_DRIVER("Clearing remapped register\n"); - I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->mm.l3_remap_info[i/4]); + I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); } /* Make sure all the writes land before disabling dop clock gating */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b92e6bfbb97..2c40127573e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -409,7 +409,7 @@ static void gen6_pm_rps_work(struct work_struct *work) static void ivybridge_parity_work(struct work_struct *work) { drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, - parity_error_work); + l3_parity.error_work); u32 error_status, row, bank, subbank; char *parity_event[5]; uint32_t misccpctl; @@ -473,7 +473,7 @@ static void ivybridge_handle_parity_error(struct drm_device *dev) I915_WRITE(GTIMR, dev_priv->gt_irq_mask); spin_unlock_irqrestore(&dev_priv->irq_lock, flags); - queue_work(dev_priv->wq, &dev_priv->parity_error_work); + queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work); } static void snb_gt_irq_handler(struct drm_device *dev, @@ -2681,7 +2681,7 @@ void intel_irq_init(struct drm_device *dev) INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func); INIT_WORK(&dev_priv->error_work, i915_error_work_func); INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); - INIT_WORK(&dev_priv->parity_error_work, ivybridge_parity_work); + INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); dev->driver->get_vblank_counter = i915_get_vblank_counter; dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 903eebd2117..9700ae96ec1 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -162,7 +162,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj, if (ret) return ret; - if (!dev_priv->mm.l3_remap_info) { + if (!dev_priv->l3_parity.remap_info) { temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); if (!temp) { mutex_unlock(&drm_dev->struct_mutex); @@ -182,9 +182,9 @@ i915_l3_write(struct file *filp, struct kobject *kobj, * at this point it is left as a TODO. */ if (temp) - dev_priv->mm.l3_remap_info = temp; + dev_priv->l3_parity.remap_info = temp; - memcpy(dev_priv->mm.l3_remap_info + (offset/4), + memcpy(dev_priv->l3_parity.remap_info + (offset/4), buf + (offset/4), count); -- cgit v1.2.3-70-g09d2 From e76e9aebcdbfebae8f4cd147e3c0f800d36e97f3 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 4 Nov 2012 09:21:27 -0800 Subject: drm/i915: Stop using AGP layer for GEN6+ As a quick hack we make the old intel_gtt structure mutable so we can fool a bunch of the existing code which depends on elements in that data structure. We can/should try to remove this in a subsequent patch. This should preserve the old gtt init behavior which upon writing these patches seems incorrect. The next patch will fix these things. The one exception is VLV which doesn't have the preserved flush control write behavior. Since we want to do that for all GEN6+ stuff, we'll handle that in a later patch. Mainstream VLV support doesn't actually exist yet anyway. v2: Update the comment to remove the "voodoo" Check that the last pte written matches what we readback v3: actually kill cache_level_to_agp_type since most of the flags will disappear in an upcoming patch v4: v3 was actually not what we wanted (Daniel) Make the ggtt bind assertions better and stricter (Chris) Fix some uncaught errors at gtt init (Chris) Some other random stuff that Chris wanted v5: check for i==0 in gen6_ggtt_bind_object to shut up gcc (Ben) Signed-off-by: Ben Widawsky Reviewed-by [v4]: Chris Wilson [danvet: Make the cache_level -> agp_flags conversion for pre-gen6 a tad more robust by mapping everything != CACHE_NONE to the cached agp flag - we have a 1:1 uncached mapping, but different modes of cacheable (at least on later generations). Suggested by Chris Wilson.] Signed-off-by: Daniel Vetter --- drivers/char/agp/intel-gtt.c | 2 +- drivers/gpu/drm/i915/i915_dma.c | 16 +- drivers/gpu/drm/i915/i915_drv.h | 10 +- drivers/gpu/drm/i915/i915_gem.c | 12 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 257 +++++++++++++++++++++++++---- drivers/gpu/drm/i915/i915_reg.h | 6 + include/drm/intel-gtt.h | 3 +- 8 files changed, 257 insertions(+), 51 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 38390f7c6ab..4dfbb80f0fd 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -1686,7 +1686,7 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev, } EXPORT_SYMBOL(intel_gmch_probe); -const struct intel_gtt *intel_gtt_get(void) +struct intel_gtt *intel_gtt_get(void) { return &intel_private.base; } diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index ff06e3239ad..1eea5be4361 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1496,19 +1496,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto free_priv; } - ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL); - if (!ret) { - DRM_ERROR("failed to set up gmch\n"); - ret = -EIO; + ret = i915_gem_gtt_init(dev); + if (ret) goto put_bridge; - } - - dev_priv->mm.gtt = intel_gtt_get(); - if (!dev_priv->mm.gtt) { - DRM_ERROR("Failed to initialize GTT\n"); - ret = -ENODEV; - goto put_gmch; - } i915_kick_out_firmware_fb(dev_priv); @@ -1683,7 +1673,7 @@ out_mtrrfree: out_rmmap: pci_iounmap(dev->pdev, dev_priv->regs); put_gmch: - intel_gmch_remove(); + i915_gem_gtt_fini(dev); put_bridge: pci_dev_put(dev_priv->bridge_dev); free_priv: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c4339c2b1b5..f316916fe65 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -746,7 +746,7 @@ typedef struct drm_i915_private { struct { /** Bridge to intel-gtt-ko */ - const struct intel_gtt *gtt; + struct intel_gtt *gtt; /** Memory allocator for GTT stolen memory */ struct drm_mm stolen; /** Memory allocator for GTT */ @@ -1538,6 +1538,14 @@ void i915_gem_init_global_gtt(struct drm_device *dev, unsigned long start, unsigned long mappable_end, unsigned long end); +int i915_gem_gtt_init(struct drm_device *dev); +void i915_gem_gtt_fini(struct drm_device *dev); +extern inline void i915_gem_chipset_flush(struct drm_device *dev) +{ + if (INTEL_INFO(dev)->gen < 6) + intel_gtt_chipset_flush(); +} + /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d8eaebfea93..c161fdbd830 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -845,12 +845,12 @@ out: * domain anymore. */ if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { i915_gem_clflush_object(obj); - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(dev); } } if (needs_clflush_after) - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(dev); return ret; } @@ -3058,7 +3058,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) return; i915_gem_clflush_object(obj); - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(obj->base.dev); old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; @@ -3959,7 +3959,7 @@ i915_gem_init_hw(struct drm_device *dev) drm_i915_private_t *dev_priv = dev->dev_private; int ret; - if (!intel_enable_gtt()) + if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1)) @@ -4294,7 +4294,7 @@ void i915_gem_detach_phys_object(struct drm_device *dev, page_cache_release(page); } } - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(dev); obj->phys_obj->cur_obj = NULL; obj->phys_obj = NULL; @@ -4381,7 +4381,7 @@ i915_gem_phys_pwrite(struct drm_device *dev, return -EFAULT; } - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(dev); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 91d43d5c452..d80e9dd00c4 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -672,7 +672,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, } if (flush_domains & I915_GEM_DOMAIN_CPU) - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(ring->dev); if (flush_domains & I915_GEM_DOMAIN_GTT) wmb(); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 06202fd6dbd..e74be0c2c6a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -262,26 +262,6 @@ void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, obj->base.size >> PAGE_SHIFT); } -/* XXX kill agp_type! */ -static unsigned int cache_level_to_agp_type(struct drm_device *dev, - enum i915_cache_level cache_level) -{ - switch (cache_level) { - case I915_CACHE_LLC_MLC: - /* Older chipsets do not have this extra level of CPU - * cacheing, so fallthrough and request the PTE simply - * as cached. - */ - if (INTEL_INFO(dev)->gen >= 6 && !IS_HASWELL(dev)) - return AGP_USER_CACHED_MEMORY_LLC_MLC; - case I915_CACHE_LLC: - return AGP_USER_CACHED_MEMORY; - default: - case I915_CACHE_NONE: - return AGP_USER_MEMORY; - } -} - static bool do_idling(struct drm_i915_private *dev_priv) { bool ret = dev_priv->mm.interruptible; @@ -304,13 +284,38 @@ static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) dev_priv->mm.interruptible = interruptible; } + +static void i915_ggtt_clear_range(struct drm_device *dev, + unsigned first_entry, + unsigned num_entries) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + gtt_pte_t scratch_pte; + volatile void __iomem *gtt_base = dev_priv->mm.gtt->gtt + first_entry; + const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry; + + if (INTEL_INFO(dev)->gen < 6) { + intel_gtt_clear_range(first_entry, num_entries); + return; + } + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + scratch_pte = pte_encode(dev, dev_priv->mm.gtt->scratch_page_dma, I915_CACHE_LLC); + memset_io(gtt_base, scratch_pte, num_entries * sizeof(scratch_pte)); + readl(gtt_base); +} + void i915_gem_restore_gtt_mappings(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; /* First fill our portion of the GTT with scratch pages */ - intel_gtt_clear_range(dev_priv->mm.gtt_start / PAGE_SIZE, + i915_ggtt_clear_range(dev, dev_priv->mm.gtt_start / PAGE_SIZE, (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE); list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) { @@ -318,7 +323,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) i915_gem_gtt_bind_object(obj, obj->cache_level); } - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(dev); } int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) @@ -334,21 +339,69 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) return 0; } +/* + * Binds an object into the global gtt with the specified cache level. The object + * will be accessible to the GPU via commands whose operands reference offsets + * within the global GTT as well as accessible by the GPU through the GMADR + * mapped BAR (dev_priv->mm.gtt->gtt). + */ +static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj, + enum i915_cache_level level) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct sg_table *st = obj->pages; + struct scatterlist *sg = st->sgl; + const int first_entry = obj->gtt_space->start >> PAGE_SHIFT; + const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry; + gtt_pte_t __iomem *gtt_entries = dev_priv->mm.gtt->gtt + first_entry; + int unused, i = 0; + unsigned int len, m = 0; + dma_addr_t addr; + + for_each_sg(st->sgl, sg, st->nents, unused) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + for (m = 0; m < len; m++) { + addr = sg_dma_address(sg) + (m << PAGE_SHIFT); + gtt_entries[i] = pte_encode(dev, addr, level); + i++; + } + } + + BUG_ON(i > max_entries); + BUG_ON(i != obj->base.size / PAGE_SIZE); + + /* XXX: This serves as a posting read to make sure that the PTE has + * actually been updated. There is some concern that even though + * registers and PTEs are within the same BAR that they are potentially + * of NUMA access patterns. Therefore, even with the way we assume + * hardware should work, we must keep this posting read for paranoia. + */ + if (i != 0) + WARN_ON(readl(>t_entries[i-1]) != pte_encode(dev, addr, level)); +} + void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { struct drm_device *dev = obj->base.dev; - unsigned int agp_type = cache_level_to_agp_type(dev, cache_level); + if (INTEL_INFO(dev)->gen < 6) { + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + intel_gtt_insert_sg_entries(obj->pages, + obj->gtt_space->start >> PAGE_SHIFT, + flags); + } else { + gen6_ggtt_bind_object(obj, cache_level); + } - intel_gtt_insert_sg_entries(obj->pages, - obj->gtt_space->start >> PAGE_SHIFT, - agp_type); obj->has_global_gtt_mapping = 1; } void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) { - intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT, + i915_ggtt_clear_range(obj->base.dev, + obj->gtt_space->start >> PAGE_SHIFT, obj->base.size >> PAGE_SHIFT); obj->has_global_gtt_mapping = 0; @@ -406,5 +459,153 @@ void i915_gem_init_global_gtt(struct drm_device *dev, dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start; /* ... but ensure that we clear the entire range. */ - intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE); + i915_ggtt_clear_range(dev, start / PAGE_SIZE, (end-start) / PAGE_SIZE); +} + +static int setup_scratch_page(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct page *page; + dma_addr_t dma_addr; + + page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); + if (page == NULL) + return -ENOMEM; + get_page(page); + set_pages_uc(page, 1); + +#ifdef CONFIG_INTEL_IOMMU + dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(dev->pdev, dma_addr)) + return -EINVAL; +#else + dma_addr = page_to_phys(page); +#endif + dev_priv->mm.gtt->scratch_page = page; + dev_priv->mm.gtt->scratch_page_dma = dma_addr; + + return 0; +} + +static void teardown_scratch_page(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + set_pages_wb(dev_priv->mm.gtt->scratch_page, 1); + pci_unmap_page(dev->pdev, dev_priv->mm.gtt->scratch_page_dma, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + put_page(dev_priv->mm.gtt->scratch_page); + __free_page(dev_priv->mm.gtt->scratch_page); +} + +static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) +{ + snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; + snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; + return snb_gmch_ctl << 20; +} + +static inline unsigned int gen6_get_stolen_size(u16 snb_gmch_ctl) +{ + snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; + snb_gmch_ctl &= SNB_GMCH_GMS_MASK; + return snb_gmch_ctl << 25; /* 32 MB units */ +} + +int i915_gem_gtt_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + phys_addr_t gtt_bus_addr; + u16 snb_gmch_ctl; + u32 tmp; + int ret; + + /* On modern platforms we need not worry ourself with the legacy + * hostbridge query stuff. Skip it entirely + */ + if (INTEL_INFO(dev)->gen < 6) { + ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL); + if (!ret) { + DRM_ERROR("failed to set up gmch\n"); + return -EIO; + } + + dev_priv->mm.gtt = intel_gtt_get(); + if (!dev_priv->mm.gtt) { + DRM_ERROR("Failed to initialize GTT\n"); + intel_gmch_remove(); + return -ENODEV; + } + return 0; + } + + dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL); + if (!dev_priv->mm.gtt) + return -ENOMEM; + + if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) + pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); + + pci_read_config_dword(dev->pdev, PCI_BASE_ADDRESS_0, &tmp); + /* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */ + gtt_bus_addr = (tmp & PCI_BASE_ADDRESS_MEM_MASK) + (2<<20); + + pci_read_config_dword(dev->pdev, PCI_BASE_ADDRESS_2, &tmp); + dev_priv->mm.gtt->gma_bus_addr = tmp & PCI_BASE_ADDRESS_MEM_MASK; + + /* i9xx_setup */ + pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + dev_priv->mm.gtt->gtt_total_entries = + gen6_get_total_gtt_size(snb_gmch_ctl) / sizeof(gtt_pte_t); + dev_priv->mm.gtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); + + dev_priv->mm.gtt->gtt_mappable_entries = pci_resource_len(dev->pdev, 2) >> PAGE_SHIFT; + /* 64/512MB is the current min/max we actually know of, but this is just a + * coarse sanity check. + */ + if ((dev_priv->mm.gtt->gtt_mappable_entries >> 8) < 64 || + dev_priv->mm.gtt->gtt_mappable_entries > dev_priv->mm.gtt->gtt_total_entries) { + DRM_ERROR("Unknown GMADR entries (%d)\n", + dev_priv->mm.gtt->gtt_mappable_entries); + ret = -ENXIO; + goto err_out; + } + + ret = setup_scratch_page(dev); + if (ret) { + DRM_ERROR("Scratch setup failed\n"); + goto err_out; + } + + dev_priv->mm.gtt->gtt = ioremap(gtt_bus_addr, + dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t)); + if (!dev_priv->mm.gtt->gtt) { + DRM_ERROR("Failed to map the gtt page table\n"); + teardown_scratch_page(dev); + ret = -ENOMEM; + goto err_out; + } + + /* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */ + DRM_INFO("Memory Usable by graphics device = %dK\n", dev_priv->mm.gtt->gtt_total_entries >> 10); + DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8); + DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20); + + return 0; + +err_out: + kfree(dev_priv->mm.gtt); + if (INTEL_INFO(dev)->gen < 6) + intel_gmch_remove(); + return ret; +} + +void i915_gem_gtt_fini(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + iounmap(dev_priv->mm.gtt->gtt); + teardown_scratch_page(dev); + if (INTEL_INFO(dev)->gen < 6) + intel_gmch_remove(); + kfree(dev_priv->mm.gtt); } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0866ac3d0a3..449403f60e4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -41,6 +41,12 @@ */ #define INTEL_GMCH_CTRL 0x52 #define INTEL_GMCH_VGA_DISABLE (1 << 1) +#define SNB_GMCH_CTRL 0x50 +#define SNB_GMCH_GGMS_SHIFT 8 /* GTT Graphics Memory Size */ +#define SNB_GMCH_GGMS_MASK 0x3 +#define SNB_GMCH_GMS_SHIFT 3 /* Graphics Mode Select */ +#define SNB_GMCH_GMS_MASK 0x1f + /* PCI config space */ diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 2e37e9f02e7..94e8f2c7f9e 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -3,7 +3,7 @@ #ifndef _DRM_INTEL_GTT_H #define _DRM_INTEL_GTT_H -const struct intel_gtt { +struct intel_gtt { /* Size of memory reserved for graphics by the BIOS */ unsigned int stolen_size; /* Total number of gtt entries. */ @@ -17,6 +17,7 @@ const struct intel_gtt { unsigned int do_idle_maps : 1; /* Share the scratch page dma with ppgtts. */ dma_addr_t scratch_page_dma; + struct page *scratch_page; /* for ppgtt PDE access */ u32 __iomem *gtt; /* needed for ioremap in drm/i915 */ -- cgit v1.2.3-70-g09d2 From 26b1ff35c8c4888fb544f290e0fc64143f7dbe45 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 4 Nov 2012 09:21:31 -0800 Subject: drm/i915: Move the remaining gtt code It's pretty much all consolidated now that we've killed AGP. We can move the one outlier, and defines too. (Kill some unused defines in the process) Signed-off-by: Ben Widawsky Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 62 ------------------------------ drivers/gpu/drm/i915/i915_gem_gtt.c | 76 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 17 --------- 3 files changed, 76 insertions(+), 79 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c161fdbd830..cdcf19de220 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3875,68 +3875,6 @@ void i915_gem_init_swizzling(struct drm_device *dev) I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); } -void i915_gem_init_ppgtt(struct drm_device *dev) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - uint32_t pd_offset; - struct intel_ring_buffer *ring; - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; - uint32_t __iomem *pd_addr; - uint32_t pd_entry; - int i; - - if (!dev_priv->mm.aliasing_ppgtt) - return; - - - pd_addr = dev_priv->mm.gtt->gtt + ppgtt->pd_offset/sizeof(uint32_t); - for (i = 0; i < ppgtt->num_pd_entries; i++) { - dma_addr_t pt_addr; - - if (dev_priv->mm.gtt->needs_dmar) - pt_addr = ppgtt->pt_dma_addr[i]; - else - pt_addr = page_to_phys(ppgtt->pt_pages[i]); - - pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); - pd_entry |= GEN6_PDE_VALID; - - writel(pd_entry, pd_addr + i); - } - readl(pd_addr); - - pd_offset = ppgtt->pd_offset; - pd_offset /= 64; /* in cachelines, */ - pd_offset <<= 16; - - if (INTEL_INFO(dev)->gen == 6) { - uint32_t ecochk, gab_ctl, ecobits; - - ecobits = I915_READ(GAC_ECO_BITS); - I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); - - gab_ctl = I915_READ(GAB_CTL); - I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); - - ecochk = I915_READ(GAM_ECOCHK); - I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | - ECOCHK_PPGTT_CACHE64B); - I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - } else if (INTEL_INFO(dev)->gen >= 7) { - I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B); - /* GFX_MODE is per-ring on gen7+ */ - } - - for_each_ring(ring, dev_priv, i) { - if (INTEL_INFO(dev)->gen >= 7) - I915_WRITE(RING_MODE_GEN7(ring), - _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - - I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); - I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset); - } -} - static bool intel_enable_blt(struct drm_device *dev) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a3e509a7165..afa56e978e8 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -30,6 +30,20 @@ typedef uint32_t gtt_pte_t; +/* PPGTT stuff */ +#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) + +#define GEN6_PDE_VALID (1 << 0) +/* gen6+ has bit 11-4 for physical addr bit 39-32 */ +#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) + +#define GEN6_PTE_VALID (1 << 0) +#define GEN6_PTE_UNCACHED (1 << 1) +#define HSW_PTE_UNCACHED (0) +#define GEN6_PTE_CACHE_LLC (2 << 1) +#define GEN6_PTE_CACHE_LLC_MLC (3 << 1) +#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) + static inline gtt_pte_t pte_encode(struct drm_device *dev, dma_addr_t addr, enum i915_cache_level level) @@ -262,6 +276,68 @@ void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, obj->base.size >> PAGE_SHIFT); } +void i915_gem_init_ppgtt(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + uint32_t pd_offset; + struct intel_ring_buffer *ring; + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; + uint32_t __iomem *pd_addr; + uint32_t pd_entry; + int i; + + if (!dev_priv->mm.aliasing_ppgtt) + return; + + + pd_addr = dev_priv->mm.gtt->gtt + ppgtt->pd_offset/sizeof(uint32_t); + for (i = 0; i < ppgtt->num_pd_entries; i++) { + dma_addr_t pt_addr; + + if (dev_priv->mm.gtt->needs_dmar) + pt_addr = ppgtt->pt_dma_addr[i]; + else + pt_addr = page_to_phys(ppgtt->pt_pages[i]); + + pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); + pd_entry |= GEN6_PDE_VALID; + + writel(pd_entry, pd_addr + i); + } + readl(pd_addr); + + pd_offset = ppgtt->pd_offset; + pd_offset /= 64; /* in cachelines, */ + pd_offset <<= 16; + + if (INTEL_INFO(dev)->gen == 6) { + uint32_t ecochk, gab_ctl, ecobits; + + ecobits = I915_READ(GAC_ECO_BITS); + I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); + + gab_ctl = I915_READ(GAB_CTL); + I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); + + ecochk = I915_READ(GAM_ECOCHK); + I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | + ECOCHK_PPGTT_CACHE64B); + I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + } else if (INTEL_INFO(dev)->gen >= 7) { + I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B); + /* GFX_MODE is per-ring on gen7+ */ + } + + for_each_ring(ring, dev_priv, i) { + if (INTEL_INFO(dev)->gen >= 7) + I915_WRITE(RING_MODE_GEN7(ring), + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + + I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); + I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset); + } +} + static bool do_idling(struct drm_i915_private *dev_priv) { bool ret = dev_priv->mm.interruptible; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d55c49642c6..9118bd11258 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -114,23 +114,6 @@ #define GEN6_GRDOM_MEDIA (1 << 2) #define GEN6_GRDOM_BLT (1 << 3) -/* PPGTT stuff */ -#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) - -#define GEN6_PDE_VALID (1 << 0) -#define GEN6_PDE_LARGE_PAGE (2 << 0) /* use 32kb pages */ -/* gen6+ has bit 11-4 for physical addr bit 39-32 */ -#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) - -#define GEN6_PTE_VALID (1 << 0) -#define GEN6_PTE_UNCACHED (1 << 1) -#define HSW_PTE_UNCACHED (0) -#define GEN6_PTE_CACHE_LLC (2 << 1) -#define GEN6_PTE_CACHE_LLC_MLC (3 << 1) -#define GEN6_PTE_CACHE_BITS (3 << 1) -#define GEN6_PTE_GFDT (1 << 3) -#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) - #define RING_PP_DIR_BASE(ring) ((ring)->mmio_base+0x228) #define RING_PP_DIR_BASE_READ(ring) ((ring)->mmio_base+0x518) #define RING_PP_DIR_DCLV(ring) ((ring)->mmio_base+0x220) -- cgit v1.2.3-70-g09d2 From fbdda6fb5ee5da401af42226878880069a6b8615 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 20 Nov 2012 10:45:16 +0000 Subject: drm/i915: Guard pages being reaped by OOM whilst binding-to-GTT In the circumstances that the shrinker is allowed to steal the mutex in order to reap pages, we need to be careful to prevent it operating on the current object and shooting ourselves in the foot. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a2f1b8652d6..643b7f6368a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2925,6 +2925,8 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, if (ret) return ret; + i915_gem_object_pin_pages(obj); + search_free: if (map_and_fenceable) free_space = @@ -2955,14 +2957,17 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, obj->cache_level, map_and_fenceable, nonblocking); - if (ret) + if (ret) { + i915_gem_object_unpin_pages(obj); return ret; + } goto search_free; } if (WARN_ON(!i915_gem_valid_gtt_space(dev, obj->gtt_space, obj->cache_level))) { + i915_gem_object_unpin_pages(obj); drm_mm_put_block(obj->gtt_space); obj->gtt_space = NULL; return -EINVAL; @@ -2971,6 +2976,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, ret = i915_gem_gtt_prepare_object(obj); if (ret) { + i915_gem_object_unpin_pages(obj); drm_mm_put_block(obj->gtt_space); obj->gtt_space = NULL; return ret; @@ -2993,6 +2999,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, obj->map_and_fenceable = mappable && fenceable; + i915_gem_object_unpin_pages(obj); trace_i915_gem_object_bind(obj, map_and_fenceable); i915_gem_verify_gtt(dev); return 0; -- cgit v1.2.3-70-g09d2 From c9839303d186d6270f570ff3c5f56c2327958086 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 20 Nov 2012 10:45:17 +0000 Subject: drm/i915: Pin the object whilst faulting it in In order to prevent reaping of the object whilst setting it up to handle the pagefault, we need to mark it as pinned. This has the nice side-effect of eliminating some special cases from the pagefault handler as well! Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 643b7f6368a..e9c80065805 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1345,30 +1345,17 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) trace_i915_gem_object_fault(obj, page_offset, true, write); /* Now bind it into the GTT if needed */ - if (!obj->map_and_fenceable) { - ret = i915_gem_object_unbind(obj); - if (ret) - goto unlock; - } - if (!obj->gtt_space) { - ret = i915_gem_object_bind_to_gtt(obj, 0, true, false); - if (ret) - goto unlock; - - ret = i915_gem_object_set_to_gtt_domain(obj, write); - if (ret) - goto unlock; - } + ret = i915_gem_object_pin(obj, 0, true, false); + if (ret) + goto unlock; - if (!obj->has_global_gtt_mapping) - i915_gem_gtt_bind_object(obj, obj->cache_level); + ret = i915_gem_object_set_to_gtt_domain(obj, write); + if (ret) + goto unpin; ret = i915_gem_object_get_fence(obj); if (ret) - goto unlock; - - if (i915_gem_object_is_inactive(obj)) - list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); + goto unpin; obj->fault_mappable = true; @@ -1377,6 +1364,8 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Finally, remap it using the new GTT offset */ ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); +unpin: + i915_gem_object_unpin(obj); unlock: mutex_unlock(&dev->struct_mutex); out: -- cgit v1.2.3-70-g09d2 From 8742267af4043606869f5b8dadbef635405543c3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 21 Nov 2012 13:04:03 +0000 Subject: drm/i915: Defer assignment of obj->gtt_space until after all possible mallocs As we may invoke the shrinker whilst trying to allocate memory to hold the gtt_space for this object, we need to be careful not to mark the drm_mm_node as activated (by assigning it to this object) before we have finished our sequence of allocations. Note: We also need to move the binding of the object into the actual pagetables down a bit. The best way seems to be to move it out into the callsites. Reported-by: Imre Deak Signed-off-by: Chris Wilson [danvet: Added small note to commit message to summarize review discussion.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e9c80065805..71b5129947b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2918,11 +2918,10 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, search_free: if (map_and_fenceable) - free_space = - drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space, - size, alignment, obj->cache_level, - 0, dev_priv->mm.gtt_mappable_end, - false); + free_space = drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space, + size, alignment, obj->cache_level, + 0, dev_priv->mm.gtt_mappable_end, + false); else free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space, size, alignment, obj->cache_level, @@ -2930,18 +2929,18 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, if (free_space != NULL) { if (map_and_fenceable) - obj->gtt_space = + free_space = drm_mm_get_block_range_generic(free_space, size, alignment, obj->cache_level, 0, dev_priv->mm.gtt_mappable_end, false); else - obj->gtt_space = + free_space = drm_mm_get_block_generic(free_space, size, alignment, obj->cache_level, false); } - if (obj->gtt_space == NULL) { + if (free_space == NULL) { ret = i915_gem_evict_something(dev, size, alignment, obj->cache_level, map_and_fenceable, @@ -2954,34 +2953,29 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, goto search_free; } if (WARN_ON(!i915_gem_valid_gtt_space(dev, - obj->gtt_space, + free_space, obj->cache_level))) { i915_gem_object_unpin_pages(obj); - drm_mm_put_block(obj->gtt_space); - obj->gtt_space = NULL; + drm_mm_put_block(free_space); return -EINVAL; } - ret = i915_gem_gtt_prepare_object(obj); if (ret) { i915_gem_object_unpin_pages(obj); - drm_mm_put_block(obj->gtt_space); - obj->gtt_space = NULL; + drm_mm_put_block(free_space); return ret; } - if (!dev_priv->mm.aliasing_ppgtt) - i915_gem_gtt_bind_object(obj, obj->cache_level); - list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list); list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); - obj->gtt_offset = obj->gtt_space->start; + obj->gtt_space = free_space; + obj->gtt_offset = free_space->start; fenceable = - obj->gtt_space->size == fence_size && - (obj->gtt_space->start & (fence_alignment - 1)) == 0; + free_space->size == fence_size && + (free_space->start & (fence_alignment - 1)) == 0; mappable = obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; @@ -3452,11 +3446,16 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, } if (obj->gtt_space == NULL) { + struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + ret = i915_gem_object_bind_to_gtt(obj, alignment, map_and_fenceable, nonblocking); if (ret) return ret; + + if (!dev_priv->mm.aliasing_ppgtt) + i915_gem_gtt_bind_object(obj, obj->cache_level); } if (!obj->has_global_gtt_mapping && map_and_fenceable) -- cgit v1.2.3-70-g09d2 From 5774506f157a91400c587b85d1ce4de56f0d32f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 21 Nov 2012 13:04:04 +0000 Subject: drm/i915: Borrow our struct_mutex for the direct reclaim If we have hit oom whilst holding our struct_mutex, then currently we cannot reap our own GPU buffers which likely pin most of memory, making an outright OOM more likely. So if we are running in direct reclaim and already hold the mutex, attempt to free buffers knowing that the original function can not continue until we return. v2: Add a note explaining that the mutex may be stolen due to pre-emption, and that is bad. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 71b5129947b..b0016bb6563 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4342,6 +4342,19 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) spin_unlock(&file_priv->mm.lock); } +static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) +{ + if (!mutex_is_locked(mutex)) + return false; + +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) + return mutex->owner == task; +#else + /* Since UP may be pre-empted, we cannot assume that we own the lock */ + return false; +#endif +} + static int i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) { @@ -4352,10 +4365,15 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) struct drm_device *dev = dev_priv->dev; struct drm_i915_gem_object *obj; int nr_to_scan = sc->nr_to_scan; + bool unlock = true; int cnt; - if (!mutex_trylock(&dev->struct_mutex)) - return 0; + if (!mutex_trylock(&dev->struct_mutex)) { + if (!mutex_is_locked_by(&dev->struct_mutex, current)) + return 0; + + unlock = false; + } if (nr_to_scan) { nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan); @@ -4371,6 +4389,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) if (obj->pin_count == 0 && obj->pages_pin_count == 0) cnt += obj->base.size >> PAGE_SHIFT; - mutex_unlock(&dev->struct_mutex); + if (unlock) + mutex_unlock(&dev->struct_mutex); return cnt; } -- cgit v1.2.3-70-g09d2 From b5d177946a30b097fbd1e5afa7c11acdeeb6bad8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 22 Nov 2012 13:07:20 +0000 Subject: drm/i915: Wait upon the last request seqno, rather than a future seqno In commit 69c2fc891343cb5217c866d10709343cff190bdc Author: Chris Wilson Date: Fri Jul 20 12:41:03 2012 +0100 drm/i915: Remove the per-ring write list the explicit flush was removed from i915_ring_idle(). However, we continued to wait upon the next seqno which now did not correspond to any request (except for the unusual condition of a failure to queue a request after execbuffer) and so would wait indefinitely. This has an important side-effect that i915_gpu_idle() does not cause the seqno to be incremented. This is vital if we are to be able to idle the GPU to handle seqno wraparound, as in subsequent patches. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b0016bb6563..9be450e7c54 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2462,10 +2462,29 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) static int i915_ring_idle(struct intel_ring_buffer *ring) { - if (list_empty(&ring->active_list)) + u32 seqno; + int ret; + + /* We need to add any requests required to flush the objects */ + if (!list_empty(&ring->active_list)) { + seqno = list_entry(ring->active_list.prev, + struct drm_i915_gem_object, + ring_list)->last_read_seqno; + + ret = i915_gem_check_olr(ring, seqno); + if (ret) + return ret; + } + + /* Wait upon the last request to be completed */ + if (list_empty(&ring->request_list)) return 0; - return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring)); + seqno = list_entry(ring->request_list.prev, + struct drm_i915_gem_request, + list)->seqno; + + return i915_wait_seqno(ring, seqno); } int i915_gpu_idle(struct drm_device *dev) -- cgit v1.2.3-70-g09d2 From 9d7730914f4cd496e356acfab95b41075aa8eae8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 27 Nov 2012 16:22:52 +0000 Subject: drm/i915: Preallocate next seqno before touching the ring Based on the work by Mika Kuoppala, we realised that we need to handle seqno wraparound prior to committing our changes to the ring. The most obvious point then is to grab the seqno inside intel_ring_begin(), and then to reuse that seqno for all ring operations until the next request. As intel_ring_begin() can fail, the callers must already be prepared to handle such failure and so we can safely add further checks. This patch looks like it should be split up into the interface changes and the tweaks to move seqno wrapping from the execbuffer into the core seqno increment. However, I found no easy way to break it into incremental steps without introducing further broken behaviour. v2: Mika found a silly mistake and a subtle error in the existing code; inside i915_gem_retire_requests() we were resetting the sync_seqno of the target ring based on the seqno from this ring - which are only related by the order of their allocation, not retirement. Hence we were applying the optimisation that the rings were synchronised too early, fortunately the only real casualty there is the handling of seqno wrapping. v3: Do not forget to reset the sync_seqno upon module reinitialisation, ala resume. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=863861 Reviewed-by: Mika Kuoppala [v2] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 5 +- drivers/gpu/drm/i915/i915_gem.c | 74 +++++++++++++++++++----------- drivers/gpu/drm/i915/i915_gem_context.c | 3 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 30 +++--------- drivers/gpu/drm/i915/intel_ringbuffer.c | 49 ++++++++++---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 10 ++-- 6 files changed, 89 insertions(+), 82 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 47c62f5125a..45491c1ac7d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1417,8 +1417,7 @@ int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); int i915_gem_object_sync(struct drm_i915_gem_object *obj, struct intel_ring_buffer *to); void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, - struct intel_ring_buffer *ring, - u32 seqno); + struct intel_ring_buffer *ring); int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, @@ -1436,7 +1435,7 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) return (int32_t)(seq1 - seq2) >= 0; } -u32 i915_gem_next_request_seqno(struct intel_ring_buffer *ring); +extern int i915_gem_get_seqno(struct drm_device *dev, u32 *seqno); int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj); int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9be450e7c54..3b9b250ceac 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1857,11 +1857,11 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, - struct intel_ring_buffer *ring, - u32 seqno) + struct intel_ring_buffer *ring) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; + u32 seqno = intel_ring_get_seqno(ring); BUG_ON(ring == NULL); obj->ring = ring; @@ -1922,26 +1922,54 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) WARN_ON(i915_verify_lists(dev)); } -static u32 -i915_gem_get_seqno(struct drm_device *dev) +static int +i915_gem_handle_seqno_wrap(struct drm_device *dev) { - drm_i915_private_t *dev_priv = dev->dev_private; - u32 seqno = dev_priv->next_seqno; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; + int ret, i, j; - /* reserve 0 for non-seqno */ - if (++dev_priv->next_seqno == 0) - dev_priv->next_seqno = 1; + /* The hardware uses various monotonic 32-bit counters, if we + * detect that they will wraparound we need to idle the GPU + * and reset those counters. + */ + ret = 0; + for_each_ring(ring, dev_priv, i) { + for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) + ret |= ring->sync_seqno[j] != 0; + } + if (ret == 0) + return ret; - return seqno; + ret = i915_gpu_idle(dev); + if (ret) + return ret; + + i915_gem_retire_requests(dev); + for_each_ring(ring, dev_priv, i) { + for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) + ring->sync_seqno[j] = 0; + } + + return 0; } -u32 -i915_gem_next_request_seqno(struct intel_ring_buffer *ring) +int +i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) { - if (ring->outstanding_lazy_request == 0) - ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev); + struct drm_i915_private *dev_priv = dev->dev_private; + + /* reserve 0 for non-seqno */ + if (dev_priv->next_seqno == 0) { + int ret = i915_gem_handle_seqno_wrap(dev); + if (ret) + return ret; - return ring->outstanding_lazy_request; + dev_priv->next_seqno = 1; + } + + *seqno = dev_priv->next_seqno++; + return 0; } int @@ -1952,7 +1980,6 @@ i915_add_request(struct intel_ring_buffer *ring, drm_i915_private_t *dev_priv = ring->dev->dev_private; struct drm_i915_gem_request *request; u32 request_ring_position; - u32 seqno; int was_empty; int ret; @@ -1971,7 +1998,6 @@ i915_add_request(struct intel_ring_buffer *ring, if (request == NULL) return -ENOMEM; - seqno = i915_gem_next_request_seqno(ring); /* Record the position of the start of the request so that * should we detect the updated seqno part-way through the @@ -1980,15 +2006,13 @@ i915_add_request(struct intel_ring_buffer *ring, */ request_ring_position = intel_ring_get_tail(ring); - ret = ring->add_request(ring, &seqno); + ret = ring->add_request(ring); if (ret) { kfree(request); return ret; } - trace_i915_gem_request_add(ring, seqno); - - request->seqno = seqno; + request->seqno = intel_ring_get_seqno(ring); request->ring = ring; request->tail = request_ring_position; request->emitted_jiffies = jiffies; @@ -2006,6 +2030,7 @@ i915_add_request(struct intel_ring_buffer *ring, spin_unlock(&file_priv->mm.lock); } + trace_i915_gem_request_add(ring, request->seqno); ring->outstanding_lazy_request = 0; if (!dev_priv->mm.suspended) { @@ -2022,7 +2047,7 @@ i915_add_request(struct intel_ring_buffer *ring, } if (out_seqno) - *out_seqno = seqno; + *out_seqno = request->seqno; return 0; } @@ -2120,7 +2145,6 @@ void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) { uint32_t seqno; - int i; if (list_empty(&ring->request_list)) return; @@ -2129,10 +2153,6 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) seqno = ring->get_seqno(ring, true); - for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) - if (seqno >= ring->sync_seqno[i]) - ring->sync_seqno[i] = 0; - while (!list_empty(&ring->request_list)) { struct drm_i915_gem_request *request; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 0e510df80d7..a3f06bcad55 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -410,9 +410,8 @@ static int do_switch(struct i915_hw_context *to) * MI_SET_CONTEXT instead of when the next seqno has completed. */ if (from_obj != NULL) { - u32 seqno = i915_gem_next_request_seqno(ring); from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; - i915_gem_object_move_to_active(from_obj, ring, seqno); + i915_gem_object_move_to_active(from_obj, ring); /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the * whole damn pipeline, we don't need to explicitly mark the * object dirty. The only exception is that the context must be diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 48e4317e72d..ee8f97f0539 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -713,8 +713,7 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, static void i915_gem_execbuffer_move_to_active(struct list_head *objects, - struct intel_ring_buffer *ring, - u32 seqno) + struct intel_ring_buffer *ring) { struct drm_i915_gem_object *obj; @@ -726,10 +725,10 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, obj->base.write_domain = obj->base.pending_write_domain; obj->fenced_gpu_access = obj->pending_fenced_gpu_access; - i915_gem_object_move_to_active(obj, ring, seqno); + i915_gem_object_move_to_active(obj, ring); if (obj->base.write_domain) { obj->dirty = 1; - obj->last_write_seqno = seqno; + obj->last_write_seqno = intel_ring_get_seqno(ring); if (obj->pin_count) /* check for potential scanout */ intel_mark_fb_busy(obj); } @@ -789,7 +788,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct intel_ring_buffer *ring; u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 exec_start, exec_len; - u32 seqno; u32 mask; u32 flags; int ret, mode, i; @@ -994,22 +992,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) goto err; - seqno = i915_gem_next_request_seqno(ring); - for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) { - if (seqno < ring->sync_seqno[i]) { - /* The GPU can not handle its semaphore value wrapping, - * so every billion or so execbuffers, we need to stall - * the GPU in order to reset the counters. - */ - ret = i915_gpu_idle(dev); - if (ret) - goto err; - i915_gem_retire_requests(dev); - - BUG_ON(ring->sync_seqno[i]); - } - } - ret = i915_switch_context(ring, file, ctx_id); if (ret) goto err; @@ -1035,8 +1017,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; } - trace_i915_gem_ring_dispatch(ring, seqno, flags); - exec_start = batch_obj->gtt_offset + args->batch_start_offset; exec_len = args->batch_len; if (cliprects) { @@ -1060,7 +1040,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; } - i915_gem_execbuffer_move_to_active(&objects, ring, seqno); + trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags); + + i915_gem_execbuffer_move_to_active(&objects, ring); i915_gem_execbuffer_retire_commands(dev, file, ring); err: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 987eb5fdaf3..e4682cdc00b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -555,12 +555,11 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring) static void update_mboxes(struct intel_ring_buffer *ring, - u32 seqno, - u32 mmio_offset) + u32 mmio_offset) { intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(ring, mmio_offset); - intel_ring_emit(ring, seqno); + intel_ring_emit(ring, ring->outstanding_lazy_request); } /** @@ -573,8 +572,7 @@ update_mboxes(struct intel_ring_buffer *ring, * This acts like a signal in the canonical semaphore. */ static int -gen6_add_request(struct intel_ring_buffer *ring, - u32 *seqno) +gen6_add_request(struct intel_ring_buffer *ring) { u32 mbox1_reg; u32 mbox2_reg; @@ -587,13 +585,11 @@ gen6_add_request(struct intel_ring_buffer *ring, mbox1_reg = ring->signal_mbox[0]; mbox2_reg = ring->signal_mbox[1]; - *seqno = i915_gem_next_request_seqno(ring); - - update_mboxes(ring, *seqno, mbox1_reg); - update_mboxes(ring, *seqno, mbox2_reg); + update_mboxes(ring, mbox1_reg); + update_mboxes(ring, mbox2_reg); intel_ring_emit(ring, MI_STORE_DWORD_INDEX); intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(ring, *seqno); + intel_ring_emit(ring, ring->outstanding_lazy_request); intel_ring_emit(ring, MI_USER_INTERRUPT); intel_ring_advance(ring); @@ -650,10 +646,8 @@ do { \ } while (0) static int -pc_render_add_request(struct intel_ring_buffer *ring, - u32 *result) +pc_render_add_request(struct intel_ring_buffer *ring) { - u32 seqno = i915_gem_next_request_seqno(ring); struct pipe_control *pc = ring->private; u32 scratch_addr = pc->gtt_offset + 128; int ret; @@ -674,7 +668,7 @@ pc_render_add_request(struct intel_ring_buffer *ring, PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, seqno); + intel_ring_emit(ring, ring->outstanding_lazy_request); intel_ring_emit(ring, 0); PIPE_CONTROL_FLUSH(ring, scratch_addr); scratch_addr += 128; /* write to separate cachelines */ @@ -693,11 +687,10 @@ pc_render_add_request(struct intel_ring_buffer *ring, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | PIPE_CONTROL_NOTIFY); intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, seqno); + intel_ring_emit(ring, ring->outstanding_lazy_request); intel_ring_emit(ring, 0); intel_ring_advance(ring); - *result = seqno; return 0; } @@ -885,25 +878,20 @@ bsd_ring_flush(struct intel_ring_buffer *ring, } static int -i9xx_add_request(struct intel_ring_buffer *ring, - u32 *result) +i9xx_add_request(struct intel_ring_buffer *ring) { - u32 seqno; int ret; ret = intel_ring_begin(ring, 4); if (ret) return ret; - seqno = i915_gem_next_request_seqno(ring); - intel_ring_emit(ring, MI_STORE_DWORD_INDEX); intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(ring, seqno); + intel_ring_emit(ring, ring->outstanding_lazy_request); intel_ring_emit(ring, MI_USER_INTERRUPT); intel_ring_advance(ring); - *result = seqno; return 0; } @@ -1110,6 +1098,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); ring->size = 32 * PAGE_SIZE; + memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno)); init_waitqueue_head(&ring->irq_queue); @@ -1338,6 +1327,15 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) return -EBUSY; } +static int +intel_ring_alloc_seqno(struct intel_ring_buffer *ring) +{ + if (ring->outstanding_lazy_request) + return 0; + + return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request); +} + int intel_ring_begin(struct intel_ring_buffer *ring, int num_dwords) { @@ -1349,6 +1347,11 @@ int intel_ring_begin(struct intel_ring_buffer *ring, if (ret) return ret; + /* Preallocate the olr before touching the ring */ + ret = intel_ring_alloc_seqno(ring); + if (ret) + return ret; + if (unlikely(ring->tail + n > ring->effective_size)) { ret = intel_wrap_ring_buffer(ring); if (unlikely(ret)) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 5af65b89765..0e613026d00 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -70,8 +70,7 @@ struct intel_ring_buffer { int __must_check (*flush)(struct intel_ring_buffer *ring, u32 invalidate_domains, u32 flush_domains); - int (*add_request)(struct intel_ring_buffer *ring, - u32 *seqno); + int (*add_request)(struct intel_ring_buffer *ring); /* Some chipsets are not quite as coherent as advertised and need * an expensive kick to force a true read of the up-to-date seqno. * However, the up-to-date seqno is not always required and the last @@ -205,7 +204,6 @@ static inline void intel_ring_emit(struct intel_ring_buffer *ring, void intel_ring_advance(struct intel_ring_buffer *ring); -u32 intel_ring_get_seqno(struct intel_ring_buffer *ring); int intel_ring_flush_all_caches(struct intel_ring_buffer *ring); int intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring); @@ -221,6 +219,12 @@ static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring) return ring->tail; } +static inline u32 intel_ring_get_seqno(struct intel_ring_buffer *ring) +{ + BUG_ON(ring->outstanding_lazy_request == 0); + return ring->outstanding_lazy_request; +} + static inline void i915_trace_irq_get(struct intel_ring_buffer *ring, u32 seqno) { if (ring->trace_irq_seqno == 0 && ring->irq_get(ring)) -- cgit v1.2.3-70-g09d2 From b662a0663230853fccdfceeda5db031f5d4b657c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 27 Nov 2012 16:22:53 +0000 Subject: drm/i915: Simplify flushing activity on the ring As we now always preallocate the seqno before writing to the ring, we can trivially test if we have any pending activity on the ring by inspecting the olr. This makes it then possible to flush operations that are not normally associated with a request, like power-management. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3b9b250ceac..e594435eec9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2485,13 +2485,9 @@ static int i915_ring_idle(struct intel_ring_buffer *ring) u32 seqno; int ret; - /* We need to add any requests required to flush the objects */ - if (!list_empty(&ring->active_list)) { - seqno = list_entry(ring->active_list.prev, - struct drm_i915_gem_object, - ring_list)->last_read_seqno; - - ret = i915_gem_check_olr(ring, seqno); + /* We need to add any requests required to flush the objects and ring */ + if (ring->outstanding_lazy_request) { + ret = i915_add_request(ring, NULL, NULL); if (ret) return ret; } -- cgit v1.2.3-70-g09d2 From 3e9605018ab3e333d51cc90fccfde2031886763b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 27 Nov 2012 16:22:54 +0000 Subject: drm/i915: Rearrange code to only have a single method for waiting upon the ring Replace the wait for the ring to be clear with the more common wait for the ring to be idle. The principle advantage is one less exported intel_ring_wait function, and the removal of a hardcoded value. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 4 +- drivers/gpu/drm/i915/i915_gem.c | 25 +---------- drivers/gpu/drm/i915/intel_pm.c | 8 +++- drivers/gpu/drm/i915/intel_ringbuffer.c | 73 ++++++++++++++++++++++----------- drivers/gpu/drm/i915/intel_ringbuffer.h | 9 +--- 5 files changed, 58 insertions(+), 61 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 4ea331b931f..80ed75117b6 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -592,10 +592,8 @@ static int i915_dispatch_flip(struct drm_device * dev) static int i915_quiescent(struct drm_device *dev) { - struct intel_ring_buffer *ring = LP_RING(dev->dev_private); - i915_kernel_lost_context(dev); - return intel_wait_ring_idle(ring); + return intel_ring_idle(LP_RING(dev->dev_private)); } static int i915_flush_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e594435eec9..85a09482c2a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2480,29 +2480,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) return 0; } -static int i915_ring_idle(struct intel_ring_buffer *ring) -{ - u32 seqno; - int ret; - - /* We need to add any requests required to flush the objects and ring */ - if (ring->outstanding_lazy_request) { - ret = i915_add_request(ring, NULL, NULL); - if (ret) - return ret; - } - - /* Wait upon the last request to be completed */ - if (list_empty(&ring->request_list)) - return 0; - - seqno = list_entry(ring->request_list.prev, - struct drm_i915_gem_request, - list)->seqno; - - return i915_wait_seqno(ring, seqno); -} - int i915_gpu_idle(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; @@ -2515,7 +2492,7 @@ int i915_gpu_idle(struct drm_device *dev) if (ret) return ret; - ret = i915_ring_idle(ring); + ret = intel_ring_idle(ring); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 58c2f210154..f595b8d56cc 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2653,6 +2653,7 @@ static void ironlake_enable_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ring_buffer *ring = &dev_priv->ring[RCS]; + bool was_interruptible; int ret; /* rc6 disabled by default due to repeated reports of hanging during @@ -2667,6 +2668,9 @@ static void ironlake_enable_rc6(struct drm_device *dev) if (ret) return; + was_interruptible = dev_priv->mm.interruptible; + dev_priv->mm.interruptible = false; + /* * GPU can automatically power down the render unit if given a page * to save state. @@ -2674,6 +2678,7 @@ static void ironlake_enable_rc6(struct drm_device *dev) ret = intel_ring_begin(ring, 6); if (ret) { ironlake_teardown_rc6(dev); + dev_priv->mm.interruptible = was_interruptible; return; } @@ -2694,7 +2699,8 @@ static void ironlake_enable_rc6(struct drm_device *dev) * does an implicit flush, combined with MI_FLUSH above, it should be * safe to assume that renderctx is valid */ - ret = intel_wait_ring_idle(ring); + ret = intel_ring_idle(ring); + dev_priv->mm.interruptible = was_interruptible; if (ret) { DRM_ERROR("failed to enable ironlake power power savings\n"); ironlake_teardown_rc6(dev); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e4682cdc00b..bc7cf7c6310 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1175,7 +1175,7 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring) /* Disable the ring buffer. The ring must be idle at this point */ dev_priv = ring->dev->dev_private; - ret = intel_wait_ring_idle(ring); + ret = intel_ring_idle(ring); if (ret) DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", ring->name, ret); @@ -1194,28 +1194,6 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring) cleanup_status_page(ring); } -static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring) -{ - uint32_t __iomem *virt; - int rem = ring->size - ring->tail; - - if (ring->space < rem) { - int ret = intel_wait_ring_buffer(ring, rem); - if (ret) - return ret; - } - - virt = ring->virtual_start + ring->tail; - rem /= 4; - while (rem--) - iowrite32(MI_NOOP, virt++); - - ring->tail = 0; - ring->space = ring_space(ring); - - return 0; -} - static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno) { int ret; @@ -1284,7 +1262,7 @@ static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n) return 0; } -int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) +static int ring_wait_for_space(struct intel_ring_buffer *ring, int n) { struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -1327,6 +1305,51 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) return -EBUSY; } +static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring) +{ + uint32_t __iomem *virt; + int rem = ring->size - ring->tail; + + if (ring->space < rem) { + int ret = ring_wait_for_space(ring, rem); + if (ret) + return ret; + } + + virt = ring->virtual_start + ring->tail; + rem /= 4; + while (rem--) + iowrite32(MI_NOOP, virt++); + + ring->tail = 0; + ring->space = ring_space(ring); + + return 0; +} + +int intel_ring_idle(struct intel_ring_buffer *ring) +{ + u32 seqno; + int ret; + + /* We need to add any requests required to flush the objects and ring */ + if (ring->outstanding_lazy_request) { + ret = i915_add_request(ring, NULL, NULL); + if (ret) + return ret; + } + + /* Wait upon the last request to be completed */ + if (list_empty(&ring->request_list)) + return 0; + + seqno = list_entry(ring->request_list.prev, + struct drm_i915_gem_request, + list)->seqno; + + return i915_wait_seqno(ring, seqno); +} + static int intel_ring_alloc_seqno(struct intel_ring_buffer *ring) { @@ -1359,7 +1382,7 @@ int intel_ring_begin(struct intel_ring_buffer *ring, } if (unlikely(ring->space < n)) { - ret = intel_wait_ring_buffer(ring, n); + ret = ring_wait_for_space(ring, n); if (unlikely(ret)) return ret; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0e613026d00..d4b7416fa1b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -187,22 +187,15 @@ intel_read_status_page(struct intel_ring_buffer *ring, void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring); -int __must_check intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n); -static inline int intel_wait_ring_idle(struct intel_ring_buffer *ring) -{ - return intel_wait_ring_buffer(ring, ring->size - 8); -} - int __must_check intel_ring_begin(struct intel_ring_buffer *ring, int n); - static inline void intel_ring_emit(struct intel_ring_buffer *ring, u32 data) { iowrite32(data, ring->virtual_start + ring->tail); ring->tail += 4; } - void intel_ring_advance(struct intel_ring_buffer *ring); +int __must_check intel_ring_idle(struct intel_ring_buffer *ring); int intel_ring_flush_all_caches(struct intel_ring_buffer *ring); int intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring); -- cgit v1.2.3-70-g09d2 From 7b01e260a6cad9152eefb44ce64f3a2073af1e6b Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 28 Nov 2012 17:18:45 +0200 Subject: drm/i915: Set sync_seqno properly after seqno wrap i915_gem_handle_seqno_wrap() will zero all sync_seqnos but as the wrap can happen inside ring->sync_to(), pre wrap seqno was carried over and overwrote the zeroed sync_seqno. When wrap is handled, all outstanding requests will be retired and objects moved to inactive queue, causing their last_read_seqno to be zero. Use this to update the sync_seqno correctly. RING_SYNC registers after wrap will contain pre wrap values which are >= seqno. So injecting the semaphore wait into ring completes immediately. Original idea for using last_read_seqno from Chris Wilson. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 85a09482c2a..4ef8b571433 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2397,7 +2397,11 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, ret = to->sync_to(to, from, seqno); if (!ret) - from->sync_seqno[idx] = seqno; + /* We use last_read_seqno because sync_to() + * might have just caused seqno wrap under + * the radar. + */ + from->sync_seqno[idx] = obj->last_read_seqno; return ret; } -- cgit v1.2.3-70-g09d2 From a2165e312381f35b8abd7dcb8a8ab955bc60d867 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 3 Dec 2012 11:49:00 +0000 Subject: drm/i915: Decouple the object from the unbound list before freeing pages As we may actually allocate in order to save the physical swizzling bits during the free, we have to be careful not to trigger the shrinker on the same object. Signed-off-by: Chris Wilson [danvet: Added a small comment in the code to really drive the scariness of this patch home.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4ef8b571433..c1f691958f8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1696,10 +1696,14 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) if (obj->pages_pin_count) return -EBUSY; + /* ->put_pages might need to allocate memory for the bit17 swizzle + * array, hence protect them from being reaped by removing them from gtt + * lists early. */ + list_del(&obj->gtt_list); + ops->put_pages(obj); obj->pages = NULL; - list_del(&obj->gtt_list); if (i915_gem_object_is_purgeable(obj)) i915_gem_object_truncate(obj); -- cgit v1.2.3-70-g09d2