diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_irq.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_irq.c | 91 |
1 files changed, 71 insertions, 20 deletions
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a03b445ceb5..4b91228fd9b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1027,8 +1027,13 @@ static inline void intel_hpd_irq_handler(struct drm_device *dev, dev_priv->display.hpd_irq_setup(dev); spin_unlock(&dev_priv->irq_lock); - queue_work(dev_priv->wq, - &dev_priv->hotplug_work); + /* + * Our hotplug handler can grab modeset locks (by calling down into the + * fb helpers). Hence it must not be run on our own dev-priv->wq work + * queue for otherwise the flush_work in the pageflip code will + * deadlock. + */ + schedule_work(&dev_priv->hotplug_work); } static void gmbus_irq_handler(struct drm_device *dev) @@ -1464,6 +1469,34 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) return ret; } +static void i915_error_wake_up(struct drm_i915_private *dev_priv, + bool reset_completed) +{ + struct intel_ring_buffer *ring; + int i; + + /* + * Notify all waiters for GPU completion events that reset state has + * been changed, and that they need to restart their wait after + * checking for potential errors (and bail out to drop locks if there is + * a gpu reset pending so that i915_error_work_func can acquire them). + */ + + /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */ + for_each_ring(ring, dev_priv, i) + wake_up_all(&ring->irq_queue); + + /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */ + wake_up_all(&dev_priv->pending_flip_queue); + + /* + * Signal tasks blocked in i915_gem_wait_for_error that the pending + * reset state is cleared. + */ + if (reset_completed) + wake_up_all(&dev_priv->gpu_error.reset_queue); +} + /** * i915_error_work_func - do process context error handling work * @work: work struct @@ -1478,11 +1511,10 @@ static void i915_error_work_func(struct work_struct *work) drm_i915_private_t *dev_priv = container_of(error, drm_i915_private_t, gpu_error); struct drm_device *dev = dev_priv->dev; - struct intel_ring_buffer *ring; char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; - int i, ret; + int ret; kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event); @@ -1501,8 +1533,16 @@ static void i915_error_work_func(struct work_struct *work) kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event); + /* + * All state reset _must_ be completed before we update the + * reset counter, for otherwise waiters might miss the reset + * pending state and not properly drop locks, resulting in + * deadlocks with the reset work. + */ ret = i915_reset(dev); + intel_display_handle_reset(dev); + if (ret == 0) { /* * After all the gem state is reset, increment the reset @@ -1523,12 +1563,11 @@ static void i915_error_work_func(struct work_struct *work) atomic_set(&error->reset_counter, I915_WEDGED); } - for_each_ring(ring, dev_priv, i) - wake_up_all(&ring->irq_queue); - - intel_display_handle_reset(dev); - - wake_up_all(&dev_priv->gpu_error.reset_queue); + /* + * Note: The wake_up also serves as a memory barrier so that + * waiters see the update value of the reset counter atomic_t. + */ + i915_error_wake_up(dev_priv, true); } } @@ -1637,8 +1676,6 @@ static void i915_report_and_clear_eir(struct drm_device *dev) void i915_handle_error(struct drm_device *dev, bool wedged) { struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_ring_buffer *ring; - int i; i915_capture_error_state(dev); i915_report_and_clear_eir(dev); @@ -1648,14 +1685,28 @@ void i915_handle_error(struct drm_device *dev, bool wedged) &dev_priv->gpu_error.reset_counter); /* - * Wakeup waiting processes so that the reset work item - * doesn't deadlock trying to grab various locks. + * Wakeup waiting processes so that the reset work function + * i915_error_work_func doesn't deadlock trying to grab various + * locks. By bumping the reset counter first, the woken + * processes will see a reset in progress and back off, + * releasing their locks and then wait for the reset completion. + * We must do this for _all_ gpu waiters that might hold locks + * that the reset work needs to acquire. + * + * Note: The wake_up serves as the required memory barrier to + * ensure that the waiters see the updated value of the reset + * counter atomic_t. */ - for_each_ring(ring, dev_priv, i) - wake_up_all(&ring->irq_queue); + i915_error_wake_up(dev_priv, false); } - queue_work(dev_priv->wq, &dev_priv->gpu_error.work); + /* + * Our reset work can grab modeset locks (since it needs to reset the + * state of outstanding pagelips). Hence it must not be run on our own + * dev-priv->wq work queue for otherwise the flush_work in the pageflip + * code will deadlock. + */ + schedule_work(&dev_priv->gpu_error.work); } static void __always_unused i915_pageflip_stall_check(struct drm_device *dev, int pipe) @@ -2027,9 +2078,9 @@ static void i915_hangcheck_elapsed(unsigned long data) for_each_ring(ring, dev_priv, i) { if (ring->hangcheck.score > FIRE) { - DRM_ERROR("%s on %s\n", - stuck[i] ? "stuck" : "no progress", - ring->name); + DRM_INFO("%s on %s\n", + stuck[i] ? "stuck" : "no progress", + ring->name); rings_hung++; } } |