From 1823521d2b2fa614e7ad95fdc8a0f59e571f37ce Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Sep 2013 10:45:51 +0100 Subject: drm/i915: Rename ring->outstanding_lazy_request Prior to preallocating an request for lazy emission, rename the existing field to make way (and differentiate the seqno from the request struct). Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 68b1ca974d5..c6aa2b3c8c2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -140,7 +140,7 @@ struct intel_ring_buffer { /** * Do we have some not yet emitted requests outstanding? */ - u32 outstanding_lazy_request; + u32 outstanding_lazy_seqno; bool gpu_caches_dirty; bool fbc_dirty; @@ -258,8 +258,8 @@ static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring) static inline u32 intel_ring_get_seqno(struct intel_ring_buffer *ring) { - BUG_ON(ring->outstanding_lazy_request == 0); - return ring->outstanding_lazy_request; + BUG_ON(ring->outstanding_lazy_seqno == 0); + return ring->outstanding_lazy_seqno; } static inline void i915_trace_irq_get(struct intel_ring_buffer *ring, u32 seqno) -- cgit v1.2.3-70-g09d2 From 3c0e234c847318304c12f9e7fffac7e1cf3db3ff Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Sep 2013 10:45:52 +0100 Subject: drm/i915; Preallocate the lazy request It is possible for us to be forced to perform an allocation for the lazy request whilst running the shrinker. This allocation may fail, leaving us unable to reclaim any memory leading to premature OOM. A neat solution to the problem is to preallocate the request at the same time as acquiring the seqno for the ring transaction. This means that we can report ENOMEM prior to touching the rings. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 9 ++++----- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 3 files changed, 15 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 858e7888663..399e159016e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2041,8 +2041,8 @@ int __i915_add_request(struct intel_ring_buffer *ring, if (ret) return ret; - request = kmalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL) + request = ring->preallocated_lazy_request; + if (WARN_ON(request == NULL)) return -ENOMEM; /* Record the position of the start of the request so that @@ -2053,10 +2053,8 @@ int __i915_add_request(struct intel_ring_buffer *ring, request_ring_position = intel_ring_get_tail(ring); ret = ring->add_request(ring); - if (ret) { - kfree(request); + if (ret) return ret; - } request->seqno = intel_ring_get_seqno(ring); request->ring = ring; @@ -2095,6 +2093,7 @@ int __i915_add_request(struct intel_ring_buffer *ring, trace_i915_gem_request_add(ring, request->seqno); ring->outstanding_lazy_seqno = 0; + ring->preallocated_lazy_request = NULL; if (!dev_priv->ums.mm_suspended) { i915_queue_hangcheck(ring->dev); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a83ff1863a5..284afaf5d6f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1498,6 +1498,16 @@ intel_ring_alloc_seqno(struct intel_ring_buffer *ring) if (ring->outstanding_lazy_seqno) return 0; + if (ring->preallocated_lazy_request == NULL) { + struct drm_i915_gem_request *request; + + request = kmalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + return -ENOMEM; + + ring->preallocated_lazy_request = request; + } + return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c6aa2b3c8c2..ad2dd65c63f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -140,6 +140,7 @@ struct intel_ring_buffer { /** * Do we have some not yet emitted requests outstanding? */ + struct drm_i915_gem_request *preallocated_lazy_request; u32 outstanding_lazy_seqno; bool gpu_caches_dirty; bool fbc_dirty; -- cgit v1.2.3-70-g09d2 From da66146425c3136943452988afd3d64cd551da58 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 6 Sep 2013 16:03:28 +0300 Subject: drm/i915: include hangcheck action and score in error_state Score and action reveals what all the rings were doing and why hang was declared. Add idle state so that we can distinguish between waiting and idle ring. v2: - add idle as a hangcheck action - consensed hangcheck status to single line (Chris) - mark active explicitly when we are making progress (Chris) Reviewed-by: Chris Wilson Signed-off-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gpu_error.c | 24 ++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_irq.c | 5 +++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 4 files changed, 32 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c5f0abaa9a2..1fb01b5b819 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -328,6 +328,8 @@ struct drm_i915_error_state { u32 *active_bo_count, *pinned_bo_count; struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; + int hangcheck_score[I915_NUM_RINGS]; + enum intel_ring_hangcheck_action hangcheck_action[I915_NUM_RINGS]; }; struct intel_crtc_config; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index aba9d749899..c38d575dc5a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -213,6 +213,24 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, } } +static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a) +{ + switch (a) { + case HANGCHECK_IDLE: + return "idle"; + case HANGCHECK_WAIT: + return "wait"; + case HANGCHECK_ACTIVE: + return "active"; + case HANGCHECK_KICK: + return "kick"; + case HANGCHECK_HUNG: + return "hung"; + } + + return "unknown"; +} + static void i915_ring_error_state(struct drm_i915_error_state_buf *m, struct drm_device *dev, struct drm_i915_error_state *error, @@ -253,6 +271,9 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m, err_printf(m, " waiting: %s\n", yesno(error->waiting[ring])); err_printf(m, " ring->head: 0x%08x\n", error->cpu_ring_head[ring]); err_printf(m, " ring->tail: 0x%08x\n", error->cpu_ring_tail[ring]); + err_printf(m, " hangcheck: %s [%d]\n", + hangcheck_action_to_str(error->hangcheck_action[ring]), + error->hangcheck_score[ring]); } void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) @@ -718,6 +739,9 @@ static void i915_record_ring_state(struct drm_device *dev, error->cpu_ring_head[ring->id] = ring->head; error->cpu_ring_tail[ring->id] = ring->tail; + + error->hangcheck_score[ring->id] = ring->hangcheck.score; + error->hangcheck_action[ring->id] = ring->hangcheck.action; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 9e48cf27db5..5350ef57ec3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1975,6 +1975,8 @@ static void i915_hangcheck_elapsed(unsigned long data) if (ring->hangcheck.seqno == seqno) { if (ring_idle(ring, seqno)) { + ring->hangcheck.action = HANGCHECK_IDLE; + if (waitqueue_active(&ring->irq_queue)) { /* Issue a wake-up to catch stuck h/w. */ DRM_ERROR("Hangcheck timer elapsed... %s idle\n", @@ -2003,6 +2005,7 @@ static void i915_hangcheck_elapsed(unsigned long data) acthd); switch (ring->hangcheck.action) { + case HANGCHECK_IDLE: case HANGCHECK_WAIT: break; case HANGCHECK_ACTIVE: @@ -2018,6 +2021,8 @@ static void i915_hangcheck_elapsed(unsigned long data) } } } else { + ring->hangcheck.action = HANGCHECK_ACTIVE; + /* Gradually reduce the count so that we catch DoS * attempts across multiple batches. */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index ad2dd65c63f..b5aac570208 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -34,6 +34,7 @@ struct intel_hw_status_page { #define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val) enum intel_ring_hangcheck_action { + HANGCHECK_IDLE = 0, HANGCHECK_WAIT, HANGCHECK_ACTIVE, HANGCHECK_KICK, -- cgit v1.2.3-70-g09d2 From 092467327c45edbfce6c2bb71ee842bec16b9a60 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 10 Aug 2013 22:16:32 +0100 Subject: drm/i915: Write RING_TAIL once per-request Ignoring the legacy DRI1 code, and a couple of special cases (to be discussed later), all access to the ring is mediated through requests. The first write to a ring will grab a seqno and mark the ring as having an outstanding_lazy_request. Either through explicitly adding a request after an execbuffer or through an implicit wait (either by the CPU or by a semaphore), that sequence of writes will be terminated with a request. So we can ellide all the intervening writes to the tail register and send the entire command stream to the GPU at once. This will reduce the number of *serialising* writes to the tail register by a factor or 3-5 times (depending upon architecture and number of workarounds, context switches, etc involved). This becomes even more noticeable when the register write is overloaded with a number of debugging tools. The astute reader will wonder if it is then possible to overflow the ring with a single command. It is not. When we start a command sequence to the ring, we check for available space and issue a wait in case we have not. The ring wait will in this case be forced to flush the outstanding register write and then poll the ACTHD for sufficient space to continue. The exception to the rule where everything is inside a request are a few initialisation cases where we may want to write GPU commands via the CS before userspace wakes up and page flips. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 10 +++++----- drivers/gpu/drm/i915/intel_ringbuffer.c | 30 ++++++++++++++++-------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 ++++++- 4 files changed, 28 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 3de60503378..f4f98957ce4 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -52,7 +52,7 @@ intel_ring_emit(LP_RING(dev_priv), x) #define ADVANCE_LP_RING() \ - intel_ring_advance(LP_RING(dev_priv)) + __intel_ring_advance(LP_RING(dev_priv)) /** * Lock test for when it's just for synchronization of ring access. diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1f207e46dbe..378174fffbc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7698,7 +7698,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev, intel_ring_emit(ring, 0); /* aux display base address, unused */ intel_mark_page_flip_active(intel_crtc); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; err_unpin: @@ -7740,7 +7740,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev, intel_ring_emit(ring, MI_NOOP); intel_mark_page_flip_active(intel_crtc); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; err_unpin: @@ -7789,7 +7789,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, intel_ring_emit(ring, pf | pipesrc); intel_mark_page_flip_active(intel_crtc); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; err_unpin: @@ -7834,7 +7834,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, intel_ring_emit(ring, pf | pipesrc); intel_mark_page_flip_active(intel_crtc); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; err_unpin: @@ -7913,7 +7913,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, intel_ring_emit(ring, (MI_NOOP)); intel_mark_page_flip_active(intel_crtc); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; err_unpin: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 284afaf5d6f..686e5b23481 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -41,6 +41,16 @@ static inline int ring_space(struct intel_ring_buffer *ring) return space; } +void __intel_ring_advance(struct intel_ring_buffer *ring) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + + ring->tail &= ring->size - 1; + if (dev_priv->gpu_error.stop_rings & intel_ring_flag(ring)) + return; + ring->write_tail(ring, ring->tail); +} + static int gen2_render_ring_flush(struct intel_ring_buffer *ring, u32 invalidate_domains, @@ -631,7 +641,7 @@ gen6_add_request(struct intel_ring_buffer *ring) intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); intel_ring_emit(ring, ring->outstanding_lazy_seqno); intel_ring_emit(ring, MI_USER_INTERRUPT); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; } @@ -744,7 +754,7 @@ pc_render_add_request(struct intel_ring_buffer *ring) intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); intel_ring_emit(ring, ring->outstanding_lazy_seqno); intel_ring_emit(ring, 0); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; } @@ -965,7 +975,7 @@ i9xx_add_request(struct intel_ring_buffer *ring) intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); intel_ring_emit(ring, ring->outstanding_lazy_seqno); intel_ring_emit(ring, MI_USER_INTERRUPT); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; } @@ -1414,6 +1424,9 @@ static int ring_wait_for_space(struct intel_ring_buffer *ring, int n) if (ret != -ENOSPC) return ret; + /* force the tail write in case we have been skipping them */ + __intel_ring_advance(ring); + trace_i915_ring_wait_begin(ring); /* With GEM the hangcheck timer should kick us out of the loop, * leaving it early runs the risk of corrupting GEM state (due @@ -1568,17 +1581,6 @@ void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno) ring->hangcheck.seqno = seqno; } -void intel_ring_advance(struct intel_ring_buffer *ring) -{ - struct drm_i915_private *dev_priv = ring->dev->dev_private; - - ring->tail &= ring->size - 1; - if (dev_priv->gpu_error.stop_rings & intel_ring_flag(ring)) - return; - ring->write_tail(ring, ring->tail); -} - - static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring, u32 value) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index b5aac570208..71a73f4fe25 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -239,7 +239,12 @@ static inline void intel_ring_emit(struct intel_ring_buffer *ring, iowrite32(data, ring->virtual_start + ring->tail); ring->tail += 4; } -void intel_ring_advance(struct intel_ring_buffer *ring); +static inline void intel_ring_advance(struct intel_ring_buffer *ring) +{ + ring->tail &= ring->size - 1; +} +void __intel_ring_advance(struct intel_ring_buffer *ring); + int __must_check intel_ring_idle(struct intel_ring_buffer *ring); void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno); int intel_ring_flush_all_caches(struct intel_ring_buffer *ring); -- cgit v1.2.3-70-g09d2