From 1823521d2b2fa614e7ad95fdc8a0f59e571f37ce Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Sep 2013 10:45:51 +0100 Subject: drm/i915: Rename ring->outstanding_lazy_request Prior to preallocating an request for lazy emission, rename the existing field to make way (and differentiate the seqno from the request struct). Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 460ee1026fc..a83ff1863a5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -593,7 +593,7 @@ update_mboxes(struct intel_ring_buffer *ring, #define MBOX_UPDATE_DWORDS 4 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(ring, mmio_offset); - intel_ring_emit(ring, ring->outstanding_lazy_request); + intel_ring_emit(ring, ring->outstanding_lazy_seqno); intel_ring_emit(ring, MI_NOOP); } @@ -629,7 +629,7 @@ gen6_add_request(struct intel_ring_buffer *ring) intel_ring_emit(ring, MI_STORE_DWORD_INDEX); intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(ring, ring->outstanding_lazy_request); + intel_ring_emit(ring, ring->outstanding_lazy_seqno); intel_ring_emit(ring, MI_USER_INTERRUPT); intel_ring_advance(ring); @@ -723,7 +723,7 @@ pc_render_add_request(struct intel_ring_buffer *ring) PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, ring->outstanding_lazy_request); + intel_ring_emit(ring, ring->outstanding_lazy_seqno); intel_ring_emit(ring, 0); PIPE_CONTROL_FLUSH(ring, scratch_addr); scratch_addr += 128; /* write to separate cachelines */ @@ -742,7 +742,7 @@ pc_render_add_request(struct intel_ring_buffer *ring) PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | PIPE_CONTROL_NOTIFY); intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, ring->outstanding_lazy_request); + intel_ring_emit(ring, ring->outstanding_lazy_seqno); intel_ring_emit(ring, 0); intel_ring_advance(ring); @@ -963,7 +963,7 @@ i9xx_add_request(struct intel_ring_buffer *ring) intel_ring_emit(ring, MI_STORE_DWORD_INDEX); intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(ring, ring->outstanding_lazy_request); + intel_ring_emit(ring, ring->outstanding_lazy_seqno); intel_ring_emit(ring, MI_USER_INTERRUPT); intel_ring_advance(ring); @@ -1475,7 +1475,7 @@ int intel_ring_idle(struct intel_ring_buffer *ring) int ret; /* We need to add any requests required to flush the objects and ring */ - if (ring->outstanding_lazy_request) { + if (ring->outstanding_lazy_seqno) { ret = i915_add_request(ring, NULL); if (ret) return ret; @@ -1495,10 +1495,10 @@ int intel_ring_idle(struct intel_ring_buffer *ring) static int intel_ring_alloc_seqno(struct intel_ring_buffer *ring) { - if (ring->outstanding_lazy_request) + if (ring->outstanding_lazy_seqno) return 0; - return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request); + return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); } static int __intel_ring_begin(struct intel_ring_buffer *ring, @@ -1545,7 +1545,7 @@ void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno) { struct drm_i915_private *dev_priv = ring->dev->dev_private; - BUG_ON(ring->outstanding_lazy_request); + BUG_ON(ring->outstanding_lazy_seqno); if (INTEL_INFO(ring->dev)->gen >= 6) { I915_WRITE(RING_SYNC_0(ring->mmio_base), 0); -- cgit v1.2.3-70-g09d2 From 3c0e234c847318304c12f9e7fffac7e1cf3db3ff Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Sep 2013 10:45:52 +0100 Subject: drm/i915; Preallocate the lazy request It is possible for us to be forced to perform an allocation for the lazy request whilst running the shrinker. This allocation may fail, leaving us unable to reclaim any memory leading to premature OOM. A neat solution to the problem is to preallocate the request at the same time as acquiring the seqno for the ring transaction. This means that we can report ENOMEM prior to touching the rings. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 9 ++++----- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 3 files changed, 15 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 858e7888663..399e159016e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2041,8 +2041,8 @@ int __i915_add_request(struct intel_ring_buffer *ring, if (ret) return ret; - request = kmalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL) + request = ring->preallocated_lazy_request; + if (WARN_ON(request == NULL)) return -ENOMEM; /* Record the position of the start of the request so that @@ -2053,10 +2053,8 @@ int __i915_add_request(struct intel_ring_buffer *ring, request_ring_position = intel_ring_get_tail(ring); ret = ring->add_request(ring); - if (ret) { - kfree(request); + if (ret) return ret; - } request->seqno = intel_ring_get_seqno(ring); request->ring = ring; @@ -2095,6 +2093,7 @@ int __i915_add_request(struct intel_ring_buffer *ring, trace_i915_gem_request_add(ring, request->seqno); ring->outstanding_lazy_seqno = 0; + ring->preallocated_lazy_request = NULL; if (!dev_priv->ums.mm_suspended) { i915_queue_hangcheck(ring->dev); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a83ff1863a5..284afaf5d6f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1498,6 +1498,16 @@ intel_ring_alloc_seqno(struct intel_ring_buffer *ring) if (ring->outstanding_lazy_seqno) return 0; + if (ring->preallocated_lazy_request == NULL) { + struct drm_i915_gem_request *request; + + request = kmalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + return -ENOMEM; + + ring->preallocated_lazy_request = request; + } + return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c6aa2b3c8c2..ad2dd65c63f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -140,6 +140,7 @@ struct intel_ring_buffer { /** * Do we have some not yet emitted requests outstanding? */ + struct drm_i915_gem_request *preallocated_lazy_request; u32 outstanding_lazy_seqno; bool gpu_caches_dirty; bool fbc_dirty; -- cgit v1.2.3-70-g09d2 From 092467327c45edbfce6c2bb71ee842bec16b9a60 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 10 Aug 2013 22:16:32 +0100 Subject: drm/i915: Write RING_TAIL once per-request Ignoring the legacy DRI1 code, and a couple of special cases (to be discussed later), all access to the ring is mediated through requests. The first write to a ring will grab a seqno and mark the ring as having an outstanding_lazy_request. Either through explicitly adding a request after an execbuffer or through an implicit wait (either by the CPU or by a semaphore), that sequence of writes will be terminated with a request. So we can ellide all the intervening writes to the tail register and send the entire command stream to the GPU at once. This will reduce the number of *serialising* writes to the tail register by a factor or 3-5 times (depending upon architecture and number of workarounds, context switches, etc involved). This becomes even more noticeable when the register write is overloaded with a number of debugging tools. The astute reader will wonder if it is then possible to overflow the ring with a single command. It is not. When we start a command sequence to the ring, we check for available space and issue a wait in case we have not. The ring wait will in this case be forced to flush the outstanding register write and then poll the ACTHD for sufficient space to continue. The exception to the rule where everything is inside a request are a few initialisation cases where we may want to write GPU commands via the CS before userspace wakes up and page flips. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 10 +++++----- drivers/gpu/drm/i915/intel_ringbuffer.c | 30 ++++++++++++++++-------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 ++++++- 4 files changed, 28 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 3de60503378..f4f98957ce4 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -52,7 +52,7 @@ intel_ring_emit(LP_RING(dev_priv), x) #define ADVANCE_LP_RING() \ - intel_ring_advance(LP_RING(dev_priv)) + __intel_ring_advance(LP_RING(dev_priv)) /** * Lock test for when it's just for synchronization of ring access. diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1f207e46dbe..378174fffbc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7698,7 +7698,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev, intel_ring_emit(ring, 0); /* aux display base address, unused */ intel_mark_page_flip_active(intel_crtc); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; err_unpin: @@ -7740,7 +7740,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev, intel_ring_emit(ring, MI_NOOP); intel_mark_page_flip_active(intel_crtc); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; err_unpin: @@ -7789,7 +7789,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, intel_ring_emit(ring, pf | pipesrc); intel_mark_page_flip_active(intel_crtc); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; err_unpin: @@ -7834,7 +7834,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, intel_ring_emit(ring, pf | pipesrc); intel_mark_page_flip_active(intel_crtc); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; err_unpin: @@ -7913,7 +7913,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, intel_ring_emit(ring, (MI_NOOP)); intel_mark_page_flip_active(intel_crtc); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; err_unpin: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 284afaf5d6f..686e5b23481 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -41,6 +41,16 @@ static inline int ring_space(struct intel_ring_buffer *ring) return space; } +void __intel_ring_advance(struct intel_ring_buffer *ring) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + + ring->tail &= ring->size - 1; + if (dev_priv->gpu_error.stop_rings & intel_ring_flag(ring)) + return; + ring->write_tail(ring, ring->tail); +} + static int gen2_render_ring_flush(struct intel_ring_buffer *ring, u32 invalidate_domains, @@ -631,7 +641,7 @@ gen6_add_request(struct intel_ring_buffer *ring) intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); intel_ring_emit(ring, ring->outstanding_lazy_seqno); intel_ring_emit(ring, MI_USER_INTERRUPT); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; } @@ -744,7 +754,7 @@ pc_render_add_request(struct intel_ring_buffer *ring) intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); intel_ring_emit(ring, ring->outstanding_lazy_seqno); intel_ring_emit(ring, 0); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; } @@ -965,7 +975,7 @@ i9xx_add_request(struct intel_ring_buffer *ring) intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); intel_ring_emit(ring, ring->outstanding_lazy_seqno); intel_ring_emit(ring, MI_USER_INTERRUPT); - intel_ring_advance(ring); + __intel_ring_advance(ring); return 0; } @@ -1414,6 +1424,9 @@ static int ring_wait_for_space(struct intel_ring_buffer *ring, int n) if (ret != -ENOSPC) return ret; + /* force the tail write in case we have been skipping them */ + __intel_ring_advance(ring); + trace_i915_ring_wait_begin(ring); /* With GEM the hangcheck timer should kick us out of the loop, * leaving it early runs the risk of corrupting GEM state (due @@ -1568,17 +1581,6 @@ void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno) ring->hangcheck.seqno = seqno; } -void intel_ring_advance(struct intel_ring_buffer *ring) -{ - struct drm_i915_private *dev_priv = ring->dev->dev_private; - - ring->tail &= ring->size - 1; - if (dev_priv->gpu_error.stop_rings & intel_ring_flag(ring)) - return; - ring->write_tail(ring, ring->tail); -} - - static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring, u32 value) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index b5aac570208..71a73f4fe25 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -239,7 +239,12 @@ static inline void intel_ring_emit(struct intel_ring_buffer *ring, iowrite32(data, ring->virtual_start + ring->tail); ring->tail += 4; } -void intel_ring_advance(struct intel_ring_buffer *ring); +static inline void intel_ring_advance(struct intel_ring_buffer *ring) +{ + ring->tail &= ring->size - 1; +} +void __intel_ring_advance(struct intel_ring_buffer *ring); + int __must_check intel_ring_idle(struct intel_ring_buffer *ring); void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno); int intel_ring_flush_all_caches(struct intel_ring_buffer *ring); -- cgit v1.2.3-70-g09d2 From 35a85ac60618521d41cfdb14f3fbfc8ad7329e9e Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 19 Sep 2013 11:13:41 -0700 Subject: drm/i915: Add second slice l3 remapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain HSW SKUs have a second bank of L3. This L3 remapping has a separate register set, and interrupt from the first "slice". A slice is simply a term to define some subset of the GPU's l3 cache. This patch implements both the interrupt handler, and ability to communicate with userspace about this second slice. v2: Remove redundant check about non-existent slice. Change warning about interrupts of unknown slices to WARN_ON_ONCE Handle the case where we get 2 slice interrupts concurrently, and switch the tracking of interrupts to be non-destructive (all Ville) Don't enable/mask the second slice parity interrupt for ivb/vlv (even though all docs I can find claim it's rsvd) (Ville + Bryan) Keep BYT excluded from L3 parity v3: Fix the slice = ffs to be decremented by one (found by Ville). When I initially did my testing on the series, I was using 1-based slice counting, so this code was correct. Not sure why my simpler tests that I've been running since then didn't pick it up sooner. Reviewed-by: Ville Syrjälä Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 7 ++- drivers/gpu/drm/i915/i915_gem.c | 26 +++++----- drivers/gpu/drm/i915/i915_irq.c | 89 +++++++++++++++++++++------------ drivers/gpu/drm/i915/i915_reg.h | 7 +++ drivers/gpu/drm/i915/i915_sysfs.c | 34 ++++++++++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 ++- include/uapi/drm/i915_drm.h | 8 +-- 7 files changed, 117 insertions(+), 61 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8b16d47280f..c6e8df73756 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -917,9 +917,11 @@ struct i915_ums_state { int mm_suspended; }; +#define MAX_L3_SLICES 2 struct intel_l3_parity { - u32 *remap_info; + u32 *remap_info[MAX_L3_SLICES]; struct work_struct error_work; + int which_slice; }; struct i915_gem_mm { @@ -1686,6 +1688,7 @@ struct drm_i915_file_private { #define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake) #define HAS_L3_GPU_CACHE(dev) (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) +#define NUM_L3_SLICES(dev) (IS_HSW_GT3(dev) ? 2 : HAS_L3_GPU_CACHE(dev)) #define GT_FREQUENCY_MULTIPLIER 50 @@ -1946,7 +1949,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); -void i915_gem_l3_remap(struct drm_device *dev); +void i915_gem_l3_remap(struct drm_device *dev, int slice); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d00d24f7a97..21a3d69679e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4222,16 +4222,15 @@ i915_gem_idle(struct drm_device *dev) return 0; } -void i915_gem_l3_remap(struct drm_device *dev) +void i915_gem_l3_remap(struct drm_device *dev, int slice) { drm_i915_private_t *dev_priv = dev->dev_private; + u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); + u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; u32 misccpctl; int i; - if (!HAS_L3_GPU_CACHE(dev)) - return; - - if (!dev_priv->l3_parity.remap_info) + if (!HAS_L3_GPU_CACHE(dev) || !remap_info) return; misccpctl = I915_READ(GEN7_MISCCPCTL); @@ -4239,17 +4238,17 @@ void i915_gem_l3_remap(struct drm_device *dev) POSTING_READ(GEN7_MISCCPCTL); for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { - u32 remap = I915_READ(GEN7_L3LOG_BASE + i); - if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) + u32 remap = I915_READ(reg_base + i); + if (remap && remap != remap_info[i/4]) DRM_DEBUG("0x%x was already programmed to %x\n", - GEN7_L3LOG_BASE + i, remap); - if (remap && !dev_priv->l3_parity.remap_info[i/4]) + reg_base + i, remap); + if (remap && !remap_info[i/4]) DRM_DEBUG_DRIVER("Clearing remapped register\n"); - I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); + I915_WRITE(reg_base + i, remap_info[i/4]); } /* Make sure all the writes land before disabling dop clock gating */ - POSTING_READ(GEN7_L3LOG_BASE); + POSTING_READ(reg_base); I915_WRITE(GEN7_MISCCPCTL, misccpctl); } @@ -4343,7 +4342,7 @@ int i915_gem_init_hw(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; - int ret; + int ret, i; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; @@ -4362,7 +4361,8 @@ i915_gem_init_hw(struct drm_device *dev) I915_WRITE(GEN7_MSG_CTL, temp); } - i915_gem_l3_remap(dev); + for (i = 0; i < NUM_L3_SLICES(dev); i++) + i915_gem_l3_remap(dev, i); i915_gem_init_swizzling(dev); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a610f5abcc4..60a7bac4fc3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -888,9 +888,10 @@ static void ivybridge_parity_work(struct work_struct *work) drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, l3_parity.error_work); u32 error_status, row, bank, subbank; - char *parity_event[5]; + char *parity_event[6]; uint32_t misccpctl; unsigned long flags; + uint8_t slice = 0; /* We must turn off DOP level clock gating to access the L3 registers. * In order to prevent a get/put style interface, acquire struct mutex @@ -898,45 +899,64 @@ static void ivybridge_parity_work(struct work_struct *work) */ mutex_lock(&dev_priv->dev->struct_mutex); + /* If we've screwed up tracking, just let the interrupt fire again */ + if (WARN_ON(!dev_priv->l3_parity.which_slice)) + goto out; + misccpctl = I915_READ(GEN7_MISCCPCTL); I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); POSTING_READ(GEN7_MISCCPCTL); - error_status = I915_READ(GEN7_L3CDERRST1); - row = GEN7_PARITY_ERROR_ROW(error_status); - bank = GEN7_PARITY_ERROR_BANK(error_status); - subbank = GEN7_PARITY_ERROR_SUBBANK(error_status); + while ((slice = ffs(dev_priv->l3_parity.which_slice)) != 0) { + u32 reg; - I915_WRITE(GEN7_L3CDERRST1, GEN7_PARITY_ERROR_VALID | - GEN7_L3CDERRST1_ENABLE); - POSTING_READ(GEN7_L3CDERRST1); + slice--; + if (WARN_ON_ONCE(slice >= NUM_L3_SLICES(dev_priv->dev))) + break; - I915_WRITE(GEN7_MISCCPCTL, misccpctl); + dev_priv->l3_parity.which_slice &= ~(1<irq_lock, flags); - ilk_enable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT); - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + reg = GEN7_L3CDERRST1 + (slice * 0x200); - mutex_unlock(&dev_priv->dev->struct_mutex); + error_status = I915_READ(reg); + row = GEN7_PARITY_ERROR_ROW(error_status); + bank = GEN7_PARITY_ERROR_BANK(error_status); + subbank = GEN7_PARITY_ERROR_SUBBANK(error_status); + + I915_WRITE(reg, GEN7_PARITY_ERROR_VALID | GEN7_L3CDERRST1_ENABLE); + POSTING_READ(reg); + + parity_event[0] = I915_L3_PARITY_UEVENT "=1"; + parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row); + parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank); + parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank); + parity_event[4] = kasprintf(GFP_KERNEL, "SLICE=%d", slice); + parity_event[5] = NULL; + + kobject_uevent_env(&dev_priv->dev->primary->kdev.kobj, + KOBJ_CHANGE, parity_event); - parity_event[0] = I915_L3_PARITY_UEVENT "=1"; - parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row); - parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank); - parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank); - parity_event[4] = NULL; + DRM_DEBUG("Parity error: Slice = %d, Row = %d, Bank = %d, Sub bank = %d.\n", + slice, row, bank, subbank); - kobject_uevent_env(&dev_priv->dev->primary->kdev.kobj, - KOBJ_CHANGE, parity_event); + kfree(parity_event[4]); + kfree(parity_event[3]); + kfree(parity_event[2]); + kfree(parity_event[1]); + } - DRM_DEBUG("Parity error: Row = %d, Bank = %d, Sub bank = %d.\n", - row, bank, subbank); + I915_WRITE(GEN7_MISCCPCTL, misccpctl); - kfree(parity_event[3]); - kfree(parity_event[2]); - kfree(parity_event[1]); +out: + WARN_ON(dev_priv->l3_parity.which_slice); + spin_lock_irqsave(&dev_priv->irq_lock, flags); + ilk_enable_gt_irq(dev_priv, GT_PARITY_ERROR(dev_priv->dev)); + spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + + mutex_unlock(&dev_priv->dev->struct_mutex); } -static void ivybridge_parity_error_irq_handler(struct drm_device *dev) +static void ivybridge_parity_error_irq_handler(struct drm_device *dev, u32 iir) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; @@ -944,9 +964,16 @@ static void ivybridge_parity_error_irq_handler(struct drm_device *dev) return; spin_lock(&dev_priv->irq_lock); - ilk_disable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT); + ilk_disable_gt_irq(dev_priv, GT_PARITY_ERROR(dev)); spin_unlock(&dev_priv->irq_lock); + iir &= GT_PARITY_ERROR(dev); + if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1) + dev_priv->l3_parity.which_slice |= 1 << 1; + + if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT) + dev_priv->l3_parity.which_slice |= 1 << 0; + queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work); } @@ -981,8 +1008,8 @@ static void snb_gt_irq_handler(struct drm_device *dev, i915_handle_error(dev, false); } - if (gt_iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT) - ivybridge_parity_error_irq_handler(dev); + if (gt_iir & GT_PARITY_ERROR(dev)) + ivybridge_parity_error_irq_handler(dev, gt_iir); } #define HPD_STORM_DETECT_PERIOD 1000 @@ -2221,8 +2248,8 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) dev_priv->gt_irq_mask = ~0; if (HAS_L3_GPU_CACHE(dev)) { /* L3 parity interrupt is always unmasked. */ - dev_priv->gt_irq_mask = ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT; - gt_irqs |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; + dev_priv->gt_irq_mask = ~GT_PARITY_ERROR(dev); + gt_irqs |= GT_PARITY_ERROR(dev); } gt_irqs |= GT_RENDER_USER_INTERRUPT; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index af6f93ca729..c4f9bef6d07 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -927,6 +927,7 @@ #define GT_BLT_USER_INTERRUPT (1 << 22) #define GT_BSD_CS_ERROR_INTERRUPT (1 << 15) #define GT_BSD_USER_INTERRUPT (1 << 12) +#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */ #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */ #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4) #define GT_RENDER_CS_MASTER_ERROR_INTERRUPT (1 << 3) @@ -937,6 +938,10 @@ #define PM_VEBOX_CS_ERROR_INTERRUPT (1 << 12) /* hsw+ */ #define PM_VEBOX_USER_INTERRUPT (1 << 10) /* hsw+ */ +#define GT_PARITY_ERROR(dev) \ + (GT_RENDER_L3_PARITY_ERROR_INTERRUPT | \ + IS_HASWELL(dev) ? GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 : 0) + /* These are all the "old" interrupts */ #define ILK_BSD_USER_INTERRUPT (1<<5) #define I915_PIPE_CONTROL_NOTIFY_INTERRUPT (1<<18) @@ -4747,6 +4752,7 @@ /* IVYBRIDGE DPF */ #define GEN7_L3CDERRST1 0xB008 /* L3CD Error Status 1 */ +#define HSW_L3CDERRST11 0xB208 /* L3CD Error Status register 1 slice 1 */ #define GEN7_L3CDERRST1_ROW_MASK (0x7ff<<14) #define GEN7_PARITY_ERROR_VALID (1<<13) #define GEN7_L3CDERRST1_BANK_MASK (3<<11) @@ -4760,6 +4766,7 @@ #define GEN7_L3CDERRST1_ENABLE (1<<7) #define GEN7_L3LOG_BASE 0xB070 +#define HSW_L3LOG_BASE_SLICE1 0xB270 #define GEN7_L3LOG_SIZE 0x80 #define GEN7_HALF_SLICE_CHICKEN1 0xe100 /* IVB GT1 + VLV */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 71f6de24444..3a8bf0c9b5c 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -119,6 +119,7 @@ i915_l3_read(struct file *filp, struct kobject *kobj, struct drm_device *drm_dev = dminor->dev; struct drm_i915_private *dev_priv = drm_dev->dev_private; uint32_t misccpctl; + int slice = (int)(uintptr_t)attr->private; int i, ret; count = round_down(count, 4); @@ -134,9 +135,9 @@ i915_l3_read(struct file *filp, struct kobject *kobj, return ret; if (IS_HASWELL(drm_dev)) { - if (dev_priv->l3_parity.remap_info) + if (dev_priv->l3_parity.remap_info[slice]) memcpy(buf, - dev_priv->l3_parity.remap_info + (offset/4), + dev_priv->l3_parity.remap_info[slice] + (offset/4), count); else memset(buf, 0, count); @@ -168,6 +169,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj, struct drm_device *drm_dev = dminor->dev; struct drm_i915_private *dev_priv = drm_dev->dev_private; u32 *temp = NULL; /* Just here to make handling failures easy */ + int slice = (int)(uintptr_t)attr->private; int ret; ret = l3_access_valid(drm_dev, offset); @@ -178,7 +180,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj, if (ret) return ret; - if (!dev_priv->l3_parity.remap_info) { + if (!dev_priv->l3_parity.remap_info[slice]) { temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); if (!temp) { mutex_unlock(&drm_dev->struct_mutex); @@ -198,11 +200,11 @@ i915_l3_write(struct file *filp, struct kobject *kobj, * at this point it is left as a TODO. */ if (temp) - dev_priv->l3_parity.remap_info = temp; + dev_priv->l3_parity.remap_info[slice] = temp; - memcpy(dev_priv->l3_parity.remap_info + (offset/4), buf, count); + memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count); - i915_gem_l3_remap(drm_dev); + i915_gem_l3_remap(drm_dev, slice); mutex_unlock(&drm_dev->struct_mutex); @@ -214,7 +216,17 @@ static struct bin_attribute dpf_attrs = { .size = GEN7_L3LOG_SIZE, .read = i915_l3_read, .write = i915_l3_write, - .mmap = NULL + .mmap = NULL, + .private = (void *)0 +}; + +static struct bin_attribute dpf_attrs_1 = { + .attr = {.name = "l3_parity_slice_1", .mode = (S_IRUSR | S_IWUSR)}, + .size = GEN7_L3LOG_SIZE, + .read = i915_l3_read, + .write = i915_l3_write, + .mmap = NULL, + .private = (void *)1 }; static ssize_t gt_cur_freq_mhz_show(struct device *kdev, @@ -525,6 +537,13 @@ void i915_setup_sysfs(struct drm_device *dev) ret = device_create_bin_file(&dev->primary->kdev, &dpf_attrs); if (ret) DRM_ERROR("l3 parity sysfs setup failed\n"); + + if (NUM_L3_SLICES(dev) > 1) { + ret = device_create_bin_file(&dev->primary->kdev, + &dpf_attrs_1); + if (ret) + DRM_ERROR("l3 parity slice 1 setup failed\n"); + } } ret = 0; @@ -548,6 +567,7 @@ void i915_teardown_sysfs(struct drm_device *dev) sysfs_remove_files(&dev->primary->kdev.kobj, vlv_attrs); else sysfs_remove_files(&dev->primary->kdev.kobj, gen6_attrs); + device_remove_bin_file(&dev->primary->kdev, &dpf_attrs_1); device_remove_bin_file(&dev->primary->kdev, &dpf_attrs); #ifdef CONFIG_PM sysfs_unmerge_group(&dev->primary->kdev.kobj, &rc6_attr_group); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 686e5b23481..958b7d8fea8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -570,7 +570,7 @@ static int init_render_ring(struct intel_ring_buffer *ring) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); if (HAS_L3_GPU_CACHE(dev)) - I915_WRITE_IMR(ring, ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT); + I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev)); return ret; } @@ -1000,7 +1000,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring) if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS) I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | - GT_RENDER_L3_PARITY_ERROR_INTERRUPT)); + GT_PARITY_ERROR(dev))); else I915_WRITE_IMR(ring, ~ring->irq_enable_mask); ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask); @@ -1020,8 +1020,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring) spin_lock_irqsave(&dev_priv->irq_lock, flags); if (--ring->irq_refcount == 0) { if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS) - I915_WRITE_IMR(ring, - ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT); + I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev)); else I915_WRITE_IMR(ring, ~0); ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 55bb5729bd7..3a4e97bd860 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -38,10 +38,10 @@ * * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch * event from the gpu l3 cache. Additional information supplied is ROW, - * BANK, SUBBANK of the affected cacheline. Userspace should keep track of - * these events and if a specific cache-line seems to have a persistent - * error remap it with the l3 remapping tool supplied in intel-gpu-tools. - * The value supplied with the event is always 1. + * BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep + * track of these events and if a specific cache-line seems to have a + * persistent error remap it with the l3 remapping tool supplied in + * intel-gpu-tools. The value supplied with the event is always 1. * * I915_ERROR_UEVENT - Generated upon error detection, currently only via * hangcheck. The error detection event is a good indicator of when things -- cgit v1.2.3-70-g09d2 From 040d2baa6229d50c406340035766c4e99725bf3d Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 19 Sep 2013 11:01:40 -0700 Subject: drm/i915: s/HAS_L3_GPU_CACHE/HAS_L3_DPF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'd only ever used this define to denote whether or not we have the dynamic parity feature (DPF) and never to determine whether or not L3 exists. Baytrail is a good example of where L3 exists, and not DPF. This patch provides clarify in the code for future use cases which might want to actually query whether or not L3 exists. v2: Add /* DPF == dynamic parity feature */ Reviewed-by: Ville Syrjälä Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 5 +++-- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 4 ++-- drivers/gpu/drm/i915/i915_sysfs.c | 4 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++--- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 015df5264dc..09a5c829168 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1691,8 +1691,9 @@ struct drm_i915_file_private { #define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake) -#define HAS_L3_GPU_CACHE(dev) (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) -#define NUM_L3_SLICES(dev) (IS_HSW_GT3(dev) ? 2 : HAS_L3_GPU_CACHE(dev)) +/* DPF == dynamic parity feature */ +#define HAS_L3_DPF(dev) (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) +#define NUM_L3_SLICES(dev) (IS_HSW_GT3(dev) ? 2 : HAS_L3_DPF(dev)) #define GT_FREQUENCY_MULTIPLIER 50 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 83464aae909..f1779b352f5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4230,7 +4230,7 @@ int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice) u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; int i, ret; - if (!HAS_L3_GPU_CACHE(dev) || !remap_info) + if (!HAS_L3_DPF(dev) || !remap_info) return 0; ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 60a7bac4fc3..a73e8471693 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -960,7 +960,7 @@ static void ivybridge_parity_error_irq_handler(struct drm_device *dev, u32 iir) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; - if (!HAS_L3_GPU_CACHE(dev)) + if (!HAS_L3_DPF(dev)) return; spin_lock(&dev_priv->irq_lock); @@ -2246,7 +2246,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) pm_irqs = gt_irqs = 0; dev_priv->gt_irq_mask = ~0; - if (HAS_L3_GPU_CACHE(dev)) { + if (HAS_L3_DPF(dev)) { /* L3 parity interrupt is always unmasked. */ dev_priv->gt_irq_mask = ~GT_PARITY_ERROR(dev); gt_irqs |= GT_PARITY_ERROR(dev); diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index deb8787308d..7b4c79cdb39 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -97,7 +97,7 @@ static struct attribute_group rc6_attr_group = { static int l3_access_valid(struct drm_device *dev, loff_t offset) { - if (!HAS_L3_GPU_CACHE(dev)) + if (!HAS_L3_DPF(dev)) return -EPERM; if (offset % 4 != 0) @@ -525,7 +525,7 @@ void i915_setup_sysfs(struct drm_device *dev) DRM_ERROR("RC6 residency sysfs setup failed\n"); } #endif - if (HAS_L3_GPU_CACHE(dev)) { + if (HAS_L3_DPF(dev)) { ret = device_create_bin_file(&dev->primary->kdev, &dpf_attrs); if (ret) DRM_ERROR("l3 parity sysfs setup failed\n"); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 958b7d8fea8..b67104aaade 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -569,7 +569,7 @@ static int init_render_ring(struct intel_ring_buffer *ring) if (INTEL_INFO(dev)->gen >= 6) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - if (HAS_L3_GPU_CACHE(dev)) + if (HAS_L3_DPF(dev)) I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev)); return ret; @@ -997,7 +997,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring) spin_lock_irqsave(&dev_priv->irq_lock, flags); if (ring->irq_refcount++ == 0) { - if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS) + if (HAS_L3_DPF(dev) && ring->id == RCS) I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | GT_PARITY_ERROR(dev))); @@ -1019,7 +1019,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring) spin_lock_irqsave(&dev_priv->irq_lock, flags); if (--ring->irq_refcount == 0) { - if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS) + if (HAS_L3_DPF(dev) && ring->id == RCS) I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev)); else I915_WRITE_IMR(ring, ~0); -- cgit v1.2.3-70-g09d2 From ab484f8fd62c97fc52dbb380d8b7cf3ff77b1e70 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sat, 5 Oct 2013 17:57:11 -0700 Subject: drm/i915: Remove gen specific checks in MMIO Now that MMIO has been split up into gen specific functions it is obvious when HAS_FPGA_DBG_UNCLAIMED, HAS_FORCE_WAKE are needed. As such, we can remove this extraneous condition. As a result of this, as well as previously existing function pointers for forcewake, we no longer need the has_force_wake member in the device specific data structure. Signed-off-by: Ben Widawsky Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 5 +---- drivers/gpu/drm/i915/i915_drv.h | 3 --- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++---- drivers/gpu/drm/i915/intel_uncore.c | 16 +++++++++------- 4 files changed, 12 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 96f230497cb..59649c06098 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -257,7 +257,6 @@ static const struct intel_device_info intel_sandybridge_d_info = { .has_bsd_ring = 1, .has_blt_ring = 1, .has_llc = 1, - .has_force_wake = 1, }; static const struct intel_device_info intel_sandybridge_m_info = { @@ -267,7 +266,6 @@ static const struct intel_device_info intel_sandybridge_m_info = { .has_bsd_ring = 1, .has_blt_ring = 1, .has_llc = 1, - .has_force_wake = 1, }; #define GEN7_FEATURES \ @@ -275,8 +273,7 @@ static const struct intel_device_info intel_sandybridge_m_info = { .need_gfx_hws = 1, .has_hotplug = 1, \ .has_bsd_ring = 1, \ .has_blt_ring = 1, \ - .has_llc = 1, \ - .has_force_wake = 1 + .has_llc = 1 static const struct intel_device_info intel_ivybridge_d_info = { GEN7_FEATURES, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 957771fc60f..9cac93c41e1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -441,7 +441,6 @@ struct intel_uncore { func(is_valleyview) sep \ func(is_haswell) sep \ func(is_preliminary) sep \ - func(has_force_wake) sep \ func(has_fbc) sep \ func(has_pipe_cxsr) sep \ func(has_hotplug) sep \ @@ -1729,8 +1728,6 @@ struct drm_i915_file_private { #define HAS_PCH_NOP(dev) (INTEL_PCH_TYPE(dev) == PCH_NOP) #define HAS_PCH_SPLIT(dev) (INTEL_PCH_TYPE(dev) != PCH_NONE) -#define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake) - /* DPF == dynamic parity feature */ #define HAS_L3_DPF(dev) (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) #define NUM_L3_SLICES(dev) (IS_HSW_GT3(dev) ? 2 : HAS_L3_DPF(dev)) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b67104aaade..4e108fc3c34 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -395,8 +395,7 @@ static int init_ring_common(struct intel_ring_buffer *ring) int ret = 0; u32 head; - if (HAS_FORCE_WAKE(dev)) - gen6_gt_force_wake_get(dev_priv); + gen6_gt_force_wake_get(dev_priv); if (I915_NEED_GFX_HWS(dev)) intel_ring_setup_status_page(ring); @@ -469,8 +468,7 @@ static int init_ring_common(struct intel_ring_buffer *ring) memset(&ring->hangcheck, 0, sizeof(ring->hangcheck)); out: - if (HAS_FORCE_WAKE(dev)) - gen6_gt_force_wake_put(dev_priv); + gen6_gt_force_wake_put(dev_priv); return ret; } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 7e8dcbeb0ca..f6fae35c568 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -282,6 +282,9 @@ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv) { unsigned long irqflags; + if (!dev_priv->uncore.funcs.force_wake_get) + return; + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (dev_priv->uncore.forcewake_count++ == 0) dev_priv->uncore.funcs.force_wake_get(dev_priv); @@ -295,6 +298,9 @@ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) { unsigned long irqflags; + if (!dev_priv->uncore.funcs.force_wake_put) + return; + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (--dev_priv->uncore.forcewake_count == 0) { dev_priv->uncore.forcewake_count++; @@ -307,9 +313,7 @@ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) /* We give fast paths for the really cool registers */ #define NEEDS_FORCE_WAKE(dev_priv, reg) \ - ((HAS_FORCE_WAKE((dev_priv)->dev)) && \ - ((reg) < 0x40000) && \ - ((reg) != FORCEWAKE)) + ((reg) < 0x40000 && (reg) != FORCEWAKE) static void ilk_dummy_write(struct drm_i915_private *dev_priv) @@ -323,8 +327,7 @@ ilk_dummy_write(struct drm_i915_private *dev_priv) static void hsw_unclaimed_reg_clear(struct drm_i915_private *dev_priv, u32 reg) { - if (HAS_FPGA_DBG_UNCLAIMED(dev_priv->dev) && - (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) { + if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) { DRM_ERROR("Unknown unclaimed register before writing to %x\n", reg); __raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM); @@ -334,8 +337,7 @@ hsw_unclaimed_reg_clear(struct drm_i915_private *dev_priv, u32 reg) static void hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg) { - if (HAS_FPGA_DBG_UNCLAIMED(dev_priv->dev) && - (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) { + if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) { DRM_ERROR("Unclaimed write to %x\n", reg); __raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM); } -- cgit v1.2.3-70-g09d2 From 3d57e5bd1284f44e325f3a52d966259ed42f9e05 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 14 Oct 2013 10:01:36 -0700 Subject: drm/i915: Do a fuller init after reset I had this lying around from he original PPGTT series, and thought we might try to get it in by itself. It's convenient to just call i915_gem_init_hw at reset because we'll be adding new things to that function, and having just one function to call instead of reimplementing it in two places is nice. In order to accommodate we cleanup ringbuffers in order to bring them back up cleanly. Optionally, we could also teardown/re initialize the default context but this was causing some problems on reset which I wasn't able to fully debug, and is unnecessary with the previous context init/enable split. This essentially reverts: commit 8e88a2bd5987178d16d53686197404e149e996d9 Author: Daniel Vetter Date: Tue Jun 19 18:40:00 2012 +0200 drm/i915: don't call modeset_init_hw in i915_reset It seems to work for me on ILK now. Perhaps it's due to: commit 8a5c2ae753c588bcb2a4e38d1c6a39865dbf1ff3 Author: Jesse Barnes Date: Thu Mar 28 13:57:19 2013 -0700 drm/i915: fix ILK GPU reset for render Signed-off-by: Ben Widawsky Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 29 ++++++++--------------------- drivers/gpu/drm/i915/i915_gem.c | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +++- 3 files changed, 13 insertions(+), 22 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 59649c06098..db84e242306 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -744,30 +744,17 @@ int i915_reset(struct drm_device *dev) */ if (drm_core_check_feature(dev, DRIVER_MODESET) || !dev_priv->ums.mm_suspended) { - struct intel_ring_buffer *ring; - int i; - + bool hw_contexts_disabled = dev_priv->hw_contexts_disabled; dev_priv->ums.mm_suspended = 0; - i915_gem_init_swizzling(dev); - - for_each_ring(ring, dev_priv, i) - ring->init(ring); - - i915_gem_context_init(dev); - if (dev_priv->mm.aliasing_ppgtt) { - ret = dev_priv->mm.aliasing_ppgtt->enable(dev); - if (ret) - i915_gem_cleanup_aliasing_ppgtt(dev); - } - - /* - * It would make sense to re-init all the other hw state, at - * least the rps/rc6/emon init done within modeset_init_hw. For - * some unknown reason, this blows up my ilk, so don't. - */ - + ret = i915_gem_init_hw(dev); + if (!hw_contexts_disabled && dev_priv->hw_contexts_disabled) + DRM_ERROR("HW contexts didn't survive reset\n"); mutex_unlock(&dev->struct_mutex); + if (ret) { + DRM_ERROR("Failed hw init on reset %d\n", ret); + return ret; + } drm_irq_uninstall(dev); drm_irq_install(dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 71dd0305eb3..1d22c99601e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2406,6 +2406,8 @@ void i915_gem_reset(struct drm_device *dev) for_each_ring(ring, dev_priv, i) i915_gem_reset_ring_lists(dev_priv, ring); + i915_gem_cleanup_ringbuffer(dev); + i915_gem_restore_fences(dev); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4e108fc3c34..2dec134f75e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1324,7 +1324,7 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring) /* Disable the ring buffer. The ring must be idle at this point */ dev_priv = ring->dev->dev_private; ret = intel_ring_idle(ring); - if (ret) + if (ret && !i915_reset_in_progress(&dev_priv->gpu_error)) DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", ring->name, ret); @@ -1335,6 +1335,8 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring) i915_gem_object_unpin(ring->obj); drm_gem_object_unreference(&ring->obj->base); ring->obj = NULL; + ring->preallocated_lazy_request = NULL; + ring->outstanding_lazy_seqno = 0; if (ring->cleanup) ring->cleanup(ring); -- cgit v1.2.3-70-g09d2 From abd58f0175915bed644aa67c8f69dc571b8280e0 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sat, 2 Nov 2013 21:07:09 -0700 Subject: drm/i915/bdw: Implement interrupt changes The interrupt handling implementation remains the same as previous generations with the 4 types of registers, status, identity, mask, and enable. However the layout of where the bits go have changed entirely. To address these changes, all of the interrupt vfuncs needed special gen8 code. The way it works is there is a top level status register now which informs the interrupt service routine which unit caused the interrupt, and therefore which interrupt registers to read to process the interrupt. For display the division is quite logical, a set of interrupt registers for each pipe, and in addition to those, a set each for "misc" and port. For GT the things get a bit hairy, as seen by the code. Each of the GT units has it's own bits defined. They all look *very similar* and resides in 16 bits of a GT register. As an example, RCS and BCS share register 0. To compact the code a bit, at a slight expense to complexity, this is exactly how the code works as well. 2 structures are added to the ring buffer so that our ring buffer interrupt handling code knows which ring shares the interrupt registers, and a shift value (ie. the top or bottom 16 bits of the register). The above allows us to kept the interrupt register caching scheme, the per interrupt enables, and the code to mask and unmask interrupts relatively clean (again at the cost of some more complexity). Most of the GT units mentioned above are command streamers, and so the symmetry should work quite well for even the yet to be implemented rings which Broadwell adds. v2: Fixes up a couple of bugs, and is more verbose about errors in the Broadwell interrupt handler. v3: fix DE_MISC IER offset v4: Simplify interrupts: I totally misread the docs the first time I implemented interrupts, and so this should greatly simplify the mess. Unlike GEN6, we never touch the regular mask registers in irq_get/put. v5: Rebased on to of recent pch hotplug setup changes. v6: Fixup on top of moving num_pipes to intel_info. v7: Rebased on top of Egbert Eich's hpd irq handling rework. Also wired up ibx_hpd_irq_setup for gen8. v8: Rebase on top of Jani's asle handling rework. v9: Rebase on top of Ben's VECS enabling for Haswell, where he unfortunately went OCD on the gt irq #defines. Not that they're still not yet fully consistent: - Used the GT_RENDER_ #defines + bdw shifts. - Dropped the shift from the L3_PARITY stuff, seemed clearer. - s/irq_refcount/irq_refcount.gt/ v10: Squash in VECS enabling patches and the gen8_gt_irq_handler refactoring from Zhao Yakui v11: Rebase on top of the interrupt cleanups in upstream. v12: Rebase on top of Ben's DPF changes in upstream. v13: Drop bdw from the HAS_L3_DPF feature flag for now, it's unclear what exactly needs to be done. Requested by Ben. v14: Fix the patch. - Drop the mask of reserved bits and assorted logic, it doesn't match the spec. - Do the posting read inconditionally instead of commenting it out. - Add a GEN8_MASTER_IRQ_CONTROL definition and use it. - Fix up the GEN8_PIPE interrupt defines and give the GEN8_ prefixes - we actually will need to use them. - Enclose macros in do {} while (0) (checkpatch). - Clear DE_MISC interrupt bits only after having processed them. - Fix whitespace fail (checkpatch). - Fix overtly long lines where appropriate (checkpatch). - Don't use typedef'ed private_t (maintainer-scripts). - Align the function parameter list correctly. Signed-off-by: Ben Widawsky (v4) Signed-off-by: Daniel Vetter bikeshed --- drivers/gpu/drm/i915/i915_drv.h | 5 +- drivers/gpu/drm/i915/i915_irq.c | 321 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 68 +++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 96 ++++++++-- 4 files changed, 478 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5a29dd3898a..12cc0c51c73 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1317,7 +1317,10 @@ typedef struct drm_i915_private { struct mutex dpio_lock; /** Cached value of IMR to avoid reads in updating the bitfield */ - u32 irq_mask; + union { + u32 irq_mask; + u32 de_irq_mask[I915_MAX_PIPES]; + }; u32 gt_irq_mask; u32 pm_irq_mask; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a228176676b..54338cf72fe 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1119,6 +1119,56 @@ static void snb_gt_irq_handler(struct drm_device *dev, ivybridge_parity_error_irq_handler(dev, gt_iir); } +static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, + struct drm_i915_private *dev_priv, + u32 master_ctl) +{ + u32 rcs, bcs, vcs; + uint32_t tmp = 0; + irqreturn_t ret = IRQ_NONE; + + if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { + tmp = I915_READ(GEN8_GT_IIR(0)); + if (tmp) { + ret = IRQ_HANDLED; + rcs = tmp >> GEN8_RCS_IRQ_SHIFT; + bcs = tmp >> GEN8_BCS_IRQ_SHIFT; + if (rcs & GT_RENDER_USER_INTERRUPT) + notify_ring(dev, &dev_priv->ring[RCS]); + if (bcs & GT_RENDER_USER_INTERRUPT) + notify_ring(dev, &dev_priv->ring[BCS]); + I915_WRITE(GEN8_GT_IIR(0), tmp); + } else + DRM_ERROR("The master control interrupt lied (GT0)!\n"); + } + + if (master_ctl & GEN8_GT_VCS1_IRQ) { + tmp = I915_READ(GEN8_GT_IIR(1)); + if (tmp) { + ret = IRQ_HANDLED; + vcs = tmp >> GEN8_VCS1_IRQ_SHIFT; + if (vcs & GT_RENDER_USER_INTERRUPT) + notify_ring(dev, &dev_priv->ring[VCS]); + I915_WRITE(GEN8_GT_IIR(1), tmp); + } else + DRM_ERROR("The master control interrupt lied (GT1)!\n"); + } + + if (master_ctl & GEN8_GT_VECS_IRQ) { + tmp = I915_READ(GEN8_GT_IIR(3)); + if (tmp) { + ret = IRQ_HANDLED; + vcs = tmp >> GEN8_VECS_IRQ_SHIFT; + if (vcs & GT_RENDER_USER_INTERRUPT) + notify_ring(dev, &dev_priv->ring[VECS]); + I915_WRITE(GEN8_GT_IIR(3), tmp); + } else + DRM_ERROR("The master control interrupt lied (GT3)!\n"); + } + + return ret; +} + #define HPD_STORM_DETECT_PERIOD 1000 #define HPD_STORM_THRESHOLD 5 @@ -1692,6 +1742,75 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) return ret; } +static irqreturn_t gen8_irq_handler(int irq, void *arg) +{ + struct drm_device *dev = arg; + struct drm_i915_private *dev_priv = dev->dev_private; + u32 master_ctl; + irqreturn_t ret = IRQ_NONE; + uint32_t tmp = 0; + + atomic_inc(&dev_priv->irq_received); + + master_ctl = I915_READ(GEN8_MASTER_IRQ); + master_ctl &= ~GEN8_MASTER_IRQ_CONTROL; + if (!master_ctl) + return IRQ_NONE; + + I915_WRITE(GEN8_MASTER_IRQ, 0); + POSTING_READ(GEN8_MASTER_IRQ); + + ret = gen8_gt_irq_handler(dev, dev_priv, master_ctl); + + if (master_ctl & GEN8_DE_MISC_IRQ) { + tmp = I915_READ(GEN8_DE_MISC_IIR); + if (tmp & GEN8_DE_MISC_GSE) + intel_opregion_asle_intr(dev); + else if (tmp) + DRM_ERROR("Unexpected DE Misc interrupt\n"); + else + DRM_ERROR("The master control interrupt lied (DE MISC)!\n"); + + if (tmp) { + I915_WRITE(GEN8_DE_MISC_IIR, tmp); + ret = IRQ_HANDLED; + } + } + + if (master_ctl & GEN8_DE_IRQS) { + int de_ret = 0; + int pipe; + for_each_pipe(pipe) { + uint32_t pipe_iir; + + pipe_iir = I915_READ(GEN8_DE_PIPE_IIR(pipe)); + if (pipe_iir & GEN8_PIPE_VBLANK) + drm_handle_vblank(dev, pipe); + + if (pipe_iir & GEN8_PIPE_FLIP_DONE) { + intel_prepare_page_flip(dev, pipe); + intel_finish_page_flip_plane(dev, pipe); + } + + if (pipe_iir & GEN8_DE_PIPE_IRQ_ERRORS) + DRM_ERROR("Errors on pipe %c\n", 'A' + pipe); + + if (pipe_iir) { + de_ret++; + ret = IRQ_HANDLED; + I915_WRITE(GEN8_DE_PIPE_IIR(pipe), pipe_iir); + } + } + if (!de_ret) + DRM_ERROR("The master control interrupt lied (DE PIPE)!\n"); + } + + I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL); + POSTING_READ(GEN8_MASTER_IRQ); + + return ret; +} + static void i915_error_wake_up(struct drm_i915_private *dev_priv, bool reset_completed) { @@ -2045,6 +2164,25 @@ static int valleyview_enable_vblank(struct drm_device *dev, int pipe) return 0; } +static int gen8_enable_vblank(struct drm_device *dev, int pipe) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long irqflags; + uint32_t imr; + + if (!i915_pipe_enabled(dev, pipe)) + return -EINVAL; + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + imr = I915_READ(GEN8_DE_PIPE_IMR(pipe)); + if ((imr & GEN8_PIPE_VBLANK) == 1) { + I915_WRITE(GEN8_DE_PIPE_IMR(pipe), imr & ~GEN8_PIPE_VBLANK); + POSTING_READ(GEN8_DE_PIPE_IMR(pipe)); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + return 0; +} + /* Called from drm generic code, passed 'crtc' which * we use as a pipe index */ @@ -2093,6 +2231,24 @@ static void valleyview_disable_vblank(struct drm_device *dev, int pipe) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } +static void gen8_disable_vblank(struct drm_device *dev, int pipe) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long irqflags; + uint32_t imr; + + if (!i915_pipe_enabled(dev, pipe)) + return; + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + imr = I915_READ(GEN8_DE_PIPE_IMR(pipe)); + if ((imr & GEN8_PIPE_VBLANK) == 0) { + I915_WRITE(GEN8_DE_PIPE_IMR(pipe), imr | GEN8_PIPE_VBLANK); + POSTING_READ(GEN8_DE_PIPE_IMR(pipe)); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); +} + static u32 ring_last_seqno(struct intel_ring_buffer *ring) { @@ -2427,6 +2583,53 @@ static void valleyview_irq_preinstall(struct drm_device *dev) POSTING_READ(VLV_IER); } +static void gen8_irq_preinstall(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe; + + atomic_set(&dev_priv->irq_received, 0); + + I915_WRITE(GEN8_MASTER_IRQ, 0); + POSTING_READ(GEN8_MASTER_IRQ); + + /* IIR can theoretically queue up two events. Be paranoid */ +#define GEN8_IRQ_INIT_NDX(type, which) do { \ + I915_WRITE(GEN8_##type##_IMR(which), 0xffffffff); \ + POSTING_READ(GEN8_##type##_IMR(which)); \ + I915_WRITE(GEN8_##type##_IER(which), 0); \ + I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff); \ + POSTING_READ(GEN8_##type##_IIR(which)); \ + I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff); \ + } while (0) + +#define GEN8_IRQ_INIT(type) do { \ + I915_WRITE(GEN8_##type##_IMR, 0xffffffff); \ + POSTING_READ(GEN8_##type##_IMR); \ + I915_WRITE(GEN8_##type##_IER, 0); \ + I915_WRITE(GEN8_##type##_IIR, 0xffffffff); \ + POSTING_READ(GEN8_##type##_IIR); \ + I915_WRITE(GEN8_##type##_IIR, 0xffffffff); \ + } while (0) + + GEN8_IRQ_INIT_NDX(GT, 0); + GEN8_IRQ_INIT_NDX(GT, 1); + GEN8_IRQ_INIT_NDX(GT, 2); + GEN8_IRQ_INIT_NDX(GT, 3); + + for_each_pipe(pipe) { + GEN8_IRQ_INIT_NDX(DE_PIPE, pipe); + } + + GEN8_IRQ_INIT(DE_PORT); + GEN8_IRQ_INIT(DE_MISC); + GEN8_IRQ_INIT(PCU); +#undef GEN8_IRQ_INIT +#undef GEN8_IRQ_INIT_NDX + + POSTING_READ(GEN8_PCU_IIR); +} + static void ibx_hpd_irq_setup(struct drm_device *dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; @@ -2632,6 +2835,116 @@ static int valleyview_irq_postinstall(struct drm_device *dev) return 0; } +static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) +{ + int i; + + /* These are interrupts we'll toggle with the ring mask register */ + uint32_t gt_interrupts[] = { + GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_RENDER_L3_PARITY_ERROR_INTERRUPT | + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT, + GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT, + 0, + GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT + }; + + for (i = 0; i < ARRAY_SIZE(gt_interrupts); i++) { + u32 tmp = I915_READ(GEN8_GT_IIR(i)); + if (tmp) + DRM_ERROR("Interrupt (%d) should have been masked in pre-install 0x%08x\n", + i, tmp); + I915_WRITE(GEN8_GT_IMR(i), ~gt_interrupts[i]); + I915_WRITE(GEN8_GT_IER(i), gt_interrupts[i]); + } + POSTING_READ(GEN8_GT_IER(0)); +} + +static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + uint32_t de_pipe_enables = GEN8_PIPE_FLIP_DONE | + GEN8_PIPE_SCAN_LINE_EVENT | + GEN8_PIPE_VBLANK | + GEN8_DE_PIPE_IRQ_ERRORS; + int pipe; + dev_priv->de_irq_mask[PIPE_A] = ~de_pipe_enables; + dev_priv->de_irq_mask[PIPE_B] = ~de_pipe_enables; + dev_priv->de_irq_mask[PIPE_C] = ~de_pipe_enables; + + for_each_pipe(pipe) { + u32 tmp = I915_READ(GEN8_DE_PIPE_IIR(pipe)); + if (tmp) + DRM_ERROR("Interrupt (%d) should have been masked in pre-install 0x%08x\n", + pipe, tmp); + I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]); + I915_WRITE(GEN8_DE_PIPE_IER(pipe), de_pipe_enables); + } + POSTING_READ(GEN8_DE_PIPE_ISR(0)); + + I915_WRITE(GEN8_DE_PORT_IMR, ~_PORT_DP_A_HOTPLUG); + I915_WRITE(GEN8_DE_PORT_IER, _PORT_DP_A_HOTPLUG); + POSTING_READ(GEN8_DE_PORT_IER); +} + +static int gen8_irq_postinstall(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + gen8_gt_irq_postinstall(dev_priv); + gen8_de_irq_postinstall(dev_priv); + + ibx_irq_postinstall(dev); + + I915_WRITE(GEN8_MASTER_IRQ, DE_MASTER_IRQ_CONTROL); + POSTING_READ(GEN8_MASTER_IRQ); + + return 0; +} + +static void gen8_irq_uninstall(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe; + + if (!dev_priv) + return; + + atomic_set(&dev_priv->irq_received, 0); + + I915_WRITE(GEN8_MASTER_IRQ, 0); + +#define GEN8_IRQ_FINI_NDX(type, which) do { \ + I915_WRITE(GEN8_##type##_IMR(which), 0xffffffff); \ + I915_WRITE(GEN8_##type##_IER(which), 0); \ + I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff); \ + } while (0) + +#define GEN8_IRQ_FINI(type) do { \ + I915_WRITE(GEN8_##type##_IMR, 0xffffffff); \ + I915_WRITE(GEN8_##type##_IER, 0); \ + I915_WRITE(GEN8_##type##_IIR, 0xffffffff); \ + } while (0) + + GEN8_IRQ_FINI_NDX(GT, 0); + GEN8_IRQ_FINI_NDX(GT, 1); + GEN8_IRQ_FINI_NDX(GT, 2); + GEN8_IRQ_FINI_NDX(GT, 3); + + for_each_pipe(pipe) { + GEN8_IRQ_FINI_NDX(DE_PIPE, pipe); + } + + GEN8_IRQ_FINI(DE_PORT); + GEN8_IRQ_FINI(DE_MISC); + GEN8_IRQ_FINI(PCU); +#undef GEN8_IRQ_FINI +#undef GEN8_IRQ_FINI_NDX + + POSTING_READ(GEN8_PCU_IIR); +} + static void valleyview_irq_uninstall(struct drm_device *dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; @@ -3411,6 +3724,14 @@ void intel_irq_init(struct drm_device *dev) dev->driver->enable_vblank = valleyview_enable_vblank; dev->driver->disable_vblank = valleyview_disable_vblank; dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; + } else if (IS_GEN8(dev)) { + dev->driver->irq_handler = gen8_irq_handler; + dev->driver->irq_preinstall = gen8_irq_preinstall; + dev->driver->irq_postinstall = gen8_irq_postinstall; + dev->driver->irq_uninstall = gen8_irq_uninstall; + dev->driver->enable_vblank = gen8_enable_vblank; + dev->driver->disable_vblank = gen8_disable_vblank; + dev_priv->display.hpd_irq_setup = ibx_hpd_irq_setup; } else if (HAS_PCH_SPLIT(dev)) { dev->driver->irq_handler = ironlake_irq_handler; dev->driver->irq_preinstall = ironlake_irq_preinstall; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 48c3aef5aca..9ec524167c8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4002,6 +4002,74 @@ #define GTIIR 0x44018 #define GTIER 0x4401c +#define GEN8_MASTER_IRQ 0x44200 +#define GEN8_MASTER_IRQ_CONTROL (1<<31) +#define GEN8_PCU_IRQ (1<<30) +#define GEN8_DE_PCH_IRQ (1<<23) +#define GEN8_DE_MISC_IRQ (1<<22) +#define GEN8_DE_PORT_IRQ (1<<20) +#define GEN8_DE_PIPE_C_IRQ (1<<18) +#define GEN8_DE_PIPE_B_IRQ (1<<17) +#define GEN8_DE_PIPE_A_IRQ (1<<16) +#define GEN8_GT_VECS_IRQ (1<<6) +#define GEN8_GT_VCS2_IRQ (1<<3) +#define GEN8_GT_VCS1_IRQ (1<<2) +#define GEN8_GT_BCS_IRQ (1<<1) +#define GEN8_GT_RCS_IRQ (1<<0) +/* Lazy definition */ +#define GEN8_GT_IRQS 0x000000ff +#define GEN8_DE_IRQS 0x01ff0000 +#define GEN8_RSVD_IRQS 0xB700ff00 + +#define GEN8_GT_ISR(which) (0x44300 + (0x10 * (which))) +#define GEN8_GT_IMR(which) (0x44304 + (0x10 * (which))) +#define GEN8_GT_IIR(which) (0x44308 + (0x10 * (which))) +#define GEN8_GT_IER(which) (0x4430c + (0x10 * (which))) + +#define GEN8_BCS_IRQ_SHIFT 16 +#define GEN8_RCS_IRQ_SHIFT 0 +#define GEN8_VCS2_IRQ_SHIFT 16 +#define GEN8_VCS1_IRQ_SHIFT 0 +#define GEN8_VECS_IRQ_SHIFT 0 + +#define GEN8_DE_PIPE_ISR(pipe) (0x44400 + (0x10 * (pipe))) +#define GEN8_DE_PIPE_IMR(pipe) (0x44404 + (0x10 * (pipe))) +#define GEN8_DE_PIPE_IIR(pipe) (0x44408 + (0x10 * (pipe))) +#define GEN8_DE_PIPE_IER(pipe) (0x4440c + (0x10 * (pipe))) +#define GEN8_PIPE_UNDERRUN (1 << 31) +#define GEN8_PIPE_CDCLK_CRC_ERROR (1 << 29) +#define GEN8_PIPE_CDCLK_CRC_DONE (1 << 28) +#define GEN8_PIPE_CURSOR_FAULT (1 << 10) +#define GEN8_PIPE_SPRITE_FAULT (1 << 9) +#define GEN8_PIPE_PRIMARY_FAULT (1 << 8) +#define GEN8_PIPE_SPRITE_FLIP_DONE (1 << 5) +#define GEN8_PIPE_FLIP_DONE (1 << 4) +#define GEN8_PIPE_SCAN_LINE_EVENT (1 << 2) +#define GEN8_PIPE_VSYNC (1 << 1) +#define GEN8_PIPE_VBLANK (1 << 0) +#define GEN8_DE_PIPE_IRQ_ERRORS (GEN8_PIPE_UNDERRUN | \ + GEN8_PIPE_CDCLK_CRC_ERROR | \ + GEN8_PIPE_CURSOR_FAULT | \ + GEN8_PIPE_SPRITE_FAULT | \ + GEN8_PIPE_PRIMARY_FAULT) + +#define GEN8_DE_PORT_ISR 0x44440 +#define GEN8_DE_PORT_IMR 0x44444 +#define GEN8_DE_PORT_IIR 0x44448 +#define GEN8_DE_PORT_IER 0x4444c +#define _PORT_DP_A_HOTPLUG (1 << 3) + +#define GEN8_DE_MISC_ISR 0x44460 +#define GEN8_DE_MISC_IMR 0x44464 +#define GEN8_DE_MISC_IIR 0x44468 +#define GEN8_DE_MISC_IER 0x4446c +#define GEN8_DE_MISC_GSE (1 << 27) + +#define GEN8_PCU_ISR 0x444e0 +#define GEN8_PCU_IMR 0x444e4 +#define GEN8_PCU_IIR 0x444e8 +#define GEN8_PCU_IER 0x444ec + #define ILK_DISPLAY_CHICKEN2 0x42004 /* Required on all Ironlake and Sandybridge according to the B-Spec. */ #define ILK_ELPIN_409_SELECT (1 << 25) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2dec134f75e..2fda12607b7 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1066,6 +1066,52 @@ hsw_vebox_put_irq(struct intel_ring_buffer *ring) spin_unlock_irqrestore(&dev_priv->irq_lock, flags); } +static bool +gen8_ring_get_irq(struct intel_ring_buffer *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long flags; + + if (!dev->irq_enabled) + return false; + + spin_lock_irqsave(&dev_priv->irq_lock, flags); + if (ring->irq_refcount++ == 0) { + if (HAS_L3_DPF(dev) && ring->id == RCS) { + I915_WRITE_IMR(ring, + ~(ring->irq_enable_mask | + GT_RENDER_L3_PARITY_ERROR_INTERRUPT)); + } else { + I915_WRITE_IMR(ring, ~ring->irq_enable_mask); + } + POSTING_READ(RING_IMR(ring->mmio_base)); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + + return true; +} + +static void +gen8_ring_put_irq(struct intel_ring_buffer *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long flags; + + spin_lock_irqsave(&dev_priv->irq_lock, flags); + if (--ring->irq_refcount == 0) { + if (HAS_L3_DPF(dev) && ring->id == RCS) { + I915_WRITE_IMR(ring, + ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT); + } else { + I915_WRITE_IMR(ring, ~0); + } + POSTING_READ(RING_IMR(ring->mmio_base)); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, flags); +} + static int i965_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length, @@ -1732,8 +1778,13 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring->flush = gen7_render_ring_flush; if (INTEL_INFO(dev)->gen == 6) ring->flush = gen6_render_ring_flush; - ring->irq_get = gen6_ring_get_irq; - ring->irq_put = gen6_ring_put_irq; + if (INTEL_INFO(dev)->gen >= 8) { + ring->irq_get = gen8_ring_get_irq; + ring->irq_put = gen8_ring_put_irq; + } else { + ring->irq_get = gen6_ring_get_irq; + ring->irq_put = gen6_ring_put_irq; + } ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT; ring->get_seqno = gen6_ring_get_seqno; ring->set_seqno = ring_set_seqno; @@ -1897,9 +1948,16 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) ring->add_request = gen6_add_request; ring->get_seqno = gen6_ring_get_seqno; ring->set_seqno = ring_set_seqno; - ring->irq_enable_mask = GT_BSD_USER_INTERRUPT; - ring->irq_get = gen6_ring_get_irq; - ring->irq_put = gen6_ring_put_irq; + if (INTEL_INFO(dev)->gen >= 8) { + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; + ring->irq_get = gen8_ring_get_irq; + ring->irq_put = gen8_ring_put_irq; + } else { + ring->irq_enable_mask = GT_BSD_USER_INTERRUPT; + ring->irq_get = gen6_ring_get_irq; + ring->irq_put = gen6_ring_put_irq; + } ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; ring->sync_to = gen6_ring_sync; ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VR; @@ -1946,9 +2004,16 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) ring->add_request = gen6_add_request; ring->get_seqno = gen6_ring_get_seqno; ring->set_seqno = ring_set_seqno; - ring->irq_enable_mask = GT_BLT_USER_INTERRUPT; - ring->irq_get = gen6_ring_get_irq; - ring->irq_put = gen6_ring_put_irq; + if (INTEL_INFO(dev)->gen >= 8) { + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + ring->irq_get = gen8_ring_get_irq; + ring->irq_put = gen8_ring_put_irq; + } else { + ring->irq_enable_mask = GT_BLT_USER_INTERRUPT; + ring->irq_get = gen6_ring_get_irq; + ring->irq_put = gen6_ring_put_irq; + } ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; ring->sync_to = gen6_ring_sync; ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_BR; @@ -1978,10 +2043,19 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) ring->add_request = gen6_add_request; ring->get_seqno = gen6_ring_get_seqno; ring->set_seqno = ring_set_seqno; - ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; - ring->irq_get = hsw_vebox_get_irq; - ring->irq_put = hsw_vebox_put_irq; ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; + + if (INTEL_INFO(dev)->gen >= 8) { + ring->irq_enable_mask = + (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT) | + GT_RENDER_CS_MASTER_ERROR_INTERRUPT; + ring->irq_get = gen8_ring_get_irq; + ring->irq_put = gen8_ring_put_irq; + } else { + ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; + ring->irq_get = hsw_vebox_get_irq; + ring->irq_put = hsw_vebox_put_irq; + } ring->sync_to = gen6_ring_sync; ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VER; ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_VEV; -- cgit v1.2.3-70-g09d2 From 1c7a0623c795b35349d8f19cd8e8a19ac5783008 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sat, 2 Nov 2013 21:07:12 -0700 Subject: drm/i915/bdw: dispatch updates (64b related) The command to emit batch buffers has changed to address 48b addresses. It seemed reasonable that we could still use the old instruction where emitting 0 for length would do the right thing, but it seems to bother the simulator when the code does that. Now the second dword in the command has the upper 16b of the address of the batchbuffer. v2: Remove duplicated vfun assignment. v3: Squash in VECS support changes from Zhao Yakui v4: Make checkpatch happy. Signed-off-by: Ben Widawsky (v2) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 34 ++++++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9ec524167c8..c696779fb85 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -247,6 +247,7 @@ #define MI_BATCH_NON_SECURE_HSW (1<<13) #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ +#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) #define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6+ */ #define MI_SEMAPHORE_GLOBAL_GTT (1<<22) #define MI_SEMAPHORE_UPDATE (1<<21) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2fda12607b7..7070d734b84 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1687,6 +1687,27 @@ static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring, return 0; } +static int +gen8_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, + u32 offset, u32 len, + unsigned flags) +{ + int ret; + + ret = intel_ring_begin(ring, 4); + if (ret) + return ret; + + /* FIXME(BDW): Address space and security selectors. */ + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8); + intel_ring_emit(ring, offset); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + + return 0; +} + static int hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 len, @@ -1826,6 +1847,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring->write_tail = ring_write_tail; if (IS_HASWELL(dev)) ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; + else if (IS_GEN8(dev)) + ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; else if (INTEL_INFO(dev)->gen >= 6) ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; else if (INTEL_INFO(dev)->gen >= 4) @@ -1953,12 +1976,15 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; ring->irq_get = gen8_ring_get_irq; ring->irq_put = gen8_ring_put_irq; + ring->dispatch_execbuffer = + gen8_ring_dispatch_execbuffer; } else { ring->irq_enable_mask = GT_BSD_USER_INTERRUPT; ring->irq_get = gen6_ring_get_irq; ring->irq_put = gen6_ring_put_irq; + ring->dispatch_execbuffer = + gen6_ring_dispatch_execbuffer; } - ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; ring->sync_to = gen6_ring_sync; ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VR; ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_INVALID; @@ -2009,12 +2035,13 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; ring->irq_get = gen8_ring_get_irq; ring->irq_put = gen8_ring_put_irq; + ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; } else { ring->irq_enable_mask = GT_BLT_USER_INTERRUPT; ring->irq_get = gen6_ring_get_irq; ring->irq_put = gen6_ring_put_irq; + ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; } - ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; ring->sync_to = gen6_ring_sync; ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_BR; ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_BV; @@ -2043,7 +2070,6 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) ring->add_request = gen6_add_request; ring->get_seqno = gen6_ring_get_seqno; ring->set_seqno = ring_set_seqno; - ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; if (INTEL_INFO(dev)->gen >= 8) { ring->irq_enable_mask = @@ -2051,10 +2077,12 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) GT_RENDER_CS_MASTER_ERROR_INTERRUPT; ring->irq_get = gen8_ring_get_irq; ring->irq_put = gen8_ring_put_irq; + ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; } else { ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; ring->irq_get = hsw_vebox_get_irq; ring->irq_put = hsw_vebox_put_irq; + ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; } ring->sync_to = gen6_ring_sync; ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VER; -- cgit v1.2.3-70-g09d2 From 075b3bbaae0d6ec99c09d2299c3632e481028542 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sat, 2 Nov 2013 21:07:13 -0700 Subject: drm/i915/bdw: Update MI_FLUSH_DW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code is more verbose than necessary for the reader's sake, hopefully the compiler optimizes away the if. Reviewed-by: Ville Syrjälä Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7070d734b84..ef0e7b99ded 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1670,6 +1670,8 @@ static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring, return ret; cmd = MI_FLUSH_DW; + if (INTEL_INFO(ring->dev)->gen >= 8) + cmd += 1; /* * Bspec vol 1c.5 - video engine command streamer: * "If ENABLED, all TLBs will be invalidated once the flush @@ -1681,8 +1683,13 @@ static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring, MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; intel_ring_emit(ring, cmd); intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + if (INTEL_INFO(ring->dev)->gen >= 8) { + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ + } else { + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); + } intel_ring_advance(ring); return 0; } @@ -1764,6 +1771,8 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring, return ret; cmd = MI_FLUSH_DW; + if (INTEL_INFO(ring->dev)->gen >= 8) + cmd += 1; /* * Bspec vol 1c.3 - blitter engine command streamer: * "If ENABLED, all TLBs will be invalidated once the flush @@ -1775,8 +1784,13 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring, MI_FLUSH_DW_OP_STOREDW; intel_ring_emit(ring, cmd); intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + if (INTEL_INFO(ring->dev)->gen >= 8) { + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ + } else { + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); + } intel_ring_advance(ring); if (IS_GEN7(dev) && flush) -- cgit v1.2.3-70-g09d2 From 28cf541543302d0b54703613838914fd9b6e3447 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sat, 2 Nov 2013 21:07:26 -0700 Subject: drm/i915/bdw: unleash PPGTT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Squash in fix from Ben: Set PPGTT batches as necessary This fixes the regression in the last couple of days when we enabled PPGTT. v3: Squash in fixup to still use GTT for secure batches from Ville: BDW doesn't have a separate secure vs. non-secure bit in MI_BATCH_BUFFER_START. So for secure batches we have to simply leave the PPGTT bit unset. Fortunately older generations (except HSW) had similar limitations so execbuffer already creates a GTT mapping for all secure batches. Cc: Ville Syrjälä Signed-off-by: Ben Widawsky Reviewed-by: Imre Deak Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 ++++- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 78786c44fe5..885d595e0e0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1146,8 +1146,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. - * hsw should have this fixed, but let's be paranoid and do it - * unconditionally for now. */ + * hsw should have this fixed, but bdw mucks it up again. */ if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping) i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 60f398183c8..4fb0efa3e24 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -439,7 +439,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) ppgtt->num_pt_pages, (ppgtt->num_pt_pages - num_pt_pages) + size % (1<<30)); - return -ENOSYS; /* Not ready yet */ + return 0; err_out: ppgtt->base.cleanup(&ppgtt->base); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ef0e7b99ded..db086f4bf71 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1699,6 +1699,9 @@ gen8_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 len, unsigned flags) { + struct drm_i915_private *dev_priv = ring->dev->dev_private; + bool ppgtt = dev_priv->mm.aliasing_ppgtt != NULL && + !(flags & I915_DISPATCH_SECURE); int ret; ret = intel_ring_begin(ring, 4); @@ -1706,7 +1709,7 @@ gen8_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8); + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); intel_ring_emit(ring, offset); intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_NOOP); -- cgit v1.2.3-70-g09d2 From a5f3d68e2e6eb2f8e23ba83890ea3fdaaa7dab82 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sat, 2 Nov 2013 21:07:27 -0700 Subject: drm/i915/bdw: Render ring flushing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PIPE_CONTROL added the high address dword. I'm not sure how the simulator let me get away with this. I've explicitly left out all the workarounds from Gen7 because in the minimal digging that I did, most don't seem necessary, and the simulator doesn't complain without them Note that BLT and BSD ring commands had already been updated previously. Just render/pipe_control should have been broken. v2: Squash in a fixup from Ville to follow the recent IVB PIPE_CONTROL updates: "BDW uses the IVB PIPE_CONTROL style for specifying GTT vs. PPGTT for the PIPE_CONTROL QW/DW write." v3: Rebase on top of Chris' cleanup to have an explicit ring->scratch buffer object instead of an opaque ring->private where everyone stores the same stuff inside. Reported-by: Damien Lespiau Reviewed-by: Ben Widawsky (for the fixup) Reviewed-by: Ville Syrjälä Signed-off-by: Ben Widawsky (v1) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 42 +++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index db086f4bf71..08744aaf44a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -360,6 +360,47 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring, return 0; } +static int +gen8_render_ring_flush(struct intel_ring_buffer *ring, + u32 invalidate_domains, u32 flush_domains) +{ + u32 flags = 0; + u32 scratch_addr = ring->scratch.gtt_offset + 128; + int ret; + + flags |= PIPE_CONTROL_CS_STALL; + + if (flush_domains) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + } + if (invalidate_domains) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + } + + ret = intel_ring_begin(ring, 6); + if (ret) + return ret; + + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); + + return 0; + +} + static void ring_write_tail(struct intel_ring_buffer *ring, u32 value) { @@ -1817,6 +1858,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) if (INTEL_INFO(dev)->gen == 6) ring->flush = gen6_render_ring_flush; if (INTEL_INFO(dev)->gen >= 8) { + ring->flush = gen8_render_ring_flush; ring->irq_get = gen8_ring_get_irq; ring->irq_put = gen8_ring_put_irq; } else { -- cgit v1.2.3-70-g09d2 From 780f18c84cc671071556d37ce198a01feaca00ea Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sat, 2 Nov 2013 21:07:28 -0700 Subject: drm/i915/bdw: BSD init for gen8 also This was an oversight and should have been in a previous series somewhere. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 08744aaf44a..e32c08af8e8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2021,7 +2021,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) ring->id = VCS; ring->write_tail = ring_write_tail; - if (IS_GEN6(dev) || IS_GEN7(dev)) { + if (INTEL_INFO(dev)->gen >= 6) { ring->mmio_base = GEN6_BSD_RING_BASE; /* gen6 bsd needs a special wa for tail updates */ if (IS_GEN6(dev)) -- cgit v1.2.3-70-g09d2 From 40c499f93fdefa2c496f59d18483b417ea06448b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 7 Nov 2013 21:40:39 -0800 Subject: drm/i915/bdw: Take render error interrupt out of the mask The handling of the error interrupts isn't wired up at all. And it hasn't been ever since ilk happened, so don't bother. Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e32c08af8e8..b620337e6d6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2132,8 +2132,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) if (INTEL_INFO(dev)->gen >= 8) { ring->irq_enable_mask = - (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT) | - GT_RENDER_CS_MASTER_ERROR_INTERRUPT; + GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; ring->irq_get = gen8_ring_get_irq; ring->irq_put = gen8_ring_put_irq; ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; -- cgit v1.2.3-70-g09d2