summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
diff options
context:
space:
mode:
authorJ. Bruce Fields <bfields@redhat.com>2012-10-09 18:35:22 -0400
committerJ. Bruce Fields <bfields@redhat.com>2012-10-09 18:35:22 -0400
commitf474af7051212b4efc8267583fad9c4ebf33ccff (patch)
tree1aa46ebc8065a341f247c2a2d9af2f624ad1d4f8 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent0d22f68f02c10d5d10ec5712917e5828b001a822 (diff)
parente3dd9a52cb5552c46c2a4ca7ccdfb4dab5c72457 (diff)
nfs: disintegrate UAPI for nfs
This is to complete part of the Userspace API (UAPI) disintegration for which the preparatory patches were pulled recently. After these patches, userspace headers will be segregated into: include/uapi/linux/.../foo.h for the userspace interface stuff, and: include/linux/.../foo.h for the strictly kernel internal stuff. Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c157
1 files changed, 135 insertions, 22 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e2a73b38abe..ecbc5c5dbbb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -27,10 +27,9 @@
*
*/
-#include "drmP.h"
-#include "drm.h"
+#include <drm/drmP.h>
#include "i915_drv.h"
-#include "i915_drm.h"
+#include <drm/i915_drm.h>
#include "i915_trace.h"
#include "intel_drv.h"
@@ -262,6 +261,83 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
return 0;
}
+static int
+gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
+{
+ int ret;
+
+ ret = intel_ring_begin(ring, 4);
+ if (ret)
+ return ret;
+
+ intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+ intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, 0);
+ intel_ring_advance(ring);
+
+ return 0;
+}
+
+static int
+gen7_render_ring_flush(struct intel_ring_buffer *ring,
+ u32 invalidate_domains, u32 flush_domains)
+{
+ u32 flags = 0;
+ struct pipe_control *pc = ring->private;
+ u32 scratch_addr = pc->gtt_offset + 128;
+ int ret;
+
+ /*
+ * Ensure that any following seqno writes only happen when the render
+ * cache is indeed flushed.
+ *
+ * Workaround: 4th PIPE_CONTROL command (except the ones with only
+ * read-cache invalidate bits set) must have the CS_STALL bit set. We
+ * don't try to be clever and just set it unconditionally.
+ */
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ /* Just flush everything. Experiments have shown that reducing the
+ * number of bits based on the write domains has little performance
+ * impact.
+ */
+ if (flush_domains) {
+ flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ }
+ if (invalidate_domains) {
+ flags |= PIPE_CONTROL_TLB_INVALIDATE;
+ flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+ /*
+ * TLB invalidate requires a post-sync write.
+ */
+ flags |= PIPE_CONTROL_QW_WRITE;
+
+ /* Workaround: we must issue a pipe_control with CS-stall bit
+ * set before a pipe_control command that has the state cache
+ * invalidate bit set. */
+ gen7_render_ring_cs_stall_wa(ring);
+ }
+
+ ret = intel_ring_begin(ring, 4);
+ if (ret)
+ return ret;
+
+ intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+ intel_ring_emit(ring, flags);
+ intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+ intel_ring_emit(ring, 0);
+ intel_ring_advance(ring);
+
+ return 0;
+}
+
static void ring_write_tail(struct intel_ring_buffer *ring,
u32 value)
{
@@ -382,12 +458,12 @@ init_pipe_control(struct intel_ring_buffer *ring)
i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
- ret = i915_gem_object_pin(obj, 4096, true);
+ ret = i915_gem_object_pin(obj, 4096, true, false);
if (ret)
goto err_unref;
pc->gtt_offset = obj->gtt_offset;
- pc->cpu_page = kmap(obj->pages[0]);
+ pc->cpu_page = kmap(sg_page(obj->pages->sgl));
if (pc->cpu_page == NULL)
goto err_unpin;
@@ -414,7 +490,8 @@ cleanup_pipe_control(struct intel_ring_buffer *ring)
return;
obj = pc->obj;
- kunmap(obj->pages[0]);
+
+ kunmap(sg_page(obj->pages->sgl));
i915_gem_object_unpin(obj);
drm_gem_object_unreference(&obj->base);
@@ -462,7 +539,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
if (INTEL_INFO(dev)->gen >= 6)
I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
- if (IS_IVYBRIDGE(dev))
+ if (HAS_L3_GPU_CACHE(dev))
I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
return ret;
@@ -628,26 +705,24 @@ pc_render_add_request(struct intel_ring_buffer *ring,
}
static u32
-gen6_ring_get_seqno(struct intel_ring_buffer *ring)
+gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
- struct drm_device *dev = ring->dev;
-
/* Workaround to force correct ordering between irq and seqno writes on
* ivb (and maybe also on snb) by reading from a CS register (like
* ACTHD) before reading the status page. */
- if (IS_GEN6(dev) || IS_GEN7(dev))
+ if (!lazy_coherency)
intel_ring_get_active_head(ring);
return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}
static u32
-ring_get_seqno(struct intel_ring_buffer *ring)
+ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}
static u32
-pc_render_get_seqno(struct intel_ring_buffer *ring)
+pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
struct pipe_control *pc = ring->private;
return pc->cpu_page[0];
@@ -852,7 +927,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring)
spin_lock_irqsave(&dev_priv->irq_lock, flags);
if (ring->irq_refcount++ == 0) {
- if (IS_IVYBRIDGE(dev) && ring->id == RCS)
+ if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
GEN6_RENDER_L3_PARITY_ERROR));
else
@@ -875,7 +950,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring)
spin_lock_irqsave(&dev_priv->irq_lock, flags);
if (--ring->irq_refcount == 0) {
- if (IS_IVYBRIDGE(dev) && ring->id == RCS)
+ if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
else
I915_WRITE_IMR(ring, ~0);
@@ -951,7 +1026,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring)
if (obj == NULL)
return;
- kunmap(obj->pages[0]);
+ kunmap(sg_page(obj->pages->sgl));
i915_gem_object_unpin(obj);
drm_gem_object_unreference(&obj->base);
ring->status_page.obj = NULL;
@@ -972,13 +1047,13 @@ static int init_status_page(struct intel_ring_buffer *ring)
i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
- ret = i915_gem_object_pin(obj, 4096, true);
+ ret = i915_gem_object_pin(obj, 4096, true, false);
if (ret != 0) {
goto err_unref;
}
ring->status_page.gfx_addr = obj->gtt_offset;
- ring->status_page.page_addr = kmap(obj->pages[0]);
+ ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
if (ring->status_page.page_addr == NULL) {
ret = -ENOMEM;
goto err_unpin;
@@ -1010,7 +1085,6 @@ static int intel_init_ring_buffer(struct drm_device *dev,
ring->dev = dev;
INIT_LIST_HEAD(&ring->active_list);
INIT_LIST_HEAD(&ring->request_list);
- INIT_LIST_HEAD(&ring->gpu_write_list);
ring->size = 32 * PAGE_SIZE;
init_waitqueue_head(&ring->irq_queue);
@@ -1030,7 +1104,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
ring->obj = obj;
- ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
+ ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
if (ret)
goto err_unref;
@@ -1379,7 +1453,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
if (INTEL_INFO(dev)->gen >= 6) {
ring->add_request = gen6_add_request;
- ring->flush = gen6_render_ring_flush;
+ ring->flush = gen7_render_ring_flush;
+ if (INTEL_INFO(dev)->gen == 6)
+ ring->flush = gen6_render_ring_flush;
ring->irq_get = gen6_ring_get_irq;
ring->irq_put = gen6_ring_put_irq;
ring->irq_enable_mask = GT_USER_INTERRUPT;
@@ -1481,7 +1557,6 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
ring->dev = dev;
INIT_LIST_HEAD(&ring->active_list);
INIT_LIST_HEAD(&ring->request_list);
- INIT_LIST_HEAD(&ring->gpu_write_list);
ring->size = size;
ring->effective_size = ring->size;
@@ -1574,3 +1649,41 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
return intel_init_ring_buffer(dev, ring);
}
+
+int
+intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
+{
+ int ret;
+
+ if (!ring->gpu_caches_dirty)
+ return 0;
+
+ ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
+ if (ret)
+ return ret;
+
+ trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
+
+ ring->gpu_caches_dirty = false;
+ return 0;
+}
+
+int
+intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
+{
+ uint32_t flush_domains;
+ int ret;
+
+ flush_domains = 0;
+ if (ring->gpu_caches_dirty)
+ flush_domains = I915_GEM_GPU_DOMAINS;
+
+ ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+ if (ret)
+ return ret;
+
+ trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+
+ ring->gpu_caches_dirty = false;
+ return 0;
+}