From f65d94211e2bcba17faf05a6a3809af0e4217767 Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Mon, 14 Sep 2009 17:48:44 -0400 Subject: drm/i915: Add hangcheck timer We set a periodic timer to check on the GPU, resetting it every time a batch is completed. If the timer elapses, we check acthd. If acthd hasn't changed in two timer periods, we assume the chip is wedged. This is implemented in such a way that it leaves the option open to employ adaptive timer intervals in the future. One could wait until several timer periods have elapsed before declaring the chip dead. If the chip comes back after several periods but before the "dead" threshold, the timer interval or dead threshold could be raised. It is important to note that while checking for active requests, we need to account for the fact that requests are removed from the list (i.e. retired) in a deferred work queue handler. This means that merely checking for an empty request_list is insufficient; the list could be non-empty yet the GPU still idle, causing the hangcheck timer to incorrectly mark the GPU as wedged (it took me a while to figure that out---sigh...) 
Signed-off-by: Ben Gamari Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_irq.c | 49 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_irq.c') diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6c89f2ff249..77e42e719d7 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -601,6 +601,8 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) if (iir & I915_USER_INTERRUPT) { dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); DRM_WAKEUP(&dev_priv->irq_queue); + dev_priv->hangcheck_count = 0; + mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); } if (pipea_stats & vblank_status) { @@ -880,6 +882,53 @@ int i915_vblank_swap(struct drm_device *dev, void *data, return -EINVAL; } +struct drm_i915_gem_request *i915_get_tail_request(struct drm_device *dev) { + drm_i915_private_t *dev_priv = dev->dev_private; + return list_entry(dev_priv->mm.request_list.prev, struct drm_i915_gem_request, list); +} + +/** + * This is called when the chip hasn't reported back with completed + * batchbuffers in a long time. The first time this is called we simply record + * ACTHD. If ACTHD hasn't changed by the time the hangcheck timer elapses + * again, we assume the chip is wedged and try to fix it. + */ +void i915_hangcheck_elapsed(unsigned long data) +{ + struct drm_device *dev = (struct drm_device *)data; + drm_i915_private_t *dev_priv = dev->dev_private; + uint32_t acthd; + + if (!IS_I965G(dev)) + acthd = I915_READ(ACTHD); + else + acthd = I915_READ(ACTHD_I965); + + /* If all work is done then ACTHD clearly hasn't advanced. */ + if (list_empty(&dev_priv->mm.request_list) || + i915_seqno_passed(i915_get_gem_seqno(dev), i915_get_tail_request(dev)->seqno)) { + dev_priv->hangcheck_count = 0; + return; + } + + if (dev_priv->last_acthd == acthd && dev_priv->hangcheck_count > 0) { + DRM_ERROR("Hangcheck timer elapsed... 
GPU hung\n"); + dev_priv->mm.wedged = true; /* Hopefully this is atomic */ + i915_handle_error(dev); + return; + } + + /* Reset timer case chip hangs without another request being added */ + mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); + + if (acthd != dev_priv->last_acthd) + dev_priv->hangcheck_count = 0; + else + dev_priv->hangcheck_count++; + + dev_priv->last_acthd = acthd; +} + /* drm_dma.h hooks */ static void igdng_irq_preinstall(struct drm_device *dev) -- cgit v1.2.3-70-g09d2 From 11ed50ec2a316928c2bacc1149bded86c6a96068 Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Mon, 14 Sep 2009 17:48:45 -0400 Subject: drm/i915: Implement GPU reset on i965 This patch puts in place the machinery to attempt to reset the GPU. This will be used when attempting to recover from a GPU hang. Signed-off-by: Owain G. Ainsworth Signed-off-by: Ben Gamari Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_dma.c | 8 +++ drivers/gpu/drm/i915/i915_drv.c | 124 ++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_irq.c | 8 +++ drivers/gpu/drm/i915/i915_reg.h | 4 ++ 5 files changed, 145 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_irq.c') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 08a5048335e..f47adb4aa59 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1173,6 +1173,9 @@ static int i915_load_modeset_init(struct drm_device *dev, drm_mm_init(&dev_priv->vram, 0, prealloc_size); DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024)); + /* We're off and running w/KMS */ + dev_priv->mm.suspended = 0; + /* Let GEM Manage from end of prealloc space to end of aperture. * * However, leave one page at the end still bound to the scratch page. 
@@ -1184,7 +1187,9 @@ static int i915_load_modeset_init(struct drm_device *dev, */ i915_gem_do_init(dev, prealloc_size, agp_size - 4096); + mutex_lock(&dev->struct_mutex); ret = i915_gem_init_ringbuffer(dev); + mutex_unlock(&dev->struct_mutex); if (ret) goto out; @@ -1433,6 +1438,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) return ret; } + /* Start out suspended */ + dev_priv->mm.suspended = 1; + if (drm_core_check_feature(dev, DRIVER_MODESET)) { ret = i915_load_modeset_init(dev, prealloc_start, prealloc_size, agp_size); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index dbe568c9327..435082e4073 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -127,6 +127,130 @@ static int i915_resume(struct drm_device *dev) return ret; } +/** + * i965_reset - reset chip after a hang + * @dev: drm device to reset + * @flags: reset domains + * + * Reset the chip. Useful if a hang is detected. Returns zero on successful + * reset or otherwise an error code. + * + * Procedure is fairly simple: + * - reset the chip using the reset reg + * - re-init context state + * - re-init hardware status page + * - re-init ring buffer + * - re-init interrupt state + * - re-init display + */ +int i965_reset(struct drm_device *dev, u8 flags) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + unsigned long timeout; + u8 gdrst; + /* + * We really should only reset the display subsystem if we actually + * need to + */ + bool need_display = true; + + mutex_lock(&dev->struct_mutex); + + /* + * Clear request list + */ + i915_gem_retire_requests(dev); + + if (need_display) + i915_save_display(dev); + + if (IS_I965G(dev) || IS_G4X(dev)) { + /* + * Set the domains we want to reset, then the reset bit (bit 0). 
+ * Clear the reset bit after a while and wait for hardware status + * bit (bit 1) to be set + */ + pci_read_config_byte(dev->pdev, GDRST, &gdrst); + pci_write_config_byte(dev->pdev, GDRST, gdrst | flags | ((flags == GDRST_FULL) ? 0x1 : 0x0)); + udelay(50); + pci_write_config_byte(dev->pdev, GDRST, gdrst & 0xfe); + + /* ...we don't want to loop forever though, 500ms should be plenty */ + timeout = jiffies + msecs_to_jiffies(500); + do { + udelay(100); + pci_read_config_byte(dev->pdev, GDRST, &gdrst); + } while ((gdrst & 0x1) && time_after(timeout, jiffies)); + + if (gdrst & 0x1) { + WARN(true, "i915: Failed to reset chip\n"); + mutex_unlock(&dev->struct_mutex); + return -EIO; + } + } else { + DRM_ERROR("Error occurred. Don't know how to reset this chip.\n"); + return -ENODEV; + } + + /* Ok, now get things going again... */ + + /* + * Everything depends on having the GTT running, so we need to start + * there. Fortunately we don't need to do this unless we reset the + * chip at a PCI level. + * + * Next we need to restore the context, but we don't use those + * yet either... + * + * Ring buffer needs to be re-initialized in the KMS case, or if X + * was running at the time of the reset (i.e. we weren't VT + * switched away). + */ + if (drm_core_check_feature(dev, DRIVER_MODESET) || + !dev_priv->mm.suspended) { + drm_i915_ring_buffer_t *ring = &dev_priv->ring; + struct drm_gem_object *obj = ring->ring_obj; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + dev_priv->mm.suspended = 0; + + /* Stop the ring if it's running. */ + I915_WRITE(PRB0_CTL, 0); + I915_WRITE(PRB0_TAIL, 0); + I915_WRITE(PRB0_HEAD, 0); + + /* Initialize the ring. 
*/ + I915_WRITE(PRB0_START, obj_priv->gtt_offset); + I915_WRITE(PRB0_CTL, + ((obj->size - 4096) & RING_NR_PAGES) | + RING_NO_REPORT | + RING_VALID); + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + i915_kernel_lost_context(dev); + else { + ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR; + ring->space = ring->head - (ring->tail + 8); + if (ring->space < 0) + ring->space += ring->Size; + } + + mutex_unlock(&dev->struct_mutex); + drm_irq_uninstall(dev); + drm_irq_install(dev); + mutex_lock(&dev->struct_mutex); + } + + /* + * Display needs restore too... + */ + if (need_display) + i915_restore_display(dev); + + mutex_unlock(&dev->struct_mutex); + return 0; +} + + static int __devinit i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index afbcaa9866f..42142f26976 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -624,6 +624,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd, extern int i915_emit_box(struct drm_device *dev, struct drm_clip_rect *boxes, int i, int DR1, int DR4); +extern int i965_reset(struct drm_device *dev, u8 flags); /* i915_irq.c */ void i915_hangcheck_elapsed(unsigned long data); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 77e42e719d7..2a042bc173f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -482,6 +482,14 @@ static void i915_handle_error(struct drm_device *dev) I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); } + if (dev_priv->mm.wedged) { + /* + * Wakeup waiting processes so they don't hang + */ + printk("i915: Waking up sleeping processes\n"); + DRM_WAKEUP(&dev_priv->irq_queue); + } + queue_work(dev_priv->wq, &dev_priv->error_work); } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6345bf20db0..f3d41397ce7 100644 
--- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -85,6 +85,10 @@ #define I915_GC_RENDER_CLOCK_200_MHZ (1 << 0) #define I915_GC_RENDER_CLOCK_333_MHZ (4 << 0) #define LBB 0xf4 +#define GDRST 0xc0 +#define GDRST_FULL (0<<2) +#define GDRST_RENDER (1<<2) +#define GDRST_MEDIA (3<<2) /* VGA stuff */ -- cgit v1.2.3-70-g09d2 From f316a42cc49eca73b33d85feb6177e32431747ff Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Mon, 14 Sep 2009 17:48:46 -0400 Subject: drm/i915: Hookup chip reset in error handler This patch uses the previously introduced chip reset logic to reset the chip when an error event is detected. Signed-off-by: Ben Gamari Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_irq.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_irq.c') diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 2a042bc173f..8f5276614ce 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -302,12 +302,25 @@ static void i915_error_work_func(struct work_struct *work) drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, error_work); struct drm_device *dev = dev_priv->dev; - char *event_string = "ERROR=1"; - char *envp[] = { event_string, NULL }; + char *error_event[] = { "ERROR=1", NULL }; + char *reset_event[] = { "RESET=1", NULL }; + char *reset_done_event[] = { "ERROR=0", NULL }; DRM_DEBUG("generating error event\n"); + kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event); - kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp); + if (dev_priv->mm.wedged) { + if (IS_I965G(dev)) { + DRM_DEBUG("resetting chip\n"); + kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event); + if (!i965_reset(dev, GDRST_RENDER)) { + dev_priv->mm.wedged = 0; + kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event); + } + } else { + printk("reboot required\n"); + } + } 
} /** -- cgit v1.2.3-70-g09d2 From ba1234d17b3b1fe7087defb191a3c705f208aca6 Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Mon, 14 Sep 2009 17:48:47 -0400 Subject: drm/i915: Make dev_priv->mm.wedged an atomic_t There is a very real possibility that multiple CPUs will notice that the GPU is wedged. This introduces all sorts of potential race conditions. Make the wedged flag atomic to mitigate this risk. Signed-off-by: Ben Gamari Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 18 +++++++++--------- drivers/gpu/drm/i915/i915_irq.c | 15 ++++++++------- 3 files changed, 18 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_irq.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 42142f26976..bcc1be281de 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -450,7 +450,7 @@ typedef struct drm_i915_private { * It prevents command submission from occuring and makes * every pending request fail */ - int wedged; + atomic_t wedged; /** Bit 6 swizzling required for X tiling */ uint32_t bit_6_swizzle_x; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 579b3b04ff1..f0f6f668a61 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1712,7 +1712,7 @@ i915_gem_retire_requests(struct drm_device *dev) retiring_seqno = request->seqno; if (i915_seqno_passed(seqno, retiring_seqno) || - dev_priv->mm.wedged) { + atomic_read(&dev_priv->mm.wedged)) { i915_gem_retire_request(dev, request); list_del(&request->list); @@ -1754,7 +1754,7 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno) BUG_ON(seqno == 0); - if (dev_priv->mm.wedged) + if (atomic_read(&dev_priv->mm.wedged)) return -EIO; if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) { @@ -1774,11 +1774,11 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno) ret = wait_event_interruptible(dev_priv->irq_queue, 
i915_seqno_passed(i915_get_gem_seqno(dev), seqno) || - dev_priv->mm.wedged); + atomic_read(&dev_priv->mm.wedged)); i915_user_irq_put(dev); dev_priv->mm.waiting_gem_seqno = 0; } - if (dev_priv->mm.wedged) + if (atomic_read(&dev_priv->mm.wedged)) ret = -EIO; if (ret && ret != -ERESTARTSYS) @@ -3359,7 +3359,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, i915_verify_inactive(dev, __FILE__, __LINE__); - if (dev_priv->mm.wedged) { + if (atomic_read(&dev_priv->mm.wedged)) { DRM_ERROR("Execbuf while wedged\n"); mutex_unlock(&dev->struct_mutex); ret = -EIO; @@ -3929,7 +3929,7 @@ i915_gem_idle(struct drm_device *dev) if (last_seqno == cur_seqno) { if (stuck++ > 100) { DRM_ERROR("hardware wedged\n"); - dev_priv->mm.wedged = 1; + atomic_set(&dev_priv->mm.wedged, 1); DRM_WAKEUP(&dev_priv->irq_queue); break; } @@ -3942,7 +3942,7 @@ i915_gem_idle(struct drm_device *dev) i915_gem_retire_requests(dev); spin_lock(&dev_priv->mm.active_list_lock); - if (!dev_priv->mm.wedged) { + if (!atomic_read(&dev_priv->mm.wedged)) { /* Active and flushing should now be empty as we've * waited for a sequence higher than any pending execbuffer */ @@ -4204,9 +4204,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, if (drm_core_check_feature(dev, DRIVER_MODESET)) return 0; - if (dev_priv->mm.wedged) { + if (atomic_read(&dev_priv->mm.wedged)) { DRM_ERROR("Reenabling wedged hardware, good luck\n"); - dev_priv->mm.wedged = 0; + atomic_set(&dev_priv->mm.wedged, 0); } mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8f5276614ce..13e664ddb61 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -309,12 +309,12 @@ static void i915_error_work_func(struct work_struct *work) DRM_DEBUG("generating error event\n"); kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event); - if (dev_priv->mm.wedged) { + if (atomic_read(&dev_priv->mm.wedged)) { if (IS_I965G(dev)) { 
DRM_DEBUG("resetting chip\n"); kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event); if (!i965_reset(dev, GDRST_RENDER)) { - dev_priv->mm.wedged = 0; + atomic_set(&dev_priv->mm.wedged, 0); kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event); } } else { @@ -385,7 +385,7 @@ out: * so userspace knows something bad happened (should trigger collection * of a ring dump etc.). */ -static void i915_handle_error(struct drm_device *dev) +static void i915_handle_error(struct drm_device *dev, bool wedged) { struct drm_i915_private *dev_priv = dev->dev_private; u32 eir = I915_READ(EIR); @@ -495,7 +495,9 @@ static void i915_handle_error(struct drm_device *dev) I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); } - if (dev_priv->mm.wedged) { + if (wedged) { + atomic_set(&dev_priv->mm.wedged, 1); + /* * Wakeup waiting processes so they don't hang */ @@ -548,7 +550,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) pipeb_stats = I915_READ(PIPEBSTAT); if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) - i915_handle_error(dev); + i915_handle_error(dev, false); /* * Clear the PIPE(A|B)STAT regs before the IIR @@ -934,8 +936,7 @@ void i915_hangcheck_elapsed(unsigned long data) if (dev_priv->last_acthd == acthd && dev_priv->hangcheck_count > 0) { DRM_ERROR("Hangcheck timer elapsed... GPU hung\n"); - dev_priv->mm.wedged = true; /* Hopefully this is atomic */ - i915_handle_error(dev); + i915_handle_error(dev, true); return; } -- cgit v1.2.3-70-g09d2 From 1c5d22f76dc721f3acb7a3dadc657a221e487fb7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 25 Aug 2009 11:15:50 +0100 Subject: drm/i915: Add tracepoints By adding tracepoint equivalents for WATCH_BUF/EXEC we are able to monitor the lifetimes of objects, requests and significant events. These events can then be probed using the tracing frameworks, such as systemtap and, in particular, perf. 
For example to record the stack trace for every GPU stall during a run, use $ perf record -e i915:i915_gem_request_wait_begin -c 1 -g And $ perf report to view the results. [Updated to fix compilation issues caused.] Cc: Arjan van de Ven Cc: Ben Gamari Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_dma.c | 8 +- drivers/gpu/drm/i915/i915_gem.c | 119 ++++++++++-- drivers/gpu/drm/i915/i915_irq.c | 9 +- drivers/gpu/drm/i915/i915_trace.h | 315 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_trace_points.c | 11 ++ 6 files changed, 447 insertions(+), 16 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_trace.h create mode 100644 drivers/gpu/drm/i915/i915_trace_points.c (limited to 'drivers/gpu/drm/i915/i915_irq.c') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5269dfa5f62..fa7b9be096b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -9,6 +9,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \ i915_gem.o \ i915_gem_debug.o \ i915_gem_tiling.o \ + i915_trace_points.o \ intel_display.o \ intel_crt.o \ intel_lvds.o \ diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 59826c5b876..ae7ec039002 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -33,6 +33,7 @@ #include "intel_drv.h" #include "i915_drm.h" #include "i915_drv.h" +#include "i915_trace.h" /* Really want an OS-independent resettable timer. 
Would like to have * this loop run for (eg) 3 sec, but have the timer reset every time @@ -49,14 +50,18 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller) u32 last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR; int i; + trace_i915_ring_wait_begin (dev); + for (i = 0; i < 100000; i++) { ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; acthd = I915_READ(acthd_reg); ring->space = ring->head - (ring->tail + 8); if (ring->space < 0) ring->space += ring->Size; - if (ring->space >= n) + if (ring->space >= n) { + trace_i915_ring_wait_end (dev); return 0; + } if (dev->primary->master) { struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; @@ -76,6 +81,7 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller) } + trace_i915_ring_wait_end (dev); return -EBUSY; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index dea9ac06985..67e2cd5636e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,6 +29,7 @@ #include "drm.h" #include "i915_drm.h" #include "i915_drv.h" +#include "i915_trace.h" #include "intel_drv.h" #include #include @@ -1618,8 +1619,14 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv, if ((obj->write_domain & flush_domains) == obj->write_domain) { + uint32_t old_write_domain = obj->write_domain; + obj->write_domain = 0; i915_gem_object_move_to_active(obj, seqno); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } } @@ -1667,6 +1674,8 @@ i915_gem_retire_request(struct drm_device *dev, { drm_i915_private_t *dev_priv = dev->dev_private; + trace_i915_gem_request_retire(dev, request->seqno); + /* Move any buffers on the active list that are no longer referenced * by the ringbuffer to the flushing/inactive lists as appropriate. 
*/ @@ -1810,6 +1819,8 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno) i915_driver_irq_postinstall(dev); } + trace_i915_gem_request_wait_begin(dev, seqno); + dev_priv->mm.waiting_gem_seqno = seqno; i915_user_irq_get(dev); ret = wait_event_interruptible(dev_priv->irq_queue, @@ -1818,6 +1829,8 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno) atomic_read(&dev_priv->mm.wedged)); i915_user_irq_put(dev); dev_priv->mm.waiting_gem_seqno = 0; + + trace_i915_gem_request_wait_end(dev, seqno); } if (atomic_read(&dev_priv->mm.wedged)) ret = -EIO; @@ -1850,6 +1863,8 @@ i915_gem_flush(struct drm_device *dev, DRM_INFO("%s: invalidate %08x flush %08x\n", __func__, invalidate_domains, flush_domains); #endif + trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno, + invalidate_domains, flush_domains); if (flush_domains & I915_GEM_DOMAIN_CPU) drm_agp_chipset_flush(dev); @@ -2003,6 +2018,8 @@ i915_gem_object_unbind(struct drm_gem_object *obj) if (!list_empty(&obj_priv->list)) list_del_init(&obj_priv->list); + trace_i915_gem_object_unbind(obj); + return 0; } @@ -2452,6 +2469,8 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) else i830_write_fence_reg(reg); + trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode); + return 0; } @@ -2650,6 +2669,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); + trace_i915_gem_object_bind(obj, obj_priv->gtt_offset); + return 0; } @@ -2665,6 +2686,8 @@ i915_gem_clflush_object(struct drm_gem_object *obj) if (obj_priv->pages == NULL) return; + trace_i915_gem_object_clflush(obj); + drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); } @@ -2674,21 +2697,29 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; uint32_t seqno; + uint32_t old_write_domain; if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) return; /* 
Queue the GPU write cache flushing we need. */ + old_write_domain = obj->write_domain; i915_gem_flush(dev, 0, obj->write_domain); seqno = i915_add_request(dev, NULL, obj->write_domain); obj->write_domain = 0; i915_gem_object_move_to_active(obj, seqno); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /** Flushes the GTT write domain for the object if it's dirty. */ static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) { + uint32_t old_write_domain; + if (obj->write_domain != I915_GEM_DOMAIN_GTT) return; @@ -2696,7 +2727,12 @@ i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) * to it immediately go to main memory as far as we know, so there's * no chipset flush. It also doesn't land in render cache. */ + old_write_domain = obj->write_domain; obj->write_domain = 0; + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /** Flushes the CPU write domain for the object if it's dirty. */ @@ -2704,13 +2740,19 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; + uint32_t old_write_domain; if (obj->write_domain != I915_GEM_DOMAIN_CPU) return; i915_gem_clflush_object(obj); drm_agp_chipset_flush(dev); + old_write_domain = obj->write_domain; obj->write_domain = 0; + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /** @@ -2723,6 +2765,7 @@ int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) { struct drm_i915_gem_object *obj_priv = obj->driver_private; + uint32_t old_write_domain, old_read_domains; int ret; /* Not valid to be called on unbound objects. 
*/ @@ -2735,6 +2778,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) if (ret != 0) return ret; + old_write_domain = obj->write_domain; + old_read_domains = obj->read_domains; + /* If we're writing through the GTT domain, then CPU and GPU caches * will need to be invalidated at next use. */ @@ -2753,6 +2799,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) obj_priv->dirty = 1; } + trace_i915_gem_object_change_domain(obj, + old_read_domains, + old_write_domain); + return 0; } @@ -2765,6 +2815,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) { + uint32_t old_write_domain, old_read_domains; int ret; i915_gem_object_flush_gpu_write_domain(obj); @@ -2780,6 +2831,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) */ i915_gem_object_set_to_full_cpu_read_domain(obj); + old_write_domain = obj->write_domain; + old_read_domains = obj->read_domains; + /* Flush the CPU cache if it's still invalid. 
*/ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { i915_gem_clflush_object(obj); @@ -2800,6 +2854,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) obj->write_domain = I915_GEM_DOMAIN_CPU; } + trace_i915_gem_object_change_domain(obj, + old_read_domains, + old_write_domain); + return 0; } @@ -2921,6 +2979,7 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) struct drm_i915_gem_object *obj_priv = obj->driver_private; uint32_t invalidate_domains = 0; uint32_t flush_domains = 0; + uint32_t old_read_domains; BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU); BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU); @@ -2967,6 +3026,8 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) i915_gem_clflush_object(obj); } + old_read_domains = obj->read_domains; + /* The actual obj->write_domain will be updated with * pending_write_domain after we emit the accumulated flush for all * of our domain changes in execbuffers (which clears objects' @@ -2985,6 +3046,10 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) obj->read_domains, obj->write_domain, dev->invalidate_domains, dev->flush_domains); #endif + + trace_i915_gem_object_change_domain(obj, + old_read_domains, + obj->write_domain); } /** @@ -3037,6 +3102,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, uint64_t offset, uint64_t size) { struct drm_i915_gem_object *obj_priv = obj->driver_private; + uint32_t old_read_domains; int i, ret; if (offset == 0 && size == obj->size) @@ -3083,8 +3149,13 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, */ BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); + old_read_domains = obj->read_domains; obj->read_domains |= I915_GEM_DOMAIN_CPU; + trace_i915_gem_object_change_domain(obj, + old_read_domains, + obj->write_domain); + return 0; } @@ -3282,6 +3353,8 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev, exec_start = (uint32_t) exec_offset + 
exec->batch_start_offset; exec_len = (uint32_t) exec->batch_len; + trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno); + count = nbox ? nbox : 1; for (i = 0; i < count; i++) { @@ -3660,8 +3733,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, for (i = 0; i < args->buffer_count; i++) { struct drm_gem_object *obj = object_list[i]; + uint32_t old_write_domain = obj->write_domain; obj->write_domain = obj->pending_write_domain; + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } i915_verify_inactive(dev, __FILE__, __LINE__); @@ -4050,6 +4127,8 @@ int i915_gem_init_object(struct drm_gem_object *obj) INIT_LIST_HEAD(&obj_priv->fence_list); obj_priv->madv = I915_MADV_WILLNEED; + trace_i915_gem_object_create(obj); + return 0; } @@ -4058,6 +4137,8 @@ void i915_gem_free_object(struct drm_gem_object *obj) struct drm_device *dev = obj->dev; struct drm_i915_gem_object *obj_priv = obj->driver_private; + trace_i915_gem_object_destroy(obj); + while (obj_priv->pin_count > 0) i915_gem_object_unpin(obj); @@ -4186,24 +4267,36 @@ i915_gem_idle(struct drm_device *dev) * the GPU domains and just stuff them onto inactive. 
*/ while (!list_empty(&dev_priv->mm.active_list)) { - struct drm_i915_gem_object *obj_priv; + struct drm_gem_object *obj; + uint32_t old_write_domain; - obj_priv = list_first_entry(&dev_priv->mm.active_list, - struct drm_i915_gem_object, - list); - obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; - i915_gem_object_move_to_inactive(obj_priv->obj); + obj = list_first_entry(&dev_priv->mm.active_list, + struct drm_i915_gem_object, + list)->obj; + old_write_domain = obj->write_domain; + obj->write_domain &= ~I915_GEM_GPU_DOMAINS; + i915_gem_object_move_to_inactive(obj); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } spin_unlock(&dev_priv->mm.active_list_lock); while (!list_empty(&dev_priv->mm.flushing_list)) { - struct drm_i915_gem_object *obj_priv; - - obj_priv = list_first_entry(&dev_priv->mm.flushing_list, - struct drm_i915_gem_object, - list); - obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; - i915_gem_object_move_to_inactive(obj_priv->obj); + struct drm_gem_object *obj; + uint32_t old_write_domain; + + obj = list_first_entry(&dev_priv->mm.flushing_list, + struct drm_i915_gem_object, + list)->obj; + old_write_domain = obj->write_domain; + obj->write_domain &= ~I915_GEM_GPU_DOMAINS; + i915_gem_object_move_to_inactive(obj); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 13e664ddb61..4dfeec7cdd4 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -31,6 +31,7 @@ #include "drm.h" #include "i915_drm.h" #include "i915_drv.h" +#include "i915_trace.h" #include "intel_drv.h" #define MAX_NOPID ((u32)~0) @@ -279,7 +280,9 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev) } if (gt_iir & GT_USER_INTERRUPT) { - dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); + u32 seqno = i915_get_gem_seqno(dev); + dev_priv->mm.irq_gem_seqno = seqno; + 
trace_i915_gem_request_complete(dev, seqno); DRM_WAKEUP(&dev_priv->irq_queue); } @@ -622,7 +625,9 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) } if (iir & I915_USER_INTERRUPT) { - dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); + u32 seqno = i915_get_gem_seqno(dev); + dev_priv->mm.irq_gem_seqno = seqno; + trace_i915_gem_request_complete(dev, seqno); DRM_WAKEUP(&dev_priv->irq_queue); dev_priv->hangcheck_count = 0; mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h new file mode 100644 index 00000000000..5567a40816f --- /dev/null +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -0,0 +1,315 @@ +#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _I915_TRACE_H_ + +#include +#include +#include + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM i915 +#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM) +#define TRACE_INCLUDE_FILE i915_trace + +/* object tracking */ + +TRACE_EVENT(i915_gem_object_create, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(u32, size) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->size = obj->size; + ), + + TP_printk("obj=%p, size=%u", __entry->obj, __entry->size) +); + +TRACE_EVENT(i915_gem_object_bind, + + TP_PROTO(struct drm_gem_object *obj, u32 gtt_offset), + + TP_ARGS(obj, gtt_offset), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(u32, gtt_offset) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->gtt_offset = gtt_offset; + ), + + TP_printk("obj=%p, gtt_offset=%08x", + __entry->obj, __entry->gtt_offset) +); + +TRACE_EVENT(i915_gem_object_clflush, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + ), + + TP_fast_assign( + __entry->obj = obj; + ), + + TP_printk("obj=%p", __entry->obj) +); + 
+TRACE_EVENT(i915_gem_object_change_domain, + + TP_PROTO(struct drm_gem_object *obj, uint32_t old_read_domains, uint32_t old_write_domain), + + TP_ARGS(obj, old_read_domains, old_write_domain), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(u32, read_domains) + __field(u32, write_domain) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->read_domains = obj->read_domains | (old_read_domains << 16); + __entry->write_domain = obj->write_domain | (old_write_domain << 16); + ), + + TP_printk("obj=%p, read=%04x, write=%04x", + __entry->obj, + __entry->read_domains, __entry->write_domain) +); + +TRACE_EVENT(i915_gem_object_get_fence, + + TP_PROTO(struct drm_gem_object *obj, int fence, int tiling_mode), + + TP_ARGS(obj, fence, tiling_mode), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(int, fence) + __field(int, tiling_mode) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->fence = fence; + __entry->tiling_mode = tiling_mode; + ), + + TP_printk("obj=%p, fence=%d, tiling=%d", + __entry->obj, __entry->fence, __entry->tiling_mode) +); + +TRACE_EVENT(i915_gem_object_unbind, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + ), + + TP_fast_assign( + __entry->obj = obj; + ), + + TP_printk("obj=%p", __entry->obj) +); + +TRACE_EVENT(i915_gem_object_destroy, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + ), + + TP_fast_assign( + __entry->obj = obj; + ), + + TP_printk("obj=%p", __entry->obj) +); + +/* batch tracing */ + +TRACE_EVENT(i915_gem_request_submit, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + 
+TRACE_EVENT(i915_gem_request_flush, + + TP_PROTO(struct drm_device *dev, u32 seqno, + u32 flush_domains, u32 invalidate_domains), + + TP_ARGS(dev, seqno, flush_domains, invalidate_domains), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + __field(u32, flush_domains) + __field(u32, invalidate_domains) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + __entry->flush_domains = flush_domains; + __entry->invalidate_domains = invalidate_domains; + ), + + TP_printk("dev=%p, seqno=%u, flush=%04x, invalidate=%04x", + __entry->dev, __entry->seqno, + __entry->flush_domains, __entry->invalidate_domains) +); + + +TRACE_EVENT(i915_gem_request_complete, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_retire, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_wait_begin, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_wait_end, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", 
__entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_ring_wait_begin, + + TP_PROTO(struct drm_device *dev), + + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + ), + + TP_fast_assign( + __entry->dev = dev; + ), + + TP_printk("dev=%p", __entry->dev) +); + +TRACE_EVENT(i915_ring_wait_end, + + TP_PROTO(struct drm_device *dev), + + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + ), + + TP_fast_assign( + __entry->dev = dev; + ), + + TP_printk("dev=%p", __entry->dev) +); + +#endif /* _I915_TRACE_H_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915 +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/i915/i915_trace_points.c b/drivers/gpu/drm/i915/i915_trace_points.c new file mode 100644 index 00000000000..ead876eb6ea --- /dev/null +++ b/drivers/gpu/drm/i915/i915_trace_points.c @@ -0,0 +1,11 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Authors: + * Chris Wilson + */ + +#include "i915_drv.h" + +#define CREATE_TRACE_POINTS +#include "i915_trace.h" -- cgit v1.2.3-70-g09d2