diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 22 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_irq.c | 21 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_reg.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_display.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_pm.c | 230 |
5 files changed, 241 insertions, 39 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 95820652e15..05c3c60c8f2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -936,6 +936,23 @@ struct intel_rps_ei { u32 media_c0; }; +struct intel_rps_bdw_cal { + u32 it_threshold_pct; /* interrupt, in percentage */ + u32 eval_interval; /* evaluation interval, in us */ + u32 last_ts; + u32 last_c0; + bool is_up; +}; + +struct intel_rps_bdw_turbo { + struct intel_rps_bdw_cal up; + struct intel_rps_bdw_cal down; + struct timer_list flip_timer; + u32 timeout; + atomic_t flip_received; + struct work_struct work_max_freq; +}; + struct intel_gen6_power_mgmt { /* work and pm_iir are protected by dev_priv->irq_lock */ struct work_struct work; @@ -969,6 +986,9 @@ struct intel_gen6_power_mgmt { bool enabled; struct delayed_work delayed_resume_work; + bool is_bdw_sw_turbo; /* Switch of BDW software turbo */ + struct intel_rps_bdw_turbo sw_turbo; /* Calculate RP interrupt timing */ + /* manual wa residency calculations */ struct intel_rps_ei up_ei, down_ei; @@ -2791,6 +2811,8 @@ extern void intel_disable_fbc(struct drm_device *dev); extern bool ironlake_set_drps(struct drm_device *dev, u8 val); extern void intel_init_pch_refclk(struct drm_device *dev); extern void gen6_set_rps(struct drm_device *dev, u8 val); +extern void bdw_software_turbo(struct drm_device *dev); +extern void gen8_flip_interrupt(struct drm_device *dev); extern void valleyview_set_rps(struct drm_device *dev, u8 val); extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c1186e1ba48..efc79605f79 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1979,6 +1979,27 @@ static void i9xx_pipe_crc_irq_handler(struct drm_device *dev, enum pipe pipe) res1, res2); } +void gen8_flip_interrupt(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (!dev_priv->rps.is_bdw_sw_turbo) + return; + + if(atomic_read(&dev_priv->rps.sw_turbo.flip_received)) { + mod_timer(&dev_priv->rps.sw_turbo.flip_timer, + usecs_to_jiffies(dev_priv->rps.sw_turbo.timeout) + jiffies); + } + else { + dev_priv->rps.sw_turbo.flip_timer.expires = + usecs_to_jiffies(dev_priv->rps.sw_turbo.timeout) + jiffies; + add_timer(&dev_priv->rps.sw_turbo.flip_timer); + atomic_set(&dev_priv->rps.sw_turbo.flip_received, true); + } + + bdw_software_turbo(dev); +} + /* The RPS events need forcewake, so we add them to a work queue and mask their * IMR bits until the work is done. Other interrupts can be processed without * the work queue. */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 04461141a3e..20673cc450c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5562,6 +5562,10 @@ enum punit_power_well { #define GEN8_UCGCTL6 0x9430 #define GEN8_SDEUNIT_CLOCK_GATE_DISABLE (1<<14) +#define TIMESTAMP_CTR 0x44070 +#define FREQ_1_28_US(us) (((us) * 100) >> 7) +#define MCHBAR_PCU_C0 (MCHBAR_MIRROR_BASE_SNB + 0x5960) + #define GEN6_GFXPAUSE 0xA000 #define GEN6_RPNSWREQ 0xA008 #define GEN6_TURBO_DISABLE (1<<31) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 127eb0ae851..e572ffa16a6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9757,6 +9757,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, unsigned long flags; int ret; + //trigger software GT busyness calculation + gen8_flip_interrupt(dev); + /* * drm_mode_page_flip_ioctl() should already catch this, but double * check to be safe. In the future we may enable pageflipping from diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a5c1a6cf5b5..72791a787f8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2258,7 +2258,6 @@ int ilk_wm_max_level(const struct drm_device *dev) else return 2; } - static void intel_print_wm_latency(struct drm_device *dev, const char *name, const uint16_t wm[5]) @@ -3227,6 +3226,9 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) { int new_power; + if (dev_priv->rps.is_bdw_sw_turbo) + return; + new_power = dev_priv->rps.power; switch (dev_priv->rps.power) { case LOW_POWER: @@ -3434,8 +3436,11 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); else if (IS_VALLEYVIEW(dev)) vlv_set_rps_idle(dev_priv); - else + else if (!dev_priv->rps.is_bdw_sw_turbo + || atomic_read(&dev_priv->rps.sw_turbo.flip_received)){ gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); + } + dev_priv->rps.last_adj = 0; } mutex_unlock(&dev_priv->rps.hw_lock); @@ -3449,8 +3454,11 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv) if (dev_priv->rps.enabled) { if (IS_VALLEYVIEW(dev)) valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); - else + else if (!dev_priv->rps.is_bdw_sw_turbo + || atomic_read(&dev_priv->rps.sw_turbo.flip_received)){ gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); + } + dev_priv->rps.last_adj = 0; } mutex_unlock(&dev_priv->rps.hw_lock); @@ -3481,21 +3489,26 @@ void valleyview_set_rps(struct drm_device *dev, u8 val) static void gen8_disable_rps_interrupts(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + if (IS_BROADWELL(dev) && dev_priv->rps.is_bdw_sw_turbo){ + if (atomic_read(&dev_priv->rps.sw_turbo.flip_received)) + del_timer(&dev_priv->rps.sw_turbo.flip_timer); + dev_priv-> rps.is_bdw_sw_turbo = false; + } else { + I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP); + I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) & + ~dev_priv->pm_rps_events); + /* Complete PM interrupt masking here doesn't race with the rps work + * item again unmasking PM interrupts because that is using a different + * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only risk is in + * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which + * gen8_enable_rps will clean up. */ - I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP); - I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) & - ~dev_priv->pm_rps_events); - /* Complete PM interrupt masking here doesn't race with the rps work - * item again unmasking PM interrupts because that is using a different - * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only risk is in - * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which - * gen8_enable_rps will clean up. */ - - spin_lock_irq(&dev_priv->irq_lock); - dev_priv->rps.pm_iir = 0; - spin_unlock_irq(&dev_priv->irq_lock); + spin_lock_irq(&dev_priv->irq_lock); + dev_priv->rps.pm_iir = 0; + spin_unlock_irq(&dev_priv->irq_lock); - I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events); + I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events); + } } static void gen6_disable_rps_interrupts(struct drm_device *dev) @@ -3653,13 +3666,111 @@ static void parse_rp_state_cap(struct drm_i915_private *dev_priv, u32 rp_state_c dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; } +static void bdw_sw_calculate_freq(struct drm_device *dev, + struct intel_rps_bdw_cal *c, u32 *cur_time, u32 *c0) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u64 busy = 0; + u32 busyness_pct = 0; + u32 elapsed_time = 0; + u16 new_freq = 0; + + if (!c || !cur_time || !c0) + return; + + if (0 == c->last_c0) + goto out; + + /* Check Evaluation interval */ + elapsed_time = *cur_time - c->last_ts; + if (elapsed_time < c->eval_interval) + return; + + mutex_lock(&dev_priv->rps.hw_lock); + + /* + * c0 unit in 32*1.28 usec, elapsed_time unit in 1 usec. + * Whole busyness_pct calculation should be + * busy = ((u64)(*c0 - c->last_c0) << 5 << 7) / 100; + * busyness_pct = (u32)(busy * 100 / elapsed_time); + * The final formula is to simplify CPU calculation + */ + busy = (u64)(*c0 - c->last_c0) << 12; + do_div(busy, elapsed_time); + busyness_pct = (u32)busy; + + if (c->is_up && busyness_pct >= c->it_threshold_pct) + new_freq = (u16)dev_priv->rps.cur_freq + 3; + if (!c->is_up && busyness_pct <= c->it_threshold_pct) + new_freq = (u16)dev_priv->rps.cur_freq - 1; + + /* Adjust to new frequency busyness and compare with threshold */ + if (0 != new_freq) { + if (new_freq > dev_priv->rps.max_freq_softlimit) + new_freq = dev_priv->rps.max_freq_softlimit; + else if (new_freq < dev_priv->rps.min_freq_softlimit) + new_freq = dev_priv->rps.min_freq_softlimit; + + gen6_set_rps(dev, new_freq); + } + + mutex_unlock(&dev_priv->rps.hw_lock); + +out: + c->last_c0 = *c0; + c->last_ts = *cur_time; +} + +static void gen8_set_frequency_RP0(struct work_struct *work) +{ + struct intel_rps_bdw_turbo *p_bdw_turbo = + container_of(work, struct intel_rps_bdw_turbo, work_max_freq); + struct intel_gen6_power_mgmt *p_power_mgmt = + container_of(p_bdw_turbo, struct intel_gen6_power_mgmt, sw_turbo); + struct drm_i915_private *dev_priv = + container_of(p_power_mgmt, struct drm_i915_private, rps); + + mutex_lock(&dev_priv->rps.hw_lock); + gen6_set_rps(dev_priv->dev, dev_priv->rps.rp0_freq); + mutex_unlock(&dev_priv->rps.hw_lock); +} + +static void flip_active_timeout_handler(unsigned long var) +{ + struct drm_i915_private *dev_priv = (struct drm_i915_private *) var; + + del_timer(&dev_priv->rps.sw_turbo.flip_timer); + atomic_set(&dev_priv->rps.sw_turbo.flip_received, false); + + queue_work(dev_priv->wq, &dev_priv->rps.sw_turbo.work_max_freq); +} + +void bdw_software_turbo(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + u32 current_time = I915_READ(TIMESTAMP_CTR); /* unit in usec */ + u32 current_c0 = I915_READ(MCHBAR_PCU_C0); /* unit in 32*1.28 usec */ + + bdw_sw_calculate_freq(dev, &dev_priv->rps.sw_turbo.up, + ¤t_time, ¤t_c0); + bdw_sw_calculate_freq(dev, &dev_priv->rps.sw_turbo.down, + ¤t_time, ¤t_c0); +} + static void gen8_enable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; uint32_t rc6_mask = 0, rp_state_cap; + uint32_t threshold_up_pct, threshold_down_pct; + uint32_t ei_up, ei_down; /* up and down evaluation interval */ + u32 rp_ctl_flag; int unused; + /* Use software Turbo for BDW */ + dev_priv->rps.is_bdw_sw_turbo = IS_BROADWELL(dev); + /* 1a: Software RC state - RC0 */ I915_WRITE(GEN6_RC_STATE, 0); @@ -3703,35 +3814,74 @@ static void gen8_enable_rps(struct drm_device *dev) HSW_FREQUENCY(dev_priv->rps.rp1_freq)); I915_WRITE(GEN6_RC_VIDEO_FREQ, HSW_FREQUENCY(dev_priv->rps.rp1_freq)); - /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ + ei_up = 84480; /* 84.48ms */ + ei_down = 448000; + threshold_up_pct = 90; /* x percent busy */ + threshold_down_pct = 70; + + if (dev_priv->rps.is_bdw_sw_turbo) { + dev_priv->rps.sw_turbo.up.it_threshold_pct = threshold_up_pct; + dev_priv->rps.sw_turbo.up.eval_interval = ei_up; + dev_priv->rps.sw_turbo.up.is_up = true; + dev_priv->rps.sw_turbo.up.last_ts = 0; + dev_priv->rps.sw_turbo.up.last_c0 = 0; + + dev_priv->rps.sw_turbo.down.it_threshold_pct = threshold_down_pct; + dev_priv->rps.sw_turbo.down.eval_interval = ei_down; + dev_priv->rps.sw_turbo.down.is_up = false; + dev_priv->rps.sw_turbo.down.last_ts = 0; + dev_priv->rps.sw_turbo.down.last_c0 = 0; + + /* Start the timer to track if flip comes*/ + dev_priv->rps.sw_turbo.timeout = 200*1000; /* in us */ + + init_timer(&dev_priv->rps.sw_turbo.flip_timer); + dev_priv->rps.sw_turbo.flip_timer.function = flip_active_timeout_handler; + dev_priv->rps.sw_turbo.flip_timer.data = (unsigned long) dev_priv; + dev_priv->rps.sw_turbo.flip_timer.expires = + usecs_to_jiffies(dev_priv->rps.sw_turbo.timeout) + jiffies; + add_timer(&dev_priv->rps.sw_turbo.flip_timer); + INIT_WORK(&dev_priv->rps.sw_turbo.work_max_freq, gen8_set_frequency_RP0); + + atomic_set(&dev_priv->rps.sw_turbo.flip_received, true); + } else { + /* NB: Docs say 1s, and 1000000 - which aren't equivalent + * 1 second timeout*/ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, FREQ_1_28_US(1000000)); - /* Docs recommend 900MHz, and 300 MHz respectively */ - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - dev_priv->rps.max_freq_softlimit << 24 | - dev_priv->rps.min_freq_softlimit << 16); + /* Docs recommend 900MHz, and 300 MHz respectively */ + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, + dev_priv->rps.max_freq_softlimit << 24 | + dev_priv->rps.min_freq_softlimit << 16); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ - I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ - I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ + I915_WRITE(GEN6_RP_UP_THRESHOLD, + FREQ_1_28_US(ei_up * threshold_up_pct / 100)); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, + FREQ_1_28_US(ei_down * threshold_down_pct / 100)); + I915_WRITE(GEN6_RP_UP_EI, + FREQ_1_28_US(ei_up)); + I915_WRITE(GEN6_RP_DOWN_EI, + FREQ_1_28_US(ei_down)); - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + } /* 5: Enable RPS */ - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - - /* 6: Ring frequency + overclocking (our driver does this later */ - + rp_ctl_flag = GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG; + if (!dev_priv->rps.is_bdw_sw_turbo) + rp_ctl_flag |= GEN6_RP_ENABLE; + + I915_WRITE(GEN6_RP_CONTROL, rp_ctl_flag); + + /* 6: Ring frequency + overclocking + * (our driver does this later */ gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8); - - gen8_enable_rps_interrupts(dev); + if (!dev_priv->rps.is_bdw_sw_turbo) + gen8_enable_rps_interrupts(dev); gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); } @@ -5199,6 +5349,8 @@ static void intel_gen6_powersave_work(struct work_struct *work) rps.delayed_resume_work.work); struct drm_device *dev = dev_priv->dev; + dev_priv->rps.is_bdw_sw_turbo = false; + mutex_lock(&dev_priv->rps.hw_lock); if (IS_CHERRYVIEW(dev)) { |