From fcb6a15c2e7e76d493e6f91ea889ab40e1c643a4 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Mon, 3 Feb 2014 08:55:31 -0800 Subject: intel_pstate: Take core C0 time into account for core busy calculation Take non-idle time into account when calculating core busy time. This ensures that intel_pstate will notice a decrease in load. References: https://bugzilla.kernel.org/show_bug.cgi?id=66581 Cc: 3.10+ # 3.10+ Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'drivers/cpufreq/intel_pstate.c') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7e257b23360..79606f473f4 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -57,6 +57,7 @@ struct sample { int32_t core_pct_busy; u64 aperf; u64 mperf; + unsigned long long tsc; int freq; }; @@ -96,6 +97,7 @@ struct cpudata { u64 prev_aperf; u64 prev_mperf; + unsigned long long prev_tsc; int sample_ptr; struct sample samples[SAMPLE_COUNT]; }; @@ -548,30 +550,41 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu, struct sample *sample) { u64 core_pct; - core_pct = div64_u64(int_tofp(sample->aperf * 100), - sample->mperf); - sample->freq = fp_toint(cpu->pstate.max_pstate * core_pct * 1000); + u64 c0_pct; - sample->core_pct_busy = core_pct; + core_pct = div64_u64(sample->aperf * 100, sample->mperf); + + c0_pct = div64_u64(sample->mperf * 100, sample->tsc); + sample->freq = fp_toint( + mul_fp(int_tofp(cpu->pstate.max_pstate), + int_tofp(core_pct * 1000))); + + sample->core_pct_busy = mul_fp(int_tofp(core_pct), + div_fp(int_tofp(c0_pct + 1), int_tofp(100))); } static inline void intel_pstate_sample(struct cpudata *cpu) { u64 aperf, mperf; + unsigned long long tsc; rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); + tsc = native_read_tsc(); cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT; cpu->samples[cpu->sample_ptr].aperf = aperf; cpu->samples[cpu->sample_ptr].mperf = mperf; + cpu->samples[cpu->sample_ptr].tsc = tsc; cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf; cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf; + cpu->samples[cpu->sample_ptr].tsc -= cpu->prev_tsc; intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]); cpu->prev_aperf = aperf; cpu->prev_mperf = mperf; + cpu->prev_tsc = tsc; } static inline void intel_pstate_set_sample_time(struct cpudata *cpu) -- cgit v1.2.3-70-g09d2 From 709c078e176bd47227e89bb34de7c64b57aaaeab Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Wed, 12 Feb 2014 10:01:03 -0800 Subject: intel_pstate: Remove energy reporting from pstate_sample tracepoint Remove the reporting of energy since it does not provide any useful information about the state of the driver and will be a maintainance headache going forward since the RAPL energy units register is not architectural and subject to change between micro-architectures References: https://bugzilla.kernel.org/show_bug.cgi?id=69831 Fixes: b69880f9ccf7 (intel_pstate: Add trace point to report internal state.) Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 9 --------- include/trace/events/power.h | 7 +------ 2 files changed, 1 insertion(+), 15 deletions(-) (limited to 'drivers/cpufreq/intel_pstate.c') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 79606f473f4..c788abf1c45 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -51,8 +51,6 @@ static inline int32_t div_fp(int32_t x, int32_t y) return div_s64((int64_t)x << FRAC_BITS, (int64_t)y); } -static u64 energy_divisor; - struct sample { int32_t core_pct_busy; u64 aperf; @@ -630,12 +628,10 @@ static void intel_pstate_timer_func(unsigned long __data) { struct cpudata *cpu = (struct cpudata *) __data; struct sample *sample; - u64 energy; intel_pstate_sample(cpu); sample = &cpu->samples[cpu->sample_ptr]; - rdmsrl(MSR_PKG_ENERGY_STATUS, energy); intel_pstate_adjust_busy_pstate(cpu); @@ -644,7 +640,6 @@ static void intel_pstate_timer_func(unsigned long __data) cpu->pstate.current_pstate, sample->mperf, sample->aperf, - div64_u64(energy, energy_divisor), sample->freq); intel_pstate_set_sample_time(cpu); @@ -926,7 +921,6 @@ static int __init intel_pstate_init(void) int cpu, rc = 0; const struct x86_cpu_id *id; struct cpu_defaults *cpu_info; - u64 units; if (no_load) return -ENODEV; @@ -960,9 +954,6 @@ static int __init intel_pstate_init(void) if (rc) goto out; - rdmsrl(MSR_RAPL_POWER_UNIT, units); - energy_divisor = 1 << ((units >> 8) & 0x1f); /* bits{12:8} */ - intel_pstate_debug_expose_params(); intel_pstate_sysfs_expose_params(); diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 9e9475c85de..e5bf9a76f16 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -42,7 +42,6 @@ TRACE_EVENT(pstate_sample, u32 state, u64 mperf, u64 aperf, - u32 energy, u32 freq ), @@ -51,7 +50,6 @@ TRACE_EVENT(pstate_sample, state, mperf, aperf, - energy, freq ), @@ -61,7 +59,6 @@ TRACE_EVENT(pstate_sample, __field(u32, state) __field(u64, mperf) __field(u64, aperf) - __field(u32, energy) __field(u32, freq) ), @@ -72,17 +69,15 @@ TRACE_EVENT(pstate_sample, __entry->state = state; __entry->mperf = mperf; __entry->aperf = aperf; - __entry->energy = energy; __entry->freq = freq; ), - TP_printk("core_busy=%lu scaled=%lu state=%lu mperf=%llu aperf=%llu energy=%lu freq=%lu ", + TP_printk("core_busy=%lu scaled=%lu state=%lu mperf=%llu aperf=%llu freq=%lu ", (unsigned long)__entry->core_busy, (unsigned long)__entry->scaled_busy, (unsigned long)__entry->state, (unsigned long long)__entry->mperf, (unsigned long long)__entry->aperf, - (unsigned long)__entry->energy, (unsigned long)__entry->freq ) -- cgit v1.2.3-70-g09d2