diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-17 21:00:02 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-17 21:00:02 -0700 |
commit | dcbf77b9e86e1726f5fbd01bb98820dac06d456e (patch) | |
tree | 2f0b728ce70c03e1d0e3461e8a3c3d1fbe68fb90 /arch/x86 | |
parent | ca043a66ae48c74fa628ec92178f7a54f5b9a106 (diff) | |
parent | 29cd8bae396583a2ee9a3340db8c5102acf9f6fd (diff) |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (37 commits)
sched: Fix SD_POWERSAVING_BALANCE|SD_PREFER_LOCAL vs SD_WAKE_AFFINE
sched: Stop buddies from hogging the system
sched: Add new wakeup preemption mode: WAKEUP_RUNNING
sched: Fix TASK_WAKING & loadaverage breakage
sched: Disable wakeup balancing
sched: Rename flags to wake_flags
sched: Clean up the load_idx selection in select_task_rq_fair
sched: Optimize cgroup vs wakeup a bit
sched: x86: Name old_perf in a unique way
sched: Implement a gentler fair-sleepers feature
sched: Add SD_PREFER_LOCAL
sched: Add a few SYNC hint knobs to play with
sched: Fix sync wakeups again
sched: Add WF_FORK
sched: Rename sync arguments
sched: Rename select_task_rq() argument
sched: Feature to disable APERF/MPERF cpu_power
x86: sched: Provide arch implementations using aperf/mperf
x86: Add generic aperf/mperf code
x86: Move APERF/MPERF into a X86_FEATURE
...
Fix up trivial conflict in arch/x86/include/asm/processor.h due to
nearby addition of amd_get_nb_id() declaration from the EDAC merge.
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/cpufeature.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 30 | ||||
-rw-r--r-- | arch/x86/include/asm/topology.h | 14 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 88 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/sched.c | 55 |
7 files changed, 108 insertions, 88 deletions
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 847fee6493a..9cfc88b9774 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -96,6 +96,7 @@ #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ +#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 42a3f936dad..c3429e8b242 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -27,6 +27,7 @@ struct mm_struct; #include <linux/cpumask.h> #include <linux/cache.h> #include <linux/threads.h> +#include <linux/math64.h> #include <linux/init.h> /* @@ -1022,4 +1023,33 @@ extern int set_tsc_mode(unsigned int val); extern int amd_get_nb_id(int cpu); +struct aperfmperf { + u64 aperf, mperf; +}; + +static inline void get_aperfmperf(struct aperfmperf *am) +{ + WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF)); + + rdmsrl(MSR_IA32_APERF, am->aperf); + rdmsrl(MSR_IA32_MPERF, am->mperf); +} + +#define APERFMPERF_SHIFT 10 + +static inline +unsigned long calc_aperfmperf_ratio(struct aperfmperf *old, + struct aperfmperf *new) +{ + u64 aperf = new->aperf - old->aperf; + u64 mperf = new->mperf - old->mperf; + unsigned long ratio = aperf; + + mperf >>= APERFMPERF_SHIFT; + if (mperf) + ratio = div64_u64(aperf, mperf); + + return ratio; +} + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 26d06e052a1..6f0695d744b 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -116,15 +116,11 @@ extern unsigned long node_remap_size[]; # define SD_CACHE_NICE_TRIES 1 # define SD_IDLE_IDX 1 -# define SD_NEWIDLE_IDX 2 -# define SD_FORKEXEC_IDX 0 #else # define SD_CACHE_NICE_TRIES 2 # define SD_IDLE_IDX 2 -# define SD_NEWIDLE_IDX 2 -# define SD_FORKEXEC_IDX 1 #endif @@ -137,22 +133,20 @@ extern unsigned long node_remap_size[]; .cache_nice_tries = SD_CACHE_NICE_TRIES, \ .busy_idx = 3, \ .idle_idx = SD_IDLE_IDX, \ - .newidle_idx = SD_NEWIDLE_IDX, \ - .wake_idx = 1, \ - .forkexec_idx = SD_FORKEXEC_IDX, \ + .newidle_idx = 0, \ + .wake_idx = 0, \ + .forkexec_idx = 0, \ \ .flags = 1*SD_LOAD_BALANCE \ | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ - | 0*SD_WAKE_IDLE \ + | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 1*SD_WAKE_BALANCE \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ | 0*SD_SHARE_PKG_RESOURCES \ | 1*SD_SERIALIZE \ - | 1*SD_WAKE_IDLE_FAR \ | 0*SD_PREFER_SIBLING \ , \ .last_balance = jiffies, \ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c1f253dac15..8dd30638fe4 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp) obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o capflags.o powerflags.o common.o -obj-y += vmware.o hypervisor.o +obj-y += vmware.o hypervisor.o sched.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ae9b503220c..4109679863c 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -60,7 +60,6 @@ enum { }; #define INTEL_MSR_RANGE (0xffff) -#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) struct acpi_cpufreq_data { struct acpi_processor_performance *acpi_data; @@ -71,11 +70,7 @@ struct acpi_cpufreq_data { static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); -struct acpi_msr_data { - u64 saved_aperf, saved_mperf; -}; - -static DEFINE_PER_CPU(struct acpi_msr_data, msr_data); +static DEFINE_PER_CPU(struct aperfmperf, old_perf); DEFINE_TRACE(power_mark); @@ -244,23 +239,12 @@ static u32 get_cur_val(const struct cpumask *mask) return cmd.val; } -struct perf_pair { - union { - struct { - u32 lo; - u32 hi; - } split; - u64 whole; - } aperf, mperf; -}; - /* Called via smp_call_function_single(), on the target CPU */ static void read_measured_perf_ctrs(void *_cur) { - struct perf_pair *cur = _cur; + struct aperfmperf *am = _cur; - rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi); - rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi); + get_aperfmperf(am); } /* @@ -279,63 +263,17 @@ static void read_measured_perf_ctrs(void *_cur) static unsigned int get_measured_perf(struct cpufreq_policy *policy, unsigned int cpu) { - struct perf_pair readin, cur; - unsigned int perf_percent; + struct aperfmperf perf; + unsigned long ratio; unsigned int retval; - if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1)) + if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) return 0; - cur.aperf.whole = readin.aperf.whole - - per_cpu(msr_data, cpu).saved_aperf; - cur.mperf.whole = readin.mperf.whole - - per_cpu(msr_data, cpu).saved_mperf; - per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole; - per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole; - -#ifdef __i386__ - /* - * We dont want to do 64 bit divide with 32 bit kernel - * Get an approximate value. Return failure in case we cannot get - * an approximate value. - */ - if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) { - int shift_count; - u32 h; - - h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi); - shift_count = fls(h); - - cur.aperf.whole >>= shift_count; - cur.mperf.whole >>= shift_count; - } - - if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) { - int shift_count = 7; - cur.aperf.split.lo >>= shift_count; - cur.mperf.split.lo >>= shift_count; - } - - if (cur.aperf.split.lo && cur.mperf.split.lo) - perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo; - else - perf_percent = 0; + ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf); + per_cpu(old_perf, cpu) = perf; -#else - if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) { - int shift_count = 7; - cur.aperf.whole >>= shift_count; - cur.mperf.whole >>= shift_count; - } - - if (cur.aperf.whole && cur.mperf.whole) - perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole; - else - perf_percent = 0; - -#endif - - retval = (policy->cpuinfo.max_freq * perf_percent) / 100; + retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; return retval; } @@ -731,12 +669,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) acpi_processor_notify_smm(THIS_MODULE); /* Check for APERF/MPERF support in hardware */ - if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { - unsigned int ecx; - ecx = cpuid_ecx(6); - if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) - acpi_cpufreq_driver.getavg = get_measured_perf; - } + if (cpu_has(c, X86_FEATURE_APERFMPERF)) + acpi_cpufreq_driver.getavg = get_measured_perf; dprintk("CPU%u - ACPI performance management activated.\n", cpu); for (i = 0; i < perf->state_count; i++) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 80a722a071b..40e1835b35e 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -350,6 +350,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } + if (c->cpuid_level > 6) { + unsigned ecx = cpuid_ecx(6); + if (ecx & 0x01) + set_cpu_cap(c, X86_FEATURE_APERFMPERF); + } + if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); if (cpu_has_ds) { diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c new file mode 100644 index 00000000000..a640ae5ad20 --- /dev/null +++ b/arch/x86/kernel/cpu/sched.c @@ -0,0 +1,55 @@ +#include <linux/sched.h> +#include <linux/math64.h> +#include <linux/percpu.h> +#include <linux/irqflags.h> + +#include <asm/cpufeature.h> +#include <asm/processor.h> + +#ifdef CONFIG_SMP + +static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched); + +static unsigned long scale_aperfmperf(void) +{ + struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched); + unsigned long ratio, flags; + + local_irq_save(flags); + get_aperfmperf(&val); + local_irq_restore(flags); + + ratio = calc_aperfmperf_ratio(old, &val); + *old = val; + + return ratio; +} + +unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +{ + /* + * do aperf/mperf on the cpu level because it includes things + * like turbo mode, which are relevant to full cores. + */ + if (boot_cpu_has(X86_FEATURE_APERFMPERF)) + return scale_aperfmperf(); + + /* + * maybe have something cpufreq here + */ + + return default_scale_freq_power(sd, cpu); +} + +unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) +{ + /* + * aperf/mperf already includes the smt gain + */ + if (boot_cpu_has(X86_FEATURE_APERFMPERF)) + return SCHED_LOAD_SCALE; + + return default_scale_smt_power(sd, cpu); +} + +#endif |