From 0ef95533326a7b37d16025af9edc0c18e644b346 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 1 Jul 2008 11:43:18 -0700 Subject: x86: merge sched_clock handling Move the basic global variable definitions and sched_clock handling in the common "tsc.c" file. - Unify notsc kernel command line handling for 32 bit and 64bit. - Functional changes for 64bit. - "tsc_disabled" is updated if "notsc" is passed at boottime. - Fallback to jiffies for sched_clock, incase notsc is passed on commandline. Signed-off-by: Alok N Kataria Signed-off-by: Dan Hecht Cc: Dan Hecht Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 arch/x86/kernel/tsc.c (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c new file mode 100644 index 00000000000..5d0be778fad --- /dev/null +++ b/arch/x86/kernel/tsc.c @@ -0,0 +1,86 @@ +#include +#include +#include +#include + +unsigned int cpu_khz; /* TSC clocks / usec, not used here */ +EXPORT_SYMBOL(cpu_khz); +unsigned int tsc_khz; +EXPORT_SYMBOL(tsc_khz); + +/* + * TSC can be unstable due to cpufreq or due to unsynced TSCs + */ +int tsc_unstable; + +/* native_sched_clock() is called before tsc_init(), so + we must start with the TSC soft disabled to prevent + erroneous rdtsc usage on !cpu_has_tsc processors */ +int tsc_disabled = -1; + +/* + * Scheduler clock - returns current time in nanosec units. + */ +u64 native_sched_clock(void) +{ + u64 this_offset; + + /* + * Fall back to jiffies if there's no TSC available: + * ( But note that we still use it if the TSC is marked + * unstable. We do this because unlike Time Of Day, + * the scheduler clock tolerates small errors and it's + * very important for it to be as fast as the platform + * can achive it. ) + */ + if (unlikely(tsc_disabled)) { + /* No locking but a rare wrong value is not a big deal: */ + return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); + } + + /* read the Time Stamp Counter: */ + rdtscll(this_offset); + + /* return the value in ns */ + return cycles_2_ns(this_offset); +} + +/* We need to define a real function for sched_clock, to override the + weak default version */ +#ifdef CONFIG_PARAVIRT +unsigned long long sched_clock(void) +{ + return paravirt_sched_clock(); +} +#else +unsigned long long +sched_clock(void) __attribute__((alias("native_sched_clock"))); +#endif + +int check_tsc_unstable(void) +{ + return tsc_unstable; +} +EXPORT_SYMBOL_GPL(check_tsc_unstable); + +#ifdef CONFIG_X86_TSC +int __init notsc_setup(char *str) +{ + printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " + "cannot disable TSC completely.\n"); + tsc_disabled = 1; + return 1; +} +#else +/* + * disable flag for tsc. Takes effect by clearing the TSC cpu flag + * in cpu/common.c + */ +int __init notsc_setup(char *str) +{ + setup_clear_cpu_cap(X86_FEATURE_TSC); + return 1; +} +#endif + +__setup("notsc", notsc_setup); -- cgit v1.2.3-70-g09d2 From bfc0f5947afa5e3a13e55867f4478c8a92c11dca Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 1 Jul 2008 11:43:24 -0700 Subject: x86: merge tsc calibration Merge the tsc calibration code for the 32bit and 64bit kernel. The paravirtualized calculate_cpu_khz for 64bit now points to the correct tsc_calibrate code as in 32bit. Original native_calculate_cpu_khz for 64 bit is now called as calibrate_cpu. Also moved the recalibrate_cpu_khz function in the common file. 
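The recalibrate_cpu_khz path rescales cpu_data(0).loops_per_jiffy with cpufreq_scale() in proportion to the old/new frequency ratio. A minimal user-space sketch of that proportional rescaling, for illustration only: rescale_lpj and the sample numbers below are invented stand-ins, not the kernel's cpufreq_scale(), which computes the same ratio with overflow-safe fixed-point helpers.

#include <stdio.h>

/*
 * new = old * new_khz / old_khz: keeps a calibrated busy-loop count
 * consistent when the CPU frequency changes. The widening cast avoids
 * overflow in the intermediate product on 32-bit systems.
 */
static unsigned long rescale_lpj(unsigned long lpj,
                                 unsigned long old_khz,
                                 unsigned long new_khz)
{
        return (unsigned long)(((unsigned long long)lpj * new_khz) / old_khz);
}

int main(void)
{
        unsigned long lpj = 4997120;     /* hypothetical loops_per_jiffy */
        unsigned long old_khz = 1000000; /* 1.0 GHz before the change */
        unsigned long new_khz = 2000000; /* 2.0 GHz after the change */

        printf("lpj %lu -> %lu\n", lpj, rescale_lpj(lpj, old_khz, new_khz));
        return 0;
}

recalibrate_cpu_khz() itself is little more than one fresh calibration followed by this rescale.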
Note that this function is called only from powernow K7 cpu freq driver. Signed-off-by: Alok N Kataria Signed-off-by: Dan Hecht Cc: Dan Hecht Signed-off-by: Ingo Molnar --- arch/x86/kernel/time_64.c | 26 ++++++--- arch/x86/kernel/tsc.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/tsc_32.c | 74 +------------------------- arch/x86/kernel/tsc_64.c | 94 +-------------------------------- include/asm-x86/hpet.h | 2 +- include/asm-x86/tsc.h | 1 - 6 files changed, 154 insertions(+), 174 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 39ae8511a13..c6ac4dad41f 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -56,7 +56,7 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id) /* calibrate_cpu is used on systems with fixed rate TSCs to determine * processor frequency */ #define TICK_COUNT 100000000 -unsigned long __init native_calculate_cpu_khz(void) +static unsigned long __init calibrate_cpu(void) { int tsc_start, tsc_now; int i, no_ctr_free; @@ -114,14 +114,18 @@ void __init hpet_time_init(void) setup_irq(0, &irq0); } +extern void set_cyc2ns_scale(unsigned long cpu_khz, int cpu); + void __init time_init(void) { - tsc_calibrate(); + int cpu; + + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; - cpu_khz = tsc_khz; if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && - (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) - cpu_khz = calculate_cpu_khz(); + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) + cpu_khz = calibrate_cpu(); lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ; @@ -134,7 +138,17 @@ void __init time_init(void) vgetcpu_mode = VGETCPU_LSL; printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); + cpu_khz / 1000, cpu_khz % 1000); + + /* + * Secondary CPUs do not run through tsc_init(), so set up + * all the scale factors for all CPUs, assuming the same + * speed as the bootup CPU. (cpufreq notifiers will fix this + * up if their speed diverges) + */ + for_each_possible_cpu(cpu) + set_cyc2ns_scale(cpu_khz, cpu); + init_tsc_clocksource(); late_time_init = choose_time_init(); } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 5d0be778fad..e6ee14533c7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1,7 +1,11 @@ +#include #include #include #include #include +#include + +#include unsigned int cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -84,3 +88,130 @@ int __init notsc_setup(char *str) #endif __setup("notsc", notsc_setup); + +#define MAX_RETRIES 5 +#define SMI_TRESHOLD 50000 + +/* + * Read TSC and the reference counters. Take care of SMI disturbance + */ +static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) +{ + u64 t1, t2; + int i; + + for (i = 0; i < MAX_RETRIES; i++) { + t1 = get_cycles(); + if (hpet) + *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; + else + *pm = acpi_pm_read_early(); + t2 = get_cycles(); + if ((t2 - t1) < SMI_TRESHOLD) + return t2; + } + return ULLONG_MAX; +} + +/** + * tsc_calibrate - calibrate the tsc on boot + */ +static unsigned int __init tsc_calibrate(void) +{ + unsigned long flags; + u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; + int hpet = is_hpet_enabled(); + unsigned int tsc_khz_val = 0; + + local_irq_save(flags); + + tsc1 = tsc_read_refs(&pm1, hpet ? 
&hpet1 : NULL); + + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + outb(0xb0, 0x43); + outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); + outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + tr1 = get_cycles(); + while ((inb(0x61) & 0x20) == 0); + tr2 = get_cycles(); + + tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + + local_irq_restore(flags); + + /* + * Preset the result with the raw and inaccurate PIT + * calibration value + */ + delta = (tr2 - tr1); + do_div(delta, 50); + tsc_khz_val = delta; + + /* hpet or pmtimer available ? */ + if (!hpet && !pm1 && !pm2) { + printk(KERN_INFO "TSC calibrated against PIT\n"); + goto out; + } + + /* Check, whether the sampling was disturbed by an SMI */ + if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { + printk(KERN_WARNING "TSC calibration disturbed by SMI, " + "using PIT calibration result\n"); + goto out; + } + + tsc2 = (tsc2 - tsc1) * 1000000LL; + + if (hpet) { + printk(KERN_INFO "TSC calibrated against HPET\n"); + if (hpet2 < hpet1) + hpet2 += 0x100000000ULL; + hpet2 -= hpet1; + tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); + do_div(tsc1, 1000000); + } else { + printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); + if (pm2 < pm1) + pm2 += (u64)ACPI_PM_OVRRUN; + pm2 -= pm1; + tsc1 = pm2 * 1000000000LL; + do_div(tsc1, PMTMR_TICKS_PER_SEC); + } + + do_div(tsc2, tsc1); + tsc_khz_val = tsc2; + +out: + return tsc_khz_val; +} + +unsigned long native_calculate_cpu_khz(void) +{ + return tsc_calibrate(); +} + +#ifdef CONFIG_X86_32 +/* Only called from the Powernow K7 cpu freq driver */ +int recalibrate_cpu_khz(void) +{ +#ifndef CONFIG_SMP + unsigned long cpu_khz_old = cpu_khz; + + if (cpu_has_tsc) { + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; + cpu_data(0).loops_per_jiffy = + cpufreq_scale(cpu_data(0).loops_per_jiffy, + cpu_khz_old, cpu_khz); + return 0; + } else + return -ENODEV; +#else + return -ENODEV; +#endif +} + +EXPORT_SYMBOL(recalibrate_cpu_khz); + +#endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index dc8990056d7..40c0aafb358 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -42,7 +42,7 @@ extern int tsc_disabled; DEFINE_PER_CPU(unsigned long, cyc2ns); -static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) +void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { unsigned long long tsc_now, ns_now; unsigned long flags, *scale; @@ -65,78 +65,6 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) local_irq_restore(flags); } -unsigned long native_calculate_cpu_khz(void) -{ - unsigned long long start, end; - unsigned long count; - u64 delta64 = (u64)ULLONG_MAX; - int i; - unsigned long flags; - - local_irq_save(flags); - - /* run 3 times to ensure the cache is warm and to get an accurate reading */ - for (i = 0; i < 3; i++) { - mach_prepare_counter(); - rdtscll(start); - mach_countup(&count); - rdtscll(end); - - /* - * Error: ECTCNEVERSET - * The CTC wasn't reliable: we got a hit on the very first read, - * or the CPU was so fast/slow that the quotient wouldn't fit in - * 32 bits.. 
- */ - if (count <= 1) - continue; - - /* cpu freq too slow: */ - if ((end - start) <= CALIBRATE_TIME_MSEC) - continue; - - /* - * We want the minimum time of all runs in case one of them - * is inaccurate due to SMI or other delay - */ - delta64 = min(delta64, (end - start)); - } - - /* cpu freq too fast (or every run was bad): */ - if (delta64 > (1ULL<<32)) - goto err; - - delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */ - do_div(delta64,CALIBRATE_TIME_MSEC); - - local_irq_restore(flags); - return (unsigned long)delta64; -err: - local_irq_restore(flags); - return 0; -} - -int recalibrate_cpu_khz(void) -{ -#ifndef CONFIG_SMP - unsigned long cpu_khz_old = cpu_khz; - - if (cpu_has_tsc) { - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; - cpu_data(0).loops_per_jiffy = - cpufreq_scale(cpu_data(0).loops_per_jiffy, - cpu_khz_old, cpu_khz); - return 0; - } else - return -ENODEV; -#else - return -ENODEV; -#endif -} - -EXPORT_SYMBOL(recalibrate_cpu_khz); - #ifdef CONFIG_CPU_FREQ /* diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index 69cbe4c9f05..c852ff9bd5d 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -40,7 +40,7 @@ extern int tsc_disabled; DEFINE_PER_CPU(unsigned long, cyc2ns); -static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) +void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { unsigned long long tsc_now, ns_now; unsigned long flags, *scale; @@ -130,98 +130,6 @@ core_initcall(cpufreq_tsc); #endif -#define MAX_RETRIES 5 -#define SMI_TRESHOLD 50000 - -/* - * Read TSC and the reference counters. Take care of SMI disturbance - */ -static unsigned long __init tsc_read_refs(unsigned long *pm, - unsigned long *hpet) -{ - unsigned long t1, t2; - int i; - - for (i = 0; i < MAX_RETRIES; i++) { - t1 = get_cycles(); - if (hpet) - *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; - else - *pm = acpi_pm_read_early(); - t2 = get_cycles(); - if ((t2 - t1) < SMI_TRESHOLD) - return t2; - } - return ULONG_MAX; -} - -/** - * tsc_calibrate - calibrate the tsc on boot - */ -void __init tsc_calibrate(void) -{ - unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2; - int hpet = is_hpet_enabled(), cpu; - - local_irq_save(flags); - - tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); - - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); - tr1 = get_cycles(); - while ((inb(0x61) & 0x20) == 0); - tr2 = get_cycles(); - - tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); - - local_irq_restore(flags); - - /* - * Preset the result with the raw and inaccurate PIT - * calibration value - */ - tsc_khz = (tr2 - tr1) / 50; - - /* hpet or pmtimer available ? 
*/ - if (!hpet && !pm1 && !pm2) { - printk(KERN_INFO "TSC calibrated against PIT\n"); - goto out; - } - - /* Check, whether the sampling was disturbed by an SMI */ - if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) { - printk(KERN_WARNING "TSC calibration disturbed by SMI, " - "using PIT calibration result\n"); - goto out; - } - - tsc2 = (tsc2 - tsc1) * 1000000L; - - if (hpet) { - printk(KERN_INFO "TSC calibrated against HPET\n"); - if (hpet2 < hpet1) - hpet2 += 0x100000000UL; - hpet2 -= hpet1; - tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000; - } else { - printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); - if (pm2 < pm1) - pm2 += ACPI_PM_OVRRUN; - pm2 -= pm1; - tsc1 = (pm2 * 1000000000) / PMTMR_TICKS_PER_SEC; - } - - tsc_khz = tsc2 / tsc1; - -out: - for_each_possible_cpu(cpu) - set_cyc2ns_scale(tsc_khz, cpu); -} - /* * Make an educated guess if the TSC is trustworthy and synchronized * over all CPUs. diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h index 6a9b4ac59bf..82f1ac641bd 100644 --- a/include/asm-x86/hpet.h +++ b/include/asm-x86/hpet.h @@ -86,8 +86,8 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler); #else /* CONFIG_HPET_TIMER */ static inline int hpet_enable(void) { return 0; } -static inline unsigned long hpet_readl(unsigned long a) { return 0; } static inline int is_hpet_enabled(void) { return 0; } +#define hpet_readl(a) 0 #endif #endif /* ASM_X86_HPET_H */ diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h index 548873ab5fc..761054d7fef 100644 --- a/include/asm-x86/tsc.h +++ b/include/asm-x86/tsc.h @@ -58,7 +58,6 @@ int check_tsc_unstable(void); extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); -extern void tsc_calibrate(void); extern int notsc_setup(char *); #endif -- cgit v1.2.3-70-g09d2 From 2dbe06faf37b39f9ecffc054dd173b2a1dc2adcd Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 1 Jul 2008 11:43:31 -0700 Subject: x86: merge the TSC cpu-freq code Unify the TSC cpufreq code. Signed-off-by: Alok N Kataria Signed-off-by: Dan Hecht Cc: Dan Hecht Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/tsc_32.c | 113 ---------------------------------------------- arch/x86/kernel/tsc_64.c | 114 ----------------------------------------------- 3 files changed, 114 insertions(+), 227 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e6ee14533c7..595f78a2221 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -215,3 +216,116 @@ int recalibrate_cpu_khz(void) EXPORT_SYMBOL(recalibrate_cpu_khz); #endif /* CONFIG_X86_32 */ + +/* Accelerators for sched_clock() + * convert from cycles(64bits) => nanoseconds (64bits) + * basic equation: + * ns = cycles / (freq / ns_per_sec) + * ns = cycles * (ns_per_sec / freq) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) + * + * Then we use scaling math (suggested by george@mvista.com) to get: + * ns = cycles * (10^6 * SC / cpu_khz) / SC + * ns = cycles * cyc2ns_scale / SC + * + * And since SC is a constant power of two, we can convert the div + * into a shift. + * + * We can use khz divisor instead of mhz to keep a better precision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * + * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
+ */ + +DEFINE_PER_CPU(unsigned long, cyc2ns); + +void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) +{ + unsigned long long tsc_now, ns_now; + unsigned long flags, *scale; + + local_irq_save(flags); + sched_clock_idle_sleep_event(); + + scale = &per_cpu(cyc2ns, cpu); + + rdtscll(tsc_now); + ns_now = __cycles_2_ns(tsc_now); + + if (cpu_khz) + *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; + + sched_clock_idle_wakeup_event(0); + local_irq_restore(flags); +} + +#ifdef CONFIG_CPU_FREQ + +/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency + * changes. + * + * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's + * not that important because current Opteron setups do not support + * scaling on SMP anyroads. + * + * Should fix up last_tsc too. Currently gettimeofday in the + * first tick after the change will be slightly wrong. + */ + +static unsigned int ref_freq; +static unsigned long loops_per_jiffy_ref; +static unsigned long tsc_khz_ref; + +static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + unsigned long *lpj, dummy; + + if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) + return 0; + + lpj = &dummy; + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) +#ifdef CONFIG_SMP + lpj = &cpu_data(freq->cpu).loops_per_jiffy; +#else + lpj = &boot_cpu_data.loops_per_jiffy; +#endif + + if (!ref_freq) { + ref_freq = freq->old; + loops_per_jiffy_ref = *lpj; + tsc_khz_ref = tsc_khz; + } + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || + (val == CPUFREQ_RESUMECHANGE)) { + *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); + + tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) + mark_tsc_unstable("cpufreq changes"); + } + + set_cyc2ns_scale(tsc_khz_ref, freq->cpu); + + return 0; +} + +static struct notifier_block time_cpufreq_notifier_block = { + .notifier_call = time_cpufreq_notifier +}; + +static int __init cpufreq_tsc(void) +{ + cpufreq_register_notifier(&time_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + return 0; +} + +core_initcall(cpufreq_tsc); + +#endif /* CONFIG_CPU_FREQ */ diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 40c0aafb358..bbc153d36f8 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -18,119 +18,6 @@ extern int tsc_unstable; extern int tsc_disabled; -/* Accelerators for sched_clock() - * convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_khz * 10^3)) - * ns = cycles * (10^6 / cpu_khz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^6 * SC / cpu_khz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * - * We can use khz divisor instead of mhz to keep a better precision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. - * (mathieu.desnoyers@polymtl.ca) - * - * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
- */ - -DEFINE_PER_CPU(unsigned long, cyc2ns); - -void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) -{ - unsigned long long tsc_now, ns_now; - unsigned long flags, *scale; - - local_irq_save(flags); - sched_clock_idle_sleep_event(); - - scale = &per_cpu(cyc2ns, cpu); - - rdtscll(tsc_now); - ns_now = __cycles_2_ns(tsc_now); - - if (cpu_khz) - *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; - - /* - * Start smoothly with the new frequency: - */ - sched_clock_idle_wakeup_event(0); - local_irq_restore(flags); -} - -#ifdef CONFIG_CPU_FREQ - -/* - * if the CPU frequency is scaled, TSC-based delays will need a different - * loops_per_jiffy value to function properly. - */ -static unsigned int ref_freq; -static unsigned long loops_per_jiffy_ref; -static unsigned long cpu_khz_ref; - -static int -time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) -{ - struct cpufreq_freqs *freq = data; - - if (!ref_freq) { - if (!freq->old){ - ref_freq = freq->new; - return 0; - } - ref_freq = freq->old; - loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy; - cpu_khz_ref = cpu_khz; - } - - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || - (val == CPUFREQ_RESUMECHANGE)) { - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - cpu_data(freq->cpu).loops_per_jiffy = - cpufreq_scale(loops_per_jiffy_ref, - ref_freq, freq->new); - - if (cpu_khz) { - - if (num_online_cpus() == 1) - cpu_khz = cpufreq_scale(cpu_khz_ref, - ref_freq, freq->new); - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { - tsc_khz = cpu_khz; - set_cyc2ns_scale(cpu_khz, freq->cpu); - /* - * TSC based sched_clock turns - * to junk w/ cpufreq - */ - mark_tsc_unstable("cpufreq changes"); - } - } - } - - return 0; -} - -static struct notifier_block time_cpufreq_notifier_block = { - .notifier_call = time_cpufreq_notifier -}; - -static int __init cpufreq_tsc(void) -{ - return cpufreq_register_notifier(&time_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); -} -core_initcall(cpufreq_tsc); - -#endif - /* clock source code */ static struct clocksource clocksource_tsc; diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index c852ff9bd5d..80a274b018c 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -16,120 +16,6 @@ extern int tsc_unstable; extern int tsc_disabled; -/* Accelerators for sched_clock() - * convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_khz * 10^3)) - * ns = cycles * (10^6 / cpu_khz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^6 * SC / cpu_khz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * - * We can use khz divisor instead of mhz to keep a better precision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. - * (mathieu.desnoyers@polymtl.ca) - * - * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
- */ - -DEFINE_PER_CPU(unsigned long, cyc2ns); - -void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) -{ - unsigned long long tsc_now, ns_now; - unsigned long flags, *scale; - - local_irq_save(flags); - sched_clock_idle_sleep_event(); - - scale = &per_cpu(cyc2ns, cpu); - - rdtscll(tsc_now); - ns_now = __cycles_2_ns(tsc_now); - - if (cpu_khz) - *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; - - sched_clock_idle_wakeup_event(0); - local_irq_restore(flags); -} - -#ifdef CONFIG_CPU_FREQ - -/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency - * changes. - * - * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's - * not that important because current Opteron setups do not support - * scaling on SMP anyroads. - * - * Should fix up last_tsc too. Currently gettimeofday in the - * first tick after the change will be slightly wrong. - */ - -static unsigned int ref_freq; -static unsigned long loops_per_jiffy_ref; -static unsigned long tsc_khz_ref; - -static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - unsigned long *lpj, dummy; - - if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) - return 0; - - lpj = &dummy; - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) -#ifdef CONFIG_SMP - lpj = &cpu_data(freq->cpu).loops_per_jiffy; -#else - lpj = &boot_cpu_data.loops_per_jiffy; -#endif - - if (!ref_freq) { - ref_freq = freq->old; - loops_per_jiffy_ref = *lpj; - tsc_khz_ref = tsc_khz; - } - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || - (val == CPUFREQ_RESUMECHANGE)) { - *lpj = - cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); - - tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - mark_tsc_unstable("cpufreq changes"); - } - - set_cyc2ns_scale(tsc_khz_ref, freq->cpu); - - return 0; -} - -static struct notifier_block time_cpufreq_notifier_block = { - .notifier_call = time_cpufreq_notifier -}; - -static int __init cpufreq_tsc(void) -{ - cpufreq_register_notifier(&time_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - return 0; -} - -core_initcall(cpufreq_tsc); - -#endif - /* * Make an educated guess if the TSC is trustworthy and synchronized * over all CPUs. -- cgit v1.2.3-70-g09d2 From 8fbbc4b45ce3e4c0eeb15004c79c72b6896a79c2 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 1 Jul 2008 11:43:34 -0700 Subject: x86: merge tsc_init and clocksource code Unify the clocksource code. Unify the tsc_init code. 
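The cyc2ns derivation consolidated by the previous patch is worth pinning down with numbers: ns = cycles * (10^6 * SC / cpu_khz) / SC, where SC is a constant power of two so the closing division becomes a shift. Below is a self-contained sketch of that fixed-point conversion; taking SC = 2^10 (CYC2NS_SCALE_FACTOR = 10) is an assumption, chosen to match the comment's 10^6 * 2^10 bound.

#include <stdio.h>

#define CYC2NS_SCALE_FACTOR 10  /* SC = 2^10, assumed per the derivation */

/* Per-CPU precomputed scale: (10^6 << SC) / cpu_khz fits in 32 bits. */
static unsigned long cyc2ns_scale(unsigned long cpu_khz)
{
        return (1000000UL << CYC2NS_SCALE_FACTOR) / cpu_khz;
}

/* ns = (cycles * scale) >> SC: one multiply and one shift per call. */
static unsigned long long cycles_to_ns(unsigned long long cycles,
                                       unsigned long scale)
{
        return (cycles * scale) >> CYC2NS_SCALE_FACTOR;
}

int main(void)
{
        unsigned long scale = cyc2ns_scale(2000000);    /* 2 GHz CPU */

        /* 2e9 cycles at 2 GHz must come out as ~1e9 ns (one second). */
        printf("scale=%lu, ns=%llu\n", scale,
               cycles_to_ns(2000000000ULL, scale));
        return 0;
}

At 2 GHz the scale is (10^6 << 10) / (2 * 10^6) = 512, and 2 * 10^9 cycles convert to exactly 10^9 ns; this is why the cheap multiply-and-shift is accurate enough for sched_clock().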
Signed-off-by: Alok N Kataria Signed-off-by: Dan Hecht Cc: Dan Hecht Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/time_64.c | 32 +------ arch/x86/kernel/tsc.c | 212 +++++++++++++++++++++++++++++++++++++++++++++- arch/x86/kernel/tsc_32.c | 188 ---------------------------------------- arch/x86/kernel/tsc_64.c | 106 ----------------------- include/asm-x86/apic.h | 7 +- include/asm-x86/delay.h | 4 + include/asm-x86/time.h | 2 + include/asm-x86/tsc.h | 1 - 9 files changed, 224 insertions(+), 330 deletions(-) delete mode 100644 arch/x86/kernel/tsc_32.c delete mode 100644 arch/x86/kernel/tsc_64.c (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index ca904ee1725..59b14c940a2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -26,7 +26,7 @@ obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o -obj-y += tsc_$(BITS).o io_delay.o rtc.o tsc.o +obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index c6ac4dad41f..e3d49c553af 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -56,7 +56,7 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id) /* calibrate_cpu is used on systems with fixed rate TSCs to determine * processor frequency */ #define TICK_COUNT 100000000 -static unsigned long __init calibrate_cpu(void) +unsigned long __init calibrate_cpu(void) { int tsc_start, tsc_now; int i, no_ctr_free; @@ -114,41 +114,13 @@ void __init hpet_time_init(void) setup_irq(0, &irq0); } -extern void set_cyc2ns_scale(unsigned long cpu_khz, int cpu); - void __init time_init(void) { - int cpu; - - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; - - if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && - (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) - cpu_khz = calibrate_cpu(); - - lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ; - - if (unsynchronized_tsc()) - mark_tsc_unstable("TSCs unsynchronized"); - + tsc_init(); if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) vgetcpu_mode = VGETCPU_RDTSCP; else vgetcpu_mode = VGETCPU_LSL; - printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - - /* - * Secondary CPUs do not run through tsc_init(), so set up - * all the scale factors for all CPUs, assuming the same - * speed as the bootup CPU. 
(cpufreq notifiers will fix this - * up if their speed diverges) - */ - for_each_possible_cpu(cpu) - set_cyc2ns_scale(cpu_khz, cpu); - - init_tsc_clocksource(); late_time_init = choose_time_init(); } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 595f78a2221..94c16bdd569 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -5,8 +5,16 @@ #include #include #include +#include +#include +#include +#include #include +#include +#include +#include +#include unsigned int cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -16,12 +24,12 @@ EXPORT_SYMBOL(tsc_khz); /* * TSC can be unstable due to cpufreq or due to unsynced TSCs */ -int tsc_unstable; +static int tsc_unstable; /* native_sched_clock() is called before tsc_init(), so we must start with the TSC soft disabled to prevent erroneous rdtsc usage on !cpu_has_tsc processors */ -int tsc_disabled = -1; +static int tsc_disabled = -1; /* * Scheduler clock - returns current time in nanosec units. @@ -241,7 +249,7 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); DEFINE_PER_CPU(unsigned long, cyc2ns); -void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) +static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { unsigned long long tsc_now, ns_now; unsigned long flags, *scale; @@ -329,3 +337,201 @@ static int __init cpufreq_tsc(void) core_initcall(cpufreq_tsc); #endif /* CONFIG_CPU_FREQ */ + +/* clocksource code */ + +static struct clocksource clocksource_tsc; + +/* + * We compare the TSC to the cycle_last value in the clocksource + * structure to avoid a nasty time-warp. This can be observed in a + * very small window right after one CPU updated cycle_last under + * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which + * is smaller than the cycle_last reference value due to a TSC which + * is slighty behind. This delta is nowhere else observable, but in + * that case it results in a forward time jump in the range of hours + * due to the unsigned delta calculation of the time keeping core + * code, which is necessary to support wrapping clocksources like pm + * timer. + */ +static cycle_t read_tsc(void) +{ + cycle_t ret = (cycle_t)get_cycles(); + + return ret >= clocksource_tsc.cycle_last ? + ret : clocksource_tsc.cycle_last; +} + +static cycle_t __vsyscall_fn vread_tsc(void) +{ + cycle_t ret = (cycle_t)vget_cycles(); + + return ret >= __vsyscall_gtod_data.clock.cycle_last ? 
+ ret : __vsyscall_gtod_data.clock.cycle_last; +} + +static struct clocksource clocksource_tsc = { + .name = "tsc", + .rating = 300, + .read = read_tsc, + .mask = CLOCKSOURCE_MASK(64), + .shift = 22, + .flags = CLOCK_SOURCE_IS_CONTINUOUS | + CLOCK_SOURCE_MUST_VERIFY, +#ifdef CONFIG_X86_64 + .vread = vread_tsc, +#endif +}; + +void mark_tsc_unstable(char *reason) +{ + if (!tsc_unstable) { + tsc_unstable = 1; + printk("Marking TSC unstable due to %s\n", reason); + /* Change only the rating, when not registered */ + if (clocksource_tsc.mult) + clocksource_change_rating(&clocksource_tsc, 0); + else + clocksource_tsc.rating = 0; + } +} + +EXPORT_SYMBOL_GPL(mark_tsc_unstable); + +static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) +{ + printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", + d->ident); + tsc_unstable = 1; + return 0; +} + +/* List of systems that have known TSC problems */ +static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { + { + .callback = dmi_mark_tsc_unstable, + .ident = "IBM Thinkpad 380XD", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), + }, + }, + {} +}; + +/* + * Geode_LX - the OLPC CPU has a possibly a very reliable TSC + */ +#ifdef CONFIG_MGEODE_LX +/* RTSC counts during suspend */ +#define RTSC_SUSP 0x100 + +static void __init check_geode_tsc_reliable(void) +{ + unsigned long res_low, res_high; + + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + if (res_low & RTSC_SUSP) + clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; +} +#else +static inline void check_geode_tsc_reliable(void) { } +#endif + +/* + * Make an educated guess if the TSC is trustworthy and synchronized + * over all CPUs. + */ +__cpuinit int unsynchronized_tsc(void) +{ + if (!cpu_has_tsc || tsc_unstable) + return 1; + +#ifdef CONFIG_SMP + if (apic_is_clustered_box()) + return 1; +#endif + + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return 0; + /* + * Intel systems are normally all synchronized. + * Exceptions must mark TSC as unstable: + */ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { + /* assume multi socket systems are not synchronized: */ + if (num_possible_cpus() > 1) + tsc_unstable = 1; + } + + return tsc_unstable; +} + +static void __init init_tsc_clocksource(void) +{ + clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, + clocksource_tsc.shift); + /* lower the rating if we already know its unstable: */ + if (check_tsc_unstable()) { + clocksource_tsc.rating = 0; + clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; + } + clocksource_register(&clocksource_tsc); +} + +void __init tsc_init(void) +{ + u64 lpj; + int cpu; + + if (!cpu_has_tsc) + return; + + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; + + if (!cpu_khz) { + mark_tsc_unstable("could not calculate TSC khz"); + return; + } + +#ifdef CONFIG_X86_64 + if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) + cpu_khz = calibrate_cpu(); +#endif + + lpj = ((u64)tsc_khz * 1000); + do_div(lpj, HZ); + lpj_fine = lpj; + + printk("Detected %lu.%03lu MHz processor.\n", + (unsigned long)cpu_khz / 1000, + (unsigned long)cpu_khz % 1000); + + /* + * Secondary CPUs do not run through tsc_init(), so set up + * all the scale factors for all CPUs, assuming the same + * speed as the bootup CPU. 
(cpufreq notifiers will fix this + * up if their speed diverges) + */ + for_each_possible_cpu(cpu) + set_cyc2ns_scale(cpu_khz, cpu); + + if (tsc_disabled > 0) + return; + + /* now allow native_sched_clock() to use rdtsc */ + tsc_disabled = 0; + + use_tsc_delay(); + /* Check and install the TSC clocksource */ + dmi_check_system(bad_tsc_dmi_table); + + if (unsynchronized_tsc()) + mark_tsc_unstable("TSCs unsynchronized"); + + check_geode_tsc_reliable(); + init_tsc_clocksource(); +} + diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c deleted file mode 100644 index bbc153d36f8..00000000000 --- a/arch/x86/kernel/tsc_32.c +++ /dev/null @@ -1,188 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "mach_timer.h" - -extern int tsc_unstable; -extern int tsc_disabled; - -/* clock source code */ - -static struct clocksource clocksource_tsc; - -/* - * We compare the TSC to the cycle_last value in the clocksource - * structure to avoid a nasty time-warp issue. This can be observed in - * a very small window right after one CPU updated cycle_last under - * xtime lock and the other CPU reads a TSC value which is smaller - * than the cycle_last reference value due to a TSC which is slighty - * behind. This delta is nowhere else observable, but in that case it - * results in a forward time jump in the range of hours due to the - * unsigned delta calculation of the time keeping core code, which is - * necessary to support wrapping clocksources like pm timer. - */ -static cycle_t read_tsc(void) -{ - cycle_t ret; - - rdtscll(ret); - - return ret >= clocksource_tsc.cycle_last ? - ret : clocksource_tsc.cycle_last; -} - -static struct clocksource clocksource_tsc = { - .name = "tsc", - .rating = 300, - .read = read_tsc, - .mask = CLOCKSOURCE_MASK(64), - .mult = 0, /* to be set */ - .shift = 22, - .flags = CLOCK_SOURCE_IS_CONTINUOUS | - CLOCK_SOURCE_MUST_VERIFY, -}; - -void mark_tsc_unstable(char *reason) -{ - if (!tsc_unstable) { - tsc_unstable = 1; - printk("Marking TSC unstable due to: %s.\n", reason); - /* Can be called before registration */ - if (clocksource_tsc.mult) - clocksource_change_rating(&clocksource_tsc, 0); - else - clocksource_tsc.rating = 0; - } -} -EXPORT_SYMBOL_GPL(mark_tsc_unstable); - -static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) -{ - printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", - d->ident); - tsc_unstable = 1; - return 0; -} - -/* List of systems that have known TSC problems */ -static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { - { - .callback = dmi_mark_tsc_unstable, - .ident = "IBM Thinkpad 380XD", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), - }, - }, - {} -}; - -/* - * Make an educated guess if the TSC is trustworthy and synchronized - * over all CPUs. - */ -__cpuinit int unsynchronized_tsc(void) -{ - if (!cpu_has_tsc || tsc_unstable) - return 1; - - /* Anything with constant TSC should be synchronized */ - if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) - return 0; - - /* - * Intel systems are normally all synchronized. 
- * Exceptions must mark TSC as unstable: - */ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { - /* assume multi socket systems are not synchronized: */ - if (num_possible_cpus() > 1) - tsc_unstable = 1; - } - return tsc_unstable; -} - -/* - * Geode_LX - the OLPC CPU has a possibly a very reliable TSC - */ -#ifdef CONFIG_MGEODE_LX -/* RTSC counts during suspend */ -#define RTSC_SUSP 0x100 - -static void __init check_geode_tsc_reliable(void) -{ - unsigned long res_low, res_high; - - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - if (res_low & RTSC_SUSP) - clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; -} -#else -static inline void check_geode_tsc_reliable(void) { } -#endif - - -void __init tsc_init(void) -{ - int cpu; - u64 lpj; - - if (!cpu_has_tsc || tsc_disabled > 0) - return; - - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; - - if (!cpu_khz) { - mark_tsc_unstable("could not calculate TSC khz"); - return; - } - - lpj = ((u64)tsc_khz * 1000); - do_div(lpj, HZ); - lpj_fine = lpj; - - /* now allow native_sched_clock() to use rdtsc */ - tsc_disabled = 0; - - printk("Detected %lu.%03lu MHz processor.\n", - (unsigned long)cpu_khz / 1000, - (unsigned long)cpu_khz % 1000); - - /* - * Secondary CPUs do not run through tsc_init(), so set up - * all the scale factors for all CPUs, assuming the same - * speed as the bootup CPU. (cpufreq notifiers will fix this - * up if their speed diverges) - */ - for_each_possible_cpu(cpu) - set_cyc2ns_scale(cpu_khz, cpu); - - use_tsc_delay(); - - /* Check and install the TSC clocksource */ - dmi_check_system(bad_tsc_dmi_table); - - unsynchronized_tsc(); - check_geode_tsc_reliable(); - clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, - clocksource_tsc.shift); - /* lower the rating if we already know its unstable: */ - if (check_tsc_unstable()) { - clocksource_tsc.rating = 0; - clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; - } - clocksource_register(&clocksource_tsc); -} diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c deleted file mode 100644 index 80a274b018c..00000000000 --- a/arch/x86/kernel/tsc_64.c +++ /dev/null @@ -1,106 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -extern int tsc_unstable; -extern int tsc_disabled; - -/* - * Make an educated guess if the TSC is trustworthy and synchronized - * over all CPUs. - */ -__cpuinit int unsynchronized_tsc(void) -{ - if (tsc_unstable) - return 1; - -#ifdef CONFIG_SMP - if (apic_is_clustered_box()) - return 1; -#endif - - if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) - return 0; - - /* Assume multi socket systems are not synchronized */ - return num_present_cpus() > 1; -} - -static struct clocksource clocksource_tsc; - -/* - * We compare the TSC to the cycle_last value in the clocksource - * structure to avoid a nasty time-warp. This can be observed in a - * very small window right after one CPU updated cycle_last under - * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which - * is smaller than the cycle_last reference value due to a TSC which - * is slighty behind. This delta is nowhere else observable, but in - * that case it results in a forward time jump in the range of hours - * due to the unsigned delta calculation of the time keeping core - * code, which is necessary to support wrapping clocksources like pm - * timer. - */ -static cycle_t read_tsc(void) -{ - cycle_t ret = (cycle_t)get_cycles(); - - return ret >= clocksource_tsc.cycle_last ? 
- ret : clocksource_tsc.cycle_last; -} - -static cycle_t __vsyscall_fn vread_tsc(void) -{ - cycle_t ret = (cycle_t)vget_cycles(); - - return ret >= __vsyscall_gtod_data.clock.cycle_last ? - ret : __vsyscall_gtod_data.clock.cycle_last; -} - -static struct clocksource clocksource_tsc = { - .name = "tsc", - .rating = 300, - .read = read_tsc, - .mask = CLOCKSOURCE_MASK(64), - .shift = 22, - .flags = CLOCK_SOURCE_IS_CONTINUOUS | - CLOCK_SOURCE_MUST_VERIFY, - .vread = vread_tsc, -}; - -void mark_tsc_unstable(char *reason) -{ - if (!tsc_unstable) { - tsc_unstable = 1; - printk("Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) - clocksource_change_rating(&clocksource_tsc, 0); - else - clocksource_tsc.rating = 0; - } -} -EXPORT_SYMBOL_GPL(mark_tsc_unstable); - -void __init init_tsc_clocksource(void) -{ - if (tsc_disabled > 0) - return; - - clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, - clocksource_tsc.shift); - if (check_tsc_unstable()) - clocksource_tsc.rating = 0; - - clocksource_register(&clocksource_tsc); -} diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index a29807737d3..4e2c1e517f0 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -121,12 +121,17 @@ extern void enable_NMI_through_LVT0(void); */ #ifdef CONFIG_X86_64 extern void early_init_lapic_mapping(void); +extern int apic_is_clustered_box(void); +#else +static inline int apic_is_clustered_box(void) +{ + return 0; +} #endif extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask); extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask); -extern int apic_is_clustered_box(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } diff --git a/include/asm-x86/delay.h b/include/asm-x86/delay.h index 409a649204a..bb80880c834 100644 --- a/include/asm-x86/delay.h +++ b/include/asm-x86/delay.h @@ -26,6 +26,10 @@ extern void __delay(unsigned long loops); ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ __ndelay(n)) +#ifdef CONFIG_X86_32 void use_tsc_delay(void); +#else +#define use_tsc_delay() {} +#endif #endif /* _ASM_X86_DELAY_H */ diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h index bce72d7a958..a17fa473e91 100644 --- a/include/asm-x86/time.h +++ b/include/asm-x86/time.h @@ -56,4 +56,6 @@ static inline int native_set_wallclock(unsigned long nowtime) #endif /* CONFIG_PARAVIRT */ +extern unsigned long __init calibrate_cpu(void); + #endif diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h index 761054d7fef..cb6f6ee45b8 100644 --- a/include/asm-x86/tsc.h +++ b/include/asm-x86/tsc.h @@ -48,7 +48,6 @@ static __always_inline cycles_t vget_cycles(void) extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); -extern void init_tsc_clocksource(void); int check_tsc_unstable(void); /* -- cgit v1.2.3-70-g09d2 From e93ef949fd9a3f237aedfb8e64414b28980530b8 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 1 Jul 2008 11:43:36 -0700 Subject: x86: rename paravirtualized TSC functions Rename the paravirtualized calculate_cpu_khz to calibrate_tsc. In all cases, we actually calibrate_tsc and use that as the cpu_khz value. 
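The mechanism behind this rename is a single function-pointer slot that either points at the native hardware calibration or at a hypervisor query, with calibrate_tsc() reduced to a macro over that slot. A standalone sketch of the indirection follows; struct time_ops, hypervisor_tsc_khz and both frequency values are invented for illustration, while the real pv_time_ops in the diff below carries several more hooks.

#include <stdio.h>

/* One slot answering: how do I learn the TSC frequency on this platform? */
struct time_ops {
        unsigned long (*get_tsc_khz)(void);
};

/* Stand-in for the PIT/HPET/PMTIMER measurement loop. */
static unsigned long native_calibrate_tsc(void)
{
        return 2000000;         /* pretend the hardware measured 2 GHz */
}

/* A guest asks its hypervisor instead of timing real hardware. */
static unsigned long hypervisor_tsc_khz(void)
{
        return 2400000;
}

static struct time_ops time_ops = { .get_tsc_khz = native_calibrate_tsc };

#define calibrate_tsc() (time_ops.get_tsc_khz())

int main(void)
{
        printf("native: %lu kHz\n", calibrate_tsc());
        time_ops.get_tsc_khz = hypervisor_tsc_khz;      /* paravirt override */
        printf("guest:  %lu kHz\n", calibrate_tsc());
        return 0;
}

Every backend now reports the TSC rate specifically, and cpu_khz is derived from tsc_khz rather than the other way around.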
Signed-off-by: Alok N Kataria Signed-off-by: Dan Hecht Cc: Dan Hecht Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 2 +- arch/x86/kernel/tsc.c | 18 +++++++----------- arch/x86/kernel/vmi_32.c | 2 +- arch/x86/kernel/vmiclock_32.c | 4 ++-- arch/x86/lguest/boot.c | 4 ++-- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/time.c | 4 ++-- arch/x86/xen/xen-ops.h | 2 +- include/asm-x86/paravirt.h | 4 ++-- include/asm-x86/timer.h | 4 ++-- include/asm-x86/vmi_time.h | 2 +- 11 files changed, 22 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e7e5652f65b..e0f571d58c1 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -285,7 +285,7 @@ struct pv_time_ops pv_time_ops = { .get_wallclock = native_get_wallclock, .set_wallclock = native_set_wallclock, .sched_clock = native_sched_clock, - .get_cpu_khz = native_calculate_cpu_khz, + .get_tsc_khz = native_calibrate_tsc, }; struct pv_irq_ops pv_irq_ops = { diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 94c16bdd569..3c36f92160c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -123,9 +123,9 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) } /** - * tsc_calibrate - calibrate the tsc on boot + * native_calibrate_tsc - calibrate the tsc on boot */ -static unsigned int __init tsc_calibrate(void) +unsigned long native_calibrate_tsc(void) { unsigned long flags; u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; @@ -195,10 +195,6 @@ out: return tsc_khz_val; } -unsigned long native_calculate_cpu_khz(void) -{ - return tsc_calibrate(); -} #ifdef CONFIG_X86_32 /* Only called from the Powernow K7 cpu freq driver */ @@ -208,8 +204,8 @@ int recalibrate_cpu_khz(void) unsigned long cpu_khz_old = cpu_khz; if (cpu_has_tsc) { - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; + tsc_khz = calibrate_tsc(); + cpu_khz = tsc_khz; cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); @@ -487,10 +483,10 @@ void __init tsc_init(void) if (!cpu_has_tsc) return; - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; + tsc_khz = calibrate_tsc(); + cpu_khz = tsc_khz; - if (!cpu_khz) { + if (!tsc_khz) { mark_tsc_unstable("could not calculate TSC khz"); return; } diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 946bf13b44a..b15346092b7 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -932,7 +932,7 @@ static inline int __init activate_vmi(void) pv_apic_ops.setup_secondary_clock = vmi_time_ap_init; #endif pv_time_ops.sched_clock = vmi_sched_clock; - pv_time_ops.get_cpu_khz = vmi_cpu_khz; + pv_time_ops.get_tsc_khz = vmi_tsc_khz; /* We have true wallclock functions; disable CMOS clock sync */ no_sync_cmos_clock = 1; diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index ba7d19e102b..6953859fe28 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -69,8 +69,8 @@ unsigned long long vmi_sched_clock(void) return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); } -/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */ -unsigned long vmi_cpu_khz(void) +/* paravirt_ops.get_tsc_khz = vmi_tsc_khz */ +unsigned long vmi_tsc_khz(void) { unsigned long long khz; khz = vmi_timer_ops.get_cycle_frequency(); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index e72cf0793fb..50dad44fb54 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -607,7 +607,7 @@ static unsigned 
long lguest_get_wallclock(void) * what speed it runs at, or 0 if it's unusable as a reliable clock source. * This matches what we want here: if we return 0 from this function, the x86 * TSC clock will give up and not register itself. */ -static unsigned long lguest_cpu_khz(void) +static unsigned long lguest_tsc_khz(void) { return lguest_data.tsc_khz; } @@ -998,7 +998,7 @@ __init void lguest_init(void) /* time operations */ pv_time_ops.get_wallclock = lguest_get_wallclock; pv_time_ops.time_init = lguest_time_init; - pv_time_ops.get_cpu_khz = lguest_cpu_khz; + pv_time_ops.get_tsc_khz = lguest_tsc_khz; /* Now is a good time to look at the implementations of these functions * before returning to the rest of lguest_init(). */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 3b980831602..dcd4e51f2f1 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1062,7 +1062,7 @@ static const struct pv_time_ops xen_time_ops __initdata = { .set_wallclock = xen_set_wallclock, .get_wallclock = xen_get_wallclock, - .get_cpu_khz = xen_cpu_khz, + .get_tsc_khz = xen_tsc_khz, .sched_clock = xen_sched_clock, }; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 64f0038b955..685b77470fc 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -197,8 +197,8 @@ unsigned long long xen_sched_clock(void) } -/* Get the CPU speed from Xen */ -unsigned long xen_cpu_khz(void) +/* Get the TSC speed from Xen */ +unsigned long xen_tsc_khz(void) { u64 xen_khz = 1000000ULL << 32; const struct pvclock_vcpu_time_info *info = diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9a055592a30..d852ddbb344 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -32,7 +32,7 @@ void __init xen_build_dynamic_phys_to_machine(void); void xen_setup_timer(int cpu); void xen_setup_cpu_clockevents(void); -unsigned long xen_cpu_khz(void); +unsigned long xen_tsc_khz(void); void __init xen_time_init(void); unsigned long xen_get_wallclock(void); int xen_set_wallclock(unsigned long time); diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 6d8966f9d19..ef5e8ec6a6a 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -84,7 +84,7 @@ struct pv_time_ops { int (*set_wallclock)(unsigned long); unsigned long long (*sched_clock)(void); - unsigned long (*get_cpu_khz)(void); + unsigned long (*get_tsc_khz)(void); }; struct pv_cpu_ops { @@ -779,7 +779,7 @@ static inline unsigned long long paravirt_sched_clock(void) { return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); } -#define calculate_cpu_khz() (pv_time_ops.get_cpu_khz()) +#define calibrate_tsc() (pv_time_ops.get_tsc_khz()) static inline unsigned long long paravirt_read_pmc(int counter) { diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h index 4f6fcb050c1..fb2a4ddddf3 100644 --- a/include/asm-x86/timer.h +++ b/include/asm-x86/timer.h @@ -7,14 +7,14 @@ #define TICK_SIZE (tick_nsec / 1000) unsigned long long native_sched_clock(void); -unsigned long native_calculate_cpu_khz(void); +unsigned long native_calibrate_tsc(void); extern int timer_ack; extern int no_timer_check; extern int recalibrate_cpu_khz(void); #ifndef CONFIG_PARAVIRT -#define calculate_cpu_khz() native_calculate_cpu_khz() +#define calibrate_tsc() native_calibrate_tsc() #endif /* Accelerators for sched_clock() diff --git a/include/asm-x86/vmi_time.h b/include/asm-x86/vmi_time.h index 47818813032..c3118c38515 100644 --- a/include/asm-x86/vmi_time.h +++ b/include/asm-x86/vmi_time.h @@ -50,7 +50,7 
@@ extern void __init vmi_time_init(void); extern unsigned long vmi_get_wallclock(void); extern int vmi_set_wallclock(unsigned long now); extern unsigned long long vmi_sched_clock(void); -extern unsigned long vmi_cpu_khz(void); +extern unsigned long vmi_tsc_khz(void); #ifdef CONFIG_X86_LOCAL_APIC extern void __devinit vmi_time_bsp_init(void); -- cgit v1.2.3-70-g09d2 From 0a4d8a472f645d99f86303db1462b64e371b090d Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 09:34:08 -0300 Subject: x86: provide delay loop for x86_64. This is for consistency with i386. We call use_tsc_delay() at tsc initialization for x86_64, so we'll be always using it. Signed-off-by: Glauber Costa Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 1 + arch/x86/lib/delay_64.c | 44 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 41 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3c36f92160c..4a775d00195 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -513,6 +513,7 @@ void __init tsc_init(void) */ for_each_possible_cpu(cpu) set_cyc2ns_scale(cpu_khz, cpu); + use_tsc_delay(); if (tsc_disabled > 0) return; diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c index 4c441be9264..d0326d07c84 100644 --- a/arch/x86/lib/delay_64.c +++ b/arch/x86/lib/delay_64.c @@ -22,13 +22,28 @@ #include #endif -int __devinit read_current_timer(unsigned long *timer_value) +/* simple loop based delay: */ +static void delay_loop(unsigned long loops) { - rdtscll(*timer_value); - return 0; + asm volatile( + " test %0,%0 \n" + " jz 3f \n" + " jmp 1f \n" + + ".align 16 \n" + "1: jmp 2f \n" + + ".align 16 \n" + "2: dec %0 \n" + " jnz 2b \n" + "3: dec %0 \n" + + : /* we don't need output */ + :"a" (loops) + ); } -void __delay(unsigned long loops) +static void delay_tsc(unsigned long loops) { unsigned bclock, now; int cpu; @@ -63,6 +78,27 @@ void __delay(unsigned long loops) } preempt_enable(); } + +static void (*delay_fn)(unsigned long) = delay_loop; + +void use_tsc_delay(void) +{ + delay_fn = delay_tsc; +} + +int __devinit read_current_timer(unsigned long *timer_value) +{ + if (delay_fn == delay_tsc) { + rdtscll(*timer_value); + return 0; + } + return -1; +} + +void __delay(unsigned long loops) +{ + delay_fn(loops); +} EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) -- cgit v1.2.3-70-g09d2 From e54afe38630e3b577968428f48ed8ef1e13a2a15 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 10 Jul 2008 14:01:47 -0300 Subject: x86: remove duplicate call to use_tsc_delay Integration generated a duplicate call to use_tsc_delay. Particularly, the one that is done before we check for general tsc usability seems wrong. 
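The pattern this fixes comes from the previous patch: __delay() dispatches through a function pointer that boots on a plain software loop and is pointed at the TSC-based loop only once the TSC has passed its usability checks, so a use_tsc_delay() call placed before those checks defeats the guard. A compressed user-space sketch of the pattern, with stubbed delay bodies (the real delay_tsc spins on rdtsc deltas and copes with preemption):

#include <stdio.h>

/* Calibrated busy loop: safe even before, or without, a usable TSC. */
static void delay_loop(unsigned long loops)
{
        volatile unsigned long n = loops;       /* defeat optimization */

        while (n--)
                ;
}

/* Stub: the real version spins until a TSC delta covers the request. */
static void delay_tsc(unsigned long loops)
{
        printf("(would spin on the TSC for %lu cycles)\n", loops);
}

/* The boot-time default must be the backend with no hardware assumptions. */
static void (*delay_fn)(unsigned long) = delay_loop;

static void use_tsc_delay(void)
{
        delay_fn = delay_tsc;   /* flip only after the TSC checks pass */
}

static void my_delay(unsigned long loops)  /* user-space stand-in for __delay() */
{
        delay_fn(loops);
}

int main(void)
{
        my_delay(1000);         /* early boot: plain loop */
        use_tsc_delay();
        my_delay(1000);         /* after tsc_init(): TSC-backed */
        return 0;
}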
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 4a775d00195..3c36f92160c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -513,7 +513,6 @@ void __init tsc_init(void) */ for_each_possible_cpu(cpu) set_cyc2ns_scale(cpu_khz, cpu); - use_tsc_delay(); if (tsc_disabled > 0) return; -- cgit v1.2.3-70-g09d2 From 431ceb83f703a343bdd14350480a2224fa4bfedf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Jul 2008 22:08:04 +0200 Subject: x86: fix TSC build error on 32bit Dave Hansen reported a build error on 32bit which went unnoticed as newer gcc versions seem to optimize unused static functions away before compiling them. Make vread_tsc() depend on CONFIG_X86_64 Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3c36f92160c..7603c055390 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -358,6 +358,7 @@ static cycle_t read_tsc(void) ret : clocksource_tsc.cycle_last; } +#ifdef CONFIG_X86_64 static cycle_t __vsyscall_fn vread_tsc(void) { cycle_t ret = (cycle_t)vget_cycles(); @@ -365,6 +366,7 @@ static cycle_t __vsyscall_fn vread_tsc(void) return ret >= __vsyscall_gtod_data.clock.cycle_last ? ret : __vsyscall_gtod_data.clock.cycle_last; } +#endif static struct clocksource clocksource_tsc = { .name = "tsc", -- cgit v1.2.3-70-g09d2 From d554d9a4295dd0595d12eeccbc55d1f495b15176 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:07:44 +0200 Subject: x86, tsc: fix section mismatch warning WARNING: vmlinux.o(.text+0x7950): Section mismatch in reference from the function native_calibrate_tsc() to the function .init.text:tsc_read_refs() The function native_calibrate_tsc() references the function __init tsc_read_refs(). This is often because native_calibrate_tsc lacks a __init annotation or the annotation of tsc_read_refs is wrong. tsc_read_refs is called from native_calibrate_tsc which is not __init and native_calibrate_tsc cannot be marked __init Signed-off-by: Marcin Slusarz Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7603c055390..46af7167673 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. Take care of SMI disturbance */ -static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *pm, u64 *hpet) { u64 t1, t2; int i; -- cgit v1.2.3-70-g09d2 From 060700b571717c997a2ea5e2049b848fa248ee13 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Aug 2008 11:52:06 -0700 Subject: x86: do not enable TSC notifier if we don't need it Impact: crash on non-TSC-equipped CPUs Don't enable the TSC notifier if we *either*: 1. don't have a CPU, or 2. have a CPU with constant TSC. In either of those cases, the notifier is either damaging (1) or useless(2). From: Linus Torvalds Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/tsc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 46af7167673..9bed5cae4bd 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -325,6 +325,10 @@ static struct notifier_block time_cpufreq_notifier_block = { static int __init cpufreq_tsc(void) { + if (!cpu_has_tsc) + return 0; + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return 0; cpufreq_register_notifier(&time_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); return 0; -- cgit v1.2.3-70-g09d2 From 52a8968ce95da8469ba0a9b3e4010fe31caf77a3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Aug 2008 13:35:06 +0200 Subject: x86: fix cpufreq + sched_clock() regression I noticed that my sched_clock() was slow on a number of machine, so I started looking at cpufreq. The below seems to fix the problem for me. Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 9bed5cae4bd..8e786b0d665 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -314,7 +314,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, mark_tsc_unstable("cpufreq changes"); } - set_cyc2ns_scale(tsc_khz_ref, freq->cpu); + set_cyc2ns_scale(tsc_khz, freq->cpu); return 0; } -- cgit v1.2.3-70-g09d2 From fbb16e243887332dd5754e48ffe5b963378f3cd2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 3 Sep 2008 00:54:47 +0200 Subject: [x86] Fix TSC calibration issues Larry Finger reported at http://lkml.org/lkml/2008/9/1/90: An ancient laptop of mine started throwing errors from b43legacy when I started using 2.6.27 on it. This has been bisected to commit bfc0f59 "x86: merge tsc calibration". The unification of the TSC code adopted mostly the 64bit code, which prefers PMTIMER/HPET over the PIT calibration. Larrys system has an AMD K6 CPU. Such systems are known to have PMTIMER incarnations which run at double speed. This results in a miscalibration of the TSC by factor 0.5. So the resulting calibrated CPU/TSC speed is half of the real CPU speed, which means that the TSC based delay loop will run half the time it should run. That might explain why the b43legacy driver went berserk. On the other hand we know about systems, where the PIT based calibration results in random crap due to heavy SMI/SMM disturbance. On those systems the PMTIMER/HPET based calibration logic with SMI detection shows better results. According to Alok also virtualized systems suffer from the PIT calibration method. The solution is to use a more wreckage aware aproach than the current either/or decision. 1) reimplement the retry loop which was dropped from the 32bit code during the merge. It repeats the calibration and selects the lowest frequency value as this is probably the closest estimate to the real frequency 2) Monitor the delta of the TSC values in the delay loop which waits for the PIT counter to reach zero. If the maximum value is significantly different from the minimum, then we have a pretty safe indicator that the loop was disturbed by an SMI. 3) keep the pmtimer/hpet reference as a backup solution for systems where the SMI disturbance is a permanent point of failure for PIT based calibration 4) do the loop iteration for both methods, record the lowest value and decide after all iterations finished. 
5) Set a clear preference for PIT based calibration when the result makes sense. The implementation does the reference calibration based on HPET/PMTIMER around the delay, which is necessary for the PIT anyway, but keeps separate TSC values to ensure the "independence" of the resulting calibration values. Tested on various 32bit/64bit machines including Geode 266MHz, AMD K6 (an affected machine with a double speed pmtimer which I grabbed out of the dump), Pentium class machines and AMD/Intel 64 bit boxen. Bisected-by: Larry Finger Signed-off-by: Thomas Gleixner Tested-by: Larry Finger Signed-off-by: Linus Torvalds --- arch/x86/kernel/tsc.c | 235 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 181 insertions(+), 54 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 8e786b0d665..ac79bd143da 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -127,75 +127,202 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) */ unsigned long native_calibrate_tsc(void) { - unsigned long flags; - u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; - int hpet = is_hpet_enabled(); - unsigned int tsc_khz_val = 0; + u64 tsc1, tsc2, tr1, tr2, tsc, delta, pm1, pm2, hpet1, hpet2; + unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; + unsigned long flags, tscmin, tscmax; + int hpet = is_hpet_enabled(), pitcnt, i; - local_irq_save(flags); - - tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); - - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); - tr1 = get_cycles(); - while ((inb(0x61) & 0x20) == 0); - tr2 = get_cycles(); - - tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); - - local_irq_restore(flags); + /* + * Run 5 calibration loops to get the lowest frequency value + * (the best estimate). We use two different calibration modes + * here: + * + * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and + * load a timeout of 50ms. We read the time right after we + * started the timer and wait until the PIT count down reaches + * zero. In each wait loop iteration we read the TSC and check + * the delta to the previous read. We keep track of the min + * and max values of that delta. The delta is mostly defined + * by the IO time of the PIT access, so we can detect when an + * SMI/SMM disturbance happened between the two reads. If the + * maximum time is significantly larger than the minimum time, + * then we discard the result and have another try. + * + * 2) Reference counter. If available we use the HPET or the + * PMTIMER as a reference to check the sanity of that value. + * We use separate TSC readouts and check inside of the + * reference read for an SMI/SMM disturbance. We discard + * disturbed values here as well. We do that around the PIT + * calibration delay loop as we have to wait for a certain + * amount of time anyway. + */ + for (i = 0; i < 5; i++) { + + tscmin = ULONG_MAX; + tscmax = 0; + pitcnt = 0; + + local_irq_save(flags); + + /* + * Read the start value and the reference count of + * hpet/pmtimer when available: + */ + tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); + + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Setup CTC channel 2* for mode 0, (interrupt on terminal + * count mode), binary count. Set the latch register to 50ms + * (LSB then MSB) to begin countdown. + * + * Some devices need a delay here.
+ */ + outb(0xb0, 0x43); + outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); + outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + + tsc = tr1 = tr2 = get_cycles(); + + while ((inb(0x61) & 0x20) == 0) { + tr2 = get_cycles(); + delta = tr2 - tsc; + tsc = tr2; + if ((unsigned int) delta < tscmin) + tscmin = (unsigned int) delta; + if ((unsigned int) delta > tscmax) + tscmax = (unsigned int) delta; + pitcnt++; + } + + /* + * We waited at least 50ms above. Now read + * pmtimer/hpet reference again + */ + tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + + local_irq_restore(flags); + + /* + * Sanity checks: + * + * If we were not able to read the PIT more than 5000 + * times, then we have been hit by a massive SMI + * + * If the maximum is 10 times larger than the minimum, + * then we got hit by an SMI as well. + */ + if (pitcnt > 5000 && tscmax < 10 * tscmin) { + + /* Calculate the PIT value */ + delta = tr2 - tr1; + do_div(delta, 50); + + /* We take the smallest value into account */ + tsc_pit_min = min(tsc_pit_min, (unsigned long) delta); + } + + /* hpet or pmtimer available ? */ + if (!hpet && !pm1 && !pm2) + continue; + + /* Check, whether the sampling was disturbed by an SMI */ + if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) + continue; + + tsc2 = (tsc2 - tsc1) * 1000000LL; + + if (hpet) { + if (hpet2 < hpet1) + hpet2 += 0x100000000ULL; + hpet2 -= hpet1; + tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); + do_div(tsc1, 1000000); + } else { + if (pm2 < pm1) + pm2 += (u64)ACPI_PM_OVRRUN; + pm2 -= pm1; + tsc1 = pm2 * 1000000000LL; + do_div(tsc1, PMTMR_TICKS_PER_SEC); + } + + do_div(tsc2, tsc1); + tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); + } /* - * Preset the result with the raw and inaccurate PIT - * calibration value + * Now check the results. */ - delta = (tr2 - tr1); - do_div(delta, 50); - tsc_khz_val = delta; + if (tsc_pit_min == ULONG_MAX) { + /* PIT gave no useful value */ + printk(KERN_WARNING "TSC: PIT calibration failed due to " + "SMI disturbance.\n"); + + /* We don't have an alternative source, disable TSC */ + if (!hpet && !pm1 && !pm2) { + printk("TSC: No reference (HPET/PMTIMER) available\n"); + return 0; + } + + /* The alternative source failed as well, disable TSC */ + if (tsc_ref_min == ULONG_MAX) { + printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " + "failed due to SMI disturbance.\n"); + return 0; + } + + /* Use the alternative source */ + printk(KERN_INFO "TSC: using %s reference calibration\n", + hpet ? "HPET" : "PMTIMER"); + + return tsc_ref_min; + } - /* hpet or pmtimer available ? */ + /* We don't have an alternative source, use the PIT calibration value */ if (!hpet && !pm1 && !pm2) { - printk(KERN_INFO "TSC calibrated against PIT\n"); - goto out; + printk(KERN_INFO "TSC: Using PIT calibration value\n"); + return tsc_pit_min; } - /* Check, whether the sampling was disturbed by an SMI */ - if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { - printk(KERN_WARNING "TSC calibration disturbed by SMI, " - "using PIT calibration result\n"); - goto out; + /* The alternative source failed, use the PIT calibration value */ + if (tsc_ref_min == ULONG_MAX) { + printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due " + "to SMI disturbance. 
Using PIT calibration\n"); return tsc_pit_min; } - tsc2 = (tsc2 - tsc1) * 1000000LL; - - if (hpet) { - printk(KERN_INFO "TSC calibrated against HPET\n"); - if (hpet2 < hpet1) - hpet2 += 0x100000000ULL; - hpet2 -= hpet1; - tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); - do_div(tsc1, 1000000); - } else { - printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); - if (pm2 < pm1) - pm2 += (u64)ACPI_PM_OVRRUN; - pm2 -= pm1; - tsc1 = pm2 * 1000000000LL; - do_div(tsc1, PMTMR_TICKS_PER_SEC); + /* Check the reference deviation */ + delta = ((u64) tsc_pit_min) * 100; + do_div(delta, tsc_ref_min); + + /* + * If both calibration results are inside a 5% window, then we + * use the lower frequency of those as it is probably the + * closest estimate. + */ + if (delta >= 95 && delta <= 105) { + printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n", + hpet ? "HPET" : "PMTIMER"); + printk(KERN_INFO "TSC: using %s calibration value\n", + tsc_pit_min <= tsc_ref_min ? "PIT" : + hpet ? "HPET" : "PMTIMER"); + return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min; } - do_div(tsc2, tsc1); - tsc_khz_val = tsc2; + printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", + hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); -out: - return tsc_khz_val; + /* + * The calibration values differ too much. In doubt, we use + * the PIT value as we know that there are PMTIMERs around + * running at double speed. + */ + printk(KERN_INFO "TSC: Using PIT calibration value\n"); + return tsc_pit_min; } - #ifdef CONFIG_X86_32 /* Only called from the Powernow K7 cpu freq driver */ int recalibrate_cpu_khz(void) -- cgit v1.2.3-70-g09d2 From ec0c15afb41fd9ad45b53468b60db50170e22346 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 3 Sep 2008 07:30:13 -0700 Subject: Split up PIT part of TSC calibration from native_calibrate_tsc The TSC calibration function is still very complicated, but this makes it at least a little bit less so by moving the PIT part out into a helper function of its own. Tested-by: Larry Finger Signed-off-by: Linus Torvalds --- arch/x86/kernel/tsc.c | 132 +++++++++++++++++++++++++++----------------- 1 file changed, 71 insertions(+), 61 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ac79bd143da..346cae5ac42 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -122,15 +122,75 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) return ULLONG_MAX; } +/* + * Try to calibrate the TSC against the Programmable + * Interrupt Timer and return the frequency of the TSC + * in kHz. + * + * Return ULONG_MAX on failure to calibrate. + */ +static unsigned long pit_calibrate_tsc(void) +{ + u64 tsc, t1, t2, delta; + unsigned long tscmin, tscmax; + int pitcnt; + + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Setup CTC channel 2* for mode 0, (interrupt on terminal + * count mode), binary count. Set the latch register to 50ms + * (LSB then MSB) to begin countdown.
+ */ + outb(0xb0, 0x43); + outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); + outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + + tsc = t1 = t2 = get_cycles(); + + pitcnt = 0; + tscmax = 0; + tscmin = ULONG_MAX; + while ((inb(0x61) & 0x20) == 0) { + t2 = get_cycles(); + delta = t2 - tsc; + tsc = t2; + if ((unsigned long) delta < tscmin) + tscmin = (unsigned int) delta; + if ((unsigned long) delta > tscmax) + tscmax = (unsigned int) delta; + pitcnt++; + } + + /* + * Sanity checks: + * + * If we were not able to read the PIT more than 5000 + * times, then we have been hit by a massive SMI + * + * If the maximum is 10 times larger than the minimum, + * then we got hit by an SMI as well. + */ + if (pitcnt < 5000 || tscmax > 10 * tscmin) + return ULONG_MAX; + + /* Calculate the PIT value */ + delta = t2 - t1; + do_div(delta, 50); + return delta; +} + + /** * native_calibrate_tsc - calibrate the tsc on boot */ unsigned long native_calibrate_tsc(void) { - u64 tsc1, tsc2, tr1, tr2, tsc, delta, pm1, pm2, hpet1, hpet2; + u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags, tscmin, tscmax; - int hpet = is_hpet_enabled(), pitcnt, i; + unsigned long flags; + int hpet = is_hpet_enabled(), i; /* * Run 5 calibration loops to get the lowest frequency value @@ -157,72 +217,22 @@ unsigned long native_calibrate_tsc(void) * amount of time anyway. */ for (i = 0; i < 5; i++) { - - tscmin = ULONG_MAX; - tscmax = 0; - pitcnt = 0; - - local_irq_save(flags); + unsigned long tsc_pit_khz; /* * Read the start value and the reference count of - * hpet/pmtimer when available: + * hpet/pmtimer when available. Then do the PIT + * calibration, which will take at least 50ms, and + * read the end value. */ + local_irq_save(flags); tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); - - /* Set the Gate high, disable speaker */ - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - /* - * Setup CTC channel 2* for mode 0, (interrupt on terminal - * count mode), binary count. Set the latch register to 50ms - * (LSB then MSB) to begin countdown. - * - * Some devices need a delay here. - */ - outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); - - tsc = tr1 = tr2 = get_cycles(); - - while ((inb(0x61) & 0x20) == 0) { - tr2 = get_cycles(); - delta = tr2 - tsc; - tsc = tr2; - if ((unsigned int) delta < tscmin) - tscmin = (unsigned int) delta; - if ((unsigned int) delta > tscmax) - tscmax = (unsigned int) delta; - pitcnt++; - } - - /* - * We waited at least 50ms above. Now read - * pmtimer/hpet reference again - */ + tsc_pit_khz = pit_calibrate_tsc(); tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); - local_irq_restore(flags); - /* - * Sanity checks: - * - * If we were not able to read the PIT more than 5000 - * times, then we have been hit by a massive SMI - * - * If the maximum is 10 times larger than the minimum, - * then we got hit by an SMI as well. - */ - if (pitcnt > 5000 && tscmax < 10 * tscmin) { - - /* Calculate the PIT value */ - delta = tr2 - tr1; - do_div(delta, 50); - - /* We take the smallest value into account */ - tsc_pit_min = min(tsc_pit_min, (unsigned long) delta); - } + /* Pick the lowest PIT TSC calibration so far */ + tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); /* hpet or pmtimer available ? 
*/ if (!hpet && !pm1 && !pm2) -- cgit v1.2.3-70-g09d2 From de014d617636d6a6bd5aef3b3d1f7f9a35669057 Mon Sep 17 00:00:00 2001 From: Alok N Kataria Date: Wed, 3 Sep 2008 18:18:01 -0700 Subject: x86: Change warning message in TSC calibration. When calibration against the PIT fails, the warning that we print is misleading. In a virtualized environment the VM may get descheduled during calibration, or the check in the PIT calibration may fail due to other virtualization overheads. The warning message explicitly assumes that calibration failed due to SMIs, which may not be the case. Change that to something proper. Signed-off-by: Alok N Kataria Signed-off-by: Linus Torvalds --- arch/x86/kernel/tsc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 346cae5ac42..8f98e9de1b8 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -267,8 +267,7 @@ unsigned long native_calibrate_tsc(void) */ if (tsc_pit_min == ULONG_MAX) { /* PIT gave no useful value */ - printk(KERN_WARNING "TSC: PIT calibration failed due to " - "SMI disturbance.\n"); + printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); /* We don't have an alternative source, disable TSC */ if (!hpet && !pm1 && !pm2) { -- cgit v1.2.3-70-g09d2 From cce3e057242d3d46fea07b9eb3910b0076419be5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:44 +0000 Subject: x86: TSC: define the PIT latch value separate Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 346cae5ac42..aa11413e7c1 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -122,6 +122,10 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) return ULLONG_MAX; } +#define CAL_MS 50 +#define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) +#define CAL_PIT_LOOPS 5000 + /* * Try to calibrate the TSC against the Programmable * Interrupt Timer and return the frequency of the TSC @@ -144,8 +148,8 @@ static unsigned long pit_calibrate_tsc(void) * (LSB then MSB) to begin countdown. */ outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + outb(CAL_LATCH & 0xff, 0x42); + outb(CAL_LATCH >> 8, 0x42); tsc = t1 = t2 = get_cycles(); @@ -166,18 +170,18 @@ static unsigned long pit_calibrate_tsc(void) /* * Sanity checks: * - * If we were not able to read the PIT more than 5000 + * If we were not able to read the PIT more than PIT_MIN_LOOPS * times, then we have been hit by a massive SMI * * If the maximum is 10 times larger than the minimum, * then we got hit by an SMI as well.
*/ - if (pitcnt < 5000 || tscmax > 10 * tscmin) + if (pitcnt < CAL_PIT_LOOPS || tscmax > 10 * tscmin) return ULONG_MAX; /* Calculate the PIT value */ delta = t2 - t1; - do_div(delta, 50); + do_div(delta, CAL_MS); return delta; } -- cgit v1.2.3-70-g09d2 From d683ef7afe8b6dbac6a3c681cef8a908357793ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:48 +0000 Subject: x86: TSC: separate hpet/pmtimer calculation out Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 56 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index aa11413e7c1..ebb9bf824a0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -122,6 +122,43 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) return ULLONG_MAX; } +/* + * Calculate the TSC frequency from HPET reference + */ +static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) +{ + u64 tmp; + + if (hpet2 < hpet1) + hpet2 += 0x100000000ULL; + hpet2 -= hpet1; + tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); + do_div(tmp, 1000000); + do_div(deltatsc, tmp); + + return (unsigned long) deltatsc; +} + +/* + * Calculate the TSC frequency from PMTimer reference + */ +static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) +{ + u64 tmp; + + if (!pm1 && !pm2) + return ULONG_MAX; + + if (pm2 < pm1) + pm2 += (u64)ACPI_PM_OVRRUN; + pm2 -= pm1; + tmp = pm2 * 1000000000LL; + do_div(tmp, PMTMR_TICKS_PER_SEC); + do_div(deltatsc, tmp); + + return (unsigned long) deltatsc; +} + #define CAL_MS 50 #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) #define CAL_PIT_LOOPS 5000 @@ -247,22 +284,11 @@ unsigned long native_calibrate_tsc(void) continue; tsc2 = (tsc2 - tsc1) * 1000000LL; + if (hpet) + tsc2 = calc_hpet_ref(tsc2, hpet1, hpet2); + else + tsc2 = calc_pmtimer_ref(tsc2, pm1, pm2); - if (hpet) { - if (hpet2 < hpet1) - hpet2 += 0x100000000ULL; - hpet2 -= hpet1; - tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); - do_div(tsc1, 1000000); - } else { - if (pm2 < pm1) - pm2 += (u64)ACPI_PM_OVRRUN; - pm2 -= pm1; - tsc1 = pm2 * 1000000000LL; - do_div(tsc1, PMTMR_TICKS_PER_SEC); - } - - do_div(tsc2, tsc1); tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); } -- cgit v1.2.3-70-g09d2 From 827014be05e4515fa0dfc32e3100c4dab2070a98 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:53 +0000 Subject: x86: TSC: use one set of reference variables Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ebb9bf824a0..52284d31fc9 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. 
Take care of SMI disturbance */ -static u64 tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *p, int hpet) { u64 t1, t2; int i; @@ -112,9 +112,9 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) for (i = 0; i < MAX_RETRIES; i++) { t1 = get_cycles(); if (hpet) - *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; + *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; else - *pm = acpi_pm_read_early(); + *p = acpi_pm_read_early(); t2 = get_cycles(); if ((t2 - t1) < SMI_TRESHOLD) return t2; @@ -228,7 +228,7 @@ static unsigned long pit_calibrate_tsc(void) */ unsigned long native_calibrate_tsc(void) { - u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2; + u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; unsigned long flags; int hpet = is_hpet_enabled(), i; @@ -267,16 +267,16 @@ unsigned long native_calibrate_tsc(void) * read the end value. */ local_irq_save(flags); - tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); + tsc1 = tsc_read_refs(&ref1, hpet); tsc_pit_khz = pit_calibrate_tsc(); - tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + tsc2 = tsc_read_refs(&ref2, hpet); local_irq_restore(flags); /* Pick the lowest PIT TSC calibration so far */ tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); /* hpet or pmtimer available ? */ - if (!hpet && !pm1 && !pm2) + if (!hpet && !ref1 && !ref2) continue; /* Check, whether the sampling was disturbed by an SMI */ @@ -285,9 +285,9 @@ unsigned long native_calibrate_tsc(void) tsc2 = (tsc2 - tsc1) * 1000000LL; if (hpet) - tsc2 = calc_hpet_ref(tsc2, hpet1, hpet2); + tsc2 = calc_hpet_ref(tsc2, ref1, ref2); else - tsc2 = calc_pmtimer_ref(tsc2, pm1, pm2); + tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); } @@ -301,7 +301,7 @@ unsigned long native_calibrate_tsc(void) "SMI disturbance.\n"); /* We don't have an alternative source, disable TSC */ - if (!hpet && !pm1 && !pm2) { + if (!hpet && !ref1 && !ref2) { printk("TSC: No reference (HPET/PMTIMER) available\n"); return 0; } @@ -321,7 +321,7 @@ unsigned long native_calibrate_tsc(void) } /* We don't have an alternative source, use the PIT calibration value */ - if (!hpet && !pm1 && !pm2) { + if (!hpet && !ref1 && !ref2) { printk(KERN_INFO "TSC: Using PIT calibration value\n"); return tsc_pit_min; } -- cgit v1.2.3-70-g09d2 From a977c400957451f3bd92b9ed6022f5fe8a6cbbf5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:59 +0000 Subject: x86: TSC make the calibration loop smarter The last changes made the calibration loop 250ms long, which is far too much. Try to do that more cleverly. Experiments have shown that using a 10ms delay for the PIT based calibration gives us a good enough value. If we have a reference (HPET/PMTIMER) and the PIT result is close enough to the reference, then we can break out of the calibration loop on a match right away and use the reference value. Otherwise we just loop 3 times and decide then which value to take. One caveat is that for virtualized environments the PIT calibration often does not work at all, and I found out that 10ms is a bit too short as well for the reference to give a sane result. The solution here is to make the last loop longer when the first two PIT calibrations failed.
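In outline, the adaptive policy described above looks like this (a schematic C sketch only; within_pct() and read_reference_khz() are hypothetical condensations of the real patch below, which keeps the reference handling inline):

	/* Preset the fast 10ms PIT parameters; fall back to 50ms on failure */
	latch = CAL_LATCH;
	ms = CAL_MS;
	loopmin = CAL_PIT_LOOPS;

	for (i = 0; i < 3; i++) {
		/* One PIT run, with a HPET/PMTIMER readout around it */
		tsc_pit_min = min(tsc_pit_min,
				  pit_calibrate_tsc(latch, ms, loopmin));
		tsc_ref_min = min(tsc_ref_min, read_reference_khz());

		/* PIT and reference agree within 10%: done, use the reference */
		if (within_pct(tsc_pit_min, tsc_ref_min, 10))
			return tsc_ref_min;

		/* PIT failed twice (likely virtualized): switch to the slow run */
		if (i == 1 && tsc_pit_min == ULONG_MAX) {
			latch = CAL2_LATCH;
			ms = CAL2_MS;
			loopmin = CAL2_PIT_LOOPS;
		}
	}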
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 95 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 37 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 52284d31fc9..da033b5b3e1 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -159,9 +159,14 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) return (unsigned long) deltatsc; } -#define CAL_MS 50 +#define CAL_MS 10 #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) -#define CAL_PIT_LOOPS 5000 +#define CAL_PIT_LOOPS 1000 + +#define CAL2_MS 50 +#define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) +#define CAL2_PIT_LOOPS 5000 + /* * Try to calibrate the TSC against the Programmable @@ -170,7 +175,7 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) * * Return ULONG_MAX on failure to calibrate. */ -static unsigned long pit_calibrate_tsc(void) +static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) { u64 tsc, t1, t2, delta; unsigned long tscmin, tscmax; @@ -185,8 +190,8 @@ static unsigned long pit_calibrate_tsc(void) * (LSB then MSB) to begin countdown. */ outb(0xb0, 0x43); - outb(CAL_LATCH & 0xff, 0x42); - outb(CAL_LATCH >> 8, 0x42); + outb(latch & 0xff, 0x42); + outb(latch >> 8, 0x42); tsc = t1 = t2 = get_cycles(); @@ -207,18 +212,18 @@ static unsigned long pit_calibrate_tsc(void) /* * Sanity checks: * - * If we were not able to read the PIT more than PIT_MIN_LOOPS + * If we were not able to read the PIT more than loopmin * times, then we have been hit by a massive SMI * * If the maximum is 10 times larger than the minimum, * then we got hit by an SMI as well. */ - if (pitcnt < CAL_PIT_LOOPS || tscmax > 10 * tscmin) + if (pitcnt < loopmin || tscmax > 10 * tscmin) return ULONG_MAX; /* Calculate the PIT value */ delta = t2 - t1; - do_div(delta, CAL_MS); + do_div(delta, ms); return delta; } @@ -230,8 +235,8 @@ unsigned long native_calibrate_tsc(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags; - int hpet = is_hpet_enabled(), i; + unsigned long flags, latch, ms; + int hpet = is_hpet_enabled(), i, loopmin; /* * Run 5 calibration loops to get the lowest frequency value @@ -257,7 +262,13 @@ unsigned long native_calibrate_tsc(void) * calibration delay loop as we have to wait for a certain * amount of time anyway. */ - for (i = 0; i < 5; i++) { + + /* Preset PIT loop values */ + latch = CAL_LATCH; + ms = CAL_MS; + loopmin = CAL_PIT_LOOPS; + + for (i = 0; i < 3; i++) { unsigned long tsc_pit_khz; /* @@ -268,7 +279,7 @@ unsigned long native_calibrate_tsc(void) */ local_irq_save(flags); tsc1 = tsc_read_refs(&ref1, hpet); - tsc_pit_khz = pit_calibrate_tsc(); + tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); tsc2 = tsc_read_refs(&ref2, hpet); local_irq_restore(flags); @@ -290,6 +301,35 @@ unsigned long native_calibrate_tsc(void) tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); + + /* Check the reference deviation */ + delta = ((u64) tsc_pit_min) * 100; + do_div(delta, tsc_ref_min); + + /* + * If both calibration results are inside a 10% window + * then we can be sure, that the calibration + * succeeded. We break out of the loop right away. We + * use the reference value, as it is more precise. + */ + if (delta >= 90 && delta <= 110) { + printk(KERN_INFO + "TSC: PIT calibration matches %s. 
%d loops\n", + hpet ? "HPET" : "PMTIMER", i + 1); + return tsc_ref_min; + } + + /* + * Check whether PIT failed more than once. This + * happens in virtualized environments. We need to + * give the virtual PC a slightly longer timeframe for + * the HPET/PMTIMER to make the result precise. + */ + if (i == 1 && tsc_pit_min == ULONG_MAX) { + latch = CAL2_LATCH; + ms = CAL2_MS; + loopmin = CAL2_PIT_LOOPS; + } } /* @@ -309,7 +349,7 @@ unsigned long native_calibrate_tsc(void) /* The alternative source failed as well, disable TSC */ if (tsc_ref_min == ULONG_MAX) { printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " - "failed due to SMI disturbance.\n"); + "failed.\n"); return 0; } @@ -328,37 +368,18 @@ unsigned long native_calibrate_tsc(void) /* The alternative source failed, use the PIT calibration value */ if (tsc_ref_min == ULONG_MAX) { - printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due " - "to SMI disturbance. Using PIT calibration\n"); + printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " + "Using PIT calibration\n"); return tsc_pit_min; } - /* Check the reference deviation */ - delta = ((u64) tsc_pit_min) * 100; - do_div(delta, tsc_ref_min); - - /* - * If both calibration results are inside a 5% window, then we - * use the lower frequency of those as it is probably the - * closest estimate. - */ - if (delta >= 95 && delta <= 105) { - printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n", - hpet ? "HPET" : "PMTIMER"); - printk(KERN_INFO "TSC: using %s calibration value\n", - tsc_pit_min <= tsc_ref_min ? "PIT" : - hpet ? "HPET" : "PMTIMER"); - return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min; - } - - printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", - hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); - /* * The calibration values differ too much. In doubt, we use * the PIT value as we know that there are PMTIMERs around - * running at double speed. + * running at double speed. At least we let the user know: */ + printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", + hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); printk(KERN_INFO "TSC: Using PIT calibration value\n"); return tsc_pit_min; } -- cgit v1.2.3-70-g09d2 From 6ac40ed0413ef4096720f966e11c7cdf259eee3f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 4 Sep 2008 10:41:22 -0700 Subject: x86: quick TSC calibration Introduce a fast TSC-calibration method on sane hardware. It only uses 17920 PIT timer ticks to calibrate the TSC, plus 256 ticks on each side to make sure the TSC values were very close to the tick, so the whole calibration takes 15ms. Yet, despite only taking 15ms, we can actually give pretty stringent guarantees of accuracy: - the code requires that we hit each 256-counter block at least 50 times, so the TSC error is basically at *MOST* just a few PIT cycles off in any direction. In practice, it's going to be about one microsecond off (which is how long it takes to read the counter) - so over 17920 PIT cycles, we can pretty much guarantee that the calibration error is less than one half of a percent. My testing bears this out: on my machine, the quick-calibration reports 2934.085kHz, while the slow one reports 2933.415. Yes, the slower calibration is still more precise. For me, the slow calibration is stable to within about one hundredth of a percent, so it's (at a guess) roughly an order-and-a-half of magnitude more precise. The longer you wait, the more precise you can be.
However, the nice thing about the fast TSC PIT synchronization is that it's pretty much _guaranteed_ to give that 0.5% precision, and fail gracefully (and very quickly) if it doesn't get it. And it really is fairly simple (even if there's a lot of _details_ there, and I didn't get all of those right on the first try or even the second ;) The patch says "110 insertions", but 63 of those new lines are actually comments. Signed-off-by: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 110 insertions(+), 1 deletions(-) --- arch/x86/kernel/tsc.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index da033b5b3e1..839070ba846 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -227,6 +227,117 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) return delta; } +/* + * This reads the current MSB of the PIT counter, and + * checks if we are running on sufficiently fast and + * non-virtualized hardware. + * + * Our expectations are: + * + * - the PIT is running at roughly 1.19MHz + * + * - each IO is going to take about 1us on real hardware, + * but we allow it to be much faster (by a factor of 10) or + * _slightly_ slower (ie we allow up to a 2us read+counter + * update - anything else implies an unacceptably slow CPU + * or PIT for the fast calibration to work. + * + * - with 256 PIT ticks to read the value, we have 214us to + * see the same MSB (and overhead like doing a single TSC + * read per MSB value etc). + * + * - We're doing 2 reads per loop (LSB, MSB), and we expect + * them each to take about a microsecond on real hardware. + * So we expect a count value of around 100. But we'll be + * generous, and accept anything over 50. + * + * - if the PIT is stuck, and we see *many* more reads, we + * return early (and the next caller of pit_expect_msb() + * will then consider it a failure when they don't see the + * next expected value). + * + * These expectations mean that we know that we have seen the + * transition from one expected value to another with a fairly + * high accuracy, and we didn't miss any events. We can thus + * use the TSC value at the transitions to calculate a pretty + * good value for the TSC frequency. + */ +static inline int pit_expect_msb(unsigned char val) +{ + int count = 0; + + for (count = 0; count < 50000; count++) { + /* Ignore LSB */ + inb(0x42); + if (inb(0x42) != val) + break; + } + return count > 50; } + +/* + * How many MSB values do we want to see? We aim for a + * 15ms calibration, which assuming a 2us counter read + * error should give us roughly 150 ppm precision for + * the calibration. + */ +#define QUICK_PIT_MS 15 +#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) + +static unsigned long quick_pit_calibrate(void) +{ + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Counter 2, mode 0 (one-shot), binary count + * + * NOTE! Mode 2 decrements by two (and then the + * output is flipped each time, giving the same + * final output frequency as a decrement-by-one), + * so mode 0 is much better when looking at the + * individual counts.
+ */ + outb(0xb0, 0x43); + + /* Start at 0xffff */ + outb(0xff, 0x42); + outb(0xff, 0x42); + + if (pit_expect_msb(0xff)) { + int i; + u64 t1, t2, delta; + unsigned char expect = 0xfe; + + t1 = get_cycles(); + for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { + if (!pit_expect_msb(expect)) + goto failed; + } + t2 = get_cycles(); + + /* + * Ok, if we get here, then we've seen the + * MSB of the PIT decrement QUICK_PIT_ITERATIONS + * times, and each MSB had many hits, so we never + * had any sudden jumps. + * + * As a result, we can depend on there not being + * any odd delays anywhere, and the TSC reads are + * reliable. + * + * kHz = ticks / time-in-seconds / 1000; + * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000 + * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000) + */ + delta = (t2 - t1)*PIT_TICK_RATE; + do_div(delta, QUICK_PIT_ITERATIONS*256*1000); + printk("Fast TSC calibration using PIT\n"); + return delta; + } +failed: + return 0; +} /** * native_calibrate_tsc - calibrate the tsc on boot @@ -235,9 +346,15 @@ unsigned long native_calibrate_tsc(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags, latch, ms; + unsigned long flags, latch, ms, fast_calibrate; int hpet = is_hpet_enabled(), i, loopmin; + local_irq_save(flags); + fast_calibrate = quick_pit_calibrate(); + local_irq_restore(flags); + if (fast_calibrate) + return fast_calibrate; + /* * Run 5 calibration loops to get the lowest frequency value * (the best estimate). We use two different calibration modes -- cgit v1.2.3-70-g09d2 From 4156e9a8ef5b521185f451213d33fa661f38512e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 4 Sep 2008 22:47:47 +0200 Subject: x86: quick TSC calibration, improve - make sure the final TSC timestamp is reliable too Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 839070ba846..6dab90f6851 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -316,6 +316,12 @@ static unsigned long quick_pit_calibrate(void) } t2 = get_cycles(); + /* + * Make sure we can rely on the second TSC timestamp: + */ + if (!pit_expect_msb(--expect)) + goto failed; + /* * Ok, if we get here, then we've seen the * MSB of the PIT decrement QUICK_PIT_ITERATIONS -- cgit v1.2.3-70-g09d2 From 5df45515512436a808d3476a90e83f2efb022422 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 6 Sep 2008 23:55:40 +0200 Subject: x86, tsc calibration: fix my brown paperbag day ... Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6dab90f6851..4847a928050 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -319,7 +319,7 @@ static unsigned long quick_pit_calibrate(void) /* * Make sure we can rely on the second TSC timestamp: */ - if (!pit_expect_msb(--expect)) + if (!pit_expect_msb(expect)) goto failed; /* -- cgit v1.2.3-70-g09d2
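The arithmetic in quick_pit_calibrate() is easy to sanity-check in isolation. The stand-alone C program below replays the kHz computation from the patch above; PIT_TICK_RATE and the QUICK_PIT_* constants mirror the kernel's, while the TSC delta of 43,440,000 cycles is a purely hypothetical reading chosen to land near the ~2.93 GHz machine quoted in the log:

#include <stdio.h>
#include <stdint.h>

#define PIT_TICK_RATE		1193182UL	/* PIT input clock, ~1.193182 MHz */
#define QUICK_PIT_MS		15
#define QUICK_PIT_ITERATIONS	(QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)

int main(void)
{
	/*
	 * Hypothetical TSC delta measured while the PIT MSB walked
	 * through QUICK_PIT_ITERATIONS values of 256 ticks each:
	 *
	 * kHz = (t2 - t1) * PIT_TICK_RATE / (QUICK_PIT_ITERATIONS * 256 * 1000)
	 */
	uint64_t delta = 43440000ULL;

	delta *= PIT_TICK_RATE;
	delta /= QUICK_PIT_ITERATIONS * 256 * 1000;

	printf("calibrated TSC frequency: %llu kHz\n",
	       (unsigned long long) delta);
	return 0;
}

With these numbers the program prints a value slightly above 2,934,000 kHz, which is in the same units the kernel stores in tsc_khz.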