From 0ef95533326a7b37d16025af9edc0c18e644b346 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 1 Jul 2008 11:43:18 -0700 Subject: x86: merge sched_clock handling Move the basic global variable definitions and sched_clock handling in the common "tsc.c" file. - Unify notsc kernel command line handling for 32 bit and 64bit. - Functional changes for 64bit. - "tsc_disabled" is updated if "notsc" is passed at boottime. - Fallback to jiffies for sched_clock, incase notsc is passed on commandline. Signed-off-by: Alok N Kataria Signed-off-by: Dan Hecht Cc: Dan Hecht Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 arch/x86/kernel/tsc.c (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c new file mode 100644 index 00000000000..5d0be778fad --- /dev/null +++ b/arch/x86/kernel/tsc.c @@ -0,0 +1,86 @@ +#include +#include +#include +#include + +unsigned int cpu_khz; /* TSC clocks / usec, not used here */ +EXPORT_SYMBOL(cpu_khz); +unsigned int tsc_khz; +EXPORT_SYMBOL(tsc_khz); + +/* + * TSC can be unstable due to cpufreq or due to unsynced TSCs + */ +int tsc_unstable; + +/* native_sched_clock() is called before tsc_init(), so + we must start with the TSC soft disabled to prevent + erroneous rdtsc usage on !cpu_has_tsc processors */ +int tsc_disabled = -1; + +/* + * Scheduler clock - returns current time in nanosec units. + */ +u64 native_sched_clock(void) +{ + u64 this_offset; + + /* + * Fall back to jiffies if there's no TSC available: + * ( But note that we still use it if the TSC is marked + * unstable. We do this because unlike Time Of Day, + * the scheduler clock tolerates small errors and it's + * very important for it to be as fast as the platform + * can achive it. ) + */ + if (unlikely(tsc_disabled)) { + /* No locking but a rare wrong value is not a big deal: */ + return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); + } + + /* read the Time Stamp Counter: */ + rdtscll(this_offset); + + /* return the value in ns */ + return cycles_2_ns(this_offset); +} + +/* We need to define a real function for sched_clock, to override the + weak default version */ +#ifdef CONFIG_PARAVIRT +unsigned long long sched_clock(void) +{ + return paravirt_sched_clock(); +} +#else +unsigned long long +sched_clock(void) __attribute__((alias("native_sched_clock"))); +#endif + +int check_tsc_unstable(void) +{ + return tsc_unstable; +} +EXPORT_SYMBOL_GPL(check_tsc_unstable); + +#ifdef CONFIG_X86_TSC +int __init notsc_setup(char *str) +{ + printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " + "cannot disable TSC completely.\n"); + tsc_disabled = 1; + return 1; +} +#else +/* + * disable flag for tsc. Takes effect by clearing the TSC cpu flag + * in cpu/common.c + */ +int __init notsc_setup(char *str) +{ + setup_clear_cpu_cap(X86_FEATURE_TSC); + return 1; +} +#endif + +__setup("notsc", notsc_setup); -- cgit v1.2.3-70-g09d2 From bfc0f5947afa5e3a13e55867f4478c8a92c11dca Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 1 Jul 2008 11:43:24 -0700 Subject: x86: merge tsc calibration Merge the tsc calibration code for the 32bit and 64bit kernel. The paravirtualized calculate_cpu_khz for 64bit now points to the correct tsc_calibrate code as in 32bit. Original native_calculate_cpu_khz for 64 bit is now called as calibrate_cpu. Also moved the recalibrate_cpu_khz function in the common file. Note that this function is called only from powernow K7 cpu freq driver. Signed-off-by: Alok N Kataria Signed-off-by: Dan Hecht Cc: Dan Hecht Signed-off-by: Ingo Molnar --- arch/x86/kernel/time_64.c | 26 ++++++--- arch/x86/kernel/tsc.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/tsc_32.c | 74 +------------------------- arch/x86/kernel/tsc_64.c | 94 +-------------------------------- include/asm-x86/hpet.h | 2 +- include/asm-x86/tsc.h | 1 - 6 files changed, 154 insertions(+), 174 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 39ae8511a13..c6ac4dad41f 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -56,7 +56,7 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id) /* calibrate_cpu is used on systems with fixed rate TSCs to determine * processor frequency */ #define TICK_COUNT 100000000 -unsigned long __init native_calculate_cpu_khz(void) +static unsigned long __init calibrate_cpu(void) { int tsc_start, tsc_now; int i, no_ctr_free; @@ -114,14 +114,18 @@ void __init hpet_time_init(void) setup_irq(0, &irq0); } +extern void set_cyc2ns_scale(unsigned long cpu_khz, int cpu); + void __init time_init(void) { - tsc_calibrate(); + int cpu; + + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; - cpu_khz = tsc_khz; if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && - (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) - cpu_khz = calculate_cpu_khz(); + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) + cpu_khz = calibrate_cpu(); lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ; @@ -134,7 +138,17 @@ void __init time_init(void) vgetcpu_mode = VGETCPU_LSL; printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); + cpu_khz / 1000, cpu_khz % 1000); + + /* + * Secondary CPUs do not run through tsc_init(), so set up + * all the scale factors for all CPUs, assuming the same + * speed as the bootup CPU. (cpufreq notifiers will fix this + * up if their speed diverges) + */ + for_each_possible_cpu(cpu) + set_cyc2ns_scale(cpu_khz, cpu); + init_tsc_clocksource(); late_time_init = choose_time_init(); } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 5d0be778fad..e6ee14533c7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1,7 +1,11 @@ +#include #include #include #include #include +#include + +#include unsigned int cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -84,3 +88,130 @@ int __init notsc_setup(char *str) #endif __setup("notsc", notsc_setup); + +#define MAX_RETRIES 5 +#define SMI_TRESHOLD 50000 + +/* + * Read TSC and the reference counters. Take care of SMI disturbance + */ +static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) +{ + u64 t1, t2; + int i; + + for (i = 0; i < MAX_RETRIES; i++) { + t1 = get_cycles(); + if (hpet) + *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; + else + *pm = acpi_pm_read_early(); + t2 = get_cycles(); + if ((t2 - t1) < SMI_TRESHOLD) + return t2; + } + return ULLONG_MAX; +} + +/** + * tsc_calibrate - calibrate the tsc on boot + */ +static unsigned int __init tsc_calibrate(void) +{ + unsigned long flags; + u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; + int hpet = is_hpet_enabled(); + unsigned int tsc_khz_val = 0; + + local_irq_save(flags); + + tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); + + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + outb(0xb0, 0x43); + outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); + outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + tr1 = get_cycles(); + while ((inb(0x61) & 0x20) == 0); + tr2 = get_cycles(); + + tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + + local_irq_restore(flags); + + /* + * Preset the result with the raw and inaccurate PIT + * calibration value + */ + delta = (tr2 - tr1); + do_div(delta, 50); + tsc_khz_val = delta; + + /* hpet or pmtimer available ? */ + if (!hpet && !pm1 && !pm2) { + printk(KERN_INFO "TSC calibrated against PIT\n"); + goto out; + } + + /* Check, whether the sampling was disturbed by an SMI */ + if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { + printk(KERN_WARNING "TSC calibration disturbed by SMI, " + "using PIT calibration result\n"); + goto out; + } + + tsc2 = (tsc2 - tsc1) * 1000000LL; + + if (hpet) { + printk(KERN_INFO "TSC calibrated against HPET\n"); + if (hpet2 < hpet1) + hpet2 += 0x100000000ULL; + hpet2 -= hpet1; + tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); + do_div(tsc1, 1000000); + } else { + printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); + if (pm2 < pm1) + pm2 += (u64)ACPI_PM_OVRRUN; + pm2 -= pm1; + tsc1 = pm2 * 1000000000LL; + do_div(tsc1, PMTMR_TICKS_PER_SEC); + } + + do_div(tsc2, tsc1); + tsc_khz_val = tsc2; + +out: + return tsc_khz_val; +} + +unsigned long native_calculate_cpu_khz(void) +{ + return tsc_calibrate(); +} + +#ifdef CONFIG_X86_32 +/* Only called from the Powernow K7 cpu freq driver */ +int recalibrate_cpu_khz(void) +{ +#ifndef CONFIG_SMP + unsigned long cpu_khz_old = cpu_khz; + + if (cpu_has_tsc) { + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; + cpu_data(0).loops_per_jiffy = + cpufreq_scale(cpu_data(0).loops_per_jiffy, + cpu_khz_old, cpu_khz); + return 0; + } else + return -ENODEV; +#else + return -ENODEV; +#endif +} + +EXPORT_SYMBOL(recalibrate_cpu_khz); + +#endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index dc8990056d7..40c0aafb358 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -42,7 +42,7 @@ extern int tsc_disabled; DEFINE_PER_CPU(unsigned long, cyc2ns); -static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) +void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { unsigned long long tsc_now, ns_now; unsigned long flags, *scale; @@ -65,78 +65,6 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) local_irq_restore(flags); } -unsigned long native_calculate_cpu_khz(void) -{ - unsigned long long start, end; - unsigned long count; - u64 delta64 = (u64)ULLONG_MAX; - int i; - unsigned long flags; - - local_irq_save(flags); - - /* run 3 times to ensure the cache is warm and to get an accurate reading */ - for (i = 0; i < 3; i++) { - mach_prepare_counter(); - rdtscll(start); - mach_countup(&count); - rdtscll(end); - - /* - * Error: ECTCNEVERSET - * The CTC wasn't reliable: we got a hit on the very first read, - * or the CPU was so fast/slow that the quotient wouldn't fit in - * 32 bits.. - */ - if (count <= 1) - continue; - - /* cpu freq too slow: */ - if ((end - start) <= CALIBRATE_TIME_MSEC) - continue; - - /* - * We want the minimum time of all runs in case one of them - * is inaccurate due to SMI or other delay - */ - delta64 = min(delta64, (end - start)); - } - - /* cpu freq too fast (or every run was bad): */ - if (delta64 > (1ULL<<32)) - goto err; - - delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */ - do_div(delta64,CALIBRATE_TIME_MSEC); - - local_irq_restore(flags); - return (unsigned long)delta64; -err: - local_irq_restore(flags); - return 0; -} - -int recalibrate_cpu_khz(void) -{ -#ifndef CONFIG_SMP - unsigned long cpu_khz_old = cpu_khz; - - if (cpu_has_tsc) { - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; - cpu_data(0).loops_per_jiffy = - cpufreq_scale(cpu_data(0).loops_per_jiffy, - cpu_khz_old, cpu_khz); - return 0; - } else - return -ENODEV; -#else - return -ENODEV; -#endif -} - -EXPORT_SYMBOL(recalibrate_cpu_khz); - #ifdef CONFIG_CPU_FREQ /* diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index 69cbe4c9f05..c852ff9bd5d 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -40,7 +40,7 @@ extern int tsc_disabled; DEFINE_PER_CPU(unsigned long, cyc2ns); -static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) +void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { unsigned long long tsc_now, ns_now; unsigned long flags, *scale; @@ -130,98 +130,6 @@ core_initcall(cpufreq_tsc); #endif -#define MAX_RETRIES 5 -#define SMI_TRESHOLD 50000 - -/* - * Read TSC and the reference counters. Take care of SMI disturbance - */ -static unsigned long __init tsc_read_refs(unsigned long *pm, - unsigned long *hpet) -{ - unsigned long t1, t2; - int i; - - for (i = 0; i < MAX_RETRIES; i++) { - t1 = get_cycles(); - if (hpet) - *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; - else - *pm = acpi_pm_read_early(); - t2 = get_cycles(); - if ((t2 - t1) < SMI_TRESHOLD) - return t2; - } - return ULONG_MAX; -} - -/** - * tsc_calibrate - calibrate the tsc on boot - */ -void __init tsc_calibrate(void) -{ - unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2; - int hpet = is_hpet_enabled(), cpu; - - local_irq_save(flags); - - tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); - - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); - tr1 = get_cycles(); - while ((inb(0x61) & 0x20) == 0); - tr2 = get_cycles(); - - tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); - - local_irq_restore(flags); - - /* - * Preset the result with the raw and inaccurate PIT - * calibration value - */ - tsc_khz = (tr2 - tr1) / 50; - - /* hpet or pmtimer available ? */ - if (!hpet && !pm1 && !pm2) { - printk(KERN_INFO "TSC calibrated against PIT\n"); - goto out; - } - - /* Check, whether the sampling was disturbed by an SMI */ - if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) { - printk(KERN_WARNING "TSC calibration disturbed by SMI, " - "using PIT calibration result\n"); - goto out; - } - - tsc2 = (tsc2 - tsc1) * 1000000L; - - if (hpet) { - printk(KERN_INFO "TSC calibrated against HPET\n"); - if (hpet2 < hpet1) - hpet2 += 0x100000000UL; - hpet2 -= hpet1; - tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000; - } else { - printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); - if (pm2 < pm1) - pm2 += ACPI_PM_OVRRUN; - pm2 -= pm1; - tsc1 = (pm2 * 1000000000) / PMTMR_TICKS_PER_SEC; - } - - tsc_khz = tsc2 / tsc1; - -out: - for_each_possible_cpu(cpu) - set_cyc2ns_scale(tsc_khz, cpu); -} - /* * Make an educated guess if the TSC is trustworthy and synchronized * over all CPUs. diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h index 6a9b4ac59bf..82f1ac641bd 100644 --- a/include/asm-x86/hpet.h +++ b/include/asm-x86/hpet.h @@ -86,8 +86,8 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler); #else /* CONFIG_HPET_TIMER */ static inline int hpet_enable(void) { return 0; } -static inline unsigned long hpet_readl(unsigned long a) { return 0; } static inline int is_hpet_enabled(void) { return 0; } +#define hpet_readl(a) 0 #endif #endif /* ASM_X86_HPET_H */ diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h index 548873ab5fc..761054d7fef 100644 --- a/include/asm-x86/tsc.h +++ b/include/asm-x86/tsc.h @@ -58,7 +58,6 @@ int check_tsc_unstable(void); extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); -extern void tsc_calibrate(void); extern int notsc_setup(char *); #endif -- cgit v1.2.3-70-g09d2 From 2dbe06faf37b39f9ecffc054dd173b2a1dc2adcd Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 1 Jul 2008 11:43:31 -0700 Subject: x86: merge the TSC cpu-freq code Unify the TSC cpufreq code. Signed-off-by: Alok N Kataria Signed-off-by: Dan Hecht Cc: Dan Hecht Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/tsc_32.c | 113 ---------------------------------------------- arch/x86/kernel/tsc_64.c | 114 ----------------------------------------------- 3 files changed, 114 insertions(+), 227 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e6ee14533c7..595f78a2221 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -215,3 +216,116 @@ int recalibrate_cpu_khz(void) EXPORT_SYMBOL(recalibrate_cpu_khz); #endif /* CONFIG_X86_32 */ + +/* Accelerators for sched_clock() + * convert from cycles(64bits) => nanoseconds (64bits) + * basic equation: + * ns = cycles / (freq / ns_per_sec) + * ns = cycles * (ns_per_sec / freq) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) + * + * Then we use scaling math (suggested by george@mvista.com) to get: + * ns = cycles * (10^6 * SC / cpu_khz) / SC + * ns = cycles * cyc2ns_scale / SC + * + * And since SC is a constant power of two, we can convert the div + * into a shift. + * + * We can use khz divisor instead of mhz to keep a better precision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * + * -johnstul@us.ibm.com "math is hard, lets go shopping!" + */ + +DEFINE_PER_CPU(unsigned long, cyc2ns); + +void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) +{ + unsigned long long tsc_now, ns_now; + unsigned long flags, *scale; + + local_irq_save(flags); + sched_clock_idle_sleep_event(); + + scale = &per_cpu(cyc2ns, cpu); + + rdtscll(tsc_now); + ns_now = __cycles_2_ns(tsc_now); + + if (cpu_khz) + *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; + + sched_clock_idle_wakeup_event(0); + local_irq_restore(flags); +} + +#ifdef CONFIG_CPU_FREQ + +/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency + * changes. + * + * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's + * not that important because current Opteron setups do not support + * scaling on SMP anyroads. + * + * Should fix up last_tsc too. Currently gettimeofday in the + * first tick after the change will be slightly wrong. + */ + +static unsigned int ref_freq; +static unsigned long loops_per_jiffy_ref; +static unsigned long tsc_khz_ref; + +static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + unsigned long *lpj, dummy; + + if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) + return 0; + + lpj = &dummy; + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) +#ifdef CONFIG_SMP + lpj = &cpu_data(freq->cpu).loops_per_jiffy; +#else + lpj = &boot_cpu_data.loops_per_jiffy; +#endif + + if (!ref_freq) { + ref_freq = freq->old; + loops_per_jiffy_ref = *lpj; + tsc_khz_ref = tsc_khz; + } + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || + (val == CPUFREQ_RESUMECHANGE)) { + *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); + + tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) + mark_tsc_unstable("cpufreq changes"); + } + + set_cyc2ns_scale(tsc_khz_ref, freq->cpu); + + return 0; +} + +static struct notifier_block time_cpufreq_notifier_block = { + .notifier_call = time_cpufreq_notifier +}; + +static int __init cpufreq_tsc(void) +{ + cpufreq_register_notifier(&time_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + return 0; +} + +core_initcall(cpufreq_tsc); + +#endif /* CONFIG_CPU_FREQ */ diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 40c0aafb358..bbc153d36f8 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -18,119 +18,6 @@ extern int tsc_unstable; extern int tsc_disabled; -/* Accelerators for sched_clock() - * convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_khz * 10^3)) - * ns = cycles * (10^6 / cpu_khz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^6 * SC / cpu_khz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * - * We can use khz divisor instead of mhz to keep a better precision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. - * (mathieu.desnoyers@polymtl.ca) - * - * -johnstul@us.ibm.com "math is hard, lets go shopping!" - */ - -DEFINE_PER_CPU(unsigned long, cyc2ns); - -void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) -{ - unsigned long long tsc_now, ns_now; - unsigned long flags, *scale; - - local_irq_save(flags); - sched_clock_idle_sleep_event(); - - scale = &per_cpu(cyc2ns, cpu); - - rdtscll(tsc_now); - ns_now = __cycles_2_ns(tsc_now); - - if (cpu_khz) - *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; - - /* - * Start smoothly with the new frequency: - */ - sched_clock_idle_wakeup_event(0); - local_irq_restore(flags); -} - -#ifdef CONFIG_CPU_FREQ - -/* - * if the CPU frequency is scaled, TSC-based delays will need a different - * loops_per_jiffy value to function properly. - */ -static unsigned int ref_freq; -static unsigned long loops_per_jiffy_ref; -static unsigned long cpu_khz_ref; - -static int -time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) -{ - struct cpufreq_freqs *freq = data; - - if (!ref_freq) { - if (!freq->old){ - ref_freq = freq->new; - return 0; - } - ref_freq = freq->old; - loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy; - cpu_khz_ref = cpu_khz; - } - - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || - (val == CPUFREQ_RESUMECHANGE)) { - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - cpu_data(freq->cpu).loops_per_jiffy = - cpufreq_scale(loops_per_jiffy_ref, - ref_freq, freq->new); - - if (cpu_khz) { - - if (num_online_cpus() == 1) - cpu_khz = cpufreq_scale(cpu_khz_ref, - ref_freq, freq->new); - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { - tsc_khz = cpu_khz; - set_cyc2ns_scale(cpu_khz, freq->cpu); - /* - * TSC based sched_clock turns - * to junk w/ cpufreq - */ - mark_tsc_unstable("cpufreq changes"); - } - } - } - - return 0; -} - -static struct notifier_block time_cpufreq_notifier_block = { - .notifier_call = time_cpufreq_notifier -}; - -static int __init cpufreq_tsc(void) -{ - return cpufreq_register_notifier(&time_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); -} -core_initcall(cpufreq_tsc); - -#endif - /* clock source code */ static struct clocksource clocksource_tsc; diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index c852ff9bd5d..80a274b018c 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -16,120 +16,6 @@ extern int tsc_unstable; extern int tsc_disabled; -/* Accelerators for sched_clock() - * convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_khz * 10^3)) - * ns = cycles * (10^6 / cpu_khz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^6 * SC / cpu_khz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * - * We can use khz divisor instead of mhz to keep a better precision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. - * (mathieu.desnoyers@polymtl.ca) - * - * -johnstul@us.ibm.com "math is hard, lets go shopping!" - */ - -DEFINE_PER_CPU(unsigned long, cyc2ns); - -void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) -{ - unsigned long long tsc_now, ns_now; - unsigned long flags, *scale; - - local_irq_save(flags); - sched_clock_idle_sleep_event(); - - scale = &per_cpu(cyc2ns, cpu); - - rdtscll(tsc_now); - ns_now = __cycles_2_ns(tsc_now); - - if (cpu_khz) - *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; - - sched_clock_idle_wakeup_event(0); - local_irq_restore(flags); -} - -#ifdef CONFIG_CPU_FREQ - -/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency - * changes. - * - * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's - * not that important because current Opteron setups do not support - * scaling on SMP anyroads. - * - * Should fix up last_tsc too. Currently gettimeofday in the - * first tick after the change will be slightly wrong. - */ - -static unsigned int ref_freq; -static unsigned long loops_per_jiffy_ref; -static unsigned long tsc_khz_ref; - -static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - unsigned long *lpj, dummy; - - if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) - return 0; - - lpj = &dummy; - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) -#ifdef CONFIG_SMP - lpj = &cpu_data(freq->cpu).loops_per_jiffy; -#else - lpj = &boot_cpu_data.loops_per_jiffy; -#endif - - if (!ref_freq) { - ref_freq = freq->old; - loops_per_jiffy_ref = *lpj; - tsc_khz_ref = tsc_khz; - } - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || - (val == CPUFREQ_RESUMECHANGE)) { - *lpj = - cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); - - tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - mark_tsc_unstable("cpufreq changes"); - } - - set_cyc2ns_scale(tsc_khz_ref, freq->cpu); - - return 0; -} - -static struct notifier_block time_cpufreq_notifier_block = { - .notifier_call = time_cpufreq_notifier -}; - -static int __init cpufreq_tsc(void) -{ - cpufreq_register_notifier(&time_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - return 0; -} - -core_initcall(cpufreq_tsc); - -#endif - /* * Make an educated guess if the TSC is trustworthy and synchronized * over all CPUs. -- cgit v1.2.3-70-g09d2 From 8fbbc4b45ce3e4c0eeb15004c79c72b6896a79c2 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 1 Jul 2008 11:43:34 -0700 Subject: x86: merge tsc_init and clocksource code Unify the clocksource code. Unify the tsc_init code. Signed-off-by: Alok N Kataria Signed-off-by: Dan Hecht Cc: Dan Hecht Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/time_64.c | 32 +------ arch/x86/kernel/tsc.c | 212 +++++++++++++++++++++++++++++++++++++++++++++- arch/x86/kernel/tsc_32.c | 188 ---------------------------------------- arch/x86/kernel/tsc_64.c | 106 ----------------------- include/asm-x86/apic.h | 7 +- include/asm-x86/delay.h | 4 + include/asm-x86/time.h | 2 + include/asm-x86/tsc.h | 1 - 9 files changed, 224 insertions(+), 330 deletions(-) delete mode 100644 arch/x86/kernel/tsc_32.c delete mode 100644 arch/x86/kernel/tsc_64.c (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index ca904ee1725..59b14c940a2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -26,7 +26,7 @@ obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o -obj-y += tsc_$(BITS).o io_delay.o rtc.o tsc.o +obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index c6ac4dad41f..e3d49c553af 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -56,7 +56,7 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id) /* calibrate_cpu is used on systems with fixed rate TSCs to determine * processor frequency */ #define TICK_COUNT 100000000 -static unsigned long __init calibrate_cpu(void) +unsigned long __init calibrate_cpu(void) { int tsc_start, tsc_now; int i, no_ctr_free; @@ -114,41 +114,13 @@ void __init hpet_time_init(void) setup_irq(0, &irq0); } -extern void set_cyc2ns_scale(unsigned long cpu_khz, int cpu); - void __init time_init(void) { - int cpu; - - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; - - if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && - (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) - cpu_khz = calibrate_cpu(); - - lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ; - - if (unsynchronized_tsc()) - mark_tsc_unstable("TSCs unsynchronized"); - + tsc_init(); if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) vgetcpu_mode = VGETCPU_RDTSCP; else vgetcpu_mode = VGETCPU_LSL; - printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - - /* - * Secondary CPUs do not run through tsc_init(), so set up - * all the scale factors for all CPUs, assuming the same - * speed as the bootup CPU. (cpufreq notifiers will fix this - * up if their speed diverges) - */ - for_each_possible_cpu(cpu) - set_cyc2ns_scale(cpu_khz, cpu); - - init_tsc_clocksource(); late_time_init = choose_time_init(); } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 595f78a2221..94c16bdd569 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -5,8 +5,16 @@ #include #include #include +#include +#include +#include +#include #include +#include +#include +#include +#include unsigned int cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -16,12 +24,12 @@ EXPORT_SYMBOL(tsc_khz); /* * TSC can be unstable due to cpufreq or due to unsynced TSCs */ -int tsc_unstable; +static int tsc_unstable; /* native_sched_clock() is called before tsc_init(), so we must start with the TSC soft disabled to prevent erroneous rdtsc usage on !cpu_has_tsc processors */ -int tsc_disabled = -1; +static int tsc_disabled = -1; /* * Scheduler clock - returns current time in nanosec units. @@ -241,7 +249,7 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); DEFINE_PER_CPU(unsigned long, cyc2ns); -void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) +static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { unsigned long long tsc_now, ns_now; unsigned long flags, *scale; @@ -329,3 +337,201 @@ static int __init cpufreq_tsc(void) core_initcall(cpufreq_tsc); #endif /* CONFIG_CPU_FREQ */ + +/* clocksource code */ + +static struct clocksource clocksource_tsc; + +/* + * We compare the TSC to the cycle_last value in the clocksource + * structure to avoid a nasty time-warp. This can be observed in a + * very small window right after one CPU updated cycle_last under + * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which + * is smaller than the cycle_last reference value due to a TSC which + * is slighty behind. This delta is nowhere else observable, but in + * that case it results in a forward time jump in the range of hours + * due to the unsigned delta calculation of the time keeping core + * code, which is necessary to support wrapping clocksources like pm + * timer. + */ +static cycle_t read_tsc(void) +{ + cycle_t ret = (cycle_t)get_cycles(); + + return ret >= clocksource_tsc.cycle_last ? + ret : clocksource_tsc.cycle_last; +} + +static cycle_t __vsyscall_fn vread_tsc(void) +{ + cycle_t ret = (cycle_t)vget_cycles(); + + return ret >= __vsyscall_gtod_data.clock.cycle_last ? + ret : __vsyscall_gtod_data.clock.cycle_last; +} + +static struct clocksource clocksource_tsc = { + .name = "tsc", + .rating = 300, + .read = read_tsc, + .mask = CLOCKSOURCE_MASK(64), + .shift = 22, + .flags = CLOCK_SOURCE_IS_CONTINUOUS | + CLOCK_SOURCE_MUST_VERIFY, +#ifdef CONFIG_X86_64 + .vread = vread_tsc, +#endif +}; + +void mark_tsc_unstable(char *reason) +{ + if (!tsc_unstable) { + tsc_unstable = 1; + printk("Marking TSC unstable due to %s\n", reason); + /* Change only the rating, when not registered */ + if (clocksource_tsc.mult) + clocksource_change_rating(&clocksource_tsc, 0); + else + clocksource_tsc.rating = 0; + } +} + +EXPORT_SYMBOL_GPL(mark_tsc_unstable); + +static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) +{ + printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", + d->ident); + tsc_unstable = 1; + return 0; +} + +/* List of systems that have known TSC problems */ +static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { + { + .callback = dmi_mark_tsc_unstable, + .ident = "IBM Thinkpad 380XD", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), + }, + }, + {} +}; + +/* + * Geode_LX - the OLPC CPU has a possibly a very reliable TSC + */ +#ifdef CONFIG_MGEODE_LX +/* RTSC counts during suspend */ +#define RTSC_SUSP 0x100 + +static void __init check_geode_tsc_reliable(void) +{ + unsigned long res_low, res_high; + + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + if (res_low & RTSC_SUSP) + clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; +} +#else +static inline void check_geode_tsc_reliable(void) { } +#endif + +/* + * Make an educated guess if the TSC is trustworthy and synchronized + * over all CPUs. + */ +__cpuinit int unsynchronized_tsc(void) +{ + if (!cpu_has_tsc || tsc_unstable) + return 1; + +#ifdef CONFIG_SMP + if (apic_is_clustered_box()) + return 1; +#endif + + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return 0; + /* + * Intel systems are normally all synchronized. + * Exceptions must mark TSC as unstable: + */ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { + /* assume multi socket systems are not synchronized: */ + if (num_possible_cpus() > 1) + tsc_unstable = 1; + } + + return tsc_unstable; +} + +static void __init init_tsc_clocksource(void) +{ + clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, + clocksource_tsc.shift); + /* lower the rating if we already know its unstable: */ + if (check_tsc_unstable()) { + clocksource_tsc.rating = 0; + clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; + } + clocksource_register(&clocksource_tsc); +} + +void __init tsc_init(void) +{ + u64 lpj; + int cpu; + + if (!cpu_has_tsc) + return; + + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; + + if (!cpu_khz) { + mark_tsc_unstable("could not calculate TSC khz"); + return; + } + +#ifdef CONFIG_X86_64 + if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) + cpu_khz = calibrate_cpu(); +#endif + + lpj = ((u64)tsc_khz * 1000); + do_div(lpj, HZ); + lpj_fine = lpj; + + printk("Detected %lu.%03lu MHz processor.\n", + (unsigned long)cpu_khz / 1000, + (unsigned long)cpu_khz % 1000); + + /* + * Secondary CPUs do not run through tsc_init(), so set up + * all the scale factors for all CPUs, assuming the same + * speed as the bootup CPU. (cpufreq notifiers will fix this + * up if their speed diverges) + */ + for_each_possible_cpu(cpu) + set_cyc2ns_scale(cpu_khz, cpu); + + if (tsc_disabled > 0) + return; + + /* now allow native_sched_clock() to use rdtsc */ + tsc_disabled = 0; + + use_tsc_delay(); + /* Check and install the TSC clocksource */ + dmi_check_system(bad_tsc_dmi_table); + + if (unsynchronized_tsc()) + mark_tsc_unstable("TSCs unsynchronized"); + + check_geode_tsc_reliable(); + init_tsc_clocksource(); +} + diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c deleted file mode 100644 index bbc153d36f8..00000000000 --- a/arch/x86/kernel/tsc_32.c +++ /dev/null @@ -1,188 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "mach_timer.h" - -extern int tsc_unstable; -extern int tsc_disabled; - -/* clock source code */ - -static struct clocksource clocksource_tsc; - -/* - * We compare the TSC to the cycle_last value in the clocksource - * structure to avoid a nasty time-warp issue. This can be observed in - * a very small window right after one CPU updated cycle_last under - * xtime lock and the other CPU reads a TSC value which is smaller - * than the cycle_last reference value due to a TSC which is slighty - * behind. This delta is nowhere else observable, but in that case it - * results in a forward time jump in the range of hours due to the - * unsigned delta calculation of the time keeping core code, which is - * necessary to support wrapping clocksources like pm timer. - */ -static cycle_t read_tsc(void) -{ - cycle_t ret; - - rdtscll(ret); - - return ret >= clocksource_tsc.cycle_last ? - ret : clocksource_tsc.cycle_last; -} - -static struct clocksource clocksource_tsc = { - .name = "tsc", - .rating = 300, - .read = read_tsc, - .mask = CLOCKSOURCE_MASK(64), - .mult = 0, /* to be set */ - .shift = 22, - .flags = CLOCK_SOURCE_IS_CONTINUOUS | - CLOCK_SOURCE_MUST_VERIFY, -}; - -void mark_tsc_unstable(char *reason) -{ - if (!tsc_unstable) { - tsc_unstable = 1; - printk("Marking TSC unstable due to: %s.\n", reason); - /* Can be called before registration */ - if (clocksource_tsc.mult) - clocksource_change_rating(&clocksource_tsc, 0); - else - clocksource_tsc.rating = 0; - } -} -EXPORT_SYMBOL_GPL(mark_tsc_unstable); - -static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) -{ - printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", - d->ident); - tsc_unstable = 1; - return 0; -} - -/* List of systems that have known TSC problems */ -static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { - { - .callback = dmi_mark_tsc_unstable, - .ident = "IBM Thinkpad 380XD", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), - }, - }, - {} -}; - -/* - * Make an educated guess if the TSC is trustworthy and synchronized - * over all CPUs. - */ -__cpuinit int unsynchronized_tsc(void) -{ - if (!cpu_has_tsc || tsc_unstable) - return 1; - - /* Anything with constant TSC should be synchronized */ - if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) - return 0; - - /* - * Intel systems are normally all synchronized. - * Exceptions must mark TSC as unstable: - */ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { - /* assume multi socket systems are not synchronized: */ - if (num_possible_cpus() > 1) - tsc_unstable = 1; - } - return tsc_unstable; -} - -/* - * Geode_LX - the OLPC CPU has a possibly a very reliable TSC - */ -#ifdef CONFIG_MGEODE_LX -/* RTSC counts during suspend */ -#define RTSC_SUSP 0x100 - -static void __init check_geode_tsc_reliable(void) -{ - unsigned long res_low, res_high; - - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - if (res_low & RTSC_SUSP) - clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; -} -#else -static inline void check_geode_tsc_reliable(void) { } -#endif - - -void __init tsc_init(void) -{ - int cpu; - u64 lpj; - - if (!cpu_has_tsc || tsc_disabled > 0) - return; - - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; - - if (!cpu_khz) { - mark_tsc_unstable("could not calculate TSC khz"); - return; - } - - lpj = ((u64)tsc_khz * 1000); - do_div(lpj, HZ); - lpj_fine = lpj; - - /* now allow native_sched_clock() to use rdtsc */ - tsc_disabled = 0; - - printk("Detected %lu.%03lu MHz processor.\n", - (unsigned long)cpu_khz / 1000, - (unsigned long)cpu_khz % 1000); - - /* - * Secondary CPUs do not run through tsc_init(), so set up - * all the scale factors for all CPUs, assuming the same - * speed as the bootup CPU. (cpufreq notifiers will fix this - * up if their speed diverges) - */ - for_each_possible_cpu(cpu) - set_cyc2ns_scale(cpu_khz, cpu); - - use_tsc_delay(); - - /* Check and install the TSC clocksource */ - dmi_check_system(bad_tsc_dmi_table); - - unsynchronized_tsc(); - check_geode_tsc_reliable(); - clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, - clocksource_tsc.shift); - /* lower the rating if we already know its unstable: */ - if (check_tsc_unstable()) { - clocksource_tsc.rating = 0; - clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; - } - clocksource_register(&clocksource_tsc); -} diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c deleted file mode 100644 index 80a274b018c..00000000000 --- a/arch/x86/kernel/tsc_64.c +++ /dev/null @@ -1,106 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -extern int tsc_unstable; -extern int tsc_disabled; - -/* - * Make an educated guess if the TSC is trustworthy and synchronized - * over all CPUs. - */ -__cpuinit int unsynchronized_tsc(void) -{ - if (tsc_unstable) - return 1; - -#ifdef CONFIG_SMP - if (apic_is_clustered_box()) - return 1; -#endif - - if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) - return 0; - - /* Assume multi socket systems are not synchronized */ - return num_present_cpus() > 1; -} - -static struct clocksource clocksource_tsc; - -/* - * We compare the TSC to the cycle_last value in the clocksource - * structure to avoid a nasty time-warp. This can be observed in a - * very small window right after one CPU updated cycle_last under - * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which - * is smaller than the cycle_last reference value due to a TSC which - * is slighty behind. This delta is nowhere else observable, but in - * that case it results in a forward time jump in the range of hours - * due to the unsigned delta calculation of the time keeping core - * code, which is necessary to support wrapping clocksources like pm - * timer. - */ -static cycle_t read_tsc(void) -{ - cycle_t ret = (cycle_t)get_cycles(); - - return ret >= clocksource_tsc.cycle_last ? - ret : clocksource_tsc.cycle_last; -} - -static cycle_t __vsyscall_fn vread_tsc(void) -{ - cycle_t ret = (cycle_t)vget_cycles(); - - return ret >= __vsyscall_gtod_data.clock.cycle_last ? - ret : __vsyscall_gtod_data.clock.cycle_last; -} - -static struct clocksource clocksource_tsc = { - .name = "tsc", - .rating = 300, - .read = read_tsc, - .mask = CLOCKSOURCE_MASK(64), - .shift = 22, - .flags = CLOCK_SOURCE_IS_CONTINUOUS | - CLOCK_SOURCE_MUST_VERIFY, - .vread = vread_tsc, -}; - -void mark_tsc_unstable(char *reason) -{ - if (!tsc_unstable) { - tsc_unstable = 1; - printk("Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) - clocksource_change_rating(&clocksource_tsc, 0); - else - clocksource_tsc.rating = 0; - } -} -EXPORT_SYMBOL_GPL(mark_tsc_unstable); - -void __init init_tsc_clocksource(void) -{ - if (tsc_disabled > 0) - return; - - clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, - clocksource_tsc.shift); - if (check_tsc_unstable()) - clocksource_tsc.rating = 0; - - clocksource_register(&clocksource_tsc); -} diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index a29807737d3..4e2c1e517f0 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -121,12 +121,17 @@ extern void enable_NMI_through_LVT0(void); */ #ifdef CONFIG_X86_64 extern void early_init_lapic_mapping(void); +extern int apic_is_clustered_box(void); +#else +static inline int apic_is_clustered_box(void) +{ + return 0; +} #endif extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask); extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask); -extern int apic_is_clustered_box(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } diff --git a/include/asm-x86/delay.h b/include/asm-x86/delay.h index 409a649204a..bb80880c834 100644 --- a/include/asm-x86/delay.h +++ b/include/asm-x86/delay.h @@ -26,6 +26,10 @@ extern void __delay(unsigned long loops); ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ __ndelay(n)) +#ifdef CONFIG_X86_32 void use_tsc_delay(void); +#else +#define use_tsc_delay() {} +#endif #endif /* _ASM_X86_DELAY_H */ diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h index bce72d7a958..a17fa473e91 100644 --- a/include/asm-x86/time.h +++ b/include/asm-x86/time.h @@ -56,4 +56,6 @@ static inline int native_set_wallclock(unsigned long nowtime) #endif /* CONFIG_PARAVIRT */ +extern unsigned long __init calibrate_cpu(void); + #endif diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h index 761054d7fef..cb6f6ee45b8 100644 --- a/include/asm-x86/tsc.h +++ b/include/asm-x86/tsc.h @@ -48,7 +48,6 @@ static __always_inline cycles_t vget_cycles(void) extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); -extern void init_tsc_clocksource(void); int check_tsc_unstable(void); /* -- cgit v1.2.3-70-g09d2 From e93ef949fd9a3f237aedfb8e64414b28980530b8 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 1 Jul 2008 11:43:36 -0700 Subject: x86: rename paravirtualized TSC functions Rename the paravirtualized calculate_cpu_khz to calibrate_tsc. In all cases, we actually calibrate_tsc and use that as the cpu_khz value. Signed-off-by: Alok N Kataria Signed-off-by: Dan Hecht Cc: Dan Hecht Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 2 +- arch/x86/kernel/tsc.c | 18 +++++++----------- arch/x86/kernel/vmi_32.c | 2 +- arch/x86/kernel/vmiclock_32.c | 4 ++-- arch/x86/lguest/boot.c | 4 ++-- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/time.c | 4 ++-- arch/x86/xen/xen-ops.h | 2 +- include/asm-x86/paravirt.h | 4 ++-- include/asm-x86/timer.h | 4 ++-- include/asm-x86/vmi_time.h | 2 +- 11 files changed, 22 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e7e5652f65b..e0f571d58c1 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -285,7 +285,7 @@ struct pv_time_ops pv_time_ops = { .get_wallclock = native_get_wallclock, .set_wallclock = native_set_wallclock, .sched_clock = native_sched_clock, - .get_cpu_khz = native_calculate_cpu_khz, + .get_tsc_khz = native_calibrate_tsc, }; struct pv_irq_ops pv_irq_ops = { diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 94c16bdd569..3c36f92160c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -123,9 +123,9 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) } /** - * tsc_calibrate - calibrate the tsc on boot + * native_calibrate_tsc - calibrate the tsc on boot */ -static unsigned int __init tsc_calibrate(void) +unsigned long native_calibrate_tsc(void) { unsigned long flags; u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; @@ -195,10 +195,6 @@ out: return tsc_khz_val; } -unsigned long native_calculate_cpu_khz(void) -{ - return tsc_calibrate(); -} #ifdef CONFIG_X86_32 /* Only called from the Powernow K7 cpu freq driver */ @@ -208,8 +204,8 @@ int recalibrate_cpu_khz(void) unsigned long cpu_khz_old = cpu_khz; if (cpu_has_tsc) { - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; + tsc_khz = calibrate_tsc(); + cpu_khz = tsc_khz; cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); @@ -487,10 +483,10 @@ void __init tsc_init(void) if (!cpu_has_tsc) return; - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; + tsc_khz = calibrate_tsc(); + cpu_khz = tsc_khz; - if (!cpu_khz) { + if (!tsc_khz) { mark_tsc_unstable("could not calculate TSC khz"); return; } diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 946bf13b44a..b15346092b7 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -932,7 +932,7 @@ static inline int __init activate_vmi(void) pv_apic_ops.setup_secondary_clock = vmi_time_ap_init; #endif pv_time_ops.sched_clock = vmi_sched_clock; - pv_time_ops.get_cpu_khz = vmi_cpu_khz; + pv_time_ops.get_tsc_khz = vmi_tsc_khz; /* We have true wallclock functions; disable CMOS clock sync */ no_sync_cmos_clock = 1; diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index ba7d19e102b..6953859fe28 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -69,8 +69,8 @@ unsigned long long vmi_sched_clock(void) return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); } -/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */ -unsigned long vmi_cpu_khz(void) +/* paravirt_ops.get_tsc_khz = vmi_tsc_khz */ +unsigned long vmi_tsc_khz(void) { unsigned long long khz; khz = vmi_timer_ops.get_cycle_frequency(); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index e72cf0793fb..50dad44fb54 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -607,7 +607,7 @@ static unsigned long lguest_get_wallclock(void) * what speed it runs at, or 0 if it's unusable as a reliable clock source. * This matches what we want here: if we return 0 from this function, the x86 * TSC clock will give up and not register itself. */ -static unsigned long lguest_cpu_khz(void) +static unsigned long lguest_tsc_khz(void) { return lguest_data.tsc_khz; } @@ -998,7 +998,7 @@ __init void lguest_init(void) /* time operations */ pv_time_ops.get_wallclock = lguest_get_wallclock; pv_time_ops.time_init = lguest_time_init; - pv_time_ops.get_cpu_khz = lguest_cpu_khz; + pv_time_ops.get_tsc_khz = lguest_tsc_khz; /* Now is a good time to look at the implementations of these functions * before returning to the rest of lguest_init(). */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 3b980831602..dcd4e51f2f1 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1062,7 +1062,7 @@ static const struct pv_time_ops xen_time_ops __initdata = { .set_wallclock = xen_set_wallclock, .get_wallclock = xen_get_wallclock, - .get_cpu_khz = xen_cpu_khz, + .get_tsc_khz = xen_tsc_khz, .sched_clock = xen_sched_clock, }; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 64f0038b955..685b77470fc 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -197,8 +197,8 @@ unsigned long long xen_sched_clock(void) } -/* Get the CPU speed from Xen */ -unsigned long xen_cpu_khz(void) +/* Get the TSC speed from Xen */ +unsigned long xen_tsc_khz(void) { u64 xen_khz = 1000000ULL << 32; const struct pvclock_vcpu_time_info *info = diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9a055592a30..d852ddbb344 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -32,7 +32,7 @@ void __init xen_build_dynamic_phys_to_machine(void); void xen_setup_timer(int cpu); void xen_setup_cpu_clockevents(void); -unsigned long xen_cpu_khz(void); +unsigned long xen_tsc_khz(void); void __init xen_time_init(void); unsigned long xen_get_wallclock(void); int xen_set_wallclock(unsigned long time); diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 6d8966f9d19..ef5e8ec6a6a 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -84,7 +84,7 @@ struct pv_time_ops { int (*set_wallclock)(unsigned long); unsigned long long (*sched_clock)(void); - unsigned long (*get_cpu_khz)(void); + unsigned long (*get_tsc_khz)(void); }; struct pv_cpu_ops { @@ -779,7 +779,7 @@ static inline unsigned long long paravirt_sched_clock(void) { return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); } -#define calculate_cpu_khz() (pv_time_ops.get_cpu_khz()) +#define calibrate_tsc() (pv_time_ops.get_tsc_khz()) static inline unsigned long long paravirt_read_pmc(int counter) { diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h index 4f6fcb050c1..fb2a4ddddf3 100644 --- a/include/asm-x86/timer.h +++ b/include/asm-x86/timer.h @@ -7,14 +7,14 @@ #define TICK_SIZE (tick_nsec / 1000) unsigned long long native_sched_clock(void); -unsigned long native_calculate_cpu_khz(void); +unsigned long native_calibrate_tsc(void); extern int timer_ack; extern int no_timer_check; extern int recalibrate_cpu_khz(void); #ifndef CONFIG_PARAVIRT -#define calculate_cpu_khz() native_calculate_cpu_khz() +#define calibrate_tsc() native_calibrate_tsc() #endif /* Accelerators for sched_clock() diff --git a/include/asm-x86/vmi_time.h b/include/asm-x86/vmi_time.h index 47818813032..c3118c38515 100644 --- a/include/asm-x86/vmi_time.h +++ b/include/asm-x86/vmi_time.h @@ -50,7 +50,7 @@ extern void __init vmi_time_init(void); extern unsigned long vmi_get_wallclock(void); extern int vmi_set_wallclock(unsigned long now); extern unsigned long long vmi_sched_clock(void); -extern unsigned long vmi_cpu_khz(void); +extern unsigned long vmi_tsc_khz(void); #ifdef CONFIG_X86_LOCAL_APIC extern void __devinit vmi_time_bsp_init(void); -- cgit v1.2.3-70-g09d2 From 0a4d8a472f645d99f86303db1462b64e371b090d Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 09:34:08 -0300 Subject: x86: provide delay loop for x86_64. This is for consistency with i386. We call use_tsc_delay() at tsc initialization for x86_64, so we'll be always using it. Signed-off-by: Glauber Costa Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 1 + arch/x86/lib/delay_64.c | 44 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 41 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3c36f92160c..4a775d00195 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -513,6 +513,7 @@ void __init tsc_init(void) */ for_each_possible_cpu(cpu) set_cyc2ns_scale(cpu_khz, cpu); + use_tsc_delay(); if (tsc_disabled > 0) return; diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c index 4c441be9264..d0326d07c84 100644 --- a/arch/x86/lib/delay_64.c +++ b/arch/x86/lib/delay_64.c @@ -22,13 +22,28 @@ #include #endif -int __devinit read_current_timer(unsigned long *timer_value) +/* simple loop based delay: */ +static void delay_loop(unsigned long loops) { - rdtscll(*timer_value); - return 0; + asm volatile( + " test %0,%0 \n" + " jz 3f \n" + " jmp 1f \n" + + ".align 16 \n" + "1: jmp 2f \n" + + ".align 16 \n" + "2: dec %0 \n" + " jnz 2b \n" + "3: dec %0 \n" + + : /* we don't need output */ + :"a" (loops) + ); } -void __delay(unsigned long loops) +static void delay_tsc(unsigned long loops) { unsigned bclock, now; int cpu; @@ -63,6 +78,27 @@ void __delay(unsigned long loops) } preempt_enable(); } + +static void (*delay_fn)(unsigned long) = delay_loop; + +void use_tsc_delay(void) +{ + delay_fn = delay_tsc; +} + +int __devinit read_current_timer(unsigned long *timer_value) +{ + if (delay_fn == delay_tsc) { + rdtscll(*timer_value); + return 0; + } + return -1; +} + +void __delay(unsigned long loops) +{ + delay_fn(loops); +} EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) -- cgit v1.2.3-70-g09d2 From e54afe38630e3b577968428f48ed8ef1e13a2a15 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 10 Jul 2008 14:01:47 -0300 Subject: x86: remove duplicate call to use_tsc_delay Integration generated a duplicate call to use_tsc_delay. Particularly, the one that is done before we check for general tsc usability seems wrong. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 4a775d00195..3c36f92160c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -513,7 +513,6 @@ void __init tsc_init(void) */ for_each_possible_cpu(cpu) set_cyc2ns_scale(cpu_khz, cpu); - use_tsc_delay(); if (tsc_disabled > 0) return; -- cgit v1.2.3-70-g09d2 From 431ceb83f703a343bdd14350480a2224fa4bfedf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Jul 2008 22:08:04 +0200 Subject: x86: fix TSC build error on 32bit Dave Hansen reported a build error on 32bit which went unnoticed as newer gcc versions seem to optimize unused static functions away before compiling them. Make vread_tsc() depend on CONFIG_X86_64 Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3c36f92160c..7603c055390 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -358,6 +358,7 @@ static cycle_t read_tsc(void) ret : clocksource_tsc.cycle_last; } +#ifdef CONFIG_X86_64 static cycle_t __vsyscall_fn vread_tsc(void) { cycle_t ret = (cycle_t)vget_cycles(); @@ -365,6 +366,7 @@ static cycle_t __vsyscall_fn vread_tsc(void) return ret >= __vsyscall_gtod_data.clock.cycle_last ? ret : __vsyscall_gtod_data.clock.cycle_last; } +#endif static struct clocksource clocksource_tsc = { .name = "tsc", -- cgit v1.2.3-70-g09d2