From d554d9a4295dd0595d12eeccbc55d1f495b15176 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:07:44 +0200 Subject: x86, tsc: fix section mismatch warning WARNING: vmlinux.o(.text+0x7950): Section mismatch in reference from the function native_calibrate_tsc() to the function .init.text:tsc_read_refs() The function native_calibrate_tsc() references the function __init tsc_read_refs(). This is often because native_calibrate_tsc lacks a __init annotation or the annotation of tsc_read_refs is wrong. tsc_read_refs is called from native_calibrate_tsc which is not __init and native_calibrate_tsc cannot be marked __init Signed-off-by: Marcin Slusarz Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7603c055390..46af7167673 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. Take care of SMI disturbance */ -static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *pm, u64 *hpet) { u64 t1, t2; int i; -- cgit v1.2.3-70-g09d2 From 060700b571717c997a2ea5e2049b848fa248ee13 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Aug 2008 11:52:06 -0700 Subject: x86: do not enable TSC notifier if we don't need it Impact: crash on non-TSC-equipped CPUs Don't enable the TSC notifier if we *either*: 1. don't have a CPU, or 2. have a CPU with constant TSC. In either of those cases, the notifier is either damaging (1) or useless(2). From: Linus Torvalds Signed-off-by: H. Peter Anvin --- arch/x86/kernel/tsc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 46af7167673..9bed5cae4bd 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -325,6 +325,10 @@ static struct notifier_block time_cpufreq_notifier_block = { static int __init cpufreq_tsc(void) { + if (!cpu_has_tsc) + return 0; + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return 0; cpufreq_register_notifier(&time_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); return 0; -- cgit v1.2.3-70-g09d2 From 52a8968ce95da8469ba0a9b3e4010fe31caf77a3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Aug 2008 13:35:06 +0200 Subject: x86: fix cpufreq + sched_clock() regression I noticed that my sched_clock() was slow on a number of machine, so I started looking at cpufreq. The below seems to fix the problem for me. Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 9bed5cae4bd..8e786b0d665 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -314,7 +314,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, mark_tsc_unstable("cpufreq changes"); } - set_cyc2ns_scale(tsc_khz_ref, freq->cpu); + set_cyc2ns_scale(tsc_khz, freq->cpu); return 0; } -- cgit v1.2.3-70-g09d2 From fbb16e243887332dd5754e48ffe5b963378f3cd2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 3 Sep 2008 00:54:47 +0200 Subject: [x86] Fix TSC calibration issues Larry Finger reported at http://lkml.org/lkml/2008/9/1/90: An ancient laptop of mine started throwing errors from b43legacy when I started using 2.6.27 on it. This has been bisected to commit bfc0f59 "x86: merge tsc calibration". The unification of the TSC code adopted mostly the 64bit code, which prefers PMTIMER/HPET over the PIT calibration. Larrys system has an AMD K6 CPU. Such systems are known to have PMTIMER incarnations which run at double speed. This results in a miscalibration of the TSC by factor 0.5. So the resulting calibrated CPU/TSC speed is half of the real CPU speed, which means that the TSC based delay loop will run half the time it should run. That might explain why the b43legacy driver went berserk. On the other hand we know about systems, where the PIT based calibration results in random crap due to heavy SMI/SMM disturbance. On those systems the PMTIMER/HPET based calibration logic with SMI detection shows better results. According to Alok also virtualized systems suffer from the PIT calibration method. The solution is to use a more wreckage aware aproach than the current either/or decision. 1) reimplement the retry loop which was dropped from the 32bit code during the merge. It repeats the calibration and selects the lowest frequency value as this is probably the closest estimate to the real frequency 2) Monitor the delta of the TSC values in the delay loop which waits for the PIT counter to reach zero. If the maximum value is significantly different from the minimum, then we have a pretty safe indicator that the loop was disturbed by an SMI. 3) keep the pmtimer/hpet reference as a backup solution for systems where the SMI disturbance is a permanent point of failure for PIT based calibration 4) do the loop iteration for both methods, record the lowest value and decide after all iterations finished. 5) Set a clear preference to PIT based calibration when the result makes sense. The implementation does the reference calibration based on HPET/PMTIMER around the delay, which is necessary for the PIT anyway, but keeps separate TSC values to ensure the "independency" of the resulting calibration values. Tested on various 32bit/64bit machines including Geode 266Mhz, AMD K6 (affected machine with a double speed pmtimer which I grabbed out of the dump), Pentium class machines and AMD/Intel 64 bit boxen. Bisected-by: Larry Finger Signed-off-by: Thomas Gleixner Tested-by: Larry Finger Signed-off-by: Linus Torvalds --- arch/x86/kernel/tsc.c | 235 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 181 insertions(+), 54 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 8e786b0d665..ac79bd143da 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -127,75 +127,202 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) */ unsigned long native_calibrate_tsc(void) { - unsigned long flags; - u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; - int hpet = is_hpet_enabled(); - unsigned int tsc_khz_val = 0; + u64 tsc1, tsc2, tr1, tr2, tsc, delta, pm1, pm2, hpet1, hpet2; + unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; + unsigned long flags, tscmin, tscmax; + int hpet = is_hpet_enabled(), pitcnt, i; - local_irq_save(flags); - - tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); - - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); - tr1 = get_cycles(); - while ((inb(0x61) & 0x20) == 0); - tr2 = get_cycles(); - - tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); - - local_irq_restore(flags); + /* + * Run 5 calibration loops to get the lowest frequency value + * (the best estimate). We use two different calibration modes + * here: + * + * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and + * load a timeout of 50ms. We read the time right after we + * started the timer and wait until the PIT count down reaches + * zero. In each wait loop iteration we read the TSC and check + * the delta to the previous read. We keep track of the min + * and max values of that delta. The delta is mostly defined + * by the IO time of the PIT access, so we can detect when a + * SMI/SMM disturbance happend between the two reads. If the + * maximum time is significantly larger than the minimum time, + * then we discard the result and have another try. + * + * 2) Reference counter. If available we use the HPET or the + * PMTIMER as a reference to check the sanity of that value. + * We use separate TSC readouts and check inside of the + * reference read for a SMI/SMM disturbance. We dicard + * disturbed values here as well. We do that around the PIT + * calibration delay loop as we have to wait for a certain + * amount of time anyway. + */ + for (i = 0; i < 5; i++) { + + tscmin = ULONG_MAX; + tscmax = 0; + pitcnt = 0; + + local_irq_save(flags); + + /* + * Read the start value and the reference count of + * hpet/pmtimer when available: + */ + tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); + + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Setup CTC channel 2* for mode 0, (interrupt on terminal + * count mode), binary count. Set the latch register to 50ms + * (LSB then MSB) to begin countdown. + * + * Some devices need a delay here. + */ + outb(0xb0, 0x43); + outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); + outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + + tsc = tr1 = tr2 = get_cycles(); + + while ((inb(0x61) & 0x20) == 0) { + tr2 = get_cycles(); + delta = tr2 - tsc; + tsc = tr2; + if ((unsigned int) delta < tscmin) + tscmin = (unsigned int) delta; + if ((unsigned int) delta > tscmax) + tscmax = (unsigned int) delta; + pitcnt++; + } + + /* + * We waited at least 50ms above. Now read + * pmtimer/hpet reference again + */ + tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + + local_irq_restore(flags); + + /* + * Sanity checks: + * + * If we were not able to read the PIT more than 5000 + * times, then we have been hit by a massive SMI + * + * If the maximum is 10 times larger than the minimum, + * then we got hit by an SMI as well. + */ + if (pitcnt > 5000 && tscmax < 10 * tscmin) { + + /* Calculate the PIT value */ + delta = tr2 - tr1; + do_div(delta, 50); + + /* We take the smallest value into account */ + tsc_pit_min = min(tsc_pit_min, (unsigned long) delta); + } + + /* hpet or pmtimer available ? */ + if (!hpet && !pm1 && !pm2) + continue; + + /* Check, whether the sampling was disturbed by an SMI */ + if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) + continue; + + tsc2 = (tsc2 - tsc1) * 1000000LL; + + if (hpet) { + if (hpet2 < hpet1) + hpet2 += 0x100000000ULL; + hpet2 -= hpet1; + tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); + do_div(tsc1, 1000000); + } else { + if (pm2 < pm1) + pm2 += (u64)ACPI_PM_OVRRUN; + pm2 -= pm1; + tsc1 = pm2 * 1000000000LL; + do_div(tsc1, PMTMR_TICKS_PER_SEC); + } + + do_div(tsc2, tsc1); + tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); + } /* - * Preset the result with the raw and inaccurate PIT - * calibration value + * Now check the results. */ - delta = (tr2 - tr1); - do_div(delta, 50); - tsc_khz_val = delta; + if (tsc_pit_min == ULONG_MAX) { + /* PIT gave no useful value */ + printk(KERN_WARNING "TSC: PIT calibration failed due to " + "SMI disturbance.\n"); + + /* We don't have an alternative source, disable TSC */ + if (!hpet && !pm1 && !pm2) { + printk("TSC: No reference (HPET/PMTIMER) available\n"); + return 0; + } + + /* The alternative source failed as well, disable TSC */ + if (tsc_ref_min == ULONG_MAX) { + printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " + "failed due to SMI disturbance.\n"); + return 0; + } + + /* Use the alternative source */ + printk(KERN_INFO "TSC: using %s reference calibration\n", + hpet ? "HPET" : "PMTIMER"); + + return tsc_ref_min; + } - /* hpet or pmtimer available ? */ + /* We don't have an alternative source, use the PIT calibration value */ if (!hpet && !pm1 && !pm2) { - printk(KERN_INFO "TSC calibrated against PIT\n"); - goto out; + printk(KERN_INFO "TSC: Using PIT calibration value\n"); + return tsc_pit_min; } - /* Check, whether the sampling was disturbed by an SMI */ - if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { - printk(KERN_WARNING "TSC calibration disturbed by SMI, " - "using PIT calibration result\n"); - goto out; + /* The alternative source failed, use the PIT calibration value */ + if (tsc_ref_min == ULONG_MAX) { + printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due " + "to SMI disturbance. Using PIT calibration\n"); + return tsc_pit_min; } - tsc2 = (tsc2 - tsc1) * 1000000LL; - - if (hpet) { - printk(KERN_INFO "TSC calibrated against HPET\n"); - if (hpet2 < hpet1) - hpet2 += 0x100000000ULL; - hpet2 -= hpet1; - tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); - do_div(tsc1, 1000000); - } else { - printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); - if (pm2 < pm1) - pm2 += (u64)ACPI_PM_OVRRUN; - pm2 -= pm1; - tsc1 = pm2 * 1000000000LL; - do_div(tsc1, PMTMR_TICKS_PER_SEC); + /* Check the reference deviation */ + delta = ((u64) tsc_pit_min) * 100; + do_div(delta, tsc_ref_min); + + /* + * If both calibration results are inside a 5% window, the we + * use the lower frequency of those as it is probably the + * closest estimate. + */ + if (delta >= 95 && delta <= 105) { + printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n", + hpet ? "HPET" : "PMTIMER"); + printk(KERN_INFO "TSC: using %s calibration value\n", + tsc_pit_min <= tsc_ref_min ? "PIT" : + hpet ? "HPET" : "PMTIMER"); + return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min; } - do_div(tsc2, tsc1); - tsc_khz_val = tsc2; + printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", + hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); -out: - return tsc_khz_val; + /* + * The calibration values differ too much. In doubt, we use + * the PIT value as we know that there are PMTIMERs around + * running at double speed. + */ + printk(KERN_INFO "TSC: Using PIT calibration value\n"); + return tsc_pit_min; } - #ifdef CONFIG_X86_32 /* Only called from the Powernow K7 cpu freq driver */ int recalibrate_cpu_khz(void) -- cgit v1.2.3-70-g09d2 From ec0c15afb41fd9ad45b53468b60db50170e22346 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 3 Sep 2008 07:30:13 -0700 Subject: Split up PIT part of TSC calibration from native_calibrate_tsc The TSC calibration function is still very complicated, but this makes it at least a little bit less so by moving the PIT part out into a helper function of its own. Tested-by: Larry Finger Signed-of-by: Linus Torvalds --- arch/x86/kernel/tsc.c | 132 +++++++++++++++++++++++++++----------------------- 1 file changed, 71 insertions(+), 61 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ac79bd143da..346cae5ac42 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -122,15 +122,75 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) return ULLONG_MAX; } +/* + * Try to calibrate the TSC against the Programmable + * Interrupt Timer and return the frequency of the TSC + * in kHz. + * + * Return ULONG_MAX on failure to calibrate. + */ +static unsigned long pit_calibrate_tsc(void) +{ + u64 tsc, t1, t2, delta; + unsigned long tscmin, tscmax; + int pitcnt; + + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Setup CTC channel 2* for mode 0, (interrupt on terminal + * count mode), binary count. Set the latch register to 50ms + * (LSB then MSB) to begin countdown. + */ + outb(0xb0, 0x43); + outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); + outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + + tsc = t1 = t2 = get_cycles(); + + pitcnt = 0; + tscmax = 0; + tscmin = ULONG_MAX; + while ((inb(0x61) & 0x20) == 0) { + t2 = get_cycles(); + delta = t2 - tsc; + tsc = t2; + if ((unsigned long) delta < tscmin) + tscmin = (unsigned int) delta; + if ((unsigned long) delta > tscmax) + tscmax = (unsigned int) delta; + pitcnt++; + } + + /* + * Sanity checks: + * + * If we were not able to read the PIT more than 5000 + * times, then we have been hit by a massive SMI + * + * If the maximum is 10 times larger than the minimum, + * then we got hit by an SMI as well. + */ + if (pitcnt < 5000 || tscmax > 10 * tscmin) + return ULONG_MAX; + + /* Calculate the PIT value */ + delta = t2 - t1; + do_div(delta, 50); + return delta; +} + + /** * native_calibrate_tsc - calibrate the tsc on boot */ unsigned long native_calibrate_tsc(void) { - u64 tsc1, tsc2, tr1, tr2, tsc, delta, pm1, pm2, hpet1, hpet2; + u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags, tscmin, tscmax; - int hpet = is_hpet_enabled(), pitcnt, i; + unsigned long flags; + int hpet = is_hpet_enabled(), i; /* * Run 5 calibration loops to get the lowest frequency value @@ -157,72 +217,22 @@ unsigned long native_calibrate_tsc(void) * amount of time anyway. */ for (i = 0; i < 5; i++) { - - tscmin = ULONG_MAX; - tscmax = 0; - pitcnt = 0; - - local_irq_save(flags); + unsigned long tsc_pit_khz; /* * Read the start value and the reference count of - * hpet/pmtimer when available: + * hpet/pmtimer when available. Then do the PIT + * calibration, which will take at least 50ms, and + * read the end value. */ + local_irq_save(flags); tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); - - /* Set the Gate high, disable speaker */ - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - /* - * Setup CTC channel 2* for mode 0, (interrupt on terminal - * count mode), binary count. Set the latch register to 50ms - * (LSB then MSB) to begin countdown. - * - * Some devices need a delay here. - */ - outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); - - tsc = tr1 = tr2 = get_cycles(); - - while ((inb(0x61) & 0x20) == 0) { - tr2 = get_cycles(); - delta = tr2 - tsc; - tsc = tr2; - if ((unsigned int) delta < tscmin) - tscmin = (unsigned int) delta; - if ((unsigned int) delta > tscmax) - tscmax = (unsigned int) delta; - pitcnt++; - } - - /* - * We waited at least 50ms above. Now read - * pmtimer/hpet reference again - */ + tsc_pit_khz = pit_calibrate_tsc(); tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); - local_irq_restore(flags); - /* - * Sanity checks: - * - * If we were not able to read the PIT more than 5000 - * times, then we have been hit by a massive SMI - * - * If the maximum is 10 times larger than the minimum, - * then we got hit by an SMI as well. - */ - if (pitcnt > 5000 && tscmax < 10 * tscmin) { - - /* Calculate the PIT value */ - delta = tr2 - tr1; - do_div(delta, 50); - - /* We take the smallest value into account */ - tsc_pit_min = min(tsc_pit_min, (unsigned long) delta); - } + /* Pick the lowest PIT TSC calibration so far */ + tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); /* hpet or pmtimer available ? */ if (!hpet && !pm1 && !pm2) -- cgit v1.2.3-70-g09d2 From de014d617636d6a6bd5aef3b3d1f7f9a35669057 Mon Sep 17 00:00:00 2001 From: Alok N Kataria Date: Wed, 3 Sep 2008 18:18:01 -0700 Subject: x86: Change warning message in TSC calibration. When calibration against PIT fails, the warning that we print is misleading. In a virtualized environment the VM may get descheduled while calibration or, the check in PIT calibration may fail due to other virtualization overheads. The warning message explicitly assumes that calibration failed due to SMI's which may not be the case. Change that to something proper. Signed-off-by: Alok N Kataria Signed-off-by: Linus Torvalds --- arch/x86/kernel/tsc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel/tsc.c') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 346cae5ac42..8f98e9de1b8 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -267,8 +267,7 @@ unsigned long native_calibrate_tsc(void) */ if (tsc_pit_min == ULONG_MAX) { /* PIT gave no useful value */ - printk(KERN_WARNING "TSC: PIT calibration failed due to " - "SMI disturbance.\n"); + printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); /* We don't have an alternative source, disable TSC */ if (!hpet && !pm1 && !pm2) { -- cgit v1.2.3-70-g09d2