summaryrefslogtreecommitdiffstats
path: root/arch/x86_64/kernel/time.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64/kernel/time.c')
-rw-r--r--arch/x86_64/kernel/time.c1262
1 files changed, 1262 insertions, 0 deletions
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
new file mode 100644
index 00000000000..3bafe438fa7
--- /dev/null
+++ b/arch/x86_64/kernel/time.c
@@ -0,0 +1,1262 @@
+/*
+ * linux/arch/x86-64/kernel/time.c
+ *
+ * "High Precision Event Timer" based timekeeping.
+ *
+ * Copyright (c) 1991,1992,1995 Linus Torvalds
+ * Copyright (c) 1994 Alan Modra
+ * Copyright (c) 1995 Markus Kuhn
+ * Copyright (c) 1996 Ingo Molnar
+ * Copyright (c) 1998 Andrea Arcangeli
+ * Copyright (c) 2002 Vojtech Pavlik
+ * Copyright (c) 2003 Andi Kleen
+ * RTC support code taken from arch/i386/kernel/timers/time_hpet.c
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/mc146818rtc.h>
+#include <linux/irq.h>
+#include <linux/time.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/sysdev.h>
+#include <linux/bcd.h>
+#include <linux/kallsyms.h>
+#include <asm/8253pit.h>
+#include <asm/pgtable.h>
+#include <asm/vsyscall.h>
+#include <asm/timex.h>
+#include <asm/proto.h>
+#include <asm/hpet.h>
+#include <asm/sections.h>
+#include <linux/cpufreq.h>
+#include <linux/hpet.h>
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/apic.h>
+#endif
+
+u64 jiffies_64 = INITIAL_JIFFIES;
+
+EXPORT_SYMBOL(jiffies_64);
+
+#ifdef CONFIG_CPU_FREQ
+static void cpufreq_delayed_get(void);
+#endif
+extern void i8254_timer_resume(void);
+extern int using_apic_timer;
+
+DEFINE_SPINLOCK(rtc_lock);
+DEFINE_SPINLOCK(i8253_lock);
+
+static int nohpet __initdata = 0;
+static int notsc __initdata = 0;
+
+#undef HPET_HACK_ENABLE_DANGEROUS
+
+unsigned int cpu_khz; /* TSC clocks / usec, not used here */
+static unsigned long hpet_period; /* fsecs / HPET clock */
+unsigned long hpet_tick; /* HPET clocks / interrupt */
+unsigned long vxtime_hz = PIT_TICK_RATE;
+int report_lost_ticks; /* command line option */
+unsigned long long monotonic_base;
+
+struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
+
+volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
+unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
+struct timespec __xtime __section_xtime;
+struct timezone __sys_tz __section_sys_tz;
+
+static inline void rdtscll_sync(unsigned long *tsc)
+{
+#ifdef CONFIG_SMP
+ sync_core();
+#endif
+ rdtscll(*tsc);
+}
+
+/*
+ * do_gettimeoffset() returns microseconds since last timer interrupt was
+ * triggered by hardware. A memory read of HPET is slower than a register read
+ * of TSC, but much more reliable. It's also synchronized to the timer
+ * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
+ * timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
+ * This is not a problem, because jiffies hasn't updated either. They are bound
+ * together by xtime_lock.
+ */
+
+static inline unsigned int do_gettimeoffset_tsc(void)
+{
+ unsigned long t;
+ unsigned long x;
+ rdtscll_sync(&t);
+ if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */
+ x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
+ return x;
+}
+
+static inline unsigned int do_gettimeoffset_hpet(void)
+{
+ return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32;
+}
+
+unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
+
+/*
+ * This version of gettimeofday() has microsecond resolution and better than
+ * microsecond precision, as we're using at least a 10 MHz (usually 14.31818
+ * MHz) HPET timer.
+ */
+
+void do_gettimeofday(struct timeval *tv)
+{
+ unsigned long seq, t;
+ unsigned int sec, usec;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+
+ sec = xtime.tv_sec;
+ usec = xtime.tv_nsec / 1000;
+
+ /* i386 does some correction here to keep the clock
+ monotonous even when ntpd is fixing drift.
+ But they didn't work for me, there is a non monotonic
+ clock anyways with ntp.
+ I dropped all corrections now until a real solution can
+ be found. Note when you fix it here you need to do the same
+ in arch/x86_64/kernel/vsyscall.c and export all needed
+ variables in vmlinux.lds. -AK */
+
+ t = (jiffies - wall_jiffies) * (1000000L / HZ) +
+ do_gettimeoffset();
+ usec += t;
+
+ } while (read_seqretry(&xtime_lock, seq));
+
+ tv->tv_sec = sec + usec / 1000000;
+ tv->tv_usec = usec % 1000000;
+}
+
+EXPORT_SYMBOL(do_gettimeofday);
+
+/*
+ * settimeofday() first undoes the correction that gettimeofday would do
+ * on the time, and then saves it. This is ugly, but has been like this for
+ * ages already.
+ */
+
+int do_settimeofday(struct timespec *tv)
+{
+ time_t wtm_sec, sec = tv->tv_sec;
+ long wtm_nsec, nsec = tv->tv_nsec;
+
+ if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+ return -EINVAL;
+
+ write_seqlock_irq(&xtime_lock);
+
+ nsec -= do_gettimeoffset() * 1000 +
+ (jiffies - wall_jiffies) * (NSEC_PER_SEC/HZ);
+
+ wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
+ wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+
+ set_normalized_timespec(&xtime, sec, nsec);
+ set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+
+ time_adjust = 0; /* stop active adjtime() */
+ time_status |= STA_UNSYNC;
+ time_maxerror = NTP_PHASE_LIMIT;
+ time_esterror = NTP_PHASE_LIMIT;
+
+ write_sequnlock_irq(&xtime_lock);
+ clock_was_set();
+ return 0;
+}
+
+EXPORT_SYMBOL(do_settimeofday);
+
+unsigned long profile_pc(struct pt_regs *regs)
+{
+ unsigned long pc = instruction_pointer(regs);
+
+ /* Assume the lock function has either no stack frame or only a single word.
+ This checks if the address on the stack looks like a kernel text address.
+ There is a small window for false hits, but in that case the tick
+ is just accounted to the spinlock function.
+ Better would be to write these functions in assembler again
+ and check exactly. */
+ if (in_lock_functions(pc)) {
+ char *v = *(char **)regs->rsp;
+ if ((v >= _stext && v <= _etext) ||
+ (v >= _sinittext && v <= _einittext) ||
+ (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END))
+ return (unsigned long)v;
+ return ((unsigned long *)regs->rsp)[1];
+ }
+ return pc;
+}
+EXPORT_SYMBOL(profile_pc);
+
+/*
+ * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500
+ * ms after the second nowtime has started, because when nowtime is written
+ * into the registers of the CMOS clock, it will jump to the next second
+ * precisely 500 ms later. Check the Motorola MC146818A or Dallas DS12887 data
+ * sheet for details.
+ */
+
+static void set_rtc_mmss(unsigned long nowtime)
+{
+ int real_seconds, real_minutes, cmos_minutes;
+ unsigned char control, freq_select;
+
+/*
+ * IRQs are disabled when we're called from the timer interrupt,
+ * no need for spin_lock_irqsave()
+ */
+
+ spin_lock(&rtc_lock);
+
+/*
+ * Tell the clock it's being set and stop it.
+ */
+
+ control = CMOS_READ(RTC_CONTROL);
+ CMOS_WRITE(control | RTC_SET, RTC_CONTROL);
+
+ freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE(freq_select | RTC_DIV_RESET2, RTC_FREQ_SELECT);
+
+ cmos_minutes = CMOS_READ(RTC_MINUTES);
+ BCD_TO_BIN(cmos_minutes);
+
+/*
+ * since we're only adjusting minutes and seconds, don't interfere with hour
+ * overflow. This avoids messing with unknown time zones but requires your RTC
+ * not to be off by more than 15 minutes. Since we're calling it only when
+ * our clock is externally synchronized using NTP, this shouldn't be a problem.
+ */
+
+ real_seconds = nowtime % 60;
+ real_minutes = nowtime / 60;
+ if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
+ real_minutes += 30; /* correct for half hour time zone */
+ real_minutes %= 60;
+
+#if 0
+ /* AMD 8111 is a really bad time keeper and hits this regularly.
+ It probably was an attempt to avoid screwing up DST, but ignore
+ that for now. */
+ if (abs(real_minutes - cmos_minutes) >= 30) {
+ printk(KERN_WARNING "time.c: can't update CMOS clock "
+ "from %d to %d\n", cmos_minutes, real_minutes);
+ } else
+#endif
+
+ {
+ BIN_TO_BCD(real_seconds);
+ BIN_TO_BCD(real_minutes);
+ CMOS_WRITE(real_seconds, RTC_SECONDS);
+ CMOS_WRITE(real_minutes, RTC_MINUTES);
+ }
+
+/*
+ * The following flags have to be released exactly in this order, otherwise the
+ * DS12887 (popular MC146818A clone with integrated battery and quartz) will
+ * not reset the oscillator and will not update precisely 500 ms later. You
+ * won't find this mentioned in the Dallas Semiconductor data sheets, but who
+ * believes data sheets anyway ... -- Markus Kuhn
+ */
+
+ CMOS_WRITE(control, RTC_CONTROL);
+ CMOS_WRITE(freq_select, RTC_FREQ_SELECT);
+
+ spin_unlock(&rtc_lock);
+}
+
+
+/* monotonic_clock(): returns # of nanoseconds passed since time_init()
+ * Note: This function is required to return accurate
+ * time even in the absence of multiple timer ticks.
+ */
+unsigned long long monotonic_clock(void)
+{
+ unsigned long seq;
+ u32 last_offset, this_offset, offset;
+ unsigned long long base;
+
+ if (vxtime.mode == VXTIME_HPET) {
+ do {
+ seq = read_seqbegin(&xtime_lock);
+
+ last_offset = vxtime.last;
+ base = monotonic_base;
+ this_offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
+
+ } while (read_seqretry(&xtime_lock, seq));
+ offset = (this_offset - last_offset);
+ offset *=(NSEC_PER_SEC/HZ)/hpet_tick;
+ return base + offset;
+ }else{
+ do {
+ seq = read_seqbegin(&xtime_lock);
+
+ last_offset = vxtime.last_tsc;
+ base = monotonic_base;
+ } while (read_seqretry(&xtime_lock, seq));
+ sync_core();
+ rdtscll(this_offset);
+ offset = (this_offset - last_offset)*1000/cpu_khz;
+ return base + offset;
+ }
+
+
+}
+EXPORT_SYMBOL(monotonic_clock);
+
+static noinline void handle_lost_ticks(int lost, struct pt_regs *regs)
+{
+ static long lost_count;
+ static int warned;
+
+ if (report_lost_ticks) {
+ printk(KERN_WARNING "time.c: Lost %d timer "
+ "tick(s)! ", lost);
+ print_symbol("rip %s)\n", regs->rip);
+ }
+
+ if (lost_count == 1000 && !warned) {
+ printk(KERN_WARNING
+ "warning: many lost ticks.\n"
+ KERN_WARNING "Your time source seems to be instable or "
+ "some driver is hogging interupts\n");
+ print_symbol("rip %s\n", regs->rip);
+ if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) {
+ printk(KERN_WARNING "Falling back to HPET\n");
+ vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ vxtime.mode = VXTIME_HPET;
+ do_gettimeoffset = do_gettimeoffset_hpet;
+ }
+ /* else should fall back to PIT, but code missing. */
+ warned = 1;
+ } else
+ lost_count++;
+
+#ifdef CONFIG_CPU_FREQ
+ /* In some cases the CPU can change frequency without us noticing
+ (like going into thermal throttle)
+ Give cpufreq a change to catch up. */
+ if ((lost_count+1) % 25 == 0) {
+ cpufreq_delayed_get();
+ }
+#endif
+}
+
+static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ static unsigned long rtc_update = 0;
+ unsigned long tsc;
+ int delay, offset = 0, lost = 0;
+
+/*
+ * Here we are in the timer irq handler. We have irqs locally disabled (so we
+ * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
+ * on the other CPU, so we need a lock. We also need to lock the vsyscall
+ * variables, because both do_timer() and us change them -arca+vojtech
+ */
+
+ write_seqlock(&xtime_lock);
+
+ if (vxtime.hpet_address) {
+ offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ delay = hpet_readl(HPET_COUNTER) - offset;
+ } else {
+ spin_lock(&i8253_lock);
+ outb_p(0x00, 0x43);
+ delay = inb_p(0x40);
+ delay |= inb(0x40) << 8;
+ spin_unlock(&i8253_lock);
+ delay = LATCH - 1 - delay;
+ }
+
+ rdtscll_sync(&tsc);
+
+ if (vxtime.mode == VXTIME_HPET) {
+ if (offset - vxtime.last > hpet_tick) {
+ lost = (offset - vxtime.last) / hpet_tick - 1;
+ }
+
+ monotonic_base +=
+ (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick;
+
+ vxtime.last = offset;
+ } else {
+ offset = (((tsc - vxtime.last_tsc) *
+ vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ);
+
+ if (offset < 0)
+ offset = 0;
+
+ if (offset > (USEC_PER_SEC / HZ)) {
+ lost = offset / (USEC_PER_SEC / HZ);
+ offset %= (USEC_PER_SEC / HZ);
+ }
+
+ monotonic_base += (tsc - vxtime.last_tsc)*1000000/cpu_khz ;
+
+ vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
+
+ if ((((tsc - vxtime.last_tsc) *
+ vxtime.tsc_quot) >> 32) < offset)
+ vxtime.last_tsc = tsc -
+ (((long) offset << 32) / vxtime.tsc_quot) - 1;
+ }
+
+ if (lost > 0) {
+ handle_lost_ticks(lost, regs);
+ jiffies += lost;
+ }
+
+/*
+ * Do the timer stuff.
+ */
+
+ do_timer(regs);
+#ifndef CONFIG_SMP
+ update_process_times(user_mode(regs));
+#endif
+
+/*
+ * In the SMP case we use the local APIC timer interrupt to do the profiling,
+ * except when we simulate SMP mode on a uniprocessor system, in that case we
+ * have to call the local interrupt handler.
+ */
+
+#ifndef CONFIG_X86_LOCAL_APIC
+ profile_tick(CPU_PROFILING, regs);
+#else
+ if (!using_apic_timer)
+ smp_local_timer_interrupt(regs);
+#endif
+
+/*
+ * If we have an externally synchronized Linux clock, then update CMOS clock
+ * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
+ * closest to exactly 500 ms before the next second. If the update fails, we
+ * don't care, as it'll be updated on the next turn, and the problem (time way
+ * off) isn't likely to go away much sooner anyway.
+ */
+
+ if ((~time_status & STA_UNSYNC) && xtime.tv_sec > rtc_update &&
+ abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
+ set_rtc_mmss(xtime.tv_sec);
+ rtc_update = xtime.tv_sec + 660;
+ }
+
+ write_sequnlock(&xtime_lock);
+
+ return IRQ_HANDLED;
+}
+
+static unsigned int cyc2ns_scale;
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
+{
+ cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+}
+
+unsigned long long sched_clock(void)
+{
+ unsigned long a = 0;
+
+#if 0
+ /* Don't do a HPET read here. Using TSC always is much faster
+ and HPET may not be mapped yet when the scheduler first runs.
+ Disadvantage is a small drift between CPUs in some configurations,
+ but that should be tolerable. */
+ if (__vxtime.mode == VXTIME_HPET)
+ return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> 32;
+#endif
+
+ /* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
+ which means it is not completely exact and may not be monotonous between
+ CPUs. But the errors should be too small to matter for scheduling
+ purposes. */
+
+ rdtscll(a);
+ return cycles_2_ns(a);
+}
+
+unsigned long get_cmos_time(void)
+{
+ unsigned int timeout, year, mon, day, hour, min, sec;
+ unsigned char last, this;
+ unsigned long flags;
+
+/*
+ * The Linux interpretation of the CMOS clock register contents: When the
+ * Update-In-Progress (UIP) flag goes from 1 to 0, the RTC registers show the
+ * second which has precisely just started. Waiting for this can take up to 1
+ * second, we timeout approximately after 2.4 seconds on a machine with
+ * standard 8.3 MHz ISA bus.
+ */
+
+ spin_lock_irqsave(&rtc_lock, flags);
+
+ timeout = 1000000;
+ last = this = 0;
+
+ while (timeout && last && !this) {
+ last = this;
+ this = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP;
+ timeout--;
+ }
+
+/*
+ * Here we are safe to assume the registers won't change for a whole second, so
+ * we just go ahead and read them.
+ */
+
+ sec = CMOS_READ(RTC_SECONDS);
+ min = CMOS_READ(RTC_MINUTES);
+ hour = CMOS_READ(RTC_HOURS);
+ day = CMOS_READ(RTC_DAY_OF_MONTH);
+ mon = CMOS_READ(RTC_MONTH);
+ year = CMOS_READ(RTC_YEAR);
+
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
+/*
+ * We know that x86-64 always uses BCD format, no need to check the config
+ * register.
+ */
+
+ BCD_TO_BIN(sec);
+ BCD_TO_BIN(min);
+ BCD_TO_BIN(hour);
+ BCD_TO_BIN(day);
+ BCD_TO_BIN(mon);
+ BCD_TO_BIN(year);
+
+/*
+ * x86-64 systems only exists since 2002.
+ * This will work up to Dec 31, 2100
+ */
+ year += 2000;
+
+ return mktime(year, mon, day, hour, min, sec);
+}
+
+#ifdef CONFIG_CPU_FREQ
+
+/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
+ changes.
+
+ RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
+ not that important because current Opteron setups do not support
+ scaling on SMP anyroads.
+
+ Should fix up last_tsc too. Currently gettimeofday in the
+ first tick after the change will be slightly wrong. */
+
+#include <linux/workqueue.h>
+
+static unsigned int cpufreq_delayed_issched = 0;
+static unsigned int cpufreq_init = 0;
+static struct work_struct cpufreq_delayed_get_work;
+
+static void handle_cpufreq_delayed_get(void *v)
+{
+ unsigned int cpu;
+ for_each_online_cpu(cpu) {
+ cpufreq_get(cpu);
+ }
+ cpufreq_delayed_issched = 0;
+}
+
+/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
+ * to verify the CPU frequency the timing core thinks the CPU is running
+ * at is still correct.
+ */
+static void cpufreq_delayed_get(void)
+{
+ static int warned;
+ if (cpufreq_init && !cpufreq_delayed_issched) {
+ cpufreq_delayed_issched = 1;
+ if (!warned) {
+ warned = 1;
+ printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
+ }
+ schedule_work(&cpufreq_delayed_get_work);
+ }
+}
+
+static unsigned int ref_freq = 0;
+static unsigned long loops_per_jiffy_ref = 0;
+
+static unsigned long cpu_khz_ref = 0;
+
+static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ unsigned long *lpj, dummy;
+
+ lpj = &dummy;
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+#ifdef CONFIG_SMP
+ lpj = &cpu_data[freq->cpu].loops_per_jiffy;
+#else
+ lpj = &boot_cpu_data.loops_per_jiffy;
+#endif
+
+
+
+ if (!ref_freq) {
+ ref_freq = freq->old;
+ loops_per_jiffy_ref = *lpj;
+ cpu_khz_ref = cpu_khz;
+ }
+ if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+ (val == CPUFREQ_RESUMECHANGE)) {
+ *lpj =
+ cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+
+ cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+ vxtime.tsc_quot = (1000L << 32) / cpu_khz;
+ }
+
+ set_cyc2ns_scale(cpu_khz_ref / 1000);
+
+ return 0;
+}
+
+static struct notifier_block time_cpufreq_notifier_block = {
+ .notifier_call = time_cpufreq_notifier
+};
+
+static int __init cpufreq_tsc(void)
+{
+ INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+ if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER))
+ cpufreq_init = 1;
+ return 0;
+}
+
+core_initcall(cpufreq_tsc);
+
+#endif
+
+/*
+ * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing
+ * it to the HPET timer of known frequency.
+ */
+
+#define TICK_COUNT 100000000
+
+static unsigned int __init hpet_calibrate_tsc(void)
+{
+ int tsc_start, hpet_start;
+ int tsc_now, hpet_now;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ local_irq_disable();
+
+ hpet_start = hpet_readl(HPET_COUNTER);
+ rdtscl(tsc_start);
+
+ do {
+ local_irq_disable();
+ hpet_now = hpet_readl(HPET_COUNTER);
+ sync_core();
+ rdtscl(tsc_now);
+ local_irq_restore(flags);
+ } while ((tsc_now - tsc_start) < TICK_COUNT &&
+ (hpet_now - hpet_start) < TICK_COUNT);
+
+ return (tsc_now - tsc_start) * 1000000000L
+ / ((hpet_now - hpet_start) * hpet_period / 1000);
+}
+
+
+/*
+ * pit_calibrate_tsc() uses the speaker output (channel 2) of
+ * the PIT. This is better than using the timer interrupt output,
+ * because we can read the value of the speaker with just one inb(),
+ * where we need three i/o operations for the interrupt channel.
+ * We count how many ticks the TSC does in 50 ms.
+ */
+
+static unsigned int __init pit_calibrate_tsc(void)
+{
+ unsigned long start, end;
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+
+ outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+ outb(0xb0, 0x43);
+ outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
+ outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42);
+ rdtscll(start);
+ sync_core();
+ while ((inb(0x61) & 0x20) == 0);
+ sync_core();
+ rdtscll(end);
+
+ spin_unlock_irqrestore(&i8253_lock, flags);
+
+ return (end - start) / 50;
+}
+
+#ifdef CONFIG_HPET
+static __init int late_hpet_init(void)
+{
+ struct hpet_data hd;
+ unsigned int ntimer;
+
+ if (!vxtime.hpet_address)
+ return -1;
+
+ memset(&hd, 0, sizeof (hd));
+
+ ntimer = hpet_readl(HPET_ID);
+ ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
+ ntimer++;
+
+ /*
+ * Register with driver.
+ * Timer0 and Timer1 is used by platform.
+ */
+ hd.hd_phys_address = vxtime.hpet_address;
+ hd.hd_address = (void *)fix_to_virt(FIX_HPET_BASE);
+ hd.hd_nirqs = ntimer;
+ hd.hd_flags = HPET_DATA_PLATFORM;
+ hpet_reserve_timer(&hd, 0);
+#ifdef CONFIG_HPET_EMULATE_RTC
+ hpet_reserve_timer(&hd, 1);
+#endif
+ hd.hd_irq[0] = HPET_LEGACY_8254;
+ hd.hd_irq[1] = HPET_LEGACY_RTC;
+ if (ntimer > 2) {
+ struct hpet *hpet;
+ struct hpet_timer *timer;
+ int i;
+
+ hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE);
+
+ for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer;
+ timer++, i++)
+ hd.hd_irq[i] = (timer->hpet_config &
+ Tn_INT_ROUTE_CNF_MASK) >>
+ Tn_INT_ROUTE_CNF_SHIFT;
+
+ }
+
+ hpet_alloc(&hd);
+ return 0;
+}
+fs_initcall(late_hpet_init);
+#endif
+
+static int hpet_timer_stop_set_go(unsigned long tick)
+{
+ unsigned int cfg;
+
+/*
+ * Stop the timers and reset the main counter.
+ */
+
+ cfg = hpet_readl(HPET_CFG);
+ cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
+ hpet_writel(cfg, HPET_CFG);
+ hpet_writel(0, HPET_COUNTER);
+ hpet_writel(0, HPET_COUNTER + 4);
+
+/*
+ * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
+ * and period also hpet_tick.
+ */
+
+ hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
+ HPET_TN_32BIT, HPET_T0_CFG);
+ hpet_writel(hpet_tick, HPET_T0_CMP);
+ hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */
+
+/*
+ * Go!
+ */
+
+ cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY;
+ hpet_writel(cfg, HPET_CFG);
+
+ return 0;
+}
+
+static int hpet_init(void)
+{
+ unsigned int id;
+
+ if (!vxtime.hpet_address)
+ return -1;
+ set_fixmap_nocache(FIX_HPET_BASE, vxtime.hpet_address);
+ __set_fixmap(VSYSCALL_HPET, vxtime.hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
+
+/*
+ * Read the period, compute tick and quotient.
+ */
+
+ id = hpet_readl(HPET_ID);
+
+ if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) ||
+ !(id & HPET_ID_LEGSUP))
+ return -1;
+
+ hpet_period = hpet_readl(HPET_PERIOD);
+ if (hpet_period < 100000 || hpet_period > 100000000)
+ return -1;
+
+ hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) /
+ hpet_period;
+
+ return hpet_timer_stop_set_go(hpet_tick);
+}
+
+static int hpet_reenable(void)
+{
+ return hpet_timer_stop_set_go(hpet_tick);
+}
+
+void __init pit_init(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(0x34, 0x43); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(LATCH & 0xff, 0x40); /* LSB */
+ outb_p(LATCH >> 8, 0x40); /* MSB */
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+int __init time_setup(char *str)
+{
+ report_lost_ticks = 1;
+ return 1;
+}
+
+static struct irqaction irq0 = {
+ timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL
+};
+
+extern void __init config_acpi_tables(void);
+
+void __init time_init(void)
+{
+ char *timename;
+
+#ifdef HPET_HACK_ENABLE_DANGEROUS
+ if (!vxtime.hpet_address) {
+ printk(KERN_WARNING "time.c: WARNING: Enabling HPET base "
+ "manually!\n");
+ outl(0x800038a0, 0xcf8);
+ outl(0xff000001, 0xcfc);
+ outl(0x800038a0, 0xcf8);
+ vxtime.hpet_address = inl(0xcfc) & 0xfffffffe;
+ printk(KERN_WARNING "time.c: WARNING: Enabled HPET "
+ "at %#lx.\n", vxtime.hpet_address);
+ }
+#endif
+ if (nohpet)
+ vxtime.hpet_address = 0;
+
+ xtime.tv_sec = get_cmos_time();
+ xtime.tv_nsec = 0;
+
+ set_normalized_timespec(&wall_to_monotonic,
+ -xtime.tv_sec, -xtime.tv_nsec);
+
+ if (!hpet_init()) {
+ vxtime_hz = (1000000000000000L + hpet_period / 2) /
+ hpet_period;
+ cpu_khz = hpet_calibrate_tsc();
+ timename = "HPET";
+ } else {
+ pit_init();
+ cpu_khz = pit_calibrate_tsc();
+ timename = "PIT";
+ }
+
+ printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n",
+ vxtime_hz / 1000000, vxtime_hz % 1000000, timename);
+ printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
+ cpu_khz / 1000, cpu_khz % 1000);
+ vxtime.mode = VXTIME_TSC;
+ vxtime.quot = (1000000L << 32) / vxtime_hz;
+ vxtime.tsc_quot = (1000L << 32) / cpu_khz;
+ vxtime.hz = vxtime_hz;
+ rdtscll_sync(&vxtime.last_tsc);
+ setup_irq(0, &irq0);
+
+ set_cyc2ns_scale(cpu_khz / 1000);
+}
+
+void __init time_init_smp(void)
+{
+ char *timetype;
+
+ /*
+ * AMD systems with more than one CPU don't have fully synchronized
+ * TSCs. Always use HPET gettimeofday for these, although it is slower.
+ * Intel SMP systems usually have synchronized TSCs, so use always
+ * the TSC.
+ *
+ * Exceptions:
+ * IBM Summit2 checked by oem_force_hpet_timer().
+ * AMD dual core may also not need HPET. Check me.
+ *
+ * Can be turned off with "notsc".
+ */
+ if (num_online_cpus() > 1 &&
+ boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ notsc = 1;
+ /* Some systems will want to disable TSC and use HPET. */
+ if (oem_force_hpet_timer())
+ notsc = 1;
+ if (vxtime.hpet_address && notsc) {
+ timetype = "HPET";
+ vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ vxtime.mode = VXTIME_HPET;
+ do_gettimeoffset = do_gettimeoffset_hpet;
+ } else {
+ timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC";
+ vxtime.mode = VXTIME_TSC;
+ }
+
+ printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype);
+}
+
+__setup("report_lost_ticks", time_setup);
+
+static long clock_cmos_diff;
+static unsigned long sleep_start;
+
+static int timer_suspend(struct sys_device *dev, u32 state)
+{
+ /*
+ * Estimate time zone so that set_time can update the clock
+ */
+ long cmos_time = get_cmos_time();
+
+ clock_cmos_diff = -cmos_time;
+ clock_cmos_diff += get_seconds();
+ sleep_start = cmos_time;
+ return 0;
+}
+
+static int timer_resume(struct sys_device *dev)
+{
+ unsigned long flags;
+ unsigned long sec;
+ unsigned long ctime = get_cmos_time();
+ unsigned long sleep_length = (ctime - sleep_start) * HZ;
+
+ if (vxtime.hpet_address)
+ hpet_reenable();
+ else
+ i8254_timer_resume();
+
+ sec = ctime + clock_cmos_diff;
+ write_seqlock_irqsave(&xtime_lock,flags);
+ xtime.tv_sec = sec;
+ xtime.tv_nsec = 0;
+ write_sequnlock_irqrestore(&xtime_lock,flags);
+ jiffies += sleep_length;
+ wall_jiffies += sleep_length;
+ return 0;
+}
+
+static struct sysdev_class timer_sysclass = {
+ .resume = timer_resume,
+ .suspend = timer_suspend,
+ set_kset_name("timer"),
+};
+
+
+/* XXX this driverfs stuff should probably go elsewhere later -john */
+static struct sys_device device_timer = {
+ .id = 0,
+ .cls = &timer_sysclass,
+};
+
+static int time_init_device(void)
+{
+ int error = sysdev_class_register(&timer_sysclass);
+ if (!error)
+ error = sysdev_register(&device_timer);
+ return error;
+}
+
+device_initcall(time_init_device);
+
+#ifdef CONFIG_HPET_EMULATE_RTC
+/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET
+ * is enabled, we support RTC interrupt functionality in software.
+ * RTC has 3 kinds of interrupts:
+ * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
+ * is updated
+ * 2) Alarm Interrupt - generate an interrupt at a specific time of day
+ * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
+ * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
+ * (1) and (2) above are implemented using polling at a frequency of
+ * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
+ * overhead. (DEFAULT_RTC_INT_FREQ)
+ * For (3), we use interrupts at 64Hz or user specified periodic
+ * frequency, whichever is higher.
+ */
+#include <linux/rtc.h>
+
+extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+
+#define DEFAULT_RTC_INT_FREQ 64
+#define RTC_NUM_INTS 1
+
+static unsigned long UIE_on;
+static unsigned long prev_update_sec;
+
+static unsigned long AIE_on;
+static struct rtc_time alarm_time;
+
+static unsigned long PIE_on;
+static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
+static unsigned long PIE_count;
+
+static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
+
+int is_hpet_enabled(void)
+{
+ return vxtime.hpet_address != 0;
+}
+
+/*
+ * Timer 1 for RTC, we do not use periodic interrupt feature,
+ * even if HPET supports periodic interrupts on Timer 1.
+ * The reason being, to set up a periodic interrupt in HPET, we need to
+ * stop the main counter. And if we do that everytime someone diables/enables
+ * RTC, we will have adverse effect on main kernel timer running on Timer 0.
+ * So, for the time being, simulate the periodic interrupt in software.
+ *
+ * hpet_rtc_timer_init() is called for the first time and during subsequent
+ * interuppts reinit happens through hpet_rtc_timer_reinit().
+ */
+int hpet_rtc_timer_init(void)
+{
+ unsigned int cfg, cnt;
+ unsigned long flags;
+
+ if (!is_hpet_enabled())
+ return 0;
+ /*
+ * Set the counter 1 and enable the interrupts.
+ */
+ if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
+ hpet_rtc_int_freq = PIE_freq;
+ else
+ hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
+
+ local_irq_save(flags);
+ cnt = hpet_readl(HPET_COUNTER);
+ cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
+ hpet_writel(cnt, HPET_T1_CMP);
+ local_irq_restore(flags);
+
+ cfg = hpet_readl(HPET_T1_CFG);
+ cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT;
+ hpet_writel(cfg, HPET_T1_CFG);
+
+ return 1;
+}
+
+static void hpet_rtc_timer_reinit(void)
+{
+ unsigned int cfg, cnt;
+
+ if (!(PIE_on | AIE_on | UIE_on))
+ return;
+
+ if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
+ hpet_rtc_int_freq = PIE_freq;
+ else
+ hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
+
+ /* It is more accurate to use the comparator value than current count.*/
+ cnt = hpet_readl(HPET_T1_CMP);
+ cnt += hpet_tick*HZ/hpet_rtc_int_freq;
+ hpet_writel(cnt, HPET_T1_CMP);
+
+ cfg = hpet_readl(HPET_T1_CFG);
+ cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT;
+ hpet_writel(cfg, HPET_T1_CFG);
+
+ return;
+}
+
+/*
+ * The functions below are called from rtc driver.
+ * Return 0 if HPET is not being used.
+ * Otherwise do the necessary changes and return 1.
+ */
+int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
+{
+ if (!is_hpet_enabled())
+ return 0;
+
+ if (bit_mask & RTC_UIE)
+ UIE_on = 0;
+ if (bit_mask & RTC_PIE)
+ PIE_on = 0;
+ if (bit_mask & RTC_AIE)
+ AIE_on = 0;
+
+ return 1;
+}
+
+int hpet_set_rtc_irq_bit(unsigned long bit_mask)
+{
+ int timer_init_reqd = 0;
+
+ if (!is_hpet_enabled())
+ return 0;
+
+ if (!(PIE_on | AIE_on | UIE_on))
+ timer_init_reqd = 1;
+
+ if (bit_mask & RTC_UIE) {
+ UIE_on = 1;
+ }
+ if (bit_mask & RTC_PIE) {
+ PIE_on = 1;
+ PIE_count = 0;
+ }
+ if (bit_mask & RTC_AIE) {
+ AIE_on = 1;
+ }
+
+ if (timer_init_reqd)
+ hpet_rtc_timer_init();
+
+ return 1;
+}
+
+int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
+{
+ if (!is_hpet_enabled())
+ return 0;
+
+ alarm_time.tm_hour = hrs;
+ alarm_time.tm_min = min;
+ alarm_time.tm_sec = sec;
+
+ return 1;
+}
+
+int hpet_set_periodic_freq(unsigned long freq)
+{
+ if (!is_hpet_enabled())
+ return 0;
+
+ PIE_freq = freq;
+ PIE_count = 0;
+
+ return 1;
+}
+
+int hpet_rtc_dropped_irq(void)
+{
+ if (!is_hpet_enabled())
+ return 0;
+
+ return 1;
+}
+
+irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct rtc_time curr_time;
+ unsigned long rtc_int_flag = 0;
+ int call_rtc_interrupt = 0;
+
+ hpet_rtc_timer_reinit();
+
+ if (UIE_on | AIE_on) {
+ rtc_get_rtc_time(&curr_time);
+ }
+ if (UIE_on) {
+ if (curr_time.tm_sec != prev_update_sec) {
+ /* Set update int info, call real rtc int routine */
+ call_rtc_interrupt = 1;
+ rtc_int_flag = RTC_UF;
+ prev_update_sec = curr_time.tm_sec;
+ }
+ }
+ if (PIE_on) {
+ PIE_count++;
+ if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
+ /* Set periodic int info, call real rtc int routine */
+ call_rtc_interrupt = 1;
+ rtc_int_flag |= RTC_PF;
+ PIE_count = 0;
+ }
+ }
+ if (AIE_on) {
+ if ((curr_time.tm_sec == alarm_time.tm_sec) &&
+ (curr_time.tm_min == alarm_time.tm_min) &&
+ (curr_time.tm_hour == alarm_time.tm_hour)) {
+ /* Set alarm int info, call real rtc int routine */
+ call_rtc_interrupt = 1;
+ rtc_int_flag |= RTC_AF;
+ }
+ }
+ if (call_rtc_interrupt) {
+ rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
+ rtc_interrupt(rtc_int_flag, dev_id, regs);
+ }
+ return IRQ_HANDLED;
+}
+#endif
+
+
+
+static int __init nohpet_setup(char *s)
+{
+ nohpet = 1;
+ return 0;
+}
+
+__setup("nohpet", nohpet_setup);
+
+
+static int __init notsc_setup(char *s)
+{
+ notsc = 1;
+ return 0;
+}
+
+__setup("notsc", notsc_setup);
+
+