summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel/time.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel/time.c')
-rw-r--r--arch/powerpc/kernel/time.c503
1 files changed, 227 insertions, 276 deletions
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index c627cf86d1e..9368da371f3 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -65,24 +65,68 @@
#include <asm/div64.h>
#include <asm/smp.h>
#include <asm/vdso_datapage.h>
-#ifdef CONFIG_PPC64
#include <asm/firmware.h>
-#endif
#ifdef CONFIG_PPC_ISERIES
#include <asm/iseries/it_lp_queue.h>
#include <asm/iseries/hv_call_xm.h>
#endif
-#include <asm/smp.h>
-/* keep track of when we need to update the rtc */
-time_t last_rtc_update;
+/* powerpc clocksource/clockevent code */
+
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+
+/*
+ * Clocksource read through rtc_read(), i.e. get_rtc().
+ * clocksource_init() picks this one when __USE_RTC() is true and fills
+ * in .mult from tb_ticks_per_sec before registering it.
+ */
+static cycle_t rtc_read(void);
+static struct clocksource clocksource_rtc = {
+ .name = "rtc",
+ .rating = 400,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .mask = CLOCKSOURCE_MASK(64),
+ .shift = 22,
+ .mult = 0, /* To be filled in by clocksource_init() */
+ .read = rtc_read,
+};
+
+/*
+ * Clocksource read through timebase_read(), i.e. get_tb().
+ * clocksource_init() picks this one when __USE_RTC() is false and fills
+ * in .mult from tb_ticks_per_sec. update_vsyscall() only acts when this
+ * clocksource is the one passed in, and its t2x computation assumes the
+ * .shift of 22 set here.
+ */
+static cycle_t timebase_read(void);
+static struct clocksource clocksource_timebase = {
+ .name = "timebase",
+ .rating = 400,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .mask = CLOCKSOURCE_MASK(64),
+ .shift = 22,
+ .mult = 0, /* To be filled in by clocksource_init() */
+ .read = timebase_read,
+};
+
+/* Largest count this file ever loads into the decrementer. */
+#define DECREMENTER_MAX 0x7fffffff
+
+/* Callbacks wired into decrementer_clockevent; defined further down. */
+static int decrementer_set_next_event(unsigned long evt,
+ struct clock_event_device *dev);
+static void decrementer_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *dev);
+
+/*
+ * Template clock event device for the decrementer.
+ * register_decrementer_clockevent() copies it into a CPU's slot of the
+ * per-CPU 'decrementers' variable and registers that copy.
+ */
+static struct clock_event_device decrementer_clockevent = {
+ .name = "decrementer",
+ .rating = 200,
+ .shift = 16,
+ .mult = 0, /* To be filled in by init_decrementer_clockevent() */
+ .irq = 0,
+ .set_next_event = decrementer_set_next_event,
+ .set_mode = decrementer_set_mode,
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+};
+
+/* Per-CPU copy of decrementer_clockevent; this copy is what gets registered. */
+static DEFINE_PER_CPU(struct clock_event_device, decrementers);
+void init_decrementer_clockevent(void);
+/*
+ * Per-CPU get_tb_or_rtc() value at which the next event is due; written
+ * by decrementer_set_next_event() and checked in timer_interrupt().
+ */
+static DEFINE_PER_CPU(u64, decrementer_next_tb);
+
#ifdef CONFIG_PPC_ISERIES
static unsigned long __initdata iSeries_recal_titan;
static signed long __initdata iSeries_recal_tb;
-#endif
-/* The decrementer counts down by 128 every 128ns on a 601. */
-#define DECREMENTER_COUNT_601 (1000000000 / HZ)
+/* Forward declaration is only needed for iSeries compiles */
+void __init clocksource_init(void);
+#endif
#define XSEC_PER_SEC (1024*1024)
@@ -349,98 +393,6 @@ void udelay(unsigned long usecs)
}
EXPORT_SYMBOL(udelay);
-static __inline__ void timer_check_rtc(void)
-{
- /*
- * update the rtc when needed, this should be performed on the
- * right fraction of a second. Half or full second ?
- * Full second works on mk48t59 clocks, others need testing.
- * Note that this update is basically only used through
- * the adjtimex system calls. Setting the HW clock in
- * any other way is a /dev/rtc and userland business.
- * This is still wrong by -0.5/+1.5 jiffies because of the
- * timer interrupt resolution and possible delay, but here we
- * hit a quantization limit which can only be solved by higher
- * resolution timers and decoupling time management from timer
- * interrupts. This is also wrong on the clocks
- * which require being written at the half second boundary.
- * We should have an rtc call that only sets the minutes and
- * seconds like on Intel to avoid problems with non UTC clocks.
- */
- if (ppc_md.set_rtc_time && ntp_synced() &&
- xtime.tv_sec - last_rtc_update >= 659 &&
- abs((xtime.tv_nsec/1000) - (1000000-1000000/HZ)) < 500000/HZ) {
- struct rtc_time tm;
- to_tm(xtime.tv_sec + 1 + timezone_offset, &tm);
- tm.tm_year -= 1900;
- tm.tm_mon -= 1;
- if (ppc_md.set_rtc_time(&tm) == 0)
- last_rtc_update = xtime.tv_sec + 1;
- else
- /* Try again one minute later */
- last_rtc_update += 60;
- }
-}
-
-/*
- * This version of gettimeofday has microsecond resolution.
- */
-static inline void __do_gettimeofday(struct timeval *tv)
-{
- unsigned long sec, usec;
- u64 tb_ticks, xsec;
- struct gettimeofday_vars *temp_varp;
- u64 temp_tb_to_xs, temp_stamp_xsec;
-
- /*
- * These calculations are faster (gets rid of divides)
- * if done in units of 1/2^20 rather than microseconds.
- * The conversion to microseconds at the end is done
- * without a divide (and in fact, without a multiply)
- */
- temp_varp = do_gtod.varp;
-
- /* Sampling the time base must be done after loading
- * do_gtod.varp in order to avoid racing with update_gtod.
- */
- data_barrier(temp_varp);
- tb_ticks = get_tb() - temp_varp->tb_orig_stamp;
- temp_tb_to_xs = temp_varp->tb_to_xs;
- temp_stamp_xsec = temp_varp->stamp_xsec;
- xsec = temp_stamp_xsec + mulhdu(tb_ticks, temp_tb_to_xs);
- sec = xsec / XSEC_PER_SEC;
- usec = (unsigned long)xsec & (XSEC_PER_SEC - 1);
- usec = SCALE_XSEC(usec, 1000000);
-
- tv->tv_sec = sec;
- tv->tv_usec = usec;
-}
-
-void do_gettimeofday(struct timeval *tv)
-{
- if (__USE_RTC()) {
- /* do this the old way */
- unsigned long flags, seq;
- unsigned int sec, nsec, usec;
-
- do {
- seq = read_seqbegin_irqsave(&xtime_lock, flags);
- sec = xtime.tv_sec;
- nsec = xtime.tv_nsec + tb_ticks_since(tb_last_jiffy);
- } while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
- usec = nsec / 1000;
- while (usec >= 1000000) {
- usec -= 1000000;
- ++sec;
- }
- tv->tv_sec = sec;
- tv->tv_usec = usec;
- return;
- }
- __do_gettimeofday(tv);
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
/*
* There are two copies of tb_to_xs and stamp_xsec so that no
@@ -486,56 +438,6 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
++(vdso_data->tb_update_count);
}
-/*
- * When the timebase - tb_orig_stamp gets too big, we do a manipulation
- * between tb_orig_stamp and stamp_xsec. The goal here is to keep the
- * difference tb - tb_orig_stamp small enough to always fit inside a
- * 32 bits number. This is a requirement of our fast 32 bits userland
- * implementation in the vdso. If we "miss" a call to this function
- * (interrupt latency, CPU locked in a spinlock, ...) and we end up
- * with a too big difference, then the vdso will fallback to calling
- * the syscall
- */
-static __inline__ void timer_recalc_offset(u64 cur_tb)
-{
- unsigned long offset;
- u64 new_stamp_xsec;
- u64 tlen, t2x;
- u64 tb, xsec_old, xsec_new;
- struct gettimeofday_vars *varp;
-
- if (__USE_RTC())
- return;
- tlen = current_tick_length();
- offset = cur_tb - do_gtod.varp->tb_orig_stamp;
- if (tlen == last_tick_len && offset < 0x80000000u)
- return;
- if (tlen != last_tick_len) {
- t2x = mulhdu(tlen << TICKLEN_SHIFT, ticklen_to_xs);
- last_tick_len = tlen;
- } else
- t2x = do_gtod.varp->tb_to_xs;
- new_stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC;
- do_div(new_stamp_xsec, 1000000000);
- new_stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC;
-
- ++vdso_data->tb_update_count;
- smp_mb();
-
- /*
- * Make sure time doesn't go backwards for userspace gettimeofday.
- */
- tb = get_tb();
- varp = do_gtod.varp;
- xsec_old = mulhdu(tb - varp->tb_orig_stamp, varp->tb_to_xs)
- + varp->stamp_xsec;
- xsec_new = mulhdu(tb - cur_tb, t2x) + new_stamp_xsec;
- if (xsec_new < xsec_old)
- new_stamp_xsec += xsec_old - xsec_new;
-
- update_gtod(cur_tb, new_stamp_xsec, t2x);
-}
-
#ifdef CONFIG_SMP
unsigned long profile_pc(struct pt_regs *regs)
{
@@ -607,6 +509,8 @@ static int __init iSeries_tb_recal(void)
iSeries_recal_titan = titan;
iSeries_recal_tb = tb;
+ /* Called here as now we know accurate values for the timebase */
+ clocksource_init();
return 0;
}
late_initcall(iSeries_tb_recal);
@@ -636,20 +540,30 @@ void __init iSeries_time_init_early(void)
void timer_interrupt(struct pt_regs * regs)
{
struct pt_regs *old_regs;
- int next_dec;
int cpu = smp_processor_id();
- unsigned long ticks;
- u64 tb_next_jiffy;
+ struct clock_event_device *evt = &per_cpu(decrementers, cpu);
+ u64 now;
+
+ /* Ensure a positive value is written to the decrementer, or else
+ * some CPUs will continue to take decrementer exceptions */
+ set_dec(DECREMENTER_MAX);
#ifdef CONFIG_PPC32
if (atomic_read(&ppc_n_lost_interrupts) != 0)
do_IRQ(regs);
#endif
+ now = get_tb_or_rtc();
+ if (now < per_cpu(decrementer_next_tb, cpu)) {
+ /* not time for this event yet */
+ now = per_cpu(decrementer_next_tb, cpu) - now;
+ if (now <= DECREMENTER_MAX)
+ set_dec((unsigned int)now - 1);
+ return;
+ }
old_regs = set_irq_regs(regs);
irq_enter();
- profile_tick(CPU_PROFILING);
calculate_steal_time();
#ifdef CONFIG_PPC_ISERIES
@@ -657,46 +571,20 @@ void timer_interrupt(struct pt_regs * regs)
get_lppaca()->int_dword.fields.decr_int = 0;
#endif
- while ((ticks = tb_ticks_since(per_cpu(last_jiffy, cpu)))
- >= tb_ticks_per_jiffy) {
- /* Update last_jiffy */
- per_cpu(last_jiffy, cpu) += tb_ticks_per_jiffy;
- /* Handle RTCL overflow on 601 */
- if (__USE_RTC() && per_cpu(last_jiffy, cpu) >= 1000000000)
- per_cpu(last_jiffy, cpu) -= 1000000000;
-
- /*
- * We cannot disable the decrementer, so in the period
- * between this cpu's being marked offline in cpu_online_map
- * and calling stop-self, it is taking timer interrupts.
- * Avoid calling into the scheduler rebalancing code if this
- * is the case.
- */
- if (!cpu_is_offline(cpu))
- account_process_time(regs);
-
- /*
- * No need to check whether cpu is offline here; boot_cpuid
- * should have been fixed up by now.
- */
- if (cpu != boot_cpuid)
- continue;
+ /*
+ * We cannot disable the decrementer, so in the period
+ * between this cpu's being marked offline in cpu_online_map
+ * and calling stop-self, it is taking timer interrupts.
+ * Avoid calling into the scheduler rebalancing code if this
+ * is the case.
+ */
+ if (!cpu_is_offline(cpu))
+ account_process_time(regs);
- write_seqlock(&xtime_lock);
- tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy;
- if (__USE_RTC() && tb_next_jiffy >= 1000000000)
- tb_next_jiffy -= 1000000000;
- if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) {
- tb_last_jiffy = tb_next_jiffy;
- do_timer(1);
- timer_recalc_offset(tb_last_jiffy);
- timer_check_rtc();
- }
- write_sequnlock(&xtime_lock);
- }
-
- next_dec = tb_ticks_per_jiffy - ticks;
- set_dec(next_dec);
+ if (evt->event_handler)
+ evt->event_handler(evt);
+ else
+ evt->set_next_event(DECREMENTER_MAX, evt);
#ifdef CONFIG_PPC_ISERIES
if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending())
@@ -762,71 +650,6 @@ unsigned long long sched_clock(void)
return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
}
-int do_settimeofday(struct timespec *tv)
-{
- time_t wtm_sec, new_sec = tv->tv_sec;
- long wtm_nsec, new_nsec = tv->tv_nsec;
- unsigned long flags;
- u64 new_xsec;
- unsigned long tb_delta;
-
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
- write_seqlock_irqsave(&xtime_lock, flags);
-
- /*
- * Updating the RTC is not the job of this code. If the time is
- * stepped under NTP, the RTC will be updated after STA_UNSYNC
- * is cleared. Tools like clock/hwclock either copy the RTC
- * to the system time, in which case there is no point in writing
- * to the RTC again, or write to the RTC but then they don't call
- * settimeofday to perform this operation.
- */
-
- /* Make userspace gettimeofday spin until we're done. */
- ++vdso_data->tb_update_count;
- smp_mb();
-
- /*
- * Subtract off the number of nanoseconds since the
- * beginning of the last tick.
- */
- tb_delta = tb_ticks_since(tb_last_jiffy);
- tb_delta = mulhdu(tb_delta, do_gtod.varp->tb_to_xs); /* in xsec */
- new_nsec -= SCALE_XSEC(tb_delta, 1000000000);
-
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - new_sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - new_nsec);
-
- set_normalized_timespec(&xtime, new_sec, new_nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- /* In case of a large backwards jump in time with NTP, we want the
- * clock to be updated as soon as the PLL is again in lock.
- */
- last_rtc_update = new_sec - 658;
-
- ntp_clear();
-
- new_xsec = xtime.tv_nsec;
- if (new_xsec != 0) {
- new_xsec *= XSEC_PER_SEC;
- do_div(new_xsec, NSEC_PER_SEC);
- }
- new_xsec += (u64)xtime.tv_sec * XSEC_PER_SEC;
- update_gtod(tb_last_jiffy, new_xsec, do_gtod.varp->tb_to_xs);
-
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-
- write_sequnlock_irqrestore(&xtime_lock, flags);
- clock_was_set();
- return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
static int __init get_freq(char *name, int cells, unsigned long *val)
{
struct device_node *cpu;
@@ -869,7 +692,7 @@ void __init generic_calibrate_decr(void)
"(not found)\n");
}
-#ifdef CONFIG_BOOKE
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
/* Set the time base to zero */
mtspr(SPRN_TBWL, 0);
mtspr(SPRN_TBWU, 0);
@@ -882,12 +705,35 @@ void __init generic_calibrate_decr(void)
#endif
}
-unsigned long get_boot_time(void)
+int update_persistent_clock(struct timespec now)
{
struct rtc_time tm;
- if (ppc_md.get_boot_time)
- return ppc_md.get_boot_time();
+ if (!ppc_md.set_rtc_time)
+ return 0;
+
+ to_tm(now.tv_sec + 1 + timezone_offset, &tm);
+ tm.tm_year -= 1900;
+ tm.tm_mon -= 1;
+
+ return ppc_md.set_rtc_time(&tm);
+}
+
+unsigned long read_persistent_clock(void)
+{
+ struct rtc_time tm;
+ static int first = 1;
+
+ /* XXX this is a little fragile but will work okay in the short term */
+ if (first) {
+ first = 0;
+ if (ppc_md.time_init)
+ timezone_offset = ppc_md.time_init();
+
+ /* get_boot_time() isn't guaranteed to be safe to call late */
+ if (ppc_md.get_boot_time)
+ return ppc_md.get_boot_time() -timezone_offset;
+ }
if (!ppc_md.get_rtc_time)
return 0;
ppc_md.get_rtc_time(&tm);
@@ -895,18 +741,128 @@ unsigned long get_boot_time(void)
tm.tm_hour, tm.tm_min, tm.tm_sec);
}
+/* clocksource code */
+/* .read hook of clocksource_rtc: report the current get_rtc() value. */
+static cycle_t rtc_read(void)
+{
+	cycle_t cycles = (cycle_t)get_rtc();
+
+	return cycles;
+}
+
+/* .read hook of clocksource_timebase: report the current get_tb() value. */
+static cycle_t timebase_read(void)
+{
+	cycle_t cycles = (cycle_t)get_tb();
+
+	return cycles;
+}
+
+/*
+ * Push new time-conversion parameters into vdso_data.
+ *
+ * @wall_time: not read in this body; the global xtime is used instead.
+ * @clock: clocksource in use; anything other than clocksource_timebase
+ * makes this a no-op.
+ */
+void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+{
+ u64 t2x, stamp_xsec;
+
+ if (clock != &clocksource_timebase)
+ return;
+
+ /* Make userspace gettimeofday spin until we're done. */
+ ++vdso_data->tb_update_count;
+ smp_mb();
+
+ /* XXX this assumes clock->shift == 22 */
+ /* 4611686018 ~= 2^(20+64-22) / 1e9 */
+ t2x = (u64) clock->mult * 4611686018ULL;
+ /* Express xtime in units of 1/XSEC_PER_SEC of a second. */
+ stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC;
+ do_div(stamp_xsec, 1000000000);
+ stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC;
+ /* NOTE(review): the matching second tb_update_count increment is
+ * presumably done at the end of update_gtod() - confirm. */
+ update_gtod(clock->cycle_last, stamp_xsec, t2x);
+}
+
+/*
+ * Copy the timezone fields of sys_tz into vdso_data. The two stores are
+ * bracketed by tb_update_count increments, each separated from the
+ * stores by a full memory barrier.
+ */
+void update_vsyscall_tz(void)
+{
+ /* Make userspace gettimeofday spin until we're done. */
+ ++vdso_data->tb_update_count;
+ smp_mb();
+ vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+ vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+ smp_mb();
+ /* Second increment; presumably signals readers the update is done. */
+ ++vdso_data->tb_update_count;
+}
+
+/*
+ * Choose the clocksource matching the hardware (rtc when __USE_RTC()
+ * is true, timebase otherwise), derive its multiplier from
+ * tb_ticks_per_sec and register it, logging the outcome either way.
+ */
+void __init clocksource_init(void)
+{
+	struct clocksource *cs = __USE_RTC() ? &clocksource_rtc
+					     : &clocksource_timebase;
+
+	cs->mult = clocksource_hz2mult(tb_ticks_per_sec, cs->shift);
+
+	if (clocksource_register(cs) == 0)
+		printk(KERN_INFO "clocksource: %s mult[%x] shift[%d] registered\n",
+		       cs->name, cs->mult, cs->shift);
+	else
+		printk(KERN_ERR "clocksource: %s is already registered\n",
+		       cs->name);
+}
+
+/*
+ * .set_next_event hook: arm the decrementer to fire @evt ticks from now.
+ *
+ * Records the absolute due time in this CPU's decrementer_next_tb, then
+ * programs the decrementer. @dev is unused. Always returns 0.
+ */
+static int decrementer_set_next_event(unsigned long evt,
+				      struct clock_event_device *dev)
+{
+	u64 due = get_tb_or_rtc() + evt;
+
+	__get_cpu_var(decrementer_next_tb) = due;
+
+	/*
+	 * The decrementer interrupts on the 0 -> -1 transition, so load
+	 * one less than requested (a request of 0 stays 0).
+	 */
+	set_dec(evt ? evt - 1 : evt);
+
+	return 0;
+}
+
+/*
+ * .set_mode hook: for any mode other than one-shot, push the next
+ * event out to DECREMENTER_MAX ticks; one-shot needs no action here.
+ */
+static void decrementer_set_mode(enum clock_event_mode mode,
+				 struct clock_event_device *dev)
+{
+	if (mode == CLOCK_EVT_MODE_ONESHOT)
+		return;
+
+	decrementer_set_next_event(DECREMENTER_MAX, dev);
+}
+
+/*
+ * Register the decrementer clock event device for @cpu.
+ *
+ * Copies the decrementer_clockevent template into @cpu's slot of the
+ * per-CPU 'decrementers' variable, restricts the copy's cpumask to
+ * @cpu, logs its parameters and hands the copy to
+ * clockevents_register_device().
+ */
+static void register_decrementer_clockevent(int cpu)
+{
+	struct clock_event_device *dec = &per_cpu(decrementers, cpu);
+
+	*dec = decrementer_clockevent;
+	dec->cpumask = cpumask_of_cpu(cpu);
+
+	/* Purely informational, so log at KERN_INFO rather than KERN_ERR. */
+	printk(KERN_INFO "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
+	       dec->name, dec->mult, dec->shift, cpu);
+
+	clockevents_register_device(dec);
+}
+
+/*
+ * Fill in the rate-dependent fields of the decrementer_clockevent
+ * template from ppc_tb_freq, then register the device for the CPU this
+ * runs on.
+ */
+void init_decrementer_clockevent(void)
+{
+	int this_cpu = smp_processor_id();
+	struct clock_event_device *tmpl = &decrementer_clockevent;
+
+	tmpl->mult = div_sc(ppc_tb_freq, NSEC_PER_SEC, tmpl->shift);
+	tmpl->max_delta_ns = clockevent_delta2ns(DECREMENTER_MAX, tmpl);
+	tmpl->min_delta_ns = 1000;
+
+	register_decrementer_clockevent(this_cpu);
+}
+
+/* Register the decrementer clock event device for the calling CPU. */
+void secondary_cpu_time_init(void)
+{
+	int this_cpu = smp_processor_id();
+
+	/* FIXME: as a separate, unrelated change, the snapshot_timebase
+	 * call should be moved here. */
+	register_decrementer_clockevent(this_cpu);
+}
+
/* This function is only called on the boot processor */
void __init time_init(void)
{
unsigned long flags;
- unsigned long tm = 0;
struct div_result res;
u64 scale, x;
unsigned shift;
- if (ppc_md.time_init != NULL)
- timezone_offset = ppc_md.time_init();
-
if (__USE_RTC()) {
/* 601 processor: dec counts down by 128 every 128ns */
ppc_tb_freq = 1000000000;
@@ -981,19 +937,14 @@ void __init time_init(void)
/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
boot_tb = get_tb_or_rtc();
- tm = get_boot_time();
-
write_seqlock_irqsave(&xtime_lock, flags);
/* If platform provided a timezone (pmac), we correct the time */
if (timezone_offset) {
sys_tz.tz_minuteswest = -timezone_offset / 60;
sys_tz.tz_dsttime = 0;
- tm -= timezone_offset;
}
- xtime.tv_sec = tm;
- xtime.tv_nsec = 0;
do_gtod.varp = &do_gtod.vars[0];
do_gtod.var_idx = 0;
do_gtod.varp->tb_orig_stamp = tb_last_jiffy;
@@ -1011,13 +962,13 @@ void __init time_init(void)
time_freq = 0;
- last_rtc_update = xtime.tv_sec;
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
write_sequnlock_irqrestore(&xtime_lock, flags);
- /* Not exact, but the timer interrupt takes care of this */
- set_dec(tb_ticks_per_jiffy);
+ /* Register the clocksource, if we're not running on iSeries */
+ if (!firmware_has_feature(FW_FEATURE_ISERIES))
+ clocksource_init();
+
+ init_decrementer_clockevent();
}