From ad596171ed635c51a9eef829187af100cbf8dcf7 Mon Sep 17 00:00:00 2001
From: john stultz
Date: Mon, 26 Jun 2006 00:25:06 -0700
Subject: [PATCH] Time: Use clocksource infrastructure for update_wall_time

Modify the update_wall_time function so it increments time using the
clocksource abstraction instead of jiffies. Since the only clocksource
driver currently provided is the jiffies clocksource, this should result
in no functional change.

Additionally, timekeeping_init and timekeeping_resume functions have been
added to initialize and maintain some of the new timekeeping state.

[hirofumi@mail.parknet.co.jp: fixlet]
Signed-off-by: John Stultz
Signed-off-by: OGAWA Hirofumi
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/timer.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 80 insertions(+), 13 deletions(-)

(limited to 'kernel/timer.c')

diff --git a/kernel/timer.c b/kernel/timer.c
index eb97371b87d..524c7f63836 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -792,24 +792,93 @@ u64 current_tick_length(void)
 	return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
 }
 
+/* XXX - all of this timekeeping code should be later moved to time.c */
+#include <linux/clocksource.h>
+static struct clocksource *clock; /* pointer to current clocksource */
+static cycle_t last_clock_cycle; /* cycle value at last update_wall_time */
+
 /*
- * Using a loop looks inefficient, but "ticks" is
- * usually just one (we shouldn't be losing ticks,
- * we're doing this this way mainly for interrupt
- * latency reasons, not because we think we'll
- * have lots of lost timer ticks
+ * timekeeping_init - Initializes the clocksource and common timekeeping values
 */
-static void update_wall_time(unsigned long ticks)
+void __init timekeeping_init(void)
 {
-	do {
-		ticks--;
+	unsigned long flags;
+
+	write_seqlock_irqsave(&xtime_lock, flags);
+	clock = get_next_clocksource();
+	calculate_clocksource_interval(clock, tick_nsec);
+	last_clock_cycle = read_clocksource(clock);
+	ntp_clear();
+	write_sequnlock_irqrestore(&xtime_lock, flags);
+}
+
+
+/*
+ * timekeeping_resume - Resumes the generic timekeeping subsystem.
+ * @dev:	unused
+ *
+ * This is for the generic clocksource timekeeping.
+ * xtime/wall_to_monotonic/jiffies/wall_jiffies/etc are
+ * still managed by arch specific suspend/resume code.
+ */
+static int timekeeping_resume(struct sys_device *dev)
+{
+	unsigned long flags;
+
+	write_seqlock_irqsave(&xtime_lock, flags);
+	/* restart the last cycle value */
+	last_clock_cycle = read_clocksource(clock);
+	write_sequnlock_irqrestore(&xtime_lock, flags);
+	return 0;
+}
+
+/* sysfs resume/suspend bits for timekeeping */
+static struct sysdev_class timekeeping_sysclass = {
+	.resume		= timekeeping_resume,
+	set_kset_name("timekeeping"),
+};
+
+static struct sys_device device_timer = {
+	.id	= 0,
+	.cls	= &timekeeping_sysclass,
+};
+
+static int __init timekeeping_init_device(void)
+{
+	int error = sysdev_class_register(&timekeeping_sysclass);
+	if (!error)
+		error = sysdev_register(&device_timer);
+	return error;
+}
+
+device_initcall(timekeeping_init_device);
+
+/*
+ * update_wall_time - Uses the current clocksource to increment the wall time
+ *
+ * Called from the timer interrupt, must hold a write on xtime_lock.
+ */
+static void update_wall_time(void)
+{
+	cycle_t now, offset;
+
+	now = read_clocksource(clock);
+	offset = (now - last_clock_cycle)&clock->mask;
+
+	/* normally this loop will run just once, however in the
+	 * case of lost or late ticks, it will accumulate correctly.
+	 */
+	while (offset > clock->interval_cycles) {
+		/* accumulate one interval */
+		last_clock_cycle += clock->interval_cycles;
+		offset -= clock->interval_cycles;
+
+		update_wall_time_one_tick();
 		if (xtime.tv_nsec >= 1000000000) {
 			xtime.tv_nsec -= 1000000000;
 			xtime.tv_sec++;
 			second_overflow();
 		}
-	} while (ticks);
+	}
 }
 
 /*
@@ -915,10 +984,8 @@ static inline void update_times(void)
 	unsigned long ticks;
 
 	ticks = jiffies - wall_jiffies;
-	if (ticks) {
-		wall_jiffies += ticks;
-		update_wall_time(ticks);
-	}
+	wall_jiffies += ticks;
+	update_wall_time();
 	calc_load(ticks);
 }
 
--
cgit v1.2.3-70-g09d2

From 260a42309b31cbc54eb4b6b85649e412bcad053f Mon Sep 17 00:00:00 2001
From: john stultz
Date: Mon, 26 Jun 2006 00:25:07 -0700
Subject: [PATCH] Time: Let user request precision from current_tick_length()

Change the current_tick_length() function so it takes an argument which
specifies how much precision to return in shifted nanoseconds. This
provides a simple way to convert NTP's internal nanoseconds, shifted by
(SHIFT_SCALE - 10), to the other shifted-nanosecond units used by the
clocksource abstraction.

Signed-off-by: John Stultz
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/powerpc/kernel/time.c |  2 +-
 include/linux/timex.h      |  2 +-
 kernel/timer.c             | 21 +++++++++++++++++----
 3 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'kernel/timer.c')

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index d20907561f4..742f07a6316 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -534,7 +534,7 @@ static __inline__ void timer_recalc_offset(u64 cur_tb)
 	if (__USE_RTC())
 		return;
-	tlen = current_tick_length();
+	tlen = current_tick_length(SHIFT_SCALE - 10);
 	offset = cur_tb - do_gtod.varp->tb_orig_stamp;
 	if (tlen == last_tick_len && offset < 0x80000000u)
 		return;
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 34d3ccff7bb..1ba3071fcb8 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -304,7 +304,7 @@ time_interpolator_reset(void)
 #endif /* !CONFIG_TIME_INTERPOLATION */
 
 /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
-extern u64 current_tick_length(void);
+extern u64 current_tick_length(long);
 
 extern int do_adjtimex(struct timex *);
 
diff --git a/kernel/timer.c b/kernel/timer.c
index 524c7f63836..623f9ea198d 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -780,16 +780,29 @@ static void update_wall_time_one_tick(void)
 * Return how long ticks are at the moment, that is, how much time
 * update_wall_time_one_tick will add to xtime next time we call it
 * (assuming no calls to do_adjtimex in the meantime).
- * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10
- * bits to the right of the binary point.
+ * The return value is in fixed-point nanoseconds shifted by the
+ * specified number of bits to the right of the binary point.
 * This function has no side-effects.
 */
-u64 current_tick_length(void)
+u64 current_tick_length(long shift)
 {
 	long delta_nsec;
+	u64 ret;
+	/* calculate the finest interval NTP will allow.
+	 * ie: nanosecond value shifted by (SHIFT_SCALE - 10)
+	 */
 	delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
-	return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
+	ret = ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
+
+	/* convert from (SHIFT_SCALE - 10) to specified shift scale: */
+	shift = shift - (SHIFT_SCALE - 10);
+	if (shift < 0)
+		ret >>= -shift;
+	else
+		ret <<= shift;
+
+	return ret;
 }
 
 /* XXX - all of this timekeeping code should be later moved to time.c */
--
cgit v1.2.3-70-g09d2

From 5eb6d20533d14a432df714520939a6181e28f099 Mon Sep 17 00:00:00 2001
From: john stultz
Date: Mon, 26 Jun 2006 00:25:07 -0700
Subject: [PATCH] Time: Use clocksource abstraction for NTP adjustments

Instead of incrementing xtime by tick_nsec + ntp adjustments, use the
clocksource abstraction to increment and scale time. Using the clocksource
abstraction allows other clocksources to be used consistently in the face
of late or lost ticks, while preserving the existing behavior via the
jiffies clocksource.

This removes the need to keep time_phase adjustments as we just use the
current_tick_length() function as the NTP interface and accumulate time
using shifted nanoseconds.

The basics of this design are by Roman Zippel, however it is my own
interpretation and implementation, so the credit should go to him and the
blame to me.

Signed-off-by: John Stultz
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/clocksource.h | 97 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/timer.c              | 47 +++++++++++++---------
 2 files changed, 125 insertions(+), 19 deletions(-)

(limited to 'kernel/timer.c')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index c4187cda0ee..c4739c4e303 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -173,6 +173,103 @@ static inline void calculate_clocksource_interval(struct clocksource *c,
 	c->interval_snsecs = (u64)c->interval_cycles * c->mult;
 }
 
+
+/**
+ * error_aproximation - calculates an error adjustment for a given error
+ *
+ * @error:	Error value (unsigned)
+ * @unit:	Adjustment unit
+ *
+ * For a given error value, this function takes the adjustment unit
+ * and uses binary approximation to return a power of two adjustment value.
+ *
+ * This function is only for use by the make_ntp_adj() function
+ * and you must hold a write on the xtime_lock when calling.
+ */
+static inline int error_aproximation(u64 error, u64 unit)
+{
+	static int saved_adj = 0;
+	u64 adjusted_unit = unit << saved_adj;
+
+	if (error > (adjusted_unit * 2)) {
+		/* large error, so increment the adjustment factor */
+		saved_adj++;
+	} else if (error > adjusted_unit) {
+		/* just right, don't touch it */
+	} else if (saved_adj) {
+		/* small error, so drop the adjustment factor */
+		saved_adj--;
+		return 0;
+	}
+
+	return saved_adj;
+}
+
+
+/**
+ * make_ntp_adj - Adjusts the specified clocksource for a given error
+ *
+ * @clock:		Pointer to clock to be adjusted
+ * @cycles_delta:	Current unaccounted cycle delta
+ * @error:		Pointer to current error value
+ *
+ * Returns clock shifted nanosecond adjustment to be applied against
+ * the accumulated time value (ie: xtime).
+ *
+ * If the error value is large enough, this function calculates the
+ * (power of two) adjustment value, and adjusts the clock's mult and
+ * interval_snsecs values accordingly.
+ *
+ * However, since there may be some unaccumulated cycles, to avoid
+ * time inconsistencies we must adjust the accumulation value
+ * accordingly.
+ *
+ * This is not very intuitive, so the following proof should help:
+ * The basic timeofday algorithm:  base + cycle * mult
+ * Thus:
+ *	new_base + cycle * new_mult = old_base + cycle * old_mult
+ *	new_base = old_base + cycle * old_mult - cycle * new_mult
+ *	new_base = old_base + cycle * (old_mult - new_mult)
+ *	new_base - old_base = cycle * (old_mult - new_mult)
+ *	base_delta = cycle * (old_mult - new_mult)
+ *	base_delta = cycle * (mult_delta)
+ *
+ * Where mult_delta is the adjustment value made to mult
+ *
+ */
+static inline s64 make_ntp_adj(struct clocksource *clock,
+				cycles_t cycles_delta, s64* error)
+{
+	s64 ret = 0;
+	if (*error > ((s64)clock->interval_cycles+1)/2) {
+		/* calculate adjustment value */
+		int adjustment = error_aproximation(*error,
+						clock->interval_cycles);
+		/* adjust clock */
+		clock->mult += 1 << adjustment;
+		clock->interval_snsecs += clock->interval_cycles << adjustment;
+
+		/* adjust the base and error for the adjustment */
+		ret = -(cycles_delta << adjustment);
+		*error -= clock->interval_cycles << adjustment;
+		/* XXX adj error for cycle_delta offset? */
+	} else if ((-(*error)) > ((s64)clock->interval_cycles+1)/2) {
+		/* calculate adjustment value */
+		int adjustment = error_aproximation(-(*error),
+						clock->interval_cycles);
+		/* adjust clock */
+		clock->mult -= 1 << adjustment;
+		clock->interval_snsecs -= clock->interval_cycles << adjustment;
+
+		/* adjust the base and error for the adjustment */
+		ret = cycles_delta << adjustment;
+		*error += clock->interval_cycles << adjustment;
+		/* XXX adj error for cycle_delta offset? */
+	}
+	return ret;
+}
+
+
 /* used to install a new clocksource */
 int register_clocksource(struct clocksource*);
 void reselect_clocksource(void);
diff --git a/kernel/timer.c b/kernel/timer.c
index 623f9ea198d..6811436a031 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -597,7 +597,6 @@ long time_tolerance = MAXFREQ;		/* frequency tolerance (ppm) */
 long time_precision = 1;		/* clock precision (us) */
 long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us) */
 long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us) */
-static long time_phase;			/* phase offset (scaled us) */
 long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC;
 					/* frequency offset (scaled ppm)*/
 static long time_adj;			/* tick adjust (scaled 1 / HZ) */
@@ -747,27 +746,14 @@ static long adjtime_adjustment(void)
 }
 
 /* in the NTP reference this is called "hardclock()" */
-static void update_wall_time_one_tick(void)
+static void update_ntp_one_tick(void)
 {
-	long time_adjust_step, delta_nsec;
+	long time_adjust_step;
 
 	time_adjust_step = adjtime_adjustment();
 	if (time_adjust_step)
 		/* Reduce by this step the amount of time left  */
 		time_adjust -= time_adjust_step;
 
-	delta_nsec = tick_nsec + time_adjust_step * 1000;
-	/*
-	 * Advance the phase, once it gets to one microsecond, then
-	 * advance the tick more.
-	 */
-	time_phase += time_adj;
-	if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) {
-		long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10));
-		time_phase -= ltemp << (SHIFT_SCALE - 10);
-		delta_nsec += ltemp;
-	}
-	xtime.tv_nsec += delta_nsec;
-	time_interpolator_update(delta_nsec);
 	/* Changes by adjtime() do not take effect till next tick. */
 	if (time_next_adjust != 0) {
@@ -872,8 +858,13 @@ device_initcall(timekeeping_init_device);
 */
static void update_wall_time(void)
 {
+	static s64 remainder_snsecs, error;
+	s64 snsecs_per_sec;
 	cycle_t now, offset;
 
+	snsecs_per_sec = (s64)NSEC_PER_SEC << clock->shift;
+	remainder_snsecs += (s64)xtime.tv_nsec << clock->shift;
+
 	now = read_clocksource(clock);
 	offset = (now - last_clock_cycle)&clock->mask;
 
@@ -881,17 +872,35 @@ static void update_wall_time(void)
 	 * case of lost or late ticks, it will accumulate correctly.
 	 */
 	while (offset > clock->interval_cycles) {
+		/* get the ntp interval in clock shifted nanoseconds */
+		s64 ntp_snsecs = current_tick_length(clock->shift);
+
 		/* accumulate one interval */
+		remainder_snsecs += clock->interval_snsecs;
 		last_clock_cycle += clock->interval_cycles;
 		offset -= clock->interval_cycles;
 
-		update_wall_time_one_tick();
-		if (xtime.tv_nsec >= 1000000000) {
-			xtime.tv_nsec -= 1000000000;
+		/* interpolator bits */
+		time_interpolator_update(clock->interval_snsecs
+						>> clock->shift);
+		/* increment the NTP state machine */
+		update_ntp_one_tick();
+
+		/* accumulate error between NTP and clock interval */
+		error += (ntp_snsecs - (s64)clock->interval_snsecs);
+
+		/* correct the clock when NTP error is too big */
+		remainder_snsecs += make_ntp_adj(clock, offset, &error);
+
+		if (remainder_snsecs >= snsecs_per_sec) {
+			remainder_snsecs -= snsecs_per_sec;
 			xtime.tv_sec++;
 			second_overflow();
 		}
 	}
+	/* store full nanoseconds into xtime */
+	xtime.tv_nsec = remainder_snsecs >> clock->shift;
+	remainder_snsecs -= (s64)xtime.tv_nsec << clock->shift;
 }
 
 /*
--
cgit v1.2.3-70-g09d2

From cf3c769b4b0dd1146da84d5cf045dcfe53bd0f13 Mon Sep 17 00:00:00 2001
From: john stultz
Date: Mon, 26 Jun 2006 00:25:08 -0700
Subject: [PATCH] Time: Introduce arch generic time accessors

Introduces clocksource switching code and the arch generic time accessor
functions that use the clocksource infrastructure.
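As an illustration of how a caller is expected to use the new accessor, here
is a minimal hedged sketch (the surrounding function is invented for this
note; getnstimeofday() and struct timespec are as introduced by the hunks
below):

	#include <linux/time.h>

	/* Hypothetical caller: logs the current wall time with nanosecond
	 * resolution instead of going through do_gettimeofday()'s
	 * microsecond timeval.
	 */
	static void example_timestamp(void)
	{
		struct timespec ts;

		getnstimeofday(&ts);
		printk(KERN_INFO "now: %ld.%09ld\n",
		       (long)ts.tv_sec, ts.tv_nsec);
	}

Internally the accessor is lock-free on the read side: it retries a seqlock
read loop around xtime plus the cycle offset rather than blocking writers.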
Signed-off-by: John Stultz
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/time.h |  15 +++++
 kernel/time.c        |   2 +
 kernel/timer.c       | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 187 insertions(+)

(limited to 'kernel/timer.c')

diff --git a/include/linux/time.h b/include/linux/time.h
index 88d3b812841..65dd85b2105 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -102,6 +102,7 @@ extern int do_getitimer(int which, struct itimerval *value);
 extern void getnstimeofday(struct timespec *tv);
 
 extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
+extern int timekeeping_is_continuous(void);
 
 /**
 * timespec_to_ns - Convert timespec to nanoseconds
@@ -144,6 +145,20 @@ extern struct timespec ns_to_timespec(const s64 nsec);
 */
 extern struct timeval ns_to_timeval(const s64 nsec);
 
+/**
+ * timespec_add_ns - Adds nanoseconds to a timespec
+ * @a:	pointer to timespec to be incremented
+ * @ns:	unsigned nanoseconds value to be added
+ */
+static inline void timespec_add_ns(struct timespec *a, u64 ns)
+{
+	ns += a->tv_nsec;
+	while (unlikely(ns >= NSEC_PER_SEC)) {
+		ns -= NSEC_PER_SEC;
+		a->tv_sec++;
+	}
+	a->tv_nsec = ns;
+}
 #endif /* __KERNEL__ */
 
 #define NFDBITS			__NFDBITS
diff --git a/kernel/time.c b/kernel/time.c
index b00ddc71ced..5bd48974764 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -523,6 +523,7 @@ EXPORT_SYMBOL(do_gettimeofday);
 
 #else
 
+#ifndef CONFIG_GENERIC_TIME
 /*
 * Simulate gettimeofday using do_gettimeofday which only allows a timeval
 * and therefore only yields usec accuracy
@@ -537,6 +538,7 @@ void getnstimeofday(struct timespec *tv)
 }
 EXPORT_SYMBOL_GPL(getnstimeofday);
 #endif
+#endif
 
 /* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
diff --git a/kernel/timer.c b/kernel/timer.c
index 6811436a031..e5adb9e2e7a 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -795,6 +795,169 @@ u64 current_tick_length(long shift)
 #include <linux/clocksource.h>
 static struct clocksource *clock; /* pointer to current clocksource */
 static cycle_t last_clock_cycle; /* cycle value at last update_wall_time */
+
+#ifdef CONFIG_GENERIC_TIME
+/**
+ * __get_nsec_offset - Returns nanoseconds since last update_wall_time
+ *
+ * private function, must hold xtime_lock lock when being
+ * called. Returns the number of nanoseconds since the
+ * last call to update_wall_time() (adjusted by NTP scaling)
+ */
+static inline s64 __get_nsec_offset(void)
+{
+	cycle_t cycle_now, cycle_delta;
+	s64 ns_offset;
+
+	/* read clocksource: */
+	cycle_now = read_clocksource(clock);
+
+	/* calculate the delta since the last update_wall_time: */
+	cycle_delta = (cycle_now - last_clock_cycle) & clock->mask;
+
+	/* convert to nanoseconds: */
+	ns_offset = cyc2ns(clock, cycle_delta);
+
+	return ns_offset;
+}
+
+/**
+ * __get_realtime_clock_ts - Returns the time of day in a timespec
+ * @ts:	pointer to the timespec to be set
+ *
+ * Returns the time of day in a timespec. Used by
+ * do_gettimeofday() and get_realtime_clock_ts().
+ */
+static inline void __get_realtime_clock_ts(struct timespec *ts)
+{
+	unsigned long seq;
+	s64 nsecs;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+
+		*ts = xtime;
+		nsecs = __get_nsec_offset();
+
+	} while (read_seqretry(&xtime_lock, seq));
+
+	timespec_add_ns(ts, nsecs);
+}
+
+/**
+ * get_realtime_clock_ts - Returns the time of day in a timespec
+ * @ts:	pointer to the timespec to be set
+ *
+ * Returns the time of day in a timespec.
+ */
+void getnstimeofday(struct timespec *ts)
+{
+	__get_realtime_clock_ts(ts);
+}
+
+EXPORT_SYMBOL(getnstimeofday);
+
+/**
+ * do_gettimeofday - Returns the time of day in a timeval
+ * @tv:	pointer to the timeval to be set
+ *
+ * NOTE: Users should be converted to using get_realtime_clock_ts()
+ */
+void do_gettimeofday(struct timeval *tv)
+{
+	struct timespec now;
+
+	__get_realtime_clock_ts(&now);
+	tv->tv_sec = now.tv_sec;
+	tv->tv_usec = now.tv_nsec/1000;
+}
+
+EXPORT_SYMBOL(do_gettimeofday);
+/**
+ * do_settimeofday - Sets the time of day
+ * @tv:	pointer to the timespec variable containing the new time
+ *
+ * Sets the time of day to the new time and updates NTP and notifies hrtimers
+ */
+int do_settimeofday(struct timespec *tv)
+{
+	unsigned long flags;
+	time_t wtm_sec, sec = tv->tv_sec;
+	long wtm_nsec, nsec = tv->tv_nsec;
+
+	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+		return -EINVAL;
+
+	write_seqlock_irqsave(&xtime_lock, flags);
+
+	nsec -= __get_nsec_offset();
+
+	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
+	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+
+	set_normalized_timespec(&xtime, sec, nsec);
+	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+
+	ntp_clear();
+
+	write_sequnlock_irqrestore(&xtime_lock, flags);
+
+	/* signal hrtimers about time change */
+	clock_was_set();
+
+	return 0;
+}
+
+EXPORT_SYMBOL(do_settimeofday);
+
+/**
+ * change_clocksource - Swaps clocksources if a new one is available
+ *
+ * Accumulates current time interval and initializes new clocksource
+ */
+static int change_clocksource(void)
+{
+	struct clocksource *new;
+	cycle_t now;
+	u64 nsec;
+	new = get_next_clocksource();
+	if (clock != new) {
+		now = read_clocksource(new);
+		nsec = __get_nsec_offset();
+		timespec_add_ns(&xtime, nsec);
+
+		clock = new;
+		last_clock_cycle = now;
+		printk(KERN_INFO "Time: %s clocksource has been installed.\n",
+					clock->name);
+		return 1;
+	} else if (clock->update_callback) {
+		return clock->update_callback();
+	}
+	return 0;
+}
+#else
+#define change_clocksource() (0)
+#endif
+
+/**
+ * timekeeping_is_continuous - check to see if timekeeping is free running
+ */
+int timekeeping_is_continuous(void)
+{
+	unsigned long seq;
+	int ret;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+
+		ret = clock->is_continuous;
+
+	} while (read_seqretry(&xtime_lock, seq));
+
+	return ret;
+}
+
 /*
 * timekeeping_init - Initializes the clocksource and common timekeeping values
 */
@@ -901,6 +1064,13 @@ static void update_wall_time(void)
 	/* store full nanoseconds into xtime */
 	xtime.tv_nsec = remainder_snsecs >> clock->shift;
 	remainder_snsecs -= (s64)xtime.tv_nsec << clock->shift;
+
+	/* check to see if there is a new clocksource to use */
+	if (change_clocksource()) {
+		error = 0;
+		remainder_snsecs = 0;
+		calculate_clocksource_interval(clock, tick_nsec);
+	}
}
 
 /*
--
cgit v1.2.3-70-g09d2

From a275254975a29c51929ee175b92ac471ac2a0043 Mon Sep 17 00:00:00 2001
From: john stultz
Date: Mon, 26 Jun 2006 00:25:14 -0700
Subject: [PATCH] time: rename clocksource functions

As suggested by Roman Zippel, change clocksource functions to use
clocksource_xyz rather than xyz_clocksource to avoid polluting the
namespace.
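To show what the renamed API looks like from a driver's point of view, here
is a hedged skeleton; the "foo" names and the 1 MHz rate are invented, while
the struct fields and the clocksource_register()/clocksource_hz2mult() calls
follow the hunks below:

	#include <linux/clocksource.h>

	static u64 foo_counter;	/* stand-in for a real hardware counter */

	static cycle_t foo_read(void)
	{
		return (cycle_t)foo_counter;
	}

	static struct clocksource clocksource_foo = {
		.name		= "foo",
		.rating		= 200,
		.read		= foo_read,
		.mask		= (cycle_t)-1,
		.shift		= 20,
		.is_continuous	= 1,
	};

	static int __init foo_clocksource_init(void)
	{
		/* mult converts cycles at the (invented) 1 MHz rate into
		 * shifted nanoseconds, as the PIT example below does */
		clocksource_foo.mult = clocksource_hz2mult(1000000,
						clocksource_foo.shift);
		return clocksource_register(&clocksource_foo);
	}
	module_init(foo_clocksource_init);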
Signed-off-by: John Stultz
Cc: Roman Zippel
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/i386/kernel/hpet.c       |  2 +-
 arch/i386/kernel/i8253.c      |  2 +-
 arch/i386/kernel/tsc.c        |  4 ++--
 drivers/clocksource/acpi_pm.c |  2 +-
 drivers/clocksource/cyclone.c |  2 +-
 include/linux/clocksource.h   | 14 +++++++-------
 kernel/time/clocksource.c     | 16 ++++++++--------
 kernel/time/jiffies.c         |  2 +-
 kernel/timer.c                | 20 ++++++++++----------
 9 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'kernel/timer.c')

diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
index 91a5bdd9f60..23e7d2c5d25 100644
--- a/arch/i386/kernel/hpet.c
+++ b/arch/i386/kernel/hpet.c
@@ -61,7 +61,7 @@ static int __init init_hpet_clocksource(void)
 	do_div(tmp, FSEC_PER_NSEC);
 	clocksource_hpet.mult = (u32)tmp;
 
-	return register_clocksource(&clocksource_hpet);
+	return clocksource_register(&clocksource_hpet);
 }
 
 module_init(init_hpet_clocksource);
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
index a276bceade6..5b13739a7ff 100644
--- a/arch/i386/kernel/i8253.c
+++ b/arch/i386/kernel/i8253.c
@@ -80,6 +80,6 @@ static int __init init_pit_clocksource(void)
 		return 0;
 
 	clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
-	return register_clocksource(&clocksource_pit);
+	return clocksource_register(&clocksource_pit);
 }
 module_init(init_pit_clocksource);
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 7713f86389a..1c3c927755d 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -351,7 +351,7 @@ static int tsc_update_callback(void)
 	/* check to see if we should switch to the safe clocksource: */
 	if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
 		clocksource_tsc.rating = 50;
-		reselect_clocksource();
+		clocksource_reselect();
 		change = 1;
 	}
 
@@ -469,7 +469,7 @@ static int __init init_tsc_clocksource(void)
 			jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL);
 		add_timer(&verify_tsc_freq_timer);
 
-		return register_clocksource(&clocksource_tsc);
+		return clocksource_register(&clocksource_tsc);
 	}
 
 	return 0;
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index a0e5cde2fa7..9217be5048d 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -171,7 +171,7 @@ static int __init init_acpi_pm_clocksource(void)
 		return -ENODEV;
 
 pm_good:
-	return register_clocksource(&clocksource_acpi_pm);
+	return clocksource_register(&clocksource_acpi_pm);
 }
 
 module_init(init_acpi_pm_clocksource);
diff --git a/drivers/clocksource/cyclone.c b/drivers/clocksource/cyclone.c
index 444eb11b9b4..5906a0af825 100644
--- a/drivers/clocksource/cyclone.c
+++ b/drivers/clocksource/cyclone.c
@@ -113,7 +113,7 @@ static int __init init_cyclone_clocksource(void)
 	clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ,
 						clocksource_cyclone.shift);
 
-	return register_clocksource(&clocksource_cyclone);
+	return clocksource_register(&clocksource_cyclone);
 }
 
 module_init(init_cyclone_clocksource);
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index c4739c4e303..5f4a7f72f3e 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -118,12 +118,12 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant)
 }
 
 /**
- * read_clocksource: - Access the clocksource's current cycle value
+ * clocksource_read: - Access the clocksource's current cycle value
 * @cs:	pointer to clocksource being read
 *
 * Uses the clocksource to return the current cycle_t value
 */
-static inline cycle_t read_clocksource(struct clocksource *cs)
+static inline cycle_t clocksource_read(struct clocksource *cs)
 {
 	return cs->read();
 }
@@ -145,7 +145,7 @@ static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles)
 }
 
 /**
- * calculate_clocksource_interval - Calculates a clocksource interval struct
+ * clocksource_calculate_interval - Calculates a clocksource interval struct
 *
 * @c:		Pointer to clocksource.
 * @length_nsec: Desired interval length in nanoseconds.
@@ -155,7 +155,7 @@ static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles)
 *
 * Unless you're the timekeeping code, you should not be using this!
 */
-static inline void calculate_clocksource_interval(struct clocksource *c,
+static inline void clocksource_calculate_interval(struct clocksource *c,
 					unsigned long length_nsec)
 {
 	u64 tmp;
@@ -271,8 +271,8 @@ static inline s64 make_ntp_adj(struct clocksource *clock,
 
 
 /* used to install a new clocksource */
-int register_clocksource(struct clocksource*);
-void reselect_clocksource(void);
-struct clocksource* get_next_clocksource(void);
+int clocksource_register(struct clocksource*);
+void clocksource_reselect(void);
+struct clocksource* clocksource_get_next(void);
 
 #endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index a9f387ea83b..74eca5939bd 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -65,10 +65,10 @@ static int __init clocksource_done_booting(void)
 late_initcall(clocksource_done_booting);
 
 /**
- * get_next_clocksource - Returns the selected clocksource
+ * clocksource_get_next - Returns the selected clocksource
 *
 */
-struct clocksource *get_next_clocksource(void)
+struct clocksource *clocksource_get_next(void)
 {
 	unsigned long flags;
 
@@ -142,12 +142,12 @@ static int is_registered_source(struct clocksource *c)
 }
 
 /**
- * register_clocksource - Used to install new clocksources
+ * clocksource_register - Used to install new clocksources
 * @t:		clocksource to be registered
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 */
-int register_clocksource(struct clocksource *c)
+int clocksource_register(struct clocksource *c)
 {
 	int ret = 0;
 	unsigned long flags;
@@ -167,17 +167,16 @@ int register_clocksource(struct clocksource *c)
 	spin_unlock_irqrestore(&clocksource_lock, flags);
 	return ret;
 }
-
-EXPORT_SYMBOL(register_clocksource);
+EXPORT_SYMBOL(clocksource_register);
 
 /**
- * reselect_clocksource - Rescan list for next clocksource
+ * clocksource_reselect - Rescan list for next clocksource
 *
 * A quick helper function to be used if a clocksource changes its
 * rating. Forces the clocksource list to be re-scanned for the best
 * clocksource.
 */
-void reselect_clocksource(void)
+void clocksource_reselect(void)
 {
 	unsigned long flags;
 
@@ -185,6 +184,7 @@ void reselect_clocksource(void)
 	next_clocksource = select_clocksource();
 	spin_unlock_irqrestore(&clocksource_lock, flags);
 }
+EXPORT_SYMBOL(clocksource_reselect);
 
 /**
 * sysfs_show_current_clocksources - sysfs interface for current clocksource
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 1fe8376e717..126bb30c4af 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -67,7 +67,7 @@ struct clocksource clocksource_jiffies = {
 
 static int __init init_jiffies_clocksource(void)
 {
-	return register_clocksource(&clocksource_jiffies);
+	return clocksource_register(&clocksource_jiffies);
 }
 
 module_init(init_jiffies_clocksource);
diff --git a/kernel/timer.c b/kernel/timer.c
index e5adb9e2e7a..890a56937cf 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -810,7 +810,7 @@ static inline s64 __get_nsec_offset(void)
 	s64 ns_offset;
 
 	/* read clocksource: */
-	cycle_now = read_clocksource(clock);
+	cycle_now = clocksource_read(clock);
 
 	/* calculate the delta since the last update_wall_time: */
 	cycle_delta = (cycle_now - last_clock_cycle) & clock->mask;
@@ -845,7 +845,7 @@ static inline void __get_realtime_clock_ts(struct timespec *ts)
 }
 
 /**
- * get_realtime_clock_ts - Returns the time of day in a timespec
+ * getnstimeofday - Returns the time of day in a timespec
 * @ts:	pointer to the timespec to be set
 *
 * Returns the time of day in a timespec.
@@ -920,9 +920,9 @@ static int change_clocksource(void)
 	struct clocksource *new;
 	cycle_t now;
 	u64 nsec;
-	new = get_next_clocksource();
+	new = clocksource_get_next();
 	if (clock != new) {
-		now = read_clocksource(new);
+		now = clocksource_read(new);
 		nsec = __get_nsec_offset();
 		timespec_add_ns(&xtime, nsec);
 
@@ -966,9 +966,9 @@ void __init timekeeping_init(void)
 	unsigned long flags;
 
 	write_seqlock_irqsave(&xtime_lock, flags);
-	clock = get_next_clocksource();
-	calculate_clocksource_interval(clock, tick_nsec);
-	last_clock_cycle = read_clocksource(clock);
+	clock = clocksource_get_next();
+	clocksource_calculate_interval(clock, tick_nsec);
+	last_clock_cycle = clocksource_read(clock);
 	ntp_clear();
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 }
@@ -988,7 +988,7 @@ static int timekeeping_resume(struct sys_device *dev)
 
 	write_seqlock_irqsave(&xtime_lock, flags);
 	/* restart the last cycle value */
-	last_clock_cycle = read_clocksource(clock);
+	last_clock_cycle = clocksource_read(clock);
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 	return 0;
 }
@@ -1028,7 +1028,7 @@ static void update_wall_time(void)
 	snsecs_per_sec = (s64)NSEC_PER_SEC << clock->shift;
 	remainder_snsecs += (s64)xtime.tv_nsec << clock->shift;
 
-	now = read_clocksource(clock);
+	now = clocksource_read(clock);
 	offset = (now - last_clock_cycle)&clock->mask;
 
 	/* normally this loop will run just once, however in the
@@ -1069,7 +1069,7 @@ static void update_wall_time(void)
 	if (change_clocksource()) {
 		error = 0;
 		remainder_snsecs = 0;
-		calculate_clocksource_interval(clock, tick_nsec);
+		clocksource_calculate_interval(clock, tick_nsec);
 	}
 }
 
--
cgit v1.2.3-70-g09d2

From 19923c190e0932bf0ac1e1d06a48f5c3678dd0de Mon Sep 17 00:00:00 2001
From: Roman Zippel
Date: Mon, 26 Jun 2006 00:25:18 -0700
Subject: [PATCH] fix and optimize clock source update

This fixes the clock source updates in update_wall_time() to correctly
track the time coming in via current_tick_length(). Optimize the fast
paths to be as short as possible to keep the overhead low.
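The control idea behind the adjustment can be seen in a crude user-space toy
model (all numbers invented; only the feedback rule resembles the patch):
the difference between the interval NTP asked for and the interval the clock
actually accumulated is carried forward as an error, and mult is nudged by
one in whichever direction shrinks it.

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		const int64_t cycles = 1024;        /* cycles per tick */
		const int64_t target = 1024 * 1000; /* ns NTP wants per tick */
		int64_t mult = 999;                 /* miscalibrated multiplier */
		int64_t error = 0;

		for (int tick = 0; tick < 12; tick++) {
			/* ntp interval minus what the clock added this tick */
			error += target - cycles * mult;
			if (error > cycles / 2)         /* too slow: speed up */
				mult++;
			else if (error < -cycles / 2)   /* too fast: slow down */
				mult--;
			printf("tick %2d: mult=%lld error=%lld\n", tick,
			       (long long)mult, (long long)error);
		}
		return 0;
	}

Run it and mult converges on 1000 but then hunts around it, because the
carried error keeps demanding corrections; the clocksource_bigadjust() hunk
below exists precisely to damp that oscillation for large errors.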
Signed-off-by: Roman Zippel
Acked-by: John Stultz
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/powerpc/kernel/time.c  |   4 +-
 include/linux/clocksource.h | 113 +++------------------------
 include/linux/timex.h       |   4 +-
 kernel/timer.c              | 151 ++++++++++++++++++++++++++++++++------------
 4 files changed, 123 insertions(+), 149 deletions(-)

(limited to 'kernel/timer.c')

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 742f07a6316..7dd5dab789a 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL(tb_ticks_per_sec);	/* for cputime_t conversions */
 u64 tb_to_xs;
 unsigned tb_to_us;
 
-#define TICKLEN_SCALE	(SHIFT_SCALE - 10)
+#define TICKLEN_SCALE	TICK_LENGTH_SHIFT
 u64 last_tick_len;	/* units are ns / 2^TICKLEN_SCALE */
 u64 ticklen_to_xs;	/* 0.64 fraction */
 
@@ -534,7 +534,7 @@ static __inline__ void timer_recalc_offset(u64 cur_tb)
 	if (__USE_RTC())
 		return;
-	tlen = current_tick_length(SHIFT_SCALE - 10);
+	tlen = current_tick_length();
 	offset = cur_tb - do_gtod.varp->tb_orig_stamp;
 	if (tlen == last_tick_len && offset < 0x80000000u)
 		return;
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 4bc94282c36..d852024ed09 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -46,8 +46,8 @@ typedef u64 cycle_t;
 * @shift:		cycle to nanosecond divisor (power of two)
 * @update_callback:	called when safe to alter clocksource values
 * @is_continuous:	defines if clocksource is free-running.
- * @interval_cycles:	Used internally by timekeeping core, please ignore.
- * @interval_snsecs:	Used internally by timekeeping core, please ignore.
+ * @cycle_interval:	Used internally by timekeeping core, please ignore.
+ * @xtime_interval:	Used internally by timekeeping core, please ignore.
 */
struct clocksource {
 	char *name;
@@ -61,8 +61,9 @@ struct clocksource {
 	int is_continuous;
 
 	/* timekeeping specific data, ignore */
-	cycle_t interval_cycles;
-	u64 interval_snsecs;
+	cycle_t cycle_last, cycle_interval;
+	u64 xtime_nsec, xtime_interval;
+	s64 error;
 };
 
 /* simplify initialization of mask field */
@@ -168,107 +169,11 @@ static inline void clocksource_calculate_interval(struct clocksource *c,
 	tmp += c->mult/2;
 	do_div(tmp, c->mult);
-	c->interval_cycles = (cycle_t)tmp;
-	if(c->interval_cycles == 0)
-		c->interval_cycles = 1;
+	c->cycle_interval = (cycle_t)tmp;
+	if (c->cycle_interval == 0)
+		c->cycle_interval = 1;
 
-	c->interval_snsecs = (u64)c->interval_cycles * c->mult;
-}
-
-
-/**
- * error_aproximation - calculates an error adjustment for a given error
- *
- * @error:	Error value (unsigned)
- * @unit:	Adjustment unit
- *
- * For a given error value, this function takes the adjustment unit
- * and uses binary approximation to return a power of two adjustment value.
- *
- * This function is only for use by the make_ntp_adj() function
- * and you must hold a write on the xtime_lock when calling.
- */
-static inline int error_aproximation(u64 error, u64 unit)
-{
-	static int saved_adj = 0;
-	u64 adjusted_unit = unit << saved_adj;
-
-	if (error > (adjusted_unit * 2)) {
-		/* large error, so increment the adjustment factor */
-		saved_adj++;
-	} else if (error > adjusted_unit) {
-		/* just right, don't touch it */
-	} else if (saved_adj) {
-		/* small error, so drop the adjustment factor */
-		saved_adj--;
-		return 0;
-	}
-
-	return saved_adj;
-}
-
-
-/**
- * make_ntp_adj - Adjusts the specified clocksource for a given error
- *
- * @clock:		Pointer to clock to be adjusted
- * @cycles_delta:	Current unaccounted cycle delta
- * @error:		Pointer to current error value
- *
- * Returns clock shifted nanosecond adjustment to be applied against
- * the accumulated time value (ie: xtime).
- *
- * If the error value is large enough, this function calculates the
- * (power of two) adjustment value, and adjusts the clock's mult and
- * interval_snsecs values accordingly.
- *
- * However, since there may be some unaccumulated cycles, to avoid
- * time inconsistencies we must adjust the accumulation value
- * accordingly.
- *
- * This is not very intuitive, so the following proof should help:
- * The basic timeofday algorithm:  base + cycle * mult
- * Thus:
- *	new_base + cycle * new_mult = old_base + cycle * old_mult
- *	new_base = old_base + cycle * old_mult - cycle * new_mult
- *	new_base = old_base + cycle * (old_mult - new_mult)
- *	new_base - old_base = cycle * (old_mult - new_mult)
- *	base_delta = cycle * (old_mult - new_mult)
- *	base_delta = cycle * (mult_delta)
- *
- * Where mult_delta is the adjustment value made to mult
- *
- */
-static inline s64 make_ntp_adj(struct clocksource *clock,
-				cycles_t cycles_delta, s64* error)
-{
-	s64 ret = 0;
-	if (*error > ((s64)clock->interval_cycles+1)/2) {
-		/* calculate adjustment value */
-		int adjustment = error_aproximation(*error,
-						clock->interval_cycles);
-		/* adjust clock */
-		clock->mult += 1 << adjustment;
-		clock->interval_snsecs += clock->interval_cycles << adjustment;
-
-		/* adjust the base and error for the adjustment */
-		ret = -(cycles_delta << adjustment);
-		*error -= clock->interval_cycles << adjustment;
-		/* XXX adj error for cycle_delta offset? */
-	} else if ((-(*error)) > ((s64)clock->interval_cycles+1)/2) {
-		/* calculate adjustment value */
-		int adjustment = error_aproximation(-(*error),
-						clock->interval_cycles);
-		/* adjust clock */
-		clock->mult -= 1 << adjustment;
-		clock->interval_snsecs -= clock->interval_cycles << adjustment;
-
-		/* adjust the base and error for the adjustment */
-		ret = cycles_delta << adjustment;
-		*error += clock->interval_cycles << adjustment;
-		/* XXX adj error for cycle_delta offset? */
-	}
-	return ret;
+	c->xtime_interval = (u64)c->cycle_interval * c->mult;
 }
 
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 1ba3071fcb8..19bb6538b49 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -303,8 +303,10 @@ time_interpolator_reset(void)
 
 #endif /* !CONFIG_TIME_INTERPOLATION */
 
+#define TICK_LENGTH_SHIFT	32
+
 /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
-extern u64 current_tick_length(long);
+extern u64 current_tick_length(void);
 
 extern int do_adjtimex(struct timex *);
 
diff --git a/kernel/timer.c b/kernel/timer.c
index 890a56937cf..5bb6b7976ee 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -770,7 +770,7 @@ static void update_ntp_one_tick(void)
 * specified number of bits to the right of the binary point.
 * This function has no side-effects.
 */
-u64 current_tick_length(long shift)
+u64 current_tick_length(void)
 {
 	long delta_nsec;
 	u64 ret;
@@ -779,14 +779,8 @@ u64 current_tick_length(long shift)
 	 * ie: nanosecond value shifted by (SHIFT_SCALE - 10)
 	 */
 	delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
-	ret = ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
-
-	/* convert from (SHIFT_SCALE - 10) to specified shift scale: */
-	shift = shift - (SHIFT_SCALE - 10);
-	if (shift < 0)
-		ret >>= -shift;
-	else
-		ret <<= shift;
+	ret = (u64)delta_nsec << TICK_LENGTH_SHIFT;
+	ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10));
 
 	return ret;
 }
@@ -794,7 +788,6 @@ u64 current_tick_length(void)
 /* XXX - all of this timekeeping code should be later moved to time.c */
 #include <linux/clocksource.h>
 static struct clocksource *clock; /* pointer to current clocksource */
-static cycle_t last_clock_cycle; /* cycle value at last update_wall_time */
 
 #ifdef CONFIG_GENERIC_TIME
 /**
@@ -813,7 +806,7 @@ static inline s64 __get_nsec_offset(void)
 	cycle_now = clocksource_read(clock);
 
 	/* calculate the delta since the last update_wall_time: */
-	cycle_delta = (cycle_now - last_clock_cycle) & clock->mask;
+	cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
 
 	/* convert to nanoseconds: */
 	ns_offset = cyc2ns(clock, cycle_delta);
@@ -927,7 +920,7 @@ static int change_clocksource(void)
 		timespec_add_ns(&xtime, nsec);
 
 		clock = new;
-		last_clock_cycle = now;
+		clock->cycle_last = now;
 		printk(KERN_INFO "Time: %s clocksource has been installed.\n",
 					clock->name);
 		return 1;
	} else if (clock->update_callback) {
@@ -968,7 +961,7 @@ void __init timekeeping_init(void)
 	write_seqlock_irqsave(&xtime_lock, flags);
 	clock = clocksource_get_next();
 	clocksource_calculate_interval(clock, tick_nsec);
-	last_clock_cycle = clocksource_read(clock);
+	clock->cycle_last = clocksource_read(clock);
 	ntp_clear();
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 }
@@ -988,7 +981,7 @@ static int timekeeping_resume(struct sys_device *dev)
 
 	write_seqlock_irqsave(&xtime_lock, flags);
 	/* restart the last cycle value */
-	last_clock_cycle = clocksource_read(clock);
+	clock->cycle_last = clocksource_read(clock);
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 	return 0;
 }
@@ -1014,6 +1007,81 @@ static int __init timekeeping_init_device(void)
 
 device_initcall(timekeeping_init_device);
 
+/*
+ * If the error is already larger, we look ahead another tick,
+ * to compensate for late or lost adjustments.
+ */
+static __always_inline int clocksource_bigadjust(int sign, s64 error, s64 *interval, s64 *offset)
+{
+	int adj;
+
+	/*
+	 * As soon as the machine is synchronized to the external time
+	 * source this should be the common case.
+	 */
+	error >>= 2;
+	if (likely(sign > 0 ? error <= *interval : error >= *interval))
+		return sign;
+
+	/*
+	 * An extra look ahead dampens the effect of the current error,
+	 * which can grow quite large with continuously late updates, as
+	 * it would dominate the adjustment value and can lead to
+	 * oscillation.
+	 */
+	error += current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1);
+	error -= clock->xtime_interval >> 1;
+
+	adj = 0;
+	while (1) {
+		error >>= 1;
+		if (sign > 0 ? error <= *interval : error >= *interval)
+			break;
+		adj++;
+	}
+
+	/*
+	 * Add the current adjustments to the error and take the offset
+	 * into account, the latter can cause the error to be hardly
+	 * reduced at the next tick. Check the error again if there's
+	 * room for another adjustment, thus further reducing the error
+	 * which otherwise had to be corrected at the next update.
+	 */
+	error = (error << 1) - *interval + *offset;
+	if (sign > 0 ? error > *interval : error < *interval)
+		adj++;
+
+	*interval <<= adj;
+	*offset <<= adj;
+	return sign << adj;
+}
+
+/*
+ * Adjust the multiplier to reduce the error value,
+ * this is optimized for the most common adjustments of -1,0,1,
+ * for other values we can do a bit more work.
+ */
+static void clocksource_adjust(struct clocksource *clock, s64 offset)
+{
+	s64 error, interval = clock->cycle_interval;
+	int adj;
+
+	error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1);
+	if (error > interval) {
+		adj = clocksource_bigadjust(1, error, &interval, &offset);
+	} else if (error < -interval) {
+		interval = -interval;
+		offset = -offset;
+		adj = clocksource_bigadjust(-1, error, &interval, &offset);
+	} else
+		return;
+
+	clock->mult += adj;
+	clock->xtime_interval += interval;
+	clock->xtime_nsec -= offset;
+	clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift);
+}
+
 /*
 * update_wall_time - Uses the current clocksource to increment the wall time
 *
@@ -1021,54 +1089,53 @@ device_initcall(timekeeping_init_device);
 */
static void update_wall_time(void)
 {
-	static s64 remainder_snsecs, error;
-	s64 snsecs_per_sec;
-	cycle_t now, offset;
+	cycle_t offset;
 
-	snsecs_per_sec = (s64)NSEC_PER_SEC << clock->shift;
-	remainder_snsecs += (s64)xtime.tv_nsec << clock->shift;
+	clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
 
-	now = clocksource_read(clock);
-	offset = (now - last_clock_cycle)&clock->mask;
+#ifdef CONFIG_GENERIC_TIME
+	offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask;
+#else
+	offset = clock->cycle_interval;
+#endif
 
 	/* normally this loop will run just once, however in the
 	 * case of lost or late ticks, it will accumulate correctly.
 	 */
-	while (offset > clock->interval_cycles) {
-		/* get the ntp interval in clock shifted nanoseconds */
-		s64 ntp_snsecs = current_tick_length(clock->shift);
-
+	while (offset >= clock->cycle_interval) {
 		/* accumulate one interval */
-		remainder_snsecs += clock->interval_snsecs;
-		last_clock_cycle += clock->interval_cycles;
-		offset -= clock->interval_cycles;
+		clock->xtime_nsec += clock->xtime_interval;
+		clock->cycle_last += clock->cycle_interval;
+		offset -= clock->cycle_interval;
+
+		if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
+			clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
+			xtime.tv_sec++;
+			second_overflow();
+		}
 
 		/* interpolator bits */
-		time_interpolator_update(clock->interval_snsecs
+		time_interpolator_update(clock->xtime_interval
						>> clock->shift);
 		/* increment the NTP state machine */
 		update_ntp_one_tick();
 
 		/* accumulate error between NTP and clock interval */
-		error += (ntp_snsecs - (s64)clock->interval_snsecs);
+		clock->error += current_tick_length();
+		clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift);
+	}
 
-		/* correct the clock when NTP error is too big */
-		remainder_snsecs += make_ntp_adj(clock, offset, &error);
+	/* correct the clock when NTP error is too big */
+	clocksource_adjust(clock, offset);
 
-		if (remainder_snsecs >= snsecs_per_sec) {
-			remainder_snsecs -= snsecs_per_sec;
-			xtime.tv_sec++;
-			second_overflow();
-		}
-	}
 	/* store full nanoseconds into xtime */
-	xtime.tv_nsec = remainder_snsecs >> clock->shift;
-	remainder_snsecs -= (s64)xtime.tv_nsec << clock->shift;
+	xtime.tv_nsec = clock->xtime_nsec >> clock->shift;
+	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
 
 	/* check to see if there is a new clocksource to use */
 	if (change_clocksource()) {
-		error = 0;
-		remainder_snsecs = 0;
+		clock->error = 0;
+		clock->xtime_nsec = 0;
 		clocksource_calculate_interval(clock, tick_nsec);
 	}
 }
--
cgit v1.2.3-70-g09d2

From 9c7b216d23e820e0e148d5be01bbb5bd2d8378fe Mon Sep 17 00:00:00 2001
From: Chandra Seetharaman
Date: Tue, 27 Jun 2006 02:54:07 -0700
Subject: [PATCH] cpu hotplug: revert init patch submitted for 2.6.17

In 2.6.17, there was a problem with cpu_notifiers and XFS. I provided a
band-aid solution to solve that problem. In the process, I undid all the
changes you both were making to ensure that these notifiers were available
only at init time (unless CONFIG_HOTPLUG_CPU is defined).

We deferred the real fix to 2.6.18. Here is a set of patches that fixes the
XFS problem cleanly and makes the cpu notifiers available only at init time
(unless CONFIG_HOTPLUG_CPU is defined). If CONFIG_HOTPLUG_CPU is defined,
then cpu notifiers are available at run time.
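For orientation, the shape of the annotation pattern these patches restore
looks roughly like the following hedged sketch (the "foo" names are
invented; the callback/notifier_block pairing and the __devinit family of
annotations are taken from the hunks below, which let the code be discarded
after boot when CPU hotplug is not configured):

	static int __devinit foo_cpu_callback(struct notifier_block *nfb,
					      unsigned long action, void *hcpu)
	{
		switch (action) {
		case CPU_UP_PREPARE:
			/* allocate per-cpu state for cpu (long)hcpu */
			break;
		case CPU_DEAD:
			/* and tear it down again on unplug */
			break;
		}
		return NOTIFY_OK;
	}

	static struct notifier_block __devinitdata foo_cpu_nb = {
		.notifier_call = foo_cpu_callback,
	};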
This patch reverts the notifier_call changes made in 2.6.17.

Signed-off-by: Chandra Seetharaman
Cc: Ashok Raj
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/i386/kernel/cpu/intel_cacheinfo.c | 2 +-
 arch/ia64/kernel/palinfo.c             | 2 +-
 arch/ia64/kernel/salinfo.c             | 2 +-
 arch/ia64/kernel/topology.c            | 2 +-
 arch/powerpc/kernel/sysfs.c            | 2 +-
 arch/x86_64/kernel/mce.c               | 2 +-
 drivers/base/topology.c                | 2 +-
 drivers/cpufreq/cpufreq.c              | 2 +-
 kernel/hrtimer.c                       | 2 +-
 kernel/profile.c                       | 2 +-
 kernel/rcupdate.c                      | 2 +-
 kernel/softirq.c                       | 2 +-
 kernel/softlockup.c                    | 2 +-
 kernel/timer.c                         | 2 +-
 kernel/workqueue.c                     | 2 +-
 mm/page_alloc.c                        | 2 +-
 mm/slab.c                              | 2 +-
 mm/vmscan.c                            | 2 +-
 18 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'kernel/timer.c')

diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index 8a92642ea59..1d4ab104798 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -729,7 +729,7 @@ static void __cpuexit cache_remove_dev(struct sys_device * sys_dev)
 	return;
 }
 
-static int cacheinfo_cpu_callback(struct notifier_block *nfb,
+static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index 859fb37ff49..6386f63c413 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -959,7 +959,7 @@ remove_palinfo_proc_entries(unsigned int hcpu)
 	}
 }
 
-static int palinfo_cpu_callback(struct notifier_block *nfb,
+static int __devinit palinfo_cpu_callback(struct notifier_block *nfb,
 					unsigned long action,
 					void *hcpu)
 {
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 663a186ad19..9d5a823479a 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -572,7 +572,7 @@ static struct file_operations salinfo_data_fops = {
 };
 
 #ifdef CONFIG_HOTPLUG_CPU
-static int
+static int __devinit
 salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 {
 	unsigned int i, cpu = (unsigned long)hcpu;
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 5737c9a061e..f07c382b57b 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -404,7 +404,7 @@ static int __cpuinit cache_remove_dev(struct sys_device * sys_dev)
 * When a cpu is hot-plugged, do a check and initiate
 * cache kobject if necessary
 */
-static int cache_cpu_callback(struct notifier_block *nfb,
+static int __cpuinit cache_cpu_callback(struct notifier_block *nfb,
 		unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 412ad00e222..0231869613c 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -279,7 +279,7 @@ static void unregister_cpu_online(unsigned int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int sysfs_cpu_notify(struct notifier_block *self,
+static int __devinit sysfs_cpu_notify(struct notifier_block *self,
 			    unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned int)(long)hcpu;
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index acd5816b1a6..efe8500a5b9 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -629,7 +629,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
 #endif
 
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int
+static __cpuinit int
 mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 8c52421cbc5..915810f6237 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -107,7 +107,7 @@ static int __cpuinit topology_remove_dev(struct sys_device * sys_dev)
 	return 0;
 }
 
-static int topology_cpu_callback(struct notifier_block *nfb,
+static int __cpuinit topology_cpu_callback(struct notifier_block *nfb,
 		unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 44d1eca83a7..486ef666470 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1497,7 +1497,7 @@ int cpufreq_update_policy(unsigned int cpu)
 }
 EXPORT_SYMBOL(cpufreq_update_policy);
 
-static int cpufreq_cpu_callback(struct notifier_block *nfb,
+static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 55601b3ce60..f9f53191661 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -833,7 +833,7 @@ static void migrate_hrtimers(int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int hrtimer_cpu_notify(struct notifier_block *self,
+static int __devinit hrtimer_cpu_notify(struct notifier_block *self,
 					unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
diff --git a/kernel/profile.c b/kernel/profile.c
index 68afe121e50..5a730fdb1a2 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -299,7 +299,7 @@ out:
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static int profile_cpu_callback(struct notifier_block *info,
+static int __devinit profile_cpu_callback(struct notifier_block *info,
 					unsigned long action, void *__cpu)
 {
 	int node, cpu = (unsigned long)__cpu;
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c0e1cb95dd4..a8d80b7048b 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -548,7 +548,7 @@ static void __devinit rcu_online_cpu(int cpu)
 	tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
 }
 
-static int rcu_cpu_notify(struct notifier_block *self,
+static int __devinit rcu_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 9e2f1c6e73d..db65a311f14 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -446,7 +446,7 @@ static void takeover_tasklets(unsigned int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int cpu_callback(struct notifier_block *nfb,
+static int __devinit cpu_callback(struct notifier_block *nfb,
 				unsigned long action,
 				void *hcpu)
 {
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index b5c3b94e01c..29da0a847ba 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -104,7 +104,7 @@ static int watchdog(void * __bind_cpu)
 /*
 * Create/destroy watchdog threads as CPUs come and go:
 */
-static int
+static int __devinit
 cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	int hotcpu = (unsigned long)hcpu;
diff --git a/kernel/timer.c b/kernel/timer.c
index 5bb6b7976ee..878194ec8bd 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1652,7 +1652,7 @@ static void __devinit migrate_timers(int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int timer_cpu_notify(struct notifier_block *self,
+static int __devinit timer_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 565cf7a1feb..59f0b42bd89 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -559,7 +559,7 @@ static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
 }
 
 /* We're holding the cpucontrol mutex here */
-static int workqueue_cpu_callback(struct notifier_block *nfb,
+static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 				  unsigned long action,
 				  void *hcpu)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9f86191bb63..e9fb2d4064c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2009,7 +2009,7 @@ static inline void free_zone_pagesets(int cpu)
 	}
 }
 
-static int pageset_cpuup_callback(struct notifier_block *nfb,
+static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
 		unsigned long action,
 		void *hcpu)
 {
diff --git a/mm/slab.c b/mm/slab.c
index 47982c2d9f3..631c0feb964 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1073,7 +1073,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 
 #endif
 
-static int cpuup_callback(struct notifier_block *nfb,
+static int __devinit cpuup_callback(struct notifier_block *nfb,
 				    unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f03da33d914..eeacb0d695c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1450,7 +1450,7 @@ out:
   not required for correctness.  So if the last cpu in a node goes
   away, we get changed to run anywhere: as the first one comes back,
   restore their cpu bindings. */
-static int cpu_callback(struct notifier_block *nfb,
+static int __devinit cpu_callback(struct notifier_block *nfb,
 				  unsigned long action, void *hcpu)
 {
 	pg_data_t *pgdat;
--
cgit v1.2.3-70-g09d2

From 054cc8a2d808822dadf488a61729e3e550f114c4 Mon Sep 17 00:00:00 2001
From: Chandra Seetharaman
Date: Tue, 27 Jun 2006 02:54:07 -0700
Subject: [PATCH] cpu hotplug: revert initdata patch submitted for 2.6.17

This patch reverts notifier_block changes made in 2.6.17.

Signed-off-by: Chandra Seetharaman
Cc: Ashok Raj
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/powerpc/kernel/sysfs.c        | 2 +-
 arch/s390/appldata/appldata_base.c | 2 +-
 block/ll_rw_blk.c                  | 2 +-
 kernel/hrtimer.c                   | 2 +-
 kernel/rcupdate.c                  | 2 +-
 kernel/sched.c                     | 2 +-
 kernel/softirq.c                   | 2 +-
 kernel/softlockup.c                | 2 +-
 kernel/timer.c                     | 2 +-
 9 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'kernel/timer.c')

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 0231869613c..4662b580efa 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -297,7 +297,7 @@ static int __devinit sysfs_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block sysfs_cpu_nb = {
+static struct notifier_block __devinitdata sysfs_cpu_nb = {
 	.notifier_call	= sysfs_cpu_notify,
 };
 
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 9a22434a580..54d35c13090 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -652,7 +652,7 @@ appldata_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block appldata_nb = {
+static struct notifier_block __devinitdata appldata_nb = {
 	.notifier_call = appldata_cpu_notify,
 };
 
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index c04422a502d..9610b07cc19 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3403,7 +3403,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
 }
 
 
-static struct notifier_block blk_cpu_notifier = {
+static struct notifier_block __devinitdata blk_cpu_notifier = {
 	.notifier_call	= blk_cpu_notify,
 };
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f9f53191661..8d3dc29ef41 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -857,7 +857,7 @@ static int __devinit hrtimer_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block hrtimers_nb = {
+static struct notifier_block __devinitdata hrtimers_nb = {
 	.notifier_call = hrtimer_cpu_notify,
 };
 
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a8d80b7048b..f464f5ae3f1 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -565,7 +565,7 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block rcu_nb = {
+static struct notifier_block __devinitdata rcu_nb = {
 	.notifier_call	= rcu_cpu_notify,
 };
 
diff --git a/kernel/sched.c b/kernel/sched.c
index a856040c200..b8deddb7833 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4805,7 +4805,7 @@ static int migration_call(struct notifier_block *nfb, unsigned long action,
 /* Register at highest priority so that task migration (migrate_all_tasks)
 * happens before everything else.
 */
-static struct notifier_block migration_notifier = {
+static struct notifier_block __devinitdata migration_notifier = {
 	.notifier_call = migration_call,
 	.priority = 10
 };
diff --git a/kernel/softirq.c b/kernel/softirq.c
index db65a311f14..8f03e3b89b5 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -486,7 +486,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block cpu_nfb = {
+static struct notifier_block __devinitdata cpu_nfb = {
 	.notifier_call = cpu_callback
 };
 
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 29da0a847ba..6b76caa2298 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -142,7 +142,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	return NOTIFY_OK;
 }
 
-static struct notifier_block cpu_nfb = {
+static struct notifier_block __devinitdata cpu_nfb = {
 	.notifier_call = cpu_callback
 };
 
diff --git a/kernel/timer.c b/kernel/timer.c
index 878194ec8bd..5a896025306 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1672,7 +1672,7 @@ static int __devinit timer_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block timers_nb = {
+static struct notifier_block __devinitdata timers_nb = {
 	.notifier_call	= timer_cpu_notify,
 };
 
--
cgit v1.2.3-70-g09d2
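A closing worked example of the arithmetic the whole series rests on: cyc2ns()
computes ns = (cycles * mult) >> shift, with mult chosen so that the shifted
product comes out in nanoseconds (essentially what clocksource_hz2mult() does,
modulo rounding). A standalone user-space check with invented numbers:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* a hypothetical 1 MHz counter, i.e. 1000 ns per cycle */
		const uint32_t hz = 1000000;
		const uint32_t shift = 22;

		/* mult ~= (1e9 << shift) / hz, so that the >> shift below
		 * turns cycle counts into nanoseconds */
		uint32_t mult = (uint32_t)(((uint64_t)1000000000 << shift) / hz);

		/* cyc2ns(): ns = (cycles * mult) >> shift */
		uint64_t cycles = 3;
		uint64_t ns = (cycles * mult) >> shift;

		/* prints: mult=4194304000, 3 cycles -> 3000 ns */
		printf("mult=%u, %llu cycles -> %llu ns\n", mult,
		       (unsigned long long)cycles, (unsigned long long)ns);
		return 0;
	}

The NTP feedback in the later patches works by nudging exactly this mult
value, which is why the error bookkeeping is carried in mult-scaled
("shifted nanosecond") units throughout.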