diff options
Diffstat (limited to 'kernel/time')
-rw-r--r-- | kernel/time/clockevents.c | 13 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 97 | ||||
-rw-r--r-- | kernel/time/tick-oneshot.c | 4 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 141 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 119 | ||||
-rw-r--r-- | kernel/time/timer_list.c | 10 |
6 files changed, 273 insertions, 111 deletions
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 620b58abdc3..20a8920029e 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -20,6 +20,8 @@ #include <linux/sysdev.h> #include <linux/tick.h> +#include "tick-internal.h" + /* The registered clock event devices */ static LIST_HEAD(clockevent_devices); static LIST_HEAD(clockevents_released); @@ -37,10 +39,9 @@ static DEFINE_SPINLOCK(clockevents_lock); * * Math helper, returns latch value converted to nanoseconds (bound checked) */ -unsigned long clockevent_delta2ns(unsigned long latch, - struct clock_event_device *evt) +u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) { - u64 clc = ((u64) latch << evt->shift); + u64 clc = (u64) latch << evt->shift; if (unlikely(!evt->mult)) { evt->mult = 1; @@ -50,10 +51,10 @@ unsigned long clockevent_delta2ns(unsigned long latch, do_div(clc, evt->mult); if (clc < 1000) clc = 1000; - if (clc > LONG_MAX) - clc = LONG_MAX; + if (clc > KTIME_MAX) + clc = KTIME_MAX; - return (unsigned long) clc; + return clc; } EXPORT_SYMBOL_GPL(clockevent_delta2ns); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 4a310906b3e..d422c7b2236 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -107,6 +107,59 @@ u64 timecounter_cyc2time(struct timecounter *tc, } EXPORT_SYMBOL_GPL(timecounter_cyc2time); +/** + * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks + * @mult: pointer to mult variable + * @shift: pointer to shift variable + * @from: frequency to convert from + * @to: frequency to convert to + * @minsec: guaranteed runtime conversion range in seconds + * + * The function evaluates the shift/mult pair for the scaled math + * operations of clocksources and clockevents. + * + * @to and @from are frequency values in HZ. For clock sources @to is + * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock + * event @to is the counter frequency and @from is NSEC_PER_SEC. + * + * The @minsec conversion range argument controls the time frame in + * seconds which must be covered by the runtime conversion with the + * calculated mult and shift factors. This guarantees that no 64bit + * overflow happens when the input value of the conversion is + * multiplied with the calculated mult factor. Larger ranges may + * reduce the conversion accuracy by chosing smaller mult and shift + * factors. + */ +void +clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec) +{ + u64 tmp; + u32 sft, sftacc= 32; + + /* + * Calculate the shift factor which is limiting the conversion + * range: + */ + tmp = ((u64)minsec * from) >> 32; + while (tmp) { + tmp >>=1; + sftacc--; + } + + /* + * Find the conversion shift/mult pair which has the best + * accuracy and fits the maxsec conversion range: + */ + for (sft = 32; sft > 0; sft--) { + tmp = (u64) to << sft; + do_div(tmp, from); + if ((tmp >> sftacc) == 0) + break; + } + *mult = tmp; + *shift = sft; +} + /*[Clocksource internal variables]--------- * curr_clocksource: * currently selected clocksource. @@ -413,6 +466,47 @@ void clocksource_touch_watchdog(void) clocksource_resume_watchdog(); } +/** + * clocksource_max_deferment - Returns max time the clocksource can be deferred + * @cs: Pointer to clocksource + * + */ +static u64 clocksource_max_deferment(struct clocksource *cs) +{ + u64 max_nsecs, max_cycles; + + /* + * Calculate the maximum number of cycles that we can pass to the + * cyc2ns function without overflowing a 64-bit signed result. The + * maximum number of cycles is equal to ULLONG_MAX/cs->mult which + * is equivalent to the below. + * max_cycles < (2^63)/cs->mult + * max_cycles < 2^(log2((2^63)/cs->mult)) + * max_cycles < 2^(log2(2^63) - log2(cs->mult)) + * max_cycles < 2^(63 - log2(cs->mult)) + * max_cycles < 1 << (63 - log2(cs->mult)) + * Please note that we add 1 to the result of the log2 to account for + * any rounding errors, ensure the above inequality is satisfied and + * no overflow will occur. + */ + max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1)); + + /* + * The actual maximum number of cycles we can defer the clocksource is + * determined by the minimum of max_cycles and cs->mask. + */ + max_cycles = min_t(u64, max_cycles, (u64) cs->mask); + max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift); + + /* + * To ensure that the clocksource does not wrap whilst we are idle, + * limit the time the clocksource can be deferred by 12.5%. Please + * note a margin of 12.5% is used because this can be computed with + * a shift, versus say 10% which would require division. + */ + return max_nsecs - (max_nsecs >> 5); +} + #ifdef CONFIG_GENERIC_TIME /** @@ -511,6 +605,9 @@ static void clocksource_enqueue(struct clocksource *cs) */ int clocksource_register(struct clocksource *cs) { + /* calculate max idle time permitted for this clocksource */ + cs->max_idle_ns = clocksource_max_deferment(cs); + mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); clocksource_select(); diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index a96c0e2b89c..0a8a213016f 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -50,9 +50,9 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires, dev->min_delta_ns += dev->min_delta_ns >> 1; printk(KERN_WARNING - "CE: %s increasing min_delta_ns to %lu nsec\n", + "CE: %s increasing min_delta_ns to %llu nsec\n", dev->name ? dev->name : "?", - dev->min_delta_ns << 1); + (unsigned long long) dev->min_delta_ns << 1); i = 0; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 89aed5933ed..f992762d7f5 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -134,18 +134,13 @@ __setup("nohz=", setup_tick_nohz); * value. We do this unconditionally on any cpu, as we don't know whether the * cpu, which has the update task assigned is in a long sleep. */ -static void tick_nohz_update_jiffies(void) +static void tick_nohz_update_jiffies(ktime_t now) { int cpu = smp_processor_id(); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); unsigned long flags; - ktime_t now; - - if (!ts->tick_stopped) - return; cpumask_clear_cpu(cpu, nohz_cpu_mask); - now = ktime_get(); ts->idle_waketime = now; local_irq_save(flags); @@ -155,20 +150,17 @@ static void tick_nohz_update_jiffies(void) touch_softlockup_watchdog(); } -static void tick_nohz_stop_idle(int cpu) +static void tick_nohz_stop_idle(int cpu, ktime_t now) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t delta; - if (ts->idle_active) { - ktime_t now, delta; - now = ktime_get(); - delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_lastupdate = now; - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); - ts->idle_active = 0; + delta = ktime_sub(now, ts->idle_entrytime); + ts->idle_lastupdate = now; + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + ts->idle_active = 0; - sched_clock_idle_wakeup_event(0); - } + sched_clock_idle_wakeup_event(0); } static ktime_t tick_nohz_start_idle(struct tick_sched *ts) @@ -216,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle) struct tick_sched *ts; ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; + u64 time_delta; int cpu; local_irq_save(flags); @@ -263,7 +256,7 @@ void tick_nohz_stop_sched_tick(int inidle) if (ratelimit < 10) { printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", - local_softirq_pending()); + (unsigned int) local_softirq_pending()); ratelimit++; } goto end; @@ -275,14 +268,18 @@ void tick_nohz_stop_sched_tick(int inidle) seq = read_seqbegin(&xtime_lock); last_update = last_jiffies_update; last_jiffies = jiffies; + time_delta = timekeeping_max_deferment(); } while (read_seqretry(&xtime_lock, seq)); - /* Get the next timer wheel timer */ - next_jiffies = get_next_timer_interrupt(last_jiffies); - delta_jiffies = next_jiffies - last_jiffies; - - if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu)) + if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || + arch_needs_cpu(cpu)) { + next_jiffies = last_jiffies + 1; delta_jiffies = 1; + } else { + /* Get the next timer wheel timer */ + next_jiffies = get_next_timer_interrupt(last_jiffies); + delta_jiffies = next_jiffies - last_jiffies; + } /* * Do not stop the tick, if we are only one off * or if the cpu is required for rcu @@ -294,22 +291,51 @@ void tick_nohz_stop_sched_tick(int inidle) if ((long)delta_jiffies >= 1) { /* - * calculate the expiry time for the next timer wheel - * timer - */ - expires = ktime_add_ns(last_update, tick_period.tv64 * - delta_jiffies); - - /* * If this cpu is the one which updates jiffies, then * give up the assignment and let it be taken by the * cpu which runs the tick timer next, which might be * this cpu as well. If we don't drop this here the * jiffies might be stale and do_timer() never - * invoked. + * invoked. Keep track of the fact that it was the one + * which had the do_timer() duty last. If this cpu is + * the one which had the do_timer() duty last, we + * limit the sleep time to the timekeeping + * max_deferement value which we retrieved + * above. Otherwise we can sleep as long as we want. */ - if (cpu == tick_do_timer_cpu) + if (cpu == tick_do_timer_cpu) { tick_do_timer_cpu = TICK_DO_TIMER_NONE; + ts->do_timer_last = 1; + } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { + time_delta = KTIME_MAX; + ts->do_timer_last = 0; + } else if (!ts->do_timer_last) { + time_delta = KTIME_MAX; + } + + /* + * calculate the expiry time for the next timer wheel + * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals + * that there is no timer pending or at least extremely + * far into the future (12 days for HZ=1000). In this + * case we set the expiry to the end of time. + */ + if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) { + /* + * Calculate the time delta for the next timer event. + * If the time delta exceeds the maximum time delta + * permitted by the current clocksource then adjust + * the time delta accordingly to ensure the + * clocksource does not wrap. + */ + time_delta = min_t(u64, time_delta, + tick_period.tv64 * delta_jiffies); + } + + if (time_delta < KTIME_MAX) + expires = ktime_add_ns(last_update, time_delta); + else + expires.tv64 = KTIME_MAX; if (delta_jiffies > 1) cpumask_set_cpu(cpu, nohz_cpu_mask); @@ -342,22 +368,19 @@ void tick_nohz_stop_sched_tick(int inidle) ts->idle_sleeps++; + /* Mark expires */ + ts->idle_expires = expires; + /* - * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that - * there is no timer pending or at least extremly far - * into the future (12 days for HZ=1000). In this case - * we simply stop the tick timer: + * If the expiration time == KTIME_MAX, then + * in this case we simply stop the tick timer. */ - if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) { - ts->idle_expires.tv64 = KTIME_MAX; + if (unlikely(expires.tv64 == KTIME_MAX)) { if (ts->nohz_mode == NOHZ_MODE_HIGHRES) hrtimer_cancel(&ts->sched_timer); goto out; } - /* Mark expiries */ - ts->idle_expires = expires; - if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { hrtimer_start(&ts->sched_timer, expires, HRTIMER_MODE_ABS_PINNED); @@ -436,7 +459,11 @@ void tick_nohz_restart_sched_tick(void) ktime_t now; local_irq_disable(); - tick_nohz_stop_idle(cpu); + if (ts->idle_active || (ts->inidle && ts->tick_stopped)) + now = ktime_get(); + + if (ts->idle_active) + tick_nohz_stop_idle(cpu, now); if (!ts->inidle || !ts->tick_stopped) { ts->inidle = 0; @@ -450,7 +477,6 @@ void tick_nohz_restart_sched_tick(void) /* Update jiffies first */ select_nohz_load_balancer(0); - now = ktime_get(); tick_do_update_jiffies64(now); cpumask_clear_cpu(cpu, nohz_cpu_mask); @@ -584,22 +610,18 @@ static void tick_nohz_switch_to_nohz(void) * timer and do not touch the other magic bits which need to be done * when idle is left. */ -static void tick_nohz_kick_tick(int cpu) +static void tick_nohz_kick_tick(int cpu, ktime_t now) { #if 0 /* Switch back to 2.6.27 behaviour */ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); - ktime_t delta, now; - - if (!ts->tick_stopped) - return; + ktime_t delta; /* * Do not touch the tick device, when the next expiry is either * already reached or less/equal than the tick period. */ - now = ktime_get(); delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); if (delta.tv64 <= tick_period.tv64) return; @@ -608,9 +630,26 @@ static void tick_nohz_kick_tick(int cpu) #endif } +static inline void tick_check_nohz(int cpu) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now; + + if (!ts->idle_active && !ts->tick_stopped) + return; + now = ktime_get(); + if (ts->idle_active) + tick_nohz_stop_idle(cpu, now); + if (ts->tick_stopped) { + tick_nohz_update_jiffies(now); + tick_nohz_kick_tick(cpu, now); + } +} + #else static inline void tick_nohz_switch_to_nohz(void) { } +static inline void tick_check_nohz(int cpu) { } #endif /* NO_HZ */ @@ -620,11 +659,7 @@ static inline void tick_nohz_switch_to_nohz(void) { } void tick_check_idle(int cpu) { tick_check_oneshot_broadcast(cpu); -#ifdef CONFIG_NO_HZ - tick_nohz_stop_idle(cpu); - tick_nohz_update_jiffies(); - tick_nohz_kick_tick(cpu); -#endif + tick_check_nohz(cpu); } /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c3a4e2907ea..d1aebd73b19 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -165,13 +165,6 @@ struct timespec raw_time; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; -static struct timespec xtime_cache __attribute__ ((aligned (16))); -void update_xtime_cache(u64 nsec) -{ - xtime_cache = xtime; - timespec_add_ns(&xtime_cache, nsec); -} - /* must hold xtime_lock */ void timekeeping_leap_insert(int leapsecond) { @@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv) xtime = *tv; - update_xtime_cache(0); - timekeeper.ntp_error = 0; ntp_clear(); @@ -488,6 +479,17 @@ int timekeeping_valid_for_hres(void) } /** + * timekeeping_max_deferment - Returns max time the clocksource can be deferred + * + * Caller must observe xtime_lock via read_seqbegin/read_seqretry to + * ensure that the clocksource does not change! + */ +u64 timekeeping_max_deferment(void) +{ + return timekeeper.clock->max_idle_ns; +} + +/** * read_persistent_clock - Return time from the persistent clock. * * Weak dummy function for arches that do not yet support it. @@ -548,7 +550,6 @@ void __init timekeeping_init(void) } set_normalized_timespec(&wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); - update_xtime_cache(0); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -582,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev) wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); total_sleep_time = timespec_add_safe(total_sleep_time, ts); } - update_xtime_cache(0); /* re-base the last cycle value */ timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.ntp_error = 0; @@ -723,6 +723,49 @@ static void timekeeping_adjust(s64 offset) } /** + * logarithmic_accumulation - shifted accumulation of cycles + * + * This functions accumulates a shifted interval of cycles into + * into a shifted interval nanoseconds. Allows for O(log) accumulation + * loop. + * + * Returns the unconsumed cycles. + */ +static cycle_t logarithmic_accumulation(cycle_t offset, int shift) +{ + u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; + + /* If the offset is smaller then a shifted interval, do nothing */ + if (offset < timekeeper.cycle_interval<<shift) + return offset; + + /* Accumulate one shifted interval */ + offset -= timekeeper.cycle_interval << shift; + timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift; + + timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; + while (timekeeper.xtime_nsec >= nsecps) { + timekeeper.xtime_nsec -= nsecps; + xtime.tv_sec++; + second_overflow(); + } + + /* Accumulate into raw time */ + raw_time.tv_nsec += timekeeper.raw_interval << shift;; + while (raw_time.tv_nsec >= NSEC_PER_SEC) { + raw_time.tv_nsec -= NSEC_PER_SEC; + raw_time.tv_sec++; + } + + /* Accumulate error between NTP and clock interval */ + timekeeper.ntp_error += tick_length << shift; + timekeeper.ntp_error -= timekeeper.xtime_interval << + (timekeeper.ntp_error_shift + shift); + + return offset; +} + +/** * update_wall_time - Uses the current clocksource to increment the wall time * * Called from the timer interrupt, must hold a write on xtime_lock. @@ -731,7 +774,7 @@ void update_wall_time(void) { struct clocksource *clock; cycle_t offset; - u64 nsecs; + int shift = 0, maxshift; /* Make sure we're fully resumed: */ if (unlikely(timekeeping_suspended)) @@ -745,33 +788,22 @@ void update_wall_time(void) #endif timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift; - /* normally this loop will run just once, however in the - * case of lost or late ticks, it will accumulate correctly. + /* + * With NO_HZ we may have to accumulate many cycle_intervals + * (think "ticks") worth of time at once. To do this efficiently, + * we calculate the largest doubling multiple of cycle_intervals + * that is smaller then the offset. We then accumulate that + * chunk in one go, and then try to consume the next smaller + * doubled multiple. */ + shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); + shift = max(0, shift); + /* Bound shift to one less then what overflows tick_length */ + maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1; + shift = min(shift, maxshift); while (offset >= timekeeper.cycle_interval) { - u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; - - /* accumulate one interval */ - offset -= timekeeper.cycle_interval; - clock->cycle_last += timekeeper.cycle_interval; - - timekeeper.xtime_nsec += timekeeper.xtime_interval; - if (timekeeper.xtime_nsec >= nsecps) { - timekeeper.xtime_nsec -= nsecps; - xtime.tv_sec++; - second_overflow(); - } - - raw_time.tv_nsec += timekeeper.raw_interval; - if (raw_time.tv_nsec >= NSEC_PER_SEC) { - raw_time.tv_nsec -= NSEC_PER_SEC; - raw_time.tv_sec++; - } - - /* accumulate error between NTP and clock interval */ - timekeeper.ntp_error += tick_length; - timekeeper.ntp_error -= timekeeper.xtime_interval << - timekeeper.ntp_error_shift; + offset = logarithmic_accumulation(offset, shift); + shift--; } /* correct the clock when NTP error is too big */ @@ -807,9 +839,6 @@ void update_wall_time(void) timekeeper.ntp_error += timekeeper.xtime_nsec << timekeeper.ntp_error_shift; - nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); - update_xtime_cache(nsecs); - /* check to see if there is a new clocksource to use */ update_vsyscall(&xtime, timekeeper.clock); } @@ -846,13 +875,13 @@ void monotonic_to_bootbased(struct timespec *ts) unsigned long get_seconds(void) { - return xtime_cache.tv_sec; + return xtime.tv_sec; } EXPORT_SYMBOL(get_seconds); struct timespec __current_kernel_time(void) { - return xtime_cache; + return xtime; } struct timespec current_kernel_time(void) @@ -862,8 +891,7 @@ struct timespec current_kernel_time(void) do { seq = read_seqbegin(&xtime_lock); - - now = xtime_cache; + now = xtime; } while (read_seqretry(&xtime_lock, seq)); return now; @@ -877,8 +905,7 @@ struct timespec get_monotonic_coarse(void) do { seq = read_seqbegin(&xtime_lock); - - now = xtime_cache; + now = xtime; mono = wall_to_monotonic; } while (read_seqretry(&xtime_lock, seq)); diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 1b5b7aa2fdf..665c76edbf1 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -204,10 +204,12 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) return; } SEQ_printf(m, "%s\n", dev->name); - SEQ_printf(m, " max_delta_ns: %lu\n", dev->max_delta_ns); - SEQ_printf(m, " min_delta_ns: %lu\n", dev->min_delta_ns); - SEQ_printf(m, " mult: %lu\n", dev->mult); - SEQ_printf(m, " shift: %d\n", dev->shift); + SEQ_printf(m, " max_delta_ns: %llu\n", + (unsigned long long) dev->max_delta_ns); + SEQ_printf(m, " min_delta_ns: %llu\n", + (unsigned long long) dev->min_delta_ns); + SEQ_printf(m, " mult: %u\n", dev->mult); + SEQ_printf(m, " shift: %u\n", dev->shift); SEQ_printf(m, " mode: %d\n", dev->mode); SEQ_printf(m, " next_event: %Ld nsecs\n", (unsigned long long) ktime_to_ns(dev->next_event)); |