diff options
Diffstat (limited to 'kernel/time')
-rw-r--r-- | kernel/time/clockevents.c | 2 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 49 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 105 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 10 |
4 files changed, 101 insertions, 65 deletions
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index c4eb71c8b2e..9cd928f7a7c 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -17,7 +17,6 @@ #include <linux/module.h> #include <linux/notifier.h> #include <linux/smp.h> -#include <linux/sysdev.h> #include "tick-internal.h" @@ -387,7 +386,6 @@ void clockevents_exchange_device(struct clock_event_device *old, * released list and do a notify add later. */ if (old) { - old->event_handler = clockevents_handle_noop; clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); list_del(&old->list); list_add(&old->list, &clockevents_released); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index da2f760e780..a45ca167ab2 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -23,8 +23,8 @@ * o Allow clocksource drivers to be unregistered */ +#include <linux/device.h> #include <linux/clocksource.h> -#include <linux/sysdev.h> #include <linux/init.h> #include <linux/module.h> #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ @@ -647,7 +647,7 @@ static void clocksource_enqueue(struct clocksource *cs) /** * __clocksource_updatefreq_scale - Used update clocksource with new freq - * @t: clocksource to be registered + * @cs: clocksource to be registered * @scale: Scale factor multiplied against freq to get clocksource hz * @freq: clocksource frequency (cycles per second) divided by scale * @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); /** * __clocksource_register_scale - Used to install new clocksources - * @t: clocksource to be registered + * @cs: clocksource to be registered * @scale: Scale factor multiplied against freq to get clocksource hz * @freq: clocksource frequency (cycles per second) divided by scale * @@ -727,7 +727,7 @@ EXPORT_SYMBOL_GPL(__clocksource_register_scale); /** * clocksource_register - Used to install new clocksources - * @t: clocksource to be registered + * @cs: clocksource to be registered * * Returns -EBUSY if registration fails, zero otherwise. */ @@ -761,6 +761,8 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating) /** * clocksource_change_rating - Change the rating of a registered clocksource + * @cs: clocksource to be changed + * @rating: new rating */ void clocksource_change_rating(struct clocksource *cs, int rating) { @@ -772,6 +774,7 @@ EXPORT_SYMBOL(clocksource_change_rating); /** * clocksource_unregister - remove a registered clocksource + * @cs: clocksource to be unregistered */ void clocksource_unregister(struct clocksource *cs) { @@ -787,13 +790,14 @@ EXPORT_SYMBOL(clocksource_unregister); /** * sysfs_show_current_clocksources - sysfs interface for current clocksource * @dev: unused + * @attr: unused * @buf: char buffer to be filled with clocksource list * * Provides sysfs interface for listing current clocksource. */ static ssize_t -sysfs_show_current_clocksources(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +sysfs_show_current_clocksources(struct device *dev, + struct device_attribute *attr, char *buf) { ssize_t count = 0; @@ -807,14 +811,15 @@ sysfs_show_current_clocksources(struct sys_device *dev, /** * sysfs_override_clocksource - interface for manually overriding clocksource * @dev: unused + * @attr: unused * @buf: name of override clocksource * @count: length of buffer * * Takes input from sysfs interface for manually overriding the default * clocksource selection. */ -static ssize_t sysfs_override_clocksource(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t sysfs_override_clocksource(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { size_t ret = count; @@ -842,13 +847,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, /** * sysfs_show_available_clocksources - sysfs interface for listing clocksource * @dev: unused + * @attr: unused * @buf: char buffer to be filled with clocksource list * * Provides sysfs interface for listing registered clocksources */ static ssize_t -sysfs_show_available_clocksources(struct sys_device *dev, - struct sysdev_attribute *attr, +sysfs_show_available_clocksources(struct device *dev, + struct device_attribute *attr, char *buf) { struct clocksource *src; @@ -877,35 +883,36 @@ sysfs_show_available_clocksources(struct sys_device *dev, /* * Sysfs setup bits: */ -static SYSDEV_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, +static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, sysfs_override_clocksource); -static SYSDEV_ATTR(available_clocksource, 0444, +static DEVICE_ATTR(available_clocksource, 0444, sysfs_show_available_clocksources, NULL); -static struct sysdev_class clocksource_sysclass = { +static struct bus_type clocksource_subsys = { .name = "clocksource", + .dev_name = "clocksource", }; -static struct sys_device device_clocksource = { +static struct device device_clocksource = { .id = 0, - .cls = &clocksource_sysclass, + .bus = &clocksource_subsys, }; static int __init init_clocksource_sysfs(void) { - int error = sysdev_class_register(&clocksource_sysclass); + int error = subsys_system_register(&clocksource_subsys, NULL); if (!error) - error = sysdev_register(&device_clocksource); + error = device_register(&device_clocksource); if (!error) - error = sysdev_create_file( + error = device_create_file( &device_clocksource, - &attr_current_clocksource); + &dev_attr_current_clocksource); if (!error) - error = sysdev_create_file( + error = device_create_file( &device_clocksource, - &attr_available_clocksource); + &dev_attr_available_clocksource); return error; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 40420644d0b..7656642e4b8 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -275,42 +275,17 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); -/** - * tick_nohz_stop_sched_tick - stop the idle tick from the idle task - * - * When the next event is more than a tick into the future, stop the idle tick - * Called either from the idle loop or from irq_exit() when an idle period was - * just interrupted by an interrupt which did not cause a reschedule. - */ -void tick_nohz_stop_sched_tick(int inidle) +static void tick_nohz_stop_sched_tick(struct tick_sched *ts) { - unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; - struct tick_sched *ts; + unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; int cpu; - local_irq_save(flags); - cpu = smp_processor_id(); ts = &per_cpu(tick_cpu_sched, cpu); - /* - * Call to tick_nohz_start_idle stops the last_update_time from being - * updated. Thus, it must not be called in the event we are called from - * irq_exit() with the prior state different than idle. - */ - if (!inidle && !ts->inidle) - goto end; - - /* - * Set ts->inidle unconditionally. Even if the system did not - * switch to NOHZ mode the cpu frequency governers rely on the - * update of the idle time accounting in tick_nohz_start_idle(). - */ - ts->inidle = 1; - now = tick_nohz_start_idle(cpu, ts); /* @@ -326,10 +301,10 @@ void tick_nohz_stop_sched_tick(int inidle) } if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) - goto end; + return; if (need_resched()) - goto end; + return; if (unlikely(local_softirq_pending() && cpu_online(cpu))) { static int ratelimit; @@ -339,7 +314,7 @@ void tick_nohz_stop_sched_tick(int inidle) (unsigned int) local_softirq_pending()); ratelimit++; } - goto end; + return; } ts->idle_calls++; @@ -434,7 +409,6 @@ void tick_nohz_stop_sched_tick(int inidle) ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; ts->idle_jiffies = last_jiffies; - rcu_enter_nohz(); } ts->idle_sleeps++; @@ -472,8 +446,64 @@ out: ts->next_jiffies = next_jiffies; ts->last_jiffies = last_jiffies; ts->sleep_length = ktime_sub(dev->next_event, now); -end: - local_irq_restore(flags); +} + +/** + * tick_nohz_idle_enter - stop the idle tick from the idle task + * + * When the next event is more than a tick into the future, stop the idle tick + * Called when we start the idle loop. + * + * The arch is responsible of calling: + * + * - rcu_idle_enter() after its last use of RCU before the CPU is put + * to sleep. + * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. + */ +void tick_nohz_idle_enter(void) +{ + struct tick_sched *ts; + + WARN_ON_ONCE(irqs_disabled()); + + /* + * Update the idle state in the scheduler domain hierarchy + * when tick_nohz_stop_sched_tick() is called from the idle loop. + * State will be updated to busy during the first busy tick after + * exiting idle. + */ + set_cpu_sd_state_idle(); + + local_irq_disable(); + + ts = &__get_cpu_var(tick_cpu_sched); + /* + * set ts->inidle unconditionally. even if the system did not + * switch to nohz mode the cpu frequency governers rely on the + * update of the idle time accounting in tick_nohz_start_idle(). + */ + ts->inidle = 1; + tick_nohz_stop_sched_tick(ts); + + local_irq_enable(); +} + +/** + * tick_nohz_irq_exit - update next tick event from interrupt exit + * + * When an interrupt fires while we are idle and it doesn't cause + * a reschedule, it may still add, modify or delete a timer, enqueue + * an RCU callback, etc... + * So we need to re-calculate and reprogram the next tick event. + */ +void tick_nohz_irq_exit(void) +{ + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + + if (!ts->inidle) + return; + + tick_nohz_stop_sched_tick(ts); } /** @@ -515,11 +545,13 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) } /** - * tick_nohz_restart_sched_tick - restart the idle tick from the idle task + * tick_nohz_idle_exit - restart the idle tick from the idle task * * Restart the idle tick when the CPU is woken up from idle + * This also exit the RCU extended quiescent state. The CPU + * can use RCU again after this function is called. */ -void tick_nohz_restart_sched_tick(void) +void tick_nohz_idle_exit(void) { int cpu = smp_processor_id(); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); @@ -529,6 +561,7 @@ void tick_nohz_restart_sched_tick(void) ktime_t now; local_irq_disable(); + if (ts->idle_active || (ts->inidle && ts->tick_stopped)) now = ktime_get(); @@ -543,8 +576,6 @@ void tick_nohz_restart_sched_tick(void) ts->inidle = 0; - rcu_exit_nohz(); - /* Update jiffies first */ select_nohz_load_balancer(0); tick_do_update_jiffies64(now); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 237841378c0..0c635818640 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -131,7 +131,7 @@ static inline s64 timekeeping_get_ns_raw(void) /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; - /* return delta convert to nanoseconds using ntp adjusted mult. */ + /* return delta convert to nanoseconds. */ return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); } @@ -813,11 +813,11 @@ static void timekeeping_adjust(s64 offset) * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs. * * Note we subtract one in the shift, so that error is really error*2. - * This "saves" dividing(shifting) intererval twice, but keeps the - * (error > interval) comparision as still measuring if error is + * This "saves" dividing(shifting) interval twice, but keeps the + * (error > interval) comparison as still measuring if error is * larger then half an interval. * - * Note: It does not "save" on aggrivation when reading the code. + * Note: It does not "save" on aggravation when reading the code. */ error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1); if (error > interval) { @@ -833,7 +833,7 @@ static void timekeeping_adjust(s64 offset) * nanosecond, and store the amount rounded up into * the error. This causes the likely below to be unlikely. * - * The properfix is to avoid rounding up by using + * The proper fix is to avoid rounding up by using * the high precision timekeeper.xtime_nsec instead of * xtime.tv_nsec everywhere. Fixing this will take some * time. |