summaryrefslogtreecommitdiffstats
path: root/kernel/time
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-10-26 17:15:03 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2011-10-26 17:15:03 +0200
commit39adff5f69d6849ca22353a88058c9f8630528c0 (patch)
treeb0c2d2de77ebc5c97fd19c29b81eeb03549553f8 /kernel/time
parent8a4a8918ed6e4a361f4df19f199bbc2d0a89a46c (diff)
parente35f95b36e43f67a6f806172555a152c11ea0a78 (diff)
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits) time, s390: Get rid of compile warning dw_apb_timer: constify clocksource name time: Cleanup old CONFIG_GENERIC_TIME references that snuck in time: Change jiffies_to_clock_t() argument type to unsigned long alarmtimers: Fix error handling clocksource: Make watchdog reset lockless posix-cpu-timers: Cure SMP accounting oddities s390: Use direct ktime path for s390 clockevent device clockevents: Add direct ktime programming function clockevents: Make minimum delay adjustments configurable nohz: Remove "Switched to NOHz mode" debugging messages proc: Consider NO_HZ when printing idle and iowait times nohz: Make idle/iowait counter update conditional nohz: Fix update_ts_time_stat idle accounting cputime: Clean up cputime_to_usecs and usecs_to_cputime macros alarmtimers: Rework RTC device selection using class interface alarmtimers: Add try_to_cancel functionality alarmtimers: Add more refined alarm state tracking alarmtimers: Remove period from alarm structure alarmtimers: Remove interval cap limit hack ...
Diffstat (limited to 'kernel/time')
-rw-r--r--kernel/time/Kconfig2
-rw-r--r--kernel/time/alarmtimer.c266
-rw-r--r--kernel/time/clockevents.c129
-rw-r--r--kernel/time/clocksource.c38
-rw-r--r--kernel/time/tick-broadcast.c4
-rw-r--r--kernel/time/tick-common.c4
-rw-r--r--kernel/time/tick-internal.h2
-rw-r--r--kernel/time/tick-oneshot.c77
-rw-r--r--kernel/time/tick-sched.c55
9 files changed, 364 insertions, 213 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index f06a8a36564..b26c2228fe9 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -27,3 +27,5 @@ config GENERIC_CLOCKEVENTS_BUILD
default y
depends on GENERIC_CLOCKEVENTS || GENERIC_CLOCKEVENTS_MIGR
+config GENERIC_CLOCKEVENTS_MIN_ADJUST
+ bool
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index ea5e1a928d5..c436e790b21 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -53,27 +53,6 @@ static struct rtc_device *rtcdev;
static DEFINE_SPINLOCK(rtcdev_lock);
/**
- * has_wakealarm - check rtc device has wakealarm ability
- * @dev: current device
- * @name_ptr: name to be returned
- *
- * This helper function checks to see if the rtc device can wake
- * from suspend.
- */
-static int has_wakealarm(struct device *dev, void *name_ptr)
-{
- struct rtc_device *candidate = to_rtc_device(dev);
-
- if (!candidate->ops->set_alarm)
- return 0;
- if (!device_may_wakeup(candidate->dev.parent))
- return 0;
-
- *(const char **)name_ptr = dev_name(dev);
- return 1;
-}
-
-/**
* alarmtimer_get_rtcdev - Return selected rtcdevice
*
* This function returns the rtc device to use for wakealarms.
@@ -82,37 +61,64 @@ static int has_wakealarm(struct device *dev, void *name_ptr)
*/
static struct rtc_device *alarmtimer_get_rtcdev(void)
{
- struct device *dev;
- char *str;
unsigned long flags;
struct rtc_device *ret;
spin_lock_irqsave(&rtcdev_lock, flags);
- if (!rtcdev) {
- /* Find an rtc device and init the rtc_timer */
- dev = class_find_device(rtc_class, NULL, &str, has_wakealarm);
- /* If we have a device then str is valid. See has_wakealarm() */
- if (dev) {
- rtcdev = rtc_class_open(str);
- /*
- * Drop the reference we got in class_find_device,
- * rtc_open takes its own.
- */
- put_device(dev);
- rtc_timer_init(&rtctimer, NULL, NULL);
- }
- }
ret = rtcdev;
spin_unlock_irqrestore(&rtcdev_lock, flags);
return ret;
}
+
+
+static int alarmtimer_rtc_add_device(struct device *dev,
+ struct class_interface *class_intf)
+{
+ unsigned long flags;
+ struct rtc_device *rtc = to_rtc_device(dev);
+
+ if (rtcdev)
+ return -EBUSY;
+
+ if (!rtc->ops->set_alarm)
+ return -1;
+ if (!device_may_wakeup(rtc->dev.parent))
+ return -1;
+
+ spin_lock_irqsave(&rtcdev_lock, flags);
+ if (!rtcdev) {
+ rtcdev = rtc;
+ /* hold a reference so it doesn't go away */
+ get_device(dev);
+ }
+ spin_unlock_irqrestore(&rtcdev_lock, flags);
+ return 0;
+}
+
+static struct class_interface alarmtimer_rtc_interface = {
+ .add_dev = &alarmtimer_rtc_add_device,
+};
+
+static int alarmtimer_rtc_interface_setup(void)
+{
+ alarmtimer_rtc_interface.class = rtc_class;
+ return class_interface_register(&alarmtimer_rtc_interface);
+}
+static void alarmtimer_rtc_interface_remove(void)
+{
+ class_interface_unregister(&alarmtimer_rtc_interface);
+}
#else
-#define alarmtimer_get_rtcdev() (0)
-#define rtcdev (0)
+static inline struct rtc_device *alarmtimer_get_rtcdev(void)
+{
+ return NULL;
+}
+#define rtcdev (NULL)
+static inline int alarmtimer_rtc_interface_setup(void) { return 0; }
+static inline void alarmtimer_rtc_interface_remove(void) { }
#endif
-
/**
* alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue
* @base: pointer to the base where the timer is being run
@@ -126,6 +132,8 @@ static struct rtc_device *alarmtimer_get_rtcdev(void)
static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm)
{
timerqueue_add(&base->timerqueue, &alarm->node);
+ alarm->state |= ALARMTIMER_STATE_ENQUEUED;
+
if (&alarm->node == timerqueue_getnext(&base->timerqueue)) {
hrtimer_try_to_cancel(&base->timer);
hrtimer_start(&base->timer, alarm->node.expires,
@@ -147,7 +155,12 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm)
{
struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue);
+ if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED))
+ return;
+
timerqueue_del(&base->timerqueue, &alarm->node);
+ alarm->state &= ~ALARMTIMER_STATE_ENQUEUED;
+
if (next == &alarm->node) {
hrtimer_try_to_cancel(&base->timer);
next = timerqueue_getnext(&base->timerqueue);
@@ -174,6 +187,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
unsigned long flags;
ktime_t now;
int ret = HRTIMER_NORESTART;
+ int restart = ALARMTIMER_NORESTART;
spin_lock_irqsave(&base->lock, flags);
now = base->gettime();
@@ -187,17 +201,19 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
alarm = container_of(next, struct alarm, node);
timerqueue_del(&base->timerqueue, &alarm->node);
- alarm->enabled = 0;
- /* Re-add periodic timers */
- if (alarm->period.tv64) {
- alarm->node.expires = ktime_add(expired, alarm->period);
- timerqueue_add(&base->timerqueue, &alarm->node);
- alarm->enabled = 1;
- }
+ alarm->state &= ~ALARMTIMER_STATE_ENQUEUED;
+
+ alarm->state |= ALARMTIMER_STATE_CALLBACK;
spin_unlock_irqrestore(&base->lock, flags);
if (alarm->function)
- alarm->function(alarm);
+ restart = alarm->function(alarm, now);
spin_lock_irqsave(&base->lock, flags);
+ alarm->state &= ~ALARMTIMER_STATE_CALLBACK;
+
+ if (restart != ALARMTIMER_NORESTART) {
+ timerqueue_add(&base->timerqueue, &alarm->node);
+ alarm->state |= ALARMTIMER_STATE_ENQUEUED;
+ }
}
if (next) {
@@ -234,7 +250,7 @@ static int alarmtimer_suspend(struct device *dev)
freezer_delta = ktime_set(0, 0);
spin_unlock_irqrestore(&freezer_delta_lock, flags);
- rtc = rtcdev;
+ rtc = alarmtimer_get_rtcdev();
/* If we have no rtcdev, just return */
if (!rtc)
return 0;
@@ -299,53 +315,111 @@ static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
* @function: callback that is run when the alarm fires
*/
void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
- void (*function)(struct alarm *))
+ enum alarmtimer_restart (*function)(struct alarm *, ktime_t))
{
timerqueue_init(&alarm->node);
- alarm->period = ktime_set(0, 0);
alarm->function = function;
alarm->type = type;
- alarm->enabled = 0;
+ alarm->state = ALARMTIMER_STATE_INACTIVE;
}
/**
* alarm_start - Sets an alarm to fire
* @alarm: ptr to alarm to set
* @start: time to run the alarm
- * @period: period at which the alarm will recur
*/
-void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period)
+void alarm_start(struct alarm *alarm, ktime_t start)
{
struct alarm_base *base = &alarm_bases[alarm->type];
unsigned long flags;
spin_lock_irqsave(&base->lock, flags);
- if (alarm->enabled)
+ if (alarmtimer_active(alarm))
alarmtimer_remove(base, alarm);
alarm->node.expires = start;
- alarm->period = period;
alarmtimer_enqueue(base, alarm);
- alarm->enabled = 1;
spin_unlock_irqrestore(&base->lock, flags);
}
/**
- * alarm_cancel - Tries to cancel an alarm timer
+ * alarm_try_to_cancel - Tries to cancel an alarm timer
* @alarm: ptr to alarm to be canceled
+ *
+ * Returns 1 if the timer was canceled, 0 if it was not running,
+ * and -1 if the callback was running
*/
-void alarm_cancel(struct alarm *alarm)
+int alarm_try_to_cancel(struct alarm *alarm)
{
struct alarm_base *base = &alarm_bases[alarm->type];
unsigned long flags;
-
+ int ret = -1;
spin_lock_irqsave(&base->lock, flags);
- if (alarm->enabled)
+
+ if (alarmtimer_callback_running(alarm))
+ goto out;
+
+ if (alarmtimer_is_queued(alarm)) {
alarmtimer_remove(base, alarm);
- alarm->enabled = 0;
+ ret = 1;
+ } else
+ ret = 0;
+out:
spin_unlock_irqrestore(&base->lock, flags);
+ return ret;
+}
+
+
+/**
+ * alarm_cancel - Spins trying to cancel an alarm timer until it is done
+ * @alarm: ptr to alarm to be canceled
+ *
+ * Returns 1 if the timer was canceled, 0 if it was not active.
+ */
+int alarm_cancel(struct alarm *alarm)
+{
+ for (;;) {
+ int ret = alarm_try_to_cancel(alarm);
+ if (ret >= 0)
+ return ret;
+ cpu_relax();
+ }
+}
+
+
+u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
+{
+ u64 overrun = 1;
+ ktime_t delta;
+
+ delta = ktime_sub(now, alarm->node.expires);
+
+ if (delta.tv64 < 0)
+ return 0;
+
+ if (unlikely(delta.tv64 >= interval.tv64)) {
+ s64 incr = ktime_to_ns(interval);
+
+ overrun = ktime_divns(delta, incr);
+
+ alarm->node.expires = ktime_add_ns(alarm->node.expires,
+ incr*overrun);
+
+ if (alarm->node.expires.tv64 > now.tv64)
+ return overrun;
+ /*
+ * This (and the ktime_add() below) is the
+ * correction for exact:
+ */
+ overrun++;
+ }
+
+ alarm->node.expires = ktime_add(alarm->node.expires, interval);
+ return overrun;
}
+
+
/**
* clock2alarm - helper that converts from clockid to alarmtypes
* @clockid: clockid.
@@ -365,12 +439,21 @@ static enum alarmtimer_type clock2alarm(clockid_t clockid)
*
* Posix timer callback for expired alarm timers.
*/
-static void alarm_handle_timer(struct alarm *alarm)
+static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
+ ktime_t now)
{
struct k_itimer *ptr = container_of(alarm, struct k_itimer,
- it.alarmtimer);
+ it.alarm.alarmtimer);
if (posix_timer_event(ptr, 0) != 0)
ptr->it_overrun++;
+
+ /* Re-add periodic timers */
+ if (ptr->it.alarm.interval.tv64) {
+ ptr->it_overrun += alarm_forward(alarm, now,
+ ptr->it.alarm.interval);
+ return ALARMTIMER_RESTART;
+ }
+ return ALARMTIMER_NORESTART;
}
/**
@@ -427,7 +510,7 @@ static int alarm_timer_create(struct k_itimer *new_timer)
type = clock2alarm(new_timer->it_clock);
base = &alarm_bases[type];
- alarm_init(&new_timer->it.alarmtimer, type, alarm_handle_timer);
+ alarm_init(&new_timer->it.alarm.alarmtimer, type, alarm_handle_timer);
return 0;
}
@@ -444,9 +527,9 @@ static void alarm_timer_get(struct k_itimer *timr,
memset(cur_setting, 0, sizeof(struct itimerspec));
cur_setting->it_interval =
- ktime_to_timespec(timr->it.alarmtimer.period);
+ ktime_to_timespec(timr->it.alarm.interval);
cur_setting->it_value =
- ktime_to_timespec(timr->it.alarmtimer.node.expires);
+ ktime_to_timespec(timr->it.alarm.alarmtimer.node.expires);
return;
}
@@ -461,7 +544,9 @@ static int alarm_timer_del(struct k_itimer *timr)
if (!rtcdev)
return -ENOTSUPP;
- alarm_cancel(&timr->it.alarmtimer);
+ if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0)
+ return TIMER_RETRY;
+
return 0;
}
@@ -481,25 +566,17 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
if (!rtcdev)
return -ENOTSUPP;
- /*
- * XXX HACK! Currently we can DOS a system if the interval
- * period on alarmtimers is too small. Cap the interval here
- * to 100us and solve this properly in a future patch! -jstultz
- */
- if ((new_setting->it_interval.tv_sec == 0) &&
- (new_setting->it_interval.tv_nsec < 100000))
- new_setting->it_interval.tv_nsec = 100000;
-
if (old_setting)
alarm_timer_get(timr, old_setting);
/* If the timer was already set, cancel it */
- alarm_cancel(&timr->it.alarmtimer);
+ if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0)
+ return TIMER_RETRY;
/* start the timer */
- alarm_start(&timr->it.alarmtimer,
- timespec_to_ktime(new_setting->it_value),
- timespec_to_ktime(new_setting->it_interval));
+ timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval);
+ alarm_start(&timr->it.alarm.alarmtimer,
+ timespec_to_ktime(new_setting->it_value));
return 0;
}
@@ -509,13 +586,15 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
*
* Wakes up the task that set the alarmtimer
*/
-static void alarmtimer_nsleep_wakeup(struct alarm *alarm)
+static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm,
+ ktime_t now)
{
struct task_struct *task = (struct task_struct *)alarm->data;
alarm->data = NULL;
if (task)
wake_up_process(task);
+ return ALARMTIMER_NORESTART;
}
/**
@@ -530,7 +609,7 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp)
alarm->data = (void *)current;
do {
set_current_state(TASK_INTERRUPTIBLE);
- alarm_start(alarm, absexp, ktime_set(0, 0));
+ alarm_start(alarm, absexp);
if (likely(alarm->data))
schedule();
@@ -691,6 +770,7 @@ static struct platform_driver alarmtimer_driver = {
*/
static int __init alarmtimer_init(void)
{
+ struct platform_device *pdev;
int error = 0;
int i;
struct k_clock alarm_clock = {
@@ -719,10 +799,26 @@ static int __init alarmtimer_init(void)
HRTIMER_MODE_ABS);
alarm_bases[i].timer.function = alarmtimer_fired;
}
+
+ error = alarmtimer_rtc_interface_setup();
+ if (error)
+ return error;
+
error = platform_driver_register(&alarmtimer_driver);
- platform_device_register_simple("alarmtimer", -1, NULL, 0);
+ if (error)
+ goto out_if;
+ pdev = platform_device_register_simple("alarmtimer", -1, NULL, 0);
+ if (IS_ERR(pdev)) {
+ error = PTR_ERR(pdev);
+ goto out_drv;
+ }
+ return 0;
+
+out_drv:
+ platform_driver_unregister(&alarmtimer_driver);
+out_if:
+ alarmtimer_rtc_interface_remove();
return error;
}
device_initcall(alarmtimer_init);
-
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index e4c699dfa4e..1ecd6ba36d6 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -94,42 +94,143 @@ void clockevents_shutdown(struct clock_event_device *dev)
dev->next_event.tv64 = KTIME_MAX;
}
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST
+
+/* Limit min_delta to a jiffie */
+#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ)
+
+/**
+ * clockevents_increase_min_delta - raise minimum delta of a clock event device
+ * @dev: device to increase the minimum delta
+ *
+ * Returns 0 on success, -ETIME when the minimum delta reached the limit.
+ */
+static int clockevents_increase_min_delta(struct clock_event_device *dev)
+{
+ /* Nothing to do if we already reached the limit */
+ if (dev->min_delta_ns >= MIN_DELTA_LIMIT) {
+ printk(KERN_WARNING "CE: Reprogramming failure. Giving up\n");
+ dev->next_event.tv64 = KTIME_MAX;
+ return -ETIME;
+ }
+
+ if (dev->min_delta_ns < 5000)
+ dev->min_delta_ns = 5000;
+ else
+ dev->min_delta_ns += dev->min_delta_ns >> 1;
+
+ if (dev->min_delta_ns > MIN_DELTA_LIMIT)
+ dev->min_delta_ns = MIN_DELTA_LIMIT;
+
+ printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n",
+ dev->name ? dev->name : "?",
+ (unsigned long long) dev->min_delta_ns);
+ return 0;
+}
+
+/**
+ * clockevents_program_min_delta - Set clock event device to the minimum delay.
+ * @dev: device to program
+ *
+ * Returns 0 on success, -ETIME when the retry loop failed.
+ */
+static int clockevents_program_min_delta(struct clock_event_device *dev)
+{
+ unsigned long long clc;
+ int64_t delta;
+ int i;
+
+ for (i = 0;;) {
+ delta = dev->min_delta_ns;
+ dev->next_event = ktime_add_ns(ktime_get(), delta);
+
+ if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+ return 0;
+
+ dev->retries++;
+ clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
+ if (dev->set_next_event((unsigned long) clc, dev) == 0)
+ return 0;
+
+ if (++i > 2) {
+ /*
+ * We tried 3 times to program the device with the
+ * given min_delta_ns. Try to increase the minimum
+ * delta, if that fails as well get out of here.
+ */
+ if (clockevents_increase_min_delta(dev))
+ return -ETIME;
+ i = 0;
+ }
+ }
+}
+
+#else /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */
+
+/**
+ * clockevents_program_min_delta - Set clock event device to the minimum delay.
+ * @dev: device to program
+ *
+ * Returns 0 on success, -ETIME when the retry loop failed.
+ */
+static int clockevents_program_min_delta(struct clock_event_device *dev)
+{
+ unsigned long long clc;
+ int64_t delta;
+
+ delta = dev->min_delta_ns;
+ dev->next_event = ktime_add_ns(ktime_get(), delta);
+
+ if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+ return 0;
+
+ dev->retries++;
+ clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
+ return dev->set_next_event((unsigned long) clc, dev);
+}
+
+#endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */
+
/**
* clockevents_program_event - Reprogram the clock event device.
+ * @dev: device to program
* @expires: absolute expiry time (monotonic clock)
+ * @force: program minimum delay if expires can not be set
*
* Returns 0 on success, -ETIME when the event is in the past.
*/
int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
- ktime_t now)
+ bool force)
{
unsigned long long clc;
int64_t delta;
+ int rc;
if (unlikely(expires.tv64 < 0)) {
WARN_ON_ONCE(1);
return -ETIME;
}
- delta = ktime_to_ns(ktime_sub(expires, now));
-
- if (delta <= 0)
- return -ETIME;
-
dev->next_event = expires;
if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
return 0;
- if (delta > dev->max_delta_ns)
- delta = dev->max_delta_ns;
- if (delta < dev->min_delta_ns)
- delta = dev->min_delta_ns;
+ /* Shortcut for clockevent devices that can deal with ktime. */
+ if (dev->features & CLOCK_EVT_FEAT_KTIME)
+ return dev->set_next_ktime(expires, dev);
+
+ delta = ktime_to_ns(ktime_sub(expires, ktime_get()));
+ if (delta <= 0)
+ return force ? clockevents_program_min_delta(dev) : -ETIME;
- clc = delta * dev->mult;
- clc >>= dev->shift;
+ delta = min(delta, (int64_t) dev->max_delta_ns);
+ delta = max(delta, (int64_t) dev->min_delta_ns);
- return dev->set_next_event((unsigned long) clc, dev);
+ clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
+ rc = dev->set_next_event((unsigned long) clc, dev);
+
+ return (rc && force) ? clockevents_program_min_delta(dev) : rc;
}
/**
@@ -258,7 +359,7 @@ int clockevents_update_freq(struct clock_event_device *dev, u32 freq)
if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
return 0;
- return clockevents_program_event(dev, dev->next_event, ktime_get());
+ return clockevents_program_event(dev, dev->next_event, false);
}
/*
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e0980f0d9a0..cf52fda2e09 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -186,6 +186,7 @@ static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
+static atomic_t watchdog_reset_pending;
static int clocksource_watchdog_kthread(void *data);
static void __clocksource_change_rating(struct clocksource *cs, int rating);
@@ -247,12 +248,14 @@ static void clocksource_watchdog(unsigned long data)
struct clocksource *cs;
cycle_t csnow, wdnow;
int64_t wd_nsec, cs_nsec;
- int next_cpu;
+ int next_cpu, reset_pending;
spin_lock(&watchdog_lock);
if (!watchdog_running)
goto out;
+ reset_pending = atomic_read(&watchdog_reset_pending);
+
list_for_each_entry(cs, &watchdog_list, wd_list) {
/* Clocksource already marked unstable? */
@@ -268,7 +271,8 @@ static void clocksource_watchdog(unsigned long data)
local_irq_enable();
/* Clocksource initialized ? */
- if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) {
+ if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
+ atomic_read(&watchdog_reset_pending)) {
cs->flags |= CLOCK_SOURCE_WATCHDOG;
cs->wd_last = wdnow;
cs->cs_last = csnow;
@@ -283,8 +287,11 @@ static void clocksource_watchdog(unsigned long data)
cs->cs_last = csnow;
cs->wd_last = wdnow;
+ if (atomic_read(&watchdog_reset_pending))
+ continue;
+
/* Check the deviation from the watchdog clocksource. */
- if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
+ if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
clocksource_unstable(cs, cs_nsec - wd_nsec);
continue;
}
@@ -303,6 +310,13 @@ static void clocksource_watchdog(unsigned long data)
}
/*
+ * We only clear the watchdog_reset_pending, when we did a
+ * full cycle through all clocksources.
+ */
+ if (reset_pending)
+ atomic_dec(&watchdog_reset_pending);
+
+ /*
* Cycle through CPUs to check if the CPUs stay synchronized
* to each other.
*/
@@ -344,23 +358,7 @@ static inline void clocksource_reset_watchdog(void)
static void clocksource_resume_watchdog(void)
{
- unsigned long flags;
-
- /*
- * We use trylock here to avoid a potential dead lock when
- * kgdb calls this code after the kernel has been stopped with
- * watchdog_lock held. When watchdog_lock is held we just
- * return and accept, that the watchdog might trigger and mark
- * the monitored clock source (usually TSC) unstable.
- *
- * This does not affect the other caller clocksource_resume()
- * because at this point the kernel is UP, interrupts are
- * disabled and nothing can hold watchdog_lock.
- */
- if (!spin_trylock_irqsave(&watchdog_lock, flags))
- return;
- clocksource_reset_watchdog();
- spin_unlock_irqrestore(&watchdog_lock, flags);
+ atomic_inc(&watchdog_reset_pending);
}
static void clocksource_enqueue_watchdog(struct clocksource *cs)
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index c7218d13273..f954282d9a8 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -194,7 +194,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
for (next = dev->next_event; ;) {
next = ktime_add(next, tick_period);
- if (!clockevents_program_event(dev, next, ktime_get()))
+ if (!clockevents_program_event(dev, next, false))
return;
tick_do_periodic_broadcast();
}
@@ -373,7 +373,7 @@ static int tick_broadcast_set_event(ktime_t expires, int force)
{
struct clock_event_device *bc = tick_broadcast_device.evtdev;
- return tick_dev_program_event(bc, expires, force);
+ return clockevents_program_event(bc, expires, force);
}
int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 119528de823..da6c9ecad4e 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -94,7 +94,7 @@ void tick_handle_periodic(struct clock_event_device *dev)
*/
next = ktime_add(dev->next_event, tick_period);
for (;;) {
- if (!clockevents_program_event(dev, next, ktime_get()))
+ if (!clockevents_program_event(dev, next, false))
return;
/*
* Have to be careful here. If we're in oneshot mode,
@@ -137,7 +137,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
for (;;) {
- if (!clockevents_program_event(dev, next, ktime_get()))
+ if (!clockevents_program_event(dev, next, false))
return;
next = ktime_add(next, tick_period);
}
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 1009b06d6f8..4e265b901fe 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -26,8 +26,6 @@ extern void clockevents_shutdown(struct clock_event_device *dev);
extern void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t nextevt);
-extern int tick_dev_program_event(struct clock_event_device *dev,
- ktime_t expires, int force);
extern int tick_program_event(ktime_t expires, int force);
extern void tick_oneshot_notify(void);
extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 2d04411a5f0..824109060a3 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -21,74 +21,6 @@
#include "tick-internal.h"
-/* Limit min_delta to a jiffie */
-#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ)
-
-static int tick_increase_min_delta(struct clock_event_device *dev)
-{
- /* Nothing to do if we already reached the limit */
- if (dev->min_delta_ns >= MIN_DELTA_LIMIT)
- return -ETIME;
-
- if (dev->min_delta_ns < 5000)
- dev->min_delta_ns = 5000;
- else
- dev->min_delta_ns += dev->min_delta_ns >> 1;
-
- if (dev->min_delta_ns > MIN_DELTA_LIMIT)
- dev->min_delta_ns = MIN_DELTA_LIMIT;
-
- printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n",
- dev->name ? dev->name : "?",
- (unsigned long long) dev->min_delta_ns);
- return 0;
-}
-
-/**
- * tick_program_event internal worker function
- */
-int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
- int force)
-{
- ktime_t now = ktime_get();
- int i;
-
- for (i = 0;;) {
- int ret = clockevents_program_event(dev, expires, now);
-
- if (!ret || !force)
- return ret;
-
- dev->retries++;
- /*
- * We tried 3 times to program the device with the given
- * min_delta_ns. If that's not working then we increase it
- * and emit a warning.
- */
- if (++i > 2) {
- /* Increase the min. delta and try again */
- if (tick_increase_min_delta(dev)) {
- /*
- * Get out of the loop if min_delta_ns
- * hit the limit already. That's
- * better than staying here forever.
- *
- * We clear next_event so we have a
- * chance that the box survives.
- */
- printk(KERN_WARNING
- "CE: Reprogramming failure. Giving up\n");
- dev->next_event.tv64 = KTIME_MAX;
- return -ETIME;
- }
- i = 0;
- }
-
- now = ktime_get();
- expires = ktime_add_ns(now, dev->min_delta_ns);
- }
-}
-
/**
* tick_program_event
*/
@@ -96,7 +28,7 @@ int tick_program_event(ktime_t expires, int force)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
- return tick_dev_program_event(dev, expires, force);
+ return clockevents_program_event(dev, expires, force);
}
/**
@@ -104,11 +36,10 @@ int tick_program_event(ktime_t expires, int force)
*/
void tick_resume_oneshot(void)
{
- struct tick_device *td = &__get_cpu_var(tick_cpu_device);
- struct clock_event_device *dev = td->evtdev;
+ struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
- tick_program_event(ktime_get(), 1);
+ clockevents_program_event(dev, ktime_get(), true);
}
/**
@@ -120,7 +51,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev,
{
newdev->event_handler = handler;
clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
- tick_dev_program_event(newdev, next_event, 1);
+ clockevents_program_event(newdev, next_event, true);
}
/**
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index eb98e55196b..40420644d0b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -158,9 +158,10 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda
if (ts->idle_active) {
delta = ktime_sub(now, ts->idle_entrytime);
- ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
if (nr_iowait_cpu(cpu) > 0)
ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
+ else
+ ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
ts->idle_entrytime = now;
}
@@ -196,11 +197,11 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
/**
* get_cpu_idle_time_us - get the total idle time of a cpu
* @cpu: CPU number to query
- * @last_update_time: variable to store update time in
+ * @last_update_time: variable to store update time in. Do not update
+ * counters if NULL.
*
* Return the cummulative idle time (since boot) for a given
- * CPU, in microseconds. The idle time returned includes
- * the iowait time (unlike what "top" and co report).
+ * CPU, in microseconds.
*
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
@@ -210,20 +211,35 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ ktime_t now, idle;
if (!tick_nohz_enabled)
return -1;
- update_ts_time_stats(cpu, ts, ktime_get(), last_update_time);
+ now = ktime_get();
+ if (last_update_time) {
+ update_ts_time_stats(cpu, ts, now, last_update_time);
+ idle = ts->idle_sleeptime;
+ } else {
+ if (ts->idle_active && !nr_iowait_cpu(cpu)) {
+ ktime_t delta = ktime_sub(now, ts->idle_entrytime);
+
+ idle = ktime_add(ts->idle_sleeptime, delta);
+ } else {
+ idle = ts->idle_sleeptime;
+ }
+ }
+
+ return ktime_to_us(idle);
- return ktime_to_us(ts->idle_sleeptime);
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
-/*
+/**
* get_cpu_iowait_time_us - get the total iowait time of a cpu
* @cpu: CPU number to query
- * @last_update_time: variable to store update time in
+ * @last_update_time: variable to store update time in. Do not update
+ * counters if NULL.
*
* Return the cummulative iowait time (since boot) for a given
* CPU, in microseconds.
@@ -236,13 +252,26 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ ktime_t now, iowait;
if (!tick_nohz_enabled)
return -1;
- update_ts_time_stats(cpu, ts, ktime_get(), last_update_time);
+ now = ktime_get();
+ if (last_update_time) {
+ update_ts_time_stats(cpu, ts, now, last_update_time);
+ iowait = ts->iowait_sleeptime;
+ } else {
+ if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
+ ktime_t delta = ktime_sub(now, ts->idle_entrytime);
- return ktime_to_us(ts->iowait_sleeptime);
+ iowait = ktime_add(ts->iowait_sleeptime, delta);
+ } else {
+ iowait = ts->iowait_sleeptime;
+ }
+ }
+
+ return ktime_to_us(iowait);
}
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
@@ -634,8 +663,6 @@ static void tick_nohz_switch_to_nohz(void)
next = ktime_add(next, tick_period);
}
local_irq_enable();
-
- printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id());
}
/*
@@ -787,10 +814,8 @@ void tick_setup_sched_timer(void)
}
#ifdef CONFIG_NO_HZ
- if (tick_nohz_enabled) {
+ if (tick_nohz_enabled)
ts->nohz_mode = NOHZ_MODE_HIGHRES;
- printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id());
- }
#endif
}
#endif /* HIGH_RES_TIMERS */