summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/futex.c1
-rw-r--r--kernel/hrtimer.c110
-rw-r--r--kernel/sched.c43
-rw-r--r--kernel/sched_fair.c3
-rw-r--r--kernel/sched_rt.c18
-rw-r--r--kernel/time/clockevents.c11
-rw-r--r--kernel/trace/ftrace.c5
-rw-r--r--kernel/trace/trace_functions.c2
8 files changed, 120 insertions, 73 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 794c862125f..0672ff88f15 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -247,6 +247,7 @@ again:
if (err < 0)
return err;
+ page = compound_head(page);
lock_page(page);
if (!page->mapping) {
unlock_page(page);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 9002958a96e..49da79ab848 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
}
}
+
+/*
+ * Get the preferred target CPU for NOHZ
+ */
+static int hrtimer_get_target(int this_cpu, int pinned)
+{
+#ifdef CONFIG_NO_HZ
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
+ int preferred_cpu = get_nohz_load_balancer();
+
+ if (preferred_cpu >= 0)
+ return preferred_cpu;
+ }
+#endif
+ return this_cpu;
+}
+
+/*
+ * With HIGHRES=y we do not migrate the timer when it is expiring
+ * before the next event on the target cpu because we cannot reprogram
+ * the target cpu hardware and we would cause it to fire late.
+ *
+ * Called with cpu_base->lock of target cpu held.
+ */
+static int
+hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
+{
+#ifdef CONFIG_HIGH_RES_TIMERS
+ ktime_t expires;
+
+ if (!new_base->cpu_base->hres_active)
+ return 0;
+
+ expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
+ return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
+#else
+ return 0;
+#endif
+}
+
/*
* Switch the timer base to the current CPU when possible.
*/
@@ -200,16 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
{
struct hrtimer_clock_base *new_base;
struct hrtimer_cpu_base *new_cpu_base;
- int cpu, preferred_cpu = -1;
-
- cpu = smp_processor_id();
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
- if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
- preferred_cpu = get_nohz_load_balancer();
- if (preferred_cpu >= 0)
- cpu = preferred_cpu;
- }
-#endif
+ int this_cpu = smp_processor_id();
+ int cpu = hrtimer_get_target(this_cpu, pinned);
again:
new_cpu_base = &per_cpu(hrtimer_bases, cpu);
@@ -217,7 +249,7 @@ again:
if (base != new_base) {
/*
- * We are trying to schedule the timer on the local CPU.
+ * We are trying to move timer to new_base.
* However we can't change timer's base while it is running,
* so we keep it on the same CPU. No hassle vs. reprogramming
* the event source in the high resolution case. The softirq
@@ -233,38 +265,12 @@ again:
spin_unlock(&base->cpu_base->lock);
spin_lock(&new_base->cpu_base->lock);
- /* Optimized away for NOHZ=n SMP=n */
- if (cpu == preferred_cpu) {
- /* Calculate clock monotonic expiry time */
-#ifdef CONFIG_HIGH_RES_TIMERS
- ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
- new_base->offset);
-#else
- ktime_t expires = hrtimer_get_expires(timer);
-#endif
-
- /*
- * Get the next event on target cpu from the
- * clock events layer.
- * This covers the highres=off nohz=on case as well.
- */
- ktime_t next = clockevents_get_next_event(cpu);
-
- ktime_t delta = ktime_sub(expires, next);
-
- /*
- * We do not migrate the timer when it is expiring
- * before the next event on the target cpu because
- * we cannot reprogram the target cpu hardware and
- * we would cause it to fire late.
- */
- if (delta.tv64 < 0) {
- cpu = smp_processor_id();
- spin_unlock(&new_base->cpu_base->lock);
- spin_lock(&base->cpu_base->lock);
- timer->base = base;
- goto again;
- }
+ if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
+ cpu = this_cpu;
+ spin_unlock(&new_base->cpu_base->lock);
+ spin_lock(&base->cpu_base->lock);
+ timer->base = base;
+ goto again;
}
timer->base = new_base;
}
@@ -1276,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev)
expires_next.tv64 = KTIME_MAX;
+ spin_lock(&cpu_base->lock);
+ /*
+ * We set expires_next to KTIME_MAX here with cpu_base->lock
+ * held to prevent that a timer is enqueued in our queue via
+ * the migration code. This does not affect enqueueing of
+ * timers which run their callback and need to be requeued on
+ * this CPU.
+ */
+ cpu_base->expires_next.tv64 = KTIME_MAX;
+
base = cpu_base->clock_base;
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
ktime_t basenow;
struct rb_node *node;
- spin_lock(&cpu_base->lock);
-
basenow = ktime_add(now, base->offset);
while ((node = base->first)) {
@@ -1316,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
__run_hrtimer(timer);
}
- spin_unlock(&cpu_base->lock);
base++;
}
+ /*
+ * Store the new expiry value so the migration code can verify
+ * against it.
+ */
cpu_base->expires_next = expires_next;
+ spin_unlock(&cpu_base->lock);
/* Reprogramming necessary ? */
if (expires_next.tv64 != KTIME_MAX) {
diff --git a/kernel/sched.c b/kernel/sched.c
index 01f55ada359..98972d366fd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -493,6 +493,7 @@ struct rt_rq {
#endif
#ifdef CONFIG_SMP
unsigned long rt_nr_migratory;
+ unsigned long rt_nr_total;
int overloaded;
struct plist_head pushable_tasks;
#endif
@@ -2571,15 +2572,37 @@ static void __sched_fork(struct task_struct *p)
p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
#ifdef CONFIG_SCHEDSTATS
- p->se.wait_start = 0;
- p->se.sum_sleep_runtime = 0;
- p->se.sleep_start = 0;
- p->se.block_start = 0;
- p->se.sleep_max = 0;
- p->se.block_max = 0;
- p->se.exec_max = 0;
- p->se.slice_max = 0;
- p->se.wait_max = 0;
+ p->se.wait_start = 0;
+ p->se.wait_max = 0;
+ p->se.wait_count = 0;
+ p->se.wait_sum = 0;
+
+ p->se.sleep_start = 0;
+ p->se.sleep_max = 0;
+ p->se.sum_sleep_runtime = 0;
+
+ p->se.block_start = 0;
+ p->se.block_max = 0;
+ p->se.exec_max = 0;
+ p->se.slice_max = 0;
+
+ p->se.nr_migrations_cold = 0;
+ p->se.nr_failed_migrations_affine = 0;
+ p->se.nr_failed_migrations_running = 0;
+ p->se.nr_failed_migrations_hot = 0;
+ p->se.nr_forced_migrations = 0;
+ p->se.nr_forced2_migrations = 0;
+
+ p->se.nr_wakeups = 0;
+ p->se.nr_wakeups_sync = 0;
+ p->se.nr_wakeups_migrate = 0;
+ p->se.nr_wakeups_local = 0;
+ p->se.nr_wakeups_remote = 0;
+ p->se.nr_wakeups_affine = 0;
+ p->se.nr_wakeups_affine_attempts = 0;
+ p->se.nr_wakeups_passive = 0;
+ p->se.nr_wakeups_idle = 0;
+
#endif
INIT_LIST_HEAD(&p->rt.run_list);
@@ -9074,7 +9097,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
#ifdef CONFIG_SMP
rt_rq->rt_nr_migratory = 0;
rt_rq->overloaded = 0;
- plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
+ plist_head_init(&rt_rq->pushable_tasks, &rq->lock);
#endif
rt_rq->rt_time = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ba7fd6e9556..7c248dc30f4 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -687,7 +687,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
* all of which have the same weight.
*/
if (sched_feat(NORMALIZED_SLEEPER) &&
- task_of(se)->policy != SCHED_IDLE)
+ (!entity_is_task(se) ||
+ task_of(se)->policy != SCHED_IDLE))
thresh = calc_delta_fair(thresh, se);
vruntime -= thresh;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 9bf0d2a7304..3918e01994e 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -10,6 +10,8 @@ static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
#ifdef CONFIG_RT_GROUP_SCHED
+#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
+
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return rt_rq->rq;
@@ -22,6 +24,8 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
#else /* CONFIG_RT_GROUP_SCHED */
+#define rt_entity_is_task(rt_se) (1)
+
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return container_of(rt_rq, struct rq, rt);
@@ -73,7 +77,7 @@ static inline void rt_clear_overload(struct rq *rq)
static void update_rt_migration(struct rt_rq *rt_rq)
{
- if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) {
+ if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
if (!rt_rq->overloaded) {
rt_set_overload(rq_of_rt_rq(rt_rq));
rt_rq->overloaded = 1;
@@ -86,6 +90,12 @@ static void update_rt_migration(struct rt_rq *rt_rq)
static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
+ if (!rt_entity_is_task(rt_se))
+ return;
+
+ rt_rq = &rq_of_rt_rq(rt_rq)->rt;
+
+ rt_rq->rt_nr_total++;
if (rt_se->nr_cpus_allowed > 1)
rt_rq->rt_nr_migratory++;
@@ -94,6 +104,12 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
+ if (!rt_entity_is_task(rt_se))
+ return;
+
+ rt_rq = &rq_of_rt_rq(rt_rq)->rt;
+
+ rt_rq->rt_nr_total--;
if (rt_se->nr_cpus_allowed > 1)
rt_rq->rt_nr_migratory--;
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 1ad6dd46111..a6dcd67b041 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -254,15 +254,4 @@ void clockevents_notify(unsigned long reason, void *arg)
spin_unlock(&clockevents_lock);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
-
-ktime_t clockevents_get_next_event(int cpu)
-{
- struct tick_device *td;
- struct clock_event_device *dev;
-
- td = &per_cpu(tick_cpu_device, cpu);
- dev = td->evtdev;
-
- return dev->next_event;
-}
#endif
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bce9e01a29c..4521c77d1a1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -768,7 +768,7 @@ static struct tracer_stat function_stats __initdata = {
.stat_show = function_stat_show
};
-static void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
struct ftrace_profile_stat *stat;
struct dentry *entry;
@@ -786,7 +786,6 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
* The files created are permanent, if something happens
* we still do not free memory.
*/
- kfree(stat);
WARN(1,
"Could not allocate stat file for cpu %d\n",
cpu);
@@ -813,7 +812,7 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
}
#else /* CONFIG_FUNCTION_PROFILER */
-static void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
}
#endif /* CONFIG_FUNCTION_PROFILER */
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 7402144bff2..75ef000613c 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -363,7 +363,7 @@ ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
out_reg:
ret = register_ftrace_function_probe(glob, ops, count);
- return ret;
+ return ret < 0 ? ret : 0;
}
static struct ftrace_func_command ftrace_traceon_cmd = {