summaryrefslogtreecommitdiffstats
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--kernel/sched/core.c149
1 files changed, 117 insertions, 32 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927fda71..26058d0bebb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -72,6 +72,7 @@
#include <linux/slab.h>
#include <linux/init_task.h>
#include <linux/binfmts.h>
+#include <linux/context_tracking.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -192,23 +193,10 @@ static void sched_feat_disable(int i) { };
static void sched_feat_enable(int i) { };
#endif /* HAVE_JUMP_LABEL */
-static ssize_t
-sched_feat_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
+static int sched_feat_set(char *cmp)
{
- char buf[64];
- char *cmp;
- int neg = 0;
int i;
-
- if (cnt > 63)
- cnt = 63;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
- cmp = strstrip(buf);
+ int neg = 0;
if (strncmp(cmp, "NO_", 3) == 0) {
neg = 1;
@@ -228,6 +216,27 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
}
}
+ return i;
+}
+
+static ssize_t
+sched_feat_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[64];
+ char *cmp;
+ int i;
+
+ if (cnt > 63)
+ cnt = 63;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+ cmp = strstrip(buf);
+
+ i = sched_feat_set(cmp);
if (i == __SCHED_FEAT_NR)
return -EINVAL;
@@ -922,6 +931,13 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
rq->skip_clock_update = 1;
}
+static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
+
+void register_task_migration_notifier(struct notifier_block *n)
+{
+ atomic_notifier_chain_register(&task_migration_notifier, n);
+}
+
#ifdef CONFIG_SMP
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
@@ -952,8 +968,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
trace_sched_migrate_task(p, new_cpu);
if (task_cpu(p) != new_cpu) {
+ struct task_migration_notifier tmn;
+
+ if (p->sched_class->migrate_task_rq)
+ p->sched_class->migrate_task_rq(p, new_cpu);
p->se.nr_migrations++;
perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
+
+ tmn.task = p;
+ tmn.from_cpu = task_cpu(p);
+ tmn.to_cpu = new_cpu;
+
+ atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
}
__set_task_cpu(p, new_cpu);
@@ -1497,7 +1523,8 @@ out:
*/
int wake_up_process(struct task_struct *p)
{
- return try_to_wake_up(p, TASK_ALL, 0);
+ WARN_ON(task_is_stopped_or_traced(p));
+ return try_to_wake_up(p, TASK_NORMAL, 0);
}
EXPORT_SYMBOL(wake_up_process);
@@ -1524,6 +1551,15 @@ static void __sched_fork(struct task_struct *p)
p->se.vruntime = 0;
INIT_LIST_HEAD(&p->se.group_node);
+/*
+ * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
+ * removed when useful for applications beyond shares distribution (e.g.
+ * load-balance).
+ */
+#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
+ p->se.avg.runnable_avg_period = 0;
+ p->se.avg.runnable_avg_sum = 0;
+#endif
#ifdef CONFIG_SCHEDSTATS
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
#endif
@@ -1533,7 +1569,40 @@ static void __sched_fork(struct task_struct *p)
#ifdef CONFIG_PREEMPT_NOTIFIERS
INIT_HLIST_HEAD(&p->preempt_notifiers);
#endif
+
+#ifdef CONFIG_NUMA_BALANCING
+ if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
+ p->mm->numa_next_scan = jiffies;
+ p->mm->numa_next_reset = jiffies;
+ p->mm->numa_scan_seq = 0;
+ }
+
+ p->node_stamp = 0ULL;
+ p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
+ p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
+ p->numa_scan_period = sysctl_numa_balancing_scan_delay;
+ p->numa_work.next = &p->numa_work;
+#endif /* CONFIG_NUMA_BALANCING */
+}
+
+#ifdef CONFIG_NUMA_BALANCING
+#ifdef CONFIG_SCHED_DEBUG
+void set_numabalancing_state(bool enabled)
+{
+ if (enabled)
+ sched_feat_set("NUMA");
+ else
+ sched_feat_set("NO_NUMA");
}
+#else
+__read_mostly bool numabalancing_enabled;
+
+void set_numabalancing_state(bool enabled)
+{
+ numabalancing_enabled = enabled;
+}
+#endif /* CONFIG_SCHED_DEBUG */
+#endif /* CONFIG_NUMA_BALANCING */
/*
* fork()/clone()-time setup:
@@ -1886,8 +1955,8 @@ context_switch(struct rq *rq, struct task_struct *prev,
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
#endif
+ context_tracking_task_switch(prev, next);
/* Here we just switch the register state and the stack. */
- rcu_switch(prev, next);
switch_to(prev, next, prev);
barrier();
@@ -2911,7 +2980,7 @@ asmlinkage void __sched schedule(void)
}
EXPORT_SYMBOL(schedule);
-#ifdef CONFIG_RCU_USER_QS
+#ifdef CONFIG_CONTEXT_TRACKING
asmlinkage void __sched schedule_user(void)
{
/*
@@ -2920,9 +2989,9 @@ asmlinkage void __sched schedule_user(void)
* we haven't yet exited the RCU idle mode. Do it here manually until
* we find a better solution.
*/
- rcu_user_exit();
+ user_exit();
schedule();
- rcu_user_enter();
+ user_enter();
}
#endif
@@ -3027,7 +3096,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
/* Catch callers which need to be fixed */
BUG_ON(ti->preempt_count || !irqs_disabled());
- rcu_user_exit();
+ user_exit();
do {
add_preempt_count(PREEMPT_ACTIVE);
local_irq_enable();
@@ -4029,8 +4098,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
goto out_free_cpus_allowed;
}
retval = -EPERM;
- if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE))
- goto out_unlock;
+ if (!check_same_owner(p)) {
+ rcu_read_lock();
+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+ rcu_read_unlock();
+ goto out_unlock;
+ }
+ rcu_read_unlock();
+ }
retval = security_task_setscheduler(p);
if (retval)
@@ -4474,6 +4549,7 @@ static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
void sched_show_task(struct task_struct *p)
{
unsigned long free = 0;
+ int ppid;
unsigned state;
state = p->state ? __ffs(p->state) + 1 : 0;
@@ -4493,8 +4569,11 @@ void sched_show_task(struct task_struct *p)
#ifdef CONFIG_DEBUG_STACK_USAGE
free = stack_not_used(p);
#endif
+ rcu_read_lock();
+ ppid = task_pid_nr(rcu_dereference(p->real_parent));
+ rcu_read_unlock();
printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
- task_pid_nr(p), task_pid_nr(rcu_dereference(p->real_parent)),
+ task_pid_nr(p), ppid,
(unsigned long)task_thread_info(p)->flags);
show_stack(p, NULL);
@@ -7468,7 +7547,7 @@ static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
struct task_group, css);
}
-static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp)
+static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
{
struct task_group *tg, *parent;
@@ -7485,7 +7564,7 @@ static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp)
return &tg->css;
}
-static void cpu_cgroup_destroy(struct cgroup *cgrp)
+static void cpu_cgroup_css_free(struct cgroup *cgrp)
{
struct task_group *tg = cgroup_tg(cgrp);
@@ -7845,8 +7924,8 @@ static struct cftype cpu_files[] = {
struct cgroup_subsys cpu_cgroup_subsys = {
.name = "cpu",
- .create = cpu_cgroup_create,
- .destroy = cpu_cgroup_destroy,
+ .css_alloc = cpu_cgroup_css_alloc,
+ .css_free = cpu_cgroup_css_free,
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
.exit = cpu_cgroup_exit,
@@ -7869,7 +7948,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
struct cpuacct root_cpuacct;
/* create a new cpu accounting group */
-static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp)
+static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
{
struct cpuacct *ca;
@@ -7899,7 +7978,7 @@ out:
}
/* destroy an existing cpu accounting group */
-static void cpuacct_destroy(struct cgroup *cgrp)
+static void cpuacct_css_free(struct cgroup *cgrp)
{
struct cpuacct *ca = cgroup_ca(cgrp);
@@ -8070,9 +8149,15 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
struct cgroup_subsys cpuacct_subsys = {
.name = "cpuacct",
- .create = cpuacct_create,
- .destroy = cpuacct_destroy,
+ .css_alloc = cpuacct_css_alloc,
+ .css_free = cpuacct_css_free,
.subsys_id = cpuacct_subsys_id,
.base_cftypes = files,
};
#endif /* CONFIG_CGROUP_CPUACCT */
+
+void dump_cpu_task(int cpu)
+{
+ pr_info("Task dump for CPU %d:\n", cpu);
+ sched_show_task(cpu_curr(cpu));
+}