summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
author Ben Blum <bblum@andrew.cmu.edu> 2011-05-26 16:25:19 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-05-26 17:12:34 -0700
commit f780bdb7c1c73009cb57adcf99ef50027d80bf3c (patch)
tree d15668ffcc40a2aaa31723b87cfda0b166f84d57 /kernel
parent 4714d1d32d97239fb5ae3e10521d3f133a899b66 (diff)
cgroups: add per-thread subsystem callbacks
Add cgroup subsystem callbacks for per-thread attachment in atomic contexts. Add can_attach_task(), pre_attach(), and attach_task() as new callbacks for the cgroups subsystem interface. Unlike can_attach and attach, these are for per-thread operations, to be called potentially many times when attaching an entire threadgroup. Also, the old "bool threadgroup" interface is removed, as it is replaced by these callbacks. All subsystems are modified for the new interface - of note is cpuset, which requires from/to nodemasks for attach to be globally scoped (though per-cpuset would work too) to persist from its pre_attach to attach_task and attach. This is a pre-patch for cgroup-procs-writable.patch. Signed-off-by: Ben Blum <bblum@andrew.cmu.edu> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Matt Helsley <matthltc@us.ibm.com> Reviewed-by: Paul Menage <menage@google.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: David Rientjes <rientjes@google.com> Cc: Miao Xie <miaox@cn.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cgroup.c 17
-rw-r--r-- kernel/cgroup_freezer.c 26
-rw-r--r-- kernel/cpuset.c 96
-rw-r--r-- kernel/sched.c 38
4 files changed, 73 insertions, 104 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 909a35510af..38fb0ad1cb4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1759,7 +1759,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
for_each_subsys(root, ss) {
if (ss->can_attach) {
- retval = ss->can_attach(ss, cgrp, tsk, false);
+ retval = ss->can_attach(ss, cgrp, tsk);
if (retval) {
/*
* Remember on which subsystem the can_attach()
@@ -1771,6 +1771,13 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
goto out;
}
}
+ if (ss->can_attach_task) {
+ retval = ss->can_attach_task(cgrp, tsk);
+ if (retval) {
+ failed_ss = ss;
+ goto out;
+ }
+ }
}
task_lock(tsk);
@@ -1805,8 +1812,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
write_unlock(&css_set_lock);
for_each_subsys(root, ss) {
+ if (ss->pre_attach)
+ ss->pre_attach(cgrp);
+ if (ss->attach_task)
+ ss->attach_task(cgrp, tsk);
if (ss->attach)
- ss->attach(ss, cgrp, oldcgrp, tsk, false);
+ ss->attach(ss, cgrp, oldcgrp, tsk);
}
set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
synchronize_rcu();
@@ -1829,7 +1840,7 @@ out:
*/
break;
if (ss->cancel_attach)
- ss->cancel_attach(ss, cgrp, tsk, false);
+ ss->cancel_attach(ss, cgrp, tsk);
}
}
return retval;
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e7bebb7c6c3..e691818d7e4 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -160,7 +160,7 @@ static void freezer_destroy(struct cgroup_subsys *ss,
*/
static int freezer_can_attach(struct cgroup_subsys *ss,
struct cgroup *new_cgroup,
- struct task_struct *task, bool threadgroup)
+ struct task_struct *task)
{
struct freezer *freezer;
@@ -172,26 +172,17 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
if (freezer->state != CGROUP_THAWED)
return -EBUSY;
+ return 0;
+}
+
+static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+{
rcu_read_lock();
- if (__cgroup_freezing_or_frozen(task)) {
+ if (__cgroup_freezing_or_frozen(tsk)) {
rcu_read_unlock();
return -EBUSY;
}
rcu_read_unlock();
-
- if (threadgroup) {
- struct task_struct *c;
-
- rcu_read_lock();
- list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
- if (__cgroup_freezing_or_frozen(c)) {
- rcu_read_unlock();
- return -EBUSY;
- }
- }
- rcu_read_unlock();
- }
-
return 0;
}
@@ -390,6 +381,9 @@ struct cgroup_subsys freezer_subsys = {
.populate = freezer_populate,
.subsys_id = freezer_subsys_id,
.can_attach = freezer_can_attach,
+ .can_attach_task = freezer_can_attach_task,
+ .pre_attach = NULL,
+ .attach_task = NULL,
.attach = NULL,
.fork = freezer_fork,
.exit = NULL,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2bb8c2e98ff..55b297d78ad 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1367,14 +1367,10 @@ static int fmeter_getrate(struct fmeter *fmp)
return val;
}
-/* Protected by cgroup_lock */
-static cpumask_var_t cpus_attach;
-
/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
- struct task_struct *tsk, bool threadgroup)
+ struct task_struct *tsk)
{
- int ret;
struct cpuset *cs = cgroup_cs(cont);
if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
@@ -1391,29 +1387,42 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
if (tsk->flags & PF_THREAD_BOUND)
return -EINVAL;
- ret = security_task_setscheduler(tsk);
- if (ret)
- return ret;
- if (threadgroup) {
- struct task_struct *c;
-
- rcu_read_lock();
- list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
- ret = security_task_setscheduler(c);
- if (ret) {
- rcu_read_unlock();
- return ret;
- }
- }
- rcu_read_unlock();
- }
return 0;
}
-static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
- struct cpuset *cs)
+static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task)
+{
+ return security_task_setscheduler(task);
+}
+
+/*
+ * Protected by cgroup_lock. The nodemasks must be stored globally because
+ * dynamically allocating them is not allowed in pre_attach, and they must
+ * persist among pre_attach, attach_task, and attach.
+ */
+static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_from;
+static nodemask_t cpuset_attach_nodemask_to;
+
+/* Set-up work for before attaching each task. */
+static void cpuset_pre_attach(struct cgroup *cont)
+{
+ struct cpuset *cs = cgroup_cs(cont);
+
+ if (cs == &top_cpuset)
+ cpumask_copy(cpus_attach, cpu_possible_mask);
+ else
+ guarantee_online_cpus(cs, cpus_attach);
+
+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+}
+
+/* Per-thread attachment work. */
+static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk)
{
int err;
+ struct cpuset *cs = cgroup_cs(cont);
+
/*
* can_attach beforehand should guarantee that this doesn't fail.
* TODO: have a better way to handle failure here
@@ -1421,45 +1430,29 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
err = set_cpus_allowed_ptr(tsk, cpus_attach);
WARN_ON_ONCE(err);
- cpuset_change_task_nodemask(tsk, to);
+ cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to);
cpuset_update_task_spread_flag(cs, tsk);
-
}
static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
- struct cgroup *oldcont, struct task_struct *tsk,
- bool threadgroup)
+ struct cgroup *oldcont, struct task_struct *tsk)
{
struct mm_struct *mm;
struct cpuset *cs = cgroup_cs(cont);
struct cpuset *oldcs = cgroup_cs(oldcont);
- static nodemask_t to; /* protected by cgroup_mutex */
- if (cs == &top_cpuset) {
- cpumask_copy(cpus_attach, cpu_possible_mask);
- } else {
- guarantee_online_cpus(cs, cpus_attach);
- }
- guarantee_online_mems(cs, &to);
-
- /* do per-task migration stuff possibly for each in the threadgroup */
- cpuset_attach_task(tsk, &to, cs);
- if (threadgroup) {
- struct task_struct *c;
- rcu_read_lock();
- list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
- cpuset_attach_task(c, &to, cs);
- }
- rcu_read_unlock();
- }
-
- /* change mm; only needs to be done once even if threadgroup */
- to = cs->mems_allowed;
+ /*
+ * Change mm, possibly for multiple threads in a threadgroup. This is
+ * expensive and may sleep.
+ */
+ cpuset_attach_nodemask_from = oldcs->mems_allowed;
+ cpuset_attach_nodemask_to = cs->mems_allowed;
mm = get_task_mm(tsk);
if (mm) {
- mpol_rebind_mm(mm, &to);
+ mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
if (is_memory_migrate(cs))
- cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to);
+ cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from,
+ &cpuset_attach_nodemask_to);
mmput(mm);
}
}
@@ -1911,6 +1904,9 @@ struct cgroup_subsys cpuset_subsys = {
.create = cpuset_create,
.destroy = cpuset_destroy,
.can_attach = cpuset_can_attach,
+ .can_attach_task = cpuset_can_attach_task,
+ .pre_attach = cpuset_pre_attach,
+ .attach_task = cpuset_attach_task,
.attach = cpuset_attach,
.populate = cpuset_populate,
.post_clone = cpuset_post_clone,
diff --git a/kernel/sched.c b/kernel/sched.c
index 2d12893b8b0..5e43e9dc65d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8764,42 +8764,10 @@ cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
return 0;
}
-static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct task_struct *tsk, bool threadgroup)
-{
- int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
- if (retval)
- return retval;
- if (threadgroup) {
- struct task_struct *c;
- rcu_read_lock();
- list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
- retval = cpu_cgroup_can_attach_task(cgrp, c);
- if (retval) {
- rcu_read_unlock();
- return retval;
- }
- }
- rcu_read_unlock();
- }
- return 0;
-}
-
static void
-cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup *old_cont, struct task_struct *tsk,
- bool threadgroup)
+cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
sched_move_task(tsk);
- if (threadgroup) {
- struct task_struct *c;
- rcu_read_lock();
- list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
- sched_move_task(c);
- }
- rcu_read_unlock();
- }
}
static void
@@ -8887,8 +8855,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
.name = "cpu",
.create = cpu_cgroup_create,
.destroy = cpu_cgroup_destroy,
- .can_attach = cpu_cgroup_can_attach,
- .attach = cpu_cgroup_attach,
+ .can_attach_task = cpu_cgroup_can_attach_task,
+ .attach_task = cpu_cgroup_attach_task,
.exit = cpu_cgroup_exit,
.populate = cpu_cgroup_populate,
.subsys_id = cpu_cgroup_subsys_id,