diff options
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 134 |
1 files changed, 72 insertions, 62 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9fe58c46a42..a09ac2b9a66 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -123,6 +123,19 @@ static inline struct cpuset *task_cs(struct task_struct *task) struct cpuset, css); } +#ifdef CONFIG_NUMA +static inline bool task_has_mempolicy(struct task_struct *task) +{ + return task->mempolicy; +} +#else +static inline bool task_has_mempolicy(struct task_struct *task) +{ + return false; +} +#endif + + /* bits in struct cpuset flags field */ typedef enum { CS_CPU_EXCLUSIVE, @@ -949,7 +962,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, static void cpuset_change_task_nodemask(struct task_struct *tsk, nodemask_t *newmems) { - bool masks_disjoint = !nodes_intersects(*newmems, tsk->mems_allowed); + bool need_loop; repeat: /* @@ -962,6 +975,14 @@ repeat: return; task_lock(tsk); + /* + * Determine if a loop is necessary if another thread is doing + * get_mems_allowed(). If at least one node remains unchanged and + * tsk does not have a mempolicy, then an empty nodemask will not be + * possible when mems_allowed is larger than a word. + */ + need_loop = task_has_mempolicy(tsk) || + !nodes_intersects(*newmems, tsk->mems_allowed); nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); @@ -981,11 +1002,9 @@ repeat: /* * Allocation of memory is very fast, we needn't sleep when waiting - * for the read-side. No wait is necessary, however, if at least one - * node remains unchanged. + * for the read-side. */ - while (masks_disjoint && - ACCESS_ONCE(tsk->mems_allowed_change_disable)) { + while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) { task_unlock(tsk); if (!task_curr(tsk)) yield(); @@ -1370,79 +1389,73 @@ static int fmeter_getrate(struct fmeter *fmp) return val; } -/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ -static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, - struct task_struct *tsk) -{ - struct cpuset *cs = cgroup_cs(cont); - - if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) - return -ENOSPC; - - /* - * Kthreads bound to specific cpus cannot be moved to a new cpuset; we - * cannot change their cpu affinity and isolating such threads by their - * set of allowed nodes is unnecessary. Thus, cpusets are not - * applicable for such threads. This prevents checking for success of - * set_cpus_allowed_ptr() on all attached tasks before cpus_allowed may - * be changed. - */ - if (tsk->flags & PF_THREAD_BOUND) - return -EINVAL; - - return 0; -} - -static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task) -{ - return security_task_setscheduler(task); -} - /* * Protected by cgroup_lock. The nodemasks must be stored globally because - * dynamically allocating them is not allowed in pre_attach, and they must - * persist among pre_attach, attach_task, and attach. + * dynamically allocating them is not allowed in can_attach, and they must + * persist until attach. */ static cpumask_var_t cpus_attach; static nodemask_t cpuset_attach_nodemask_from; static nodemask_t cpuset_attach_nodemask_to; -/* Set-up work for before attaching each task. */ -static void cpuset_pre_attach(struct cgroup *cont) +/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ +static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct cgroup_taskset *tset) { - struct cpuset *cs = cgroup_cs(cont); + struct cpuset *cs = cgroup_cs(cgrp); + struct task_struct *task; + int ret; + if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) + return -ENOSPC; + + cgroup_taskset_for_each(task, cgrp, tset) { + /* + * Kthreads bound to specific cpus cannot be moved to a new + * cpuset; we cannot change their cpu affinity and + * isolating such threads by their set of allowed nodes is + * unnecessary. Thus, cpusets are not applicable for such + * threads. This prevents checking for success of + * set_cpus_allowed_ptr() on all attached tasks before + * cpus_allowed may be changed. + */ + if (task->flags & PF_THREAD_BOUND) + return -EINVAL; + if ((ret = security_task_setscheduler(task))) + return ret; + } + + /* prepare for attach */ if (cs == &top_cpuset) cpumask_copy(cpus_attach, cpu_possible_mask); else guarantee_online_cpus(cs, cpus_attach); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); -} - -/* Per-thread attachment work. */ -static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk) -{ - int err; - struct cpuset *cs = cgroup_cs(cont); - /* - * can_attach beforehand should guarantee that this doesn't fail. - * TODO: have a better way to handle failure here - */ - err = set_cpus_allowed_ptr(tsk, cpus_attach); - WARN_ON_ONCE(err); - - cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to); - cpuset_update_task_spread_flag(cs, tsk); + return 0; } -static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, - struct cgroup *oldcont, struct task_struct *tsk) +static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct cgroup_taskset *tset) { struct mm_struct *mm; - struct cpuset *cs = cgroup_cs(cont); - struct cpuset *oldcs = cgroup_cs(oldcont); + struct task_struct *task; + struct task_struct *leader = cgroup_taskset_first(tset); + struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset); + struct cpuset *cs = cgroup_cs(cgrp); + struct cpuset *oldcs = cgroup_cs(oldcgrp); + + cgroup_taskset_for_each(task, cgrp, tset) { + /* + * can_attach beforehand should guarantee that this doesn't + * fail. TODO: have a better way to handle failure here + */ + WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); + + cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); + cpuset_update_task_spread_flag(cs, task); + } /* * Change mm, possibly for multiple threads in a threadgroup. This is @@ -1450,7 +1463,7 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, */ cpuset_attach_nodemask_from = oldcs->mems_allowed; cpuset_attach_nodemask_to = cs->mems_allowed; - mm = get_task_mm(tsk); + mm = get_task_mm(leader); if (mm) { mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); if (is_memory_migrate(cs)) @@ -1906,9 +1919,6 @@ struct cgroup_subsys cpuset_subsys = { .create = cpuset_create, .destroy = cpuset_destroy, .can_attach = cpuset_can_attach, - .can_attach_task = cpuset_can_attach_task, - .pre_attach = cpuset_pre_attach, - .attach_task = cpuset_attach_task, .attach = cpuset_attach, .populate = cpuset_populate, .post_clone = cpuset_post_clone, |