From 06d6b3cbdf94bc37732df83e7c25774370411a56 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 5 Jun 2013 17:15:11 +0800 Subject: cpuset: remove redundant check in cpuset_cpus_allowed_fallback() task_cs() will never return NULL. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 64b3f791bbe..f0c884a0e57 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2253,8 +2253,7 @@ void cpuset_cpus_allowed_fallback(struct task_struct *tsk) rcu_read_lock(); cs = task_cs(tsk); - if (cs) - do_set_cpus_allowed(tsk, cs->cpus_allowed); + do_set_cpus_allowed(tsk, cs->cpus_allowed); rcu_read_unlock(); /* -- cgit v1.2.3-70-g09d2 From 40df2deb50570b288b7067b111af0aa9ca640e6f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 5 Jun 2013 17:15:23 +0800 Subject: cpuset: cleanup guarantee_online_{cpus|mems}() - We never pass a NULL @cs to these functions. - The top cpuset always has some online cpus/mems. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f0c884a0e57..d753837cca3 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -304,53 +304,38 @@ static struct file_system_type cpuset_fs_type = { /* * Return in pmask the portion of a cpusets's cpus_allowed that * are online. If none are online, walk up the cpuset hierarchy - * until we find one that does have some online cpus. If we get - * all the way to the top and still haven't found any online cpus, - * return cpu_online_mask. Or if passed a NULL cs from an exit'ing - * task, return cpu_online_mask. + * until we find one that does have some online cpus. The top + * cpuset always has some cpus online. * * One way or another, we guarantee to return some non-empty subset * of cpu_online_mask. * * Call with callback_mutex held. */ - static void guarantee_online_cpus(const struct cpuset *cs, struct cpumask *pmask) { - while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask)) + while (!cpumask_intersects(cs->cpus_allowed, cpu_online_mask)) cs = parent_cs(cs); - if (cs) - cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask); - else - cpumask_copy(pmask, cpu_online_mask); - BUG_ON(!cpumask_intersects(pmask, cpu_online_mask)); + cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask); } /* * Return in *pmask the portion of a cpusets's mems_allowed that * are online, with memory. If none are online with memory, walk * up the cpuset hierarchy until we find one that does have some - * online mems. If we get all the way to the top and still haven't - * found any online mems, return node_states[N_MEMORY]. + * online mems. The top cpuset always has some mems online. * * One way or another, we guarantee to return some non-empty subset * of node_states[N_MEMORY]. * * Call with callback_mutex held. */ - static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) { - while (cs && !nodes_intersects(cs->mems_allowed, - node_states[N_MEMORY])) + while (!nodes_intersects(cs->mems_allowed, node_states[N_MEMORY])) cs = parent_cs(cs); - if (cs) - nodes_and(*pmask, cs->mems_allowed, - node_states[N_MEMORY]); - else - *pmask = node_states[N_MEMORY]; - BUG_ON(!nodes_intersects(*pmask, node_states[N_MEMORY])); + nodes_and(*pmask, cs->mems_allowed, node_states[N_MEMORY]); } /* -- cgit v1.2.3-70-g09d2 From 67bd2c59850de20d0ecdc8084cbbfe34e53b6804 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 5 Jun 2013 17:15:35 +0800 Subject: cpuset: remove unnecessary variable in cpuset_attach() We can just use oldcs->mems_allowed. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d753837cca3..dbef832e5e2 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1407,8 +1407,7 @@ static cpumask_var_t cpus_attach; static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { - /* static bufs protected by cpuset_mutex */ - static nodemask_t cpuset_attach_nodemask_from; + /* static buf protected by cpuset_mutex */ static nodemask_t cpuset_attach_nodemask_to; struct mm_struct *mm; struct task_struct *task; @@ -1442,13 +1441,12 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) * Change mm, possibly for multiple threads in a threadgroup. This is * expensive and may sleep. */ - cpuset_attach_nodemask_from = oldcs->mems_allowed; cpuset_attach_nodemask_to = cs->mems_allowed; mm = get_task_mm(leader); if (mm) { mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); if (is_memory_migrate(cs)) - cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from, + cpuset_migrate_mm(mm, &oldcs->mems_allowed, &cpuset_attach_nodemask_to); mmput(mm); } -- cgit v1.2.3-70-g09d2 From 249cc86db7492dc8de1d2eddebc6bcc4ab2a8e9e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 5 Jun 2013 17:15:48 +0800 Subject: cpuset: remove cpuset_test_cpumask() The test is done in set_cpus_allowed_ptr(), so it's redundant. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index dbef832e5e2..51f8e1d5a2a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -783,23 +783,6 @@ void rebuild_sched_domains(void) mutex_unlock(&cpuset_mutex); } -/** - * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's - * @tsk: task to test - * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner - * - * Call with cpuset_mutex held. May take callback_mutex during call. - * Called for each task in a cgroup by cgroup_scan_tasks(). - * Return nonzero if this tasks's cpus_allowed mask should be changed (in other - * words, if its mask is not equal to its cpuset's mask). - */ -static int cpuset_test_cpumask(struct task_struct *tsk, - struct cgroup_scanner *scan) -{ - return !cpumask_equal(&tsk->cpus_allowed, - (cgroup_cs(scan->cg))->cpus_allowed); -} - /** * cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's * @tsk: task to test @@ -835,7 +818,7 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap) struct cgroup_scanner scan; scan.cg = cs->css.cgroup; - scan.test_task = cpuset_test_cpumask; + scan.test_task = NULL; scan.process_task = cpuset_change_cpumask; scan.heap = heap; cgroup_scan_tasks(&scan); -- cgit v1.2.3-70-g09d2 From a73456f37b9dbc917398387d0cba926b4455b70f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 5 Jun 2013 17:15:59 +0800 Subject: cpuset: re-structure update_cpumask() a bit Check if cpus_allowed is to be changed before calling validate_change(). This won't change any behavior, but later it will allow us to do this: # mkdir /cpuset/child # echo $$ > /cpuset/child/tasks /* empty cpuset */ # echo > /cpuset/child/cpuset.cpus /* do nothing, won't fail */ Without this patch, the last operation will fail. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 51f8e1d5a2a..535dce685ee 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -856,14 +856,15 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask)) return -EINVAL; } - retval = validate_change(cs, trialcs); - if (retval < 0) - return retval; /* Nothing to do if the cpus didn't change */ if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) return 0; + retval = validate_change(cs, trialcs); + if (retval < 0) + return retval; + retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); if (retval) return retval; -- cgit v1.2.3-70-g09d2 From e44193d39e8d4d1de5d996fcd37ed75e5c704f10 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 9 Jun 2013 17:14:22 +0800 Subject: cpuset: let hotplug propagation work wait for task attaching Instead of triggering propagation work in cpuset_attach(), we make hotplug propagation work wait until there's no task attaching in progress. IMO this is more robust. We won't see empty masks in cpuset_attach(). Also it's a preparation for removing propagation work. Without asynchronous propagation we can't call move_tasks_in_empty_cpuset() in cpuset_attach(), because otherwise we'll deadlock on cgroup_mutex. tj: typo fixes. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 535dce685ee..e902473f76b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -59,6 +59,7 @@ #include #include #include +#include /* * Tracks how many cpusets are currently defined in system. @@ -275,6 +276,8 @@ static void schedule_cpuset_propagate_hotplug(struct cpuset *cs); static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); +static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); + /* * This is ugly, but preserves the userspace API for existing cpuset * users. If someone tries to mount the "cpuset" filesystem, we @@ -1436,14 +1439,8 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) } cs->attach_in_progress--; - - /* - * We may have raced with CPU/memory hotunplug. Trigger hotplug - * propagation if @cs doesn't have any CPU or memory. It will move - * the newly added tasks to the nearest parent which can execute. - */ - if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) - schedule_cpuset_propagate_hotplug(cs); + if (!cs->attach_in_progress) + wake_up(&cpuset_attach_wq); mutex_unlock(&cpuset_mutex); } @@ -1555,10 +1552,6 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, * resources, wait for the previously scheduled operations before * proceeding, so that we don't end up keep removing tasks added * after execution capability is restored. - * - * Flushing cpuset_hotplug_work is enough to synchronize against - * hotplug hanlding; however, cpuset_attach() may schedule - * propagation work directly. Flush the workqueue too. */ flush_work(&cpuset_hotplug_work); flush_workqueue(cpuset_propagate_hotplug_wq); @@ -2005,8 +1998,20 @@ static void cpuset_propagate_hotplug_workfn(struct work_struct *work) struct cpuset *cs = container_of(work, struct cpuset, hotplug_work); bool is_empty; +retry: + wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); + mutex_lock(&cpuset_mutex); + /* + * We have raced with task attaching. We wait until attaching + * is finished, so we won't attach a task to an empty cpuset. + */ + if (cs->attach_in_progress) { + mutex_unlock(&cpuset_mutex); + goto retry; + } + cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed); nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed); -- cgit v1.2.3-70-g09d2 From 388afd8549dc8be0920e00ae9404341593b6bd7c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 9 Jun 2013 17:14:47 +0800 Subject: cpuset: remove async hotplug propagation work As we can drop rcu read lock while iterating cgroup hierarchy, we don't have to do propagation asynchronously via workqueue. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 69 +++++++++++++-------------------------------------------- 1 file changed, 16 insertions(+), 53 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index e902473f76b..608fe1308b2 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -101,8 +101,6 @@ struct cpuset { /* for custom sched domain */ int relax_domain_level; - - struct work_struct hotplug_work; }; /* Retrieve the cpuset for a cgroup */ @@ -268,12 +266,7 @@ static DEFINE_MUTEX(callback_mutex); /* * CPU / memory hotplug is handled asynchronously. */ -static struct workqueue_struct *cpuset_propagate_hotplug_wq; - static void cpuset_hotplug_workfn(struct work_struct *work); -static void cpuset_propagate_hotplug_workfn(struct work_struct *work); -static void schedule_cpuset_propagate_hotplug(struct cpuset *cs); - static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); @@ -1554,7 +1547,6 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, * after execution capability is restored. */ flush_work(&cpuset_hotplug_work); - flush_workqueue(cpuset_propagate_hotplug_wq); mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) @@ -1821,7 +1813,6 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont) cpumask_clear(cs->cpus_allowed); nodes_clear(cs->mems_allowed); fmeter_init(&cs->fmeter); - INIT_WORK(&cs->hotplug_work, cpuset_propagate_hotplug_workfn); cs->relax_domain_level = -1; return &cs->css; @@ -1984,18 +1975,17 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) } /** - * cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset + * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug * @cs: cpuset in interest * * Compare @cs's cpu and mem masks against top_cpuset and if some have gone * offline, update @cs accordingly. If @cs ends up with no CPU or memory, * all its tasks are moved to the nearest ancestor with both resources. */ -static void cpuset_propagate_hotplug_workfn(struct work_struct *work) +static void cpuset_hotplug_update_tasks(struct cpuset *cs) { static cpumask_t off_cpus; static nodemask_t off_mems, tmp_mems; - struct cpuset *cs = container_of(work, struct cpuset, hotplug_work); bool is_empty; retry: @@ -2044,34 +2034,6 @@ retry: */ if (is_empty) remove_tasks_in_empty_cpuset(cs); - - /* the following may free @cs, should be the last operation */ - css_put(&cs->css); -} - -/** - * schedule_cpuset_propagate_hotplug - schedule hotplug propagation to a cpuset - * @cs: cpuset of interest - * - * Schedule cpuset_propagate_hotplug_workfn() which will update CPU and - * memory masks according to top_cpuset. - */ -static void schedule_cpuset_propagate_hotplug(struct cpuset *cs) -{ - /* - * Pin @cs. The refcnt will be released when the work item - * finishes executing. - */ - if (!css_tryget(&cs->css)) - return; - - /* - * Queue @cs->hotplug_work. If already pending, lose the css ref. - * cpuset_propagate_hotplug_wq is ordered and propagation will - * happen in the order this function is called. - */ - if (!queue_work(cpuset_propagate_hotplug_wq, &cs->hotplug_work)) - css_put(&cs->css); } /** @@ -2084,8 +2046,8 @@ static void schedule_cpuset_propagate_hotplug(struct cpuset *cs) * actively using CPU hotplug but making no active use of cpusets. * * Non-root cpusets are only affected by offlining. If any CPUs or memory - * nodes have been taken down, cpuset_propagate_hotplug() is invoked on all - * descendants. + * nodes have been taken down, cpuset_hotplug_update_tasks() is invoked on + * all descendants. * * Note that CPU offlining during suspend is ignored. We don't modify * cpusets across suspend/resume cycles at all. @@ -2128,21 +2090,26 @@ static void cpuset_hotplug_workfn(struct work_struct *work) update_tasks_nodemask(&top_cpuset, &tmp_mems, NULL); } + mutex_unlock(&cpuset_mutex); + /* if cpus or mems went down, we need to propagate to descendants */ if (cpus_offlined || mems_offlined) { struct cpuset *cs; struct cgroup *pos_cgrp; rcu_read_lock(); - cpuset_for_each_descendant_pre(cs, pos_cgrp, &top_cpuset) - schedule_cpuset_propagate_hotplug(cs); - rcu_read_unlock(); - } + cpuset_for_each_descendant_pre(cs, pos_cgrp, &top_cpuset) { + if (!css_tryget(&cs->css)) + continue; + rcu_read_unlock(); - mutex_unlock(&cpuset_mutex); + cpuset_hotplug_update_tasks(cs); - /* wait for propagations to finish */ - flush_workqueue(cpuset_propagate_hotplug_wq); + rcu_read_lock(); + css_put(&cs->css); + } + rcu_read_unlock(); + } /* rebuild sched domains if cpus_allowed has changed */ if (cpus_updated) @@ -2193,10 +2160,6 @@ void __init cpuset_init_smp(void) top_cpuset.mems_allowed = node_states[N_MEMORY]; register_hotmemory_notifier(&cpuset_track_online_nodes_nb); - - cpuset_propagate_hotplug_wq = - alloc_ordered_workqueue("cpuset_hotplug", 0); - BUG_ON(!cpuset_propagate_hotplug_wq); } /** -- cgit v1.2.3-70-g09d2 From 33ad801dfb5c8b1127c72fdb745ce8c630150f3f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 9 Jun 2013 17:15:08 +0800 Subject: cpuset: record old_mems_allowed in struct cpuset When we update a cpuset's mems_allowed and thus update tasks' mems_allowed, it's required to pass the old mems_allowed and new mems_allowed to cpuset_migrate_mm(). Currently we save old mems_allowed in a temp local variable before changing cpuset->mems_allowed. This patch changes it by saving old mems_allowed in cpuset->old_mems_allowed. This currently won't change any behavior, but it will later allow us to keep tasks in empty cpusets. v3: restored "cpuset_attach_nodemask_to = cs->mems_allowed" Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 61 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 25 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 608fe1308b2..2b4554588a0 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -88,6 +88,18 @@ struct cpuset { cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ + /* + * This is old Memory Nodes tasks took on. + * + * - top_cpuset.old_mems_allowed is initialized to mems_allowed. + * - A new cpuset's old_mems_allowed is initialized when some + * task is moved into it. + * - old_mems_allowed is used in cpuset_migrate_mm() when we change + * cpuset.mems_allowed and have tasks' nodemask updated, and + * then old_mems_allowed is updated to mems_allowed. + */ + nodemask_t old_mems_allowed; + struct fmeter fmeter; /* memory_pressure filter */ /* @@ -972,16 +984,12 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, static void cpuset_change_nodemask(struct task_struct *p, struct cgroup_scanner *scan) { + struct cpuset *cs = cgroup_cs(scan->cg); struct mm_struct *mm; - struct cpuset *cs; int migrate; - const nodemask_t *oldmem = scan->data; - static nodemask_t newmems; /* protected by cpuset_mutex */ - - cs = cgroup_cs(scan->cg); - guarantee_online_mems(cs, &newmems); + nodemask_t *newmems = scan->data; - cpuset_change_task_nodemask(p, &newmems); + cpuset_change_task_nodemask(p, newmems); mm = get_task_mm(p); if (!mm) @@ -991,7 +999,7 @@ static void cpuset_change_nodemask(struct task_struct *p, mpol_rebind_mm(mm, &cs->mems_allowed); if (migrate) - cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed); + cpuset_migrate_mm(mm, &cs->old_mems_allowed, newmems); mmput(mm); } @@ -1000,25 +1008,26 @@ static void *cpuset_being_rebound; /** * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. * @cs: the cpuset in which each task's mems_allowed mask needs to be changed - * @oldmem: old mems_allowed of cpuset cs * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() * * Called with cpuset_mutex held * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0 * if @heap != NULL. */ -static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, - struct ptr_heap *heap) +static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap) { + static nodemask_t newmems; /* protected by cpuset_mutex */ struct cgroup_scanner scan; cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ + guarantee_online_mems(cs, &newmems); + scan.cg = cs->css.cgroup; scan.test_task = NULL; scan.process_task = cpuset_change_nodemask; scan.heap = heap; - scan.data = (nodemask_t *)oldmem; + scan.data = &newmems; /* * The mpol_rebind_mm() call takes mmap_sem, which we couldn't @@ -1032,6 +1041,12 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, */ cgroup_scan_tasks(&scan); + /* + * All the tasks' nodemasks have been updated, update + * cs->old_mems_allowed. + */ + cs->old_mems_allowed = newmems; + /* We're done rebinding vmas to this cpuset's new mems_allowed. */ cpuset_being_rebound = NULL; } @@ -1052,13 +1067,9 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { - NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL); int retval; struct ptr_heap heap; - if (!oldmem) - return -ENOMEM; - /* * top_cpuset.mems_allowed tracks node_stats[N_MEMORY]; * it's read-only @@ -1087,8 +1098,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, goto done; } } - *oldmem = cs->mems_allowed; - if (nodes_equal(*oldmem, trialcs->mems_allowed)) { + + if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) { retval = 0; /* Too easy - nothing to do */ goto done; } @@ -1104,11 +1115,10 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, cs->mems_allowed = trialcs->mems_allowed; mutex_unlock(&callback_mutex); - update_tasks_nodemask(cs, oldmem, &heap); + update_tasks_nodemask(cs, &heap); heap_free(&heap); done: - NODEMASK_FREE(oldmem); return retval; } @@ -1431,6 +1441,8 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) mmput(mm); } + cs->old_mems_allowed = cpuset_attach_nodemask_to; + cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); @@ -1985,7 +1997,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) static void cpuset_hotplug_update_tasks(struct cpuset *cs) { static cpumask_t off_cpus; - static nodemask_t off_mems, tmp_mems; + static nodemask_t off_mems; bool is_empty; retry: @@ -2015,11 +2027,10 @@ retry: /* remove offline mems from @cs */ if (!nodes_empty(off_mems)) { - tmp_mems = cs->mems_allowed; mutex_lock(&callback_mutex); nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); mutex_unlock(&callback_mutex); - update_tasks_nodemask(cs, &tmp_mems, NULL); + update_tasks_nodemask(cs, NULL); } is_empty = cpumask_empty(cs->cpus_allowed) || @@ -2083,11 +2094,10 @@ static void cpuset_hotplug_workfn(struct work_struct *work) /* synchronize mems_allowed to N_MEMORY */ if (mems_updated) { - tmp_mems = top_cpuset.mems_allowed; mutex_lock(&callback_mutex); top_cpuset.mems_allowed = new_mems; mutex_unlock(&callback_mutex); - update_tasks_nodemask(&top_cpuset, &tmp_mems, NULL); + update_tasks_nodemask(&top_cpuset, NULL); } mutex_unlock(&cpuset_mutex); @@ -2158,6 +2168,7 @@ void __init cpuset_init_smp(void) { cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); top_cpuset.mems_allowed = node_states[N_MEMORY]; + top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; register_hotmemory_notifier(&cpuset_track_online_nodes_nb); } -- cgit v1.2.3-70-g09d2 From 070b57fcacc9dfc23a180290079078373fb697e1 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 9 Jun 2013 17:15:22 +0800 Subject: cpuset: introduce effective_{cpumask|nodemask}_cpuset() effective_cpumask_cpuset() returns an ancestor cpuset which has non-empty cpumask. If a cpuset is empty and the tasks in it need to update their cpus_allowed, they take on the ancestor cpuset's cpumask. This currently won't change any behavior, but it will later allow us to keep tasks in empty cpusets. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 2b4554588a0..82ac1f862cb 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -791,6 +791,45 @@ void rebuild_sched_domains(void) mutex_unlock(&cpuset_mutex); } +/* + * effective_cpumask_cpuset - return nearest ancestor with non-empty cpus + * @cs: the cpuset in interest + * + * A cpuset's effective cpumask is the cpumask of the nearest ancestor + * with non-empty cpus. We use effective cpumask whenever: + * - we update tasks' cpus_allowed. (they take on the ancestor's cpumask + * if the cpuset they reside in has no cpus) + * - we want to retrieve task_cs(tsk)'s cpus_allowed. + * + * Called with cpuset_mutex held. cpuset_cpus_allowed_fallback() is an + * exception. See comments there. + */ +static struct cpuset *effective_cpumask_cpuset(struct cpuset *cs) +{ + while (cpumask_empty(cs->cpus_allowed)) + cs = parent_cs(cs); + return cs; +} + +/* + * effective_nodemask_cpuset - return nearest ancestor with non-empty mems + * @cs: the cpuset in interest + * + * A cpuset's effective nodemask is the nodemask of the nearest ancestor + * with non-empty memss. We use effective nodemask whenever: + * - we update tasks' mems_allowed. (they take on the ancestor's nodemask + * if the cpuset they reside in has no mems) + * - we want to retrieve task_cs(tsk)'s mems_allowed. + * + * Called with cpuset_mutex held. + */ +static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs) +{ + while (nodes_empty(cs->mems_allowed)) + cs = parent_cs(cs); + return cs; +} + /** * cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's * @tsk: task to test @@ -805,7 +844,10 @@ void rebuild_sched_domains(void) static void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) { - set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed)); + struct cpuset *cpus_cs; + + cpus_cs = effective_cpumask_cpuset(cgroup_cs(scan->cg)); + set_cpus_allowed_ptr(tsk, cpus_cs->cpus_allowed); } /** @@ -920,12 +962,14 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to) { struct task_struct *tsk = current; + struct cpuset *mems_cs; tsk->mems_allowed = *to; do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); - guarantee_online_mems(task_cs(tsk),&tsk->mems_allowed); + mems_cs = effective_nodemask_cpuset(task_cs(tsk)); + guarantee_online_mems(mems_cs, &tsk->mems_allowed); } /* @@ -1018,10 +1062,11 @@ static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap) { static nodemask_t newmems; /* protected by cpuset_mutex */ struct cgroup_scanner scan; + struct cpuset *mems_cs = effective_nodemask_cpuset(cs); cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ - guarantee_online_mems(cs, &newmems); + guarantee_online_mems(mems_cs, &newmems); scan.cg = cs->css.cgroup; scan.test_task = NULL; @@ -1405,6 +1450,8 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset); struct cpuset *cs = cgroup_cs(cgrp); struct cpuset *oldcs = cgroup_cs(oldcgrp); + struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); + struct cpuset *mems_cs = effective_nodemask_cpuset(cs); mutex_lock(&cpuset_mutex); @@ -1412,9 +1459,9 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) if (cs == &top_cpuset) cpumask_copy(cpus_attach, cpu_possible_mask); else - guarantee_online_cpus(cs, cpus_attach); + guarantee_online_cpus(cpus_cs, cpus_attach); - guarantee_online_mems(cs, &cpuset_attach_nodemask_to); + guarantee_online_mems(mems_cs, &cpuset_attach_nodemask_to); cgroup_taskset_for_each(task, cgrp, tset) { /* @@ -1434,9 +1481,11 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) cpuset_attach_nodemask_to = cs->mems_allowed; mm = get_task_mm(leader); if (mm) { + struct cpuset *mems_oldcs = effective_nodemask_cpuset(oldcs); + mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); if (is_memory_migrate(cs)) - cpuset_migrate_mm(mm, &oldcs->mems_allowed, + cpuset_migrate_mm(mm, &mems_oldcs->mems_allowed, &cpuset_attach_nodemask_to); mmput(mm); } @@ -2186,20 +2235,23 @@ void __init cpuset_init_smp(void) void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) { + struct cpuset *cpus_cs; + mutex_lock(&callback_mutex); task_lock(tsk); - guarantee_online_cpus(task_cs(tsk), pmask); + cpus_cs = effective_cpumask_cpuset(task_cs(tsk)); + guarantee_online_cpus(cpus_cs, pmask); task_unlock(tsk); mutex_unlock(&callback_mutex); } void cpuset_cpus_allowed_fallback(struct task_struct *tsk) { - const struct cpuset *cs; + const struct cpuset *cpus_cs; rcu_read_lock(); - cs = task_cs(tsk); - do_set_cpus_allowed(tsk, cs->cpus_allowed); + cpus_cs = effective_cpumask_cpuset(task_cs(tsk)); + do_set_cpus_allowed(tsk, cpus_cs->cpus_allowed); rcu_read_unlock(); /* @@ -2238,11 +2290,13 @@ void cpuset_init_current_mems_allowed(void) nodemask_t cpuset_mems_allowed(struct task_struct *tsk) { + struct cpuset *mems_cs; nodemask_t mask; mutex_lock(&callback_mutex); task_lock(tsk); - guarantee_online_mems(task_cs(tsk), &mask); + mems_cs = effective_nodemask_cpuset(task_cs(tsk)); + guarantee_online_mems(mems_cs, &mask); task_unlock(tsk); mutex_unlock(&callback_mutex); -- cgit v1.2.3-70-g09d2 From 5c5cc62321d9df7a9a608346fc649c4528380c8f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 9 Jun 2013 17:16:29 +0800 Subject: cpuset: allow to keep tasks in empty cpusets To achieve this: - We call update_tasks_cpumask/nodemask() for empty cpusets when hotplug happens, instead of moving tasks out of them. - When a cpuset's masks are changed by writing cpuset.cpus/mems, we also update tasks in child cpusets which are empty. v3: - do propagation work in one place for both hotplug and unplug v2: - drop rcu_read_lock before calling update_task_nodemask() and update_task_cpumask(), instead of using workqueue. - add documentation in include/linux/cgroup.h Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 4 ++ kernel/cpuset.c | 141 ++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 114 insertions(+), 31 deletions(-) (limited to 'kernel') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index d0ad3794b94..53e81a61be5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -277,6 +277,10 @@ enum { * * - Remount is disallowed. * + * - cpuset: tasks will be kept in empty cpusets when hotplug happens + * and take masks of ancestors with non-empty cpus/mems, instead of + * being moved to an ancestor. + * * - memcg: use_hierarchy is on by default and the cgroup file for * the flag is not created. * diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 82ac1f862cb..3473dd2580d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -874,6 +874,45 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap) cgroup_scan_tasks(&scan); } +/* + * update_tasks_cpumask_hier - Update the cpumasks of tasks in the hierarchy. + * @root_cs: the root cpuset of the hierarchy + * @update_root: update root cpuset or not? + * @heap: the heap used by cgroup_scan_tasks() + * + * This will update cpumasks of tasks in @root_cs and all other empty cpusets + * which take on cpumask of @root_cs. + * + * Called with cpuset_mutex held + */ +static void update_tasks_cpumask_hier(struct cpuset *root_cs, + bool update_root, struct ptr_heap *heap) +{ + struct cpuset *cp; + struct cgroup *pos_cgrp; + + if (update_root) + update_tasks_cpumask(root_cs, heap); + + rcu_read_lock(); + cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) { + /* skip the whole subtree if @cp have some CPU */ + if (!cpumask_empty(cp->cpus_allowed)) { + pos_cgrp = cgroup_rightmost_descendant(pos_cgrp); + continue; + } + if (!css_tryget(&cp->css)) + continue; + rcu_read_unlock(); + + update_tasks_cpumask(cp, heap); + + rcu_read_lock(); + css_put(&cp->css); + } + rcu_read_unlock(); +} + /** * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it * @cs: the cpuset to consider @@ -925,11 +964,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); mutex_unlock(&callback_mutex); - /* - * Scan tasks in the cpuset, and update the cpumasks of any - * that need an update. - */ - update_tasks_cpumask(cs, &heap); + update_tasks_cpumask_hier(cs, true, &heap); heap_free(&heap); @@ -1096,6 +1131,45 @@ static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap) cpuset_being_rebound = NULL; } +/* + * update_tasks_nodemask_hier - Update the nodemasks of tasks in the hierarchy. + * @cs: the root cpuset of the hierarchy + * @update_root: update the root cpuset or not? + * @heap: the heap used by cgroup_scan_tasks() + * + * This will update nodemasks of tasks in @root_cs and all other empty cpusets + * which take on nodemask of @root_cs. + * + * Called with cpuset_mutex held + */ +static void update_tasks_nodemask_hier(struct cpuset *root_cs, + bool update_root, struct ptr_heap *heap) +{ + struct cpuset *cp; + struct cgroup *pos_cgrp; + + if (update_root) + update_tasks_nodemask(root_cs, heap); + + rcu_read_lock(); + cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) { + /* skip the whole subtree if @cp have some CPU */ + if (!nodes_empty(cp->mems_allowed)) { + pos_cgrp = cgroup_rightmost_descendant(pos_cgrp); + continue; + } + if (!css_tryget(&cp->css)) + continue; + rcu_read_unlock(); + + update_tasks_nodemask(cp, heap); + + rcu_read_lock(); + css_put(&cp->css); + } + rcu_read_unlock(); +} + /* * Handle user request to change the 'mems' memory placement * of a cpuset. Needs to validate the request, update the @@ -1160,7 +1234,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, cs->mems_allowed = trialcs->mems_allowed; mutex_unlock(&callback_mutex); - update_tasks_nodemask(cs, &heap); + update_tasks_nodemask_hier(cs, true, &heap); heap_free(&heap); done: @@ -2048,6 +2122,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs) static cpumask_t off_cpus; static nodemask_t off_mems; bool is_empty; + bool sane = cgroup_sane_behavior(cs->css.cgroup); retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -2066,21 +2141,29 @@ retry: cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed); nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed); - /* remove offline cpus from @cs */ - if (!cpumask_empty(&off_cpus)) { - mutex_lock(&callback_mutex); - cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus); - mutex_unlock(&callback_mutex); + mutex_lock(&callback_mutex); + cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus); + mutex_unlock(&callback_mutex); + + /* + * If sane_behavior flag is set, we need to update tasks' cpumask + * for empty cpuset to take on ancestor's cpumask. + */ + if ((sane && cpumask_empty(cs->cpus_allowed)) || + !cpumask_empty(&off_cpus)) update_tasks_cpumask(cs, NULL); - } - /* remove offline mems from @cs */ - if (!nodes_empty(off_mems)) { - mutex_lock(&callback_mutex); - nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); - mutex_unlock(&callback_mutex); + mutex_lock(&callback_mutex); + nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); + mutex_unlock(&callback_mutex); + + /* + * If sane_behavior flag is set, we need to update tasks' nodemask + * for empty cpuset to take on ancestor's nodemask. + */ + if ((sane && nodes_empty(cs->mems_allowed)) || + !nodes_empty(off_mems)) update_tasks_nodemask(cs, NULL); - } is_empty = cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed); @@ -2088,11 +2171,13 @@ retry: mutex_unlock(&cpuset_mutex); /* - * If @cs became empty, move tasks to the nearest ancestor with - * execution resources. This is full cgroup operation which will + * If sane_behavior flag is set, we'll keep tasks in empty cpusets. + * + * Otherwise move tasks to the nearest ancestor with execution + * resources. This is full cgroup operation which will * also call back into cpuset. Should be done outside any lock. */ - if (is_empty) + if (!sane && is_empty) remove_tasks_in_empty_cpuset(cs); } @@ -2114,10 +2199,9 @@ retry: */ static void cpuset_hotplug_workfn(struct work_struct *work) { - static cpumask_t new_cpus, tmp_cpus; - static nodemask_t new_mems, tmp_mems; + static cpumask_t new_cpus; + static nodemask_t new_mems; bool cpus_updated, mems_updated; - bool cpus_offlined, mems_offlined; mutex_lock(&cpuset_mutex); @@ -2126,12 +2210,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) new_mems = node_states[N_MEMORY]; cpus_updated = !cpumask_equal(top_cpuset.cpus_allowed, &new_cpus); - cpus_offlined = cpumask_andnot(&tmp_cpus, top_cpuset.cpus_allowed, - &new_cpus); - mems_updated = !nodes_equal(top_cpuset.mems_allowed, new_mems); - nodes_andnot(tmp_mems, top_cpuset.mems_allowed, new_mems); - mems_offlined = !nodes_empty(tmp_mems); /* synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { @@ -2151,8 +2230,8 @@ static void cpuset_hotplug_workfn(struct work_struct *work) mutex_unlock(&cpuset_mutex); - /* if cpus or mems went down, we need to propagate to descendants */ - if (cpus_offlined || mems_offlined) { + /* if cpus or mems changed, we need to propagate to descendants */ + if (cpus_updated || mems_updated) { struct cpuset *cs; struct cgroup *pos_cgrp; -- cgit v1.2.3-70-g09d2 From 88fa523bff295f1d60244a54833480b02f775152 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 9 Jun 2013 17:16:46 +0800 Subject: cpuset: allow to move tasks to empty cpusets Currently some cpuset behaviors are not friendly when cpuset is co-mounted with other cgroup controllers. Now with this patchset if cpuset is mounted with sane_behavior option, it behaves differently: - Tasks will be kept in empty cpusets when hotplug happens and take masks of ancestors with non-empty cpus/mems, instead of being moved to an ancestor. - A task can be moved into an empty cpuset, and again it takes masks of ancestors, so the user can drop a task into a newly created cgroup without having to do anything for it. As tasks can reside in empy cpusets, here're some rules: - They can be moved to another cpuset, regardless it's empty or not. - Though it takes masks from ancestors, it takes other configs from the empty cpuset. - If the ancestors' masks are changed, those tasks will also be updated to take new masks. v2: add documentation in include/linux/cgroup.h Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 3 +++ kernel/cpuset.c | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 53e81a61be5..74e8b8e4cd7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -281,6 +281,9 @@ enum { * and take masks of ancestors with non-empty cpus/mems, instead of * being moved to an ancestor. * + * - cpuset: a task can be moved into an empty cpuset, and again it + * takes masks of ancestors. + * * - memcg: use_hierarchy is on by default and the cgroup file for * the flag is not created. * diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3473dd2580d..3b3fdfdd4d7 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -479,7 +479,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) */ ret = -ENOSPC; if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress) && - (cpumask_empty(trial->cpus_allowed) || + (cpumask_empty(trial->cpus_allowed) && nodes_empty(trial->mems_allowed))) goto out; @@ -1466,8 +1466,13 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) mutex_lock(&cpuset_mutex); + /* + * We allow to move tasks into an empty cpuset if sane_behavior + * flag is set. + */ ret = -ENOSPC; - if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) + if (!cgroup_sane_behavior(cgrp) && + (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) goto out_unlock; cgroup_taskset_for_each(task, cgrp, tset) { -- cgit v1.2.3-70-g09d2 From f047cecf2cfc9595b1f39c9aab383bb0682f5a53 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 13 Jun 2013 15:11:44 +0800 Subject: cpuset: fix to migrate mm correctly in a corner case Before moving tasks out of empty cpusets, update_tasks_nodemask() is called, which calls do_migrate_pages(xx, from, to). Then those tasks are moved to an ancestor, and do_migrate_pages() is called again. The first time: from = node_to_be_offlined, to = empty. The second time: from = empty, to = ancestor's nodemask. so looks like no pages will be migrated. Fix this by: - Don't call update_tasks_nodemask() on empty cpusets. - Pass cs->old_mems_allowed to do_migrate_pages(). v4: added comment in cpuset_hotplug_update_tasks() and rephased comment in cpuset_attach(). Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3b3fdfdd4d7..4c17d96bd3a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1563,9 +1563,18 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) struct cpuset *mems_oldcs = effective_nodemask_cpuset(oldcs); mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); - if (is_memory_migrate(cs)) - cpuset_migrate_mm(mm, &mems_oldcs->mems_allowed, + + /* + * old_mems_allowed is the same with mems_allowed here, except + * if this task is being moved automatically due to hotplug. + * In that case @mems_allowed has been updated and is empty, + * so @old_mems_allowed is the right nodesets that we migrate + * mm from. + */ + if (is_memory_migrate(cs)) { + cpuset_migrate_mm(mm, &mems_oldcs->old_mems_allowed, &cpuset_attach_nodemask_to); + } mmput(mm); } @@ -2152,10 +2161,12 @@ retry: /* * If sane_behavior flag is set, we need to update tasks' cpumask - * for empty cpuset to take on ancestor's cpumask. + * for empty cpuset to take on ancestor's cpumask. Otherwise, don't + * call update_tasks_cpumask() if the cpuset becomes empty, as + * the tasks in it will be migrated to an ancestor. */ if ((sane && cpumask_empty(cs->cpus_allowed)) || - !cpumask_empty(&off_cpus)) + (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed))) update_tasks_cpumask(cs, NULL); mutex_lock(&callback_mutex); @@ -2164,10 +2175,12 @@ retry: /* * If sane_behavior flag is set, we need to update tasks' nodemask - * for empty cpuset to take on ancestor's nodemask. + * for empty cpuset to take on ancestor's nodemask. Otherwise, don't + * call update_tasks_nodemask() if the cpuset becomes empty, as + * the tasks in it will be migratd to an ancestor. */ if ((sane && nodes_empty(cs->mems_allowed)) || - !nodes_empty(off_mems)) + (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed))) update_tasks_nodemask(cs, NULL); is_empty = cpumask_empty(cs->cpus_allowed) || -- cgit v1.2.3-70-g09d2 From c9e5fe66f5947c9e56dfc7655e5b4b127ca2120f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 14 Jun 2013 11:18:27 +0800 Subject: cpuset: rename @cont to @cgrp Cont is short for container. control group was named process container at first, but then people found container already has a meaning in linux kernel. Clean up the leftover variable name @cont. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4c17d96bd3a..654c9597902 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -116,9 +116,9 @@ struct cpuset { }; /* Retrieve the cpuset for a cgroup */ -static inline struct cpuset *cgroup_cs(struct cgroup *cont) +static inline struct cpuset *cgroup_cs(struct cgroup *cgrp) { - return container_of(cgroup_subsys_state(cont, cpuset_subsys_id), + return container_of(cgroup_subsys_state(cgrp, cpuset_subsys_id), struct cpuset, css); } @@ -433,7 +433,7 @@ static void free_trial_cpuset(struct cpuset *trial) static int validate_change(const struct cpuset *cur, const struct cpuset *trial) { - struct cgroup *cont; + struct cgroup *cgrp; struct cpuset *c, *par; int ret; @@ -441,7 +441,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) /* Each of our child cpusets must be a subset of us */ ret = -EBUSY; - cpuset_for_each_child(c, cont, cur) + cpuset_for_each_child(c, cgrp, cur) if (!is_cpuset_subset(c, trial)) goto out; @@ -462,7 +462,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) * overlap */ ret = -EINVAL; - cpuset_for_each_child(c, cont, par) { + cpuset_for_each_child(c, cgrp, par) { if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && c != cur && cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) @@ -1759,13 +1759,13 @@ static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs) return count; } -static ssize_t cpuset_common_file_read(struct cgroup *cont, +static ssize_t cpuset_common_file_read(struct cgroup *cgrp, struct cftype *cft, struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - struct cpuset *cs = cgroup_cs(cont); + struct cpuset *cs = cgroup_cs(cgrp); cpuset_filetype_t type = cft->private; char *page; ssize_t retval = 0; @@ -1795,9 +1795,9 @@ out: return retval; } -static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft) +static u64 cpuset_read_u64(struct cgroup *cgrp, struct cftype *cft) { - struct cpuset *cs = cgroup_cs(cont); + struct cpuset *cs = cgroup_cs(cgrp); cpuset_filetype_t type = cft->private; switch (type) { case FILE_CPU_EXCLUSIVE: @@ -1826,9 +1826,9 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft) return 0; } -static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft) +static s64 cpuset_read_s64(struct cgroup *cgrp, struct cftype *cft) { - struct cpuset *cs = cgroup_cs(cont); + struct cpuset *cs = cgroup_cs(cgrp); cpuset_filetype_t type = cft->private; switch (type) { case FILE_SCHED_RELAX_DOMAIN_LEVEL: @@ -1940,14 +1940,14 @@ static struct cftype files[] = { /* * cpuset_css_alloc - allocate a cpuset css - * cont: control group that the new cpuset will be part of + * cgrp: control group that the new cpuset will be part of */ -static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont) +static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cgrp) { struct cpuset *cs; - if (!cont->parent) + if (!cgrp->parent) return &top_cpuset.css; cs = kzalloc(sizeof(*cs), GFP_KERNEL); @@ -2042,9 +2042,9 @@ static void cpuset_css_offline(struct cgroup *cgrp) * will call rebuild_sched_domains_locked(). */ -static void cpuset_css_free(struct cgroup *cont) +static void cpuset_css_free(struct cgroup *cgrp) { - struct cpuset *cs = cgroup_cs(cont); + struct cpuset *cs = cgroup_cs(cgrp); free_cpumask_var(cs->cpus_allowed); kfree(cs); -- cgit v1.2.3-70-g09d2