summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Jackson <pj@sgi.com>2006-03-31 02:30:52 -0800
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-31 12:18:55 -0800
commite4e364e865b382f9d99c7fc230ec2ce7df21257a (patch)
tree9ff5ab54a0e40d7ad2b55d3ec48c6e175ebf50c7
parent2741a559a01e1ba9bf87285569dc1a104d134ecf (diff)
[PATCH] cpuset: memory migration interaction fix
Fix memory migration so that it works regardless of what cpuset the invoking task is in. If a task invoked a memory migration, by doing one of: 1) writing a different nodemask to a cpuset 'mems' file, or 2) writing a tasks pid to a different cpuset's 'tasks' file, where the cpuset had its 'memory_migrate' option turned on, then the allocation of the new pages for the migrated task(s) was constrained by the invoking tasks cpuset. If this task wasn't in a cpuset that allowed the requested memory nodes, the memory migration would happen to some other nodes that were in that invoking tasks cpuset. This was usually surprising and puzzling behaviour: Why didn't the pages move? Why did the pages move -there-? To fix this, temporarilly change the invoking tasks 'mems_allowed' task_struct field to the nodes the migrating tasks is moving to, so that new pages can be allocated there. Signed-off-by: Paul Jackson <pj@sgi.com> Acked-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--kernel/cpuset.c57
1 files changed, 52 insertions, 5 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index bf42381a419..72248d1b9e3 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -834,6 +834,55 @@ static int update_cpumask(struct cpuset *cs, char *buf)
}
/*
+ * cpuset_migrate_mm
+ *
+ * Migrate memory region from one set of nodes to another.
+ *
+ * Temporarilly set tasks mems_allowed to target nodes of migration,
+ * so that the migration code can allocate pages on these nodes.
+ *
+ * Call holding manage_mutex, so our current->cpuset won't change
+ * during this call, as manage_mutex holds off any attach_task()
+ * calls. Therefore we don't need to take task_lock around the
+ * call to guarantee_online_mems(), as we know no one is changing
+ * our tasks cpuset.
+ *
+ * Hold callback_mutex around the two modifications of our tasks
+ * mems_allowed to synchronize with cpuset_mems_allowed().
+ *
+ * While the mm_struct we are migrating is typically from some
+ * other task, the task_struct mems_allowed that we are hacking
+ * is for our current task, which must allocate new pages for that
+ * migrating memory region.
+ *
+ * We call cpuset_update_task_memory_state() before hacking
+ * our tasks mems_allowed, so that we are assured of being in
+ * sync with our tasks cpuset, and in particular, callbacks to
+ * cpuset_update_task_memory_state() from nested page allocations
+ * won't see any mismatch of our cpuset and task mems_generation
+ * values, so won't overwrite our hacked tasks mems_allowed
+ * nodemask.
+ */
+
+static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
+ const nodemask_t *to)
+{
+ struct task_struct *tsk = current;
+
+ cpuset_update_task_memory_state();
+
+ mutex_lock(&callback_mutex);
+ tsk->mems_allowed = *to;
+ mutex_unlock(&callback_mutex);
+
+ do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
+
+ mutex_lock(&callback_mutex);
+ guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed);
+ mutex_unlock(&callback_mutex);
+}
+
+/*
* Handle user request to change the 'mems' memory placement
* of a cpuset. Needs to validate the request, update the
* cpusets mems_allowed and mems_generation, and for each
@@ -945,10 +994,8 @@ static int update_nodemask(struct cpuset *cs, char *buf)
struct mm_struct *mm = mmarray[i];
mpol_rebind_mm(mm, &cs->mems_allowed);
- if (migrate) {
- do_migrate_pages(mm, &oldmem, &cs->mems_allowed,
- MPOL_MF_MOVE_ALL);
- }
+ if (migrate)
+ cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
mmput(mm);
}
@@ -1184,7 +1231,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
if (mm) {
mpol_rebind_mm(mm, &to);
if (is_memory_migrate(cs))
- do_migrate_pages(mm, &from, &to, MPOL_MF_MOVE_ALL);
+ cpuset_migrate_mm(mm, &from, &to);
mmput(mm);
}