summaryrefslogtreecommitdiffstats
path: root/mm/mempolicy.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--mm/mempolicy.c187
1 files changed, 141 insertions, 46 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3eb4a6fdc04..7dd9d9f8069 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -182,13 +182,58 @@ static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
return 0;
}
-/* Create a new policy */
+/*
+ * mpol_set_nodemask is called after mpol_new() to set up the nodemask, if
+ * any, for the new policy. mpol_new() has already validated the nodes
+ * parameter with respect to the policy mode and flags. But, we need to
+ * handle an empty nodemask with MPOL_PREFERRED here.
+ *
+ * Must be called holding task's alloc_lock to protect task's mems_allowed
+ * and mempolicy. May also be called holding the mmap_semaphore for write.
+ */
+static int mpol_set_nodemask(struct mempolicy *pol,
+ const nodemask_t *nodes, struct nodemask_scratch *nsc)
+{
+ int ret;
+
+ /* if mode is MPOL_DEFAULT, pol is NULL. This is right. */
+ if (pol == NULL)
+ return 0;
+ /* Check N_HIGH_MEMORY */
+ nodes_and(nsc->mask1,
+ cpuset_current_mems_allowed, node_states[N_HIGH_MEMORY]);
+
+ VM_BUG_ON(!nodes);
+ if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes))
+ nodes = NULL; /* explicit local allocation */
+ else {
+ if (pol->flags & MPOL_F_RELATIVE_NODES)
+ mpol_relative_nodemask(&nsc->mask2, nodes,&nsc->mask1);
+ else
+ nodes_and(nsc->mask2, *nodes, nsc->mask1);
+
+ if (mpol_store_user_nodemask(pol))
+ pol->w.user_nodemask = *nodes;
+ else
+ pol->w.cpuset_mems_allowed =
+ cpuset_current_mems_allowed;
+ }
+
+ if (nodes)
+ ret = mpol_ops[pol->mode].create(pol, &nsc->mask2);
+ else
+ ret = mpol_ops[pol->mode].create(pol, NULL);
+ return ret;
+}
+
+/*
+ * This function just creates a new policy, does some check and simple
+ * initialization. You must invoke mpol_set_nodemask() to set nodes.
+ */
static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
nodemask_t *nodes)
{
struct mempolicy *policy;
- nodemask_t cpuset_context_nmask;
- int ret;
pr_debug("setting mode %d flags %d nodes[0] %lx\n",
mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);
@@ -210,7 +255,6 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
if (((flags & MPOL_F_STATIC_NODES) ||
(flags & MPOL_F_RELATIVE_NODES)))
return ERR_PTR(-EINVAL);
- nodes = NULL; /* flag local alloc */
}
} else if (nodes_empty(*nodes))
return ERR_PTR(-EINVAL);
@@ -221,30 +265,6 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
policy->mode = mode;
policy->flags = flags;
- if (nodes) {
- /*
- * cpuset related setup doesn't apply to local allocation
- */
- cpuset_update_task_memory_state();
- if (flags & MPOL_F_RELATIVE_NODES)
- mpol_relative_nodemask(&cpuset_context_nmask, nodes,
- &cpuset_current_mems_allowed);
- else
- nodes_and(cpuset_context_nmask, *nodes,
- cpuset_current_mems_allowed);
- if (mpol_store_user_nodemask(policy))
- policy->w.user_nodemask = *nodes;
- else
- policy->w.cpuset_mems_allowed =
- cpuset_mems_allowed(current);
- }
-
- ret = mpol_ops[mode].create(policy,
- nodes ? &cpuset_context_nmask : NULL);
- if (ret < 0) {
- kmem_cache_free(policy_cache, policy);
- return ERR_PTR(ret);
- }
return policy;
}
@@ -324,6 +344,8 @@ static void mpol_rebind_policy(struct mempolicy *pol,
/*
* Wrapper for mpol_rebind_policy() that just requires task
* pointer, and updates task mempolicy.
+ *
+ * Called with task's alloc_lock held.
*/
void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
@@ -600,13 +622,19 @@ static void mpol_set_task_struct_flag(void)
static long do_set_mempolicy(unsigned short mode, unsigned short flags,
nodemask_t *nodes)
{
- struct mempolicy *new;
+ struct mempolicy *new, *old;
struct mm_struct *mm = current->mm;
+ NODEMASK_SCRATCH(scratch);
+ int ret;
- new = mpol_new(mode, flags, nodes);
- if (IS_ERR(new))
- return PTR_ERR(new);
+ if (!scratch)
+ return -ENOMEM;
+ new = mpol_new(mode, flags, nodes);
+ if (IS_ERR(new)) {
+ ret = PTR_ERR(new);
+ goto out;
+ }
/*
* prevent changing our mempolicy while show_numa_maps()
* is using it.
@@ -615,20 +643,36 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
*/
if (mm)
down_write(&mm->mmap_sem);
- mpol_put(current->mempolicy);
+ task_lock(current);
+ ret = mpol_set_nodemask(new, nodes, scratch);
+ if (ret) {
+ task_unlock(current);
+ if (mm)
+ up_write(&mm->mmap_sem);
+ mpol_put(new);
+ goto out;
+ }
+ old = current->mempolicy;
current->mempolicy = new;
mpol_set_task_struct_flag();
if (new && new->mode == MPOL_INTERLEAVE &&
nodes_weight(new->v.nodes))
current->il_next = first_node(new->v.nodes);
+ task_unlock(current);
if (mm)
up_write(&mm->mmap_sem);
- return 0;
+ mpol_put(old);
+ ret = 0;
+out:
+ NODEMASK_SCRATCH_FREE(scratch);
+ return ret;
}
/*
* Return nodemask for policy for get_mempolicy() query
+ *
+ * Called with task's alloc_lock held
*/
static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
{
@@ -674,7 +718,6 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
struct vm_area_struct *vma = NULL;
struct mempolicy *pol = current->mempolicy;
- cpuset_update_task_memory_state();
if (flags &
~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED))
return -EINVAL;
@@ -683,7 +726,9 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
if (flags & (MPOL_F_NODE|MPOL_F_ADDR))
return -EINVAL;
*policy = 0; /* just so it's initialized */
+ task_lock(current);
*nmask = cpuset_current_mems_allowed;
+ task_unlock(current);
return 0;
}
@@ -738,8 +783,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
}
err = 0;
- if (nmask)
+ if (nmask) {
+ task_lock(current);
get_policy_nodemask(pol, nmask);
+ task_unlock(current);
+ }
out:
mpol_cond_put(pol);
@@ -767,7 +815,7 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
static struct page *new_node_page(struct page *page, unsigned long node, int **x)
{
- return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0);
+ return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
}
/*
@@ -978,7 +1026,23 @@ static long do_mbind(unsigned long start, unsigned long len,
if (err)
return err;
}
- down_write(&mm->mmap_sem);
+ {
+ NODEMASK_SCRATCH(scratch);
+ if (scratch) {
+ down_write(&mm->mmap_sem);
+ task_lock(current);
+ err = mpol_set_nodemask(new, nmask, scratch);
+ task_unlock(current);
+ if (err)
+ up_write(&mm->mmap_sem);
+ } else
+ err = -ENOMEM;
+ NODEMASK_SCRATCH_FREE(scratch);
+ }
+ if (err) {
+ mpol_put(new);
+ return err;
+ }
vma = check_range(mm, start, end, nmask,
flags | MPOL_MF_INVERT, &pagelist);
@@ -1545,8 +1609,6 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
struct mempolicy *pol = get_vma_policy(current, vma, addr);
struct zonelist *zl;
- cpuset_update_task_memory_state();
-
if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
unsigned nid;
@@ -1593,8 +1655,6 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
{
struct mempolicy *pol = current->mempolicy;
- if ((gfp & __GFP_WAIT) && !in_interrupt())
- cpuset_update_task_memory_state();
if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
pol = &default_policy;
@@ -1851,27 +1911,46 @@ restart:
* Install non-NULL @mpol in inode's shared policy rb-tree.
* On entry, the current task has a reference on a non-NULL @mpol.
* This must be released on exit.
+ * This is called at get_inode() calls and we can use GFP_KERNEL.
*/
void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
{
+ int ret;
+
sp->root = RB_ROOT; /* empty tree == default mempolicy */
spin_lock_init(&sp->lock);
if (mpol) {
struct vm_area_struct pvma;
struct mempolicy *new;
+ NODEMASK_SCRATCH(scratch);
+ if (!scratch)
+ return;
/* contextualize the tmpfs mount point mempolicy */
new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
- mpol_put(mpol); /* drop our ref on sb mpol */
- if (IS_ERR(new))
+ if (IS_ERR(new)) {
+ mpol_put(mpol); /* drop our ref on sb mpol */
+ NODEMASK_SCRATCH_FREE(scratch);
return; /* no valid nodemask intersection */
+ }
+
+ task_lock(current);
+ ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch);
+ task_unlock(current);
+ mpol_put(mpol); /* drop our ref on sb mpol */
+ if (ret) {
+ NODEMASK_SCRATCH_FREE(scratch);
+ mpol_put(new);
+ return;
+ }
/* Create pseudo-vma that contains just the policy */
memset(&pvma, 0, sizeof(struct vm_area_struct));
pvma.vm_end = TASK_SIZE; /* policy covers entire file */
mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
mpol_put(new); /* drop initial ref */
+ NODEMASK_SCRATCH_FREE(scratch);
}
}
@@ -2086,8 +2165,24 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
new = mpol_new(mode, mode_flags, &nodes);
if (IS_ERR(new))
err = 1;
- else if (no_context)
- new->w.user_nodemask = nodes; /* save for contextualization */
+ else {
+ int ret;
+ NODEMASK_SCRATCH(scratch);
+ if (scratch) {
+ task_lock(current);
+ ret = mpol_set_nodemask(new, &nodes, scratch);
+ task_unlock(current);
+ } else
+ ret = -ENOMEM;
+ NODEMASK_SCRATCH_FREE(scratch);
+ if (ret) {
+ err = 1;
+ mpol_put(new);
+ } else if (no_context) {
+ /* save for contextualization */
+ new->w.user_nodemask = nodes;
+ }
+ }
out:
/* Restore string for error message */