summaryrefslogtreecommitdiffstats
path: root/mm/mempolicy.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--mm/mempolicy.c51
1 files changed, 34 insertions, 17 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ec4a1a950df..cf18f094255 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache;
/* Highest zone. An specific allocation for a zone below that is not
policied. */
-int policy_zone = ZONE_DMA;
+enum zone_type policy_zone = ZONE_DMA;
struct mempolicy default_policy = {
.refcnt = ATOMIC_INIT(1), /* never free it */
@@ -137,7 +137,8 @@ static int mpol_check_policy(int mode, nodemask_t *nodes)
static struct zonelist *bind_zonelist(nodemask_t *nodes)
{
struct zonelist *zl;
- int num, max, nd, k;
+ int num, max, nd;
+ enum zone_type k;
max = 1 + MAX_NR_ZONES * nodes_weight(*nodes);
zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL);
@@ -148,12 +149,16 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
lower zones etc. Avoid empty zones because the memory allocator
doesn't like them. If you implement node hot removal you
have to fix that. */
- for (k = policy_zone; k >= 0; k--) {
+ k = policy_zone;
+ while (1) {
for_each_node_mask(nd, *nodes) {
struct zone *z = &NODE_DATA(nd)->node_zones[k];
if (z->present_pages > 0)
zl->zones[num++] = z;
}
+ if (k == 0)
+ break;
+ k--;
}
zl->zones[num] = NULL;
return zl;
@@ -482,7 +487,7 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
switch (p->policy) {
case MPOL_BIND:
for (i = 0; p->v.zonelist->zones[i]; i++)
- node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id,
+ node_set(zone_to_nid(p->v.zonelist->zones[i]),
*nodes);
break;
case MPOL_DEFAULT:
@@ -632,6 +637,10 @@ int do_migrate_pages(struct mm_struct *mm,
down_read(&mm->mmap_sem);
+ err = migrate_vmas(mm, from_nodes, to_nodes, flags);
+ if (err)
+ goto out;
+
/*
* Find a 'source' bit set in 'tmp' whose corresponding 'dest'
* bit in 'to' is not also set in 'tmp'. Clear the found 'source'
@@ -691,7 +700,7 @@ int do_migrate_pages(struct mm_struct *mm,
if (err < 0)
break;
}
-
+out:
up_read(&mm->mmap_sem);
if (err < 0)
return err;
@@ -1127,7 +1136,9 @@ static unsigned interleave_nodes(struct mempolicy *policy)
*/
unsigned slab_node(struct mempolicy *policy)
{
- switch (policy->policy) {
+ int pol = policy ? policy->policy : MPOL_DEFAULT;
+
+ switch (pol) {
case MPOL_INTERLEAVE:
return interleave_nodes(policy);
@@ -1136,7 +1147,7 @@ unsigned slab_node(struct mempolicy *policy)
* Follow bind policy behavior and start allocation at the
* first node.
*/
- return policy->v.zonelist->zones[0]->zone_pgdat->node_id;
+ return zone_to_nid(policy->v.zonelist->zones[0]);
case MPOL_PREFERRED:
if (policy->v.preferred_node >= 0)
@@ -1172,7 +1183,15 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
if (vma) {
unsigned long off;
- off = vma->vm_pgoff;
+ /*
+ * for small pages, there is no difference between
+ * shift and PAGE_SHIFT, so the bit-shift is safe.
+ * for huge pages, since vm_pgoff is in units of small
+ * pages, we need to shift off the always 0 bits to get
+ * a useful offset.
+ */
+ BUG_ON(shift < PAGE_SHIFT);
+ off = vma->vm_pgoff >> (shift - PAGE_SHIFT);
off += (addr - vma->vm_start) >> shift;
return offset_il_node(pol, vma, off);
} else
@@ -1205,10 +1224,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
page = __alloc_pages(gfp, order, zl);
- if (page && page_zone(page) == zl->zones[0]) {
- zone_pcp(zl->zones[0],get_cpu())->interleave_hit++;
- put_cpu();
- }
+ if (page && page_zone(page) == zl->zones[0])
+ inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
return page;
}
@@ -1275,7 +1292,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
if ((gfp & __GFP_WAIT) && !in_interrupt())
cpuset_update_task_memory_state();
- if (!pol || in_interrupt())
+ if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
pol = &default_policy;
if (pol->policy == MPOL_INTERLEAVE)
return alloc_page_interleave(gfp, order, interleave_nodes(pol));
@@ -1634,7 +1651,7 @@ void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
nodes_clear(nodes);
for (z = pol->v.zonelist->zones; *z; z++)
- node_set((*z)->zone_pgdat->node_id, nodes);
+ node_set(zone_to_nid(*z), nodes);
nodes_remap(tmp, nodes, *mpolmask, *newmask);
nodes = tmp;
@@ -1817,7 +1834,7 @@ static inline void check_huge_range(struct vm_area_struct *vma,
int show_numa_map(struct seq_file *m, void *v)
{
- struct task_struct *task = m->private;
+ struct proc_maps_private *priv = m->private;
struct vm_area_struct *vma = v;
struct numa_maps *md;
struct file *file = vma->vm_file;
@@ -1833,7 +1850,7 @@ int show_numa_map(struct seq_file *m, void *v)
return 0;
mpol_to_str(buffer, sizeof(buffer),
- get_vma_policy(task, vma, vma->vm_start));
+ get_vma_policy(priv->task, vma, vma->vm_start));
seq_printf(m, "%08lx %s", vma->vm_start, buffer);
@@ -1887,7 +1904,7 @@ out:
kfree(md);
if (m->count < m->size)
- m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
+ m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
return 0;
}