Diffstat (limited to 'mm')
-rw-r--r--   mm/filemap.c        |  5
-rw-r--r--   mm/hugetlb.c        |  3
-rw-r--r--   mm/ksm.c            |  7
-rw-r--r--   mm/memcontrol.c     | 66
-rw-r--r--   mm/memory-failure.c |  8
-rw-r--r--   mm/memory_hotplug.c | 31
-rw-r--r--   mm/mempolicy.c      |  3
-rw-r--r--   mm/mmap.c           | 16
-rw-r--r--   mm/nommu.c          |  1
-rw-r--r--   mm/page_alloc.c     | 33
-rw-r--r--   mm/pagewalk.c       |  5
-rw-r--r--   mm/slub.c           |  4
-rw-r--r--   mm/truncate.c       |  4
-rw-r--r--   mm/vmalloc.c        | 28
-rw-r--r--   mm/vmscan.c         |  7
-rw-r--r--   mm/vmstat.c         |  4
16 files changed, 155 insertions, 70 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index ea89840fc65..6b9aee20f24 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -143,13 +143,18 @@ void __remove_from_page_cache(struct page *page)
 void remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
+	void (*freepage)(struct page *);
 
 	BUG_ON(!PageLocked(page));
 
+	freepage = mapping->a_ops->freepage;
 	spin_lock_irq(&mapping->tree_lock);
 	__remove_from_page_cache(page);
 	spin_unlock_irq(&mapping->tree_lock);
 	mem_cgroup_uncharge_cache_page(page);
+
+	if (freepage)
+		freepage(page);
 }
 EXPORT_SYMBOL(remove_from_page_cache);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c4a3558589a..85855240933 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2738,7 +2738,8 @@ out_page_table_lock:
 		unlock_page(pagecache_page);
 		put_page(pagecache_page);
 	}
-	unlock_page(page);
+	if (page != pagecache_page)
+		unlock_page(page);
 
 out_mutex:
 	mutex_unlock(&hugetlb_instantiation_mutex);
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1724,8 +1724,13 @@ static int ksm_memory_callback(struct notifier_block *self,
 		/*
 		 * Keep it very simple for now: just lock out ksmd and
 		 * MADV_UNMERGEABLE while any memory is going offline.
+		 * mutex_lock_nested() is necessary because lockdep was alarmed
+		 * that here we take ksm_thread_mutex inside notifier chain
+		 * mutex, and later take notifier chain mutex inside
+		 * ksm_thread_mutex to unlock it. But that's safe because both
+		 * are inside mem_hotplug_mutex.
 		 */
-		mutex_lock(&ksm_thread_mutex);
+		mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING);
 		break;
 
 	case MEM_OFFLINE:
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2efa8ea07ff..7a22b412921 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -61,7 +61,14 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
 int do_swap_account __read_mostly;
-static int really_do_swap_account __initdata = 1; /* for remember boot option*/
+
+/* for remember boot option*/
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED
+static int really_do_swap_account __initdata = 1;
+#else
+static int really_do_swap_account __initdata = 0;
+#endif
+
 #else
 #define do_swap_account		(0)
 #endif
@@ -278,13 +285,14 @@ enum move_type {
 
 /* "mc" and its members are protected by cgroup_mutex */
 static struct move_charge_struct {
-	spinlock_t	  lock; /* for from, to, moving_task */
+	spinlock_t	  lock; /* for from, to */
 	struct mem_cgroup *from;
 	struct mem_cgroup *to;
 	unsigned long precharge;
 	unsigned long moved_charge;
 	unsigned long moved_swap;
 	struct task_struct *moving_task;	/* a task moving charges */
+	struct mm_struct *mm;
 	wait_queue_head_t waitq;		/* a waitq for other context */
 } mc = {
 	.lock = __SPIN_LOCK_UNLOCKED(mc.lock),
@@ -2152,7 +2160,7 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
 {
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
-	VM_BUG_ON(!PageCgroupLocked(pc));
+	VM_BUG_ON(!page_is_cgroup_locked(pc));
 	VM_BUG_ON(!PageCgroupUsed(pc));
 	VM_BUG_ON(pc->mem_cgroup != from);
 
@@ -4631,7 +4639,7 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
 	unsigned long precharge;
 	struct vm_area_struct *vma;
 
-	down_read(&mm->mmap_sem);
+	/* We've already held the mmap_sem */
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		struct mm_walk mem_cgroup_count_precharge_walk = {
 			.pmd_entry = mem_cgroup_count_precharge_pte_range,
@@ -4643,7 +4651,6 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
 		walk_page_range(vma->vm_start, vma->vm_end,
 					&mem_cgroup_count_precharge_walk);
 	}
-	up_read(&mm->mmap_sem);
 
 	precharge = mc.precharge;
 	mc.precharge = 0;
@@ -4694,11 +4701,16 @@ static void mem_cgroup_clear_mc(void)
 		mc.moved_swap = 0;
 	}
+	if (mc.mm) {
+		up_read(&mc.mm->mmap_sem);
+		mmput(mc.mm);
+	}
 	spin_lock(&mc.lock);
 	mc.from = NULL;
 	mc.to = NULL;
-	mc.moving_task = NULL;
 	spin_unlock(&mc.lock);
+	mc.moving_task = NULL;
+	mc.mm = NULL;
 	mem_cgroup_end_move(from);
 	memcg_oom_recover(from);
 	memcg_oom_recover(to);
@@ -4724,12 +4736,21 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 			return 0;
 		/* We move charges only when we move a owner of the mm */
 		if (mm->owner == p) {
+			/*
+			 * We do all the move charge works under one mmap_sem to
+			 * avoid deadlock with down_write(&mmap_sem)
+			 * -> try_charge() -> if (mc.moving_task) -> sleep.
+			 */
+			down_read(&mm->mmap_sem);
+
 			VM_BUG_ON(mc.from);
 			VM_BUG_ON(mc.to);
 			VM_BUG_ON(mc.precharge);
 			VM_BUG_ON(mc.moved_charge);
 			VM_BUG_ON(mc.moved_swap);
 			VM_BUG_ON(mc.moving_task);
+			VM_BUG_ON(mc.mm);
+
 			mem_cgroup_start_move(from);
 			spin_lock(&mc.lock);
 			mc.from = from;
@@ -4737,14 +4758,16 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 			mc.precharge = 0;
 			mc.moved_charge = 0;
 			mc.moved_swap = 0;
-			mc.moving_task = current;
 			spin_unlock(&mc.lock);
+			mc.moving_task = current;
+			mc.mm = mm;
 
 			ret = mem_cgroup_precharge_mc(mm);
 			if (ret)
 				mem_cgroup_clear_mc();
-		}
-		mmput(mm);
+			/* We call up_read() and mmput() in clear_mc(). */
+		} else
+			mmput(mm);
 	}
 	return ret;
 }
@@ -4832,7 +4855,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
 	struct vm_area_struct *vma;
 
 	lru_add_drain_all();
-	down_read(&mm->mmap_sem);
+	/* We've already held the mmap_sem */
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		int ret;
 		struct mm_walk mem_cgroup_move_charge_walk = {
@@ -4851,7 +4874,6 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
 			 */
 			break;
 	}
-	up_read(&mm->mmap_sem);
 }
 
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
@@ -4860,17 +4882,11 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 				struct task_struct *p,
 				bool threadgroup)
 {
-	struct mm_struct *mm;
-
-	if (!mc.to)
+	if (!mc.mm)
 		/* no need to move charge */
 		return;
 
-	mm = get_task_mm(p);
-	if (mm) {
-		mem_cgroup_move_charge(mm);
-		mmput(mm);
-	}
+	mem_cgroup_move_charge(mc.mm);
 	mem_cgroup_clear_mc();
 }
 #else	/* !CONFIG_MMU */
@@ -4911,10 +4927,20 @@ struct cgroup_subsys mem_cgroup_subsys = {
 };
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+static int __init enable_swap_account(char *s)
+{
+	/* consider enabled if no parameter or 1 is given */
+	if (!s || !strcmp(s, "1"))
+		really_do_swap_account = 1;
+	else if (!strcmp(s, "0"))
+		really_do_swap_account = 0;
+	return 1;
+}
+__setup("swapaccount", enable_swap_account);
 
 static int __init disable_swap_account(char *s)
 {
-	really_do_swap_account = 0;
+	enable_swap_account("0");
 	return 1;
 }
 __setup("noswapaccount", disable_swap_account);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 124324134ff..46ab2c044b0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -51,6 +51,7 @@
 #include <linux/slab.h>
 #include <linux/swapops.h>
 #include <linux/hugetlb.h>
+#include <linux/memory_hotplug.h>
 #include "internal.h"
 
 int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -1230,11 +1231,10 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 		return 1;
 
 	/*
-	 * The lock_system_sleep prevents a race with memory hotplug,
-	 * because the isolation assumes there's only a single user.
+	 * The lock_memory_hotplug prevents a race with memory hotplug.
 	 * This is a big hammer, a better would be nicer.
 	 */
-	lock_system_sleep();
+	lock_memory_hotplug();
 
 	/*
 	 * Isolate the page, so that it doesn't get reallocated if it
@@ -1264,7 +1264,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 		ret = 1;
 	}
 	unset_migratetype_isolate(p);
-	unlock_system_sleep();
+	unlock_memory_hotplug();
 	return ret;
 }
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9260314a221..2c6523af547 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -34,6 +34,23 @@
 
 #include "internal.h"
 
+DEFINE_MUTEX(mem_hotplug_mutex);
+
+void lock_memory_hotplug(void)
+{
+	mutex_lock(&mem_hotplug_mutex);
+
+	/* for exclusive hibernation if CONFIG_HIBERNATION=y */
+	lock_system_sleep();
+}
+
+void unlock_memory_hotplug(void)
+{
+	unlock_system_sleep();
+	mutex_unlock(&mem_hotplug_mutex);
+}
+
+
 /* add this memory to iomem resource */
 static struct resource *register_memory_resource(u64 start, u64 size)
 {
@@ -493,7 +510,7 @@ int mem_online_node(int nid)
 	pg_data_t	*pgdat;
 	int	ret;
 
-	lock_system_sleep();
+	lock_memory_hotplug();
 	pgdat = hotadd_new_pgdat(nid, 0);
 	if (pgdat) {
 		ret = -ENOMEM;
@@ -504,7 +521,7 @@ int mem_online_node(int nid)
 	BUG_ON(ret);
 
 out:
-	unlock_system_sleep();
+	unlock_memory_hotplug();
 	return ret;
 }
 
@@ -516,7 +533,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
 	struct resource *res;
 	int ret;
 
-	lock_system_sleep();
+	lock_memory_hotplug();
 
 	res = register_memory_resource(start, size);
 	ret = -EEXIST;
@@ -563,7 +580,7 @@ error:
 		release_memory_resource(res);
 
 out:
-	unlock_system_sleep();
+	unlock_memory_hotplug();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(add_memory);
@@ -791,7 +808,7 @@ static int offline_pages(unsigned long start_pfn,
 	if (!test_pages_in_a_zone(start_pfn, end_pfn))
 		return -EINVAL;
 
-	lock_system_sleep();
+	lock_memory_hotplug();
 
 	zone = page_zone(pfn_to_page(start_pfn));
 	node = zone_to_nid(zone);
@@ -880,7 +897,7 @@ repeat:
 	writeback_set_ratelimit();
 
 	memory_notify(MEM_OFFLINE, &arg);
-	unlock_system_sleep();
+	unlock_memory_hotplug();
 	return 0;
 
 failed_removal:
@@ -891,7 +908,7 @@ failed_removal:
 	undo_isolate_page_range(start_pfn, end_pfn);
 
 out:
-	unlock_system_sleep();
+	unlock_memory_hotplug();
 	return ret;
 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4a57f135b76..11ff260fb28 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1307,15 +1307,18 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
 		goto out;
 
 	/* Find the mm_struct */
+	rcu_read_lock();
 	read_lock(&tasklist_lock);
 	task = pid ? find_task_by_vpid(pid) : current;
 	if (!task) {
 		read_unlock(&tasklist_lock);
+		rcu_read_unlock();
 		err = -ESRCH;
 		goto out;
 	}
 	mm = get_task_mm(task);
 	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 	err = -EINVAL;
 	if (!mm)
diff --git a/mm/mmap.c b/mm/mmap.c
index b179abb1474..50a4aa0255a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2462,6 +2462,7 @@ int install_special_mapping(struct mm_struct *mm,
 			    unsigned long addr, unsigned long len,
 			    unsigned long vm_flags, struct page **pages)
 {
+	int ret;
 	struct vm_area_struct *vma;
 
 	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
@@ -2479,16 +2480,23 @@ int install_special_mapping(struct mm_struct *mm,
 	vma->vm_ops = &special_mapping_vmops;
 	vma->vm_private_data = pages;
 
-	if (unlikely(insert_vm_struct(mm, vma))) {
-		kmem_cache_free(vm_area_cachep, vma);
-		return -ENOMEM;
-	}
+	ret = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1);
+	if (ret)
+		goto out;
+
+	ret = insert_vm_struct(mm, vma);
+	if (ret)
+		goto out;
 
 	mm->total_vm += len >> PAGE_SHIFT;
 
 	perf_event_mmap(vma);
 
 	return 0;
+
+out:
+	kmem_cache_free(vm_area_cachep, vma);
+	return ret;
 }
 
 static DEFINE_MUTEX(mm_all_locks_mutex);
diff --git a/mm/nommu.c b/mm/nommu.c
index 3613517c759..27a9ac58851 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1717,6 +1717,7 @@ void exit_mmap(struct mm_struct *mm)
 		mm->mmap = vma->vm_next;
 		delete_vma_from_mm(vma);
 		delete_vma(mm, vma);
+		cond_resched();
 	}
 
 	kleave("");
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07a654486f7..ff7e1587239 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -104,19 +104,24 @@ gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
  * only be modified with pm_mutex held, unless the suspend/hibernate code is
  * guaranteed not to run in parallel with that modification).
  */
-void set_gfp_allowed_mask(gfp_t mask)
+
+static gfp_t saved_gfp_mask;
+
+void pm_restore_gfp_mask(void)
 {
 	WARN_ON(!mutex_is_locked(&pm_mutex));
-	gfp_allowed_mask = mask;
+	if (saved_gfp_mask) {
+		gfp_allowed_mask = saved_gfp_mask;
+		saved_gfp_mask = 0;
+	}
 }
 
-gfp_t clear_gfp_allowed_mask(gfp_t mask)
+void pm_restrict_gfp_mask(void)
 {
-	gfp_t ret = gfp_allowed_mask;
-
 	WARN_ON(!mutex_is_locked(&pm_mutex));
-	gfp_allowed_mask &= ~mask;
-	return ret;
+	WARN_ON(saved_gfp_mask);
+	saved_gfp_mask = gfp_allowed_mask;
+	gfp_allowed_mask &= ~GFP_IOFS;
 }
 
 #endif /* CONFIG_PM_SLEEP */
@@ -3008,14 +3013,6 @@ static __init_refok int __build_all_zonelists(void *data)
 		build_zonelist_cache(pgdat);
 	}
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-	/* Setup real pagesets for the new zone */
-	if (data) {
-		struct zone *zone = data;
-		setup_zone_pageset(zone);
-	}
-#endif
-
 	/*
 	 * Initialize the boot_pagesets that are going to be used
 	 * for bootstrapping processors. The real pagesets for
@@ -3064,7 +3061,11 @@ void build_all_zonelists(void *data)
 	} else {
 		/* we have to stop all cpus to guarantee there is no user
		   of zonelist */
-		stop_machine(__build_all_zonelists, data, NULL);
+#ifdef CONFIG_MEMORY_HOTPLUG
+		if (data)
+			setup_zone_pageset((struct zone *)data);
+#endif
+		stop_machine(__build_all_zonelists, NULL, NULL);
 		/* cpuset refresh routine should be here */
 	}
 	vm_total_pages = nr_free_pagecache_pages();
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 8b1a2ce21ee..38cc58b8b2b 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -139,7 +139,6 @@ int walk_page_range(unsigned long addr, unsigned long end,
 	pgd_t *pgd;
 	unsigned long next;
 	int err = 0;
-	struct vm_area_struct *vma;
 
 	if (addr >= end)
 		return err;
@@ -149,15 +148,17 @@ int walk_page_range(unsigned long addr, unsigned long end,
 
 	pgd = pgd_offset(walk->mm, addr);
 	do {
+		struct vm_area_struct *uninitialized_var(vma);
+
 		next = pgd_addr_end(addr, end);
 
+#ifdef CONFIG_HUGETLB_PAGE
 		/*
 		 * handle hugetlb vma individually because pagetable walk for
 		 * the hugetlb page is dependent on the architecture and
 		 * we can't handled it in the same manner as non-huge pages.
 		 */
 		vma = find_vma(walk->mm, addr);
-#ifdef CONFIG_HUGETLB_PAGE
 		if (vma && is_vm_hugetlb_page(vma)) {
 			if (vma->vm_end < next)
 				next = vma->vm_end;
diff --git a/mm/slub.c b/mm/slub.c
index 981fb730aa0..bec0e355fba 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3401,13 +3401,13 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
 
 	for_each_free_object(p, s, page->freelist) {
 		set_bit(slab_index(p, s, addr), map);
-		if (!check_object(s, page, p, 0))
+		if (!check_object(s, page, p, SLUB_RED_INACTIVE))
 			return 0;
 	}
 
 	for_each_object(p, s, addr, page->objects)
 		if (!test_bit(slab_index(p, s, addr), map))
-			if (!check_object(s, page, p, 1))
+			if (!check_object(s, page, p, SLUB_RED_ACTIVE))
 				return 0;
 	return 1;
 }
diff --git a/mm/truncate.c b/mm/truncate.c
index ba887bff48c..3c2d5ddfa0d 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -390,6 +390,10 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	__remove_from_page_cache(page);
 	spin_unlock_irq(&mapping->tree_lock);
 	mem_cgroup_uncharge_cache_page(page);
+
+	if (mapping->a_ops->freepage)
+		mapping->a_ops->freepage(page);
+
 	page_cache_release(page);	/* pagecache ref */
 	return 1;
 failed:
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a3d66b3dc5c..eb5cc7d00c5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,8 +31,6 @@
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
-bool vmap_lazy_unmap __read_mostly = true;
-
 /*** Page table manipulation functions ***/
 
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
@@ -503,9 +501,6 @@ static unsigned long lazy_max_pages(void)
 {
 	unsigned int log;
 
-	if (!vmap_lazy_unmap)
-		return 0;
-
 	log = fls(num_online_cpus());
 
 	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
@@ -566,7 +561,6 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
 			if (va->va_end > *end)
 				*end = va->va_end;
 			nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
-			unmap_vmap_area(va);
 			list_add_tail(&va->purge_list, &valist);
 			va->flags |= VM_LAZY_FREEING;
 			va->flags &= ~VM_LAZY_FREE;
@@ -611,10 +605,11 @@ static void purge_vmap_area_lazy(void)
 }
 
 /*
- * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
- * called for the correct range previously.
+ * Free a vmap area, caller ensuring that the area has been unmapped
+ * and flush_cache_vunmap had been called for the correct range
+ * previously.
  */
-static void free_unmap_vmap_area_noflush(struct vmap_area *va)
+static void free_vmap_area_noflush(struct vmap_area *va)
 {
 	va->flags |= VM_LAZY_FREE;
 	atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
@@ -623,6 +618,16 @@ static void free_unmap_vmap_area_noflush(struct vmap_area *va)
 }
 
 /*
+ * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
+ * called for the correct range previously.
+ */
+static void free_unmap_vmap_area_noflush(struct vmap_area *va)
+{
+	unmap_vmap_area(va);
+	free_vmap_area_noflush(va);
+}
+
+/*
  * Free and unmap a vmap area
  */
 static void free_unmap_vmap_area(struct vmap_area *va)
@@ -798,7 +803,7 @@ static void free_vmap_block(struct vmap_block *vb)
 	spin_unlock(&vmap_block_tree_lock);
 	BUG_ON(tmp != vb);
 
-	free_unmap_vmap_area_noflush(vb->va);
+	free_vmap_area_noflush(vb->va);
 	call_rcu(&vb->rcu_head, rcu_free_vb);
 }
 
@@ -936,6 +941,8 @@ static void vb_free(const void *addr, unsigned long size)
 	rcu_read_unlock();
 	BUG_ON(!vb);
 
+	vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
+
 	spin_lock(&vb->lock);
 	BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
@@ -988,7 +995,6 @@ void vm_unmap_aliases(void)
 
 				s = vb->va->va_start + (i << PAGE_SHIFT);
 				e = vb->va->va_start + (j << PAGE_SHIFT);
-				vunmap_page_range(s, e);
 				flush = 1;
 
 				if (s < start)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d31d7ce52c0..9ca587c6927 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -494,9 +494,16 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
 		spin_unlock_irq(&mapping->tree_lock);
 		swapcache_free(swap, page);
 	} else {
+		void (*freepage)(struct page *);
+
+		freepage = mapping->a_ops->freepage;
+
 		__remove_from_page_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
 		mem_cgroup_uncharge_cache_page(page);
+
+		if (freepage != NULL)
+			freepage(page);
 	}
 
 	return 1;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 42eac4d3321..8f62f17ee1c 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -750,8 +750,6 @@ static const char * const vmstat_text[] = {
 	"nr_shmem",
 	"nr_dirtied",
 	"nr_written",
-	"nr_dirty_threshold",
-	"nr_dirty_background_threshold",
 
 #ifdef CONFIG_NUMA
 	"numa_hit",
@@ -761,6 +759,8 @@ static const char * const vmstat_text[] = {
 	"numa_local",
 	"numa_other",
#endif
+	"nr_dirty_threshold",
+	"nr_dirty_background_threshold",
 
 #ifdef CONFIG_VM_EVENT_COUNTERS
 	"pgpgin",
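For context, the common thread in the mm/filemap.c, mm/truncate.c and mm/vmscan.c hunks above is a new address_space operation, ->freepage, invoked once a page has been removed from the page cache so the filesystem can drop any per-page state. Below is a minimal sketch of how a filesystem might hook it; "examplefs" and its handler are hypothetical names for illustration only and are not part of this commit.

/*
 * Hypothetical sketch (not from this commit): wiring up the new
 * ->freepage address_space operation.  It runs after the page has been
 * removed from the page cache and the mapping's tree_lock has been
 * dropped, so only filesystem-private per-page state should be torn
 * down here; the page must not be re-added to the cache.
 */
#include <linux/fs.h>
#include <linux/mm_types.h>

static void examplefs_freepage(struct page *page)
{
	/* release any private bookkeeping examplefs kept for this page */
}

static const struct address_space_operations examplefs_aops = {
	.freepage	= examplefs_freepage,
};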