From ef6a3c63112e865d632ff7c478ba7c7160cad0d1 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi
Date: Tue, 22 Mar 2011 16:30:52 -0700
Subject: mm: add replace_page_cache_page() function

This function basically does:

	remove_from_page_cache(old);
	page_cache_release(old);
	add_to_page_cache_locked(new);

Except it does this atomically, so there's no possibility for the "add" to
fail because of a race.

If memory cgroups are enabled, then the memory cgroup charge is also moved
from the old page to the new.

This function is currently used by fuse to move pages into the page cache
on read, instead of copying the page contents.

[minchan.kim@gmail.com: add freepage() hook to replace_page_cache_page()]
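As an illustration of the calling convention, a minimal sketch of how a
filesystem read path might swap a freshly filled page for a cached one; the
wrapper name and error handling below are hypothetical, assuming the
int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
signature added by this patch, and are not taken from the fuse code:

	#include <linux/pagemap.h>
	#include <linux/gfp.h>

	/* Hypothetical helper, for illustration only. */
	static int example_swap_cached_page(struct page *oldpage,
					    struct page *newpage)
	{
		int err;

		/*
		 * One atomic step: oldpage leaves the radix tree, newpage is
		 * inserted at the same offset, and the memcg charge moves to
		 * newpage.  Unlike remove_from_page_cache() followed by
		 * add_to_page_cache_locked(), the "add" cannot fail because
		 * of a racing insertion.
		 */
		err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
		if (err)
			return err;	/* e.g. -ENOMEM from radix tree preload */

		/* newpage is now what readers find at that offset */
		return 0;
	}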
Signed-off-by: Miklos Szeredi
Acked-by: Rik van Riel
Acked-by: KAMEZAWA Hiroyuki
Cc: Mel Gorman
Signed-off-by: Minchan Kim
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/migrate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/migrate.c')

diff --git a/mm/migrate.c b/mm/migrate.c
index 352de555626..8aacce3af8c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -678,7 +678,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	}
 
 	/* charge against new page */
-	charge = mem_cgroup_prepare_migration(page, newpage, &mem);
+	charge = mem_cgroup_prepare_migration(page, newpage, &mem, GFP_KERNEL);
 	if (charge == -ENOMEM) {
 		rc = -ENOMEM;
 		goto unlock;
--
cgit v1.2.3-70-g09d2


From 9e60109f125013b6c571f399a15a8b0fe1ffa4e6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 22 Mar 2011 16:32:46 -0700
Subject: mm: rename drop_anon_vma() to put_anon_vma()

The normal code pattern used in the kernel is: get/put.
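For illustration, a minimal sketch of how anon_vma reference handling reads
after the rename; the locking and the reason for pinning the anon_vma are
omitted, and the function below is not part of the patch:

	#include <linux/rmap.h>
	#include <linux/mm_types.h>

	/* Illustrative only: the usual get/put pairing, now spelled that way. */
	static void example_pin_anon_vma(struct vm_area_struct *vma)
	{
		struct anon_vma *anon_vma = vma->anon_vma;

		get_anon_vma(anon_vma);		/* take a reference */
		/* ... work that relies on the anon_vma staying alive ... */
		put_anon_vma(anon_vma);		/* formerly drop_anon_vma() */
	}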
Signed-off-by: Peter Zijlstra
Reviewed-by: KAMEZAWA Hiroyuki
Acked-by: Hugh Dickins
Reviewed-by: Rik van Riel
Acked-by: Mel Gorman
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/rmap.h |  4 ++--
 mm/ksm.c             | 23 +++++------------------
 mm/migrate.c         |  4 ++--
 mm/rmap.c            |  4 ++--
 4 files changed, 11 insertions(+), 24 deletions(-)

(limited to 'mm/migrate.c')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index e9fd04ca1e5..b9b23ddca63 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -87,7 +87,7 @@ static inline void get_anon_vma(struct anon_vma *anon_vma)
 	atomic_inc(&anon_vma->external_refcount);
 }
 
-void drop_anon_vma(struct anon_vma *);
+void put_anon_vma(struct anon_vma *);
 #else
 static inline void anonvma_external_refcount_init(struct anon_vma *anon_vma)
 {
@@ -102,7 +102,7 @@ static inline void get_anon_vma(struct anon_vma *anon_vma)
 {
 }
 
-static inline void drop_anon_vma(struct anon_vma *anon_vma)
+static inline void put_anon_vma(struct anon_vma *anon_vma)
 {
 }
 #endif /* CONFIG_KSM */
diff --git a/mm/ksm.c b/mm/ksm.c
index c2b2a94f9d6..1bbe785aa55 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -301,20 +301,6 @@ static inline int in_stable_tree(struct rmap_item *rmap_item)
 	return rmap_item->address & STABLE_FLAG;
 }
 
-static void hold_anon_vma(struct rmap_item *rmap_item,
-			  struct anon_vma *anon_vma)
-{
-	rmap_item->anon_vma = anon_vma;
-	get_anon_vma(anon_vma);
-}
-
-static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
-{
-	struct anon_vma *anon_vma = rmap_item->anon_vma;
-
-	drop_anon_vma(anon_vma);
-}
-
 /*
  * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
  * page tables after it has passed through ksm_exit() - which, if necessary,
@@ -397,7 +383,7 @@ static void break_cow(struct rmap_item *rmap_item)
 	 * It is not an accident that whenever we want to break COW
	 * to undo, we also need to drop a reference to the anon_vma.
 	 */
-	ksm_drop_anon_vma(rmap_item);
+	put_anon_vma(rmap_item->anon_vma);
 
 	down_read(&mm->mmap_sem);
 	if (ksm_test_exit(mm))
@@ -466,7 +452,7 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
 			ksm_pages_sharing--;
 		else
 			ksm_pages_shared--;
-		ksm_drop_anon_vma(rmap_item);
+		put_anon_vma(rmap_item->anon_vma);
 		rmap_item->address &= PAGE_MASK;
 		cond_resched();
 	}
@@ -554,7 +540,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 		else
 			ksm_pages_shared--;
 
-		ksm_drop_anon_vma(rmap_item);
+		put_anon_vma(rmap_item->anon_vma);
 		rmap_item->address &= PAGE_MASK;
 
 	} else if (rmap_item->address & UNSTABLE_FLAG) {
@@ -949,7 +935,8 @@ static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
 		goto out;
 
 	/* Must get reference to anon_vma while still holding mmap_sem */
-	hold_anon_vma(rmap_item, vma->anon_vma);
+	rmap_item->anon_vma = vma->anon_vma;
+	get_anon_vma(vma->anon_vma);
 out:
 	up_read(&mm->mmap_sem);
 	return err;
diff --git a/mm/migrate.c b/mm/migrate.c
index 8aacce3af8c..7d2983f3783 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -764,7 +764,7 @@ skip_unmap:
 
 	/* Drop an anon_vma reference if we took one */
 	if (anon_vma)
-		drop_anon_vma(anon_vma);
+		put_anon_vma(anon_vma);
 
 uncharge:
 	if (!charge)
@@ -856,7 +856,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 		remove_migration_ptes(hpage, hpage);
 
 	if (anon_vma)
-		drop_anon_vma(anon_vma);
+		put_anon_vma(anon_vma);
 out:
 	unlock_page(hpage);
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 941bf82e896..ad416afb206 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -278,7 +278,7 @@ static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
 	if (empty) {
 		/* We no longer need the root anon_vma */
 		if (anon_vma->root != anon_vma)
-			drop_anon_vma(anon_vma->root);
+			put_anon_vma(anon_vma->root);
 		anon_vma_free(anon_vma);
 	}
 }
@@ -1493,7 +1493,7 @@ int try_to_munlock(struct page *page)
  * we know we are the last user, nobody else can get a reference and we
  * can do the freeing without the lock.
  */
-void drop_anon_vma(struct anon_vma *anon_vma)
+void put_anon_vma(struct anon_vma *anon_vma)
 {
 	BUG_ON(atomic_read(&anon_vma->external_refcount) <= 0);
 	if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
--
cgit v1.2.3-70-g09d2


From 11bc82d67d1150767901bca54a24466621d763d7 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli
Date: Tue, 22 Mar 2011 16:33:11 -0700
Subject: mm: compaction: Use async migration for __GFP_NO_KSWAPD and enforce no writeback

__GFP_NO_KSWAPD allocations are usually very expensive and not mandatory
to succeed, as they have a graceful fallback.  Waiting for I/O in those
tends to be overkill in terms of latency, so we can reduce their latency
by disabling sync migration.

Unfortunately, even with async migration it's still possible for the
process to be blocked waiting for a request slot (e.g. get_request_wait
in the block layer) when ->writepage is called.  To prevent
__GFP_NO_KSWAPD blocking, this patch prevents ->writepage being called on
dirty page cache for asynchronous migration.

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=31142

[mel@csn.ul.ie: Avoid writebacks for NFS, retry locked pages, use bool]
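A condensed sketch of the rule the migrate.c hunk below enforces (it
restates the patch rather than adding anything): in asynchronous mode, a
dirty page whose filesystem would have to write it back is skipped with
-EBUSY instead of ever reaching ->writepage.

	#include <linux/fs.h>
	#include <linux/migrate.h>
	#include <linux/page-flags.h>

	/* Illustration only; the real check sits inside move_to_new_page(). */
	static int example_async_migrate_check(struct page *page,
					       struct address_space *mapping,
					       bool sync)
	{
		if (PageDirty(page) && !sync &&
		    mapping->a_ops->migratepage != migrate_page)
			return -EBUSY;	/* leave the page for sync compaction */

		return 0;		/* safe to call ->migratepage or fall back */
	}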
Signed-off-by: Andrea Arcangeli
Signed-off-by: Mel Gorman
Cc: Arthur Marsh
Cc: Clemens Ladisch
Cc: Johannes Weiner
Cc: KAMEZAWA Hiroyuki
Cc: Minchan Kim
Reported-by: Alex Villacis Lasso
Tested-by: Alex Villacis Lasso
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/migrate.c    | 48 +++++++++++++++++++++++++++++++++---------------
 mm/page_alloc.c |  2 +-
 2 files changed, 34 insertions(+), 16 deletions(-)

(limited to 'mm/migrate.c')

diff --git a/mm/migrate.c b/mm/migrate.c
index 7d2983f3783..89e5c3fe8bb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -564,7 +564,7 @@ static int fallback_migrate_page(struct address_space *mapping,
  *  == 0 - success
  */
 static int move_to_new_page(struct page *newpage, struct page *page,
-					int remap_swapcache)
+				int remap_swapcache, bool sync)
 {
 	struct address_space *mapping;
 	int rc;
@@ -586,18 +586,28 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 	mapping = page_mapping(page);
 	if (!mapping)
 		rc = migrate_page(mapping, newpage, page);
-	else if (mapping->a_ops->migratepage)
+	else {
 		/*
-		 * Most pages have a mapping and most filesystems
-		 * should provide a migration function. Anonymous
-		 * pages are part of swap space which also has its
-		 * own migration function. This is the most common
-		 * path for page migration.
+		 * Do not writeback pages if !sync and migratepage is
+		 * not pointing to migrate_page() which is nonblocking
+		 * (swapcache/tmpfs uses migratepage = migrate_page).
 		 */
-		rc = mapping->a_ops->migratepage(mapping,
-						newpage, page);
-	else
-		rc = fallback_migrate_page(mapping, newpage, page);
+		if (PageDirty(page) && !sync &&
+		    mapping->a_ops->migratepage != migrate_page)
+			rc = -EBUSY;
+		else if (mapping->a_ops->migratepage)
+			/*
+			 * Most pages have a mapping and most filesystems
+			 * should provide a migration function. Anonymous
+			 * pages are part of swap space which also has its
+			 * own migration function. This is the most common
+			 * path for page migration.
+			 */
+			rc = mapping->a_ops->migratepage(mapping,
+							newpage, page);
+		else
+			rc = fallback_migrate_page(mapping, newpage, page);
+	}
 
 	if (rc) {
 		newpage->mapping = NULL;
@@ -641,7 +651,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	rc = -EAGAIN;
 
 	if (!trylock_page(page)) {
-		if (!force)
+		if (!force || !sync)
 			goto move_newpage;
 
 		/*
@@ -686,7 +696,15 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	BUG_ON(charge);
 
 	if (PageWriteback(page)) {
-		if (!force || !sync)
+		/*
+		 * For !sync, there is no point retrying as the retry loop
+		 * is expected to be too short for PageWriteback to be cleared
+		 */
+		if (!sync) {
+			rc = -EBUSY;
+			goto uncharge;
+		}
+		if (!force)
 			goto uncharge;
 		wait_on_page_writeback(page);
 	}
@@ -757,7 +775,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 
 skip_unmap:
 	if (!page_mapped(page))
-		rc = move_to_new_page(newpage, page, remap_swapcache);
+		rc = move_to_new_page(newpage, page, remap_swapcache, sync);
 
 	if (rc && remap_swapcache)
 		remove_migration_ptes(page, page);
@@ -850,7 +868,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
 
 	if (!page_mapped(hpage))
-		rc = move_to_new_page(new_hpage, hpage, 1);
+		rc = move_to_new_page(new_hpage, hpage, 1, sync);
 
 	if (rc)
 		remove_migration_ptes(hpage, hpage);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 426056aff12..6d0032bdb5d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2103,7 +2103,7 @@ rebalance:
 					sync_migration);
 	if (page)
 		goto got_pg;
-	sync_migration = true;
+	sync_migration = !(gfp_mask & __GFP_NO_KSWAPD);
 
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order,
--
cgit v1.2.3-70-g09d2


From 56039efa18f2530fc23e8ef19e716b65ee2a1d1e Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki
Date: Wed, 23 Mar 2011 16:42:19 -0700
Subject: memcg: fix ugly initialization of return value in caller

Remove the initialization of a variable in callers of memory cgroup
functions.  It is actually the return value of the memcg function, but it
is initialized in the caller.

Some memory cgroup code uses the following style to carry the result of a
start function to the matching end function, in order to avoid races:

	mem_cgroup_start_A(&(*ptr))
	/* Something very complicated can happen here. */
	mem_cgroup_end_A(*ptr)

In some calls, *ptr has to be initialized to NULL by the caller, which is
ugly.  This patch makes the _start function initialize *ptr instead.
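For illustration, a condensed sketch of the resulting calling convention,
modelled on the do_swap_page() hunk below; error paths and the cancel
variant are omitted, and the wrapper function is hypothetical:

	#include <linux/mm.h>
	#include <linux/memcontrol.h>

	static int example_swapin_charge(struct mm_struct *mm, struct page *page)
	{
		struct mem_cgroup *ptr;	/* no "= NULL" needed any more */
		int ret;

		/* the _start call initializes *ptr itself before anything else */
		ret = mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr);
		if (ret)
			return ret;

		/* ... map the page into the page tables ... */

		/* the _end call consumes whatever _start recorded in ptr */
		mem_cgroup_commit_charge_swapin(page, ptr);
		return 0;
	}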
Signed-off-by: KAMEZAWA Hiroyuki
Acked-by: Johannes Weiner
Acked-by: Daisuke Nishimura
Cc: Balbir Singh
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memcontrol.c | 8 ++++++--
 mm/memory.c     | 2 +-
 mm/migrate.c    | 2 +-
 mm/swapfile.c   | 2 +-
 4 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'mm/migrate.c')

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e1ee6ad9c97..b56bd74b486 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2475,7 +2475,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 	/* shmem */
 	if (PageSwapCache(page)) {
-		struct mem_cgroup *mem = NULL;
+		struct mem_cgroup *mem;
 
 		ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
 		if (!ret)
@@ -2501,6 +2501,8 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 	struct mem_cgroup *mem;
 	int ret;
 
+	*ptr = NULL;
+
 	if (mem_cgroup_disabled())
 		return 0;
@@ -2916,6 +2918,8 @@ int mem_cgroup_prepare_migration(struct page *page,
 	enum charge_type ctype;
 	int ret = 0;
 
+	*ptr = NULL;
+
 	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
 		return 0;
@@ -3058,7 +3062,7 @@ int mem_cgroup_shmem_charge_fallback(struct page *page,
 			struct mm_struct *mm,
 			gfp_t gfp_mask)
 {
-	struct mem_cgroup *mem = NULL;
+	struct mem_cgroup *mem;
 	int ret;
 
 	if (mem_cgroup_disabled())
diff --git a/mm/memory.c b/mm/memory.c
index 615be5127ce..20d5f7499ce 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2767,7 +2767,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	swp_entry_t entry;
 	pte_t pte;
 	int locked;
-	struct mem_cgroup *ptr = NULL;
+	struct mem_cgroup *ptr;
 	int exclusive = 0;
 	int ret = 0;
diff --git a/mm/migrate.c b/mm/migrate.c
index 89e5c3fe8bb..b0406d739ea 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -633,7 +633,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	struct page *newpage = get_new_page(page, private, &result);
 	int remap_swapcache = 1;
 	int charge = 0;
-	struct mem_cgroup *mem = NULL;
+	struct mem_cgroup *mem;
 	struct anon_vma *anon_vma = NULL;
 
 	if (!newpage)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 71b42ec55b7..039e6167763 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -880,7 +880,7 @@ unsigned int count_swap_pages(int type, int free)
 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, swp_entry_t entry, struct page *page)
 {
-	struct mem_cgroup *ptr = NULL;
+	struct mem_cgroup *ptr;
 	spinlock_t *ptl;
 	pte_t *pte;
 	int ret = 1;
--
cgit v1.2.3-70-g09d2