From 5adc7b518b54f7af2b8395d2035898340d96b1d5 Mon Sep 17 00:00:00 2001
From: Minchan Kim
Date: Tue, 22 Mar 2011 16:32:41 -0700
Subject: mm: truncate: change remove_from_page_cache

This patch series changes remove_from_page_cache()'s page ref counting
rule: the page cache reference is now dropped inside
delete_from_page_cache(), so callers no longer need to drop it
themselves.

Signed-off-by: Minchan Kim
Cc: Dan Magenheimer
Cc: Andi Kleen
Cc: Nick Piggin
Cc: Al Viro
Acked-by: Hugh Dickins
Acked-by: Mel Gorman
Reviewed-by: KAMEZAWA Hiroyuki
Reviewed-by: Johannes Weiner
Reviewed-by: KOSAKI Motohiro
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/truncate.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'mm/truncate.c')

diff --git a/mm/truncate.c b/mm/truncate.c
index d64296be00d..7c617b5f03e 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -106,9 +106,8 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
 	cancel_dirty_page(page, PAGE_CACHE_SIZE);
 
 	clear_page_mlock(page);
-	remove_from_page_cache(page);
 	ClearPageMappedToDisk(page);
-	page_cache_release(page);	/* pagecache ref */
+	delete_from_page_cache(page);
 	return 0;
 }
 
--
cgit v1.2.3-70-g09d2


From e64a782fec684c29a8204c51b3cb554dce588592 Mon Sep 17 00:00:00 2001
From: Minchan Kim
Date: Tue, 22 Mar 2011 16:32:44 -0700
Subject: mm: change __remove_from_page_cache()

Now that remove_from_page_cache() has been renamed to
delete_from_page_cache(), rename the internal page cache handling
function as well, for consistency with the
__remove_from_swap_cache()/remove_from_swap_cache() pairing.

Signed-off-by: Minchan Kim
Cc: Christoph Hellwig
Acked-by: Hugh Dickins
Acked-by: Mel Gorman
Reviewed-by: KAMEZAWA Hiroyuki
Reviewed-by: Johannes Weiner
Reviewed-by: KOSAKI Motohiro
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/pagemap.h | 2 +-
 mm/filemap.c            | 8 ++++----
 mm/memory-failure.c     | 2 +-
 mm/truncate.c           | 2 +-
 mm/vmscan.c             | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'mm/truncate.c')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ea36ff29a51..29ebba54c23 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -456,7 +456,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 				pgoff_t index, gfp_t gfp_mask);
 extern void delete_from_page_cache(struct page *page);
-extern void __remove_from_page_cache(struct page *page);
+extern void __delete_from_page_cache(struct page *page);
 int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
 
 /*
diff --git a/mm/filemap.c b/mm/filemap.c
index 1cfb8fd84b2..cb476e70cf1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -108,11 +108,11 @@
  */
 
 /*
- * Remove a page from the page cache and free it. Caller has to make
+ * Delete a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
  * is safe.  The caller must hold the mapping's tree_lock.
  */
-void __remove_from_page_cache(struct page *page)
+void __delete_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
@@ -154,7 +154,7 @@ void delete_from_page_cache(struct page *page)
 	freepage = mapping->a_ops->freepage;
 
 	spin_lock_irq(&mapping->tree_lock);
-	__remove_from_page_cache(page);
+	__delete_from_page_cache(page);
 	spin_unlock_irq(&mapping->tree_lock);
 	mem_cgroup_uncharge_cache_page(page);
 
@@ -444,7 +444,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 		new->index = offset;
 
 		spin_lock_irq(&mapping->tree_lock);
-		__remove_from_page_cache(old);
+		__delete_from_page_cache(old);
 		error = radix_tree_insert(&mapping->page_tree, offset, new);
 		BUG_ON(error);
 		mapping->nrpages++;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 99ccb447262..e0af336530c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1130,7 +1130,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 
 	/*
 	 * Now take care of user space mappings.
-	 * Abort on fail: __remove_from_page_cache() assumes unmapped page.
+	 * Abort on fail: __delete_from_page_cache() assumes unmapped page.
 	 */
 	if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) {
 		printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
diff --git a/mm/truncate.c b/mm/truncate.c
index 7c617b5f03e..3d2ae1f423d 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -388,7 +388,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 
 	clear_page_mlock(page);
 	BUG_ON(page_has_private(page));
-	__remove_from_page_cache(page);
+	__delete_from_page_cache(page);
 	spin_unlock_irq(&mapping->tree_lock);
 	mem_cgroup_uncharge_cache_page(page);
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3b4a41d7248..665b090b6c7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -514,7 +514,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
 
 		freepage = mapping->a_ops->freepage;
 
-		__remove_from_page_cache(page);
+		__delete_from_page_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
 		mem_cgroup_uncharge_cache_page(page);
 
--
cgit v1.2.3-70-g09d2


From 315601809d124d046abd6c3ffa346d0dbd7aa29d Mon Sep 17 00:00:00 2001
From: Minchan Kim
Date: Tue, 22 Mar 2011 16:32:52 -0700
Subject: mm: deactivate invalidated pages

Recently there have been reports of thrashing
(http://marc.info/?l=rsync&m=128885034930933&w=2) caused by backup
workloads such as a nightly rsync.  Such a workload creates use-once
pages but touches each page twice, which promotes the pages to the
active list and ends up evicting the real working set.

Some application developers would like POSIX_FADV_NOREUSE for this,
but other OSes don't support it either
(http://marc.info/?l=linux-mm&m=128928979512086&w=2).

As an alternative, application developers use POSIX_FADV_DONTNEED, but
it has a problem: if invalidate_mapping_pages() finds a page that is
still being written back, it cannot drop it.  That makes the hint
awkward to use, since applications always have to sync their data
before calling fadvise(..POSIX_FADV_DONTNEED) to make sure the pages
are discardable.  As a result they cannot take advantage of the
kernel's deferred writeback and may see a performance loss
(http://insights.oetiker.ch/linux/fadvise.html).

In fact, invalidation is a very strong hint to the reclaimer: it means
we are not going to use the page any more.  So if we can't truncate a
page right now, move it to the head of the inactive list instead.

The page goes to the head of the inactive LRU rather than the tail
because a dirty or writeback page will be flushed sooner or later
anyway.
Keeping it there avoids reclaim's single-page writeout (pageout), which
is less effective than the flusher threads' writeout.

Originally I reused Peter's lru_demote patch with some changes, so his
Signed-off-by is added.

Signed-off-by: Minchan Kim
Reported-by: Ben Gamari
Signed-off-by: Peter Zijlstra
Acked-by: Rik van Riel
Acked-by: Mel Gorman
Reviewed-by: KOSAKI Motohiro
Cc: Wu Fengguang
Acked-by: Johannes Weiner
Cc: Nick Piggin
Cc: Andrea Arcangeli
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/swap.h |  1 +
 mm/swap.c            | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/truncate.c        | 17 ++++++++----
 3 files changed, 91 insertions(+), 5 deletions(-)

(limited to 'mm/truncate.c')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4d559325d91..c335055c425 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -215,6 +215,7 @@ extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
 extern int lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
+extern void deactivate_page(struct page *page);
 extern void swap_setup(void);
 
 extern void add_page_to_unevictable_list(struct page *page);
diff --git a/mm/swap.c b/mm/swap.c
index c02f93611a8..4aea806d0d4 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -39,6 +39,7 @@ int page_cluster;
 
 static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
 
 /*
  * This path almost never happens for VM activity - pages are normally
@@ -346,6 +347,60 @@ void add_page_to_unevictable_list(struct page *page)
 	spin_unlock_irq(&zone->lru_lock);
 }
 
+/*
+ * If the page can not be invalidated, it is moved to the
+ * inactive list to speed up its reclaim.  It is moved to the
+ * head of the list, rather than the tail, to give the flusher
+ * threads some time to write it out, as this is much more
+ * effective than the single-page writeout from reclaim.
+ */
+static void lru_deactivate(struct page *page, struct zone *zone)
+{
+	int lru, file;
+
+	if (!PageLRU(page) || !PageActive(page))
+		return;
+
+	/* Some processes are using the page */
+	if (page_mapped(page))
+		return;
+
+	file = page_is_file_cache(page);
+	lru = page_lru_base_type(page);
+	del_page_from_lru_list(zone, page, lru + LRU_ACTIVE);
+	ClearPageActive(page);
+	ClearPageReferenced(page);
+	add_page_to_lru_list(zone, page, lru);
+	__count_vm_event(PGDEACTIVATE);
+
+	update_page_reclaim_stat(zone, page, file, 0);
+}
+
+static void ____pagevec_lru_deactivate(struct pagevec *pvec)
+{
+	int i;
+	struct zone *zone = NULL;
+
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		struct zone *pagezone = page_zone(page);
+
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
+		}
+		lru_deactivate(page, zone);
+	}
+	if (zone)
+		spin_unlock_irq(&zone->lru_lock);
+
+	release_pages(pvec->pages, pvec->nr, pvec->cold);
+	pagevec_reinit(pvec);
+}
+
+
 /*
  * Drain pages out of the cpu's pagevecs.
  * Either "cpu" is the current CPU, and preemption has already been
@@ -372,6 +427,29 @@ static void drain_cpu_pagevecs(int cpu)
 		pagevec_move_tail(pvec);
 		local_irq_restore(flags);
 	}
+
+	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+	if (pagevec_count(pvec))
+		____pagevec_lru_deactivate(pvec);
+}
+
+/**
+ * deactivate_page - forcefully deactivate a page
+ * @page: page to deactivate
+ *
+ * This function hints the VM that @page is a good reclaim candidate,
+ * for example if its invalidation fails due to the page being dirty
+ * or under writeback.
+ */
+void deactivate_page(struct page *page)
+{
+	if (likely(get_page_unless_zero(page))) {
+		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+
+		if (!pagevec_add(pvec, page))
+			____pagevec_lru_deactivate(pvec);
+		put_cpu_var(lru_deactivate_pvecs);
+	}
 }
 
 void lru_add_drain(void)
diff --git a/mm/truncate.c b/mm/truncate.c
index 3d2ae1f423d..a9566752913 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -321,11 +321,12 @@ EXPORT_SYMBOL(truncate_inode_pages);
  * pagetables.
  */
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
-		pgoff_t start, pgoff_t end)
+				pgoff_t start, pgoff_t end)
 {
 	struct pagevec pvec;
 	pgoff_t next = start;
-	unsigned long ret = 0;
+	unsigned long ret;
+	unsigned long count = 0;
 	int i;
 
 	pagevec_init(&pvec, 0);
@@ -352,9 +353,15 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			if (lock_failed)
 				continue;
 
-			ret += invalidate_inode_page(page);
-
+			ret = invalidate_inode_page(page);
 			unlock_page(page);
+			/*
+			 * Invalidation is a hint that the page is no longer
+			 * of interest and try to speed up its reclaim.
+			 */
+			if (!ret)
+				deactivate_page(page);
+			count += ret;
 			if (next > end)
 				break;
 		}
@@ -362,7 +369,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		mem_cgroup_uncharge_end();
 		cond_resched();
 	}
-	return count;
+	return count;
 }
 EXPORT_SYMBOL(invalidate_mapping_pages);
 
--
cgit v1.2.3-70-g09d2
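
For reference, a minimal caller-side sketch of the semantics introduced by
the three patches above.  This is illustrative only and is not part of the
commits: the helper names example_drop_page() and example_invalidate_one()
are made up, and the sketch assumes the caller already holds its own
reference to the page and that the page is unmapped where required.  It
shows that delete_from_page_cache() now drops the page cache reference
itself (no separate page_cache_release() for that reference), and that a
page which cannot be invalidated can be handed to deactivate_page() as a
reclaim hint, which is what invalidate_mapping_pages() now does.

#include <linux/mm.h>		/* invalidate_inode_page() */
#include <linux/pagemap.h>	/* delete_from_page_cache(), lock_page() */
#include <linux/swap.h>		/* deactivate_page() */

/* Drop a locked, unmapped page from the page cache (illustrative). */
static void example_drop_page(struct page *page)
{
	lock_page(page);
	/*
	 * delete_from_page_cache() drops the page cache reference on its
	 * own; before this series the caller had to pair
	 * remove_from_page_cache() with a page_cache_release().
	 */
	delete_from_page_cache(page);
	unlock_page(page);
	page_cache_release(page);	/* drop the caller's own reference */
}

/* Try to invalidate one page; if that fails, hint the reclaimer. */
static void example_invalidate_one(struct page *page)
{
	int ret;

	if (!trylock_page(page))
		return;
	ret = invalidate_inode_page(page);
	unlock_page(page);
	if (!ret)
		deactivate_page(page);	/* e.g. dirty/writeback: move to inactive list */
}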