summaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c381
1 files changed, 340 insertions, 41 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index be8235fb193..2e34b61a70c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -71,6 +71,9 @@ struct scan_control {
int may_writepage;
+ /* Can pages be swapped as part of reclaim? */
+ int may_swap;
+
/* This context's SWAP_CLUSTER_MAX. If freeing memory for
* suspend, we effectively ignore SWAP_CLUSTER_MAX.
* In this context, it doesn't matter that we scan the
@@ -180,8 +183,7 @@ EXPORT_SYMBOL(remove_shrinker);
*
* Returns the number of slab objects which we shrunk.
*/
-static int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
- unsigned long lru_pages)
+int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages)
{
struct shrinker *shrinker;
int ret = 0;
@@ -269,9 +271,7 @@ static inline int is_page_cache_freeable(struct page *page)
static int may_write_to_queue(struct backing_dev_info *bdi)
{
- if (current_is_kswapd())
- return 1;
- if (current_is_pdflush()) /* This is unlikely, but why not... */
+ if (current->flags & PF_SWAPWRITE)
return 1;
if (!bdi_write_congested(bdi))
return 1;
@@ -376,6 +376,43 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
return PAGE_CLEAN;
}
+static int remove_mapping(struct address_space *mapping, struct page *page)
+{
+ if (!mapping)
+ return 0; /* truncate got there first */
+
+ write_lock_irq(&mapping->tree_lock);
+
+ /*
+ * The non-racy check for busy page. It is critical to check
+ * PageDirty _after_ making sure that the page is freeable and
+ * not in use by anybody. (pagecache + us == 2)
+ */
+ if (unlikely(page_count(page) != 2))
+ goto cannot_free;
+ smp_rmb();
+ if (unlikely(PageDirty(page)))
+ goto cannot_free;
+
+ if (PageSwapCache(page)) {
+ swp_entry_t swap = { .val = page_private(page) };
+ __delete_from_swap_cache(page);
+ write_unlock_irq(&mapping->tree_lock);
+ swap_free(swap);
+ __put_page(page); /* The pagecache ref */
+ return 1;
+ }
+
+ __remove_from_page_cache(page);
+ write_unlock_irq(&mapping->tree_lock);
+ __put_page(page);
+ return 1;
+
+cannot_free:
+ write_unlock_irq(&mapping->tree_lock);
+ return 0;
+}
+
/*
* shrink_list adds the number of reclaimed pages to sc->nr_reclaimed
*/
@@ -424,7 +461,9 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
* Try to allocate it some swap space here.
*/
if (PageAnon(page) && !PageSwapCache(page)) {
- if (!add_to_swap(page))
+ if (!sc->may_swap)
+ goto keep_locked;
+ if (!add_to_swap(page, GFP_ATOMIC))
goto activate_locked;
}
#endif /* CONFIG_SWAP */
@@ -507,36 +546,8 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
goto free_it;
}
- if (!mapping)
- goto keep_locked; /* truncate got there first */
-
- write_lock_irq(&mapping->tree_lock);
-
- /*
- * The non-racy check for busy page. It is critical to check
- * PageDirty _after_ making sure that the page is freeable and
- * not in use by anybody. (pagecache + us == 2)
- */
- if (unlikely(page_count(page) != 2))
- goto cannot_free;
- smp_rmb();
- if (unlikely(PageDirty(page)))
- goto cannot_free;
-
-#ifdef CONFIG_SWAP
- if (PageSwapCache(page)) {
- swp_entry_t swap = { .val = page_private(page) };
- __delete_from_swap_cache(page);
- write_unlock_irq(&mapping->tree_lock);
- swap_free(swap);
- __put_page(page); /* The pagecache ref */
- goto free_it;
- }
-#endif /* CONFIG_SWAP */
-
- __remove_from_page_cache(page);
- write_unlock_irq(&mapping->tree_lock);
- __put_page(page);
+ if (!remove_mapping(mapping, page))
+ goto keep_locked;
free_it:
unlock_page(page);
@@ -545,10 +556,6 @@ free_it:
__pagevec_release_nonlru(&freed_pvec);
continue;
-cannot_free:
- write_unlock_irq(&mapping->tree_lock);
- goto keep_locked;
-
activate_locked:
SetPageActive(page);
pgactivate++;
@@ -566,6 +573,228 @@ keep:
return reclaimed;
}
+#ifdef CONFIG_MIGRATION
+static inline void move_to_lru(struct page *page)
+{
+ list_del(&page->lru);
+ if (PageActive(page)) {
+ /*
+ * lru_cache_add_active checks that
+ * the PG_active bit is off.
+ */
+ ClearPageActive(page);
+ lru_cache_add_active(page);
+ } else {
+ lru_cache_add(page);
+ }
+ put_page(page);
+}
+
+/*
+ * Add isolated pages on the list back to the LRU.
+ *
+ * returns the number of pages put back.
+ */
+int putback_lru_pages(struct list_head *l)
+{
+ struct page *page;
+ struct page *page2;
+ int count = 0;
+
+ list_for_each_entry_safe(page, page2, l, lru) {
+ move_to_lru(page);
+ count++;
+ }
+ return count;
+}
+
+/*
+ * swapout a single page
+ * page is locked upon entry, unlocked on exit
+ */
+static int swap_page(struct page *page)
+{
+ struct address_space *mapping = page_mapping(page);
+
+ if (page_mapped(page) && mapping)
+ if (try_to_unmap(page) != SWAP_SUCCESS)
+ goto unlock_retry;
+
+ if (PageDirty(page)) {
+ /* Page is dirty, try to write it out here */
+ switch(pageout(page, mapping)) {
+ case PAGE_KEEP:
+ case PAGE_ACTIVATE:
+ goto unlock_retry;
+
+ case PAGE_SUCCESS:
+ goto retry;
+
+ case PAGE_CLEAN:
+ ; /* try to free the page below */
+ }
+ }
+
+ if (PagePrivate(page)) {
+ if (!try_to_release_page(page, GFP_KERNEL) ||
+ (!mapping && page_count(page) == 1))
+ goto unlock_retry;
+ }
+
+ if (remove_mapping(mapping, page)) {
+ /* Success */
+ unlock_page(page);
+ return 0;
+ }
+
+unlock_retry:
+ unlock_page(page);
+
+retry:
+ return -EAGAIN;
+}
+/*
+ * migrate_pages
+ *
+ * Two lists are passed to this function. The first list
+ * contains the pages isolated from the LRU to be migrated.
+ * The second list contains new pages that the pages isolated
+ * can be moved to. If the second list is NULL then all
+ * pages are swapped out.
+ *
+ * The function returns after 10 attempts or if no pages
+ * are movable anymore because t has become empty
+ * or no retryable pages exist anymore.
+ *
+ * SIMPLIFIED VERSION: This implementation of migrate_pages
+ * is only swapping out pages and never touches the second
+ * list. The direct migration patchset
+ * extends this function to avoid the use of swap.
+ *
+ * Return: Number of pages not migrated when "to" ran empty.
+ */
+int migrate_pages(struct list_head *from, struct list_head *to,
+ struct list_head *moved, struct list_head *failed)
+{
+ int retry;
+ int nr_failed = 0;
+ int pass = 0;
+ struct page *page;
+ struct page *page2;
+ int swapwrite = current->flags & PF_SWAPWRITE;
+ int rc;
+
+ if (!swapwrite)
+ current->flags |= PF_SWAPWRITE;
+
+redo:
+ retry = 0;
+
+ list_for_each_entry_safe(page, page2, from, lru) {
+ cond_resched();
+
+ rc = 0;
+ if (page_count(page) == 1)
+ /* page was freed from under us. So we are done. */
+ goto next;
+
+ /*
+ * Skip locked pages during the first two passes to give the
+ * functions holding the lock time to release the page. Later we
+ * use lock_page() to have a higher chance of acquiring the
+ * lock.
+ */
+ rc = -EAGAIN;
+ if (pass > 2)
+ lock_page(page);
+ else
+ if (TestSetPageLocked(page))
+ goto next;
+
+ /*
+ * Only wait on writeback if we have already done a pass where
+ * we we may have triggered writeouts for lots of pages.
+ */
+ if (pass > 0) {
+ wait_on_page_writeback(page);
+ } else {
+ if (PageWriteback(page))
+ goto unlock_page;
+ }
+
+ /*
+ * Anonymous pages must have swap cache references otherwise
+ * the information contained in the page maps cannot be
+ * preserved.
+ */
+ if (PageAnon(page) && !PageSwapCache(page)) {
+ if (!add_to_swap(page, GFP_KERNEL)) {
+ rc = -ENOMEM;
+ goto unlock_page;
+ }
+ }
+
+ /*
+ * Page is properly locked and writeback is complete.
+ * Try to migrate the page.
+ */
+ rc = swap_page(page);
+ goto next;
+
+unlock_page:
+ unlock_page(page);
+
+next:
+ if (rc == -EAGAIN) {
+ retry++;
+ } else if (rc) {
+ /* Permanent failure */
+ list_move(&page->lru, failed);
+ nr_failed++;
+ } else {
+ /* Success */
+ list_move(&page->lru, moved);
+ }
+ }
+ if (retry && pass++ < 10)
+ goto redo;
+
+ if (!swapwrite)
+ current->flags &= ~PF_SWAPWRITE;
+
+ return nr_failed + retry;
+}
+
+/*
+ * Isolate one page from the LRU lists and put it on the
+ * indicated list with elevated refcount.
+ *
+ * Result:
+ * 0 = page not on LRU list
+ * 1 = page removed from LRU list and added to the specified list.
+ */
+int isolate_lru_page(struct page *page)
+{
+ int ret = 0;
+
+ if (PageLRU(page)) {
+ struct zone *zone = page_zone(page);
+ spin_lock_irq(&zone->lru_lock);
+ if (TestClearPageLRU(page)) {
+ ret = 1;
+ get_page(page);
+ if (PageActive(page))
+ del_page_from_active_list(zone, page);
+ else
+ del_page_from_inactive_list(zone, page);
+ }
+ spin_unlock_irq(&zone->lru_lock);
+ }
+
+ return ret;
+}
+#endif
+
/*
* zone->lru_lock is heavily contended. Some of the functions that
* shrink the lists perform better by taking out a batch of pages
@@ -942,6 +1171,7 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
sc.gfp_mask = gfp_mask;
sc.may_writepage = 0;
+ sc.may_swap = 1;
inc_page_state(allocstall);
@@ -1044,6 +1274,7 @@ loop_again:
total_reclaimed = 0;
sc.gfp_mask = GFP_KERNEL;
sc.may_writepage = 0;
+ sc.may_swap = 1;
sc.nr_mapped = read_page_state(nr_mapped);
inc_page_state(pageoutrun);
@@ -1226,7 +1457,7 @@ static int kswapd(void *p)
* us from recursively trying to free more memory as we're
* trying to free the first piece of memory in the first place).
*/
- tsk->flags |= PF_MEMALLOC|PF_KSWAPD;
+ tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
order = 0;
for ( ; ; ) {
@@ -1341,3 +1572,71 @@ static int __init kswapd_init(void)
}
module_init(kswapd_init)
+
+#ifdef CONFIG_NUMA
+/*
+ * Zone reclaim mode
+ *
+ * If non-zero call zone_reclaim when the number of free pages falls below
+ * the watermarks.
+ *
+ * In the future we may add flags to the mode. However, the page allocator
+ * should only have to check that zone_reclaim_mode != 0 before calling
+ * zone_reclaim().
+ */
+int zone_reclaim_mode __read_mostly;
+
+/*
+ * Mininum time between zone reclaim scans
+ */
+#define ZONE_RECLAIM_INTERVAL HZ/2
+/*
+ * Try to free up some pages from this zone through reclaim.
+ */
+int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+{
+ int nr_pages = 1 << order;
+ struct task_struct *p = current;
+ struct reclaim_state reclaim_state;
+ struct scan_control sc = {
+ .gfp_mask = gfp_mask,
+ .may_writepage = 0,
+ .may_swap = 0,
+ .nr_mapped = read_page_state(nr_mapped),
+ .nr_scanned = 0,
+ .nr_reclaimed = 0,
+ .priority = 0
+ };
+
+ if (!(gfp_mask & __GFP_WAIT) ||
+ zone->zone_pgdat->node_id != numa_node_id() ||
+ zone->all_unreclaimable ||
+ atomic_read(&zone->reclaim_in_progress) > 0)
+ return 0;
+
+ if (time_before(jiffies,
+ zone->last_unsuccessful_zone_reclaim + ZONE_RECLAIM_INTERVAL))
+ return 0;
+
+ disable_swap_token();
+
+ if (nr_pages > SWAP_CLUSTER_MAX)
+ sc.swap_cluster_max = nr_pages;
+ else
+ sc.swap_cluster_max = SWAP_CLUSTER_MAX;
+
+ cond_resched();
+ p->flags |= PF_MEMALLOC;
+ reclaim_state.reclaimed_slab = 0;
+ p->reclaim_state = &reclaim_state;
+ shrink_zone(zone, &sc);
+ p->reclaim_state = NULL;
+ current->flags &= ~PF_MEMALLOC;
+
+ if (sc.nr_reclaimed == 0)
+ zone->last_unsuccessful_zone_reclaim = jiffies;
+
+ return sc.nr_reclaimed > nr_pages;
+}
+#endif
+