From 6837765963f1723e80ca97b1fae660f3a60d77df Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro
Date: Tue, 16 Jun 2009 15:32:51 -0700
Subject: mm: remove CONFIG_UNEVICTABLE_LRU config option

Currently, nobody wants to turn UNEVICTABLE_LRU off, so this
configurability is unnecessary.

Signed-off-by: KOSAKI Motohiro
Cc: Johannes Weiner
Cc: Andi Kleen
Acked-by: Minchan Kim
Cc: David Woodhouse
Cc: Matt Mackall
Cc: Rik van Riel
Cc: Lee Schermerhorn
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/swap.h | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'include/linux/swap.h')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index d476aad3ff5..f30c06908f0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -235,7 +235,6 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 }
 #endif

-#ifdef CONFIG_UNEVICTABLE_LRU
 extern int page_evictable(struct page *page, struct vm_area_struct *vma);
 extern void scan_mapping_unevictable_pages(struct address_space *);

@@ -244,24 +243,6 @@ extern int scan_unevictable_handler(struct ctl_table *, int, struct file *,
					void __user *, size_t *, loff_t *);
 extern int scan_unevictable_register_node(struct node *node);
 extern void scan_unevictable_unregister_node(struct node *node);
-#else
-static inline int page_evictable(struct page *page,
-					struct vm_area_struct *vma)
-{
-	return 1;
-}
-
-static inline void scan_mapping_unevictable_pages(struct address_space *mapping)
-{
-}
-
-static inline int scan_unevictable_register_node(struct node *node)
-{
-	return 0;
-}
-
-static inline void scan_unevictable_unregister_node(struct node *node) { }
-#endif

 extern int kswapd_run(int nid);
-- cgit v1.2.3-70-g09d2

From cb4b86ba47bb0937b71fb825b3ed88adf7a190f0 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki
Date: Tue, 16 Jun 2009 15:32:52 -0700
Subject: mm: add swap cache interface for swap reference

In a following patch, swap cache usage will be recorded in swap_map.
This patch makes the interface changes necessary for that.

Two interfaces:

- swapcache_prepare()
- swapcache_free()

are added for taking and releasing a swap-cache reference on existing
swap entries.  The implementation itself is not changed by this patch.
While adding swapcache_free(), memcg's hook code is moved under
swapcache_free(); this is better than using scattered hooks.
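As an aside for readers, the pairing can be sketched in a few lines of
self-contained userspace C (an illustration only, not kernel code: the
fixed-size table, the int-indexed slots, and the main() driver are
assumptions made for this sketch):

#include <assert.h>
#include <stdio.h>

/* Toy model: one reference count per swap slot, no cache flag yet. */
static int swap_map[8];

static int swap_duplicate(int slot)	/* returns 1 on success, 0 on failure */
{
	if (!swap_map[slot])
		return 0;		/* entry no longer in use */
	swap_map[slot]++;
	return 1;
}

static void swap_free(int slot)
{
	if (swap_map[slot])
		swap_map[slot]--;
}

/* At this stage the new interfaces are thin wrappers, as in the patch. */
static int swapcache_prepare(int slot)	{ return swap_duplicate(slot); }
static void swapcache_free(int slot)	{ swap_free(slot); }

int main(void)
{
	swap_map[3] = 1;		/* slot 3 handed out by allocation */

	assert(swapcache_prepare(3));	/* swap cache takes its reference */
	swap_free(3);			/* the swap user drops its reference */
	swapcache_free(3);		/* deleting from swap cache drops the last one */

	printf("slot 3 refcount: %d\n", swap_map[3]);	/* 0: slot is free again */
	return 0;
}

The point of the split is that call sites now state which kind of
reference they hold; a later patch exploits this by recording the cache
reference in swap_map with its own flag.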
Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Daisuke Nishimura Acked-by: Balbir Singh Cc: Hugh Dickins Cc: Johannes Weiner Cc: Li Zefan Cc: Dhaval Giani Cc: YAMAMOTO Takashi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 7 +++++++ mm/shmem.c | 2 +- mm/swap_state.c | 11 +++++------ mm/swapfile.c | 19 +++++++++++++++++++ mm/vmscan.c | 3 +-- 5 files changed, 33 insertions(+), 9 deletions(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index f30c06908f0..259e96c150e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -282,8 +282,10 @@ extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); extern swp_entry_t get_swap_page_of_type(int); extern int swap_duplicate(swp_entry_t); +extern int swapcache_prepare(swp_entry_t); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern void swap_free(swp_entry_t); +extern void swapcache_free(swp_entry_t, struct page *page); extern int free_swap_and_cache(swp_entry_t); extern int swap_type_of(dev_t, sector_t, struct block_device **); extern unsigned int count_swap_pages(int, int); @@ -352,11 +354,16 @@ static inline void show_swap_cache_info(void) #define free_swap_and_cache(swp) is_migration_entry(swp) #define swap_duplicate(swp) is_migration_entry(swp) +#define swapcache_prepare(swp) is_migration_entry(swp) static inline void swap_free(swp_entry_t swp) { } +static inline void swapcache_free(swp_entry_t swp, struct page *page) +{ +} + static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr) { diff --git a/mm/shmem.c b/mm/shmem.c index 0132fbd45a2..47ab1918228 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1097,7 +1097,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) shmem_swp_unmap(entry); unlock: spin_unlock(&info->lock); - swap_free(swap); + swapcache_free(swap, NULL); redirty: set_page_dirty(page); if (wbc->for_reclaim) diff --git a/mm/swap_state.c b/mm/swap_state.c index 1416e7e9e02..19bdf3017a9 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -162,11 +162,11 @@ int add_to_swap(struct page *page) return 1; case -EEXIST: /* Raced with "speculative" read_swap_cache_async */ - swap_free(entry); + swapcache_free(entry, NULL); continue; default: /* -ENOMEM radix-tree allocation failure */ - swap_free(entry); + swapcache_free(entry, NULL); return 0; } } @@ -188,8 +188,7 @@ void delete_from_swap_cache(struct page *page) __delete_from_swap_cache(page); spin_unlock_irq(&swapper_space.tree_lock); - mem_cgroup_uncharge_swapcache(page, entry); - swap_free(entry); + swapcache_free(entry, page); page_cache_release(page); } @@ -293,7 +292,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, /* * Swap entry may have been freed since our caller observed it. */ - if (!swap_duplicate(entry)) + if (!swapcache_prepare(entry)) break; /* @@ -317,7 +316,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, } ClearPageSwapBacked(new_page); __clear_page_locked(new_page); - swap_free(entry); + swapcache_free(entry, NULL); } while (err != -ENOMEM); if (new_page) diff --git a/mm/swapfile.c b/mm/swapfile.c index 312fafe0ab6..3187079903f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -509,6 +509,16 @@ void swap_free(swp_entry_t entry) } } +/* + * Called after dropping swapcache to decrease refcnt to swap entries. 
+ */
+void swapcache_free(swp_entry_t entry, struct page *page)
+{
+	if (page)
+		mem_cgroup_uncharge_swapcache(page, entry);
+	return swap_free(entry);
+}
+
 /*
  * How many references to page are currently swapped out?
  */
@@ -1979,6 +1989,15 @@ bad_file:
	goto out;
 }

+/*
+ * Called when allocating swap cache for an existing swap entry.
+ */
+int swapcache_prepare(swp_entry_t entry)
+{
+	return swap_duplicate(entry);
+}
+
+
 struct swap_info_struct *
 get_swap_info_struct(unsigned type)
 {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2c4b945b011..52339dd7bf8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -470,8 +470,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
		swp_entry_t swap = { .val = page_private(page) };
		__delete_from_swap_cache(page);
		spin_unlock_irq(&mapping->tree_lock);
-		mem_cgroup_uncharge_swapcache(page, swap);
-		swap_free(swap);
+		swapcache_free(swap, page);
	} else {
		__remove_from_page_cache(page);
		spin_unlock_irq(&mapping->tree_lock);
-- cgit v1.2.3-70-g09d2

From 355cfa73ddff2fb8fa14e93bd94a057cc022512e Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki
Date: Tue, 16 Jun 2009 15:32:53 -0700
Subject: mm: modify swap_map and add SWAP_HAS_CACHE flag

This is part of the patches for fixing memcg's swap accounting leak.
But, IMHO, it is not a bad patch even without memcg.

There are two kinds of references to swap:
- reference from a swap entry
- reference from the swap cache

Then:
- If there is a swap cache and the swap's refcnt is 1, there is only
  the swap cache.
  (*) swapcount(entry) == 1 && find_get_page(swapper_space, entry) != NULL

This counting logic has worked well for a long time.  But considering
that we cannot tell from swap_map[] whether a reference is a _real_ one
or not, the current usage of the counter is not very good.

This patch adds a flag SWAP_HAS_CACHE and records whether a swap entry
has a swap cache or not.  This removes the -1 magic used in swapfile.c
and helps avoid unnecessary find_get_page().

Signed-off-by: KAMEZAWA Hiroyuki
Tested-by: Daisuke Nishimura
Cc: Balbir Singh
Cc: Hugh Dickins
Cc: Johannes Weiner
Cc: Li Zefan
Cc: Dhaval Giani
Cc: YAMAMOTO Takashi
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/swap.h | 14 ++--
 mm/swap_state.c | 5 +-
 mm/swapfile.c | 214 ++++++++++++++++++++++++++++++++++++++-------------
 3 files changed, 172 insertions(+), 61 deletions(-)

(limited to 'include/linux/swap.h')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 259e96c150e..fed5e8e1104 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -129,9 +129,10 @@ enum {

 #define SWAP_CLUSTER_MAX 32

-#define SWAP_MAP_MAX	0x7fff
-#define SWAP_MAP_BAD	0x8000
-
+#define SWAP_MAP_MAX	0x7ffe
+#define SWAP_MAP_BAD	0x7fff
+#define SWAP_HAS_CACHE	0x8000	/* There is a swap cache of the entry. */
+#define SWAP_COUNT_MASK (~SWAP_HAS_CACHE)
 /*
  * The in-memory structure used to track swap areas.
*/ @@ -281,7 +282,7 @@ extern long total_swap_pages; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); extern swp_entry_t get_swap_page_of_type(int); -extern int swap_duplicate(swp_entry_t); +extern void swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern void swap_free(swp_entry_t); @@ -353,9 +354,12 @@ static inline void show_swap_cache_info(void) } #define free_swap_and_cache(swp) is_migration_entry(swp) -#define swap_duplicate(swp) is_migration_entry(swp) #define swapcache_prepare(swp) is_migration_entry(swp) +static inline void swap_duplicate(swp_entry_t swp) +{ +} + static inline void swap_free(swp_entry_t swp) { } diff --git a/mm/swap_state.c b/mm/swap_state.c index 19bdf3017a9..b9ca029673a 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -292,7 +292,10 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, /* * Swap entry may have been freed since our caller observed it. */ - if (!swapcache_prepare(entry)) + err = swapcache_prepare(entry); + if (err == -EEXIST) /* seems racy */ + continue; + if (err) /* swp entry is obsolete ? */ break; /* diff --git a/mm/swapfile.c b/mm/swapfile.c index 3187079903f..0d7296971ad 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -53,6 +53,33 @@ static struct swap_info_struct swap_info[MAX_SWAPFILES]; static DEFINE_MUTEX(swapon_mutex); +/* For reference count accounting in swap_map */ +/* enum for swap_map[] handling. internal use only */ +enum { + SWAP_MAP = 0, /* ops for reference from swap users */ + SWAP_CACHE, /* ops for reference from swap cache */ +}; + +static inline int swap_count(unsigned short ent) +{ + return ent & SWAP_COUNT_MASK; +} + +static inline bool swap_has_cache(unsigned short ent) +{ + return !!(ent & SWAP_HAS_CACHE); +} + +static inline unsigned short encode_swapmap(int count, bool has_cache) +{ + unsigned short ret = count; + + if (has_cache) + return SWAP_HAS_CACHE | ret; + return ret; +} + + /* * We need this because the bdev->unplug_fn can sleep and we cannot * hold swap_lock while calling the unplug_fn. 
And swap_lock
@@ -167,7 +194,8 @@ static int wait_for_discard(void *word)
 #define SWAPFILE_CLUSTER	256
 #define LATENCY_LIMIT		256

-static inline unsigned long scan_swap_map(struct swap_info_struct *si)
+static inline unsigned long scan_swap_map(struct swap_info_struct *si,
+					  int cache)
 {
	unsigned long offset;
	unsigned long scan_base;
@@ -285,7 +313,10 @@ checks:
		si->lowest_bit = si->max;
		si->highest_bit = 0;
	}
-	si->swap_map[offset] = 1;
+	if (cache == SWAP_CACHE) /* at usual swap-out via vmscan.c */
+		si->swap_map[offset] = encode_swapmap(0, true);
+	else /* at suspend */
+		si->swap_map[offset] = encode_swapmap(1, false);
	si->cluster_next = offset + 1;
	si->flags -= SWP_SCANNING;
@@ -401,7 +432,8 @@ swp_entry_t get_swap_page(void)
			continue;

		swap_list.next = next;
-		offset = scan_swap_map(si);
+		/* This is called for allocating swap entry for cache */
+		offset = scan_swap_map(si, SWAP_CACHE);
		if (offset) {
			spin_unlock(&swap_lock);
			return swp_entry(type, offset);
@@ -415,6 +447,7 @@ noswap:
	return (swp_entry_t) {0};
 }

+/* The only caller of this function is now the suspend routine */
 swp_entry_t get_swap_page_of_type(int type)
 {
	struct swap_info_struct *si;
@@ -424,7 +457,8 @@ swp_entry_t get_swap_page_of_type(int type)
	si = swap_info + type;
	if (si->flags & SWP_WRITEOK) {
		nr_swap_pages--;
-		offset = scan_swap_map(si);
+		/* This is called for allocating swap entry, not cache */
+		offset = scan_swap_map(si, SWAP_MAP);
		if (offset) {
			spin_unlock(&swap_lock);
			return swp_entry(type, offset);
@@ -471,25 +505,38 @@ out:
	return NULL;
 }

-static int swap_entry_free(struct swap_info_struct *p, swp_entry_t ent)
+static int swap_entry_free(struct swap_info_struct *p,
+			   swp_entry_t ent, int cache)
 {
	unsigned long offset = swp_offset(ent);
-	int count = p->swap_map[offset];
-
-	if (count < SWAP_MAP_MAX) {
-		count--;
-		p->swap_map[offset] = count;
-		if (!count) {
-			if (offset < p->lowest_bit)
-				p->lowest_bit = offset;
-			if (offset > p->highest_bit)
-				p->highest_bit = offset;
-			if (p->prio > swap_info[swap_list.next].prio)
-				swap_list.next = p - swap_info;
-			nr_swap_pages++;
-			p->inuse_pages--;
-			mem_cgroup_uncharge_swap(ent);
+	int count = swap_count(p->swap_map[offset]);
+	bool has_cache;
+
+	has_cache = swap_has_cache(p->swap_map[offset]);
+
+	if (cache == SWAP_MAP) { /* dropping usage count of swap */
+		if (count < SWAP_MAP_MAX) {
+			count--;
+			p->swap_map[offset] = encode_swapmap(count, has_cache);
		}
+	} else { /* dropping swap cache flag */
+		VM_BUG_ON(!has_cache);
+		p->swap_map[offset] = encode_swapmap(count, false);
+
+	}
+	/* return code.
*/ + count = p->swap_map[offset]; + /* free if no reference */ + if (!count) { + if (offset < p->lowest_bit) + p->lowest_bit = offset; + if (offset > p->highest_bit) + p->highest_bit = offset; + if (p->prio > swap_info[swap_list.next].prio) + swap_list.next = p - swap_info; + nr_swap_pages++; + p->inuse_pages--; + mem_cgroup_uncharge_swap(ent); } return count; } @@ -504,7 +551,7 @@ void swap_free(swp_entry_t entry) p = swap_info_get(entry); if (p) { - swap_entry_free(p, entry); + swap_entry_free(p, entry, SWAP_MAP); spin_unlock(&swap_lock); } } @@ -514,9 +561,16 @@ void swap_free(swp_entry_t entry) */ void swapcache_free(swp_entry_t entry, struct page *page) { + struct swap_info_struct *p; + if (page) mem_cgroup_uncharge_swapcache(page, entry); - return swap_free(entry); + p = swap_info_get(entry); + if (p) { + swap_entry_free(p, entry, SWAP_CACHE); + spin_unlock(&swap_lock); + } + return; } /* @@ -531,8 +585,7 @@ static inline int page_swapcount(struct page *page) entry.val = page_private(page); p = swap_info_get(entry); if (p) { - /* Subtract the 1 for the swap cache itself */ - count = p->swap_map[swp_offset(entry)] - 1; + count = swap_count(p->swap_map[swp_offset(entry)]); spin_unlock(&swap_lock); } return count; @@ -594,7 +647,7 @@ int free_swap_and_cache(swp_entry_t entry) p = swap_info_get(entry); if (p) { - if (swap_entry_free(p, entry) == 1) { + if (swap_entry_free(p, entry, SWAP_MAP) == SWAP_HAS_CACHE) { page = find_get_page(&swapper_space, entry.val); if (page && !trylock_page(page)) { page_cache_release(page); @@ -901,7 +954,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, i = 1; } count = si->swap_map[i]; - if (count && count != SWAP_MAP_BAD) + if (count && swap_count(count) != SWAP_MAP_BAD) break; } return i; @@ -1005,13 +1058,13 @@ static int try_to_unuse(unsigned int type) */ shmem = 0; swcount = *swap_map; - if (swcount > 1) { + if (swap_count(swcount)) { if (start_mm == &init_mm) shmem = shmem_unuse(entry, page); else retval = unuse_mm(start_mm, entry, page); } - if (*swap_map > 1) { + if (swap_count(*swap_map)) { int set_start_mm = (*swap_map >= swcount); struct list_head *p = &start_mm->mmlist; struct mm_struct *new_start_mm = start_mm; @@ -1021,7 +1074,7 @@ static int try_to_unuse(unsigned int type) atomic_inc(&new_start_mm->mm_users); atomic_inc(&prev_mm->mm_users); spin_lock(&mmlist_lock); - while (*swap_map > 1 && !retval && !shmem && + while (swap_count(*swap_map) && !retval && !shmem && (p = p->next) != &start_mm->mmlist) { mm = list_entry(p, struct mm_struct, mmlist); if (!atomic_inc_not_zero(&mm->mm_users)) @@ -1033,14 +1086,16 @@ static int try_to_unuse(unsigned int type) cond_resched(); swcount = *swap_map; - if (swcount <= 1) + if (!swap_count(swcount)) /* any usage ? */ ; else if (mm == &init_mm) { set_start_mm = 1; shmem = shmem_unuse(entry, page); } else retval = unuse_mm(mm, entry, page); - if (set_start_mm && *swap_map < swcount) { + + if (set_start_mm && + swap_count(*swap_map) < swcount) { mmput(new_start_mm); atomic_inc(&mm->mm_users); new_start_mm = mm; @@ -1067,21 +1122,25 @@ static int try_to_unuse(unsigned int type) } /* - * How could swap count reach 0x7fff when the maximum - * pid is 0x7fff, and there's no way to repeat a swap - * page within an mm (except in shmem, where it's the - * shared object which takes the reference count)? - * We believe SWAP_MAP_MAX cannot occur in Linux 2.4. - * + * How could swap count reach 0x7ffe ? 
+ * There's no way to repeat a swap page within an mm
+ * (except in shmem, where it's the shared object which takes
+ * the reference count).
+ * We believe SWAP_MAP_MAX cannot occur. (If it did, unsigned
+ * short would be too small.)
  * If that's wrong, then we should worry more about
  * exit_mmap() and do_munmap() cases described above:
  * we might be resetting SWAP_MAP_MAX too early here.
  * We know "Undead"s can happen, they're okay, so don't
  * report them; but do report if we reset SWAP_MAP_MAX.
  */
-		if (*swap_map == SWAP_MAP_MAX) {
+		/* We might release the lock_page() in unuse_mm(). */
+		if (!PageSwapCache(page) || page_private(page) != entry.val)
+			goto retry;
+
+		if (swap_count(*swap_map) == SWAP_MAP_MAX) {
			spin_lock(&swap_lock);
-			*swap_map = 1;
+			*swap_map = encode_swapmap(0, true);
			spin_unlock(&swap_lock);
			reset_overflow = 1;
		}
@@ -1099,7 +1158,8 @@
		 * pages would be incorrect if swap supported "shared
		 * private" pages, but they are handled by tmpfs files.
		 */
-		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
+		if (swap_count(*swap_map) &&
+		    PageDirty(page) && PageSwapCache(page)) {
			struct writeback_control wbc = {
				.sync_mode = WB_SYNC_NONE,
			};
@@ -1126,6 +1186,7 @@
		 * mark page dirty so shrink_page_list will preserve it.
		 */
		SetPageDirty(page);
+retry:
		unlock_page(page);
		page_cache_release(page);
@@ -1952,15 +2013,23 @@ void si_swapinfo(struct sysinfo *val)
 *
 * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
 * "permanent", but will be reclaimed by the next swapoff.
+ * Returns an error code in the following cases:
+ * - success -> 0
+ * - swp_entry is invalid -> EINVAL
+ * - swp_entry is migration entry -> EINVAL
+ * - swap-cache reference is requested but there is already one. -> EEXIST
+ * - swap-cache reference is requested but the entry is not used. -> ENOENT
+ */
-int swap_duplicate(swp_entry_t entry)
+static int __swap_duplicate(swp_entry_t entry, bool cache)
 {
	struct swap_info_struct * p;
	unsigned long offset, type;
-	int result = 0;
+	int result = -EINVAL;
+	int count;
+	bool has_cache;

	if (is_migration_entry(entry))
-		return 1;
+		return -EINVAL;

	type = swp_type(entry);
	if (type >= nr_swapfiles)
@@ -1969,17 +2038,40 @@
	offset = swp_offset(entry);

	spin_lock(&swap_lock);
-	if (offset < p->max && p->swap_map[offset]) {
-		if (p->swap_map[offset] < SWAP_MAP_MAX - 1) {
-			p->swap_map[offset]++;
-			result = 1;
-		} else if (p->swap_map[offset] <= SWAP_MAP_MAX) {
+
+	if (unlikely(offset >= p->max))
+		goto unlock_out;
+
+	count = swap_count(p->swap_map[offset]);
+	has_cache = swap_has_cache(p->swap_map[offset]);
+
+	if (cache == SWAP_CACHE) { /* called for swapcache/swapin-readahead */
+
+		/* set SWAP_HAS_CACHE if there is no cache and entry is used */
+		if (!has_cache && count) {
+			p->swap_map[offset] = encode_swapmap(count, true);
+			result = 0;
+		} else if (has_cache) /* someone added cache */
+			result = -EEXIST;
+		else if (!count) /* no users */
+			result = -ENOENT;
+
+	} else if (count || has_cache) {
+		if (count < SWAP_MAP_MAX - 1) {
+			p->swap_map[offset] = encode_swapmap(count + 1,
+							     has_cache);
+			result = 0;
+		} else if (count <= SWAP_MAP_MAX) {
			if (swap_overflow++ < 5)
-				printk(KERN_WARNING "swap_dup: swap entry overflow\n");
-			p->swap_map[offset] = SWAP_MAP_MAX;
-			result = 1;
+				printk(KERN_WARNING
+				       "swap_dup: swap entry overflow\n");
+			p->swap_map[offset] = encode_swapmap(SWAP_MAP_MAX,
+							     has_cache);
+			result = 0;
		}
-	}
+	} else
+		result = -ENOENT; /* unused swap entry */
+unlock_out:
	spin_unlock(&swap_lock);
out:
	return result;
@@ -1988,13 +2080,25 @@ bad_file:
	printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
	goto out;
 }
+/*
+ * Increase reference count of swap entry by 1.
+ */
+void swap_duplicate(swp_entry_t entry)
+{
+	__swap_duplicate(entry, SWAP_MAP);
+}

 /*
+ * @entry: swap entry for which we allocate swap cache.
+ *
  * Called when allocating swap cache for an existing swap entry.
+ * This can return error codes.  Returns 0 at success.
+ * -EEXIST means there is already a swap cache.
+ * Note: return code is different from swap_duplicate().
  */
 int swapcache_prepare(swp_entry_t entry)
 {
-	return swap_duplicate(entry);
+	return __swap_duplicate(entry, SWAP_CACHE);
 }

@@ -2035,7 +2139,7 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
		/* Don't read in free or bad pages */
		if (!si->swap_map[toff])
			break;
-		if (si->swap_map[toff] == SWAP_MAP_BAD)
+		if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
			break;
	}
	/* Count contiguous allocated slots below our target */
@@ -2043,7 +2147,7 @@
		/* Don't read in free or bad pages */
		if (!si->swap_map[toff])
			break;
-		if (si->swap_map[toff] == SWAP_MAP_BAD)
+		if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
			break;
	}
	spin_unlock(&swap_lock);
-- cgit v1.2.3-70-g09d2

From aca8bf323edd31ad462dc98c107c23a5c6022ca2 Mon Sep 17 00:00:00 2001
From: Minchan Kim
Date: Tue, 16 Jun 2009 15:33:02 -0700
Subject: mm: remove file argument from swap_readpage()

The file argument resulted from address_space's readpage a long time
ago.  We don't use it any more.  Let's remove the unnecessary argument.
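For readers, the swap_map encoding that __swap_duplicate() above
manipulates can be modelled in self-contained userspace C (the constants
and helpers mirror the previous patch; the bare unsigned short variable,
the condensed swapcache_prepare(), and the main() driver are assumptions
made for this sketch):

#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Mirrors the constants and helpers from the SWAP_HAS_CACHE patch. */
#define SWAP_MAP_MAX	0x7ffe
#define SWAP_MAP_BAD	0x7fff
#define SWAP_HAS_CACHE	0x8000
#define SWAP_COUNT_MASK (~SWAP_HAS_CACHE)

static int swap_count(unsigned short ent)	{ return ent & SWAP_COUNT_MASK; }
static bool swap_has_cache(unsigned short ent)	{ return !!(ent & SWAP_HAS_CACHE); }

static unsigned short encode_swapmap(int count, bool has_cache)
{
	return has_cache ? (SWAP_HAS_CACHE | count) : count;
}

/* Condensed from __swap_duplicate(..., SWAP_CACHE): take a cache reference. */
static int swapcache_prepare(unsigned short *ent)
{
	if (swap_has_cache(*ent))
		return -EEXIST;		/* someone already added the cache */
	if (!swap_count(*ent))
		return -ENOENT;		/* unused swap entry */
	*ent = encode_swapmap(swap_count(*ent), true);
	return 0;
}

int main(void)
{
	unsigned short ent = encode_swapmap(1, false);	/* one user ref, no cache */

	assert(swapcache_prepare(&ent) == 0);		/* flag set, count kept */
	assert(swap_count(ent) == 1 && swap_has_cache(ent));
	assert(swapcache_prepare(&ent) == -EEXIST);	/* racing second attempt */

	ent = encode_swapmap(0, false);			/* free slot */
	assert(swapcache_prepare(&ent) == -ENOENT);

	printf("swap_map encoding model OK\n");
	return 0;
}

With the flag held in swap_map itself, "is there a swap cache?" becomes
a bit test instead of a find_get_page() lookup.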
Signed-off-by: Minchan Kim
Acked-by: Hugh Dickins
Reviewed-by: Rik van Riel
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/swap.h | 2 +-
 mm/page_io.c | 2 +-
 mm/swap_state.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux/swap.h')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index fed5e8e1104..0cedf31af0b 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -256,7 +256,7 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);

 #ifdef CONFIG_SWAP
 /* linux/mm/page_io.c */
-extern int swap_readpage(struct file *, struct page *);
+extern int swap_readpage(struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
 extern void end_swap_bio_read(struct bio *bio, int err);

diff --git a/mm/page_io.c b/mm/page_io.c
index 3023c475e04..c6f3e5071de 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -120,7 +120,7 @@ out:
	return ret;
 }

-int swap_readpage(struct file *file, struct page *page)
+int swap_readpage(struct page *page)
 {
	struct bio *bio;
	int ret = 0;

diff --git a/mm/swap_state.c b/mm/swap_state.c
index b62e7f56051..42cd38eba79 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -313,7 +313,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
			 * Initiate read into locked page and return.
			 */
			lru_cache_add_anon(new_page);
-			swap_readpage(NULL, new_page);
+			swap_readpage(new_page);
			return new_page;
		}
		ClearPageSwapBacked(new_page);
-- cgit v1.2.3-70-g09d2

From 302362c5abdda80b5c2e4e57be610c2e3c2ab3c5 Mon Sep 17 00:00:00 2001
From: Daisuke Nishimura
Date: Wed, 17 Jun 2009 16:27:15 -0700
Subject: memcg: remove mem_cgroup_cache_charge_swapin()

mem_cgroup_cache_charge_swapin() isn't used any more, so remove its
no-op definition from the header file.

Signed-off-by: Daisuke Nishimura
Cc: Balbir Singh
Acked-by: KAMEZAWA Hiroyuki
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/swap.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux/swap.h')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0cedf31af0b..dca9b9999ae 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -423,12 +423,6 @@
 #define has_swap_token(x) 0
 #define disable_swap_token() do { } while(0)

-static inline int mem_cgroup_cache_charge_swapin(struct page *page,
-			struct mm_struct *mm, gfp_t mask, bool locked)
-{
-	return 0;
-}
-
 static inline void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
 {
-- cgit v1.2.3-70-g09d2

From 8a9478ca7f4bcb8945cec7f95d52dae2d5e50cbd Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki
Date: Wed, 17 Jun 2009 16:27:17 -0700
Subject: memcg: fix swap accounting

This patch fixes mis-accounting of swap usage in memcg.

In the current implementation, memcg's swap account is uncharged only
when swap is completely freed.  But there are several cases where swap
cannot be freed cleanly.  To handle that, this patch changes memcg to
uncharge the swap account when swap has no references other than the
cache.

With this, memcg's swap entry accounting can be fully synchronous with
the application's behavior.

This patch also changes memcg's hooks for swap-out.
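For illustration, the uncharge decision can be condensed into a small
self-contained userspace sketch (the reduced swapcache_free(), the
remaining_refs parameter, and the printfs standing in for the real
charge operations are all assumptions of this sketch):

#include <stdbool.h>
#include <stdio.h>

/*
 * Userspace model of the fixed decision in swapcache_free():
 * swap_entry_free() is reduced to the reference count that remains
 * after the swap-cache reference has been dropped.
 */
static void mem_cgroup_uncharge_swapcache(bool swapout)
{
	if (swapout)
		printf("swap-out completed: keep swap charge, record memcg id\n");
	else
		printf("cache dropped, no other users: uncharge swap now\n");
}

static void swapcache_free(int remaining_refs)
{
	/* nonzero means real swap users remain -> this was the end of swap out */
	bool swapout = remaining_refs != 0;

	mem_cgroup_uncharge_swapcache(swapout);
}

int main(void)
{
	swapcache_free(1);	/* a PTE still references the entry */
	swapcache_free(0);	/* entry became unused as the cache ref dropped */
	return 0;
}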
Signed-off-by: KAMEZAWA Hiroyuki Cc: Daisuke Nishimura Acked-by: Balbir Singh Cc: Hugh Dickins Cc: Johannes Weiner Cc: Li Zefan Cc: Dhaval Giani Cc: YAMAMOTO Takashi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 +++-- mm/memcontrol.c | 17 ++++++++++++----- mm/swapfile.c | 16 ++++++++++++---- 3 files changed, 27 insertions(+), 11 deletions(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index dca9b9999ae..c88b36665f7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -319,10 +319,11 @@ static inline void disable_swap_token(void) } #ifdef CONFIG_CGROUP_MEM_RES_CTLR -extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent); +extern void +mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); #else static inline void -mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) +mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) { } #endif diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3468c38adde..a83e0395444 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -189,6 +189,7 @@ enum charge_type { MEM_CGROUP_CHARGE_TYPE_SHMEM, /* used by page migration of shmem */ MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */ MEM_CGROUP_CHARGE_TYPE_SWAPOUT, /* for accounting swapcache */ + MEM_CGROUP_CHARGE_TYPE_DROP, /* a page was unused swap cache */ NR_CHARGE_TYPE, }; @@ -1493,6 +1494,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) switch (ctype) { case MEM_CGROUP_CHARGE_TYPE_MAPPED: + case MEM_CGROUP_CHARGE_TYPE_DROP: if (page_mapped(page)) goto unlock_out; break; @@ -1556,18 +1558,23 @@ void mem_cgroup_uncharge_cache_page(struct page *page) * called after __delete_from_swap_cache() and drop "page" account. * memcg information is recorded to swap_cgroup of "ent" */ -void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) +void +mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) { struct mem_cgroup *memcg; + int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT; + + if (!swapout) /* this was a swap cache but the swap is unused ! */ + ctype = MEM_CGROUP_CHARGE_TYPE_DROP; + + memcg = __mem_cgroup_uncharge_common(page, ctype); - memcg = __mem_cgroup_uncharge_common(page, - MEM_CGROUP_CHARGE_TYPE_SWAPOUT); /* record memcg information */ - if (do_swap_account && memcg) { + if (do_swap_account && swapout && memcg) { swap_cgroup_record(ent, css_id(&memcg->css)); mem_cgroup_get(memcg); } - if (memcg) + if (swapout && memcg) css_put(&memcg->css); } #endif diff --git a/mm/swapfile.c b/mm/swapfile.c index 28faa01cf57..d1ade1a48ee 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -583,8 +583,9 @@ static int swap_entry_free(struct swap_info_struct *p, swap_list.next = p - swap_info; nr_swap_pages++; p->inuse_pages--; - mem_cgroup_uncharge_swap(ent); } + if (!swap_count(count)) + mem_cgroup_uncharge_swap(ent); return count; } @@ -609,12 +610,19 @@ void swap_free(swp_entry_t entry) void swapcache_free(swp_entry_t entry, struct page *page) { struct swap_info_struct *p; + int ret; - if (page) - mem_cgroup_uncharge_swapcache(page, entry); p = swap_info_get(entry); if (p) { - swap_entry_free(p, entry, SWAP_CACHE); + ret = swap_entry_free(p, entry, SWAP_CACHE); + if (page) { + bool swapout; + if (ret) + swapout = true; /* the end of swap out */ + else + swapout = false; /* no more swap users! 
*/ + mem_cgroup_uncharge_swapcache(page, entry, swapout); + } spin_unlock(&swap_lock); } return; -- cgit v1.2.3-70-g09d2 From a5c9b696ec109bb54d547fdb437a7a0c2d514670 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 23 Jun 2009 12:36:58 -0700 Subject: mm: pass mm to grab_swap_token If a kthread happens to use get_user_pages() on an mm (as KSM does), there's a chance that it will end up trying to read in a swap page, then oops in grab_swap_token() because the kthread has no mm: GUP passes down the right mm, so grab_swap_token() ought to be using it. We have not identified a stronger case than KSM's daemon (not yet in mainline), but the issue must have come up before, since RHEL has included a fix for this for years (though a different fix, they just back out of grab_swap_token if current->mm is unset: which is what we first proposed, but using the right mm here seems more correct). Reported-by: Izik Eidus Signed-off-by: Johannes Weiner Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 12 ++++++------ mm/memory.c | 2 +- mm/thrash.c | 32 +++++++++++++++----------------- 3 files changed, 22 insertions(+), 24 deletions(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index c88b36665f7..7c15334f3ff 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -298,8 +298,8 @@ extern int try_to_free_swap(struct page *); struct backing_dev_info; /* linux/mm/thrash.c */ -extern struct mm_struct * swap_token_mm; -extern void grab_swap_token(void); +extern struct mm_struct *swap_token_mm; +extern void grab_swap_token(struct mm_struct *); extern void __put_swap_token(struct mm_struct *); static inline int has_swap_token(struct mm_struct *mm) @@ -419,10 +419,10 @@ static inline swp_entry_t get_swap_page(void) } /* linux/mm/thrash.c */ -#define put_swap_token(x) do { } while(0) -#define grab_swap_token() do { } while(0) -#define has_swap_token(x) 0 -#define disable_swap_token() do { } while(0) +#define put_swap_token(mm) do { } while (0) +#define grab_swap_token(mm) do { } while (0) +#define has_swap_token(mm) 0 +#define disable_swap_token() do { } while (0) static inline void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) diff --git a/mm/memory.c b/mm/memory.c index 50da9511aa7..f46ac18ba23 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2519,7 +2519,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, delayacct_set_flag(DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry); if (!page) { - grab_swap_token(); /* Contend for token _before_ read-in */ + grab_swap_token(mm); /* Contend for token _before_ read-in */ page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vma, address); if (!page) { diff --git a/mm/thrash.c b/mm/thrash.c index c4c5205a9c3..2372d4ed5dd 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -26,47 +26,45 @@ static DEFINE_SPINLOCK(swap_token_lock); struct mm_struct *swap_token_mm; static unsigned int global_faults; -void grab_swap_token(void) +void grab_swap_token(struct mm_struct *mm) { int current_interval; global_faults++; - current_interval = global_faults - current->mm->faultstamp; + current_interval = global_faults - mm->faultstamp; if (!spin_trylock(&swap_token_lock)) return; /* First come first served */ if (swap_token_mm == NULL) { - current->mm->token_priority = current->mm->token_priority + 2; - swap_token_mm = current->mm; + mm->token_priority = mm->token_priority + 2; + swap_token_mm = mm; goto 
out; } - if (current->mm != swap_token_mm) { - if (current_interval < current->mm->last_interval) - current->mm->token_priority++; + if (mm != swap_token_mm) { + if (current_interval < mm->last_interval) + mm->token_priority++; else { - if (likely(current->mm->token_priority > 0)) - current->mm->token_priority--; + if (likely(mm->token_priority > 0)) + mm->token_priority--; } /* Check if we deserve the token */ - if (current->mm->token_priority > - swap_token_mm->token_priority) { - current->mm->token_priority += 2; - swap_token_mm = current->mm; + if (mm->token_priority > swap_token_mm->token_priority) { + mm->token_priority += 2; + swap_token_mm = mm; } } else { /* Token holder came in again! */ - current->mm->token_priority += 2; + mm->token_priority += 2; } out: - current->mm->faultstamp = global_faults; - current->mm->last_interval = current_interval; + mm->faultstamp = global_faults; + mm->last_interval = current_interval; spin_unlock(&swap_token_lock); -return; } /* Called on process exit. */ -- cgit v1.2.3-70-g09d2
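For illustration, the token contest with its now-explicit mm argument
can be run standalone in userspace (the toy mm_struct, the names, and
the main() scenario are assumptions of this sketch; the trylock is
omitted):

#include <stdio.h>

/* Toy mm_struct carrying only the swap-token bookkeeping fields. */
struct mm_struct {
	unsigned int faultstamp;
	unsigned int token_priority;
	unsigned int last_interval;
	const char *name;
};

static struct mm_struct *swap_token_mm;
static unsigned int global_faults;

/* Condensed grab_swap_token(): note the explicit mm, as in the patch. */
static void grab_swap_token(struct mm_struct *mm)
{
	unsigned int current_interval;

	global_faults++;
	current_interval = global_faults - mm->faultstamp;

	if (!swap_token_mm) {			/* first come first served */
		mm->token_priority += 2;
		swap_token_mm = mm;
	} else if (mm != swap_token_mm) {
		if (current_interval < mm->last_interval)
			mm->token_priority++;
		else if (mm->token_priority > 0)
			mm->token_priority--;
		if (mm->token_priority > swap_token_mm->token_priority) {
			mm->token_priority += 2;	/* we deserve the token */
			swap_token_mm = mm;
		}
	} else {
		mm->token_priority += 2;	/* token holder came in again */
	}

	mm->faultstamp = global_faults;
	mm->last_interval = current_interval;
}

int main(void)
{
	struct mm_struct a = { .name = "task-mm" };
	struct mm_struct b = { .name = "kthread-target-mm" };

	grab_swap_token(&a);	/* a faults first and takes the token */
	grab_swap_token(&b);	/* a kthread faulting on b's behalf contends for b */

	/* a keeps the token here; b would need to fault often to win it */
	printf("token holder: %s\n", swap_token_mm->name);
	return 0;
}

The fix matters precisely because the mm being contended for is the one
passed down by get_user_pages(), which need not be current->mm.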