From ba9ddf49391645e6bb93219131a40446538a5e76 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn
Date: Sat, 18 Oct 2008 20:26:42 -0700
Subject: Ramfs and Ram Disk pages are unevictable

Christoph Lameter pointed out that ram disk pages also clutter the LRU
lists.  When vmscan finds them dirty and tries to clean them, the ram disk
writeback function just redirties the page so that it goes back onto the
active list.  Round and round she goes...

With the ram disk driver [rd.c] replaced by the newer 'brd.c', this is no
longer the case, as ram disk pages are no longer maintained on the lru.
[This makes them unmigratable for defrag or memory hot remove, but that
can be addressed by a separate patch series.]  However, the ramfs pages
behave like ram disk pages used to, so:

Define new address_space flag [shares address_space flags member with
mapping's gfp mask] to indicate that the address space contains all
unevictable pages.  This will provide for efficient testing of ramfs pages
in page_evictable().

Also provide wrapper functions to set/test the unevictable state to
minimize #ifdefs in ramfs driver and any other users of this facility.

Set the unevictable state on address_space structures for new ramfs
inodes.  Test the unevictable state in page_evictable() to cull
unevictable pages.

These changes depend on [CONFIG_]UNEVICTABLE_LRU.

[riel@redhat.com: undo the brd.c part]
Signed-off-by: Lee Schermerhorn
Signed-off-by: Rik van Riel
Debugged-by: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/pagemap.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
(limited to 'include/linux/pagemap.h')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 5da31c12101..09164d2c5c2 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -32,6 +32,28 @@ static inline void mapping_set_error(struct address_space *mapping, int error)
 	}
 }
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+#define AS_UNEVICTABLE	(__GFP_BITS_SHIFT + 2)	/* e.g., ramdisk, SHM_LOCK */
+
+static inline void mapping_set_unevictable(struct address_space *mapping)
+{
+	set_bit(AS_UNEVICTABLE, &mapping->flags);
+}
+
+static inline int mapping_unevictable(struct address_space *mapping)
+{
+	if (mapping && (mapping->flags & AS_UNEVICTABLE))
+		return 1;
+	return 0;
+}
+#else
+static inline void mapping_set_unevictable(struct address_space *mapping) { }
+static inline int mapping_unevictable(struct address_space *mapping)
+{
+	return 0;
+}
+#endif
+
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
 	return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
--
cgit v1.2.3-70-g09d2
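
The hunk above adds only the pagemap.h wrappers; the ramfs and
page_evictable() changes that the changelog describes live in files outside
this filtered view.  What follows is a rough sketch of how the new helpers
would be consumed.  The function names outside pagemap.h
(example_mark_inode_unevictable, example_page_evictable) are illustrative
assumptions, not part of the patch.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

/*
 * A filesystem whose pages can never be reclaimed marks the whole mapping
 * unevictable once, e.g. when the inode is created.
 */
static void example_mark_inode_unevictable(struct inode *inode)
{
	mapping_set_unevictable(inode->i_mapping);
}

/* vmscan can then cull such pages with a single flag test per page. */
static int example_page_evictable(struct page *page)
{
	if (mapping_unevictable(page_mapping(page)))
		return 0;	/* ramfs page: keep it off the normal LRU lists */
	return 1;
}
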
From 89e004ea55abe201b29e2d6e35124101f1288ef7 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn
Date: Sat, 18 Oct 2008 20:26:43 -0700
Subject: SHM_LOCKED pages are unevictable

Shmem segments locked into memory via shmctl(SHM_LOCK) should not be kept
on the normal LRU, since scanning them is a waste of time and might throw
off kswapd's balancing algorithms.  Place them on the unevictable LRU list
instead.

Use the AS_UNEVICTABLE flag to mark the address_space of SHM_LOCKed shared
memory regions as unevictable.  Then these pages will be culled off the
normal LRU lists during vmscan.

Add a new wrapper function to clear the mapping's unevictable state when/if
the shared memory segment is unlocked.

Add 'scan_mapping_unevictable_pages()' to mm/vmscan.c to scan all pages in
the shmem segment's mapping [struct address_space] for evictability now
that they're no longer locked.  If so, move them to the appropriate zone
lru list.

Changes depend on [CONFIG_]UNEVICTABLE_LRU.

[kosaki.motohiro@jp.fujitsu.com: revert shm change]
Signed-off-by: Lee Schermerhorn
Signed-off-by: Rik van Riel
Signed-off-by: Kosaki Motohiro
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/mm.h      |  4 +--
 include/linux/pagemap.h | 12 +++++--
 include/linux/swap.h    |  4 +++
 ipc/shm.c               |  4 +++
 mm/shmem.c              |  4 +++
 mm/vmscan.c             | 89 +++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 112 insertions(+), 5 deletions(-)
(limited to 'include/linux/pagemap.h')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c61ba10768e..40236290e2a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -700,10 +700,10 @@ static inline int page_mapped(struct page *page)
 extern void show_free_areas(void);
 
 #ifdef CONFIG_SHMEM
-int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
 #else
 static inline int shmem_lock(struct file *file, int lock,
-			     struct user_struct *user)
+			struct user_struct *user)
 {
 	return 0;
 }
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 09164d2c5c2..4b6c4d8d26b 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -40,14 +40,20 @@ static inline void mapping_set_unevictable(struct address_space *mapping)
 	set_bit(AS_UNEVICTABLE, &mapping->flags);
 }
 
+static inline void mapping_clear_unevictable(struct address_space *mapping)
+{
+	clear_bit(AS_UNEVICTABLE, &mapping->flags);
+}
+
 static inline int mapping_unevictable(struct address_space *mapping)
 {
-	if (mapping && (mapping->flags & AS_UNEVICTABLE))
-		return 1;
-	return 0;
+	if (likely(mapping))
+		return test_bit(AS_UNEVICTABLE, &mapping->flags);
+	return !!mapping;
 }
 #else
 static inline void mapping_set_unevictable(struct address_space *mapping) { }
+static inline void mapping_clear_unevictable(struct address_space *mapping) { }
 static inline int mapping_unevictable(struct address_space *mapping)
 {
 	return 0;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a2113044d20..7edb4cbc29f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -232,12 +232,16 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 
 #ifdef CONFIG_UNEVICTABLE_LRU
 extern int page_evictable(struct page *page, struct vm_area_struct *vma);
+extern void scan_mapping_unevictable_pages(struct address_space *);
 #else
 static inline int page_evictable(struct page *page,
 					struct vm_area_struct *vma)
 {
 	return 1;
 }
+static inline void scan_mapping_unevictable_pages(struct address_space *mapping)
+{
+}
 #endif
 
 extern int kswapd_run(int nid);
diff --git a/ipc/shm.c b/ipc/shm.c
index e77ec698cf4..0add3fa5f54 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -737,6 +737,10 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
 	case SHM_LOCK:
 	case SHM_UNLOCK:
 	{
+		struct file *uninitialized_var(shm_file);
+
+		lru_add_drain_all();  /* drain pagevecs to lru lists */
+
 		shp = shm_lock_check(ns, shmid);
 		if (IS_ERR(shp)) {
 			err = PTR_ERR(shp);
diff --git a/mm/shmem.c b/mm/shmem.c
index fc2ccf79a77..d38d7e61fcd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1477,12 +1477,16 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
 		if (!user_shm_lock(inode->i_size, user))
 			goto out_nomem;
 		info->flags |= VM_LOCKED;
+		mapping_set_unevictable(file->f_mapping);
 	}
 	if (!lock && (info->flags & VM_LOCKED) && user) {
 		user_shm_unlock(inode->i_size, user);
 		info->flags &= ~VM_LOCKED;
+		mapping_clear_unevictable(file->f_mapping);
+		scan_mapping_unevictable_pages(file->f_mapping);
 	}
 	retval = 0;
+
 out_nomem:
 	spin_unlock(&info->lock);
 	return retval;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9babfbc1ddc..dfb342e0db9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2346,4 +2346,93 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
 
 	return 1;
 }
+
+/**
+ * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
+ * @page: page to check evictability and move to appropriate lru list
+ * @zone: zone page is in
+ *
+ * Checks a page for evictability and moves the page to the appropriate
+ * zone lru list.
+ *
+ * Restrictions: zone->lru_lock must be held, page must be on LRU and must
+ * have PageUnevictable set.
+ */
+static void check_move_unevictable_page(struct page *page, struct zone *zone)
+{
+	VM_BUG_ON(PageActive(page));
+
+retry:
+	ClearPageUnevictable(page);
+	if (page_evictable(page, NULL)) {
+		enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
+		__dec_zone_state(zone, NR_UNEVICTABLE);
+		list_move(&page->lru, &zone->lru[l].list);
+		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
+		__count_vm_event(UNEVICTABLE_PGRESCUED);
+	} else {
+		/*
+		 * rotate unevictable list
+		 */
+		SetPageUnevictable(page);
+		list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
+		if (page_evictable(page, NULL))
+			goto retry;
+	}
+}
+
+/**
+ * scan_mapping_unevictable_pages - scan an address space for evictable pages
+ * @mapping: struct address_space to scan for evictable pages
+ *
+ * Scan all pages in mapping.  Check unevictable pages for
+ * evictability and move them to the appropriate zone lru list.
+ */
+void scan_mapping_unevictable_pages(struct address_space *mapping)
+{
+	pgoff_t next = 0;
+	pgoff_t end = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
+			PAGE_CACHE_SHIFT;
+	struct zone *zone;
+	struct pagevec pvec;
+
+	if (mapping->nrpages == 0)
+		return;
+
+	pagevec_init(&pvec, 0);
+	while (next < end &&
+		pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+		int i;
+		int pg_scanned = 0;
+
+		zone = NULL;
+
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+			pgoff_t page_index = page->index;
+			struct zone *pagezone = page_zone(page);
+
+			pg_scanned++;
+			if (page_index > next)
+				next = page_index;
+			next++;
+
+			if (pagezone != zone) {
+				if (zone)
+					spin_unlock_irq(&zone->lru_lock);
+				zone = pagezone;
+				spin_lock_irq(&zone->lru_lock);
+			}
+
+			if (PageLRU(page) && PageUnevictable(page))
+				check_move_unevictable_page(page, zone);
+		}
+		if (zone)
+			spin_unlock_irq(&zone->lru_lock);
+		pagevec_release(&pvec);
+
+		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
+	}
+
+}
 #endif
--
cgit v1.2.3-70-g09d2
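
Everything above is driven from userspace through shmctl().  Below is a
minimal sketch of the sequence that exercises shmem_lock() and, on unlock,
scan_mapping_unevictable_pages(); error handling is trimmed, and SHM_LOCK
needs CAP_IPC_LOCK or a sufficient RLIMIT_MEMLOCK to succeed.

#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	int id = shmget(IPC_PRIVATE, 1 << 20, IPC_CREAT | 0600);
	char *p = shmat(id, NULL, 0);

	if (id < 0 || p == (void *)-1)
		return 1;

	memset(p, 0, 1 << 20);		/* fault the segment's pages in */
	shmctl(id, SHM_LOCK, NULL);	/* mapping marked unevictable */
	shmctl(id, SHM_UNLOCK, NULL);	/* pages rescued back to normal LRUs */

	shmdt(p);
	shmctl(id, IPC_RMID, NULL);
	return 0;
}
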
From f45840b5c128445da70e7ec33adc47b4a12bdaf4 Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Sat, 18 Oct 2008 20:26:57 -0700
Subject: mm: pagecache insertion fewer atomics

Setting and clearing the page-locked bit when inserting a page into the
swapcache / pagecache can use non-atomic page flags operations when the
page has no other references, because no other CPU may be operating on it
at this time.

This saves one atomic operation when inserting a page into the pagecache.

Signed-off-by: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/pagemap.h | 14 +++++++-------
 mm/swap_state.c         |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)
(limited to 'include/linux/pagemap.h')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 4b6c4d8d26b..7334b2b6c4c 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -299,14 +299,14 @@ extern int __lock_page_killable(struct page *page);
 extern void __lock_page_nosync(struct page *page);
 extern void unlock_page(struct page *page);
 
-static inline void set_page_locked(struct page *page)
+static inline void __set_page_locked(struct page *page)
 {
-	set_bit(PG_locked, &page->flags);
+	__set_bit(PG_locked, &page->flags);
 }
 
-static inline void clear_page_locked(struct page *page)
+static inline void __clear_page_locked(struct page *page)
 {
-	clear_bit(PG_locked, &page->flags);
+	__clear_bit(PG_locked, &page->flags);
 }
 
 static inline int trylock_page(struct page *page)
@@ -438,17 +438,17 @@ extern void __remove_from_page_cache(struct page *page);
 
 /*
  * Like add_to_page_cache_locked, but used to add newly allocated pages:
- * the page is new, so we can just run set_page_locked() against it.
+ * the page is new, so we can just run __set_page_locked() against it.
  */
 static inline int add_to_page_cache(struct page *page,
 		struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
 {
 	int error;
 
-	set_page_locked(page);
+	__set_page_locked(page);
 	error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
 	if (unlikely(error))
-		clear_page_locked(page);
+		__clear_page_locked(page);
 	return error;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 43cda7b4b80..3353c9029ce 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -303,7 +303,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * re-using the just freed swap entry for an existing page.
 		 * May fail (-ENOMEM) if radix-tree node allocation failed.
 		 */
-		set_page_locked(new_page);
+		__set_page_locked(new_page);
 		SetPageSwapBacked(new_page);
 		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
 		if (likely(!err)) {
@@ -315,7 +315,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			return new_page;
 		}
 		ClearPageSwapBacked(new_page);
-		clear_page_locked(new_page);
+		__clear_page_locked(new_page);
 		swap_free(entry);
 	} while (err != -ENOMEM);
--
cgit v1.2.3-70-g09d2
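
The non-atomic __set_page_locked()/__clear_page_locked() above are safe
only while the page is still private to the thread that allocated it.  A
rough sketch of the calling pattern that provides this guarantee follows;
the read-path function itself is an illustrative assumption, not code from
the patch.

#include <linux/gfp.h>
#include <linux/pagemap.h>

static struct page *example_read_page(struct address_space *mapping,
					pgoff_t index)
{
	struct page *page = page_cache_alloc_cold(mapping);

	if (!page)
		return NULL;

	/*
	 * add_to_page_cache_lru() -> add_to_page_cache() runs
	 * __set_page_locked() while the page is still invisible to other
	 * CPUs; only once the page is published in the radix tree do
	 * lockers need the atomic bit operations.
	 */
	if (add_to_page_cache_lru(page, mapping, index, GFP_KERNEL)) {
		page_cache_release(page);
		return NULL;
	}

	mapping->a_ops->readpage(NULL, page);	/* readpage unlocks the page */
	return page;
}
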
From 8413ac9d8c9a1366a4f57880723126cd24e5a5c3 Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Sat, 18 Oct 2008 20:26:59 -0700
Subject: mm: page lock use lock bitops

trylock_page and unlock_page open and close a critical section.  Hence,
we can use the lock bitops to get the desired memory ordering.

Also, mark trylock as likely to succeed (and remove the annotation from
callers).

Signed-off-by: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/pagemap.h |  2 +-
 mm/filemap.c            | 13 +++++--------
 mm/swapfile.c           |  2 +-
 3 files changed, 7 insertions(+), 10 deletions(-)
(limited to 'include/linux/pagemap.h')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 7334b2b6c4c..709742be02f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -311,7 +311,7 @@ static inline void __clear_page_locked(struct page *page)
 
 static inline int trylock_page(struct page *page)
 {
-	return !test_and_set_bit(PG_locked, &page->flags);
+	return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
 }
 
 /*
diff --git a/mm/filemap.c b/mm/filemap.c
index a1ddd2557af..e1b23fda48d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -573,17 +573,14 @@ EXPORT_SYMBOL(wait_on_page_bit);
  * mechananism between PageLocked pages and PageWriteback pages is shared.
  * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
  *
- * The first mb is necessary to safely close the critical section opened by the
- * test_and_set_bit() to lock the page; the second mb is necessary to enforce
- * ordering between the clear_bit and the read of the waitqueue (to avoid SMP
- * races with a parallel wait_on_page_locked()).
+ * The mb is necessary to enforce ordering between the clear_bit and the read
+ * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()).
  */
 void unlock_page(struct page *page)
 {
-	smp_mb__before_clear_bit();
-	if (!test_and_clear_bit(PG_locked, &page->flags))
-		BUG();
-	smp_mb__after_clear_bit();
+	VM_BUG_ON(!PageLocked(page));
+	clear_bit_unlock(PG_locked, &page->flags);
+	smp_mb__after_clear_bit();
 	wake_up_page(page, PG_locked);
 }
 EXPORT_SYMBOL(unlock_page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2a97fafa3d8..90cb67a5417 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -422,7 +422,7 @@ void free_swap_and_cache(swp_entry_t entry)
 	if (p) {
 		if (swap_entry_free(p, swp_offset(entry)) == 1) {
 			page = find_get_page(&swapper_space, entry.val);
-			if (page && unlikely(!trylock_page(page))) {
+			if (page && !trylock_page(page)) {
 				page_cache_release(page);
 				page = NULL;
 			}
--
cgit v1.2.3-70-g09d2
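
The conversion relies on the acquire/release semantics of the lock bitops.
The sketch below illustrates that contract on an arbitrary flags word; the
names are illustrative and this is not code from the patch.

#include <linux/bitops.h>

static unsigned long example_flags;
static int example_data;

static int example_try_enter(void)
{
	/* acquire: accesses in the critical section cannot move before this */
	return !test_and_set_bit_lock(0, &example_flags);
}

static void example_exit(void)
{
	example_data++;
	/*
	 * release: the store above is visible before the bit is cleared,
	 * so no separate smp_mb__before_clear_bit() is needed.
	 */
	clear_bit_unlock(0, &example_flags);
}

Note that unlock_page() still issues smp_mb__after_clear_bit() before waking
waiters: the release ordering of clear_bit_unlock() replaces only the barrier
that used to precede the clear, not the one ordering the clear against the
waitqueue read.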