summary | refs | log | tree | commit | diff | stats
path: root/mm/filemap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- mm/filemap.c 124
1 files changed, 87 insertions, 37 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index c641edf553a..867d40222ec 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -34,6 +34,7 @@
#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
#include <linux/memcontrol.h>
#include <linux/mm_inline.h> /* for page_is_file_cache() */
+#include <linux/cleancache.h>
#include "internal.h"
/*
@@ -58,16 +59,16 @@
/*
* Lock ordering:
*
- * ->i_mmap_lock (truncate_pagecache)
+ * ->i_mmap_mutex (truncate_pagecache)
* ->private_lock (__free_pte->__set_page_dirty_buffers)
* ->swap_lock (exclusive_swap_page, others)
* ->mapping->tree_lock
*
* ->i_mutex
- * ->i_mmap_lock (truncate->unmap_mapping_range)
+ * ->i_mmap_mutex (truncate->unmap_mapping_range)
*
* ->mmap_sem
- * ->i_mmap_lock
+ * ->i_mmap_mutex
* ->page_table_lock or pte_lock (various, mainly in memory.c)
* ->mapping->tree_lock (arch-dependent flush_dcache_mmap_lock)
*
@@ -77,14 +78,11 @@
* ->i_mutex (generic_file_buffered_write)
* ->mmap_sem (fault_in_pages_readable->do_page_fault)
*
- * ->i_mutex
- * ->i_alloc_sem (various)
- *
- * inode_wb_list_lock
+ * bdi->wb.list_lock
* sb_lock (fs/fs-writeback.c)
* ->mapping->tree_lock (__sync_single_inode)
*
- * ->i_mmap_lock
+ * ->i_mmap_mutex
* ->anon_vma.lock (vma_adjust)
*
* ->anon_vma.lock
@@ -98,15 +96,15 @@
* ->zone.lru_lock (check_pte_range->isolate_lru_page)
* ->private_lock (page_remove_rmap->set_page_dirty)
* ->tree_lock (page_remove_rmap->set_page_dirty)
- * inode_wb_list_lock (page_remove_rmap->set_page_dirty)
+ * bdi->wb.list_lock (page_remove_rmap->set_page_dirty)
* ->inode->i_lock (page_remove_rmap->set_page_dirty)
- * inode_wb_list_lock (zap_pte_range->set_page_dirty)
+ * bdi->wb.list_lock (zap_pte_range->set_page_dirty)
* ->inode->i_lock (zap_pte_range->set_page_dirty)
* ->private_lock (zap_pte_range->__set_page_dirty_buffers)
*
* (code doesn't rely on that order, so you could switch it around)
* ->tasklist_lock (memory_failure, collect_procs_ao)
- * ->i_mmap_lock
+ * ->i_mmap_mutex
*/
/*
@@ -118,8 +116,19 @@ void __delete_from_page_cache(struct page *page)
{
struct address_space *mapping = page->mapping;
+ /*
+ * If the page is uptodate and mapped to disk, put it into the
+ * cleancache; otherwise invalidate any existing cleancache entry.
+ * We can't leave stale data in the cleancache once our page is gone.
+ */
+ if (PageUptodate(page) && PageMappedToDisk(page))
+ cleancache_put_page(page);
+ else
+ cleancache_flush_page(mapping, page);
+
radix_tree_delete(&mapping->page_tree, page->index);
page->mapping = NULL;
+ /* Leave page->index set: truncation lookup relies upon it */
mapping->nrpages--;
__dec_zone_page_state(page, NR_FILE_PAGES);
if (PageSwapBacked(page))
@@ -475,6 +484,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
spin_unlock_irq(&mapping->tree_lock);
} else {
page->mapping = NULL;
+ /* Leave page->index set: truncation relies upon it */
spin_unlock_irq(&mapping->tree_lock);
mem_cgroup_uncharge_cache_page(page);
page_cache_release(page);
@@ -562,6 +572,17 @@ void wait_on_page_bit(struct page *page, int bit_nr)
}
EXPORT_SYMBOL(wait_on_page_bit);
+int wait_on_page_bit_killable(struct page *page, int bit_nr)
+{
+ DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+
+ if (!test_bit(bit_nr, &page->flags))
+ return 0;
+
+ return __wait_on_bit(page_waitqueue(page), &wait,
+ sleep_on_page_killable, TASK_KILLABLE);
+}
+
/**
* add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
* @page: Page defining the wait queue of interest
@@ -643,15 +664,32 @@ EXPORT_SYMBOL_GPL(__lock_page_killable);
int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags)
{
- if (!(flags & FAULT_FLAG_ALLOW_RETRY)) {
- __lock_page(page);
- return 1;
- } else {
- if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) {
- up_read(&mm->mmap_sem);
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ /*
+ * CAUTION! In this case, mmap_sem is not released
+ * even though we return 0.
+ */
+ if (flags & FAULT_FLAG_RETRY_NOWAIT)
+ return 0;
+
+ up_read(&mm->mmap_sem);
+ if (flags & FAULT_FLAG_KILLABLE)
+ wait_on_page_locked_killable(page);
+ else
wait_on_page_locked(page);
- }
return 0;
+ } else {
+ if (flags & FAULT_FLAG_KILLABLE) {
+ int ret;
+
+ ret = __lock_page_killable(page);
+ if (ret) {
+ up_read(&mm->mmap_sem);
+ return 0;
+ }
+ } else
+ __lock_page(page);
+ return 1;
}
}
@@ -1528,15 +1566,17 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma,
/* If we don't want any read-ahead, don't bother */
if (VM_RandomReadHint(vma))
return;
+ if (!ra->ra_pages)
+ return;
- if (VM_SequentialReadHint(vma) ||
- offset - 1 == (ra->prev_pos >> PAGE_CACHE_SHIFT)) {
+ if (VM_SequentialReadHint(vma)) {
page_cache_sync_readahead(mapping, ra, file, offset,
ra->ra_pages);
return;
}
- if (ra->mmap_miss < INT_MAX)
+ /* Avoid banging the cache line if not needed */
+ if (ra->mmap_miss < MMAP_LOTSAMISS * 10)
ra->mmap_miss++;
/*
@@ -1550,12 +1590,10 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma,
* mmap read-around
*/
ra_pages = max_sane_readahead(ra->ra_pages);
- if (ra_pages) {
- ra->start = max_t(long, 0, offset - ra_pages/2);
- ra->size = ra_pages;
- ra->async_size = 0;
- ra_submit(ra, mapping, file);
- }
+ ra->start = max_t(long, 0, offset - ra_pages / 2);
+ ra->size = ra_pages;
+ ra->async_size = ra_pages / 4;
+ ra_submit(ra, mapping, file);
}
/*
@@ -1622,6 +1660,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
/* No page in the page cache at all */
do_sync_mmap_readahead(vma, ra, file, offset);
count_vm_event(PGMAJFAULT);
+ mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
ret = VM_FAULT_MAJOR;
retry_find:
page = find_get_page(mapping, offset);
@@ -1660,7 +1699,6 @@ retry_find:
return VM_FAULT_SIGBUS;
}
- ra->prev_pos = (loff_t)offset << PAGE_CACHE_SHIFT;
vmf->page = page;
return ret | VM_FAULT_LOCKED;
@@ -1756,7 +1794,7 @@ EXPORT_SYMBOL(generic_file_readonly_mmap);
static struct page *__read_cache_page(struct address_space *mapping,
pgoff_t index,
- int (*filler)(void *,struct page*),
+ int (*filler)(void *, struct page *),
void *data,
gfp_t gfp)
{
@@ -1787,7 +1825,7 @@ repeat:
static struct page *do_read_cache_page(struct address_space *mapping,
pgoff_t index,
- int (*filler)(void *,struct page*),
+ int (*filler)(void *, struct page *),
void *data,
gfp_t gfp)
@@ -1827,7 +1865,7 @@ out:
* @mapping: the page's address_space
* @index: the page index
* @filler: function to perform the read
- * @data: destination for read data
+ * @data: first arg to filler(data, page) function, often left as NULL
*
* Same as read_cache_page, but don't wait for page to become unlocked
* after submitting it to the filler.
@@ -1839,7 +1877,7 @@ out:
*/
struct page *read_cache_page_async(struct address_space *mapping,
pgoff_t index,
- int (*filler)(void *,struct page*),
+ int (*filler)(void *, struct page *),
void *data)
{
return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
@@ -1887,7 +1925,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
* @mapping: the page's address_space
* @index: the page index
* @filler: function to perform the read
- * @data: destination for read data
+ * @data: first arg to filler(data, page) function, often left as NULL
*
* Read into the page cache. If a page already exists, and PageUptodate() is
* not set, try to fill the page then wait for it to become unlocked.
@@ -1896,7 +1934,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
*/
struct page *read_cache_page(struct address_space *mapping,
pgoff_t index,
- int (*filler)(void *,struct page*),
+ int (*filler)(void *, struct page *),
void *data)
{
return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
@@ -1943,16 +1981,26 @@ static int __remove_suid(struct dentry *dentry, int kill)
int file_remove_suid(struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
- int killsuid = should_remove_suid(dentry);
- int killpriv = security_inode_need_killpriv(dentry);
+ struct inode *inode = dentry->d_inode;
+ int killsuid;
+ int killpriv;
int error = 0;
+ /* Fast path for nothing security related */
+ if (IS_NOSEC(inode))
+ return 0;
+
+ killsuid = should_remove_suid(dentry);
+ killpriv = security_inode_need_killpriv(dentry);
+
if (killpriv < 0)
return killpriv;
if (killpriv)
error = security_inode_killpriv(dentry);
if (!error && killsuid)
error = __remove_suid(dentry, killsuid);
+ if (!error && (inode->i_sb->s_flags & MS_NOSEC))
+ inode->i_flags |= S_NOSEC;
return error;
}
@@ -2288,7 +2336,7 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
repeat:
page = find_lock_page(mapping, index);
if (page)
- return page;
+ goto found;
page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
if (!page)
@@ -2301,6 +2349,8 @@ repeat:
goto repeat;
return NULL;
}
+found:
+ wait_on_page_writeback(page);
return page;
}
EXPORT_SYMBOL(grab_cache_page_write_begin);