summaryrefslogtreecommitdiffstats
path: root/fs/buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--fs/buffer.c110
1 files changed, 86 insertions, 24 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index aa68206bd51..0e5ec371ce7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -356,7 +356,7 @@ static void free_more_memory(void)
for_each_online_pgdat(pgdat) {
zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
if (*zones)
- try_to_free_pages(zones, GFP_NOFS);
+ try_to_free_pages(zones, 0, GFP_NOFS);
}
}
@@ -676,6 +676,39 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
EXPORT_SYMBOL(mark_buffer_dirty_inode);
/*
+ * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
+ * dirty.
+ *
+ * If warn is true, then emit a warning if the page is not uptodate and has
+ * not been truncated.
+ */
+static int __set_page_dirty(struct page *page,
+ struct address_space *mapping, int warn)
+{
+ if (unlikely(!mapping))
+ return !TestSetPageDirty(page);
+
+ if (TestSetPageDirty(page))
+ return 0;
+
+ write_lock_irq(&mapping->tree_lock);
+ if (page->mapping) { /* Race with truncate? */
+ WARN_ON_ONCE(warn && !PageUptodate(page));
+
+ if (mapping_cap_account_dirty(mapping)) {
+ __inc_zone_page_state(page, NR_FILE_DIRTY);
+ task_io_account_write(PAGE_CACHE_SIZE);
+ }
+ radix_tree_tag_set(&mapping->page_tree,
+ page_index(page), PAGECACHE_TAG_DIRTY);
+ }
+ write_unlock_irq(&mapping->tree_lock);
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+ return 1;
+}
+
+/*
* Add a page to the dirty page list.
*
* It is a sad fact of life that this function is called from several places
@@ -702,7 +735,7 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
*/
int __set_page_dirty_buffers(struct page *page)
{
- struct address_space * const mapping = page_mapping(page);
+ struct address_space *mapping = page_mapping(page);
if (unlikely(!mapping))
return !TestSetPageDirty(page);
@@ -719,21 +752,7 @@ int __set_page_dirty_buffers(struct page *page)
}
spin_unlock(&mapping->private_lock);
- if (TestSetPageDirty(page))
- return 0;
-
- write_lock_irq(&mapping->tree_lock);
- if (page->mapping) { /* Race with truncate? */
- if (mapping_cap_account_dirty(mapping)) {
- __inc_zone_page_state(page, NR_FILE_DIRTY);
- task_io_account_write(PAGE_CACHE_SIZE);
- }
- radix_tree_tag_set(&mapping->page_tree,
- page_index(page), PAGECACHE_TAG_DIRTY);
- }
- write_unlock_irq(&mapping->tree_lock);
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
- return 1;
+ return __set_page_dirty(page, mapping, 1);
}
EXPORT_SYMBOL(__set_page_dirty_buffers);
@@ -982,7 +1001,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
struct buffer_head *bh;
page = find_or_create_page(inode->i_mapping, index,
- mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+ (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
if (!page)
return NULL;
@@ -1026,11 +1045,6 @@ failed:
/*
* Create buffers for the specified block device block's page. If
* that page was dirty, the buffers are set dirty also.
- *
- * Except that's a bug. Attaching dirty buffers to a dirty
- * blockdev's page can result in filesystem corruption, because
- * some of those buffers may be aliases of filesystem data.
- * grow_dev_page() will go BUG() if this happens.
*/
static int
grow_buffers(struct block_device *bdev, sector_t block, int size)
@@ -1137,8 +1151,9 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
*/
void fastcall mark_buffer_dirty(struct buffer_head *bh)
{
+ WARN_ON_ONCE(!buffer_uptodate(bh));
if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
- __set_page_dirty_nobuffers(bh->b_page);
+ __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
}
/*
@@ -2179,6 +2194,52 @@ int generic_commit_write(struct file *file, struct page *page,
return 0;
}
+/*
+ * block_page_mkwrite() is not allowed to change the file size as it gets
+ * called from a page fault handler when a page is first dirtied. Hence we must
+ * be careful to check for EOF conditions here. We set the page up correctly
+ * for a written page which means we get ENOSPC checking when writing into
+ * holes and correct delalloc and unwritten extent mapping on filesystems that
+ * support these features.
+ *
+ * We are not allowed to take the i_mutex here so we have to play games to
+ * protect against truncate races as the page could now be beyond EOF. Because
+ * vmtruncate() writes the inode size before removing pages, once we have the
+ * page lock we can determine safely if the page is beyond EOF. If it is not
+ * beyond EOF, then the page is guaranteed safe against truncation until we
+ * unlock the page.
+ */
+int
+block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+ get_block_t get_block)
+{
+ struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ unsigned long end;
+ loff_t size;
+ int ret = -EINVAL;
+
+ lock_page(page);
+ size = i_size_read(inode);
+ if ((page->mapping != inode->i_mapping) ||
+ (page_offset(page) > size)) {
+ /* page got truncated out from underneath us */
+ goto out_unlock;
+ }
+
+ /* page is wholly or partially inside EOF */
+ if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
+ end = size & ~PAGE_CACHE_MASK;
+ else
+ end = PAGE_CACHE_SIZE;
+
+ ret = block_prepare_write(page, 0, end, get_block);
+ if (!ret)
+ ret = block_commit_write(page, 0, end);
+
+out_unlock:
+ unlock_page(page);
+ return ret;
+}
/*
* nobh_prepare_write()'s prereads are special: the buffer_heads are freed
@@ -2962,6 +3023,7 @@ EXPORT_SYMBOL(__brelse);
EXPORT_SYMBOL(__wait_on_buffer);
EXPORT_SYMBOL(block_commit_write);
EXPORT_SYMBOL(block_prepare_write);
+EXPORT_SYMBOL(block_page_mkwrite);
EXPORT_SYMBOL(block_read_full_page);
EXPORT_SYMBOL(block_sync_page);
EXPORT_SYMBOL(block_truncate_page);