summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c175
1 files changed, 79 insertions, 96 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 472873a94d9..f173c5af646 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4,7 +4,6 @@
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
-#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
@@ -341,12 +340,10 @@ static int insert_state(struct extent_io_tree *tree,
{
struct rb_node *node;
- if (end < start) {
- printk(KERN_ERR "btrfs end < start %llu %llu\n",
+ if (end < start)
+ WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
(unsigned long long)end,
(unsigned long long)start);
- WARN_ON(1);
- }
state->start = start;
state->end = end;
@@ -1836,7 +1833,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
*/
static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
{
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+ u64 start = page_offset(page);
u64 end = start + PAGE_CACHE_SIZE - 1;
if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
SetPageUptodate(page);
@@ -1848,7 +1845,7 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
*/
static void check_page_locked(struct extent_io_tree *tree, struct page *page)
{
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+ u64 start = page_offset(page);
u64 end = start + PAGE_CACHE_SIZE - 1;
if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
unlock_page(page);
@@ -1897,13 +1894,11 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec,
if (ret)
err = ret;
- if (did_repair) {
- ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start,
- rec->start + rec->len - 1,
- EXTENT_DAMAGED, GFP_NOFS);
- if (ret && !err)
- err = ret;
- }
+ ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start,
+ rec->start + rec->len - 1,
+ EXTENT_DAMAGED, GFP_NOFS);
+ if (ret && !err)
+ err = ret;
kfree(rec);
return err;
@@ -1919,12 +1914,12 @@ static void repair_io_failure_callback(struct bio *bio, int err)
* the standard behavior is to write all copies in a raid setup. here we only
* want to write the one bad copy. so we do the mapping for ourselves and issue
* submit_bio directly.
- * to avoid any synchonization issues, wait for the data after writing, which
+ * to avoid any synchronization issues, wait for the data after writing, which
* actually prevents the read that triggered the error from finishing.
* currently, there can be no more than two copies of every data bit. thus,
* exactly one rewrite is required.
*/
-int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
+int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
u64 length, u64 logical, struct page *page,
int mirror_num)
{
@@ -1934,10 +1929,15 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
u64 map_length = 0;
u64 sector;
struct btrfs_bio *bbio = NULL;
+ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
int ret;
BUG_ON(!mirror_num);
+ /* we can't repair anything in raid56 yet */
+ if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
+ return 0;
+
bio = bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
@@ -1946,7 +1946,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
bio->bi_size = 0;
map_length = length;
- ret = btrfs_map_block(map_tree, WRITE, logical,
+ ret = btrfs_map_block(fs_info, WRITE, logical,
&map_length, &bbio, mirror_num);
if (ret) {
bio_put(bio);
@@ -1962,7 +1962,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
return -EIO;
}
bio->bi_bdev = dev->bdev;
- bio_add_page(bio, page, length, start-page_offset(page));
+ bio_add_page(bio, page, length, start - page_offset(page));
btrfsic_submit_bio(WRITE_SYNC, bio);
wait_for_completion(&compl);
@@ -1984,14 +1984,13 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
int mirror_num)
{
- struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
u64 start = eb->start;
unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
int ret = 0;
for (i = 0; i < num_pages; i++) {
struct page *p = extent_buffer_page(eb, i);
- ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE,
+ ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
start, p, mirror_num);
if (ret)
break;
@@ -2010,7 +2009,7 @@ static int clean_io_failure(u64 start, struct page *page)
u64 private;
u64 private_failure;
struct io_failure_record *failrec;
- struct btrfs_mapping_tree *map_tree;
+ struct btrfs_fs_info *fs_info;
struct extent_state *state;
int num_copies;
int did_repair = 0;
@@ -2046,15 +2045,16 @@ static int clean_io_failure(u64 start, struct page *page)
spin_unlock(&BTRFS_I(inode)->io_tree.lock);
if (state && state->start == failrec->start) {
- map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree;
- num_copies = btrfs_num_copies(map_tree, failrec->logical,
- failrec->len);
+ fs_info = BTRFS_I(inode)->root->fs_info;
+ num_copies = btrfs_num_copies(fs_info, failrec->logical,
+ failrec->len);
if (num_copies > 1) {
- ret = repair_io_failure(map_tree, start, failrec->len,
+ ret = repair_io_failure(fs_info, start, failrec->len,
failrec->logical, page,
failrec->failed_mirror);
did_repair = !ret;
}
+ ret = 0;
}
out:
@@ -2159,9 +2159,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
* clean_io_failure() clean all those errors at once.
*/
}
- num_copies = btrfs_num_copies(
- &BTRFS_I(inode)->root->fs_info->mapping_tree,
- failrec->logical, failrec->len);
+ num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
+ failrec->logical, failrec->len);
if (num_copies == 1) {
/*
* we only have a single copy of the data, so don't bother with
@@ -2297,8 +2296,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
struct page *page = bvec->bv_page;
tree = &BTRFS_I(page->mapping->host)->io_tree;
- start = ((u64)page->index << PAGE_CACHE_SHIFT) +
- bvec->bv_offset;
+ start = page_offset(page) + bvec->bv_offset;
end = start + bvec->bv_len - 1;
if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
@@ -2357,8 +2355,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
(long int)bio->bi_bdev);
tree = &BTRFS_I(page->mapping->host)->io_tree;
- start = ((u64)page->index << PAGE_CACHE_SHIFT) +
- bvec->bv_offset;
+ start = page_offset(page) + bvec->bv_offset;
end = start + bvec->bv_len - 1;
if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
@@ -2466,10 +2463,6 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
return bio;
}
-/*
- * Since writes are async, they will only return -ENOMEM.
- * Reads can return the full range of I/O error conditions.
- */
static int __must_check submit_one_bio(int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
@@ -2479,7 +2472,7 @@ static int __must_check submit_one_bio(int rw, struct bio *bio,
struct extent_io_tree *tree = bio->bi_private;
u64 start;
- start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
+ start = page_offset(page) + bvec->bv_offset;
bio->bi_private = NULL;
@@ -2497,13 +2490,13 @@ static int __must_check submit_one_bio(int rw, struct bio *bio,
return ret;
}
-static int merge_bio(struct extent_io_tree *tree, struct page *page,
+static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page,
unsigned long offset, size_t size, struct bio *bio,
unsigned long bio_flags)
{
int ret = 0;
if (tree->ops && tree->ops->merge_bio_hook)
- ret = tree->ops->merge_bio_hook(page, offset, size, bio,
+ ret = tree->ops->merge_bio_hook(rw, page, offset, size, bio,
bio_flags);
BUG_ON(ret < 0);
return ret;
@@ -2538,7 +2531,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
sector;
if (prev_bio_flags != bio_flags || !contig ||
- merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
+ merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
bio_add_page(bio, page, page_size, offset) < page_size) {
ret = submit_one_bio(rw, bio, mirror_num,
prev_bio_flags);
@@ -2603,7 +2596,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
unsigned long *bio_flags)
{
struct inode *inode = page->mapping->host;
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+ u64 start = page_offset(page);
u64 page_end = start + PAGE_CACHE_SIZE - 1;
u64 end;
u64 cur = start;
@@ -2656,6 +2649,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
}
}
while (cur <= end) {
+ unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+
if (cur >= last_byte) {
char *userpage;
struct extent_state *cached = NULL;
@@ -2690,7 +2685,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
iosize = min(extent_map_end(em) - cur, end - cur + 1);
cur_end = min(extent_map_end(em) - 1, end);
- iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
+ iosize = ALIGN(iosize, blocksize);
if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
disk_io_size = em->block_len;
sector = em->block_start >> 9;
@@ -2743,26 +2738,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
continue;
}
- ret = 0;
- if (tree->ops && tree->ops->readpage_io_hook) {
- ret = tree->ops->readpage_io_hook(page, cur,
- cur + iosize - 1);
- }
- if (!ret) {
- unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
- pnr -= page->index;
- ret = submit_extent_page(READ, tree, page,
+ pnr -= page->index;
+ ret = submit_extent_page(READ, tree, page,
sector, disk_io_size, pg_offset,
bdev, bio, pnr,
end_bio_extent_readpage, mirror_num,
*bio_flags,
this_bio_flag);
- if (!ret) {
- nr++;
- *bio_flags = this_bio_flag;
- }
- }
- if (ret) {
+ if (!ret) {
+ nr++;
+ *bio_flags = this_bio_flag;
+ } else {
SetPageError(page);
unlock_extent(tree, cur, cur + iosize - 1);
}
@@ -2814,7 +2800,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
struct inode *inode = page->mapping->host;
struct extent_page_data *epd = data;
struct extent_io_tree *tree = epd->tree;
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+ u64 start = page_offset(page);
u64 delalloc_start;
u64 page_end = start + PAGE_CACHE_SIZE - 1;
u64 end;
@@ -2990,7 +2976,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
BUG_ON(extent_map_end(em) <= cur);
BUG_ON(end < cur);
iosize = min(extent_map_end(em) - cur, end - cur + 1);
- iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
+ iosize = ALIGN(iosize, blocksize);
sector = (em->block_start + extent_offset) >> 9;
bdev = em->bdev;
block_start = em->block_start;
@@ -3132,12 +3118,9 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
spin_unlock(&eb->refs_lock);
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
- spin_lock(&fs_info->delalloc_lock);
- if (fs_info->dirty_metadata_bytes >= eb->len)
- fs_info->dirty_metadata_bytes -= eb->len;
- else
- WARN_ON(1);
- spin_unlock(&fs_info->delalloc_lock);
+ __percpu_counter_add(&fs_info->dirty_metadata_bytes,
+ -eb->len,
+ fs_info->dirty_metadata_batch);
ret = 1;
} else {
spin_unlock(&eb->refs_lock);
@@ -3454,15 +3437,9 @@ retry:
* swizzled back from swapper_space to tmpfs file
* mapping
*/
- if (tree->ops &&
- tree->ops->write_cache_pages_lock_hook) {
- tree->ops->write_cache_pages_lock_hook(page,
- data, flush_fn);
- } else {
- if (!trylock_page(page)) {
- flush_fn(data);
- lock_page(page);
- }
+ if (!trylock_page(page)) {
+ flush_fn(data);
+ lock_page(page);
}
if (unlikely(page->mapping != mapping)) {
@@ -3682,11 +3659,11 @@ int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset)
{
struct extent_state *cached_state = NULL;
- u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
+ u64 start = page_offset(page);
u64 end = start + PAGE_CACHE_SIZE - 1;
size_t blocksize = page->mapping->host->i_sb->s_blocksize;
- start += (offset + blocksize - 1) & ~(blocksize - 1);
+ start += ALIGN(offset, blocksize);
if (start > end)
return 0;
@@ -3708,7 +3685,7 @@ int try_release_extent_state(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page,
gfp_t mask)
{
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+ u64 start = page_offset(page);
u64 end = start + PAGE_CACHE_SIZE - 1;
int ret = 1;
@@ -3747,7 +3724,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
gfp_t mask)
{
struct extent_map *em;
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+ u64 start = page_offset(page);
u64 end = start + PAGE_CACHE_SIZE - 1;
if ((mask & __GFP_WAIT) &&
@@ -3805,7 +3782,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
len = last - offset;
if (len == 0)
break;
- len = (len + sectorsize - 1) & ~(sectorsize - 1);
+ len = ALIGN(len, sectorsize);
em = get_extent(inode, NULL, 0, offset, len, 0);
if (IS_ERR_OR_NULL(em))
return em;
@@ -4003,8 +3980,6 @@ static void __free_extent_buffer(struct extent_buffer *eb)
list_del(&eb->leak_list);
spin_unlock_irqrestore(&leak_lock, flags);
#endif
- if (eb->pages && eb->pages != eb->inline_pages)
- kfree(eb->pages);
kmem_cache_free(extent_buffer_cache, eb);
}
@@ -4045,19 +4020,12 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
atomic_set(&eb->refs, 1);
atomic_set(&eb->io_pages, 0);
- if (len > MAX_INLINE_EXTENT_BUFFER_SIZE) {
- struct page **pages;
- int num_pages = (len + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
- pages = kzalloc(num_pages, mask);
- if (!pages) {
- __free_extent_buffer(eb);
- return NULL;
- }
- eb->pages = pages;
- } else {
- eb->pages = eb->inline_pages;
- }
+ /*
+ * Sanity checks, currently the maximum is 64k covered by 16x 4k pages
+ */
+ BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
+ > MAX_INLINE_EXTENT_BUFFER_SIZE);
+ BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
return eb;
}
@@ -4188,6 +4156,7 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
static void check_buffer_tree_ref(struct extent_buffer *eb)
{
+ int refs;
/* the ref bit is tricky. We have to make sure it is set
* if we have the buffer dirty. Otherwise the
* code to free a buffer can end up dropping a dirty
@@ -4208,6 +4177,10 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
* So bump the ref count first, then set the bit. If someone
* beat us to it, drop the ref we added.
*/
+ refs = atomic_read(&eb->refs);
+ if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+ return;
+
spin_lock(&eb->refs_lock);
if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
atomic_inc(&eb->refs);
@@ -4409,9 +4382,20 @@ static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
void free_extent_buffer(struct extent_buffer *eb)
{
+ int refs;
+ int old;
if (!eb)
return;
+ while (1) {
+ refs = atomic_read(&eb->refs);
+ if (refs <= 3)
+ break;
+ old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
+ if (old == refs)
+ return;
+ }
+
spin_lock(&eb->refs_lock);
if (atomic_read(&eb->refs) == 2 &&
test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
@@ -4721,10 +4705,9 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
}
if (start + min_len > eb->len) {
- printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
+ WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
"wanted %lu %lu\n", (unsigned long long)eb->start,
eb->len, start, min_len);
- WARN_ON(1);
return -EINVAL;
}