summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c185
1 files changed, 116 insertions, 69 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 32d67a822e9..583d98bd065 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -23,6 +23,7 @@
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
+static struct bio_set *btrfs_bioset;
#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(buffers);
@@ -76,10 +77,29 @@ void btrfs_leak_debug_check(void)
kmem_cache_free(extent_buffer_cache, eb);
}
}
+
+#define btrfs_debug_check_extent_io_range(inode, start, end) \
+ __btrfs_debug_check_extent_io_range(__func__, (inode), (start), (end))
+static inline void __btrfs_debug_check_extent_io_range(const char *caller,
+ struct inode *inode, u64 start, u64 end)
+{
+ u64 isize = i_size_read(inode);
+
+ if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
+ printk_ratelimited(KERN_DEBUG
+ "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
+ caller,
+ (unsigned long long)btrfs_ino(inode),
+ (unsigned long long)isize,
+ (unsigned long long)start,
+ (unsigned long long)end);
+ }
+}
#else
#define btrfs_leak_debug_add(new, head) do {} while (0)
#define btrfs_leak_debug_del(entry) do {} while (0)
#define btrfs_leak_debug_check() do {} while (0)
+#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
#endif
#define BUFFER_LRU_MAX 64
@@ -125,10 +145,20 @@ int __init extent_io_init(void)
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_buffer_cache)
goto free_state_cache;
+
+ btrfs_bioset = bioset_create(BIO_POOL_SIZE,
+ offsetof(struct btrfs_io_bio, bio));
+ if (!btrfs_bioset)
+ goto free_buffer_cache;
return 0;
+free_buffer_cache:
+ kmem_cache_destroy(extent_buffer_cache);
+ extent_buffer_cache = NULL;
+
free_state_cache:
kmem_cache_destroy(extent_state_cache);
+ extent_state_cache = NULL;
return -ENOMEM;
}
@@ -145,6 +175,8 @@ void extent_io_exit(void)
kmem_cache_destroy(extent_state_cache);
if (extent_buffer_cache)
kmem_cache_destroy(extent_buffer_cache);
+ if (btrfs_bioset)
+ bioset_free(btrfs_bioset);
}
void extent_io_tree_init(struct extent_io_tree *tree,
@@ -509,6 +541,11 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int err;
int clear = 0;
+ btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+
+ if (bits & EXTENT_DELALLOC)
+ bits |= EXTENT_NORESERVE;
+
if (delete)
bits |= ~EXTENT_CTLBITS;
bits |= EXTENT_FIRST_DELALLOC;
@@ -664,6 +701,8 @@ static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *state;
struct rb_node *node;
+ btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+
spin_lock(&tree->lock);
again:
while (1) {
@@ -756,6 +795,8 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u64 last_start;
u64 last_end;
+ btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+
bits |= EXTENT_FIRST_DELALLOC;
again:
if (!prealloc && (mask & __GFP_WAIT)) {
@@ -976,6 +1017,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u64 last_start;
u64 last_end;
+ btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+
again:
if (!prealloc && (mask & __GFP_WAIT)) {
prealloc = alloc_extent_state(mask);
@@ -1948,28 +1991,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
}
/*
- * helper function to unlock a page if all the extents in the tree
- * for that page are unlocked
- */
-static void check_page_locked(struct extent_io_tree *tree, struct page *page)
-{
- u64 start = page_offset(page);
- u64 end = start + PAGE_CACHE_SIZE - 1;
- if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
- unlock_page(page);
-}
-
-/*
- * helper function to end page writeback if all the extents
- * in the tree for that page are done with writeback
- */
-static void check_page_writeback(struct extent_io_tree *tree,
- struct page *page)
-{
- end_page_writeback(page);
-}
-
-/*
* When IO fails, either with EIO or csum verification fails, we
* try other mirrors that might have a good copy of the data. This
* io_failure_record is used to record state as we go through all the
@@ -2046,7 +2067,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
return 0;
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio->bi_private = &compl;
@@ -2336,7 +2357,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
return -EIO;
}
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
free_io_failure(inode, failrec, 0);
return -EIO;
@@ -2398,19 +2419,24 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
struct extent_io_tree *tree;
u64 start;
u64 end;
- int whole_page;
do {
struct page *page = bvec->bv_page;
tree = &BTRFS_I(page->mapping->host)->io_tree;
- start = page_offset(page) + bvec->bv_offset;
- end = start + bvec->bv_len - 1;
+ /* We always issue full-page reads, but if some block
+ * in a page fails to read, blk_update_request() will
+ * advance bv_offset and adjust bv_len to compensate.
+ * Print a warning for nonzero offsets, and an error
+ * if they don't add up to a full page. */
+ if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+ printk("%s page write in btrfs with offset %u and length %u\n",
+ bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+ ? KERN_ERR "partial" : KERN_INFO "incomplete",
+ bvec->bv_offset, bvec->bv_len);
- if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
- whole_page = 1;
- else
- whole_page = 0;
+ start = page_offset(page);
+ end = start + bvec->bv_offset + bvec->bv_len - 1;
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
@@ -2418,10 +2444,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
if (end_extent_writepage(page, err, start, end))
continue;
- if (whole_page)
- end_page_writeback(page);
- else
- check_page_writeback(tree, page);
+ end_page_writeback(page);
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
@@ -2446,7 +2469,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
struct extent_io_tree *tree;
u64 start;
u64 end;
- int whole_page;
int mirror;
int ret;
@@ -2457,19 +2479,27 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
struct page *page = bvec->bv_page;
struct extent_state *cached = NULL;
struct extent_state *state;
+ struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+ struct inode *inode = page->mapping->host;
pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
- "mirror=%ld\n", (u64)bio->bi_sector, err,
- (long int)bio->bi_bdev);
- tree = &BTRFS_I(page->mapping->host)->io_tree;
-
- start = page_offset(page) + bvec->bv_offset;
- end = start + bvec->bv_len - 1;
-
- if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
- whole_page = 1;
- else
- whole_page = 0;
+ "mirror=%lu\n", (u64)bio->bi_sector, err,
+ io_bio->mirror_num);
+ tree = &BTRFS_I(inode)->io_tree;
+
+ /* We always issue full-page reads, but if some block
+ * in a page fails to read, blk_update_request() will
+ * advance bv_offset and adjust bv_len to compensate.
+ * Print a warning for nonzero offsets, and an error
+ * if they don't add up to a full page. */
+ if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+ printk("%s page read in btrfs with offset %u and length %u\n",
+ bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+ ? KERN_ERR "partial" : KERN_INFO "incomplete",
+ bvec->bv_offset, bvec->bv_len);
+
+ start = page_offset(page);
+ end = start + bvec->bv_offset + bvec->bv_len - 1;
if (++bvec <= bvec_end)
prefetchw(&bvec->bv_page->flags);
@@ -2485,7 +2515,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
}
spin_unlock(&tree->lock);
- mirror = (int)(unsigned long)bio->bi_bdev;
+ mirror = io_bio->mirror_num;
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
state, mirror);
@@ -2528,39 +2558,43 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
}
unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
- if (whole_page) {
- if (uptodate) {
- SetPageUptodate(page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- unlock_page(page);
+ if (uptodate) {
+ loff_t i_size = i_size_read(inode);
+ pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+ unsigned offset;
+
+ /* Zero out the end if this page straddles i_size */
+ offset = i_size & (PAGE_CACHE_SIZE-1);
+ if (page->index == end_index && offset)
+ zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
} else {
- if (uptodate) {
- check_page_uptodate(tree, page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- check_page_locked(tree, page);
+ ClearPageUptodate(page);
+ SetPageError(page);
}
+ unlock_page(page);
} while (bvec <= bvec_end);
bio_put(bio);
}
+/*
+ * this allocates from the btrfs_bioset. We're returning a bio right now
+ * but you can call btrfs_io_bio for the appropriate container_of magic
+ */
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags)
{
struct bio *bio;
- bio = bio_alloc(gfp_flags, nr_vecs);
+ bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
if (bio == NULL && (current->flags & PF_MEMALLOC)) {
- while (!bio && (nr_vecs /= 2))
- bio = bio_alloc(gfp_flags, nr_vecs);
+ while (!bio && (nr_vecs /= 2)) {
+ bio = bio_alloc_bioset(gfp_flags,
+ nr_vecs, btrfs_bioset);
+ }
}
if (bio) {
@@ -2571,6 +2605,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
return bio;
}
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
+{
+ return bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
+}
+
+
+/* this also allocates from the btrfs_bioset */
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+{
+ return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+}
+
+
static int __must_check submit_one_bio(int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
@@ -2949,7 +2996,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
if (page->index > end_index ||
(page->index == end_index && !pg_offset)) {
- page->mapping->a_ops->invalidatepage(page, 0);
+ page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
unlock_page(page);
return 0;
}
@@ -3988,7 +4035,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
last_for_get_extent = isize;
}
- lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
&cached_state);
em = get_extent_skip_holes(inode, start, last_for_get_extent,
@@ -4075,7 +4122,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
out_free:
free_extent_map(em);
out:
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len,
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state, GFP_NOFS);
return ret;
}