diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-27 13:57:12 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-27 13:57:12 -0700 |
commit | a0c3061093c8b49facef95dc09a618c6e0d17cb5 (patch) | |
tree | 1d6ff7c06134b71a8bd0721395386e82e46e60c8 /fs/btrfs/extent_io.c | |
parent | 10799db60cbc4f990dd69eb49883477095c66af7 (diff) | |
parent | 174ba50915b08dcfd07c8b5fb795b46a165fa09a (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (58 commits)
Btrfs: use the device_list_mutex during write_dev_supers
Btrfs: setup free ino caching in a more asynchronous way
btrfs scrub: don't coalesce pages that are logically discontiguous
Btrfs: return -ENOMEM in clear_extent_bit
Btrfs: add mount -o auto_defrag
Btrfs: using rcu lock in the reader side of devices list
Btrfs: drop unnecessary device lock
Btrfs: fix the race between remove dev and alloc chunk
Btrfs: fix the race between reading and updating devices
Btrfs: fix bh leak on __btrfs_open_devices path
Btrfs: fix unsafe usage of merge_state
Btrfs: allocate extent state and check the result properly
fs/btrfs: Add missing btrfs_free_path
Btrfs: check return value of btrfs_inc_extent_ref()
Btrfs: return error to caller if read_one_inode() fails
Btrfs: BUG_ON is deleted from the caller of btrfs_truncate_item & btrfs_extend_item
Btrfs: return error code to caller when btrfs_del_item fails
Btrfs: return error code to caller when btrfs_previous_item fails
btrfs: fix typo 'testeing' -> 'testing'
btrfs: typo: 'btrfS' -> 'btrfs'
...
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 324 |
1 files changed, 60 insertions, 264 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4f9893243da..c5d9fbb92bc 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -103,7 +103,7 @@ void extent_io_exit(void) } void extent_io_tree_init(struct extent_io_tree *tree, - struct address_space *mapping, gfp_t mask) + struct address_space *mapping) { tree->state = RB_ROOT; INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC); @@ -441,6 +441,15 @@ static int clear_state_bit(struct extent_io_tree *tree, return ret; } +static struct extent_state * +alloc_extent_state_atomic(struct extent_state *prealloc) +{ + if (!prealloc) + prealloc = alloc_extent_state(GFP_ATOMIC); + + return prealloc; +} + /* * clear some bits on a range in the tree. This may require splitting * or inserting elements in the tree, so the gfp mask is used to @@ -531,8 +540,8 @@ hit_next: */ if (state->start < start) { - if (!prealloc) - prealloc = alloc_extent_state(GFP_ATOMIC); + prealloc = alloc_extent_state_atomic(prealloc); + BUG_ON(!prealloc); err = split_state(tree, state, prealloc, start); BUG_ON(err == -EEXIST); prealloc = NULL; @@ -553,8 +562,8 @@ hit_next: * on the first half */ if (state->start <= end && state->end > end) { - if (!prealloc) - prealloc = alloc_extent_state(GFP_ATOMIC); + prealloc = alloc_extent_state_atomic(prealloc); + BUG_ON(!prealloc); err = split_state(tree, state, prealloc, end + 1); BUG_ON(err == -EEXIST); if (wake) @@ -727,8 +736,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, again: if (!prealloc && (mask & __GFP_WAIT)) { prealloc = alloc_extent_state(mask); - if (!prealloc) - return -ENOMEM; + BUG_ON(!prealloc); } spin_lock(&tree->lock); @@ -745,6 +753,8 @@ again: */ node = tree_search(tree, start); if (!node) { + prealloc = alloc_extent_state_atomic(prealloc); + BUG_ON(!prealloc); err = insert_state(tree, prealloc, start, end, &bits); prealloc = NULL; BUG_ON(err == -EEXIST); @@ -773,20 +783,18 @@ hit_next: if (err) goto out; + next_node = rb_next(node); cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) goto out; start = last_end + 1; - if (start < end && prealloc && !need_resched()) { - next_node = rb_next(node); - if (next_node) { - state = rb_entry(next_node, struct extent_state, - rb_node); - if (state->start == start) - goto hit_next; - } + if (next_node && start < end && prealloc && !need_resched()) { + state = rb_entry(next_node, struct extent_state, + rb_node); + if (state->start == start) + goto hit_next; } goto search_again; } @@ -813,6 +821,9 @@ hit_next: err = -EEXIST; goto out; } + + prealloc = alloc_extent_state_atomic(prealloc); + BUG_ON(!prealloc); err = split_state(tree, state, prealloc, start); BUG_ON(err == -EEXIST); prealloc = NULL; @@ -843,14 +854,25 @@ hit_next: this_end = end; else this_end = last_start - 1; + + prealloc = alloc_extent_state_atomic(prealloc); + BUG_ON(!prealloc); + + /* + * Avoid to free 'prealloc' if it can be merged with + * the later extent. + */ + atomic_inc(&prealloc->refs); err = insert_state(tree, prealloc, start, this_end, &bits); BUG_ON(err == -EEXIST); if (err) { + free_extent_state(prealloc); prealloc = NULL; goto out; } cache_state(prealloc, cached_state); + free_extent_state(prealloc); prealloc = NULL; start = this_end + 1; goto search_again; @@ -867,6 +889,9 @@ hit_next: err = -EEXIST; goto out; } + + prealloc = alloc_extent_state_atomic(prealloc); + BUG_ON(!prealloc); err = split_state(tree, state, prealloc, end + 1); BUG_ON(err == -EEXIST); @@ -943,13 +968,6 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, NULL, mask); } -static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, - NULL, mask); -} - int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask) { @@ -965,11 +983,6 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, cached_state, mask); } -int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) -{ - return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); -} - /* * either insert or lock state struct between start and end use mask to tell * us if waiting is desired. @@ -1030,25 +1043,6 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) } /* - * helper function to set pages and extents in the tree dirty - */ -int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - BUG_ON(!page); - __set_page_dirty_nobuffers(page); - page_cache_release(page); - index++; - } - return 0; -} - -/* * helper function to set both pages and extents in the tree writeback */ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) @@ -1821,46 +1815,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) bio_put(bio); } -/* - * IO done from prepare_write is pretty simple, we just unlock - * the structs in the extent tree when done, and set the uptodate bits - * as appropriate. - */ -static void end_bio_extent_preparewrite(struct bio *bio, int err) -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_io_tree *tree; - u64 start; - u64 end; - - do { - struct page *page = bvec->bv_page; - struct extent_state *cached = NULL; - tree = &BTRFS_I(page->mapping->host)->io_tree; - - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (uptodate) { - set_extent_uptodate(tree, start, end, &cached, - GFP_ATOMIC); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - - unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); - - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -} - struct bio * btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, gfp_t gfp_flags) @@ -2009,7 +1963,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, struct btrfs_ordered_extent *ordered; int ret; int nr = 0; - size_t page_offset = 0; + size_t pg_offset = 0; size_t iosize; size_t disk_io_size; size_t blocksize = inode->i_sb->s_blocksize; @@ -2052,9 +2006,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree, char *userpage; struct extent_state *cached = NULL; - iosize = PAGE_CACHE_SIZE - page_offset; + iosize = PAGE_CACHE_SIZE - pg_offset; userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, iosize); + memset(userpage + pg_offset, 0, iosize); flush_dcache_page(page); kunmap_atomic(userpage, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, @@ -2063,9 +2017,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree, &cached, GFP_NOFS); break; } - em = get_extent(inode, page, page_offset, cur, + em = get_extent(inode, page, pg_offset, cur, end - cur + 1, 0); - if (IS_ERR(em) || !em) { + if (IS_ERR_OR_NULL(em)) { SetPageError(page); unlock_extent(tree, cur, end, GFP_NOFS); break; @@ -2103,7 +2057,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, struct extent_state *cached = NULL; userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, iosize); + memset(userpage + pg_offset, 0, iosize); flush_dcache_page(page); kunmap_atomic(userpage, KM_USER0); @@ -2112,7 +2066,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, unlock_extent_cached(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); cur = cur + iosize; - page_offset += iosize; + pg_offset += iosize; continue; } /* the get_extent function already copied into the page */ @@ -2121,7 +2075,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, check_page_uptodate(tree, page); unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); cur = cur + iosize; - page_offset += iosize; + pg_offset += iosize; continue; } /* we have an inline extent but it didn't get marked up @@ -2131,7 +2085,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, SetPageError(page); unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); cur = cur + iosize; - page_offset += iosize; + pg_offset += iosize; continue; } @@ -2144,7 +2098,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; pnr -= page->index; ret = submit_extent_page(READ, tree, page, - sector, disk_io_size, page_offset, + sector, disk_io_size, pg_offset, bdev, bio, pnr, end_bio_extent_readpage, mirror_num, *bio_flags, @@ -2155,7 +2109,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, if (ret) SetPageError(page); cur = cur + iosize; - page_offset += iosize; + pg_offset += iosize; } out: if (!nr) { @@ -2351,7 +2305,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, } em = epd->get_extent(inode, page, pg_offset, cur, end - cur + 1, 1); - if (IS_ERR(em) || !em) { + if (IS_ERR_OR_NULL(em)) { SetPageError(page); break; } @@ -2730,128 +2684,6 @@ int extent_invalidatepage(struct extent_io_tree *tree, } /* - * simple commit_write call, set_range_dirty is used to mark both - * the pages and the extent records as dirty - */ -int extent_commit_write(struct extent_io_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to) -{ - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - - set_page_extent_mapped(page); - set_page_dirty(page); - - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} - -int extent_prepare_write(struct extent_io_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to, get_extent_t *get_extent) -{ - u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - u64 block_start; - u64 orig_block_start; - u64 block_end; - u64 cur_end; - struct extent_map *em; - unsigned blocksize = 1 << inode->i_blkbits; - size_t page_offset = 0; - size_t block_off_start; - size_t block_off_end; - int err = 0; - int iocount = 0; - int ret = 0; - int isnew; - - set_page_extent_mapped(page); - - block_start = (page_start + from) & ~((u64)blocksize - 1); - block_end = (page_start + to - 1) | (blocksize - 1); - orig_block_start = block_start; - - lock_extent(tree, page_start, page_end, GFP_NOFS); - while (block_start <= block_end) { - em = get_extent(inode, page, page_offset, block_start, - block_end - block_start + 1, 1); - if (IS_ERR(em) || !em) - goto err; - - cur_end = min(block_end, extent_map_end(em) - 1); - block_off_start = block_start & (PAGE_CACHE_SIZE - 1); - block_off_end = block_off_start + blocksize; - isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); - - if (!PageUptodate(page) && isnew && - (block_off_end > to || block_off_start < from)) { - void *kaddr; - - kaddr = kmap_atomic(page, KM_USER0); - if (block_off_end > to) - memset(kaddr + to, 0, block_off_end - to); - if (block_off_start < from) - memset(kaddr + block_off_start, 0, - from - block_off_start); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - } - if ((em->block_start != EXTENT_MAP_HOLE && - em->block_start != EXTENT_MAP_INLINE) && - !isnew && !PageUptodate(page) && - (block_off_end > to || block_off_start < from) && - !test_range_bit(tree, block_start, cur_end, - EXTENT_UPTODATE, 1, NULL)) { - u64 sector; - u64 extent_offset = block_start - em->start; - size_t iosize; - sector = (em->block_start + extent_offset) >> 9; - iosize = (cur_end - block_start + blocksize) & - ~((u64)blocksize - 1); - /* - * we've already got the extent locked, but we - * need to split the state such that our end_bio - * handler can clear the lock. - */ - set_extent_bit(tree, block_start, - block_start + iosize - 1, - EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS); - ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, em->bdev, - NULL, 1, - end_bio_extent_preparewrite, 0, - 0, 0); - if (ret && !err) - err = ret; - iocount++; - block_start = block_start + iosize; - } else { - struct extent_state *cached = NULL; - - set_extent_uptodate(tree, block_start, cur_end, &cached, - GFP_NOFS); - unlock_extent_cached(tree, block_start, cur_end, - &cached, GFP_NOFS); - block_start = cur_end + 1; - } - page_offset = block_start & (PAGE_CACHE_SIZE - 1); - free_extent_map(em); - } - if (iocount) { - wait_extent_bit(tree, orig_block_start, - block_end, EXTENT_LOCKED); - } - check_page_uptodate(tree, page); -err: - /* FIXME, zero out newly allocated blocks on error */ - return err; -} - -/* * a helper for releasepage, this tests for areas of the page that * are locked or under IO and drops the related state bits if it is safe * to drop the page. @@ -2909,7 +2741,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, len = end - start + 1; write_lock(&map->lock); em = lookup_extent_mapping(map, start, len); - if (!em || IS_ERR(em)) { + if (IS_ERR_OR_NULL(em)) { write_unlock(&map->lock); break; } @@ -2937,33 +2769,6 @@ int try_release_extent_mapping(struct extent_map_tree *map, return try_release_extent_state(map, tree, page, mask); } -sector_t extent_bmap(struct address_space *mapping, sector_t iblock, - get_extent_t *get_extent) -{ - struct inode *inode = mapping->host; - struct extent_state *cached_state = NULL; - u64 start = iblock << inode->i_blkbits; - sector_t sector = 0; - size_t blksize = (1 << inode->i_blkbits); - struct extent_map *em; - - lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, - 0, &cached_state, GFP_NOFS); - em = get_extent(inode, NULL, 0, start, blksize, 0); - unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, - start + blksize - 1, &cached_state, GFP_NOFS); - if (!em || IS_ERR(em)) - return 0; - - if (em->block_start > EXTENT_MAP_LAST_BYTE) - goto out; - - sector = (em->block_start + start - em->start) >> inode->i_blkbits; -out: - free_extent_map(em); - return sector; -} - /* * helper function for fiemap, which doesn't want to see any holes. * This maps until we find something past 'last' @@ -2986,7 +2791,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, break; len = (len + sectorsize - 1) & ~(sectorsize - 1); em = get_extent(inode, NULL, 0, offset, len, 0); - if (!em || IS_ERR(em)) + if (IS_ERR_OR_NULL(em)) return em; /* if this isn't a hole return it */ @@ -3040,7 +2845,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * because there might be preallocation past i_size */ ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, - path, inode->i_ino, -1, 0); + path, btrfs_ino(inode), -1, 0); if (ret < 0) { btrfs_free_path(path); return ret; @@ -3053,7 +2858,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, found_type = btrfs_key_type(&found_key); /* No extents, but there might be delalloc bits */ - if (found_key.objectid != inode->i_ino || + if (found_key.objectid != btrfs_ino(inode) || found_type != BTRFS_EXTENT_DATA_KEY) { /* have to trust i_size as the end */ last = (u64)-1; @@ -3276,8 +3081,7 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len, - struct page *page0, - gfp_t mask) + struct page *page0) { unsigned long num_pages = num_extent_pages(start, len); unsigned long i; @@ -3298,7 +3102,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, } rcu_read_unlock(); - eb = __alloc_extent_buffer(tree, start, len, mask); + eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS); if (!eb) return NULL; @@ -3315,7 +3119,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, i = 0; } for (; i < num_pages; i++, index++) { - p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); + p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM); if (!p) { WARN_ON(1); goto free_eb; @@ -3387,8 +3191,7 @@ free_eb: } struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, - u64 start, unsigned long len, - gfp_t mask) + u64 start, unsigned long len) { struct extent_buffer *eb; @@ -3449,13 +3252,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, return 0; } -int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, - struct extent_buffer *eb) -{ - return wait_on_extent_writeback(tree, eb->start, - eb->start + eb->len - 1); -} - int set_extent_buffer_dirty(struct extent_io_tree *tree, struct extent_buffer *eb) { |