diff options
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- | fs/ext4/extents.c | 178 |
1 files changed, 129 insertions, 49 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 10539e36428..765a4826b11 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) * to allocate @blocks * Worse case is one block per extent */ -int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) +int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock) { - int lcap, icap, rcap, leafs, idxs, num; - int newextents = blocks; - - rcap = ext4_ext_space_root_idx(inode, 0); - lcap = ext4_ext_space_block(inode, 0); - icap = ext4_ext_space_block_idx(inode, 0); + struct ext4_inode_info *ei = EXT4_I(inode); + int idxs, num = 0; - /* number of new leaf blocks needed */ - num = leafs = (newextents + lcap - 1) / lcap; + idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) + / sizeof(struct ext4_extent_idx)); /* - * Worse case, we need separate index block(s) - * to link all new leaf blocks + * If the new delayed allocation block is contiguous with the + * previous da block, it can share index blocks with the + * previous block, so we only need to allocate a new index + * block every idxs leaf blocks. At ldxs**2 blocks, we need + * an additional index block, and at ldxs**3 blocks, yet + * another index blocks. */ - idxs = (leafs + icap - 1) / icap; - do { - num += idxs; - idxs = (idxs + icap - 1) / icap; - } while (idxs > rcap); + if (ei->i_da_metadata_calc_len && + ei->i_da_metadata_calc_last_lblock+1 == lblock) { + if ((ei->i_da_metadata_calc_len % idxs) == 0) + num++; + if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) + num++; + if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) { + num++; + ei->i_da_metadata_calc_len = 0; + } else + ei->i_da_metadata_calc_len++; + ei->i_da_metadata_calc_last_lblock++; + return num; + } - return num; + /* + * In the worst case we need a new set of index blocks at + * every level of the inode's extent tree. + */ + ei->i_da_metadata_calc_len = 1; + ei->i_da_metadata_calc_last_lblock = lblock; + return ext_depth(inode) + 1; } static int @@ -1007,7 +1022,8 @@ cleanup: for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; - ext4_free_blocks(handle, inode, ablocks[i], 1, 1); + ext4_free_blocks(handle, inode, 0, ablocks[i], 1, + EXT4_FREE_BLOCKS_METADATA); } } kfree(ablocks); @@ -1761,7 +1777,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, while (block < last && block != EXT_MAX_BLOCK) { num = last - block; /* find extent for this block */ + down_read(&EXT4_I(inode)->i_data_sem); path = ext4_ext_find_extent(inode, block, path); + up_read(&EXT4_I(inode)->i_data_sem); if (IS_ERR(path)) { err = PTR_ERR(path); path = NULL; @@ -1957,7 +1975,6 @@ errout: static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { - struct buffer_head *bh; int err; ext4_fsblk_t leaf; @@ -1973,9 +1990,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, if (err) return err; ext_debug("index is empty, remove it, free block %llu\n", leaf); - bh = sb_find_get_block(inode->i_sb, leaf); - ext4_forget(handle, 1, inode, bh, leaf); - ext4_free_blocks(handle, inode, leaf, 1, 1); + ext4_free_blocks(handle, inode, 0, leaf, 1, + EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); return err; } @@ -2042,12 +2058,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, struct ext4_extent *ex, ext4_lblk_t from, ext4_lblk_t to) { - struct buffer_head *bh; unsigned short ee_len = ext4_ext_get_actual_len(ex); - int i, metadata = 0; + int flags = EXT4_FREE_BLOCKS_FORGET; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - metadata = 1; + flags |= EXT4_FREE_BLOCKS_METADATA; #ifdef EXTENTS_STATS { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -2072,11 +2087,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, num = le32_to_cpu(ex->ee_block) + ee_len - from; start = ext_pblock(ex) + ee_len - num; ext_debug("free last %u blocks starting %llu\n", num, start); - for (i = 0; i < num; i++) { - bh = sb_find_get_block(inode->i_sb, start + i); - ext4_forget(handle, 0, inode, bh, start + i); - } - ext4_free_blocks(handle, inode, start, num, metadata); + ext4_free_blocks(handle, inode, 0, start, num, flags); } else if (from == le32_to_cpu(ex->ee_block) && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", @@ -2167,7 +2178,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, correct_index = 1; credits += (ext_depth(inode)) + 1; } - credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); + credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); err = ext4_ext_truncate_extend_restart(handle, inode, credits); if (err) @@ -2807,6 +2818,8 @@ fix_extent_len: * into three uninitialized extent(at most). After IO complete, the part * being filled will be convert to initialized by the end_io callback function * via ext4_convert_unwritten_extents(). + * + * Returns the size of uninitialized extent to be written on success. */ static int ext4_split_unwritten_extents(handle_t *handle, struct inode *inode, @@ -2824,7 +2837,6 @@ static int ext4_split_unwritten_extents(handle_t *handle, unsigned int allocated, ee_len, depth; ext4_fsblk_t newblock; int err = 0; - int ret = 0; ext_debug("ext4_split_unwritten_extents: inode %lu," "iblock %llu, max_blocks %u\n", inode->i_ino, @@ -2842,12 +2854,12 @@ static int ext4_split_unwritten_extents(handle_t *handle, ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); /* - * if the entire unintialized extent length less than - * the size of extent to write, there is no need to split - * uninitialized extent + * If the uninitialized extent begins at the same logical + * block where the write begins, and the write completely + * covers the extent, then we don't need to split it. */ - if (allocated <= max_blocks) - return ret; + if ((iblock == ee_block) && (allocated <= max_blocks)) + return allocated; err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -3026,6 +3038,14 @@ out: return err; } +static void unmap_underlying_metadata_blocks(struct block_device *bdev, + sector_t block, int count) +{ + int i; + for (i = 0; i < count; i++) + unmap_underlying_metadata(bdev, block + i); +} + static int ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned int max_blocks, @@ -3048,15 +3068,23 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ret = ext4_split_unwritten_extents(handle, inode, path, iblock, max_blocks, flags); - /* flag the io_end struct that we need convert when IO done */ + /* + * Flag the inode(non aio case) or end_io struct (aio case) + * that this IO needs to convertion to written when IO is + * completed + */ if (io) io->flag = DIO_AIO_UNWRITTEN; + else + EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN; goto out; } - /* DIO end_io complete, convert the filled extent to written */ + /* async DIO end_io complete, convert the filled extent to written */ if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { ret = ext4_convert_unwritten_extents_dio(handle, inode, path); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); goto out2; } /* buffered IO case */ @@ -3084,6 +3112,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ret = ext4_ext_convert_to_initialized(handle, inode, path, iblock, max_blocks); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); out: if (ret <= 0) { err = ret; @@ -3091,6 +3121,30 @@ out: } else allocated = ret; set_buffer_new(bh_result); + /* + * if we allocated more blocks than requested + * we need to make sure we unmap the extra block + * allocated. The actual needed block will get + * unmapped later when we find the buffer_head marked + * new. + */ + if (allocated > max_blocks) { + unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, + newblock + max_blocks, + allocated - max_blocks); + allocated = max_blocks; + } + + /* + * If we have done fallocate with the offset that is already + * delayed allocated, we would have block reservation + * and quota reservation done in the delayed write path. + * But fallocate would have already updated quota and block + * count for this offset. So cancel these reservation + */ + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + ext4_da_update_reserve_space(inode, allocated, 0); + map_out: set_buffer_mapped(bh_result); out1: @@ -3183,7 +3237,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * this situation is possible, though, _during_ tree modification; * this is why assert can't be put in ext4_ext_find_extent() */ - BUG_ON(path[depth].p_ext == NULL && depth != 0); + if (path[depth].p_ext == NULL && depth != 0) { + ext4_error(inode->i_sb, __func__, "bad extent address " + "inode: %lu, iblock: %d, depth: %d", + inode->i_ino, iblock, depth); + err = -EIO; + goto out2; + } eh = path[depth].p_hdr; ex = path[depth].p_ext; @@ -3295,10 +3355,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * To avoid unecessary convertion for every aio dio rewrite * to the mid of file, here we flag the IO that is really * need the convertion. - * + * For non asycn direct IO case, flag the inode state + * that we need to perform convertion when IO is done. */ - if (io && flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) - io->flag = DIO_AIO_UNWRITTEN; + if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) { + if (io) + io->flag = DIO_AIO_UNWRITTEN; + else + EXT4_I(inode)->i_state |= + EXT4_STATE_DIO_UNWRITTEN;; + } } err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err) { @@ -3306,20 +3372,35 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ ext4_discard_preallocations(inode); - ext4_free_blocks(handle, inode, ext_pblock(&newex), - ext4_ext_get_actual_len(&newex), 0); + ext4_free_blocks(handle, inode, 0, ext_pblock(&newex), + ext4_ext_get_actual_len(&newex), 0); goto out2; } /* previous routine could use block we allocated */ newblock = ext_pblock(&newex); allocated = ext4_ext_get_actual_len(&newex); + if (allocated > max_blocks) + allocated = max_blocks; set_buffer_new(bh_result); - /* Cache only when it is _not_ an uninitialized extent */ - if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) + /* + * Update reserved blocks/metadata blocks after successful + * block allocation which had been deferred till now. + */ + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + ext4_da_update_reserve_space(inode, allocated, 1); + + /* + * Cache the extent and update transaction to commit on fdatasync only + * when it is _not_ an uninitialized extent. + */ + if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { ext4_ext_put_in_cache(inode, iblock, allocated, newblock, EXT4_EXT_CACHE_EXTENT); + ext4_update_inode_fsync_trans(handle, inode, 1); + } else + ext4_update_inode_fsync_trans(handle, inode, 0); out: if (allocated > max_blocks) allocated = max_blocks; @@ -3519,6 +3600,7 @@ retry: * * This function is called from the direct IO end io call back * function, to convert the fallocated extents after IO is completed. + * Returns 0 on success. */ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, loff_t len) @@ -3706,10 +3788,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * Walk the extent tree gathering extent information. * ext4_ext_fiemap_cb will push extents back to user. */ - down_read(&EXT4_I(inode)->i_data_sem); error = ext4_ext_walk_space(inode, start_blk, len_blks, ext4_ext_fiemap_cb, fieinfo); - up_read(&EXT4_I(inode)->i_data_sem); } return error; |