diff options
Diffstat (limited to 'fs/ocfs2/refcounttree.c')
-rw-r--r-- | fs/ocfs2/refcounttree.c | 184 |
1 files changed, 120 insertions, 64 deletions
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 74db2be75dd..b5f9160e93e 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -37,7 +37,6 @@ #include <linux/bio.h> #include <linux/blkdev.h> -#include <linux/gfp.h> #include <linux/slab.h> #include <linux/writeback.h> #include <linux/pagevec.h> @@ -50,6 +49,7 @@ struct ocfs2_cow_context { struct inode *inode; + struct file *file; u32 cow_start; u32 cow_len; struct ocfs2_extent_tree data_et; @@ -571,7 +571,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode, struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL; u16 suballoc_bit_start; u32 num_got; - u64 first_blkno; + u64 suballoc_loc, first_blkno; BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); @@ -597,7 +597,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode, goto out_commit; } - ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, + ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc, &suballoc_bit_start, &num_got, &first_blkno); if (ret) { @@ -626,7 +626,8 @@ static int ocfs2_create_refcount_tree(struct inode *inode, rb = (struct ocfs2_refcount_block *)new_bh->b_data; memset(rb, 0, inode->i_sb->s_blocksize); strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); - rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num); + rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); + rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc); rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); rb->rf_fs_generation = cpu_to_le32(osb->fs_generation); rb->rf_blkno = cpu_to_le64(first_blkno); @@ -791,7 +792,10 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh) if (le32_to_cpu(rb->rf_count) == 1) { blk = le64_to_cpu(rb->rf_blkno); bit = le16_to_cpu(rb->rf_suballoc_bit); - bg_blkno = ocfs2_which_suballoc_group(blk, bit); + if (rb->rf_suballoc_loc) + bg_blkno = le64_to_cpu(rb->rf_suballoc_loc); + else + bg_blkno = ocfs2_which_suballoc_group(blk, bit); alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, @@ -1269,9 +1273,7 @@ static int ocfs2_change_refcount_rec(handle_t *handle, } else if (merge) ocfs2_refcount_rec_merge(rb, index); - ret = ocfs2_journal_dirty(handle, ref_leaf_bh); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, ref_leaf_bh); out: return ret; } @@ -1285,7 +1287,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle, int ret; u16 suballoc_bit_start; u32 num_got; - u64 blkno; + u64 suballoc_loc, blkno; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct buffer_head *new_bh = NULL; struct ocfs2_refcount_block *new_rb; @@ -1299,7 +1301,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle, goto out; } - ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, + ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc, &suballoc_bit_start, &num_got, &blkno); if (ret) { @@ -1330,7 +1332,8 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle, memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize); new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; - new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); + new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); + new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc); new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); new_rb->rf_blkno = cpu_to_le64(blkno); new_rb->rf_cpos = cpu_to_le32(0); @@ -1525,7 +1528,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle, int ret; u16 suballoc_bit_start; u32 num_got, new_cpos; - u64 blkno; + u64 suballoc_loc, blkno; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct ocfs2_refcount_block *root_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; @@ -1549,7 +1552,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle, goto out; } - ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, + ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc, &suballoc_bit_start, &num_got, &blkno); if (ret) { @@ -1576,7 +1579,8 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle, new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; memset(new_rb, 0, sb->s_blocksize); strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); - new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); + new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); + new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc); new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); new_rb->rf_blkno = cpu_to_le64(blkno); @@ -1695,7 +1699,7 @@ static int ocfs2_adjust_refcount_rec(handle_t *handle, * 2 more credits, one for the leaf refcount block, one for * the extent block contains the extent rec. */ - ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2); + ret = ocfs2_extend_trans(handle, 2); if (ret < 0) { mlog_errno(ret); goto out; @@ -1803,11 +1807,7 @@ static int ocfs2_insert_refcount_rec(handle_t *handle, if (merge) ocfs2_refcount_rec_merge(rb, index); - ret = ocfs2_journal_dirty(handle, ref_leaf_bh); - if (ret) { - mlog_errno(ret); - goto out; - } + ocfs2_journal_dirty(handle, ref_leaf_bh); if (index == 0) { ret = ocfs2_adjust_refcount_rec(handle, ci, @@ -1978,9 +1978,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle, ocfs2_refcount_rec_merge(rb, index); } - ret = ocfs2_journal_dirty(handle, ref_leaf_bh); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, ref_leaf_bh); out: brelse(new_bh); @@ -2113,6 +2111,7 @@ static int ocfs2_remove_refcount_extent(handle_t *handle, */ ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE, le16_to_cpu(rb->rf_suballoc_slot), + le64_to_cpu(rb->rf_suballoc_loc), le64_to_cpu(rb->rf_blkno), le16_to_cpu(rb->rf_suballoc_bit)); if (ret) { @@ -2438,16 +2437,26 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + le32_to_cpu(rec.r_clusters)) - cpos; /* - * If the refcount rec already exist, cool. We just need - * to check whether there is a split. Otherwise we just need - * to increase the refcount. - * If we will insert one, increases recs_add. - * * We record all the records which will be inserted to the * same refcount block, so that we can tell exactly whether * we need a new refcount block or not. + * + * If we will insert a new one, this is easy and only happens + * during adding refcounted flag to the extent, so we don't + * have a chance of spliting. We just need one record. + * + * If the refcount rec already exists, that would be a little + * complicated. we may have to: + * 1) split at the beginning if the start pos isn't aligned. + * we need 1 more record in this case. + * 2) split int the end if the end pos isn't aligned. + * we need 1 more record in this case. + * 3) split in the middle because of file system fragmentation. + * we need 2 more records in this case(we can't detect this + * beforehand, so always think of the worst case). */ if (rec.r_refcount) { + recs_add += 2; /* Check whether we need a split at the beginning. */ if (cpos == start_cpos && cpos != le64_to_cpu(rec.r_cpos)) @@ -2517,20 +2526,19 @@ out: * * Normally the refcount blocks store these refcount should be * contiguous also, so that we can get the number easily. - * As for meta_ac, we will at most add split 2 refcount record and - * 2 more refcount block, so just check it in a rough way. + * We will at most add split 2 refcount records and 2 more + * refcount blocks, so just check it in a rough way. * * Caller must hold refcount tree lock. */ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, - struct buffer_head *di_bh, + u64 refcount_loc, u64 phys_blkno, u32 clusters, int *credits, - struct ocfs2_alloc_context **meta_ac) + int *ref_blocks) { - int ret, ref_blocks = 0; - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; + int ret; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *tree; @@ -2547,14 +2555,13 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), - le64_to_cpu(di->i_refcount_loc), &tree); + refcount_loc, &tree); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_read_refcount_block(&tree->rf_ci, - le64_to_cpu(di->i_refcount_loc), + ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc, &ref_root_bh); if (ret) { mlog_errno(ret); @@ -2565,21 +2572,14 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, &tree->rf_ci, ref_root_bh, start_cpos, clusters, - &ref_blocks, credits); + ref_blocks, credits); if (ret) { mlog_errno(ret); goto out; } - mlog(0, "reserve new metadata %d, credits = %d\n", - ref_blocks, *credits); - - if (ref_blocks) { - ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), - ref_blocks, meta_ac); - if (ret) - mlog_errno(ret); - } + mlog(0, "reserve new metadata %d blocks, credits = %d\n", + *ref_blocks, *credits); out: brelse(ref_root_bh); @@ -2933,19 +2933,28 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); struct page *page; pgoff_t page_index; - unsigned int from, to; + unsigned int from, to, readahead_pages; loff_t offset, end, map_end; struct address_space *mapping = context->inode->i_mapping; mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, new_cluster, new_len, cpos); + readahead_pages = + (ocfs2_cow_contig_clusters(sb) << + OCFS2_SB(sb)->s_clustersize_bits) >> PAGE_CACHE_SHIFT; offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); + /* + * We only duplicate pages until we reach the page contains i_size - 1. + * So trim 'end' to i_size. + */ + if (end > i_size_read(context->inode)) + end = i_size_read(context->inode); while (offset < end) { page_index = offset >> PAGE_CACHE_SHIFT; - map_end = (page_index + 1) << PAGE_CACHE_SHIFT; + map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT; if (map_end > end) map_end = end; @@ -2955,10 +2964,22 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, if (map_end & (PAGE_CACHE_SIZE - 1)) to = map_end & (PAGE_CACHE_SIZE - 1); - page = grab_cache_page(mapping, page_index); + page = find_or_create_page(mapping, page_index, GFP_NOFS); - /* This page can't be dirtied before we CoW it out. */ - BUG_ON(PageDirty(page)); + /* + * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page + * can't be dirtied before we CoW it out. + */ + if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) + BUG_ON(PageDirty(page)); + + if (PageReadahead(page) && context->file) { + page_cache_async_readahead(mapping, + &context->file->f_ra, + context->file, + page, page_index, + readahead_pages); + } if (!PageUptodate(page)) { ret = block_read_full_page(page, ocfs2_get_block); @@ -3037,11 +3058,7 @@ static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, } memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize); - ret = ocfs2_journal_dirty(handle, new_bh); - if (ret) { - mlog_errno(ret); - break; - } + ocfs2_journal_dirty(handle, new_bh); brelse(new_bh); brelse(old_bh); @@ -3170,11 +3187,12 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, while (offset < end) { page_index = offset >> PAGE_CACHE_SHIFT; - map_end = (page_index + 1) << PAGE_CACHE_SHIFT; + map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT; if (map_end > end) map_end = end; - page = grab_cache_page(context->inode->i_mapping, page_index); + page = find_or_create_page(context->inode->i_mapping, + page_index, GFP_NOFS); BUG_ON(!page); wait_on_page_writeback(page); @@ -3279,7 +3297,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb, } else { delete = 1; - ret = __ocfs2_claim_clusters(osb, handle, + ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, set_len, &new_bit, &new_len); @@ -3403,12 +3421,35 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context) return ret; } +static void ocfs2_readahead_for_cow(struct inode *inode, + struct file *file, + u32 start, u32 len) +{ + struct address_space *mapping; + pgoff_t index; + unsigned long num_pages; + int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; + + if (!file) + return; + + mapping = file->f_mapping; + num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT; + if (!num_pages) + num_pages = 1; + + index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT; + page_cache_sync_readahead(mapping, &file->f_ra, file, + index, num_pages); +} + /* * Starting at cpos, try to CoW write_len clusters. Don't CoW * past max_cpos. This will stop when it runs into a hole or an * unrefcounted extent. */ static int ocfs2_refcount_cow_hunk(struct inode *inode, + struct file *file, struct buffer_head *di_bh, u32 cpos, u32 write_len, u32 max_cpos) { @@ -3437,6 +3478,8 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, BUG_ON(cow_len == 0); + ocfs2_readahead_for_cow(inode, file, cow_start, cow_len); + context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); if (!context) { ret = -ENOMEM; @@ -3458,6 +3501,7 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, context->ref_root_bh = ref_root_bh; context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; context->get_clusters = ocfs2_di_get_clusters; + context->file = file; ocfs2_init_dinode_extent_tree(&context->data_et, INODE_CACHE(inode), di_bh); @@ -3486,6 +3530,7 @@ out: * clusters between cpos and cpos+write_len are safe to modify. */ int ocfs2_refcount_cow(struct inode *inode, + struct file *file, struct buffer_head *di_bh, u32 cpos, u32 write_len, u32 max_cpos) { @@ -3505,7 +3550,7 @@ int ocfs2_refcount_cow(struct inode *inode, num_clusters = write_len; if (ext_flags & OCFS2_EXT_REFCOUNTED) { - ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, + ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos, num_clusters, max_cpos); if (ret) { mlog_errno(ret); @@ -4071,6 +4116,7 @@ static int ocfs2_complete_reflink(struct inode *s_inode, OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features; spin_unlock(&OCFS2_I(t_inode)->ip_lock); i_size_write(t_inode, size); + t_inode->i_blocks = s_inode->i_blocks; di->i_xattr_inline_size = s_di->i_xattr_inline_size; di->i_clusters = s_di->i_clusters; @@ -4079,6 +4125,9 @@ static int ocfs2_complete_reflink(struct inode *s_inode, di->i_attr = s_di->i_attr; if (preserve) { + t_inode->i_uid = s_inode->i_uid; + t_inode->i_gid = s_inode->i_gid; + t_inode->i_mode = s_inode->i_mode; di->i_uid = s_di->i_uid; di->i_gid = s_di->i_gid; di->i_mode = s_di->i_mode; @@ -4173,6 +4222,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry, struct inode *inode = old_dentry->d_inode; struct buffer_head *new_bh = NULL; + if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) { + ret = -EINVAL; + mlog_errno(ret); + goto out; + } + ret = filemap_fdatawrite(inode->i_mapping); if (ret) { mlog_errno(ret); @@ -4185,8 +4240,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry, goto out; } - mutex_lock(&new_inode->i_mutex); - ret = ocfs2_inode_lock(new_inode, &new_bh, 1); + mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD); + ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1, + OI_LS_REFLINK_TARGET); if (ret) { mlog_errno(ret); goto out_unlock; @@ -4386,7 +4442,7 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir, } mutex_lock(&inode->i_mutex); - vfs_dq_init(dir); + dquot_initialize(dir); error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve); mutex_unlock(&inode->i_mutex); if (!error) |