summaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/refcounttree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/refcounttree.c')
-rw-r--r--fs/ocfs2/refcounttree.c184
1 files changed, 120 insertions, 64 deletions
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 74db2be75dd..b5f9160e93e 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -37,7 +37,6 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
-#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
@@ -50,6 +49,7 @@
struct ocfs2_cow_context {
struct inode *inode;
+ struct file *file;
u32 cow_start;
u32 cow_len;
struct ocfs2_extent_tree data_et;
@@ -571,7 +571,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
u16 suballoc_bit_start;
u32 num_got;
- u64 first_blkno;
+ u64 suballoc_loc, first_blkno;
BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
@@ -597,7 +597,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
goto out_commit;
}
- ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
+ ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
&suballoc_bit_start, &num_got,
&first_blkno);
if (ret) {
@@ -626,7 +626,8 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
rb = (struct ocfs2_refcount_block *)new_bh->b_data;
memset(rb, 0, inode->i_sb->s_blocksize);
strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
- rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num);
+ rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
+ rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
rb->rf_blkno = cpu_to_le64(first_blkno);
@@ -791,7 +792,10 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
if (le32_to_cpu(rb->rf_count) == 1) {
blk = le64_to_cpu(rb->rf_blkno);
bit = le16_to_cpu(rb->rf_suballoc_bit);
- bg_blkno = ocfs2_which_suballoc_group(blk, bit);
+ if (rb->rf_suballoc_loc)
+ bg_blkno = le64_to_cpu(rb->rf_suballoc_loc);
+ else
+ bg_blkno = ocfs2_which_suballoc_group(blk, bit);
alloc_inode = ocfs2_get_system_file_inode(osb,
EXTENT_ALLOC_SYSTEM_INODE,
@@ -1269,9 +1273,7 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
} else if (merge)
ocfs2_refcount_rec_merge(rb, index);
- ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
- if (ret)
- mlog_errno(ret);
+ ocfs2_journal_dirty(handle, ref_leaf_bh);
out:
return ret;
}
@@ -1285,7 +1287,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
int ret;
u16 suballoc_bit_start;
u32 num_got;
- u64 blkno;
+ u64 suballoc_loc, blkno;
struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
struct buffer_head *new_bh = NULL;
struct ocfs2_refcount_block *new_rb;
@@ -1299,7 +1301,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
goto out;
}
- ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
+ ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
&suballoc_bit_start, &num_got,
&blkno);
if (ret) {
@@ -1330,7 +1332,8 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize);
new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
- new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
+ new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
+ new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
new_rb->rf_blkno = cpu_to_le64(blkno);
new_rb->rf_cpos = cpu_to_le32(0);
@@ -1525,7 +1528,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
int ret;
u16 suballoc_bit_start;
u32 num_got, new_cpos;
- u64 blkno;
+ u64 suballoc_loc, blkno;
struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
struct ocfs2_refcount_block *root_rb =
(struct ocfs2_refcount_block *)ref_root_bh->b_data;
@@ -1549,7 +1552,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
goto out;
}
- ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
+ ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
&suballoc_bit_start, &num_got,
&blkno);
if (ret) {
@@ -1576,7 +1579,8 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
memset(new_rb, 0, sb->s_blocksize);
strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
- new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
+ new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
+ new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
new_rb->rf_blkno = cpu_to_le64(blkno);
@@ -1695,7 +1699,7 @@ static int ocfs2_adjust_refcount_rec(handle_t *handle,
* 2 more credits, one for the leaf refcount block, one for
* the extent block contains the extent rec.
*/
- ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2);
+ ret = ocfs2_extend_trans(handle, 2);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -1803,11 +1807,7 @@ static int ocfs2_insert_refcount_rec(handle_t *handle,
if (merge)
ocfs2_refcount_rec_merge(rb, index);
- ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
+ ocfs2_journal_dirty(handle, ref_leaf_bh);
if (index == 0) {
ret = ocfs2_adjust_refcount_rec(handle, ci,
@@ -1978,9 +1978,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
ocfs2_refcount_rec_merge(rb, index);
}
- ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
- if (ret)
- mlog_errno(ret);
+ ocfs2_journal_dirty(handle, ref_leaf_bh);
out:
brelse(new_bh);
@@ -2113,6 +2111,7 @@ static int ocfs2_remove_refcount_extent(handle_t *handle,
*/
ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
le16_to_cpu(rb->rf_suballoc_slot),
+ le64_to_cpu(rb->rf_suballoc_loc),
le64_to_cpu(rb->rf_blkno),
le16_to_cpu(rb->rf_suballoc_bit));
if (ret) {
@@ -2438,16 +2437,26 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) +
le32_to_cpu(rec.r_clusters)) - cpos;
/*
- * If the refcount rec already exist, cool. We just need
- * to check whether there is a split. Otherwise we just need
- * to increase the refcount.
- * If we will insert one, increases recs_add.
- *
* We record all the records which will be inserted to the
* same refcount block, so that we can tell exactly whether
* we need a new refcount block or not.
+ *
+ * If we will insert a new one, this is easy and only happens
+ * during adding refcounted flag to the extent, so we don't
+ * have a chance of spliting. We just need one record.
+ *
+ * If the refcount rec already exists, that would be a little
+ * complicated. we may have to:
+ * 1) split at the beginning if the start pos isn't aligned.
+ * we need 1 more record in this case.
+ * 2) split int the end if the end pos isn't aligned.
+ * we need 1 more record in this case.
+ * 3) split in the middle because of file system fragmentation.
+ * we need 2 more records in this case(we can't detect this
+ * beforehand, so always think of the worst case).
*/
if (rec.r_refcount) {
+ recs_add += 2;
/* Check whether we need a split at the beginning. */
if (cpos == start_cpos &&
cpos != le64_to_cpu(rec.r_cpos))
@@ -2517,20 +2526,19 @@ out:
*
* Normally the refcount blocks store these refcount should be
* contiguous also, so that we can get the number easily.
- * As for meta_ac, we will at most add split 2 refcount record and
- * 2 more refcount block, so just check it in a rough way.
+ * We will at most add split 2 refcount records and 2 more
+ * refcount blocks, so just check it in a rough way.
*
* Caller must hold refcount tree lock.
*/
int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
- struct buffer_head *di_bh,
+ u64 refcount_loc,
u64 phys_blkno,
u32 clusters,
int *credits,
- struct ocfs2_alloc_context **meta_ac)
+ int *ref_blocks)
{
- int ret, ref_blocks = 0;
- struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+ int ret;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_refcount_tree *tree;
@@ -2547,14 +2555,13 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
- le64_to_cpu(di->i_refcount_loc), &tree);
+ refcount_loc, &tree);
if (ret) {
mlog_errno(ret);
goto out;
}
- ret = ocfs2_read_refcount_block(&tree->rf_ci,
- le64_to_cpu(di->i_refcount_loc),
+ ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc,
&ref_root_bh);
if (ret) {
mlog_errno(ret);
@@ -2565,21 +2572,14 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
&tree->rf_ci,
ref_root_bh,
start_cpos, clusters,
- &ref_blocks, credits);
+ ref_blocks, credits);
if (ret) {
mlog_errno(ret);
goto out;
}
- mlog(0, "reserve new metadata %d, credits = %d\n",
- ref_blocks, *credits);
-
- if (ref_blocks) {
- ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
- ref_blocks, meta_ac);
- if (ret)
- mlog_errno(ret);
- }
+ mlog(0, "reserve new metadata %d blocks, credits = %d\n",
+ *ref_blocks, *credits);
out:
brelse(ref_root_bh);
@@ -2933,19 +2933,28 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
struct page *page;
pgoff_t page_index;
- unsigned int from, to;
+ unsigned int from, to, readahead_pages;
loff_t offset, end, map_end;
struct address_space *mapping = context->inode->i_mapping;
mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster,
new_cluster, new_len, cpos);
+ readahead_pages =
+ (ocfs2_cow_contig_clusters(sb) <<
+ OCFS2_SB(sb)->s_clustersize_bits) >> PAGE_CACHE_SHIFT;
offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
+ /*
+ * We only duplicate pages until we reach the page contains i_size - 1.
+ * So trim 'end' to i_size.
+ */
+ if (end > i_size_read(context->inode))
+ end = i_size_read(context->inode);
while (offset < end) {
page_index = offset >> PAGE_CACHE_SHIFT;
- map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
+ map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
if (map_end > end)
map_end = end;
@@ -2955,10 +2964,22 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
if (map_end & (PAGE_CACHE_SIZE - 1))
to = map_end & (PAGE_CACHE_SIZE - 1);
- page = grab_cache_page(mapping, page_index);
+ page = find_or_create_page(mapping, page_index, GFP_NOFS);
- /* This page can't be dirtied before we CoW it out. */
- BUG_ON(PageDirty(page));
+ /*
+ * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
+ * can't be dirtied before we CoW it out.
+ */
+ if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
+ BUG_ON(PageDirty(page));
+
+ if (PageReadahead(page) && context->file) {
+ page_cache_async_readahead(mapping,
+ &context->file->f_ra,
+ context->file,
+ page, page_index,
+ readahead_pages);
+ }
if (!PageUptodate(page)) {
ret = block_read_full_page(page, ocfs2_get_block);
@@ -3037,11 +3058,7 @@ static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
}
memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize);
- ret = ocfs2_journal_dirty(handle, new_bh);
- if (ret) {
- mlog_errno(ret);
- break;
- }
+ ocfs2_journal_dirty(handle, new_bh);
brelse(new_bh);
brelse(old_bh);
@@ -3170,11 +3187,12 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
while (offset < end) {
page_index = offset >> PAGE_CACHE_SHIFT;
- map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
+ map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
if (map_end > end)
map_end = end;
- page = grab_cache_page(context->inode->i_mapping, page_index);
+ page = find_or_create_page(context->inode->i_mapping,
+ page_index, GFP_NOFS);
BUG_ON(!page);
wait_on_page_writeback(page);
@@ -3279,7 +3297,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
} else {
delete = 1;
- ret = __ocfs2_claim_clusters(osb, handle,
+ ret = __ocfs2_claim_clusters(handle,
context->data_ac,
1, set_len,
&new_bit, &new_len);
@@ -3403,12 +3421,35 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
return ret;
}
+static void ocfs2_readahead_for_cow(struct inode *inode,
+ struct file *file,
+ u32 start, u32 len)
+{
+ struct address_space *mapping;
+ pgoff_t index;
+ unsigned long num_pages;
+ int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+
+ if (!file)
+ return;
+
+ mapping = file->f_mapping;
+ num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT;
+ if (!num_pages)
+ num_pages = 1;
+
+ index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT;
+ page_cache_sync_readahead(mapping, &file->f_ra, file,
+ index, num_pages);
+}
+
/*
* Starting at cpos, try to CoW write_len clusters. Don't CoW
* past max_cpos. This will stop when it runs into a hole or an
* unrefcounted extent.
*/
static int ocfs2_refcount_cow_hunk(struct inode *inode,
+ struct file *file,
struct buffer_head *di_bh,
u32 cpos, u32 write_len, u32 max_cpos)
{
@@ -3437,6 +3478,8 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
BUG_ON(cow_len == 0);
+ ocfs2_readahead_for_cow(inode, file, cow_start, cow_len);
+
context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
if (!context) {
ret = -ENOMEM;
@@ -3458,6 +3501,7 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
context->ref_root_bh = ref_root_bh;
context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
context->get_clusters = ocfs2_di_get_clusters;
+ context->file = file;
ocfs2_init_dinode_extent_tree(&context->data_et,
INODE_CACHE(inode), di_bh);
@@ -3486,6 +3530,7 @@ out:
* clusters between cpos and cpos+write_len are safe to modify.
*/
int ocfs2_refcount_cow(struct inode *inode,
+ struct file *file,
struct buffer_head *di_bh,
u32 cpos, u32 write_len, u32 max_cpos)
{
@@ -3505,7 +3550,7 @@ int ocfs2_refcount_cow(struct inode *inode,
num_clusters = write_len;
if (ext_flags & OCFS2_EXT_REFCOUNTED) {
- ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
+ ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos,
num_clusters, max_cpos);
if (ret) {
mlog_errno(ret);
@@ -4071,6 +4116,7 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features;
spin_unlock(&OCFS2_I(t_inode)->ip_lock);
i_size_write(t_inode, size);
+ t_inode->i_blocks = s_inode->i_blocks;
di->i_xattr_inline_size = s_di->i_xattr_inline_size;
di->i_clusters = s_di->i_clusters;
@@ -4079,6 +4125,9 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
di->i_attr = s_di->i_attr;
if (preserve) {
+ t_inode->i_uid = s_inode->i_uid;
+ t_inode->i_gid = s_inode->i_gid;
+ t_inode->i_mode = s_inode->i_mode;
di->i_uid = s_di->i_uid;
di->i_gid = s_di->i_gid;
di->i_mode = s_di->i_mode;
@@ -4173,6 +4222,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
struct inode *inode = old_dentry->d_inode;
struct buffer_head *new_bh = NULL;
+ if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ ret = -EINVAL;
+ mlog_errno(ret);
+ goto out;
+ }
+
ret = filemap_fdatawrite(inode->i_mapping);
if (ret) {
mlog_errno(ret);
@@ -4185,8 +4240,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto out;
}
- mutex_lock(&new_inode->i_mutex);
- ret = ocfs2_inode_lock(new_inode, &new_bh, 1);
+ mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD);
+ ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
+ OI_LS_REFLINK_TARGET);
if (ret) {
mlog_errno(ret);
goto out_unlock;
@@ -4386,7 +4442,7 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
}
mutex_lock(&inode->i_mutex);
- vfs_dq_init(dir);
+ dquot_initialize(dir);
error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
mutex_unlock(&inode->i_mutex);
if (!error)