From 4ac6032d6c92f0ac65cf5bc56b68557b3f099b66 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Sat, 18 Oct 2008 19:11:42 -0700 Subject: ocfs2: Field prefixes for the xattr_bucket structure The ocfs2_xattr_bucket structure keeps track of the buffers for one xattr bucket. Let's prefix the fields for easier code navigation. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 100 +++++++++++++++++++++++++++---------------------------- 1 file changed, 50 insertions(+), 50 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 74d7367ade1..9c0ee42eb93 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -61,8 +61,8 @@ struct ocfs2_xattr_def_value_root { }; struct ocfs2_xattr_bucket { - struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; - struct ocfs2_xattr_header *xh; + struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; + struct ocfs2_xattr_header *bu_xh; }; #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) @@ -795,11 +795,11 @@ static int ocfs2_xattr_block_get(struct inode *inode, if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { ret = ocfs2_xattr_bucket_get_name_value(inode, - xs->bucket.xh, + xs->bucket.bu_xh, i, &block_off, &name_offset); - xs->base = xs->bucket.bhs[block_off]->b_data; + xs->base = xs->bucket.bu_bhs[block_off]->b_data; } if (ocfs2_xattr_is_local(xs->here)) { memcpy(buffer, (void *)xs->base + @@ -818,7 +818,7 @@ static int ocfs2_xattr_block_get(struct inode *inode, ret = size; cleanup: for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++) - brelse(xs->bucket.bhs[i]); + brelse(xs->bucket.bu_bhs[i]); memset(&xs->bucket, 0, sizeof(xs->bucket)); brelse(xs->xattr_bh); @@ -2032,7 +2032,7 @@ cleanup: brelse(di_bh); brelse(xbs.xattr_bh); for (i = 0; i < blk_per_bucket; i++) - brelse(xbs.bucket.bhs[i]); + brelse(xbs.bucket.bu_bhs[i]); return ret; } @@ -2276,13 +2276,13 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, lower_bh = bh; bh 
= NULL; } - xs->bucket.bhs[0] = lower_bh; - xs->bucket.xh = (struct ocfs2_xattr_header *) - xs->bucket.bhs[0]->b_data; + xs->bucket.bu_bhs[0] = lower_bh; + xs->bucket.bu_xh = (struct ocfs2_xattr_header *) + xs->bucket.bu_bhs[0]->b_data; lower_bh = NULL; - xs->header = xs->bucket.xh; - xs->base = xs->bucket.bhs[0]->b_data; + xs->header = xs->bucket.bu_xh; + xs->base = xs->bucket.bu_bhs[0]->b_data; xs->end = xs->base + inode->i_sb->s_blocksize; if (found) { @@ -2290,8 +2290,8 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, * If we have found the xattr enty, read all the blocks in * this bucket. */ - ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1, - blk_per_bucket - 1, &xs->bucket.bhs[1], + ret = ocfs2_read_blocks(inode, xs->bucket.bu_bhs[0]->b_blocknr + 1, + blk_per_bucket - 1, &xs->bucket.bu_bhs[1], 0); if (ret) { mlog_errno(ret); @@ -2300,7 +2300,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, xs->here = &xs->header->xh_entries[index]; mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name, - (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index); + (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr, index); } else ret = -ENODATA; @@ -2370,23 +2370,23 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, - bucket.bhs, 0); + bucket.bu_bhs, 0); if (ret) { mlog_errno(ret); goto out; } - bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data; + bucket.bu_xh = (struct ocfs2_xattr_header *)bucket.bu_bhs[0]->b_data; /* * The real bucket num in this series of blocks is stored * in the 1st bucket. 
*/ if (i == 0) - num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets); + num_buckets = le16_to_cpu(bucket.bu_xh->xh_num_buckets); mlog(0, "iterating xattr bucket %llu, first hash %u\n", (unsigned long long)blkno, - le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash)); + le32_to_cpu(bucket.bu_xh->xh_entries[0].xe_name_hash)); if (func) { ret = func(inode, &bucket, para); if (ret) { @@ -2396,13 +2396,13 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, } for (j = 0; j < blk_per_bucket; j++) - brelse(bucket.bhs[j]); + brelse(bucket.bu_bhs[j]); memset(&bucket, 0, sizeof(bucket)); } out: for (j = 0; j < blk_per_bucket; j++) - brelse(bucket.bhs[j]); + brelse(bucket.bu_bhs[j]); return ret; } @@ -2441,21 +2441,21 @@ static int ocfs2_list_xattr_bucket(struct inode *inode, int i, block_off, new_offset; const char *prefix, *name; - for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) { - struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i]; + for (i = 0 ; i < le16_to_cpu(bucket->bu_xh->xh_count); i++) { + struct ocfs2_xattr_entry *entry = &bucket->bu_xh->xh_entries[i]; type = ocfs2_xattr_get_type(entry); prefix = ocfs2_xattr_prefix(type); if (prefix) { ret = ocfs2_xattr_bucket_get_name_value(inode, - bucket->xh, + bucket->bu_xh, i, &block_off, &new_offset); if (ret) break; - name = (const char *)bucket->bhs[block_off]->b_data + + name = (const char *)bucket->bu_bhs[block_off]->b_data + new_offset; ret = ocfs2_xattr_list_entry(xl->buffer, xl->buffer_size, @@ -2626,10 +2626,10 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode, int i, blocksize = inode->i_sb->s_blocksize; u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - xs->bucket.bhs[0] = new_bh; + xs->bucket.bu_bhs[0] = new_bh; get_bh(new_bh); - xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data; - xs->header = xs->bucket.xh; + xs->bucket.bu_xh = (struct ocfs2_xattr_header *)xs->bucket.bu_bhs[0]->b_data; + xs->header = xs->bucket.bu_xh; xs->base = 
new_bh->b_data; xs->end = xs->base + inode->i_sb->s_blocksize; @@ -2637,8 +2637,8 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode, if (!xs->not_found) { if (OCFS2_XATTR_BUCKET_SIZE != blocksize) { ret = ocfs2_read_blocks(inode, - xs->bucket.bhs[0]->b_blocknr + 1, - blk_per_bucket - 1, &xs->bucket.bhs[1], + xs->bucket.bu_bhs[0]->b_blocknr + 1, + blk_per_bucket - 1, &xs->bucket.bu_bhs[1], 0); if (ret) { mlog_errno(ret); @@ -2835,7 +2835,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, size_t end, offset, len, value_len; struct ocfs2_xattr_header *xh; char *entries, *buf, *bucket_buf = NULL; - u64 blkno = bucket->bhs[0]->b_blocknr; + u64 blkno = bucket->bu_bhs[0]->b_blocknr; u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); u16 xh_free_start; size_t blocksize = inode->i_sb->s_blocksize; @@ -3929,7 +3929,7 @@ static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode, int block_off = offs >> inode->i_sb->s_blocksize_bits; offs = offs % inode->i_sb->s_blocksize; - return bucket->bhs[block_off]->b_data + offs; + return bucket->bu_bhs[block_off]->b_data + offs; } /* @@ -4124,12 +4124,12 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n", (unsigned long)xi->value_len, xi->name_index, - (unsigned long long)xs->bucket.bhs[0]->b_blocknr); + (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr); - if (!xs->bucket.bhs[1]) { + if (!xs->bucket.bu_bhs[1]) { ret = ocfs2_read_blocks(inode, - xs->bucket.bhs[0]->b_blocknr + 1, - blk_per_bucket - 1, &xs->bucket.bhs[1], + xs->bucket.bu_bhs[0]->b_blocknr + 1, + blk_per_bucket - 1, &xs->bucket.bu_bhs[1], 0); if (ret) { mlog_errno(ret); @@ -4146,7 +4146,7 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, } for (i = 0; i < blk_per_bucket; i++) { - ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i], + ret = ocfs2_journal_access(handle, inode, xs->bucket.bu_bhs[i], 
OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); @@ -4158,7 +4158,7 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, /*Only dirty the blocks we have touched in set xattr. */ ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs, - xs->bucket.bhs, blk_per_bucket); + xs->bucket.bu_bhs, blk_per_bucket); if (ret) mlog_errno(ret); out: @@ -4272,10 +4272,10 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, struct ocfs2_xattr_entry *xe = xs->here; struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base; - BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe)); + BUG_ON(!xs->bucket.bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe)); offset = xe - xh->xh_entries; - ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0], + ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bu_bhs[0], offset, len); if (ret) mlog_errno(ret); @@ -4395,7 +4395,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, struct ocfs2_xattr_search *xs) { handle_t *handle = NULL; - struct ocfs2_xattr_header *xh = xs->bucket.xh; + struct ocfs2_xattr_header *xh = xs->bucket.bu_xh; struct ocfs2_xattr_entry *last = &xh->xh_entries[ le16_to_cpu(xh->xh_count) - 1]; int ret = 0; @@ -4407,7 +4407,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, return; } - ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0], + ret = ocfs2_journal_access(handle, inode, xs->bucket.bu_bhs[0], OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); @@ -4420,7 +4420,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, memset(last, 0, sizeof(struct ocfs2_xattr_entry)); le16_add_cpu(&xh->xh_count, -1); - ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]); + ret = ocfs2_journal_dirty(handle, xs->bucket.bu_bhs[0]); if (ret < 0) mlog_errno(ret); out_commit: @@ -4530,7 +4530,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode, struct ocfs2_xattr_bucket *bucket, const char *name) 
{ - struct ocfs2_xattr_header *xh = bucket->xh; + struct ocfs2_xattr_header *xh = bucket->bu_xh; u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) @@ -4540,7 +4540,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode, xh->xh_entries[0].xe_name_hash) { mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " "hash = %u\n", - (unsigned long long)bucket->bhs[0]->b_blocknr, + (unsigned long long)bucket->bu_bhs[0]->b_blocknr, le32_to_cpu(xh->xh_entries[0].xe_name_hash)); return -ENOSPC; } @@ -4574,7 +4574,7 @@ try_again: mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size " "of %u which exceed block size\n", - (unsigned long long)xs->bucket.bhs[0]->b_blocknr, + (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr, header_size); if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) @@ -4614,7 +4614,7 @@ try_again: mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, " "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len =" " %u\n", xs->not_found, - (unsigned long long)xs->bucket.bhs[0]->b_blocknr, + (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr, free, need, max_free, le16_to_cpu(xh->xh_free_start), le16_to_cpu(xh->xh_name_value_len)); @@ -4667,14 +4667,14 @@ try_again: ret = ocfs2_add_new_xattr_bucket(inode, xs->xattr_bh, - xs->bucket.bhs[0]); + xs->bucket.bu_bhs[0]); if (ret) { mlog_errno(ret); goto out; } for (i = 0; i < blk_per_bucket; i++) - brelse(xs->bucket.bhs[i]); + brelse(xs->bucket.bu_bhs[i]); memset(&xs->bucket, 0, sizeof(xs->bucket)); @@ -4700,7 +4700,7 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, void *para) { int ret = 0; - struct ocfs2_xattr_header *xh = bucket->xh; + struct ocfs2_xattr_header *xh = bucket->bu_xh; u16 i; struct ocfs2_xattr_entry *xe; @@ -4710,7 +4710,7 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, continue; ret = ocfs2_xattr_bucket_value_truncate(inode, - 
bucket->bhs[0], + bucket->bu_bhs[0], i, 0); if (ret) { mlog_errno(ret); -- cgit v1.2.3-70-g09d2 From 9c7759aa670918a48f0c6e06779cd20f2781a2ac Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 24 Oct 2008 16:21:03 -0700 Subject: ocfs2: Convenient access to an xattr bucket's block number. The xattr code often wants to know the block number of an xattr bucket. This is usually found by dereferencing the first bh hanging off of the ocfs2_xattr_bucket structure. Rather than do this all the time, let's provide a nice little macro. The idea is ripped from the ocfs2_path code. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 9c0ee42eb93..3cf8e80b2b6 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -154,6 +154,8 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) return len / sizeof(struct ocfs2_xattr_entry); } +#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) + static inline const char *ocfs2_xattr_prefix(int name_index) { struct xattr_handler *handler = NULL; @@ -2290,7 +2292,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, * If we have found the xattr enty, read all the blocks in * this bucket. 
*/ - ret = ocfs2_read_blocks(inode, xs->bucket.bu_bhs[0]->b_blocknr + 1, + ret = ocfs2_read_blocks(inode, bucket_blkno(&xs->bucket) + 1, blk_per_bucket - 1, &xs->bucket.bu_bhs[1], 0); if (ret) { @@ -2300,7 +2302,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, xs->here = &xs->header->xh_entries[index]; mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name, - (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr, index); + (unsigned long long)bucket_blkno(&xs->bucket), index); } else ret = -ENODATA; @@ -2637,7 +2639,7 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode, if (!xs->not_found) { if (OCFS2_XATTR_BUCKET_SIZE != blocksize) { ret = ocfs2_read_blocks(inode, - xs->bucket.bu_bhs[0]->b_blocknr + 1, + bucket_blkno(&xs->bucket) + 1, blk_per_bucket - 1, &xs->bucket.bu_bhs[1], 0); if (ret) { @@ -2835,7 +2837,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, size_t end, offset, len, value_len; struct ocfs2_xattr_header *xh; char *entries, *buf, *bucket_buf = NULL; - u64 blkno = bucket->bu_bhs[0]->b_blocknr; + u64 blkno = bucket_blkno(bucket); u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); u16 xh_free_start; size_t blocksize = inode->i_sb->s_blocksize; @@ -4124,11 +4126,11 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n", (unsigned long)xi->value_len, xi->name_index, - (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr); + (unsigned long long)bucket_blkno(&xs->bucket)); if (!xs->bucket.bu_bhs[1]) { ret = ocfs2_read_blocks(inode, - xs->bucket.bu_bhs[0]->b_blocknr + 1, + bucket_blkno(&xs->bucket) + 1, blk_per_bucket - 1, &xs->bucket.bu_bhs[1], 0); if (ret) { @@ -4540,7 +4542,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode, xh->xh_entries[0].xe_name_hash) { mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " "hash = %u\n", - (unsigned long long)bucket->bu_bhs[0]->b_blocknr, + (unsigned long 
long)bucket_blkno(bucket), le32_to_cpu(xh->xh_entries[0].xe_name_hash)); return -ENOSPC; } @@ -4574,7 +4576,7 @@ try_again: mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size " "of %u which exceed block size\n", - (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr, + (unsigned long long)bucket_blkno(&xs->bucket), header_size); if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) @@ -4614,7 +4616,7 @@ try_again: mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, " "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len =" " %u\n", xs->not_found, - (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr, + (unsigned long long)bucket_blkno(&xs->bucket), free, need, max_free, le16_to_cpu(xh->xh_free_start), le16_to_cpu(xh->xh_name_value_len)); -- cgit v1.2.3-70-g09d2 From 51def39f0cabd46131c7c4df08751cb0cb9433d1 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 24 Oct 2008 16:57:21 -0700 Subject: ocfs2: Convenient access to xattr bucket data blocks. The xattr code often wants to access the data pointer for blocks in an xattr bucket. This is usually found by dereferencing the bh array hanging off of the ocfs2_xattr_bucket structure. Rather than do this all the time, let's provide a nice little macro. The idea is ripped from the ocfs2_path code. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 3cf8e80b2b6..8594df36640 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -155,6 +155,7 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) } #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) +#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) static inline const char *ocfs2_xattr_prefix(int name_index) { @@ -801,7 +802,7 @@ static int ocfs2_xattr_block_get(struct inode *inode, i, &block_off, &name_offset); - xs->base = xs->bucket.bu_bhs[block_off]->b_data; + xs->base = bucket_block(&xs->bucket, block_off); } if (ocfs2_xattr_is_local(xs->here)) { memcpy(buffer, (void *)xs->base + @@ -2280,11 +2281,11 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, } xs->bucket.bu_bhs[0] = lower_bh; xs->bucket.bu_xh = (struct ocfs2_xattr_header *) - xs->bucket.bu_bhs[0]->b_data; + bucket_block(&xs->bucket, 0); lower_bh = NULL; xs->header = xs->bucket.bu_xh; - xs->base = xs->bucket.bu_bhs[0]->b_data; + xs->base = bucket_block(&xs->bucket, 0); xs->end = xs->base + inode->i_sb->s_blocksize; if (found) { @@ -2378,7 +2379,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, goto out; } - bucket.bu_xh = (struct ocfs2_xattr_header *)bucket.bu_bhs[0]->b_data; + bucket.bu_xh = (struct ocfs2_xattr_header *)bucket_block(&bucket, 0); /* * The real bucket num in this series of blocks is stored * in the 1st bucket. 
@@ -2457,7 +2458,7 @@ static int ocfs2_list_xattr_bucket(struct inode *inode, if (ret) break; - name = (const char *)bucket->bu_bhs[block_off]->b_data + + name = (const char *)bucket_block(bucket, block_off) + new_offset; ret = ocfs2_xattr_list_entry(xl->buffer, xl->buffer_size, @@ -2630,7 +2631,7 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode, xs->bucket.bu_bhs[0] = new_bh; get_bh(new_bh); - xs->bucket.bu_xh = (struct ocfs2_xattr_header *)xs->bucket.bu_bhs[0]->b_data; + xs->bucket.bu_xh = (struct ocfs2_xattr_header *)bucket_block(&xs->bucket, 0); xs->header = xs->bucket.bu_xh; xs->base = new_bh->b_data; @@ -3931,7 +3932,7 @@ static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode, int block_off = offs >> inode->i_sb->s_blocksize_bits; offs = offs % inode->i_sb->s_blocksize; - return bucket->bu_bhs[block_off]->b_data + offs; + return bucket_block(bucket, block_off) + offs; } /* -- cgit v1.2.3-70-g09d2 From 3e6329463e3a5c311e1d607ff3db735a18b6d67a Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 24 Oct 2008 17:04:49 -0700 Subject: ocfs2: Convenient access to an xattr bucket's header. The xattr code often wants to access the ocfs2_xattr_header at the start of a bucket. Rather than walk the pointer chains, let's just create another nice macro. As a side benefit, we can get rid of the mostly spurious ->bu_xh element on the bucket structure. The idea is ripped from the ocfs2_path code. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 8594df36640..1b77302b54f 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -62,7 +62,6 @@ struct ocfs2_xattr_def_value_root { struct ocfs2_xattr_bucket { struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; - struct ocfs2_xattr_header *bu_xh; }; #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) @@ -156,6 +155,7 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) +#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) static inline const char *ocfs2_xattr_prefix(int name_index) { @@ -798,7 +798,7 @@ static int ocfs2_xattr_block_get(struct inode *inode, if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { ret = ocfs2_xattr_bucket_get_name_value(inode, - xs->bucket.bu_xh, + bucket_xh(&xs->bucket), i, &block_off, &name_offset); @@ -2280,11 +2280,9 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, bh = NULL; } xs->bucket.bu_bhs[0] = lower_bh; - xs->bucket.bu_xh = (struct ocfs2_xattr_header *) - bucket_block(&xs->bucket, 0); lower_bh = NULL; - xs->header = xs->bucket.bu_xh; + xs->header = bucket_xh(&xs->bucket); xs->base = bucket_block(&xs->bucket, 0); xs->end = xs->base + inode->i_sb->s_blocksize; @@ -2379,17 +2377,16 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, goto out; } - bucket.bu_xh = (struct ocfs2_xattr_header *)bucket_block(&bucket, 0); /* * The real bucket num in this series of blocks is stored * in the 1st bucket. 
*/ if (i == 0) - num_buckets = le16_to_cpu(bucket.bu_xh->xh_num_buckets); + num_buckets = le16_to_cpu(bucket_xh(&bucket)->xh_num_buckets); mlog(0, "iterating xattr bucket %llu, first hash %u\n", (unsigned long long)blkno, - le32_to_cpu(bucket.bu_xh->xh_entries[0].xe_name_hash)); + le32_to_cpu(bucket_xh(&bucket)->xh_entries[0].xe_name_hash)); if (func) { ret = func(inode, &bucket, para); if (ret) { @@ -2444,14 +2441,14 @@ static int ocfs2_list_xattr_bucket(struct inode *inode, int i, block_off, new_offset; const char *prefix, *name; - for (i = 0 ; i < le16_to_cpu(bucket->bu_xh->xh_count); i++) { - struct ocfs2_xattr_entry *entry = &bucket->bu_xh->xh_entries[i]; + for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { + struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; type = ocfs2_xattr_get_type(entry); prefix = ocfs2_xattr_prefix(type); if (prefix) { ret = ocfs2_xattr_bucket_get_name_value(inode, - bucket->bu_xh, + bucket_xh(bucket), i, &block_off, &new_offset); @@ -2631,8 +2628,7 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode, xs->bucket.bu_bhs[0] = new_bh; get_bh(new_bh); - xs->bucket.bu_xh = (struct ocfs2_xattr_header *)bucket_block(&xs->bucket, 0); - xs->header = xs->bucket.bu_xh; + xs->header = bucket_xh(&xs->bucket); xs->base = new_bh->b_data; xs->end = xs->base + inode->i_sb->s_blocksize; @@ -4398,7 +4394,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, struct ocfs2_xattr_search *xs) { handle_t *handle = NULL; - struct ocfs2_xattr_header *xh = xs->bucket.bu_xh; + struct ocfs2_xattr_header *xh = bucket_xh(&xs->bucket); struct ocfs2_xattr_entry *last = &xh->xh_entries[ le16_to_cpu(xh->xh_count) - 1]; int ret = 0; @@ -4533,7 +4529,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode, struct ocfs2_xattr_bucket *bucket, const char *name) { - struct ocfs2_xattr_header *xh = bucket->bu_xh; + struct ocfs2_xattr_header *xh = bucket_xh(bucket); u32 name_hash = 
ocfs2_xattr_name_hash(inode, name, strlen(name)); if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) @@ -4703,7 +4699,7 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, void *para) { int ret = 0; - struct ocfs2_xattr_header *xh = bucket->bu_xh; + struct ocfs2_xattr_header *xh = bucket_xh(bucket); u16 i; struct ocfs2_xattr_entry *xe; -- cgit v1.2.3-70-g09d2 From 6dde41d9e7ba62f84cd7e91c0e993500af32ceb6 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 24 Oct 2008 17:16:48 -0700 Subject: ocfs2: Provide a wrapper to brelse() xattr bucket buffers. A common theme is walking all the buffer heads on an ocfs2_xattr_bucket and releasing them. Let's wrap that. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 1b77302b54f..3478ad177b7 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -157,6 +157,17 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) +static void ocfs2_xattr_bucket_relse(struct inode *inode, + struct ocfs2_xattr_bucket *bucket) +{ + int i, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + + for (i = 0; i < blks; i++) { + brelse(bucket->bu_bhs[i]); + bucket->bu_bhs[i] = NULL; + } +} + static inline const char *ocfs2_xattr_prefix(int name_index) { struct xattr_handler *handler = NULL; @@ -820,8 +831,7 @@ static int ocfs2_xattr_block_get(struct inode *inode, } ret = size; cleanup: - for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++) - brelse(xs->bucket.bu_bhs[i]); + ocfs2_xattr_bucket_relse(inode, &xs->bucket); memset(&xs->bucket, 0, sizeof(xs->bucket)); brelse(xs->xattr_bh); @@ -1932,7 +1942,6 @@ int ocfs2_xattr_set(struct inode *inode, struct buffer_head *di_bh = NULL; struct 
ocfs2_dinode *di; int ret; - u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); struct ocfs2_xattr_info xi = { .name_index = name_index, @@ -2034,8 +2043,7 @@ cleanup: ocfs2_inode_unlock(inode, 1); brelse(di_bh); brelse(xbs.xattr_bh); - for (i = 0; i < blk_per_bucket; i++) - brelse(xbs.bucket.bu_bhs[i]); + ocfs2_xattr_bucket_relse(inode, &xbs.bucket); return ret; } @@ -2358,7 +2366,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, xattr_bucket_func *func, void *para) { - int i, j, ret = 0; + int i, ret = 0; int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); u32 num_buckets = clusters * bpc; @@ -2395,14 +2403,12 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, } } - for (j = 0; j < blk_per_bucket; j++) - brelse(bucket.bu_bhs[j]); + ocfs2_xattr_bucket_relse(inode, &bucket); memset(&bucket, 0, sizeof(bucket)); } out: - for (j = 0; j < blk_per_bucket; j++) - brelse(bucket.bu_bhs[j]); + ocfs2_xattr_bucket_relse(inode, &bucket); return ret; } @@ -4554,11 +4560,10 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode, struct ocfs2_xattr_header *xh; struct ocfs2_xattr_entry *xe; u16 count, header_size, xh_free_start; - int i, free, max_free, need, old; + int free, max_free, need, old; size_t value_size = 0, name_len = strlen(xi->name); size_t blocksize = inode->i_sb->s_blocksize; int ret, allocation = 0; - u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); mlog_entry("Set xattr %s in xattr index block\n", xi->name); @@ -4672,9 +4677,7 @@ try_again: goto out; } - for (i = 0; i < blk_per_bucket; i++) - brelse(xs->bucket.bu_bhs[i]); - + ocfs2_xattr_bucket_relse(inode, &xs->bucket); memset(&xs->bucket, 0, sizeof(xs->bucket)); ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, -- cgit v1.2.3-70-g09d2 From 784b816a9198dc3782c97cde8ddcf52fecdf1797 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 24 Oct 2008 17:33:40 
-0700 Subject: ocfs2: Improve ocfs2_read_xattr_bucket(). The ocfs2_read_xattr_bucket() function would read an xattr bucket into a list of buffer heads. However, we have a nice ocfs2_xattr_bucket structure. Let's have it fill that out instead. In addition, ocfs2_read_xattr_bucket() would initialize buffer heads for a bucket that's never been on disk before. That's confusing. Let's call that functionality ocfs2_init_xattr_bucket(). The functions ocfs2_cp_xattr_bucket() and ocfs2_half_xattr_bucket() are updated to use the ocfs2_xattr_bucket structure rather than raw bh lists. That way they can use the new read/init calls. In addition, they drop the wasted read of an existing target bucket. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 165 ++++++++++++++++++++++++++----------------------------- 1 file changed, 79 insertions(+), 86 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 3478ad177b7..fa13fa48878 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -168,6 +168,48 @@ static void ocfs2_xattr_bucket_relse(struct inode *inode, } } +/* + * A bucket that has never been written to disk doesn't need to be + * read. We just need the buffer_heads. Don't call this for + * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes + * them fully. 
+ */ +static int ocfs2_init_xattr_bucket(struct inode *inode, + struct ocfs2_xattr_bucket *bucket, + u64 xb_blkno) +{ + int i, rc = 0; + int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + + for (i = 0; i < blks; i++) { + bucket->bu_bhs[i] = sb_getblk(inode->i_sb, xb_blkno + i); + if (!bucket->bu_bhs[i]) { + rc = -EIO; + mlog_errno(rc); + break; + } + + ocfs2_set_new_buffer_uptodate(inode, bucket->bu_bhs[i]); + } + + if (rc) + ocfs2_xattr_bucket_relse(inode, bucket); + return rc; +} + +/* Read the xattr bucket at xb_blkno */ +static int ocfs2_read_xattr_bucket(struct inode *inode, + struct ocfs2_xattr_bucket *bucket, + u64 xb_blkno) +{ + int rc, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + + rc = ocfs2_read_blocks(inode, xb_blkno, blks, bucket->bu_bhs, 0); + if (rc) + ocfs2_xattr_bucket_relse(inode, bucket); + return rc; +} + static inline const char *ocfs2_xattr_prefix(int name_index) { struct xattr_handler *handler = NULL; @@ -3097,31 +3139,6 @@ out: return ret; } -static int ocfs2_read_xattr_bucket(struct inode *inode, - u64 blkno, - struct buffer_head **bhs, - int new) -{ - int ret = 0; - u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - - if (!new) - return ocfs2_read_blocks(inode, blkno, - blk_per_bucket, bhs, 0); - - for (i = 0; i < blk_per_bucket; i++) { - bhs[i] = sb_getblk(inode->i_sb, blkno + i); - if (bhs[i] == NULL) { - ret = -EIO; - mlog_errno(ret); - break; - } - ocfs2_set_new_buffer_uptodate(inode, bhs[i]); - } - - return ret; -} - /* * Find the suitable pos when we divide a bucket into 2. 
* We have to make sure the xattrs with the same hash value exist @@ -3184,7 +3201,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, int ret, i; int count, start, len, name_value_len = 0, xe_len, name_offset = 0; u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - struct buffer_head **s_bhs, **t_bhs = NULL; + struct ocfs2_xattr_bucket s_bucket, t_bucket; struct ocfs2_xattr_header *xh; struct ocfs2_xattr_entry *xe; int blocksize = inode->i_sb->s_blocksize; @@ -3192,37 +3209,34 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, mlog(0, "move some of xattrs from bucket %llu to %llu\n", (unsigned long long)blk, (unsigned long long)new_blk); - s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); - if (!s_bhs) - return -ENOMEM; + memset(&s_bucket, 0, sizeof(struct ocfs2_xattr_bucket)); + memset(&t_bucket, 0, sizeof(struct ocfs2_xattr_bucket)); - ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0); + ret = ocfs2_read_xattr_bucket(inode, &s_bucket, blk); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_journal_access(handle, inode, s_bhs[0], + ret = ocfs2_journal_access(handle, inode, s_bucket.bu_bhs[0], OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } - t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); - if (!t_bhs) { - ret = -ENOMEM; - goto out; - } - - ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head); + /* + * Even if !new_bucket_head, we're overwriting t_bucket. Thus, + * there's no need to read it. + */ + ret = ocfs2_init_xattr_bucket(inode, &t_bucket, new_blk); if (ret) { mlog_errno(ret); goto out; } for (i = 0; i < blk_per_bucket; i++) { - ret = ocfs2_journal_access(handle, inode, t_bhs[i], + ret = ocfs2_journal_access(handle, inode, t_bucket.bu_bhs[i], new_bucket_head ? 
OCFS2_JOURNAL_ACCESS_CREATE : OCFS2_JOURNAL_ACCESS_WRITE); @@ -3232,7 +3246,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, } } - xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; + xh = bucket_xh(&s_bucket); count = le16_to_cpu(xh->xh_count); start = ocfs2_xattr_find_divide_pos(xh); @@ -3245,9 +3259,9 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, * that of the last entry in the previous bucket. */ for (i = 0; i < blk_per_bucket; i++) - memset(t_bhs[i]->b_data, 0, blocksize); + memset(bucket_block(&t_bucket, i), 0, blocksize); - xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; + xh = bucket_xh(&t_bucket); xh->xh_free_start = cpu_to_le16(blocksize); xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); @@ -3257,10 +3271,11 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, /* copy the whole bucket to the new first. */ for (i = 0; i < blk_per_bucket; i++) - memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); + memcpy(bucket_block(&t_bucket, i), bucket_block(&s_bucket, i), + blocksize); /* update the new bucket. 
*/ - xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; + xh = bucket_xh(&t_bucket); /* * Calculate the total name/value len and xh_free_start for @@ -3325,7 +3340,7 @@ set_num_buckets: xh->xh_num_buckets = 0; for (i = 0; i < blk_per_bucket; i++) { - ocfs2_journal_dirty(handle, t_bhs[i]); + ocfs2_journal_dirty(handle, t_bucket.bu_bhs[i]); if (ret) mlog_errno(ret); } @@ -3342,29 +3357,20 @@ set_num_buckets: if (start == count) goto out; - xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; + xh = bucket_xh(&s_bucket); memset(&xh->xh_entries[start], 0, sizeof(struct ocfs2_xattr_entry) * (count - start)); xh->xh_count = cpu_to_le16(start); xh->xh_free_start = cpu_to_le16(name_offset); xh->xh_name_value_len = cpu_to_le16(name_value_len); - ocfs2_journal_dirty(handle, s_bhs[0]); + ocfs2_journal_dirty(handle, s_bucket.bu_bhs[0]); if (ret) mlog_errno(ret); out: - if (s_bhs) { - for (i = 0; i < blk_per_bucket; i++) - brelse(s_bhs[i]); - } - kfree(s_bhs); - - if (t_bhs) { - for (i = 0; i < blk_per_bucket; i++) - brelse(t_bhs[i]); - } - kfree(t_bhs); + ocfs2_xattr_bucket_relse(inode, &s_bucket); + ocfs2_xattr_bucket_relse(inode, &t_bucket); return ret; } @@ -3384,7 +3390,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, int ret, i; int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); int blocksize = inode->i_sb->s_blocksize; - struct buffer_head **s_bhs, **t_bhs = NULL; + struct ocfs2_xattr_bucket s_bucket, t_bucket; BUG_ON(s_blkno == t_blkno); @@ -3392,28 +3398,23 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, (unsigned long long)s_blkno, (unsigned long long)t_blkno, t_is_new); - s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, - GFP_NOFS); - if (!s_bhs) - return -ENOMEM; + memset(&s_bucket, 0, sizeof(struct ocfs2_xattr_bucket)); + memset(&t_bucket, 0, sizeof(struct ocfs2_xattr_bucket)); - ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0); + ret = ocfs2_read_xattr_bucket(inode, &s_bucket, s_blkno); if (ret) goto out; - 
t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, - GFP_NOFS); - if (!t_bhs) { - ret = -ENOMEM; - goto out; - } - - ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new); + /* + * Even if !t_is_new, we're overwriting t_bucket. Thus, + * there's no need to read it. + */ + ret = ocfs2_init_xattr_bucket(inode, &t_bucket, t_blkno); if (ret) goto out; for (i = 0; i < blk_per_bucket; i++) { - ret = ocfs2_journal_access(handle, inode, t_bhs[i], + ret = ocfs2_journal_access(handle, inode, t_bucket.bu_bhs[i], t_is_new ? OCFS2_JOURNAL_ACCESS_CREATE : OCFS2_JOURNAL_ACCESS_WRITE); @@ -3422,22 +3423,14 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, } for (i = 0; i < blk_per_bucket; i++) { - memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); - ocfs2_journal_dirty(handle, t_bhs[i]); + memcpy(bucket_block(&t_bucket, i), bucket_block(&s_bucket, i), + blocksize); + ocfs2_journal_dirty(handle, t_bucket.bu_bhs[i]); } out: - if (s_bhs) { - for (i = 0; i < blk_per_bucket; i++) - brelse(s_bhs[i]); - } - kfree(s_bhs); - - if (t_bhs) { - for (i = 0; i < blk_per_bucket; i++) - brelse(t_bhs[i]); - } - kfree(t_bhs); + ocfs2_xattr_bucket_relse(inode, &s_bucket); + ocfs2_xattr_bucket_relse(inode, &t_bucket); return ret; } -- cgit v1.2.3-70-g09d2 From 1224be020f62ada3e19822feeac3840abf80de3e Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 24 Oct 2008 18:47:33 -0700 Subject: ocfs2: Wrap journal_access/journal_dirty for xattr buckets. A common action is to call ocfs2_journal_access() and ocfs2_journal_dirty() on the buffer heads of an xattr bucket. Let's create nice wrappers. While we're there, let's drop the places that try to be smart by writing only the first and last blocks of a bucket. A bucket is contiguous, so writing the whole thing is actually more efficient. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 140 +++++++++++++++++++++++++------------------------------ 1 file changed, 64 insertions(+), 76 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index fa13fa48878..99aefe4ea75 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -210,6 +210,37 @@ static int ocfs2_read_xattr_bucket(struct inode *inode, return rc; } +static int ocfs2_xattr_bucket_journal_access(handle_t *handle, + struct inode *inode, + struct ocfs2_xattr_bucket *bucket, + int type) +{ + int i, rc = 0; + int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + + for (i = 0; i < blks; i++) { + rc = ocfs2_journal_access(handle, inode, + bucket->bu_bhs[i], type); + if (rc) { + mlog_errno(rc); + break; + } + } + + return rc; +} + +static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, + struct inode *inode, + struct ocfs2_xattr_bucket *bucket) +{ + int i, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + + for (i = 0; i < blks; i++) + ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); +} + + static inline const char *ocfs2_xattr_prefix(int name_index) { struct xattr_handler *handler = NULL; @@ -3218,8 +3249,8 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, goto out; } - ret = ocfs2_journal_access(handle, inode, s_bucket.bu_bhs[0], - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_xattr_bucket_journal_access(handle, inode, &s_bucket, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -3235,15 +3266,13 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, goto out; } - for (i = 0; i < blk_per_bucket; i++) { - ret = ocfs2_journal_access(handle, inode, t_bucket.bu_bhs[i], - new_bucket_head ? - OCFS2_JOURNAL_ACCESS_CREATE : - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) { - mlog_errno(ret); - goto out; - } + ret = ocfs2_xattr_bucket_journal_access(handle, inode, &t_bucket, + new_bucket_head ? 
+ OCFS2_JOURNAL_ACCESS_CREATE : + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; } xh = bucket_xh(&s_bucket); @@ -3339,11 +3368,7 @@ set_num_buckets: else xh->xh_num_buckets = 0; - for (i = 0; i < blk_per_bucket; i++) { - ocfs2_journal_dirty(handle, t_bucket.bu_bhs[i]); - if (ret) - mlog_errno(ret); - } + ocfs2_xattr_bucket_journal_dirty(handle, inode, &t_bucket); /* store the first_hash of the new bucket. */ if (first_hash) @@ -3364,9 +3389,7 @@ set_num_buckets: xh->xh_free_start = cpu_to_le16(name_offset); xh->xh_name_value_len = cpu_to_le16(name_value_len); - ocfs2_journal_dirty(handle, s_bucket.bu_bhs[0]); - if (ret) - mlog_errno(ret); + ocfs2_xattr_bucket_journal_dirty(handle, inode, &s_bucket); out: ocfs2_xattr_bucket_relse(inode, &s_bucket); @@ -3413,20 +3436,18 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, if (ret) goto out; - for (i = 0; i < blk_per_bucket; i++) { - ret = ocfs2_journal_access(handle, inode, t_bucket.bu_bhs[i], - t_is_new ? - OCFS2_JOURNAL_ACCESS_CREATE : - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) - goto out; - } + ret = ocfs2_xattr_bucket_journal_access(handle, inode, &t_bucket, + t_is_new ? + OCFS2_JOURNAL_ACCESS_CREATE : + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) + goto out; for (i = 0; i < blk_per_bucket; i++) { memcpy(bucket_block(&t_bucket, i), bucket_block(&s_bucket, i), blocksize); - ocfs2_journal_dirty(handle, t_bucket.bu_bhs[i]); } + ocfs2_xattr_bucket_journal_dirty(handle, inode, &t_bucket); out: ocfs2_xattr_bucket_relse(inode, &s_bucket); @@ -3799,9 +3820,9 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, /* * We will touch all the buckets after the start_bh(include it). - * Add one more bucket and modify the first_bh. + * Then we add one more bucket. 
*/ - credits = end_blk - start_blk + 2 * blk_per_bucket + 1; + credits = end_blk - start_blk + 3 * blk_per_bucket + 1; handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -4077,33 +4098,6 @@ set_new_name_value: return; } -static int ocfs2_xattr_bucket_handle_journal(struct inode *inode, - handle_t *handle, - struct ocfs2_xattr_search *xs, - struct buffer_head **bhs, - u16 bh_num) -{ - int ret = 0, off, block_off; - struct ocfs2_xattr_entry *xe = xs->here; - - /* - * First calculate all the blocks we should journal_access - * and journal_dirty. The first block should always be touched. - */ - ret = ocfs2_journal_dirty(handle, bhs[0]); - if (ret) - mlog_errno(ret); - - /* calc the data. */ - off = le16_to_cpu(xe->xe_name_offset); - block_off = off >> inode->i_sb->s_blocksize_bits; - ret = ocfs2_journal_dirty(handle, bhs[block_off]); - if (ret) - mlog_errno(ret); - - return ret; -} - /* * Set the xattr entry in the specified bucket. * The bucket is indicated by xs->bucket and it should have the enough @@ -4115,7 +4109,7 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, u32 name_hash, int local) { - int i, ret; + int ret; handle_t *handle = NULL; u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); @@ -4143,22 +4137,16 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, goto out; } - for (i = 0; i < blk_per_bucket; i++) { - ret = ocfs2_journal_access(handle, inode, xs->bucket.bu_bhs[i], - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret < 0) { - mlog_errno(ret); - goto out; - } + ret = ocfs2_xattr_bucket_journal_access(handle, inode, &xs->bucket, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret < 0) { + mlog_errno(ret); + goto out; } ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local); + ocfs2_xattr_bucket_journal_dirty(handle, inode, &xs->bucket); - /*Only dirty the blocks we have touched in set xattr. 
*/ - ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs, - xs->bucket.bu_bhs, blk_per_bucket); - if (ret) - mlog_errno(ret); out: ocfs2_commit_trans(osb, handle); @@ -4398,15 +4386,16 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, le16_to_cpu(xh->xh_count) - 1]; int ret = 0; - handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1); + handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), + ocfs2_blocks_per_xattr_bucket(inode->i_sb)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); return; } - ret = ocfs2_journal_access(handle, inode, xs->bucket.bu_bhs[0], - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_xattr_bucket_journal_access(handle, inode, &xs->bucket, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; @@ -4418,9 +4407,8 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, memset(last, 0, sizeof(struct ocfs2_xattr_entry)); le16_add_cpu(&xh->xh_count, -1); - ret = ocfs2_journal_dirty(handle, xs->bucket.bu_bhs[0]); - if (ret < 0) - mlog_errno(ret); + ocfs2_xattr_bucket_journal_dirty(handle, inode, &xs->bucket); + out_commit: ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); } -- cgit v1.2.3-70-g09d2 From 4980c6daba967124ed6420032960abd2b48412e2 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 24 Oct 2008 18:54:43 -0700 Subject: ocfs2: Copy xattr buckets with a dedicated function. Now that the places that copy whole buckets are using struct ocfs2_xattr_bucket, we can do the copy in a dedicated function. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 99aefe4ea75..71d9e7bdd30 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -240,6 +240,19 @@ static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); } +static void ocfs2_xattr_bucket_copy_data(struct inode *inode, + struct ocfs2_xattr_bucket *dest, + struct ocfs2_xattr_bucket *src) +{ + int i; + int blocksize = inode->i_sb->s_blocksize; + int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + + for (i = 0; i < blks; i++) { + memcpy(bucket_block(dest, i), bucket_block(src, i), + blocksize); + } +} static inline const char *ocfs2_xattr_prefix(int name_index) { @@ -3299,9 +3312,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, } /* copy the whole bucket to the new first. */ - for (i = 0; i < blk_per_bucket; i++) - memcpy(bucket_block(&t_bucket, i), bucket_block(&s_bucket, i), - blocksize); + ocfs2_xattr_bucket_copy_data(inode, &t_bucket, &s_bucket); /* update the new bucket. 
*/ xh = bucket_xh(&t_bucket); @@ -3410,9 +3421,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, u64 t_blkno, int t_is_new) { - int ret, i; - int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - int blocksize = inode->i_sb->s_blocksize; + int ret; struct ocfs2_xattr_bucket s_bucket, t_bucket; BUG_ON(s_blkno == t_blkno); @@ -3443,10 +3452,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, if (ret) goto out; - for (i = 0; i < blk_per_bucket; i++) { - memcpy(bucket_block(&t_bucket, i), bucket_block(&s_bucket, i), - blocksize); - } + ocfs2_xattr_bucket_copy_data(inode, &t_bucket, &s_bucket); ocfs2_xattr_bucket_journal_dirty(handle, inode, &t_bucket); out: -- cgit v1.2.3-70-g09d2 From ba937127596ec2c61437006741f7d29999284de4 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 24 Oct 2008 19:13:20 -0700 Subject: ocfs2: Take ocfs2_xattr_bucket structures off of the stack. The ocfs2_xattr_bucket structure is a nice abstraction, but it is a bit large to have on the stack. Just like ocfs2_path, let's allocate it with a ocfs2_xattr_bucket_new() function. We can now store the inode on the bucket, cleaning up all the other bucket functions. While we're here, we catch another place or two that wasn't using ocfs2_read_xattr_bucket(). Updates: - No longer allocating xis.bucket, as it will never be used. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 281 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 166 insertions(+), 115 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 71d9e7bdd30..766494ed6e1 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -61,7 +61,14 @@ struct ocfs2_xattr_def_value_root { }; struct ocfs2_xattr_bucket { + /* The inode these xattrs are associated with */ + struct inode *bu_inode; + + /* The actual buffers that make up the bucket */ struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; + + /* How many blocks make up one bucket for this filesystem */ + int bu_blocks; }; #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) @@ -97,7 +104,7 @@ struct ocfs2_xattr_search { */ struct buffer_head *xattr_bh; struct ocfs2_xattr_header *header; - struct ocfs2_xattr_bucket bucket; + struct ocfs2_xattr_bucket *bucket; void *base; void *end; struct ocfs2_xattr_entry *here; @@ -157,69 +164,91 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) -static void ocfs2_xattr_bucket_relse(struct inode *inode, - struct ocfs2_xattr_bucket *bucket) +static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) { - int i, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + struct ocfs2_xattr_bucket *bucket; + int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - for (i = 0; i < blks; i++) { + BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); + + bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); + if (bucket) { + bucket->bu_inode = inode; + bucket->bu_blocks = blks; + } + + return bucket; +} + +static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) +{ + int i; + + for (i = 0; i < bucket->bu_blocks; i++) { brelse(bucket->bu_bhs[i]); 
bucket->bu_bhs[i] = NULL; } } +static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) +{ + if (bucket) { + ocfs2_xattr_bucket_relse(bucket); + bucket->bu_inode = NULL; + kfree(bucket); + } +} + /* * A bucket that has never been written to disk doesn't need to be * read. We just need the buffer_heads. Don't call this for * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes * them fully. */ -static int ocfs2_init_xattr_bucket(struct inode *inode, - struct ocfs2_xattr_bucket *bucket, +static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, u64 xb_blkno) { int i, rc = 0; - int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - for (i = 0; i < blks; i++) { - bucket->bu_bhs[i] = sb_getblk(inode->i_sb, xb_blkno + i); + for (i = 0; i < bucket->bu_blocks; i++) { + bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, + xb_blkno + i); if (!bucket->bu_bhs[i]) { rc = -EIO; mlog_errno(rc); break; } - ocfs2_set_new_buffer_uptodate(inode, bucket->bu_bhs[i]); + ocfs2_set_new_buffer_uptodate(bucket->bu_inode, + bucket->bu_bhs[i]); } if (rc) - ocfs2_xattr_bucket_relse(inode, bucket); + ocfs2_xattr_bucket_relse(bucket); return rc; } /* Read the xattr bucket at xb_blkno */ -static int ocfs2_read_xattr_bucket(struct inode *inode, - struct ocfs2_xattr_bucket *bucket, +static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, u64 xb_blkno) { - int rc, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + int rc; - rc = ocfs2_read_blocks(inode, xb_blkno, blks, bucket->bu_bhs, 0); + rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno, + bucket->bu_blocks, bucket->bu_bhs, 0); if (rc) - ocfs2_xattr_bucket_relse(inode, bucket); + ocfs2_xattr_bucket_relse(bucket); return rc; } static int ocfs2_xattr_bucket_journal_access(handle_t *handle, - struct inode *inode, struct ocfs2_xattr_bucket *bucket, int type) { int i, rc = 0; - int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - for (i = 0; i < blks; i++) { - rc = 
ocfs2_journal_access(handle, inode, + for (i = 0; i < bucket->bu_blocks; i++) { + rc = ocfs2_journal_access(handle, bucket->bu_inode, bucket->bu_bhs[i], type); if (rc) { mlog_errno(rc); @@ -231,24 +260,24 @@ static int ocfs2_xattr_bucket_journal_access(handle_t *handle, } static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, - struct inode *inode, struct ocfs2_xattr_bucket *bucket) { - int i, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + int i; - for (i = 0; i < blks; i++) + for (i = 0; i < bucket->bu_blocks; i++) ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); } -static void ocfs2_xattr_bucket_copy_data(struct inode *inode, - struct ocfs2_xattr_bucket *dest, +static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, struct ocfs2_xattr_bucket *src) { int i; - int blocksize = inode->i_sb->s_blocksize; - int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + int blocksize = src->bu_inode->i_sb->s_blocksize; + + BUG_ON(dest->bu_blocks != src->bu_blocks); + BUG_ON(dest->bu_inode != src->bu_inode); - for (i = 0; i < blks; i++) { + for (i = 0; i < src->bu_blocks; i++) { memcpy(bucket_block(dest, i), bucket_block(src, i), blocksize); } @@ -869,7 +898,12 @@ static int ocfs2_xattr_block_get(struct inode *inode, size_t size; int ret = -ENODATA, name_offset, name_len, block_off, i; - memset(&xs->bucket, 0, sizeof(xs->bucket)); + xs->bucket = ocfs2_xattr_bucket_new(inode); + if (!xs->bucket) { + ret = -ENOMEM; + mlog_errno(ret); + goto cleanup; + } ret = ocfs2_xattr_block_find(inode, name_index, name, xs); if (ret) { @@ -895,11 +929,11 @@ static int ocfs2_xattr_block_get(struct inode *inode, if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { ret = ocfs2_xattr_bucket_get_name_value(inode, - bucket_xh(&xs->bucket), + bucket_xh(xs->bucket), i, &block_off, &name_offset); - xs->base = bucket_block(&xs->bucket, block_off); + xs->base = bucket_block(xs->bucket, block_off); } if (ocfs2_xattr_is_local(xs->here)) { memcpy(buffer, (void *)xs->base + 
@@ -917,8 +951,7 @@ static int ocfs2_xattr_block_get(struct inode *inode, } ret = size; cleanup: - ocfs2_xattr_bucket_relse(inode, &xs->bucket); - memset(&xs->bucket, 0, sizeof(xs->bucket)); + ocfs2_xattr_bucket_free(xs->bucket); brelse(xs->xattr_bh); xs->xattr_bh = NULL; @@ -2047,10 +2080,20 @@ int ocfs2_xattr_set(struct inode *inode, if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) return -EOPNOTSUPP; + /* + * Only xbs will be used on indexed trees. xis doesn't need a + * bucket. + */ + xbs.bucket = ocfs2_xattr_bucket_new(inode); + if (!xbs.bucket) { + mlog_errno(-ENOMEM); + return -ENOMEM; + } + ret = ocfs2_inode_lock(inode, &di_bh, 1); if (ret < 0) { mlog_errno(ret); - return ret; + goto cleanup_nolock; } xis.inode_bh = xbs.inode_bh = di_bh; di = (struct ocfs2_dinode *)di_bh->b_data; @@ -2127,9 +2170,10 @@ int ocfs2_xattr_set(struct inode *inode, cleanup: up_write(&OCFS2_I(inode)->ip_xattr_sem); ocfs2_inode_unlock(inode, 1); +cleanup_nolock: brelse(di_bh); brelse(xbs.xattr_bh); - ocfs2_xattr_bucket_relse(inode, &xbs.bucket); + ocfs2_xattr_bucket_free(xbs.bucket); return ret; } @@ -2373,11 +2417,11 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, lower_bh = bh; bh = NULL; } - xs->bucket.bu_bhs[0] = lower_bh; + xs->bucket->bu_bhs[0] = lower_bh; lower_bh = NULL; - xs->header = bucket_xh(&xs->bucket); - xs->base = bucket_block(&xs->bucket, 0); + xs->header = bucket_xh(xs->bucket); + xs->base = bucket_block(xs->bucket, 0); xs->end = xs->base + inode->i_sb->s_blocksize; if (found) { @@ -2385,8 +2429,8 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, * If we have found the xattr enty, read all the blocks in * this bucket. 
*/ - ret = ocfs2_read_blocks(inode, bucket_blkno(&xs->bucket) + 1, - blk_per_bucket - 1, &xs->bucket.bu_bhs[1], + ret = ocfs2_read_blocks(inode, bucket_blkno(xs->bucket) + 1, + blk_per_bucket - 1, &xs->bucket->bu_bhs[1], 0); if (ret) { mlog_errno(ret); @@ -2395,7 +2439,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, xs->here = &xs->header->xh_entries[index]; mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name, - (unsigned long long)bucket_blkno(&xs->bucket), index); + (unsigned long long)bucket_blkno(xs->bucket), index); } else ret = -ENODATA; @@ -2453,22 +2497,24 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, void *para) { int i, ret = 0; - int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); u32 num_buckets = clusters * bpc; - struct ocfs2_xattr_bucket bucket; + struct ocfs2_xattr_bucket *bucket; - memset(&bucket, 0, sizeof(bucket)); + bucket = ocfs2_xattr_bucket_new(inode); + if (!bucket) { + mlog_errno(-ENOMEM); + return -ENOMEM; + } mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", clusters, (unsigned long long)blkno); - for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { - ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, - bucket.bu_bhs, 0); + for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { + ret = ocfs2_read_xattr_bucket(bucket, blkno); if (ret) { mlog_errno(ret); - goto out; + break; } /* @@ -2476,26 +2522,24 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, * in the 1st bucket. 
*/ if (i == 0) - num_buckets = le16_to_cpu(bucket_xh(&bucket)->xh_num_buckets); + num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); mlog(0, "iterating xattr bucket %llu, first hash %u\n", (unsigned long long)blkno, - le32_to_cpu(bucket_xh(&bucket)->xh_entries[0].xe_name_hash)); + le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); if (func) { - ret = func(inode, &bucket, para); - if (ret) { + ret = func(inode, bucket, para); + if (ret) mlog_errno(ret); - break; - } + /* Fall through to bucket_relse() */ } - ocfs2_xattr_bucket_relse(inode, &bucket); - memset(&bucket, 0, sizeof(bucket)); + ocfs2_xattr_bucket_relse(bucket); + if (ret) + break; } -out: - ocfs2_xattr_bucket_relse(inode, &bucket); - + ocfs2_xattr_bucket_free(bucket); return ret; } @@ -2718,9 +2762,9 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode, int i, blocksize = inode->i_sb->s_blocksize; u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - xs->bucket.bu_bhs[0] = new_bh; + xs->bucket->bu_bhs[0] = new_bh; get_bh(new_bh); - xs->header = bucket_xh(&xs->bucket); + xs->header = bucket_xh(xs->bucket); xs->base = new_bh->b_data; xs->end = xs->base + inode->i_sb->s_blocksize; @@ -2728,8 +2772,8 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode, if (!xs->not_found) { if (OCFS2_XATTR_BUCKET_SIZE != blocksize) { ret = ocfs2_read_blocks(inode, - bucket_blkno(&xs->bucket) + 1, - blk_per_bucket - 1, &xs->bucket.bu_bhs[1], + bucket_blkno(xs->bucket) + 1, + blk_per_bucket - 1, &xs->bucket->bu_bhs[1], 0); if (ret) { mlog_errno(ret); @@ -3244,8 +3288,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, { int ret, i; int count, start, len, name_value_len = 0, xe_len, name_offset = 0; - u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - struct ocfs2_xattr_bucket s_bucket, t_bucket; + struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; struct ocfs2_xattr_header *xh; struct ocfs2_xattr_entry *xe; int blocksize = 
inode->i_sb->s_blocksize; @@ -3253,16 +3296,21 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, mlog(0, "move some of xattrs from bucket %llu to %llu\n", (unsigned long long)blk, (unsigned long long)new_blk); - memset(&s_bucket, 0, sizeof(struct ocfs2_xattr_bucket)); - memset(&t_bucket, 0, sizeof(struct ocfs2_xattr_bucket)); + s_bucket = ocfs2_xattr_bucket_new(inode); + t_bucket = ocfs2_xattr_bucket_new(inode); + if (!s_bucket || !t_bucket) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } - ret = ocfs2_read_xattr_bucket(inode, &s_bucket, blk); + ret = ocfs2_read_xattr_bucket(s_bucket, blk); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_xattr_bucket_journal_access(handle, inode, &s_bucket, + ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); @@ -3273,13 +3321,13 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, * Even if !new_bucket_head, we're overwriting t_bucket. Thus, * there's no need to read it. */ - ret = ocfs2_init_xattr_bucket(inode, &t_bucket, new_blk); + ret = ocfs2_init_xattr_bucket(t_bucket, new_blk); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_xattr_bucket_journal_access(handle, inode, &t_bucket, + ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, new_bucket_head ? OCFS2_JOURNAL_ACCESS_CREATE : OCFS2_JOURNAL_ACCESS_WRITE); @@ -3288,7 +3336,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, goto out; } - xh = bucket_xh(&s_bucket); + xh = bucket_xh(s_bucket); count = le16_to_cpu(xh->xh_count); start = ocfs2_xattr_find_divide_pos(xh); @@ -3300,10 +3348,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, * The hash value is set as one larger than * that of the last entry in the previous bucket. 
*/ - for (i = 0; i < blk_per_bucket; i++) - memset(bucket_block(&t_bucket, i), 0, blocksize); + for (i = 0; i < t_bucket->bu_blocks; i++) + memset(bucket_block(t_bucket, i), 0, blocksize); - xh = bucket_xh(&t_bucket); + xh = bucket_xh(t_bucket); xh->xh_free_start = cpu_to_le16(blocksize); xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); @@ -3312,10 +3360,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, } /* copy the whole bucket to the new first. */ - ocfs2_xattr_bucket_copy_data(inode, &t_bucket, &s_bucket); + ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); /* update the new bucket. */ - xh = bucket_xh(&t_bucket); + xh = bucket_xh(t_bucket); /* * Calculate the total name/value len and xh_free_start for @@ -3379,7 +3427,7 @@ set_num_buckets: else xh->xh_num_buckets = 0; - ocfs2_xattr_bucket_journal_dirty(handle, inode, &t_bucket); + ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); /* store the first_hash of the new bucket. 
*/ if (first_hash) @@ -3393,18 +3441,18 @@ set_num_buckets: if (start == count) goto out; - xh = bucket_xh(&s_bucket); + xh = bucket_xh(s_bucket); memset(&xh->xh_entries[start], 0, sizeof(struct ocfs2_xattr_entry) * (count - start)); xh->xh_count = cpu_to_le16(start); xh->xh_free_start = cpu_to_le16(name_offset); xh->xh_name_value_len = cpu_to_le16(name_value_len); - ocfs2_xattr_bucket_journal_dirty(handle, inode, &s_bucket); + ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); out: - ocfs2_xattr_bucket_relse(inode, &s_bucket); - ocfs2_xattr_bucket_relse(inode, &t_bucket); + ocfs2_xattr_bucket_free(s_bucket); + ocfs2_xattr_bucket_free(t_bucket); return ret; } @@ -3422,7 +3470,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, int t_is_new) { int ret; - struct ocfs2_xattr_bucket s_bucket, t_bucket; + struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; BUG_ON(s_blkno == t_blkno); @@ -3430,10 +3478,15 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, (unsigned long long)s_blkno, (unsigned long long)t_blkno, t_is_new); - memset(&s_bucket, 0, sizeof(struct ocfs2_xattr_bucket)); - memset(&t_bucket, 0, sizeof(struct ocfs2_xattr_bucket)); - - ret = ocfs2_read_xattr_bucket(inode, &s_bucket, s_blkno); + s_bucket = ocfs2_xattr_bucket_new(inode); + t_bucket = ocfs2_xattr_bucket_new(inode); + if (!s_bucket || !t_bucket) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); if (ret) goto out; @@ -3441,23 +3494,23 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, * Even if !t_is_new, we're overwriting t_bucket. Thus, * there's no need to read it. */ - ret = ocfs2_init_xattr_bucket(inode, &t_bucket, t_blkno); + ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno); if (ret) goto out; - ret = ocfs2_xattr_bucket_journal_access(handle, inode, &t_bucket, + ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, t_is_new ? 
OCFS2_JOURNAL_ACCESS_CREATE : OCFS2_JOURNAL_ACCESS_WRITE); if (ret) goto out; - ocfs2_xattr_bucket_copy_data(inode, &t_bucket, &s_bucket); - ocfs2_xattr_bucket_journal_dirty(handle, inode, &t_bucket); + ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); + ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); out: - ocfs2_xattr_bucket_relse(inode, &s_bucket); - ocfs2_xattr_bucket_relse(inode, &t_bucket); + ocfs2_xattr_bucket_free(t_bucket); + ocfs2_xattr_bucket_free(s_bucket); return ret; } @@ -4009,7 +4062,7 @@ static void ocfs2_xattr_set_entry_normal(struct inode *inode, xe->xe_value_size = 0; val = ocfs2_xattr_bucket_get_val(inode, - &xs->bucket, offs); + xs->bucket, offs); memset(val + OCFS2_XATTR_SIZE(name_len), 0, size - OCFS2_XATTR_SIZE(name_len)); if (OCFS2_XATTR_SIZE(xi->value_len) > 0) @@ -4087,8 +4140,7 @@ set_new_name_value: xh->xh_free_start = cpu_to_le16(offs); } - val = ocfs2_xattr_bucket_get_val(inode, - &xs->bucket, offs - size); + val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size); xe->xe_name_offset = cpu_to_le16(offs - size); memset(val, 0, size); @@ -4122,12 +4174,12 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n", (unsigned long)xi->value_len, xi->name_index, - (unsigned long long)bucket_blkno(&xs->bucket)); + (unsigned long long)bucket_blkno(xs->bucket)); - if (!xs->bucket.bu_bhs[1]) { + if (!xs->bucket->bu_bhs[1]) { ret = ocfs2_read_blocks(inode, - bucket_blkno(&xs->bucket) + 1, - blk_per_bucket - 1, &xs->bucket.bu_bhs[1], + bucket_blkno(xs->bucket) + 1, + blk_per_bucket - 1, &xs->bucket->bu_bhs[1], 0); if (ret) { mlog_errno(ret); @@ -4143,7 +4195,7 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, goto out; } - ret = ocfs2_xattr_bucket_journal_access(handle, inode, &xs->bucket, + ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); @@ -4151,7 +4203,7 @@ static int 
ocfs2_xattr_set_entry_in_bucket(struct inode *inode, } ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local); - ocfs2_xattr_bucket_journal_dirty(handle, inode, &xs->bucket); + ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); out: ocfs2_commit_trans(osb, handle); @@ -4264,10 +4316,10 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, struct ocfs2_xattr_entry *xe = xs->here; struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base; - BUG_ON(!xs->bucket.bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe)); + BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe)); offset = xe - xh->xh_entries; - ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bu_bhs[0], + ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket->bu_bhs[0], offset, len); if (ret) mlog_errno(ret); @@ -4387,7 +4439,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, struct ocfs2_xattr_search *xs) { handle_t *handle = NULL; - struct ocfs2_xattr_header *xh = bucket_xh(&xs->bucket); + struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket); struct ocfs2_xattr_entry *last = &xh->xh_entries[ le16_to_cpu(xh->xh_count) - 1]; int ret = 0; @@ -4400,7 +4452,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, return; } - ret = ocfs2_xattr_bucket_journal_access(handle, inode, &xs->bucket, + ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); @@ -4413,7 +4465,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, memset(last, 0, sizeof(struct ocfs2_xattr_entry)); le16_add_cpu(&xh->xh_count, -1); - ocfs2_xattr_bucket_journal_dirty(handle, inode, &xs->bucket); + ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); out_commit: ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); @@ -4565,7 +4617,7 @@ try_again: mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size " "of %u which exceed block size\n", - (unsigned long 
long)bucket_blkno(&xs->bucket), + (unsigned long long)bucket_blkno(xs->bucket), header_size); if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) @@ -4605,7 +4657,7 @@ try_again: mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, " "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len =" " %u\n", xs->not_found, - (unsigned long long)bucket_blkno(&xs->bucket), + (unsigned long long)bucket_blkno(xs->bucket), free, need, max_free, le16_to_cpu(xh->xh_free_start), le16_to_cpu(xh->xh_name_value_len)); @@ -4617,7 +4669,7 @@ try_again: * name/value will be moved, the xe shouldn't be changed * in xs. */ - ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket); + ret = ocfs2_defrag_xattr_bucket(inode, xs->bucket); if (ret) { mlog_errno(ret); goto out; @@ -4649,7 +4701,7 @@ try_again: * add a new bucket for the insert. */ ret = ocfs2_check_xattr_bucket_collision(inode, - &xs->bucket, + xs->bucket, xi->name); if (ret) { mlog_errno(ret); @@ -4658,14 +4710,13 @@ try_again: ret = ocfs2_add_new_xattr_bucket(inode, xs->xattr_bh, - xs->bucket.bu_bhs[0]); + xs->bucket->bu_bhs[0]); if (ret) { mlog_errno(ret); goto out; } - ocfs2_xattr_bucket_relse(inode, &xs->bucket); - memset(&xs->bucket, 0, sizeof(xs->bucket)); + ocfs2_xattr_bucket_relse(xs->bucket); ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, xi->name_index, -- cgit v1.2.3-70-g09d2 From e2356a3f02cfdbce735465a2b40b6dc72a764c26 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Mon, 27 Oct 2008 15:01:54 -0700 Subject: ocfs2: Use buckets in ocfs2_xattr_bucket_find(). Change the ocfs2_xattr_bucket_find() function to use ocfs2_xattr_bucket as its abstraction. This makes for more efficient reads, as buckets are linear blocks, and also has improved caching characteristics. It also reads better. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 89 ++++++++++++++++++++------------------------------------ 1 file changed, 31 insertions(+), 58 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 766494ed6e1..46986c635eb 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2248,7 +2248,7 @@ typedef int (xattr_bucket_func)(struct inode *inode, void *para); static int ocfs2_find_xe_in_bucket(struct inode *inode, - struct buffer_head *header_bh, + struct ocfs2_xattr_bucket *bucket, int name_index, const char *name, u32 name_hash, @@ -2256,11 +2256,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode, int *found) { int i, ret = 0, cmp = 1, block_off, new_offset; - struct ocfs2_xattr_header *xh = - (struct ocfs2_xattr_header *)header_bh->b_data; + struct ocfs2_xattr_header *xh = bucket_xh(bucket); size_t name_len = strlen(name); struct ocfs2_xattr_entry *xe = NULL; - struct buffer_head *name_bh = NULL; char *xe_name; /* @@ -2291,19 +2289,8 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode, break; } - ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off, - &name_bh); - if (ret) { - mlog_errno(ret); - break; - } - xe_name = name_bh->b_data + new_offset; - - cmp = memcmp(name, xe_name, name_len); - brelse(name_bh); - name_bh = NULL; - - if (cmp == 0) { + xe_name = bucket_block(bucket, block_off) + new_offset; + if (!memcmp(name, xe_name, name_len)) { *xe_index = i; *found = 1; ret = 0; @@ -2333,39 +2320,42 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, struct ocfs2_xattr_search *xs) { int ret, found = 0; - struct buffer_head *bh = NULL; - struct buffer_head *lower_bh = NULL; struct ocfs2_xattr_header *xh = NULL; struct ocfs2_xattr_entry *xe = NULL; u16 index = 0; u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); int low_bucket = 0, bucket, high_bucket; + struct ocfs2_xattr_bucket *search; u32 last_hash; - u64 blkno; + u64 blkno, lower_blkno = 0; - 
ret = ocfs2_read_block(inode, p_blkno, &bh); + search = ocfs2_xattr_bucket_new(inode); + if (!search) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_read_xattr_bucket(search, p_blkno); if (ret) { mlog_errno(ret); goto out; } - xh = (struct ocfs2_xattr_header *)bh->b_data; + xh = bucket_xh(search); high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; - while (low_bucket <= high_bucket) { - brelse(bh); - bh = NULL; - bucket = (low_bucket + high_bucket) / 2; + ocfs2_xattr_bucket_relse(search); + bucket = (low_bucket + high_bucket) / 2; blkno = p_blkno + bucket * blk_per_bucket; - - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_xattr_bucket(search, blkno); if (ret) { mlog_errno(ret); goto out; } - xh = (struct ocfs2_xattr_header *)bh->b_data; + xh = bucket_xh(search); xe = &xh->xh_entries[0]; if (name_hash < le32_to_cpu(xe->xe_name_hash)) { high_bucket = bucket - 1; @@ -2382,10 +2372,8 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, last_hash = le32_to_cpu(xe->xe_name_hash); - /* record lower_bh which may be the insert place. */ - brelse(lower_bh); - lower_bh = bh; - bh = NULL; + /* record lower_blkno which may be the insert place. */ + lower_blkno = blkno; if (name_hash > le32_to_cpu(xe->xe_name_hash)) { low_bucket = bucket + 1; @@ -2393,7 +2381,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, } /* the searched xattr should reside in this bucket if exists. */ - ret = ocfs2_find_xe_in_bucket(inode, lower_bh, + ret = ocfs2_find_xe_in_bucket(inode, search, name_index, name, name_hash, &index, &found); if (ret) { @@ -2408,35 +2396,21 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, * When the xattr's hash value is in the gap of 2 buckets, we will * always set it to the previous bucket. */ - if (!lower_bh) { - /* - * We can't find any bucket whose first name_hash is less - * than the find name_hash. 
- */ - BUG_ON(bh->b_blocknr != p_blkno); - lower_bh = bh; - bh = NULL; + if (!lower_blkno) + lower_blkno = p_blkno; + + /* This should be in cache - we just read it during the search */ + ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); + if (ret) { + mlog_errno(ret); + goto out; } - xs->bucket->bu_bhs[0] = lower_bh; - lower_bh = NULL; xs->header = bucket_xh(xs->bucket); xs->base = bucket_block(xs->bucket, 0); xs->end = xs->base + inode->i_sb->s_blocksize; if (found) { - /* - * If we have found the xattr enty, read all the blocks in - * this bucket. - */ - ret = ocfs2_read_blocks(inode, bucket_blkno(xs->bucket) + 1, - blk_per_bucket - 1, &xs->bucket->bu_bhs[1], - 0); - if (ret) { - mlog_errno(ret); - goto out; - } - xs->here = &xs->header->xh_entries[index]; mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name, (unsigned long long)bucket_blkno(xs->bucket), index); @@ -2444,8 +2418,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, ret = -ENODATA; out: - brelse(bh); - brelse(lower_bh); + ocfs2_xattr_bucket_free(search); return ret; } -- cgit v1.2.3-70-g09d2 From 178eeac354ea28828d5e94a3a7b51368c171d6a5 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Mon, 27 Oct 2008 15:18:29 -0700 Subject: ocfs2: Use buckets in ocfs2_xattr_create_index_block(). Use the ocfs2_xattr_bucket abstraction in ocfs2_xattr_create_index_block() and its helpers. We get more efficient reads, a lot less buffer_head munging, and nicer code to boot. While we're at it, ocfs2_xattr_update_xattr_search() becomes void. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 114 ++++++++++++++++--------------------------------------- 1 file changed, 32 insertions(+), 82 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 46986c635eb..76969b92200 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2649,32 +2649,34 @@ static void swap_xe(void *a, void *b, int size) /* * When the ocfs2_xattr_block is filled up, new bucket will be created * and all the xattr entries will be moved to the new bucket. + * The header goes at the start of the bucket, and the names+values are + * filled from the end. This is why *target starts as the last buffer. * Note: we need to sort the entries since they are not saved in order * in the ocfs2_xattr_block. */ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, struct buffer_head *xb_bh, - struct buffer_head *xh_bh, - struct buffer_head *data_bh) + struct ocfs2_xattr_bucket *bucket) { int i, blocksize = inode->i_sb->s_blocksize; + int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); u16 offset, size, off_change; struct ocfs2_xattr_entry *xe; struct ocfs2_xattr_block *xb = (struct ocfs2_xattr_block *)xb_bh->b_data; struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; - struct ocfs2_xattr_header *xh = - (struct ocfs2_xattr_header *)xh_bh->b_data; + struct ocfs2_xattr_header *xh = bucket_xh(bucket); u16 count = le16_to_cpu(xb_xh->xh_count); - char *target = xh_bh->b_data, *src = xb_bh->b_data; + char *src = xb_bh->b_data; + char *target = bucket_block(bucket, blks - 1); mlog(0, "cp xattr from block %llu to bucket %llu\n", (unsigned long long)xb_bh->b_blocknr, - (unsigned long long)xh_bh->b_blocknr); + (unsigned long long)bucket_blkno(bucket)); + + for (i = 0; i < blks; i++) + memset(bucket_block(bucket, i), 0, blocksize); - memset(xh_bh->b_data, 0, blocksize); - if (data_bh) - memset(data_bh->b_data, 0, blocksize); /* * Since the xe_name_offset is based on 
ocfs2_xattr_header, * there is a offset change corresponding to the change of @@ -2686,8 +2688,6 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, size = blocksize - offset; /* copy all the names and values. */ - if (data_bh) - target = data_bh->b_data; memcpy(target + offset, src + offset, size); /* Init new header now. */ @@ -2697,7 +2697,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); /* copy all the entries. */ - target = xh_bh->b_data; + target = bucket_block(bucket, 0); offset = offsetof(struct ocfs2_xattr_header, xh_entries); size = count * sizeof(struct ocfs2_xattr_entry); memcpy(target + offset, (char *)xb_xh + offset, size); @@ -2723,42 +2723,24 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, * While if the entry is in index b-tree, "bucket" indicates the * real place of the xattr. */ -static int ocfs2_xattr_update_xattr_search(struct inode *inode, - struct ocfs2_xattr_search *xs, - struct buffer_head *old_bh, - struct buffer_head *new_bh) +static void ocfs2_xattr_update_xattr_search(struct inode *inode, + struct ocfs2_xattr_search *xs, + struct buffer_head *old_bh) { - int ret = 0; char *buf = old_bh->b_data; struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; - int i, blocksize = inode->i_sb->s_blocksize; - u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + int i; - xs->bucket->bu_bhs[0] = new_bh; - get_bh(new_bh); xs->header = bucket_xh(xs->bucket); - - xs->base = new_bh->b_data; + xs->base = bucket_block(xs->bucket, 0); xs->end = xs->base + inode->i_sb->s_blocksize; - if (!xs->not_found) { - if (OCFS2_XATTR_BUCKET_SIZE != blocksize) { - ret = ocfs2_read_blocks(inode, - bucket_blkno(xs->bucket) + 1, - blk_per_bucket - 1, &xs->bucket->bu_bhs[1], - 0); - if (ret) { - mlog_errno(ret); - return ret; - } - - } - i = xs->here - 
old_xh->xh_entries; - xs->here = &xs->header->xh_entries[i]; - } + if (xs->not_found) + return; - return ret; + i = xs->here - old_xh->xh_entries; + xs->here = &xs->header->xh_entries[i]; } static int ocfs2_xattr_create_index_block(struct inode *inode, @@ -2771,18 +2753,17 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_alloc_context *data_ac; - struct buffer_head *xh_bh = NULL, *data_bh = NULL; struct buffer_head *xb_bh = xs->xattr_bh; struct ocfs2_xattr_block *xb = (struct ocfs2_xattr_block *)xb_bh->b_data; struct ocfs2_xattr_tree_root *xr; u16 xb_flags = le16_to_cpu(xb->xb_flags); - u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb); mlog(0, "create xattr index block for %llu\n", (unsigned long long)xb_bh->b_blocknr); BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); + BUG_ON(!xs->bucket); ret = ocfs2_reserve_clusters(osb, 1, &data_ac); if (ret) { @@ -2798,10 +2779,10 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, down_write(&oi->ip_alloc_sem); /* - * 3 more credits, one for xattr block update, one for the 1st block - * of the new xattr bucket and one for the value/data. + * We need more credits. One for the xattr block update and one + * for each block of the new xattr bucket. 
*/ - credits += 3; + credits += 1 + ocfs2_blocks_per_xattr_bucket(inode->i_sb); handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -2832,51 +2813,23 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, mlog(0, "allocate 1 cluster from %llu to xattr block\n", (unsigned long long)blkno); - xh_bh = sb_getblk(inode->i_sb, blkno); - if (!xh_bh) { - ret = -EIO; + ret = ocfs2_init_xattr_bucket(xs->bucket, blkno); + if (ret) { mlog_errno(ret); goto out_commit; } - ocfs2_set_new_buffer_uptodate(inode, xh_bh); - - ret = ocfs2_journal_access(handle, inode, xh_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, + OCFS2_JOURNAL_ACCESS_CREATE); if (ret) { mlog_errno(ret); goto out_commit; } - if (bpb > 1) { - data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1); - if (!data_bh) { - ret = -EIO; - mlog_errno(ret); - goto out_commit; - } - - ocfs2_set_new_buffer_uptodate(inode, data_bh); - - ret = ocfs2_journal_access(handle, inode, data_bh, - OCFS2_JOURNAL_ACCESS_CREATE); - if (ret) { - mlog_errno(ret); - goto out_commit; - } - } - - ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh); - - ocfs2_journal_dirty(handle, xh_bh); - if (data_bh) - ocfs2_journal_dirty(handle, data_bh); + ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); + ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); - ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh); - if (ret) { - mlog_errno(ret); - goto out_commit; - } + ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - @@ -2911,9 +2864,6 @@ out: if (data_ac) ocfs2_free_alloc_context(data_ac); - brelse(xh_bh); - brelse(data_bh); - return ret; } -- cgit v1.2.3-70-g09d2 From 161d6f30f18c4a7e2b24705b6690cce3ff276eb9 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Mon, 27 Oct 2008 15:25:18 -0700 Subject: ocfs2: Use buckets in 
ocfs2_defrag_xattr_bucket(). Use the ocfs2_xattr_bucket abstraction for reading and writing the bucket in ocfs2_defrag_xattr_bucket(). Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 55 +++++++++++++++++++++++-------------------------------- 1 file changed, 23 insertions(+), 32 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 76969b92200..127a6285078 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2894,21 +2894,11 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, struct ocfs2_xattr_header *xh; char *entries, *buf, *bucket_buf = NULL; u64 blkno = bucket_blkno(bucket); - u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); u16 xh_free_start; size_t blocksize = inode->i_sb->s_blocksize; handle_t *handle; - struct buffer_head **bhs; struct ocfs2_xattr_entry *xe; - - bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, - GFP_NOFS); - if (!bhs) - return -ENOMEM; - - ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0); - if (ret) - goto out; + struct ocfs2_xattr_bucket *wb = NULL; /* * In order to make the operation more efficient and generic, @@ -2922,11 +2912,21 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, goto out; } + wb = ocfs2_xattr_bucket_new(inode); + if (!wb) { + ret = -ENOMEM; + goto out; + } + + ret = ocfs2_read_xattr_bucket(wb, blkno); + if (ret) + goto out; + buf = bucket_buf; - for (i = 0; i < blk_per_bucket; i++, buf += blocksize) - memcpy(buf, bhs[i]->b_data, blocksize); + for (i = 0; i < wb->bu_blocks; i++, buf += blocksize) + memcpy(buf, bucket_block(wb, i), blocksize); - handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket); + handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), wb->bu_blocks); if (IS_ERR(handle)) { ret = PTR_ERR(handle); handle = NULL; @@ -2934,13 +2934,11 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, goto out; } - for (i = 0; i < blk_per_bucket; i++) { - ret = 
ocfs2_journal_access(handle, inode, bhs[i], - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret < 0) { - mlog_errno(ret); - goto commit; - } + ret = ocfs2_xattr_bucket_journal_access(handle, wb, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret < 0) { + mlog_errno(ret); + goto commit; } xh = (struct ocfs2_xattr_header *)bucket_buf; @@ -3009,21 +3007,14 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, cmp_xe, swap_xe); buf = bucket_buf; - for (i = 0; i < blk_per_bucket; i++, buf += blocksize) { - memcpy(bhs[i]->b_data, buf, blocksize); - ocfs2_journal_dirty(handle, bhs[i]); - } + for (i = 0; i < wb->bu_blocks; i++, buf += blocksize) + memcpy(bucket_block(wb, i), buf, blocksize); + ocfs2_xattr_bucket_journal_dirty(handle, wb); commit: ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: - - if (bhs) { - for (i = 0; i < blk_per_bucket; i++) - brelse(bhs[i]); - } - kfree(bhs); - + ocfs2_xattr_bucket_free(wb); kfree(bucket_buf); return ret; } -- cgit v1.2.3-70-g09d2 From 02dbf38d19c19016f558fe0dc0c44f8041d3eb8e Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Mon, 27 Oct 2008 18:07:45 -0700 Subject: ocfs2: Use buckets in ocfs2_xattr_set_entry_in_bucket(). The ocfs2_xattr_set_entry_in_bucket() function is already working on an ocfs2_xattr_bucket structure, so let's use the bucket API. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 127a6285078..029a9f4559f 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -4083,25 +4083,24 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, { int ret; handle_t *handle = NULL; - u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + u64 blkno; mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n", (unsigned long)xi->value_len, xi->name_index, (unsigned long long)bucket_blkno(xs->bucket)); if (!xs->bucket->bu_bhs[1]) { - ret = ocfs2_read_blocks(inode, - bucket_blkno(xs->bucket) + 1, - blk_per_bucket - 1, &xs->bucket->bu_bhs[1], - 0); + blkno = bucket_blkno(xs->bucket); + ocfs2_xattr_bucket_relse(xs->bucket); + ret = ocfs2_read_xattr_bucket(xs->bucket, blkno); if (ret) { mlog_errno(ret); goto out; } } - handle = ocfs2_start_trans(osb, blk_per_bucket); + handle = ocfs2_start_trans(osb, xs->bucket->bu_blocks); if (IS_ERR(handle)) { ret = PTR_ERR(handle); handle = NULL; -- cgit v1.2.3-70-g09d2 From 1c32a2fd46ddc01bd86bff56a8f5d98c815750f4 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 6 Nov 2008 08:10:47 +0800 Subject: ocfs2/xattr: Remove additional bucket allocation in bucket defragment. Joel has refactored xattr bucket and make xattr bucket a general wrapper. So in ocfs2_defrag_xattr_bucket, we have already passed the bucket in, so there is no need to allocate a new one and read it. 
Signed-off-by: Tao Ma Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 029a9f4559f..87cf39ddfe5 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2898,7 +2898,6 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, size_t blocksize = inode->i_sb->s_blocksize; handle_t *handle; struct ocfs2_xattr_entry *xe; - struct ocfs2_xattr_bucket *wb = NULL; /* * In order to make the operation more efficient and generic, @@ -2912,21 +2911,11 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, goto out; } - wb = ocfs2_xattr_bucket_new(inode); - if (!wb) { - ret = -ENOMEM; - goto out; - } - - ret = ocfs2_read_xattr_bucket(wb, blkno); - if (ret) - goto out; - buf = bucket_buf; - for (i = 0; i < wb->bu_blocks; i++, buf += blocksize) - memcpy(buf, bucket_block(wb, i), blocksize); + for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) + memcpy(buf, bucket_block(bucket, i), blocksize); - handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), wb->bu_blocks); + handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), bucket->bu_blocks); if (IS_ERR(handle)) { ret = PTR_ERR(handle); handle = NULL; @@ -2934,7 +2923,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, goto out; } - ret = ocfs2_xattr_bucket_journal_access(handle, wb, + ret = ocfs2_xattr_bucket_journal_access(handle, bucket, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); @@ -3007,14 +2996,13 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, cmp_xe, swap_xe); buf = bucket_buf; - for (i = 0; i < wb->bu_blocks; i++, buf += blocksize) - memcpy(bucket_block(wb, i), buf, blocksize); - ocfs2_xattr_bucket_journal_dirty(handle, wb); + for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) + memcpy(bucket_block(bucket, i), buf, blocksize); + ocfs2_xattr_bucket_journal_dirty(handle, bucket); 
commit: ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: - ocfs2_xattr_bucket_free(wb); kfree(bucket_buf); return ret; } -- cgit v1.2.3-70-g09d2 From 757055adc5d41b910bdead925060f077dd2d9169 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 6 Nov 2008 08:10:48 +0800 Subject: ocfs2/xattr: Only set buffer update if it doesn't exist in cache. When we call ocfs2_init_xattr_bucket, we deem that the new buffer head will be written to disk immediately, so we just use sb_getblk. But in some cases the buffer may have already been in ocfs2 uptodate cache, so we only call ocfs2_set_buffer_uptodate if the buffer head isn't in the cache. Signed-off-by: Tao Ma Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 87cf39ddfe5..d8fc714e941 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -219,8 +219,10 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, break; } - ocfs2_set_new_buffer_uptodate(bucket->bu_inode, - bucket->bu_bhs[i]); + if (!ocfs2_buffer_uptodate(bucket->bu_inode, + bucket->bu_bhs[i])) + ocfs2_set_new_buffer_uptodate(bucket->bu_inode, + bucket->bu_bhs[i]); } if (rc) -- cgit v1.2.3-70-g09d2 From 976331d8789d4d84a11b45b87c520ade83715343 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 12 Nov 2008 08:26:57 +0800 Subject: ocfs2/xattr: Only extend xattr bucket in need. When the first block of a bucket is filled up with xattr entries, we normally extend the bucket. But if we are just replace one xattr with small length, we don't need to extend it. This is important since we will calculate what we need before the transaction and in this situation no resources will be allocated. 
Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d8fc714e941..4501c63193d 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -4564,7 +4564,9 @@ try_again: free, need, max_free, le16_to_cpu(xh->xh_free_start), le16_to_cpu(xh->xh_name_value_len)); - if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { + if (free < need || + (xs->not_found && + count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) { if (need <= max_free && count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { /* -- cgit v1.2.3-70-g09d2 From 2891d290aa6eee0821f7e4ad0b1c4ae4d964b0f1 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 12 Nov 2008 08:26:58 +0800 Subject: ocfs2: Add clusters free in dealloc_ctxt. Now in ocfs2 xattr set, the whole process is divided into many small parts that are wrapped into different transactions, so the set doesn't look like a real transaction. So we want to integrate it into a real one. In some cases we will allocate some clusters and free some in just one transaction. For example, one xattr is larger than inline size, so it and its value root are stored within the inode while the value is outside in a cluster. If we then try to update it with a smaller value (larger than the size of root but smaller than inline size), we may need to free the outside cluster while allocating a new bucket (one cluster), since the inode may now be full. The old solution will lock the global_bitmap (if the local alloc failed in stress test) and then the truncate log. This will cause an ABBA lock with truncate log flush. This patch adds the clusters free in dealloc_ctxt, so that we can record the freed clusters during the transaction and then free them after we release the global_bitmap in xattr set.
Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++---- fs/ocfs2/alloc.h | 4 +++ 2 files changed, 103 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 0cc2deb9394..4614614084d 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5800,7 +5800,10 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb) */ /* - * Describes a single block free from a suballocator + * Describe a single bit freed from a suballocator. For the block + * suballocators, it represents one block. For the global cluster + * allocator, it represents some clusters and free_bit indicates + * clusters number. */ struct ocfs2_cached_block_free { struct ocfs2_cached_block_free *free_next; @@ -5815,10 +5818,10 @@ struct ocfs2_per_slot_free_list { struct ocfs2_cached_block_free *f_first; }; -static int ocfs2_free_cached_items(struct ocfs2_super *osb, - int sysfile_type, - int slot, - struct ocfs2_cached_block_free *head) +static int ocfs2_free_cached_blocks(struct ocfs2_super *osb, + int sysfile_type, + int slot, + struct ocfs2_cached_block_free *head) { int ret; u64 bg_blkno; @@ -5893,6 +5896,82 @@ out: return ret; } +int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, + u64 blkno, unsigned int bit) +{ + int ret = 0; + struct ocfs2_cached_block_free *item; + + item = kmalloc(sizeof(*item), GFP_NOFS); + if (item == NULL) { + ret = -ENOMEM; + mlog_errno(ret); + return ret; + } + + mlog(0, "Insert clusters: (bit %u, blk %llu)\n", + bit, (unsigned long long)blkno); + + item->free_blk = blkno; + item->free_bit = bit; + item->free_next = ctxt->c_global_allocator; + + ctxt->c_global_allocator = item; + return ret; +} + +static int ocfs2_free_cached_clusters(struct ocfs2_super *osb, + struct ocfs2_cached_block_free *head) +{ + struct ocfs2_cached_block_free *tmp; + struct inode *tl_inode = osb->osb_tl_inode; + handle_t *handle; + int ret = 0; + + 
mutex_lock(&tl_inode->i_mutex); + + while (head) { + if (ocfs2_truncate_log_needs_flush(osb)) { + ret = __ocfs2_flush_truncate_log(osb); + if (ret < 0) { + mlog_errno(ret); + break; + } + } + + handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + break; + } + + ret = ocfs2_truncate_log_append(osb, handle, head->free_blk, + head->free_bit); + + ocfs2_commit_trans(osb, handle); + tmp = head; + head = head->free_next; + kfree(tmp); + + if (ret < 0) { + mlog_errno(ret); + break; + } + } + + mutex_unlock(&tl_inode->i_mutex); + + while (head) { + /* Premature exit may have left some dangling items. */ + tmp = head; + head = head->free_next; + kfree(tmp); + } + + return ret; +} + int ocfs2_run_deallocs(struct ocfs2_super *osb, struct ocfs2_cached_dealloc_ctxt *ctxt) { @@ -5908,8 +5987,10 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb, if (fl->f_first) { mlog(0, "Free items: (type %u, slot %d)\n", fl->f_inode_type, fl->f_slot); - ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type, - fl->f_slot, fl->f_first); + ret2 = ocfs2_free_cached_blocks(osb, + fl->f_inode_type, + fl->f_slot, + fl->f_first); if (ret2) mlog_errno(ret2); if (!ret) @@ -5920,6 +6001,17 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb, kfree(fl); } + if (ctxt->c_global_allocator) { + ret2 = ocfs2_free_cached_clusters(osb, + ctxt->c_global_allocator); + if (ret2) + mlog_errno(ret2); + if (!ret) + ret = ret2; + + ctxt->c_global_allocator = NULL; + } + return ret; } diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 70257c84cfb..c301cf225f0 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -167,11 +167,15 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb); */ struct ocfs2_cached_dealloc_ctxt { struct ocfs2_per_slot_free_list *c_first_suballocator; + struct ocfs2_cached_block_free *c_global_allocator; }; static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c) { c->c_first_suballocator = 
NULL; + c->c_global_allocator = NULL; } +int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, + u64 blkno, unsigned int bit); int ocfs2_run_deallocs(struct ocfs2_super *osb, struct ocfs2_cached_dealloc_ctxt *ctxt); -- cgit v1.2.3-70-g09d2 From c73f60f900ddf73ec4ea2a143829ab97242c4e8c Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 12 Nov 2008 08:26:59 +0800 Subject: ocfs2/xattr: Move clusters free into dealloc. Move clusters free process into dealloc context so that they can be freed after the transaction. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 4501c63193d..f1da381a44f 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -457,7 +457,6 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, int ret; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct inode *tl_inode = osb->osb_tl_inode; handle_t *handle; struct ocfs2_alloc_context *meta_ac = NULL; struct ocfs2_extent_tree et; @@ -470,16 +469,6 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, return ret; } - mutex_lock(&tl_inode->i_mutex); - - if (ocfs2_truncate_log_needs_flush(osb)) { - ret = __ocfs2_flush_truncate_log(osb); - if (ret < 0) { - mlog_errno(ret); - goto out; - } - } - handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -509,14 +498,13 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, goto out_commit; } - ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); + ret = ocfs2_cache_cluster_dealloc(dealloc, phys_blkno, len); if (ret) mlog_errno(ret); out_commit: ocfs2_commit_trans(osb, handle); out: - mutex_unlock(&tl_inode->i_mutex); if (meta_ac) ocfs2_free_alloc_context(meta_ac); -- cgit v1.2.3-70-g09d2 From 
78f30c314a74b9dc5d7368d96fe4be883d9a3a04 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 12 Nov 2008 08:27:00 +0800 Subject: ocfs2/xattr: Reserve meta/data at the beginning of ocfs2_xattr_set. In ocfs2 xattr set, we reserve metadata and clusters wherever they are needed. This is time-consuming and inefficient, so this patch tries to reserve metadata and clusters at the beginning of ocfs2_xattr_set. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.h | 4 + fs/ocfs2/xattr.c | 483 ++++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 361 insertions(+), 126 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index c301cf225f0..3eb735eedae 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -176,6 +176,10 @@ static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c) } int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, u64 blkno, unsigned int bit); +static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c) +{ + return c->c_global_allocator != NULL; +} int ocfs2_run_deallocs(struct ocfs2_super *osb, struct ocfs2_cached_dealloc_ctxt *ctxt); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index f1da381a44f..4fd201a54c7 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -71,6 +71,12 @@ struct ocfs2_xattr_bucket { int bu_blocks; }; +struct ocfs2_xattr_set_ctxt { + struct ocfs2_alloc_context *meta_ac; + struct ocfs2_alloc_context *data_ac; + struct ocfs2_cached_dealloc_ctxt dealloc; +}; + #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) #define OCFS2_XATTR_INLINE_SIZE 80 @@ -133,11 +139,13 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode, size_t buffer_size); static int ocfs2_xattr_create_index_block(struct inode *inode, - struct ocfs2_xattr_search *xs); + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt); static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 
struct ocfs2_xattr_info *xi, - struct ocfs2_xattr_search *xs); + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt); static int ocfs2_delete_xattr_index_block(struct inode *inode, struct buffer_head *xb_bh); @@ -334,14 +342,13 @@ static void ocfs2_xattr_hash_entry(struct inode *inode, static int ocfs2_xattr_extend_allocation(struct inode *inode, u32 clusters_to_add, struct buffer_head *xattr_bh, - struct ocfs2_xattr_value_root *xv) + struct ocfs2_xattr_value_root *xv, + struct ocfs2_xattr_set_ctxt *ctxt) { int status = 0; int restart_func = 0; int credits = 0; handle_t *handle = NULL; - struct ocfs2_alloc_context *data_ac = NULL; - struct ocfs2_alloc_context *meta_ac = NULL; enum ocfs2_alloc_restarted why; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters); @@ -353,13 +360,6 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, restart_all: - status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, - &data_ac, &meta_ac); - if (status) { - mlog_errno(status); - goto leave; - } - credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el, clusters_to_add); handle = ocfs2_start_trans(osb, credits); @@ -386,8 +386,8 @@ restarted_transaction: 0, &et, handle, - data_ac, - meta_ac, + ctxt->data_ac, + ctxt->meta_ac, &why); if ((status < 0) && (status != -EAGAIN)) { if (status != -ENOSPC) @@ -432,14 +432,6 @@ leave: ocfs2_commit_trans(osb, handle); handle = NULL; } - if (data_ac) { - ocfs2_free_alloc_context(data_ac); - data_ac = NULL; - } - if (meta_ac) { - ocfs2_free_alloc_context(meta_ac); - meta_ac = NULL; - } if ((!status) && restart_func) { restart_func = 0; goto restart_all; @@ -452,23 +444,16 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, struct buffer_head *root_bh, struct ocfs2_xattr_value_root *xv, u32 cpos, u32 phys_cpos, u32 len, - struct ocfs2_cached_dealloc_ctxt *dealloc) + struct ocfs2_xattr_set_ctxt *ctxt) { int ret; u64 phys_blkno = 
ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); handle_t *handle; - struct ocfs2_alloc_context *meta_ac = NULL; struct ocfs2_extent_tree et; ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv); - ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); - if (ret) { - mlog_errno(ret); - return ret; - } - handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -483,8 +468,8 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, goto out_commit; } - ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, - dealloc); + ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac, + &ctxt->dealloc); if (ret) { mlog_errno(ret); goto out_commit; @@ -498,17 +483,13 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, goto out_commit; } - ret = ocfs2_cache_cluster_dealloc(dealloc, phys_blkno, len); + ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len); if (ret) mlog_errno(ret); out_commit: ocfs2_commit_trans(osb, handle); out: - - if (meta_ac) - ocfs2_free_alloc_context(meta_ac); - return ret; } @@ -516,15 +497,12 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, u32 old_clusters, u32 new_clusters, struct buffer_head *root_bh, - struct ocfs2_xattr_value_root *xv) + struct ocfs2_xattr_value_root *xv, + struct ocfs2_xattr_set_ctxt *ctxt) { int ret = 0; u32 trunc_len, cpos, phys_cpos, alloc_size; u64 block; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct ocfs2_cached_dealloc_ctxt dealloc; - - ocfs2_init_dealloc_ctxt(&dealloc); if (old_clusters <= new_clusters) return 0; @@ -544,7 +522,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos, phys_cpos, alloc_size, - &dealloc); + ctxt); if (ret) { mlog_errno(ret); goto out; @@ -558,16 +536,14 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, } out: - 
ocfs2_schedule_truncate_log_flush(osb, 1); - ocfs2_run_deallocs(osb, &dealloc); - return ret; } static int ocfs2_xattr_value_truncate(struct inode *inode, struct buffer_head *root_bh, struct ocfs2_xattr_value_root *xv, - int len) + int len, + struct ocfs2_xattr_set_ctxt *ctxt) { int ret; u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); @@ -579,11 +555,11 @@ static int ocfs2_xattr_value_truncate(struct inode *inode, if (new_clusters > old_clusters) ret = ocfs2_xattr_extend_allocation(inode, new_clusters - old_clusters, - root_bh, xv); + root_bh, xv, ctxt); else ret = ocfs2_xattr_shrink_size(inode, old_clusters, new_clusters, - root_bh, xv); + root_bh, xv, ctxt); return ret; } @@ -1167,6 +1143,7 @@ out: static int ocfs2_xattr_set_value_outside(struct inode *inode, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt, size_t offs) { size_t name_len = strlen(xi->name); @@ -1186,7 +1163,7 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode, xv->xr_list.l_next_free_rec = 0; ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv, - xi->value_len); + xi->value_len, ctxt); if (ret < 0) { mlog_errno(ret); return ret; @@ -1317,6 +1294,7 @@ static void ocfs2_xattr_set_entry_local(struct inode *inode, static int ocfs2_xattr_set_entry(struct inode *inode, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt, int flag) { struct ocfs2_xattr_entry *last; @@ -1387,7 +1365,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, if (ocfs2_xattr_is_local(xs->here) && size == size_l) { /* Replace existing local xattr with tree root */ ret = ocfs2_xattr_set_value_outside(inode, xi, xs, - offs); + ctxt, offs); if (ret < 0) mlog_errno(ret); goto out; @@ -1406,7 +1384,8 @@ static int ocfs2_xattr_set_entry(struct inode *inode, ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv, - xi->value_len); + xi->value_len, + ctxt); if (ret < 0) { mlog_errno(ret); goto out; @@ 
-1436,7 +1415,8 @@ static int ocfs2_xattr_set_entry(struct inode *inode, ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv, - 0); + 0, + ctxt); if (ret < 0) mlog_errno(ret); } @@ -1531,7 +1511,7 @@ out_commit: * This is the second step for value size > INLINE_SIZE. */ size_t offs = le16_to_cpu(xs->here->xe_name_offset); - ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs); + ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, offs); if (ret < 0) { int ret2; @@ -1555,6 +1535,10 @@ static int ocfs2_remove_value_outside(struct inode*inode, struct ocfs2_xattr_header *header) { int ret = 0, i; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; + + ocfs2_init_dealloc_ctxt(&ctxt.dealloc); for (i = 0; i < le16_to_cpu(header->xh_count); i++) { struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; @@ -1567,14 +1551,17 @@ static int ocfs2_remove_value_outside(struct inode*inode, le16_to_cpu(entry->xe_name_offset); xv = (struct ocfs2_xattr_value_root *) (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); - ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0); + ret = ocfs2_xattr_value_truncate(inode, bh, xv, + 0, &ctxt); if (ret < 0) { mlog_errno(ret); - return ret; + break; } } } + ocfs2_schedule_truncate_log_flush(osb, 1); + ocfs2_run_deallocs(osb, &ctxt.dealloc); return ret; } @@ -1836,7 +1823,8 @@ static int ocfs2_xattr_ibody_find(struct inode *inode, */ static int ocfs2_xattr_ibody_set(struct inode *inode, struct ocfs2_xattr_info *xi, - struct ocfs2_xattr_search *xs) + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt) { struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; @@ -1853,7 +1841,7 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, } } - ret = ocfs2_xattr_set_entry(inode, xi, xs, + ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt, (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL)); out: up_write(&oi->ip_alloc_sem); 
@@ -1926,12 +1914,12 @@ cleanup: */ static int ocfs2_xattr_block_set(struct inode *inode, struct ocfs2_xattr_info *xi, - struct ocfs2_xattr_search *xs) + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt) { struct buffer_head *new_bh = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; - struct ocfs2_alloc_context *meta_ac = NULL; handle_t *handle = NULL; struct ocfs2_xattr_block *xblk = NULL; u16 suballoc_bit_start; @@ -1940,15 +1928,6 @@ static int ocfs2_xattr_block_set(struct inode *inode, int ret; if (!xs->xattr_bh) { - /* - * Alloc one external block for extended attribute - * outside of inode. - */ - ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); - if (ret < 0) { - mlog_errno(ret); - goto out; - } handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); if (IS_ERR(handle)) { @@ -1963,7 +1942,7 @@ static int ocfs2_xattr_block_set(struct inode *inode, goto out_commit; } - ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, + ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1, &suballoc_bit_start, &num_got, &first_blkno); if (ret < 0) { @@ -1996,7 +1975,6 @@ static int ocfs2_xattr_block_set(struct inode *inode, xs->end = (void *)xblk + inode->i_sb->s_blocksize; xs->here = xs->header->xh_entries; - ret = ocfs2_journal_dirty(handle, new_bh); if (ret < 0) { mlog_errno(ret); @@ -2009,8 +1987,6 @@ static int ocfs2_xattr_block_set(struct inode *inode, out_commit: ocfs2_commit_trans(osb, handle); out: - if (meta_ac) - ocfs2_free_alloc_context(meta_ac); if (ret < 0) return ret; } else @@ -2018,22 +1994,266 @@ out: if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { /* Set extended attribute into external block */ - ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL); + ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt, + OCFS2_HAS_XATTR_FL); if (!ret || ret != -ENOSPC) goto end; - ret = ocfs2_xattr_create_index_block(inode, xs); + ret 
= ocfs2_xattr_create_index_block(inode, xs, ctxt); if (ret) goto end; } - ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs); + ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt); end: return ret; } +/* Check whether the new xattr can be inserted into the inode. */ +static int ocfs2_xattr_can_be_in_inode(struct inode *inode, + struct ocfs2_xattr_info *xi, + struct ocfs2_xattr_search *xs) +{ + u64 value_size; + struct ocfs2_xattr_entry *last; + int free, i; + size_t min_offs = xs->end - xs->base; + + if (!xs->header) + return 0; + + last = xs->header->xh_entries; + + for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { + size_t offs = le16_to_cpu(last->xe_name_offset); + if (offs < min_offs) + min_offs = offs; + last += 1; + } + + free = min_offs - ((void *)last - xs->base) - sizeof(__u32); + if (free < 0) + return 0; + + BUG_ON(!xs->not_found); + + if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) + value_size = OCFS2_XATTR_ROOT_SIZE; + else + value_size = OCFS2_XATTR_SIZE(xi->value_len); + + if (free >= sizeof(struct ocfs2_xattr_entry) + + OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size) + return 1; + + return 0; +} + +static int ocfs2_calc_xattr_set_need(struct inode *inode, + struct ocfs2_dinode *di, + struct ocfs2_xattr_info *xi, + struct ocfs2_xattr_search *xis, + struct ocfs2_xattr_search *xbs, + int *clusters_need, + int *meta_need) +{ + int ret = 0, old_in_xb = 0; + int clusters_add = 0, meta_add = 0; + struct buffer_head *bh = NULL; + struct ocfs2_xattr_block *xb = NULL; + struct ocfs2_xattr_entry *xe = NULL; + struct ocfs2_xattr_value_root *xv = NULL; + char *base = NULL; + int name_offset, name_len = 0; + u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, + xi->value_len); + u64 value_size; + + /* + * delete a xattr doesn't need metadata and cluster allocation. + * so return. 
+ */ + if (!xi->value) + goto out; + + if (xis->not_found && xbs->not_found) { + if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) + clusters_add += new_clusters; + + goto meta_guess; + } + + if (!xis->not_found) { + xe = xis->here; + name_offset = le16_to_cpu(xe->xe_name_offset); + name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); + base = xis->base; + } else { + int i, block_off; + xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; + xe = xbs->here; + name_offset = le16_to_cpu(xe->xe_name_offset); + name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); + i = xbs->here - xbs->header->xh_entries; + old_in_xb = 1; + + if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { + ret = ocfs2_xattr_bucket_get_name_value(inode, + bucket_xh(xbs->bucket), + i, &block_off, + &name_offset); + base = bucket_block(xbs->bucket, block_off); + } else + base = xbs->base; + } + + /* do cluster allocation guess first. */ + value_size = le64_to_cpu(xe->xe_value_size); + + if (old_in_xb) { + /* + * In xattr set, we always try to set the xe in inode first, + * so if it can be inserted into inode successfully, the old + * one will be removed from the xattr block, and this xattr + * will be inserted into inode as a new xattr in inode. + */ + if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { + clusters_add += new_clusters; + goto out; + } + } + + if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { + /* the new values will be stored outside. */ + u32 old_clusters = 0; + + if (!ocfs2_xattr_is_local(xe)) { + old_clusters = ocfs2_clusters_for_bytes(inode->i_sb, + value_size); + xv = (struct ocfs2_xattr_value_root *) + (base + name_offset + name_len); + } else + xv = &def_xv.xv; + + if (old_clusters >= new_clusters) + goto out; + else { + meta_add += ocfs2_extend_meta_needed(&xv->xr_list); + clusters_add += new_clusters - old_clusters; + goto out; + } + } else { + /* + * Now the new value will be stored inside. 
So if the new + * value is smaller than the size of value root or the old + * value, we don't need any allocation, otherwise we have + * to guess metadata allocation. + */ + if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) || + (!ocfs2_xattr_is_local(xe) && + OCFS2_XATTR_ROOT_SIZE >= xi->value_len)) + goto out; + } + +meta_guess: + /* calculate metadata allocation. */ + if (di->i_xattr_loc) { + if (!xbs->xattr_bh) { + ret = ocfs2_read_block(inode, + le64_to_cpu(di->i_xattr_loc), + &bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + xb = (struct ocfs2_xattr_block *)bh->b_data; + } else + xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; + + if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { + struct ocfs2_extent_list *el = + &xb->xb_attrs.xb_root.xt_list; + meta_add += ocfs2_extend_meta_needed(el); + } + + /* + * This cluster will be used either for new bucket or for + * new xattr block. + * If the cluster size is the same as the bucket size, one + * more is needed since we may need to extend the bucket + * also. 
+ */ + clusters_add += 1; + if (OCFS2_XATTR_BUCKET_SIZE == + OCFS2_SB(inode->i_sb)->s_clustersize) + clusters_add += 1; + } else + meta_add += 1; +out: + if (clusters_need) + *clusters_need = clusters_add; + if (meta_need) + *meta_need = meta_add; + brelse(bh); + return ret; +} + +static int ocfs2_init_xattr_set_ctxt(struct inode *inode, + struct ocfs2_dinode *di, + struct ocfs2_xattr_info *xi, + struct ocfs2_xattr_search *xis, + struct ocfs2_xattr_search *xbs, + struct ocfs2_xattr_set_ctxt *ctxt) +{ + int clusters_add, meta_add, ret; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt)); + + ocfs2_init_dealloc_ctxt(&ctxt->dealloc); + + ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, + &clusters_add, &meta_add); + if (ret) { + mlog_errno(ret); + return ret; + } + + mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d\n", + xi->name, meta_add, clusters_add); + + if (meta_add) { + ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, + &ctxt->meta_ac); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + if (clusters_add) { + ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac); + if (ret) + mlog_errno(ret); + } +out: + if (ret) { + if (ctxt->meta_ac) { + ocfs2_free_alloc_context(ctxt->meta_ac); + ctxt->meta_ac = NULL; + } + + /* + * We cannot have an error and a non null ctxt->data_ac. 
+ */ + } + + return ret; +} + /* * ocfs2_xattr_set() * @@ -2051,6 +2271,8 @@ int ocfs2_xattr_set(struct inode *inode, struct buffer_head *di_bh = NULL; struct ocfs2_dinode *di; int ret; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; struct ocfs2_xattr_info xi = { .name_index = name_index, @@ -2115,15 +2337,21 @@ int ocfs2_xattr_set(struct inode *inode, goto cleanup; } + ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, &xbs, &ctxt); + if (ret) { + mlog_errno(ret); + goto cleanup; + } + if (!value) { /* Remove existing extended attribute */ if (!xis.not_found) - ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); + ret = ocfs2_xattr_ibody_set(inode, &xi, &xis, &ctxt); else if (!xbs.not_found) - ret = ocfs2_xattr_block_set(inode, &xi, &xbs); + ret = ocfs2_xattr_block_set(inode, &xi, &xbs, &ctxt); } else { /* We always try to set extended attribute into inode first*/ - ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); + ret = ocfs2_xattr_ibody_set(inode, &xi, &xis, &ctxt); if (!ret && !xbs.not_found) { /* * If succeed and that extended attribute existing in @@ -2131,7 +2359,7 @@ int ocfs2_xattr_set(struct inode *inode, */ xi.value = NULL; xi.value_len = 0; - ret = ocfs2_xattr_block_set(inode, &xi, &xbs); + ret = ocfs2_xattr_block_set(inode, &xi, &xbs, &ctxt); } else if (ret == -ENOSPC) { if (di->i_xattr_loc && !xbs.xattr_bh) { ret = ocfs2_xattr_block_find(inode, name_index, @@ -2143,9 +2371,9 @@ int ocfs2_xattr_set(struct inode *inode, * If no space in inode, we will set extended attribute * into external block. 
*/ - ret = ocfs2_xattr_block_set(inode, &xi, &xbs); + ret = ocfs2_xattr_block_set(inode, &xi, &xbs, &ctxt); if (ret) - goto cleanup; + goto free; if (!xis.not_found) { /* * If succeed and that extended attribute @@ -2153,10 +2381,19 @@ int ocfs2_xattr_set(struct inode *inode, */ xi.value = NULL; xi.value_len = 0; - ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); + ret = ocfs2_xattr_ibody_set(inode, &xi, + &xis, &ctxt); } } } +free: + if (ctxt.data_ac) + ocfs2_free_alloc_context(ctxt.data_ac); + if (ctxt.meta_ac) + ocfs2_free_alloc_context(ctxt.meta_ac); + if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) + ocfs2_schedule_truncate_log_flush(osb, 1); + ocfs2_run_deallocs(osb, &ctxt.dealloc); cleanup: up_write(&OCFS2_I(inode)->ip_xattr_sem); ocfs2_inode_unlock(inode, 1); @@ -2734,7 +2971,8 @@ static void ocfs2_xattr_update_xattr_search(struct inode *inode, } static int ocfs2_xattr_create_index_block(struct inode *inode, - struct ocfs2_xattr_search *xs) + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt) { int ret, credits = OCFS2_SUBALLOC_ALLOC; u32 bit_off, len; @@ -2742,7 +2980,6 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, handle_t *handle; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_inode_info *oi = OCFS2_I(inode); - struct ocfs2_alloc_context *data_ac; struct buffer_head *xb_bh = xs->xattr_bh; struct ocfs2_xattr_block *xb = (struct ocfs2_xattr_block *)xb_bh->b_data; @@ -2755,12 +2992,6 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); BUG_ON(!xs->bucket); - ret = ocfs2_reserve_clusters(osb, 1, &data_ac); - if (ret) { - mlog_errno(ret); - goto out; - } - /* * XXX: * We can use this lock for now, and maybe move to a dedicated mutex @@ -2787,7 +3018,8 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, goto out_commit; } - ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len); + ret = __ocfs2_claim_clusters(osb, handle, 
ctxt->data_ac, + 1, 1, &bit_off, &len); if (ret) { mlog_errno(ret); goto out_commit; @@ -2850,10 +3082,6 @@ out_commit: out_sem: up_write(&oi->ip_alloc_sem); -out: - if (data_ac) - ocfs2_free_alloc_context(data_ac); - return ret; } @@ -3614,7 +3842,8 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, u32 *num_clusters, u32 prev_cpos, u64 prev_blkno, - int *extend) + int *extend, + struct ocfs2_xattr_set_ctxt *ctxt) { int ret, credits; u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); @@ -3622,8 +3851,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; u64 block; handle_t *handle = NULL; - struct ocfs2_alloc_context *data_ac = NULL; - struct ocfs2_alloc_context *meta_ac = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_extent_tree et; @@ -3634,13 +3861,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); - ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, - &data_ac, &meta_ac); - if (ret) { - mlog_errno(ret); - goto leave; - } - credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el, clusters_to_add); handle = ocfs2_start_trans(osb, credits); @@ -3658,7 +3878,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, goto leave; } - ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, + ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1, clusters_to_add, &bit_off, &num_bits); if (ret < 0) { if (ret != -ENOSPC) @@ -3719,7 +3939,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", num_bits, (unsigned long long)block, v_start); ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, - num_bits, 0, meta_ac); + num_bits, 0, ctxt->meta_ac); if (ret < 0) { mlog_errno(ret); goto leave; @@ -3734,10 +3954,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, leave: if (handle) 
ocfs2_commit_trans(osb, handle); - if (data_ac) - ocfs2_free_alloc_context(data_ac); - if (meta_ac) - ocfs2_free_alloc_context(meta_ac); return ret; } @@ -3821,7 +4037,8 @@ out: */ static int ocfs2_add_new_xattr_bucket(struct inode *inode, struct buffer_head *xb_bh, - struct buffer_head *header_bh) + struct buffer_head *header_bh, + struct ocfs2_xattr_set_ctxt *ctxt) { struct ocfs2_xattr_header *first_xh = NULL; struct buffer_head *first_bh = NULL; @@ -3872,7 +4089,8 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, &num_clusters, e_cpos, p_blkno, - &extend); + &extend, + ctxt); if (ret) { mlog_errno(ret); goto out; @@ -4147,7 +4365,8 @@ out: static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, struct buffer_head *header_bh, int xe_off, - int len) + int len, + struct ocfs2_xattr_set_ctxt *ctxt) { int ret, offset; u64 value_blk; @@ -4182,7 +4401,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n", xe_off, (unsigned long long)header_bh->b_blocknr, len); - ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len); + ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt); if (ret) { mlog_errno(ret); goto out; @@ -4200,8 +4419,9 @@ out: } static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, - struct ocfs2_xattr_search *xs, - int len) + struct ocfs2_xattr_search *xs, + int len, + struct ocfs2_xattr_set_ctxt *ctxt) { int ret, offset; struct ocfs2_xattr_entry *xe = xs->here; @@ -4211,7 +4431,7 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, offset = xe - xh->xh_entries; ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket->bu_bhs[0], - offset, len); + offset, len, ctxt); if (ret) mlog_errno(ret); @@ -4375,7 +4595,8 @@ out_commit: */ static int ocfs2_xattr_set_in_bucket(struct inode *inode, struct ocfs2_xattr_info *xi, - struct ocfs2_xattr_search *xs) + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt 
*ctxt) { int ret, local = 1; size_t value_len; @@ -4403,7 +4624,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode, value_len = 0; ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, - value_len); + value_len, + ctxt); if (ret) goto out; @@ -4434,7 +4656,7 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode, /* allocate the space now for the outside block storage. */ ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, - value_len); + value_len, ctxt); if (ret) { mlog_errno(ret); @@ -4485,7 +4707,8 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode, static int ocfs2_xattr_set_entry_index_block(struct inode *inode, struct ocfs2_xattr_info *xi, - struct ocfs2_xattr_search *xs) + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt) { struct ocfs2_xattr_header *xh; struct ocfs2_xattr_entry *xe; @@ -4603,7 +4826,8 @@ try_again: ret = ocfs2_add_new_xattr_bucket(inode, xs->xattr_bh, - xs->bucket->bu_bhs[0]); + xs->bucket->bu_bhs[0], + ctxt); if (ret) { mlog_errno(ret); goto out; @@ -4622,7 +4846,7 @@ try_again: } xattr_set: - ret = ocfs2_xattr_set_in_bucket(inode, xi, xs); + ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt); out: mlog_exit(ret); return ret; @@ -4636,6 +4860,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, struct ocfs2_xattr_header *xh = bucket_xh(bucket); u16 i; struct ocfs2_xattr_entry *xe; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; + + ocfs2_init_dealloc_ctxt(&ctxt.dealloc); for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { xe = &xh->xh_entries[i]; @@ -4644,13 +4872,16 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, ret = ocfs2_xattr_bucket_value_truncate(inode, bucket->bu_bhs[0], - i, 0); + i, 0, &ctxt); if (ret) { mlog_errno(ret); break; } } + ocfs2_schedule_truncate_log_flush(osb, 1); + ocfs2_run_deallocs(osb, &ctxt.dealloc); + return ret; } -- cgit v1.2.3-70-g09d2 From 
85db90e77806d48a19fda77dabe8897d369a1710 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 12 Nov 2008 08:27:01 +0800 Subject: ocfs2/xattr: Merge xattr set transaction. In the current ocfs2/xattr code, the whole xattr set operation is divided into many steps and many transactions are used, so the xattr set process does not behave like a real transaction. This patch tries to merge all of those transactions into one. Another benefit is that acl can use it easily now. I don't merge the transactions for deleting xattrs when we remove an inode. The reason is that if we have a large number of xattrs and every xattr has a large value (large enough for outside storage), the whole transaction will be huge, and it looks like jbd can't handle it (I ran into a jbd complaint once). The old inode removal is also divided into many steps, so I'd like to leave it as it is. Note: In xattr set, I try to avoid ocfs2_extend_trans, since if the credits aren't enough for the extension, it will commit all the dirty blocks and create a new transaction, which may lead to inconsistency in metadata. All remaining ocfs2_extend_trans calls are safe now.
Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 673 +++++++++++++++++++++++++++---------------------------- 1 file changed, 325 insertions(+), 348 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 4fd201a54c7..7a9089255a8 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -72,6 +72,7 @@ struct ocfs2_xattr_bucket { }; struct ocfs2_xattr_set_ctxt { + handle_t *handle; struct ocfs2_alloc_context *meta_ac; struct ocfs2_alloc_context *data_ac; struct ocfs2_cached_dealloc_ctxt dealloc; @@ -346,9 +347,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, struct ocfs2_xattr_set_ctxt *ctxt) { int status = 0; - int restart_func = 0; - int credits = 0; - handle_t *handle = NULL; + handle_t *handle = ctxt->handle; enum ocfs2_alloc_restarted why; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters); @@ -358,19 +357,6 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv); -restart_all: - - credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el, - clusters_to_add); - handle = ocfs2_start_trans(osb, credits); - if (IS_ERR(handle)) { - status = PTR_ERR(handle); - handle = NULL; - mlog_errno(status); - goto leave; - } - -restarted_transaction: status = ocfs2_journal_access(handle, inode, xattr_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { @@ -389,9 +375,8 @@ restarted_transaction: ctxt->data_ac, ctxt->meta_ac, &why); - if ((status < 0) && (status != -EAGAIN)) { - if (status != -ENOSPC) - mlog_errno(status); + if (status < 0) { + mlog_errno(status); goto leave; } @@ -403,39 +388,13 @@ restarted_transaction: clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters; - if (why != RESTART_NONE && clusters_to_add) { - if (why == RESTART_META) { - mlog(0, "restarting function.\n"); - restart_func = 1; - } else { - BUG_ON(why != RESTART_TRANS); - - mlog(0, 
"restarting transaction.\n"); - /* TODO: This can be more intelligent. */ - credits = ocfs2_calc_extend_credits(osb->sb, - et.et_root_el, - clusters_to_add); - status = ocfs2_extend_trans(handle, credits); - if (status < 0) { - /* handle still has to be committed at - * this point. */ - status = -ENOMEM; - mlog_errno(status); - goto leave; - } - goto restarted_transaction; - } - } + /* + * We should have already allocated enough space before the transaction, + * so no need to restart. + */ + BUG_ON(why != RESTART_NONE || clusters_to_add); leave: - if (handle) { - ocfs2_commit_trans(osb, handle); - handle = NULL; - } - if ((!status) && restart_func) { - restart_func = 0; - goto restart_all; - } return status; } @@ -448,31 +407,23 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, { int ret; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - handle_t *handle; + handle_t *handle = ctxt->handle; struct ocfs2_extent_tree et; ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv); - handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out; - } - ret = ocfs2_journal_access(handle, inode, root_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac, &ctxt->dealloc); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } le32_add_cpu(&xv->xr_clusters, -len); @@ -480,15 +431,13 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, ret = ocfs2_journal_dirty(handle, root_bh); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len); if (ret) mlog_errno(ret); -out_commit: - ocfs2_commit_trans(osb, handle); out: return ret; } @@ -975,6 +924,7 @@ static int ocfs2_xattr_get(struct inode *inode, } static int 
__ocfs2_xattr_set_value_outside(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_value_root *xv, const void *value, int value_len) @@ -986,14 +936,17 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); u64 blkno; struct buffer_head *bh = NULL; - handle_t *handle; BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); + /* + * In __ocfs2_xattr_set_value_outside has already been dirtied, + * so we don't need to worry about whether ocfs2_extend_trans + * will create a new transaction for us or not. + */ credits = clusters * bpc; - handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); + ret = ocfs2_extend_trans(handle, credits); + if (ret) { mlog_errno(ret); goto out; } @@ -1003,7 +956,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, &num_clusters, &xv->xr_list); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); @@ -1012,7 +965,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, ret = ocfs2_read_block(inode, blkno, &bh); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } ret = ocfs2_journal_access(handle, @@ -1021,7 +974,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto out; } cp_len = value_len > blocksize ? 
blocksize : value_len; @@ -1035,7 +988,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, ret = ocfs2_journal_dirty(handle, bh); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto out; } brelse(bh); bh = NULL; @@ -1049,8 +1002,6 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, } cpos += num_clusters; } -out_commit: - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: brelse(bh); @@ -1058,28 +1009,21 @@ out: } static int ocfs2_xattr_cleanup(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, size_t offs) { - handle_t *handle = NULL; int ret = 0; size_t name_len = strlen(xi->name); void *val = xs->base + offs; size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; - handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), - OCFS2_XATTR_BLOCK_UPDATE_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out; - } ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } /* Decrease xattr count */ le16_add_cpu(&xs->header->xh_count, -1); @@ -1090,32 +1034,23 @@ static int ocfs2_xattr_cleanup(struct inode *inode, ret = ocfs2_journal_dirty(handle, xs->xattr_bh); if (ret < 0) mlog_errno(ret); -out_commit: - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: return ret; } static int ocfs2_xattr_update_entry(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, size_t offs) { - handle_t *handle = NULL; - int ret = 0; + int ret; - handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), - OCFS2_XATTR_BLOCK_UPDATE_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out; - } ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } xs->here->xe_name_offset = cpu_to_le16(offs); @@ -1129,8 +1064,6 
@@ static int ocfs2_xattr_update_entry(struct inode *inode, ret = ocfs2_journal_dirty(handle, xs->xattr_bh); if (ret < 0) mlog_errno(ret); -out_commit: - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: return ret; } @@ -1168,13 +1101,13 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode, mlog_errno(ret); return ret; } - ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value, - xi->value_len); + ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, offs); if (ret < 0) { mlog_errno(ret); return ret; } - ret = ocfs2_xattr_update_entry(inode, xi, xs, offs); + ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, xv, + xi->value, xi->value_len); if (ret < 0) mlog_errno(ret); @@ -1302,7 +1235,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name); size_t size_l = 0; - handle_t *handle = NULL; + handle_t *handle = ctxt->handle; int free, i, ret; struct ocfs2_xattr_info xi_l = { .name_index = xi->name_index, @@ -1391,19 +1324,21 @@ static int ocfs2_xattr_set_entry(struct inode *inode, goto out; } - ret = __ocfs2_xattr_set_value_outside(inode, - xv, - xi->value, - xi->value_len); + ret = ocfs2_xattr_update_entry(inode, + handle, + xi, + xs, + offs); if (ret < 0) { mlog_errno(ret); goto out; } - ret = ocfs2_xattr_update_entry(inode, - xi, - xs, - offs); + ret = __ocfs2_xattr_set_value_outside(inode, + handle, + xv, + xi->value, + xi->value_len); if (ret < 0) mlog_errno(ret); goto out; @@ -1413,45 +1348,29 @@ static int ocfs2_xattr_set_entry(struct inode *inode, * just trucate old value to zero. 
*/ ret = ocfs2_xattr_value_truncate(inode, - xs->xattr_bh, - xv, - 0, - ctxt); + xs->xattr_bh, + xv, + 0, + ctxt); if (ret < 0) mlog_errno(ret); } } } - handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), - OCFS2_INODE_UPDATE_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out; - } - ret = ocfs2_journal_access(handle, inode, xs->inode_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } if (!(flag & OCFS2_INLINE_XATTR_FL)) { - /* set extended attribute in external block. */ - ret = ocfs2_extend_trans(handle, - OCFS2_INODE_UPDATE_CREDITS + - OCFS2_XATTR_BLOCK_UPDATE_CREDITS); - if (ret) { - mlog_errno(ret); - goto out_commit; - } ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } } @@ -1465,7 +1384,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, ret = ocfs2_journal_dirty(handle, xs->xattr_bh); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto out; } } @@ -1502,9 +1421,6 @@ static int ocfs2_xattr_set_entry(struct inode *inode, if (ret < 0) mlog_errno(ret); -out_commit: - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); - if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) { /* * Set value outside in B tree. @@ -1520,14 +1436,14 @@ out_commit: * If set value outside failed, we have to clean * the junk tree root we have already set in local. 
*/ - ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs); + ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle, + xi, xs, offs); if (ret2 < 0) mlog_errno(ret2); } } out: return ret; - } static int ocfs2_remove_value_outside(struct inode*inode, @@ -1540,6 +1456,13 @@ static int ocfs2_remove_value_outside(struct inode*inode, ocfs2_init_dealloc_ctxt(&ctxt.dealloc); + ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); + if (IS_ERR(ctxt.handle)) { + ret = PTR_ERR(ctxt.handle); + mlog_errno(ret); + goto out; + } + for (i = 0; i < le16_to_cpu(header->xh_count); i++) { struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; @@ -1560,8 +1483,10 @@ static int ocfs2_remove_value_outside(struct inode*inode, } } + ocfs2_commit_trans(osb, ctxt.handle); ocfs2_schedule_truncate_log_flush(osb, 1); ocfs2_run_deallocs(osb, &ctxt.dealloc); +out: return ret; } @@ -1920,7 +1845,7 @@ static int ocfs2_xattr_block_set(struct inode *inode, struct buffer_head *new_bh = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; - handle_t *handle = NULL; + handle_t *handle = ctxt->handle; struct ocfs2_xattr_block *xblk = NULL; u16 suballoc_bit_start; u32 num_got; @@ -1928,18 +1853,11 @@ static int ocfs2_xattr_block_set(struct inode *inode, int ret; if (!xs->xattr_bh) { - handle = ocfs2_start_trans(osb, - OCFS2_XATTR_BLOCK_CREATE_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out; - } ret = ocfs2_journal_access(handle, inode, xs->inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto end; } ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1, @@ -1947,7 +1865,7 @@ static int ocfs2_xattr_block_set(struct inode *inode, &first_blkno); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto end; } new_bh = sb_getblk(inode->i_sb, first_blkno); @@ -1957,7 +1875,7 @@ static int ocfs2_xattr_block_set(struct inode *inode, 
OCFS2_JOURNAL_ACCESS_CREATE); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto end; } /* Initialize ocfs2_xattr_block */ @@ -1978,17 +1896,10 @@ static int ocfs2_xattr_block_set(struct inode *inode, ret = ocfs2_journal_dirty(handle, new_bh); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto end; } di->i_xattr_loc = cpu_to_le64(first_blkno); - ret = ocfs2_journal_dirty(handle, xs->inode_bh); - if (ret < 0) - mlog_errno(ret); -out_commit: - ocfs2_commit_trans(osb, handle); -out: - if (ret < 0) - return ret; + ocfs2_journal_dirty(handle, xs->inode_bh); } else xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; @@ -2057,10 +1968,11 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, struct ocfs2_xattr_search *xis, struct ocfs2_xattr_search *xbs, int *clusters_need, - int *meta_need) + int *meta_need, + int *credits_need) { int ret = 0, old_in_xb = 0; - int clusters_add = 0, meta_add = 0; + int clusters_add = 0, meta_add = 0, credits = 0; struct buffer_head *bh = NULL; struct ocfs2_xattr_block *xb = NULL; struct ocfs2_xattr_entry *xe = NULL; @@ -2071,16 +1983,15 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, xi->value_len); u64 value_size; - /* - * delete a xattr doesn't need metadata and cluster allocation. - * so return. 
- */ - if (!xi->value) - goto out; - if (xis->not_found && xbs->not_found) { - if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) + credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); + + if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { clusters_add += new_clusters; + credits += ocfs2_calc_extend_credits(inode->i_sb, + &def_xv.xv.xr_list, + new_clusters); + } goto meta_guess; } @@ -2090,6 +2001,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, name_offset = le16_to_cpu(xe->xe_name_offset); name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); base = xis->base; + credits += OCFS2_INODE_UPDATE_CREDITS; } else { int i, block_off; xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; @@ -2105,8 +2017,25 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, i, &block_off, &name_offset); base = bucket_block(xbs->bucket, block_off); - } else + credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); + } else { base = xbs->base; + credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS; + } + } + + /* + * delete a xattr doesn't need metadata and cluster allocation. + * so just calculate the credits and return. + * + * The credits for removing the value tree will be extended + * by ocfs2_remove_extent itself. + */ + if (!xi->value) { + if (!ocfs2_xattr_is_local(xe)) + credits += OCFS2_REMOVE_EXTENT_CREDITS; + + goto out; } /* do cluster allocation guess first. 
*/ @@ -2121,6 +2050,13 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, */ if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { clusters_add += new_clusters; + credits += OCFS2_REMOVE_EXTENT_CREDITS + + OCFS2_INODE_UPDATE_CREDITS; + if (!ocfs2_xattr_is_local(xe)) + credits += ocfs2_calc_extend_credits( + inode->i_sb, + &def_xv.xv.xr_list, + new_clusters); goto out; } } @@ -2137,11 +2073,16 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, } else xv = &def_xv.xv; - if (old_clusters >= new_clusters) + if (old_clusters >= new_clusters) { + credits += OCFS2_REMOVE_EXTENT_CREDITS; goto out; - else { + } else { meta_add += ocfs2_extend_meta_needed(&xv->xr_list); clusters_add += new_clusters - old_clusters; + credits += ocfs2_calc_extend_credits(inode->i_sb, + &xv->xr_list, + new_clusters - + old_clusters); goto out; } } else { @@ -2177,6 +2118,8 @@ meta_guess: struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; meta_add += ocfs2_extend_meta_needed(el); + credits += ocfs2_calc_extend_credits(inode->i_sb, + el, 1); } /* @@ -2187,16 +2130,23 @@ meta_guess: * also. 
*/ clusters_add += 1; + credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); if (OCFS2_XATTR_BUCKET_SIZE == - OCFS2_SB(inode->i_sb)->s_clustersize) + OCFS2_SB(inode->i_sb)->s_clustersize) { + credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); clusters_add += 1; - } else + } + } else { meta_add += 1; + credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; + } out: if (clusters_need) *clusters_need = clusters_add; if (meta_need) *meta_need = meta_add; + if (credits_need) + *credits_need = credits; brelse(bh); return ret; } @@ -2206,7 +2156,8 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xis, struct ocfs2_xattr_search *xbs, - struct ocfs2_xattr_set_ctxt *ctxt) + struct ocfs2_xattr_set_ctxt *ctxt, + int *credits) { int clusters_add, meta_add, ret; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); @@ -2216,14 +2167,14 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode, ocfs2_init_dealloc_ctxt(&ctxt->dealloc); ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, - &clusters_add, &meta_add); + &clusters_add, &meta_add, credits); if (ret) { mlog_errno(ret); return ret; } - mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d\n", - xi->name, meta_add, clusters_add); + mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, " + "credits = %d\n", xi->name, meta_add, clusters_add, *credits); if (meta_add) { ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, @@ -2254,6 +2205,126 @@ out: return ret; } +static int __ocfs2_xattr_set_handle(struct inode *inode, + struct ocfs2_dinode *di, + struct ocfs2_xattr_info *xi, + struct ocfs2_xattr_search *xis, + struct ocfs2_xattr_search *xbs, + struct ocfs2_xattr_set_ctxt *ctxt) +{ + int ret = 0, credits; + + if (!xi->value) { + /* Remove existing extended attribute */ + if (!xis->not_found) + ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); + else if (!xbs->not_found) + ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); + } else { + /* 
We always try to set extended attribute into inode first*/ + ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); + if (!ret && !xbs->not_found) { + /* + * If succeed and that extended attribute existing in + * external block, then we will remove it. + */ + xi->value = NULL; + xi->value_len = 0; + + xis->not_found = -ENODATA; + ret = ocfs2_calc_xattr_set_need(inode, + di, + xi, + xis, + xbs, + NULL, + NULL, + &credits); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_extend_trans(ctxt->handle, credits + + ctxt->handle->h_buffer_credits); + if (ret) { + mlog_errno(ret); + goto out; + } + ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); + } else if (ret == -ENOSPC) { + if (di->i_xattr_loc && !xbs->xattr_bh) { + ret = ocfs2_xattr_block_find(inode, + xi->name_index, + xi->name, xbs); + if (ret) + goto out; + + xis->not_found = -ENODATA; + ret = ocfs2_calc_xattr_set_need(inode, + di, + xi, + xis, + xbs, + NULL, + NULL, + &credits); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_extend_trans(ctxt->handle, credits + + ctxt->handle->h_buffer_credits); + if (ret) { + mlog_errno(ret); + goto out; + } + } + /* + * If no space in inode, we will set extended attribute + * into external block. + */ + ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); + if (ret) + goto out; + if (!xis->not_found) { + /* + * If succeed and that extended attribute + * existing in inode, we will remove it. 
+ */ + xi->value = NULL; + xi->value_len = 0; + xbs->not_found = -ENODATA; + ret = ocfs2_calc_xattr_set_need(inode, + di, + xi, + xis, + xbs, + NULL, + NULL, + &credits); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_extend_trans(ctxt->handle, credits + + ctxt->handle->h_buffer_credits); + if (ret) { + mlog_errno(ret); + goto out; + } + ret = ocfs2_xattr_ibody_set(inode, xi, + xis, ctxt); + } + } + } + +out: + return ret; +} + /* * ocfs2_xattr_set() * @@ -2270,8 +2341,9 @@ int ocfs2_xattr_set(struct inode *inode, { struct buffer_head *di_bh = NULL; struct ocfs2_dinode *di; - int ret; + int ret, credits; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct inode *tl_inode = osb->osb_tl_inode; struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; struct ocfs2_xattr_info xi = { @@ -2337,56 +2409,37 @@ int ocfs2_xattr_set(struct inode *inode, goto cleanup; } - ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, &xbs, &ctxt); + + mutex_lock(&tl_inode->i_mutex); + + if (ocfs2_truncate_log_needs_flush(osb)) { + ret = __ocfs2_flush_truncate_log(osb); + if (ret < 0) { + mutex_unlock(&tl_inode->i_mutex); + mlog_errno(ret); + goto cleanup; + } + } + mutex_unlock(&tl_inode->i_mutex); + + ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, + &xbs, &ctxt, &credits); if (ret) { mlog_errno(ret); goto cleanup; } - if (!value) { - /* Remove existing extended attribute */ - if (!xis.not_found) - ret = ocfs2_xattr_ibody_set(inode, &xi, &xis, &ctxt); - else if (!xbs.not_found) - ret = ocfs2_xattr_block_set(inode, &xi, &xbs, &ctxt); - } else { - /* We always try to set extended attribute into inode first*/ - ret = ocfs2_xattr_ibody_set(inode, &xi, &xis, &ctxt); - if (!ret && !xbs.not_found) { - /* - * If succeed and that extended attribute existing in - * external block, then we will remove it. 
- */ - xi.value = NULL; - xi.value_len = 0; - ret = ocfs2_xattr_block_set(inode, &xi, &xbs, &ctxt); - } else if (ret == -ENOSPC) { - if (di->i_xattr_loc && !xbs.xattr_bh) { - ret = ocfs2_xattr_block_find(inode, name_index, - name, &xbs); - if (ret) - goto cleanup; - } - /* - * If no space in inode, we will set extended attribute - * into external block. - */ - ret = ocfs2_xattr_block_set(inode, &xi, &xbs, &ctxt); - if (ret) - goto free; - if (!xis.not_found) { - /* - * If succeed and that extended attribute - * existing in inode, we will remove it. - */ - xi.value = NULL; - xi.value_len = 0; - ret = ocfs2_xattr_ibody_set(inode, &xi, - &xis, &ctxt); - } - } + ctxt.handle = ocfs2_start_trans(osb, credits); + if (IS_ERR(ctxt.handle)) { + ret = PTR_ERR(ctxt.handle); + mlog_errno(ret); + goto cleanup; } -free: + + ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); + + ocfs2_commit_trans(osb, ctxt.handle); + if (ctxt.data_ac) ocfs2_free_alloc_context(ctxt.data_ac); if (ctxt.meta_ac) @@ -2974,10 +3027,10 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, struct ocfs2_xattr_search *xs, struct ocfs2_xattr_set_ctxt *ctxt) { - int ret, credits = OCFS2_SUBALLOC_ALLOC; + int ret; u32 bit_off, len; u64 blkno; - handle_t *handle; + handle_t *handle = ctxt->handle; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_inode_info *oi = OCFS2_I(inode); struct buffer_head *xb_bh = xs->xattr_bh; @@ -2999,30 +3052,18 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, */ down_write(&oi->ip_alloc_sem); - /* - * We need more credits. One for the xattr block update and one - * for each block of the new xattr bucket. 
- */ - credits += 1 + ocfs2_blocks_per_xattr_bucket(inode->i_sb); - handle = ocfs2_start_trans(osb, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out_sem; - } - ret = ocfs2_journal_access(handle, inode, xb_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1, 1, &bit_off, &len); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } /* @@ -3038,14 +3079,14 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, ret = ocfs2_init_xattr_bucket(xs->bucket, blkno); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, OCFS2_JOURNAL_ACCESS_CREATE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); @@ -3070,16 +3111,9 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); - ret = ocfs2_journal_dirty(handle, xb_bh); - if (ret) { - mlog_errno(ret); - goto out_commit; - } + ocfs2_journal_dirty(handle, xb_bh); -out_commit: - ocfs2_commit_trans(osb, handle); - -out_sem: +out: up_write(&oi->ip_alloc_sem); return ret; @@ -3105,6 +3139,7 @@ static int cmp_xe_offset(const void *a, const void *b) * so that we can spare some space for insertion. 
*/ static int ocfs2_defrag_xattr_bucket(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_bucket *bucket) { int ret, i; @@ -3114,7 +3149,6 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, u64 blkno = bucket_blkno(bucket); u16 xh_free_start; size_t blocksize = inode->i_sb->s_blocksize; - handle_t *handle; struct ocfs2_xattr_entry *xe; /* @@ -3133,19 +3167,11 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) memcpy(buf, bucket_block(bucket, i), blocksize); - handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), bucket->bu_blocks); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; - mlog_errno(ret); - goto out; - } - ret = ocfs2_xattr_bucket_journal_access(handle, bucket, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); - goto commit; + goto out; } xh = (struct ocfs2_xattr_header *)bucket_buf; @@ -3203,7 +3229,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, "bucket %llu\n", (unsigned long long)blkno); if (xh_free_start == end) - goto commit; + goto out; memset(bucket_buf + xh_free_start, 0, end - xh_free_start); xh->xh_free_start = cpu_to_le16(end); @@ -3218,8 +3244,6 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, memcpy(bucket_block(bucket, i), buf, blocksize); ocfs2_xattr_bucket_journal_dirty(handle, bucket); -commit: - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: kfree(bucket_buf); return ret; @@ -3270,7 +3294,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, * 1 more for the update of the 1st bucket of the previous * extent record. */ - credits = bpc / 2 + 1; + credits = bpc / 2 + 1 + handle->h_buffer_credits; ret = ocfs2_extend_trans(handle, credits); if (ret) { mlog_errno(ret); @@ -3662,7 +3686,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode, * We need to update the new cluster and 1 more for the update of * the 1st bucket of the previous extent rec. 
*/ - credits = bpc + 1; + credits = bpc + 1 + handle->h_buffer_credits; ret = ocfs2_extend_trans(handle, credits); if (ret) { mlog_errno(ret); @@ -3732,7 +3756,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode, u32 *first_hash) { u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - int ret, credits = 2 * blk_per_bucket; + int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits; BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); @@ -3845,12 +3869,12 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, int *extend, struct ocfs2_xattr_set_ctxt *ctxt) { - int ret, credits; + int ret; u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); u32 prev_clusters = *num_clusters; u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; u64 block; - handle_t *handle = NULL; + handle_t *handle = ctxt->handle; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_extent_tree et; @@ -3861,16 +3885,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); - credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el, - clusters_to_add); - handle = ocfs2_start_trans(osb, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; - mlog_errno(ret); - goto leave; - } - ret = ocfs2_journal_access(handle, inode, root_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { @@ -3924,18 +3938,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, } } - if (handle->h_buffer_credits < credits) { - /* - * The journal has been restarted before, and don't - * have enough space for the insertion, so extend it - * here. 
- */ - ret = ocfs2_extend_trans(handle, credits); - if (ret) { - mlog_errno(ret); - goto leave; - } - } mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", num_bits, (unsigned long long)block, v_start); ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, @@ -3946,15 +3948,10 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, } ret = ocfs2_journal_dirty(handle, root_bh); - if (ret < 0) { + if (ret < 0) mlog_errno(ret); - goto leave; - } leave: - if (handle) - ocfs2_commit_trans(osb, handle); - return ret; } @@ -3963,6 +3960,7 @@ leave: * We meet with start_bh. Only move half of the xattrs to the bucket after it. */ static int ocfs2_extend_xattr_bucket(struct inode *inode, + handle_t *handle, struct buffer_head *first_bh, struct buffer_head *start_bh, u32 num_clusters) @@ -3972,7 +3970,6 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); u64 start_blk = start_bh->b_blocknr, end_blk; u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb); - handle_t *handle; struct ocfs2_xattr_header *first_xh = (struct ocfs2_xattr_header *)first_bh->b_data; u16 bucket = le16_to_cpu(first_xh->xh_num_buckets); @@ -3989,11 +3986,10 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, * We will touch all the buckets after the start_bh(include it). * Then we add one more bucket. 
*/ - credits = end_blk - start_blk + 3 * blk_per_bucket + 1; - handle = ocfs2_start_trans(osb, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; + credits = end_blk - start_blk + 3 * blk_per_bucket + 1 + + handle->h_buffer_credits; + ret = ocfs2_extend_trans(handle, credits); + if (ret) { mlog_errno(ret); goto out; } @@ -4002,14 +3998,14 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto commit; + goto out; } while (end_blk != start_blk) { ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, end_blk + blk_per_bucket, 0); if (ret) - goto commit; + goto out; end_blk -= blk_per_bucket; } @@ -4020,8 +4016,6 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, le16_add_cpu(&first_xh->xh_num_buckets, 1); ocfs2_journal_dirty(handle, first_bh); -commit: - ocfs2_commit_trans(osb, handle); out: return ret; } @@ -4099,6 +4093,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, if (extend) ret = ocfs2_extend_xattr_bucket(inode, + ctxt->handle, first_bh, header_bh, num_clusters); @@ -4272,14 +4267,13 @@ set_new_name_value: * space for the xattr insertion. 
*/ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, u32 name_hash, int local) { int ret; - handle_t *handle = NULL; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); u64 blkno; mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n", @@ -4296,14 +4290,6 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, } } - handle = ocfs2_start_trans(osb, xs->bucket->bu_blocks); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; - mlog_errno(ret); - goto out; - } - ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { @@ -4315,32 +4301,22 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); out: - ocfs2_commit_trans(osb, handle); - return ret; } static int ocfs2_xattr_value_update_size(struct inode *inode, + handle_t *handle, struct buffer_head *xe_bh, struct ocfs2_xattr_entry *xe, u64 new_size) { int ret; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - handle_t *handle = NULL; - - handle = ocfs2_start_trans(osb, 1); - if (IS_ERR(handle)) { - ret = -ENOMEM; - mlog_errno(ret); - goto out; - } ret = ocfs2_journal_access(handle, inode, xe_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto out; } xe->xe_value_size = cpu_to_le64(new_size); @@ -4349,8 +4325,6 @@ static int ocfs2_xattr_value_update_size(struct inode *inode, if (ret < 0) mlog_errno(ret); -out_commit: - ocfs2_commit_trans(osb, handle); out: return ret; } @@ -4407,7 +4381,8 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, goto out; } - ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len); + ret = ocfs2_xattr_value_update_size(inode, ctxt->handle, + header_bh, xe, len); if (ret) { mlog_errno(ret); goto out; @@ -4439,6 +4414,7 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, } 
static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_search *xs, char *val, int value_len) @@ -4454,7 +4430,8 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode, xv = (struct ocfs2_xattr_value_root *)(xs->base + offset); - return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len); + return __ocfs2_xattr_set_value_outside(inode, handle, + xv, val, value_len); } static int ocfs2_rm_xattr_cluster(struct inode *inode, @@ -4547,27 +4524,19 @@ out: } static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_search *xs) { - handle_t *handle = NULL; struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket); struct ocfs2_xattr_entry *last = &xh->xh_entries[ le16_to_cpu(xh->xh_count) - 1]; int ret = 0; - handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), - ocfs2_blocks_per_xattr_bucket(inode->i_sb)); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - return; - } - ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + return; } /* Remove the old entry. */ @@ -4577,9 +4546,6 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, le16_add_cpu(&xh->xh_count, -1); ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); - -out_commit: - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); } /* @@ -4645,7 +4611,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode, xi->value_len = OCFS2_XATTR_ROOT_SIZE; } - ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local); + ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs, + name_hash, local); if (ret) { mlog_errno(ret); goto out; @@ -4666,13 +4633,14 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode, * storage and we have allocated xattr already, * so need to remove it. 
*/ - ocfs2_xattr_bucket_remove_xs(inode, xs); + ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs); } goto out; } set_value_outside: - ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len); + ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle, + xs, val, value_len); out: return ret; } @@ -4785,7 +4753,8 @@ try_again: * name/value will be moved, the xe shouldn't be changed * in xs. */ - ret = ocfs2_defrag_xattr_bucket(inode, xs->bucket); + ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, + xs->bucket); if (ret) { mlog_errno(ret); goto out; @@ -4865,6 +4834,13 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, ocfs2_init_dealloc_ctxt(&ctxt.dealloc); + ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); + if (IS_ERR(ctxt.handle)) { + ret = PTR_ERR(ctxt.handle); + mlog_errno(ret); + goto out; + } + for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { xe = &xh->xh_entries[i]; if (ocfs2_xattr_is_local(xe)) @@ -4879,9 +4855,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, } } + ret = ocfs2_commit_trans(osb, ctxt.handle); ocfs2_schedule_truncate_log_flush(osb, 1); ocfs2_run_deallocs(osb, &ctxt.dealloc); - +out: return ret; } -- cgit v1.2.3-70-g09d2 From fecc01126d7a244b7e9b563c80663ffdca35343b Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 12 Nov 2008 15:16:38 -0800 Subject: ocfs2: turn __ocfs2_remove_inode_range() into ocfs2_remove_btree_range() This patch genericizes the high level handling of extent removal. ocfs2_remove_btree_range() is nearly identical to __ocfs2_remove_inode_range(), except that extent tree operations have been used where necessary. We update ocfs2_remove_inode_range() to use the generic helper. Now extent tree based structures have an easy way to truncate ranges. 
Signed-off-by: Mark Fasheh Acked-by: Joel Becker --- fs/ocfs2/alloc.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/alloc.h | 5 ++++ fs/ocfs2/file.c | 85 ++++---------------------------------------------------- 3 files changed, 82 insertions(+), 80 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 4614614084d..5592a2f6335 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5255,6 +5255,78 @@ out: return ret; } +int ocfs2_remove_btree_range(struct inode *inode, + struct ocfs2_extent_tree *et, + u32 cpos, u32 phys_cpos, u32 len, + struct ocfs2_cached_dealloc_ctxt *dealloc) +{ + int ret; + u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct inode *tl_inode = osb->osb_tl_inode; + handle_t *handle; + struct ocfs2_alloc_context *meta_ac = NULL; + + ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac); + if (ret) { + mlog_errno(ret); + return ret; + } + + mutex_lock(&tl_inode->i_mutex); + + if (ocfs2_truncate_log_needs_flush(osb)) { + ret = __ocfs2_flush_truncate_log(osb); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + } + + handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, et->et_root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac, + dealloc); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + ocfs2_et_update_clusters(inode, et, -len); + + ret = ocfs2_journal_dirty(handle, et->et_root_bh); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); + if (ret) + mlog_errno(ret); + +out_commit: + ocfs2_commit_trans(osb, handle); +out: + mutex_unlock(&tl_inode->i_mutex); + + if (meta_ac) + 
ocfs2_free_alloc_context(meta_ac); + + return ret; +} + int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) { struct buffer_head *tl_bh = osb->osb_tl_bh; diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 3eb735eedae..0fbf8fc55a4 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -110,6 +110,11 @@ int ocfs2_remove_extent(struct inode *inode, u32 cpos, u32 len, handle_t *handle, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc); +int ocfs2_remove_btree_range(struct inode *inode, + struct ocfs2_extent_tree *et, + u32 cpos, u32 phys_cpos, u32 len, + struct ocfs2_cached_dealloc_ctxt *dealloc); + int ocfs2_num_free_extents(struct ocfs2_super *osb, struct inode *inode, struct ocfs2_extent_tree *et); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index e2570a3bc2b..360549161e2 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1226,83 +1226,6 @@ out: return ret; } -static int __ocfs2_remove_inode_range(struct inode *inode, - struct buffer_head *di_bh, - u32 cpos, u32 phys_cpos, u32 len, - struct ocfs2_cached_dealloc_ctxt *dealloc) -{ - int ret; - u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct inode *tl_inode = osb->osb_tl_inode; - handle_t *handle; - struct ocfs2_alloc_context *meta_ac = NULL; - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; - struct ocfs2_extent_tree et; - - ocfs2_init_dinode_extent_tree(&et, inode, di_bh); - - ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); - if (ret) { - mlog_errno(ret); - return ret; - } - - mutex_lock(&tl_inode->i_mutex); - - if (ocfs2_truncate_log_needs_flush(osb)) { - ret = __ocfs2_flush_truncate_log(osb); - if (ret < 0) { - mlog_errno(ret); - goto out; - } - } - - handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out; - } - - ret = ocfs2_journal_access(handle, inode, di_bh, - 
OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) { - mlog_errno(ret); - goto out; - } - - ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, - dealloc); - if (ret) { - mlog_errno(ret); - goto out_commit; - } - - OCFS2_I(inode)->ip_clusters -= len; - di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); - - ret = ocfs2_journal_dirty(handle, di_bh); - if (ret) { - mlog_errno(ret); - goto out_commit; - } - - ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); - if (ret) - mlog_errno(ret); - -out_commit: - ocfs2_commit_trans(osb, handle); -out: - mutex_unlock(&tl_inode->i_mutex); - - if (meta_ac) - ocfs2_free_alloc_context(meta_ac); - - return ret; -} - /* * Truncate a byte range, avoiding pages within partial clusters. This * preserves those pages for the zeroing code to write to. @@ -1402,7 +1325,9 @@ static int ocfs2_remove_inode_range(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_cached_dealloc_ctxt dealloc; struct address_space *mapping = inode->i_mapping; + struct ocfs2_extent_tree et; + ocfs2_init_dinode_extent_tree(&et, inode, di_bh); ocfs2_init_dealloc_ctxt(&dealloc); if (byte_len == 0) @@ -1458,9 +1383,9 @@ static int ocfs2_remove_inode_range(struct inode *inode, /* Only do work for non-holes */ if (phys_cpos != 0) { - ret = __ocfs2_remove_inode_range(inode, di_bh, cpos, - phys_cpos, alloc_size, - &dealloc); + ret = ocfs2_remove_btree_range(inode, &et, cpos, + phys_cpos, alloc_size, + &dealloc); if (ret) { mlog_errno(ret); goto out; -- cgit v1.2.3-70-g09d2 From f5d362022a947e84b0a3dd656d09c6b2322e234f Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Fri, 14 Nov 2008 11:15:44 +0800 Subject: ocfs2: move new inode allocation out of the transaction Move out inode allocation from ocfs2_mknod_locked() because vfs_dq_init() must be called outside of a transaction. 
Signed-off-by: Jan Kara Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/namei.c | 108 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 64 insertions(+), 44 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 2545e7402ef..e8ff0bae179 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -66,12 +66,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, struct inode *dir, - struct dentry *dentry, int mode, + struct inode *inode, + struct dentry *dentry, dev_t dev, struct buffer_head **new_fe_bh, struct buffer_head *parent_fe_bh, handle_t *handle, - struct inode **ret_inode, struct ocfs2_alloc_context *inode_ac); static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, @@ -186,6 +186,34 @@ bail: return ret; } +static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode) +{ + struct inode *inode; + + inode = new_inode(dir->i_sb); + if (!inode) { + mlog(ML_ERROR, "new_inode failed!\n"); + return NULL; + } + + /* populate as many fields early on as possible - many of + * these are used by the support functions here and in + * callers. */ + if (S_ISDIR(mode)) + inode->i_nlink = 2; + else + inode->i_nlink = 1; + inode->i_uid = current_fsuid(); + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + inode->i_gid = current_fsgid(); + inode->i_mode = mode; + return inode; +} + static int ocfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, @@ -250,6 +278,13 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } + inode = ocfs2_get_init_inode(dir, mode); + if (!inode) { + status = -ENOMEM; + mlog_errno(status); + goto leave; + } + /* Reserve a cluster if creating an extent based directory. */ if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) { status = ocfs2_reserve_clusters(osb, 1, &data_ac); @@ -269,9 +304,9 @@ static int ocfs2_mknod(struct inode *dir, } /* do the real work now. 
*/ - status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev, + status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev, &new_fe_bh, parent_fe_bh, handle, - &inode, inode_ac); + inode_ac); if (status < 0) { mlog_errno(status); goto leave; @@ -332,8 +367,10 @@ leave: brelse(de_bh); brelse(parent_fe_bh); - if ((status < 0) && inode) + if ((status < 0) && inode) { + clear_nlink(inode); iput(inode); + } if (inode_ac) ocfs2_free_alloc_context(inode_ac); @@ -348,12 +385,12 @@ leave: static int ocfs2_mknod_locked(struct ocfs2_super *osb, struct inode *dir, - struct dentry *dentry, int mode, + struct inode *inode, + struct dentry *dentry, dev_t dev, struct buffer_head **new_fe_bh, struct buffer_head *parent_fe_bh, handle_t *handle, - struct inode **ret_inode, struct ocfs2_alloc_context *inode_ac) { int status = 0; @@ -361,14 +398,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, struct ocfs2_extent_list *fel; u64 fe_blkno = 0; u16 suballoc_bit; - struct inode *inode = NULL; - mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, - (unsigned long)dev, dentry->d_name.len, + mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, + inode->i_mode, (unsigned long)dev, dentry->d_name.len, dentry->d_name.name); *new_fe_bh = NULL; - *ret_inode = NULL; status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit, &fe_blkno); @@ -377,23 +412,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, goto leave; } - inode = new_inode(dir->i_sb); - if (!inode) { - status = -ENOMEM; - mlog(ML_ERROR, "new_inode failed!\n"); - goto leave; - } - /* populate as many fields early on as possible - many of * these are used by the support functions here and in * callers. 
*/ inode->i_ino = ino_from_blkno(osb->sb, fe_blkno); OCFS2_I(inode)->ip_blkno = fe_blkno; - if (S_ISDIR(mode)) - inode->i_nlink = 2; - else - inode->i_nlink = 1; - inode->i_mode = mode; spin_lock(&osb->osb_lock); inode->i_generation = osb->s_next_generation++; spin_unlock(&osb->osb_lock); @@ -421,17 +444,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, fe->i_blkno = cpu_to_le64(fe_blkno); fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); - fe->i_uid = cpu_to_le32(current_fsuid()); - if (dir->i_mode & S_ISGID) { - fe->i_gid = cpu_to_le32(dir->i_gid); - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else - fe->i_gid = cpu_to_le32(current_fsgid()); - fe->i_mode = cpu_to_le16(mode); - if (S_ISCHR(mode) || S_ISBLK(mode)) + fe->i_uid = cpu_to_le32(inode->i_uid); + fe->i_gid = cpu_to_le32(inode->i_gid); + fe->i_mode = cpu_to_le16(inode->i_mode); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); - fe->i_links_count = cpu_to_le16(inode->i_nlink); fe->i_last_eb_blk = 0; @@ -446,7 +463,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, /* * If supported, directories start with inline data. 
*/ - if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) { + if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) { u16 feat = le16_to_cpu(fe->i_dyn_features); fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL); @@ -484,17 +501,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, status = 0; /* error in ocfs2_create_new_inode_locks is not * critical */ - *ret_inode = inode; leave: if (status < 0) { if (*new_fe_bh) { brelse(*new_fe_bh); *new_fe_bh = NULL; } - if (inode) { - clear_nlink(inode); - iput(inode); - } } mlog_exit(status); @@ -1542,6 +1554,13 @@ static int ocfs2_symlink(struct inode *dir, goto bail; } + inode = ocfs2_get_init_inode(dir, S_IFLNK | S_IRWXUGO); + if (!inode) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + /* don't reserve bitmap space for fast symlinks. */ if (l > ocfs2_fast_symlink_chars(sb)) { status = ocfs2_reserve_clusters(osb, 1, &data_ac); @@ -1560,10 +1579,9 @@ static int ocfs2_symlink(struct inode *dir, goto bail; } - status = ocfs2_mknod_locked(osb, dir, dentry, - S_IFLNK | S_IRWXUGO, 0, - &new_fe_bh, parent_fe_bh, handle, - &inode, inode_ac); + status = ocfs2_mknod_locked(osb, dir, inode, dentry, + 0, &new_fe_bh, parent_fe_bh, handle, + inode_ac); if (status < 0) { mlog_errno(status); goto bail; @@ -1644,8 +1662,10 @@ bail: ocfs2_free_alloc_context(inode_ac); if (data_ac) ocfs2_free_alloc_context(data_ac); - if ((status < 0) && inode) + if ((status < 0) && inode) { + clear_nlink(inode); iput(inode); + } mlog_exit(status); -- cgit v1.2.3-70-g09d2 From 6c3faba4421e230d77a181c260972229c542dec9 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Fri, 14 Nov 2008 11:16:03 +0800 Subject: ocfs2: add ocfs2_xattr_set_handle This function is used to set xattr's in a started transaction. It is only called during inode creation for the initial security/acl xattrs of the new inode. These xattrs could be put into ibody or extent block, so xattr bucket would not be used in this case. 
Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/xattr.h | 4 ++++ 2 files changed, 72 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 7a9089255a8..6480254fe39 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2325,6 +2325,74 @@ out: return ret; } +/* + * This function is only called during inode creation + * for init security/acl xattrs of the new inode. + * The xattrs could be put into ibody or extent block, + * xattr bucket would not be used in this case. + * transaction credits also be reserved in here. + */ +int ocfs2_xattr_set_handle(handle_t *handle, + struct inode *inode, + struct buffer_head *di_bh, + int name_index, + const char *name, + const void *value, + size_t value_len, + int flags, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_alloc_context *data_ac) +{ + struct ocfs2_dinode *di; + int ret; + + struct ocfs2_xattr_info xi = { + .name_index = name_index, + .name = name, + .value = value, + .value_len = value_len, + }; + + struct ocfs2_xattr_search xis = { + .not_found = -ENODATA, + }; + + struct ocfs2_xattr_search xbs = { + .not_found = -ENODATA, + }; + + struct ocfs2_xattr_set_ctxt ctxt = { + .handle = handle, + .meta_ac = meta_ac, + .data_ac = data_ac, + }; + + if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) + return -EOPNOTSUPP; + + xis.inode_bh = xbs.inode_bh = di_bh; + di = (struct ocfs2_dinode *)di_bh->b_data; + + down_write(&OCFS2_I(inode)->ip_xattr_sem); + + ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); + if (ret) + goto cleanup; + if (xis.not_found) { + ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); + if (ret) + goto cleanup; + } + + ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); + +cleanup: + up_write(&OCFS2_I(inode)->ip_xattr_sem); + brelse(xbs.xattr_bh); + + return ret; +} + /* * ocfs2_xattr_set() * diff --git a/fs/ocfs2/xattr.h 
b/fs/ocfs2/xattr.h index 1d8314c7656..8fbdc163c83 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -37,6 +37,10 @@ extern struct xattr_handler *ocfs2_xattr_handlers[]; ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); int ocfs2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); +int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *, + int, const char *, const void *, size_t, int, + struct ocfs2_alloc_context *, + struct ocfs2_alloc_context *); int ocfs2_xattr_remove(struct inode *, struct buffer_head *); #endif /* OCFS2_XATTR_H */ -- cgit v1.2.3-70-g09d2 From 923f7f3102b80403152e05aee3d55ecfce240440 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Fri, 14 Nov 2008 11:16:27 +0800 Subject: ocfs2: add security xattr API This patch adds security xattr set/get/list APIs to support security attributes in Ocfs2. Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/xattr.h | 1 + 2 files changed, 48 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 6480254fe39..db03162914c 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -35,6 +35,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_XATTR #include @@ -88,12 +89,14 @@ static struct ocfs2_xattr_def_value_root def_xv = { struct xattr_handler *ocfs2_xattr_handlers[] = { &ocfs2_xattr_user_handler, &ocfs2_xattr_trusted_handler, + &ocfs2_xattr_security_handler, NULL }; static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, + [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, }; struct ocfs2_xattr_info { @@ -4976,6 +4979,50 @@ out: return ret; } +/* + * 'security' attributes support + */ +static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, + size_t list_size, const 
char *name, + size_t name_len) +{ + const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; + const size_t total_len = prefix_len + name_len + 1; + + if (list && total_len <= list_size) { + memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); + memcpy(list + prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int ocfs2_xattr_security_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name, + buffer, size); +} + +static int ocfs2_xattr_security_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + + return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value, + size, flags); +} + +struct xattr_handler ocfs2_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .list = ocfs2_xattr_security_list, + .get = ocfs2_xattr_security_get, + .set = ocfs2_xattr_security_set, +}; + /* * 'trusted' attributes support */ diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index 8fbdc163c83..55c5256ff56 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -32,6 +32,7 @@ enum ocfs2_xattr_type { extern struct xattr_handler ocfs2_xattr_user_handler; extern struct xattr_handler ocfs2_xattr_trusted_handler; +extern struct xattr_handler ocfs2_xattr_security_handler; extern struct xattr_handler *ocfs2_xattr_handlers[]; ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); -- cgit v1.2.3-70-g09d2 From 534eadddc1de8754a227202c0e747af4973f82ce Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Fri, 14 Nov 2008 11:16:41 +0800 Subject: ocfs2: add ocfs2_init_security in during file create Security attributes must be set when creating a new inode. We do this in three steps. 
- First, get security xattr's name and value by security_operation - Calculate and reserve the meta data and clusters needed by this security xattr before starting transaction - Finally, we set it before add_entry Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/namei.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++------- fs/ocfs2/xattr.c | 70 ++++++++++++++++++++++++++++++++++++ fs/ocfs2/xattr.h | 17 +++++++++ 3 files changed, 182 insertions(+), 12 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index e8ff0bae179..40da46b907f 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -229,6 +229,12 @@ static int ocfs2_mknod(struct inode *dir, struct inode *inode = NULL; struct ocfs2_alloc_context *inode_ac = NULL; struct ocfs2_alloc_context *data_ac = NULL; + struct ocfs2_alloc_context *xattr_ac = NULL; + int want_clusters = 0; + int xattr_credits = 0; + struct ocfs2_security_xattr_info si = { + .enable = 1, + }; mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, (unsigned long)dev, dentry->d_name.len, @@ -285,17 +291,39 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } - /* Reserve a cluster if creating an extent based directory. */ - if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) { - status = ocfs2_reserve_clusters(osb, 1, &data_ac); + /* get security xattr */ + status = ocfs2_init_security_get(inode, dir, &si); + if (status) { + if (status == -EOPNOTSUPP) + si.enable = 0; + else { + mlog_errno(status); + goto leave; + } + } + + /* calculate meta data/clusters for setting security xattr */ + if (si.enable) { + status = ocfs2_calc_security_init(dir, &si, &want_clusters, + &xattr_credits, &xattr_ac); if (status < 0) { - if (status != -ENOSPC) - mlog_errno(status); + mlog_errno(status); goto leave; } } - handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS); + /* Reserve a cluster if creating an extent based directory. 
*/ + if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) + want_clusters += 1; + + status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac); + if (status < 0) { + if (status != -ENOSPC) + mlog_errno(status); + goto leave; + } + + handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS + xattr_credits); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; @@ -335,6 +363,15 @@ static int ocfs2_mknod(struct inode *dir, inc_nlink(dir); } + if (si.enable) { + status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si, + xattr_ac, data_ac); + if (status < 0) { + mlog_errno(status); + goto leave; + } + } + status = ocfs2_add_entry(handle, dentry, inode, OCFS2_I(inode)->ip_blkno, parent_fe_bh, de_bh); @@ -366,6 +403,8 @@ leave: brelse(new_fe_bh); brelse(de_bh); brelse(parent_fe_bh); + kfree(si.name); + kfree(si.value); if ((status < 0) && inode) { clear_nlink(inode); @@ -378,6 +417,9 @@ leave: if (data_ac) ocfs2_free_alloc_context(data_ac); + if (xattr_ac) + ocfs2_free_alloc_context(xattr_ac); + mlog_exit(status); return status; @@ -1508,6 +1550,12 @@ static int ocfs2_symlink(struct inode *dir, handle_t *handle = NULL; struct ocfs2_alloc_context *inode_ac = NULL; struct ocfs2_alloc_context *data_ac = NULL; + struct ocfs2_alloc_context *xattr_ac = NULL; + int want_clusters = 0; + int xattr_credits = 0; + struct ocfs2_security_xattr_info si = { + .enable = 1, + }; mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, dentry, symname, dentry->d_name.len, dentry->d_name.name); @@ -1561,17 +1609,39 @@ static int ocfs2_symlink(struct inode *dir, goto bail; } - /* don't reserve bitmap space for fast symlinks. 
*/ - if (l > ocfs2_fast_symlink_chars(sb)) { - status = ocfs2_reserve_clusters(osb, 1, &data_ac); + /* get security xattr */ + status = ocfs2_init_security_get(inode, dir, &si); + if (status) { + if (status == -EOPNOTSUPP) + si.enable = 0; + else { + mlog_errno(status); + goto bail; + } + } + + /* calculate meta data/clusters for setting security xattr */ + if (si.enable) { + status = ocfs2_calc_security_init(dir, &si, &want_clusters, + &xattr_credits, &xattr_ac); if (status < 0) { - if (status != -ENOSPC) - mlog_errno(status); + mlog_errno(status); goto bail; } } - handle = ocfs2_start_trans(osb, credits); + /* don't reserve bitmap space for fast symlinks. */ + if (l > ocfs2_fast_symlink_chars(sb)) + want_clusters += 1; + + status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac); + if (status < 0) { + if (status != -ENOSPC) + mlog_errno(status); + goto bail; + } + + handle = ocfs2_start_trans(osb, credits + xattr_credits); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; @@ -1632,6 +1702,15 @@ static int ocfs2_symlink(struct inode *dir, } } + if (si.enable) { + status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si, + xattr_ac, data_ac); + if (status < 0) { + mlog_errno(status); + goto bail; + } + } + status = ocfs2_add_entry(handle, dentry, inode, le64_to_cpu(fe->i_blkno), parent_fe_bh, de_bh); @@ -1658,10 +1737,14 @@ bail: brelse(new_fe_bh); brelse(parent_fe_bh); brelse(de_bh); + kfree(si.name); + kfree(si.value); if (inode_ac) ocfs2_free_alloc_context(inode_ac); if (data_ac) ocfs2_free_alloc_context(data_ac); + if (xattr_ac) + ocfs2_free_alloc_context(xattr_ac); if ((status < 0) && inode) { clear_nlink(inode); iput(inode); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index db03162914c..2cab0d6615f 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -81,6 +81,9 @@ struct ocfs2_xattr_set_ctxt { #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) #define OCFS2_XATTR_INLINE_SIZE 80 +#define 
OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ + - sizeof(struct ocfs2_xattr_header) \ + - sizeof(__u32)) static struct ocfs2_xattr_def_value_root def_xv = { .xv.xr_list.l_count = cpu_to_le16(1), @@ -343,6 +346,52 @@ static void ocfs2_xattr_hash_entry(struct inode *inode, return; } +static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) +{ + int size = 0; + + if (value_len <= OCFS2_XATTR_INLINE_SIZE) + size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); + else + size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; + size += sizeof(struct ocfs2_xattr_entry); + + return size; +} + +int ocfs2_calc_security_init(struct inode *dir, + struct ocfs2_security_xattr_info *si, + int *want_clusters, + int *xattr_credits, + struct ocfs2_alloc_context **xattr_ac) +{ + int ret = 0; + struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); + int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), + si->value_len); + + /* + * The max space of security xattr taken inline is + * 256(name) + 80(value) + 16(entry) = 352 bytes, + * So reserve one metadata block for it is ok. 
+ */ + if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || + s_size > OCFS2_XATTR_FREE_IN_IBODY) { + ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); + if (ret) { + mlog_errno(ret); + return ret; + } + *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; + } + + /* reserve clusters for xattr value which will be set in B tree*/ + if (si->value_len > OCFS2_XATTR_INLINE_SIZE) + *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, + si->value_len); + return ret; +} + static int ocfs2_xattr_extend_allocation(struct inode *inode, u32 clusters_to_add, struct buffer_head *xattr_bh, @@ -5016,6 +5065,27 @@ static int ocfs2_xattr_security_set(struct inode *inode, const char *name, size, flags); } +int ocfs2_init_security_get(struct inode *inode, + struct inode *dir, + struct ocfs2_security_xattr_info *si) +{ + return security_inode_init_security(inode, dir, &si->name, &si->value, + &si->value_len); +} + +int ocfs2_init_security_set(handle_t *handle, + struct inode *inode, + struct buffer_head *di_bh, + struct ocfs2_security_xattr_info *si, + struct ocfs2_alloc_context *xattr_ac, + struct ocfs2_alloc_context *data_ac) +{ + return ocfs2_xattr_set_handle(handle, inode, di_bh, + OCFS2_XATTR_INDEX_SECURITY, + si->name, si->value, si->value_len, 0, + xattr_ac, data_ac); +} + struct xattr_handler ocfs2_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, .list = ocfs2_xattr_security_list, diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index 55c5256ff56..188ef6ba683 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -30,6 +30,13 @@ enum ocfs2_xattr_type { OCFS2_XATTR_MAX }; +struct ocfs2_security_xattr_info { + int enable; + char *name; + void *value; + size_t value_len; +}; + extern struct xattr_handler ocfs2_xattr_user_handler; extern struct xattr_handler ocfs2_xattr_trusted_handler; extern struct xattr_handler ocfs2_xattr_security_handler; @@ -43,5 +50,15 @@ int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *, struct 
ocfs2_alloc_context *, struct ocfs2_alloc_context *); int ocfs2_xattr_remove(struct inode *, struct buffer_head *); +int ocfs2_init_security_get(struct inode *, struct inode *, + struct ocfs2_security_xattr_info *); +int ocfs2_init_security_set(handle_t *, struct inode *, + struct buffer_head *, + struct ocfs2_security_xattr_info *, + struct ocfs2_alloc_context *, + struct ocfs2_alloc_context *); +int ocfs2_calc_security_init(struct inode *, + struct ocfs2_security_xattr_info *, + int *, int *, struct ocfs2_alloc_context **); #endif /* OCFS2_XATTR_H */ -- cgit v1.2.3-70-g09d2 From 4e3e9d027f63488e676bf7700ec515a192e54f69 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Fri, 14 Nov 2008 11:16:53 +0800 Subject: ocfs2: add ocfs2_xattr_get_nolock This function does the work of ocfs2_xattr_get under an open lock. Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 40 ++++++++++++++++++++++++++++------------ fs/ocfs2/xattr.h | 2 ++ 2 files changed, 30 insertions(+), 12 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 2cab0d6615f..ba9b870a5dd 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -925,12 +925,8 @@ cleanup: return ret; } -/* ocfs2_xattr_get() - * - * Copy an extended attribute into the buffer provided. - * Buffer is NULL to compute the size of buffer required. 
- */ -static int ocfs2_xattr_get(struct inode *inode, +int ocfs2_xattr_get_nolock(struct inode *inode, + struct buffer_head *di_bh, int name_index, const char *name, void *buffer, @@ -938,7 +934,6 @@ static int ocfs2_xattr_get(struct inode *inode, { int ret; struct ocfs2_dinode *di = NULL; - struct buffer_head *di_bh = NULL; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_xattr_search xis = { .not_found = -ENODATA, @@ -953,11 +948,6 @@ static int ocfs2_xattr_get(struct inode *inode, if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) ret = -ENODATA; - ret = ocfs2_inode_lock(inode, &di_bh, 0); - if (ret < 0) { - mlog_errno(ret); - return ret; - } xis.inode_bh = xbs.inode_bh = di_bh; di = (struct ocfs2_dinode *)di_bh->b_data; @@ -968,6 +958,32 @@ static int ocfs2_xattr_get(struct inode *inode, ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, buffer_size, &xbs); up_read(&oi->ip_xattr_sem); + + return ret; +} + +/* ocfs2_xattr_get() + * + * Copy an extended attribute into the buffer provided. + * Buffer is NULL to compute the size of buffer required. 
+ */ +static int ocfs2_xattr_get(struct inode *inode, + int name_index, + const char *name, + void *buffer, + size_t buffer_size) +{ + int ret; + struct buffer_head *di_bh = NULL; + + ret = ocfs2_inode_lock(inode, &di_bh, 0); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, + name, buffer, buffer_size); + ocfs2_inode_unlock(inode, 0); brelse(di_bh); diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index 188ef6ba683..86aa10ffe3f 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -43,6 +43,8 @@ extern struct xattr_handler ocfs2_xattr_security_handler; extern struct xattr_handler *ocfs2_xattr_handlers[]; ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); +int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int, + const char *, void *, size_t); int ocfs2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *, -- cgit v1.2.3-70-g09d2 From 929fb014e041c6572c5e8c3686f1e32742b5b953 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Fri, 14 Nov 2008 11:17:04 +0800 Subject: ocfs2: add POSIX ACL API This patch adds POSIX ACL(access control lists) APIs in ocfs2. We convert struct posix_acl to many ocfs2_acl_entry and regard them as an extended attribute entry. 
Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/Makefile | 4 + fs/ocfs2/acl.c | 378 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/acl.h | 29 +++++ fs/ocfs2/ocfs2.h | 1 + fs/ocfs2/xattr.c | 10 ++ fs/ocfs2/xattr.h | 4 + 6 files changed, 426 insertions(+) create mode 100644 fs/ocfs2/acl.c create mode 100644 fs/ocfs2/acl.h (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 589dcdfdfe3..e9ef5d162db 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -37,6 +37,10 @@ ocfs2-objs := \ ver.o \ xattr.o +ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y) +ocfs2-objs += acl.o +endif + ocfs2_stackglue-objs := stackglue.o ocfs2_stack_o2cb-objs := stack_o2cb.o ocfs2_stack_user-objs := stack_user.o diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c new file mode 100644 index 00000000000..62d0faad600 --- /dev/null +++ b/fs/ocfs2/acl.c @@ -0,0 +1,378 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * acl.c + * + * Copyright (C) 2004, 2008 Oracle. All rights reserved. + * + * CREDITS: + * Lots of code in this file is copy from linux/fs/ext3/acl.c. + * Copyright (C) 2001-2003 Andreas Gruenbacher, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include +#include +#include + +#define MLOG_MASK_PREFIX ML_INODE +#include + +#include "ocfs2.h" +#include "alloc.h" +#include "dlmglue.h" +#include "file.h" +#include "ocfs2_fs.h" + +#include "xattr.h" +#include "acl.h" + +/* + * Convert from xattr value to acl struct. 
+ */ +static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size) +{ + int n, count; + struct posix_acl *acl; + + if (!value) + return NULL; + if (size < sizeof(struct posix_acl_entry)) + return ERR_PTR(-EINVAL); + + count = size / sizeof(struct posix_acl_entry); + if (count < 0) + return ERR_PTR(-EINVAL); + if (count == 0) + return NULL; + + acl = posix_acl_alloc(count, GFP_NOFS); + if (!acl) + return ERR_PTR(-ENOMEM); + for (n = 0; n < count; n++) { + struct ocfs2_acl_entry *entry = + (struct ocfs2_acl_entry *)value; + + acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); + acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); + acl->a_entries[n].e_id = le32_to_cpu(entry->e_id); + value += sizeof(struct posix_acl_entry); + + } + return acl; +} + +/* + * Convert acl struct to xattr value. + */ +static void *ocfs2_acl_to_xattr(const struct posix_acl *acl, size_t *size) +{ + struct ocfs2_acl_entry *entry = NULL; + char *ocfs2_acl; + size_t n; + + *size = acl->a_count * sizeof(struct posix_acl_entry); + + ocfs2_acl = kmalloc(*size, GFP_NOFS); + if (!ocfs2_acl) + return ERR_PTR(-ENOMEM); + + entry = (struct ocfs2_acl_entry *)ocfs2_acl; + for (n = 0; n < acl->a_count; n++, entry++) { + entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); + entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); + entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); + } + return ocfs2_acl; +} + +static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode, + int type, + struct buffer_head *di_bh) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + int name_index; + char *value = NULL; + struct posix_acl *acl; + int retval; + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return NULL; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + default: + return ERR_PTR(-EINVAL); + } + + retval = ocfs2_xattr_get_nolock(inode, 
di_bh, name_index, "", NULL, 0); + if (retval > 0) { + value = kmalloc(retval, GFP_NOFS); + if (!value) + return ERR_PTR(-ENOMEM); + retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index, + "", value, retval); + } + + if (retval > 0) + acl = ocfs2_acl_from_xattr(value, retval); + else if (retval == -ENODATA || retval == 0) + acl = NULL; + else + acl = ERR_PTR(retval); + + kfree(value); + + return acl; +} + + +/* + * Get posix acl. + */ +static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct buffer_head *di_bh = NULL; + struct posix_acl *acl; + int ret; + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return NULL; + + ret = ocfs2_inode_lock(inode, &di_bh, 0); + if (ret < 0) { + mlog_errno(ret); + acl = ERR_PTR(ret); + return acl; + } + + acl = ocfs2_get_acl_nolock(inode, type, di_bh); + + ocfs2_inode_unlock(inode, 0); + + brelse(di_bh); + + return acl; +} + +/* + * Set the access or default ACL of an inode. + */ +static int ocfs2_set_acl(handle_t *handle, + struct inode *inode, + struct buffer_head *di_bh, + int type, + struct posix_acl *acl, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_alloc_context *data_ac) +{ + int name_index; + void *value = NULL; + size_t size = 0; + int ret; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; + if (acl) { + mode_t mode = inode->i_mode; + ret = posix_acl_equiv_mode(acl, &mode); + if (ret < 0) + return ret; + else { + inode->i_mode = mode; + if (ret == 0) + acl = NULL; + } + } + break; + case ACL_TYPE_DEFAULT: + name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? 
-EACCES : 0; + break; + default: + return -EINVAL; + } + + if (acl) { + value = ocfs2_acl_to_xattr(acl, &size); + if (IS_ERR(value)) + return (int)PTR_ERR(value); + } + + if (handle) + ret = ocfs2_xattr_set_handle(handle, inode, di_bh, name_index, + "", value, size, 0, + meta_ac, data_ac); + else + ret = ocfs2_xattr_set(inode, name_index, "", value, size, 0); + + kfree(value); + + return ret; +} + +static size_t ocfs2_xattr_list_acl_access(struct inode *inode, + char *list, + size_t list_len, + const char *name, + size_t name_len) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return 0; + + if (list && size <= list_len) + memcpy(list, POSIX_ACL_XATTR_ACCESS, size); + return size; +} + +static size_t ocfs2_xattr_list_acl_default(struct inode *inode, + char *list, + size_t list_len, + const char *name, + size_t name_len) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return 0; + + if (list && size <= list_len) + memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); + return size; +} + +static int ocfs2_xattr_get_acl(struct inode *inode, + int type, + void *buffer, + size_t size) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct posix_acl *acl; + int ret; + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return -EOPNOTSUPP; + + acl = ocfs2_get_acl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + ret = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return ret; +} + +static int ocfs2_xattr_get_acl_access(struct inode *inode, + const char *name, + void *buffer, + size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); +} + +static int ocfs2_xattr_get_acl_default(struct inode *inode, + const 
char *name, + void *buffer, + size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); +} + +static int ocfs2_xattr_set_acl(struct inode *inode, + int type, + const void *value, + size_t size) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct posix_acl *acl; + int ret = 0; + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return -EOPNOTSUPP; + + if (!is_owner_or_cap(inode)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + ret = posix_acl_valid(acl); + if (ret) + goto cleanup; + } + } else + acl = NULL; + + ret = ocfs2_set_acl(NULL, inode, NULL, type, acl, NULL, NULL); + +cleanup: + posix_acl_release(acl); + return ret; +} + +static int ocfs2_xattr_set_acl_access(struct inode *inode, + const char *name, + const void *value, + size_t size, + int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); +} + +static int ocfs2_xattr_set_acl_default(struct inode *inode, + const char *name, + const void *value, + size_t size, + int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); +} + +struct xattr_handler ocfs2_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .list = ocfs2_xattr_list_acl_access, + .get = ocfs2_xattr_get_acl_access, + .set = ocfs2_xattr_set_acl_access, +}; + +struct xattr_handler ocfs2_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .list = ocfs2_xattr_list_acl_default, + .get = ocfs2_xattr_get_acl_default, + .set = ocfs2_xattr_set_acl_default, +}; diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h new file mode 100644 index 00000000000..1b39f3e14c1 --- /dev/null +++ b/fs/ocfs2/acl.h @@ -0,0 +1,29 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * acl.h + * + * 
Copyright (C) 2004, 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef OCFS2_ACL_H +#define OCFS2_ACL_H + +#include + +struct ocfs2_acl_entry { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +}; + +#endif /* OCFS2_ACL_H */ diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 3fed9e3d899..25d07ff1d3c 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -195,6 +195,7 @@ enum ocfs2_mount_options OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ + OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */ }; #define OCFS2_OSB_SOFT_RO 0x0001 diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index ba9b870a5dd..2e273c2cb83 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -91,6 +91,10 @@ static struct ocfs2_xattr_def_value_root def_xv = { struct xattr_handler *ocfs2_xattr_handlers[] = { &ocfs2_xattr_user_handler, +#ifdef CONFIG_OCFS2_FS_POSIX_ACL + &ocfs2_xattr_acl_access_handler, + &ocfs2_xattr_acl_default_handler, +#endif &ocfs2_xattr_trusted_handler, &ocfs2_xattr_security_handler, NULL @@ -98,6 +102,12 @@ struct xattr_handler *ocfs2_xattr_handlers[] = { static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, +#ifdef CONFIG_OCFS2_FS_POSIX_ACL + [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] + = &ocfs2_xattr_acl_access_handler, + [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] + = &ocfs2_xattr_acl_default_handler, +#endif 
[OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, }; diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index 86aa10ffe3f..6163df336d8 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -40,6 +40,10 @@ struct ocfs2_security_xattr_info { extern struct xattr_handler ocfs2_xattr_user_handler; extern struct xattr_handler ocfs2_xattr_trusted_handler; extern struct xattr_handler ocfs2_xattr_security_handler; +#ifdef CONFIG_OCFS2_FS_POSIX_ACL +extern struct xattr_handler ocfs2_xattr_acl_access_handler; +extern struct xattr_handler ocfs2_xattr_acl_default_handler; +#endif extern struct xattr_handler *ocfs2_xattr_handlers[]; ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); -- cgit v1.2.3-70-g09d2 From 23fc2702bea686569281708ad519b41a11d0a2f4 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Fri, 14 Nov 2008 11:17:18 +0800 Subject: ocfs2: add ocfs2_check_acl This function is used to enhance permission checking with POSIX ACLs. 
Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/acl.c | 15 +++++++++++++++ fs/ocfs2/acl.h | 10 ++++++++++ fs/ocfs2/file.c | 3 ++- 3 files changed, 27 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 62d0faad600..a6a2bf6d684 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -230,6 +230,21 @@ static int ocfs2_set_acl(handle_t *handle, return ret; } +int ocfs2_check_acl(struct inode *inode, int mask) +{ + struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS); + + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + int ret = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return ret; + } + + return -EAGAIN; +} + static size_t ocfs2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index 1b39f3e14c1..fef10f1b782 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -26,4 +26,14 @@ struct ocfs2_acl_entry { __le32 e_id; }; +#ifdef CONFIG_OCFS2_FS_POSIX_ACL + +extern int ocfs2_check_acl(struct inode *, int); + +#else /* CONFIG_OCFS2_FS_POSIX_ACL*/ + +#define ocfs2_check_acl NULL + +#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/ + #endif /* OCFS2_ACL_H */ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 360549161e2..7bad7d9b9a2 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -56,6 +56,7 @@ #include "suballoc.h" #include "super.h" #include "xattr.h" +#include "acl.h" #include "buffer_head_io.h" @@ -1035,7 +1036,7 @@ int ocfs2_permission(struct inode *inode, int mask) goto out; } - ret = generic_permission(inode, mask, NULL); + ret = generic_permission(inode, mask, ocfs2_check_acl); ocfs2_inode_unlock(inode, 0); out: -- cgit v1.2.3-70-g09d2 From 060bc66dd5017460076d9e808e2198cd532c943d Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Fri, 14 Nov 2008 11:17:29 +0800 Subject: ocfs2: add ocfs2_acl_chmod This function is used to update acl xattrs during file mode changes. 
Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/acl.c | 27 +++++++++++++++++++++++++++ fs/ocfs2/acl.h | 5 +++++ fs/ocfs2/file.c | 6 ++++++ 3 files changed, 38 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index a6a2bf6d684..df72256c442 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -245,6 +245,33 @@ int ocfs2_check_acl(struct inode *inode, int mask) return -EAGAIN; } +int ocfs2_acl_chmod(struct inode *inode) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct posix_acl *acl, *clone; + int ret; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return 0; + + acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + ret = posix_acl_chmod_masq(clone, inode->i_mode); + if (!ret) + ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS, + clone, NULL, NULL); + posix_acl_release(clone); + return ret; +} + static size_t ocfs2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index fef10f1b782..68ffd6436c5 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -29,10 +29,15 @@ struct ocfs2_acl_entry { #ifdef CONFIG_OCFS2_FS_POSIX_ACL extern int ocfs2_check_acl(struct inode *, int); +extern int ocfs2_acl_chmod(struct inode *); #else /* CONFIG_OCFS2_FS_POSIX_ACL*/ #define ocfs2_check_acl NULL +static inline int ocfs2_acl_chmod(struct inode *inode) +{ + return 0; +} #endif /* CONFIG_OCFS2_FS_POSIX_ACL*/ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 7bad7d9b9a2..4636aa6b011 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -990,6 +990,12 @@ bail_unlock_rw: bail: brelse(bh); + if (!status && attr->ia_valid & ATTR_MODE) { + status = ocfs2_acl_chmod(inode); + if (status < 0) + mlog_errno(status); + } + mlog_exit(status); return status; } -- 
cgit v1.2.3-70-g09d2 From 89c38bd0ade3c567707ed8fce088b253b0369c50 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Fri, 14 Nov 2008 11:17:41 +0800 Subject: ocfs2: add ocfs2_init_acl in mknod We need to get the parent directories acls and let the new child inherit it. To this, we add additional calculations for data/metadata allocation. Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/acl.c | 59 ++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/acl.h | 14 ++++++++++ fs/ocfs2/namei.c | 23 +++++++++++------ fs/ocfs2/xattr.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/xattr.h | 3 +++ 5 files changed, 170 insertions(+), 8 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index df72256c442..12dfb44c22e 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -272,6 +272,65 @@ int ocfs2_acl_chmod(struct inode *inode) return ret; } +/* + * Initialize the ACLs of a new inode. If parent directory has default ACL, + * then clone to new inode. Called from ocfs2_mknod. 
+ */ +int ocfs2_init_acl(handle_t *handle, + struct inode *inode, + struct inode *dir, + struct buffer_head *di_bh, + struct buffer_head *dir_bh, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_alloc_context *data_ac) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct posix_acl *acl = NULL; + int ret = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { + acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT, + dir_bh); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl) + inode->i_mode &= ~current->fs->umask; + } + if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { + struct posix_acl *clone; + mode_t mode; + + if (S_ISDIR(inode->i_mode)) { + ret = ocfs2_set_acl(handle, inode, di_bh, + ACL_TYPE_DEFAULT, acl, + meta_ac, data_ac); + if (ret) + goto cleanup; + } + clone = posix_acl_clone(acl, GFP_NOFS); + ret = -ENOMEM; + if (!clone) + goto cleanup; + + mode = inode->i_mode; + ret = posix_acl_create_masq(clone, &mode); + if (ret >= 0) { + inode->i_mode = mode; + if (ret > 0) { + ret = ocfs2_set_acl(handle, inode, + di_bh, ACL_TYPE_ACCESS, + clone, meta_ac, data_ac); + } + } + posix_acl_release(clone); + } +cleanup: + posix_acl_release(acl); + return ret; +} + static size_t ocfs2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index 68ffd6436c5..8f6389ed4da 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -30,6 +30,10 @@ struct ocfs2_acl_entry { extern int ocfs2_check_acl(struct inode *, int); extern int ocfs2_acl_chmod(struct inode *); +extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, + struct buffer_head *, struct buffer_head *, + struct ocfs2_alloc_context *, + struct ocfs2_alloc_context *); #else /* CONFIG_OCFS2_FS_POSIX_ACL*/ @@ -38,6 +42,16 @@ static inline int ocfs2_acl_chmod(struct inode *inode) { return 0; } +static inline int ocfs2_init_acl(handle_t *handle, + struct inode *inode, + struct inode *dir, + 
struct buffer_head *di_bh, + struct buffer_head *dir_bh, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_alloc_context *data_ac) +{ + return 0; +} #endif /* CONFIG_OCFS2_FS_POSIX_ACL*/ diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 40da46b907f..76551451209 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -61,6 +61,7 @@ #include "sysfile.h" #include "uptodate.h" #include "xattr.h" +#include "acl.h" #include "buffer_head_io.h" @@ -302,14 +303,13 @@ static int ocfs2_mknod(struct inode *dir, } } - /* calculate meta data/clusters for setting security xattr */ - if (si.enable) { - status = ocfs2_calc_security_init(dir, &si, &want_clusters, - &xattr_credits, &xattr_ac); - if (status < 0) { - mlog_errno(status); - goto leave; - } + /* calculate meta data/clusters for setting security and acl xattr */ + status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode, + &si, &want_clusters, + &xattr_credits, &xattr_ac); + if (status < 0) { + mlog_errno(status); + goto leave; } /* Reserve a cluster if creating an extent based directory. 
*/ @@ -363,6 +363,13 @@ static int ocfs2_mknod(struct inode *dir, inc_nlink(dir); } + status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh, + xattr_ac, data_ac); + if (status < 0) { + mlog_errno(status); + goto leave; + } + if (si.enable) { status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si, xattr_ac, data_ac); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 2e273c2cb83..3cc8385f973 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -84,6 +84,10 @@ struct ocfs2_xattr_set_ctxt { #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ - sizeof(struct ocfs2_xattr_header) \ - sizeof(__u32)) +#define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ + - sizeof(struct ocfs2_xattr_block) \ + - sizeof(struct ocfs2_xattr_header) \ + - sizeof(__u32)) static struct ocfs2_xattr_def_value_root def_xv = { .xv.xr_list.l_count = cpu_to_le16(1), @@ -402,6 +406,81 @@ int ocfs2_calc_security_init(struct inode *dir, return ret; } +int ocfs2_calc_xattr_init(struct inode *dir, + struct buffer_head *dir_bh, + int mode, + struct ocfs2_security_xattr_info *si, + int *want_clusters, + int *xattr_credits, + struct ocfs2_alloc_context **xattr_ac) +{ + int ret = 0; + struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); + int s_size = 0; + int a_size = 0; + int acl_len = 0; + + if (si->enable) + s_size = ocfs2_xattr_entry_real_size(strlen(si->name), + si->value_len); + + if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { + acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, + OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, + "", NULL, 0); + if (acl_len > 0) { + a_size = ocfs2_xattr_entry_real_size(0, acl_len); + if (S_ISDIR(mode)) + a_size <<= 1; + } else if (acl_len != 0 && acl_len != -ENODATA) { + mlog_errno(acl_len); + return acl_len; + } + } + + if (!(s_size + a_size)) + return ret; + + /* + * The max space of security xattr taken inline is + * 256(name) + 80(value) + 16(entry) = 352 bytes, + * The max space of acl xattr taken inline is + * 80(value) + 16(entry) * 
2(if directory) = 192 bytes, + * when blocksize = 512, may reserve one more cluster for + * xattr bucket, otherwise reserve one metadata block + * for them is ok. + */ + if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || + (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { + ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); + if (ret) { + mlog_errno(ret); + return ret; + } + *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; + } + + if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && + (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { + *want_clusters += 1; + *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); + } + + /* reserve clusters for xattr value which will be set in B tree*/ + if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) + *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, + si->value_len); + if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && + acl_len > OCFS2_XATTR_INLINE_SIZE) { + *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, acl_len); + if (S_ISDIR(mode)) + *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, + acl_len); + } + + return ret; +} + static int ocfs2_xattr_extend_allocation(struct inode *inode, u32 clusters_to_add, struct buffer_head *xattr_bh, diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index 6163df336d8..9a67e7d8f81 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -66,5 +66,8 @@ int ocfs2_init_security_set(handle_t *, struct inode *, int ocfs2_calc_security_init(struct inode *, struct ocfs2_security_xattr_info *, int *, int *, struct ocfs2_alloc_context **); +int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *, + int, struct ocfs2_security_xattr_info *, + int *, int *, struct ocfs2_alloc_context **); #endif /* OCFS2_XATTR_H */ -- cgit v1.2.3-70-g09d2 From a68979b857283daf4acc405e476dcc8812a3ff2b Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Fri, 14 Nov 2008 11:17:52 +0800 Subject: ocfs2: add mount option and Kconfig option for acl This patch adds the Kconfig option 
"CONFIG_OCFS2_FS_POSIX_ACL" and mount options "acl" to enable acls in Ocfs2. Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- Documentation/filesystems/ocfs2.txt | 3 ++- fs/Kconfig | 9 +++++++++ fs/ocfs2/super.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 67310fbbb7d..c2a0871280a 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -31,7 +31,6 @@ Features which OCFS2 does not support yet: - quotas - Directory change notification (F_NOTIFY) - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) - - POSIX ACLs Mount options ============= @@ -79,3 +78,5 @@ inode64 Indicates that Ocfs2 is allowed to create inodes at bits of significance. user_xattr (*) Enables Extended User Attributes. nouser_xattr Disables Extended User Attributes. +acl Enables POSIX Access Control Lists support. +noacl (*) Disables POSIX Access Control Lists support. diff --git a/fs/Kconfig b/fs/Kconfig index ff0e8198020..e8a47f74a83 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -268,6 +268,15 @@ config OCFS2_COMPAT_JBD is backwards compatible with JBD. It is safe to say N here. However, if you really want to use the original JBD, say Y here. +config OCFS2_FS_POSIX_ACL + bool "OCFS2 POSIX Access Control Lists" + depends on OCFS2_FS + select FS_POSIX_ACL + default n + help + Posix Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. 
+ endif # BLOCK source "fs/notify/Kconfig" diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 304b63ac78c..9e7accc68b4 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -158,6 +158,8 @@ enum { Opt_user_xattr, Opt_nouser_xattr, Opt_inode64, + Opt_acl, + Opt_noacl, Opt_err, }; @@ -180,6 +182,8 @@ static const match_table_t tokens = { {Opt_user_xattr, "user_xattr"}, {Opt_nouser_xattr, "nouser_xattr"}, {Opt_inode64, "inode64"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, {Opt_err, NULL} }; @@ -466,6 +470,8 @@ unlock_osb: if (!ret) { /* Only save off the new mount options in case of a successful * remount. */ + if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) + parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; @@ -651,6 +657,10 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) } brelse(bh); bh = NULL; + + if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) + parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; + osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; @@ -664,6 +674,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = OCFS2_SUPER_MAGIC; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); + /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, * heartbeat=none */ if (bdev_read_only(sb->s_bdev)) { @@ -945,6 +958,19 @@ static int ocfs2_parse_options(struct super_block *sb, case Opt_inode64: mopt->mount_opt |= OCFS2_MOUNT_INODE64; break; +#ifdef CONFIG_OCFS2_FS_POSIX_ACL + case Opt_acl: + mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; + break; + case Opt_noacl: + mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; + break; +#else + case Opt_acl: + case Opt_noacl: + printk(KERN_INFO "ocfs2 (no)acl options not supported\n"); + break; +#endif default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -1017,6 +1043,13 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) if (opts & OCFS2_MOUNT_INODE64) seq_printf(s, ",inode64"); +#ifdef CONFIG_OCFS2_FS_POSIX_ACL + if (opts & OCFS2_MOUNT_POSIX_ACL) + seq_printf(s, ",acl"); + else + seq_printf(s, ",noacl"); +#endif + return 0; } -- cgit v1.2.3-70-g09d2 From b657c95c11088d77fc1bfc9c84d940f778bf9d12 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:11 -0800 Subject: ocfs2: Wrap inode block reads in a dedicated function. The ocfs2 code currently reads inodes off disk with a simple ocfs2_read_block() call. Each place that does this has a different set of sanity checks it performs. Some check only the signature. A couple validate the block number (the block read vs di->i_blkno). A couple others check for VALID_FL. Only one place validates i_fs_generation. A couple check nothing. Even when an error is found, they don't all do the same thing. We wrap inode reading into ocfs2_read_inode_block(). This will validate all the above fields, going readonly if they are invalid (they never should be). ocfs2_read_inode_block_full() is provided for the places that want to pass read_block flags. Every caller is passing a struct inode with a valid ip_blkno, so we don't need a separate blkno argument either. 
We will remove the validation checks from the rest of the code in a later commit, as they are no longer necessary. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 2 +- fs/ocfs2/aops.c | 11 +--- fs/ocfs2/dir.c | 6 +-- fs/ocfs2/dlmglue.c | 12 ++--- fs/ocfs2/extent_map.c | 2 +- fs/ocfs2/file.c | 21 ++------ fs/ocfs2/inode.c | 136 ++++++++++++++++++++++++++++++++++++-------------- fs/ocfs2/inode.h | 16 +++++- fs/ocfs2/journal.c | 3 +- fs/ocfs2/localalloc.c | 8 +-- fs/ocfs2/namei.c | 14 +----- fs/ocfs2/symlink.c | 2 +- 12 files changed, 136 insertions(+), 97 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 5592a2f6335..9c598adc947 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5658,7 +5658,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, goto bail; } - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); + status = ocfs2_read_inode_block(inode, &bh); if (status < 0) { iput(inode); mlog_errno(status); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index c22543b3342..e219f8b546a 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -68,20 +68,13 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, goto bail; } - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); + status = ocfs2_read_inode_block(inode, &bh); if (status < 0) { mlog_errno(status); goto bail; } fe = (struct ocfs2_dinode *) bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", - (unsigned long long)le64_to_cpu(fe->i_blkno), 7, - fe->i_signature); - goto bail; - } - if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb, le32_to_cpu(fe->i_clusters))) { mlog(ML_ERROR, "block offset is outside the allocated size: " @@ -262,7 +255,7 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page) BUG_ON(!PageLocked(page)); BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); - ret = 
ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog_errno(ret); goto out; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 026e6eb8518..5777045f1a6 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -231,7 +231,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name, struct ocfs2_dinode *di; struct ocfs2_inline_data *data; - ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(dir, &di_bh); if (ret) { mlog_errno(ret); goto out; @@ -458,7 +458,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle, struct ocfs2_dinode *di; struct ocfs2_inline_data *data; - ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(dir, &di_bh); if (ret) { mlog_errno(ret); goto out; @@ -636,7 +636,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, struct ocfs2_inline_data *data; struct ocfs2_dir_entry *de; - ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 6e6cc0a2e5f..9f2a7f75d1b 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2024,7 +2024,7 @@ static int ocfs2_inode_lock_update(struct inode *inode, } else { /* Boo, we have to go to disk. */ /* read bh, cast, ocfs2_refresh_inode */ - status = ocfs2_read_block(inode, oi->ip_blkno, bh); + status = ocfs2_read_inode_block(inode, bh); if (status < 0) { mlog_errno(status); goto bail_refresh; @@ -2032,18 +2032,14 @@ static int ocfs2_inode_lock_update(struct inode *inode, fe = (struct ocfs2_dinode *) (*bh)->b_data; /* This is a good chance to make sure we're not - * locking an invalid object. + * locking an invalid object. ocfs2_read_inode_block() + * already checked that the inode block is sane. 
* * We bug on a stale inode here because we checked * above whether it was wiped from disk. The wiping * node provides a guarantee that we receive that * message and can mark the inode before dropping any * locks associated with it. */ - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); - status = -EIO; - goto bail_refresh; - } mlog_bug_on_msg(inode->i_generation != le32_to_cpu(fe->i_generation), "Invalid dinode %llu disk generation: %u " @@ -2085,7 +2081,7 @@ static int ocfs2_assign_bh(struct inode *inode, return 0; } - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh); + status = ocfs2_read_inode_block(inode, ret_bh); if (status < 0) mlog_errno(status); diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 2baedac5823..b686b31cf49 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -630,7 +630,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, if (ret == 0) goto out; - ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog_errno(ret); goto out; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4636aa6b011..41001d515fa 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -402,12 +402,9 @@ static int ocfs2_truncate_file(struct inode *inode, (unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)new_i_size); + /* We trust di_bh because it comes from ocfs2_inode_lock(), which + * already validated it */ fe = (struct ocfs2_dinode *) di_bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); - status = -EIO; - goto bail; - } mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode), "Inode %llu, inode i_size = %lld != di " @@ -546,18 +543,12 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, */ BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); + status = 
ocfs2_read_inode_block(inode, &bh); if (status < 0) { mlog_errno(status); goto leave; } - fe = (struct ocfs2_dinode *) bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); - status = -EIO; - goto leave; - } restart_all: BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); @@ -1135,9 +1126,8 @@ static int ocfs2_write_remove_suid(struct inode *inode) { int ret; struct buffer_head *bh = NULL; - struct ocfs2_inode_info *oi = OCFS2_I(inode); - ret = ocfs2_read_block(inode, oi->ip_blkno, &bh); + ret = ocfs2_read_inode_block(inode, &bh); if (ret < 0) { mlog_errno(ret); goto out; @@ -1163,8 +1153,7 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode, struct buffer_head *di_bh = NULL; if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { - ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, - &di_bh); + ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog_errno(ret); goto out; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 7aa00d51187..9eb701b8646 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -214,12 +214,11 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) return 0; } -int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, - int create_ino) +void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, + int create_ino) { struct super_block *sb; struct ocfs2_super *osb; - int status = -EINVAL; int use_plocks = 1; mlog_entry("(0x%p, size:%llu)\n", inode, @@ -232,25 +231,17 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks()) use_plocks = 0; - /* this means that read_inode cannot create a superblock inode - * today. change if needed. 
*/ - if (!OCFS2_IS_VALID_DINODE(fe) || - !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { - mlog(0, "Invalid dinode: i_ino=%lu, i_blkno=%llu, " - "signature = %.*s, flags = 0x%x\n", - inode->i_ino, - (unsigned long long)le64_to_cpu(fe->i_blkno), 7, - fe->i_signature, le32_to_cpu(fe->i_flags)); - goto bail; - } + /* + * These have all been checked by ocfs2_read_inode_block() or set + * by ocfs2_mknod_locked(), so a failure is a code bug. + */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); /* This means that read_inode + cannot create a superblock + inode today. change if + that is needed. */ + BUG_ON(!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))); + BUG_ON(le32_to_cpu(fe->i_fs_generation) != osb->fs_generation); - if (le32_to_cpu(fe->i_fs_generation) != osb->fs_generation) { - mlog(ML_ERROR, "file entry generation does not match " - "superblock! osb->fs_generation=%x, " - "fe->i_fs_generation=%x\n", - osb->fs_generation, le32_to_cpu(fe->i_fs_generation)); - goto bail; - } OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); @@ -354,10 +345,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ocfs2_set_inode_flags(inode); - status = 0; -bail: - mlog_exit(status); - return status; + mlog_exit_void(); } static int ocfs2_read_locked_inode(struct inode *inode, @@ -460,11 +448,14 @@ static int ocfs2_read_locked_inode(struct inode *inode, } } - if (can_lock) - status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh, - OCFS2_BH_IGNORE_CACHE); - else + if (can_lock) { + status = ocfs2_read_inode_block_full(inode, &bh, + OCFS2_BH_IGNORE_CACHE); + } else { status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); + if (!status) + status = ocfs2_validate_inode_block(osb->sb, bh); + } if (status < 0) { mlog_errno(status); goto bail; @@ -472,12 +463,6 @@ static int ocfs2_read_locked_inode(struct inode *inode, status = -EINVAL; fe = (struct ocfs2_dinode *) bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - 
mlog(0, "Invalid dinode #%llu: signature = %.*s\n", - (unsigned long long)args->fi_blkno, 7, - fe->i_signature); - goto bail; - } /* * This is a code bug. Right now the caller needs to @@ -491,10 +476,9 @@ static int ocfs2_read_locked_inode(struct inode *inode, if (S_ISCHR(le16_to_cpu(fe->i_mode)) || S_ISBLK(le16_to_cpu(fe->i_mode))) - inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); + inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); - if (ocfs2_populate_inode(inode, fe, 0) < 0) - goto bail; + ocfs2_populate_inode(inode, fe, 0); BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); @@ -1264,3 +1248,79 @@ void ocfs2_refresh_inode(struct inode *inode, spin_unlock(&OCFS2_I(inode)->ip_lock); } + +int ocfs2_validate_inode_block(struct super_block *sb, + struct buffer_head *bh) +{ + int rc = -EINVAL; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; + + BUG_ON(!buffer_uptodate(bh)); + + if (!OCFS2_IS_VALID_DINODE(di)) { + ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n", + (unsigned long long)bh->b_blocknr, 7, + di->i_signature); + goto bail; + } + + if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) { + ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(di->i_blkno)); + goto bail; + } + + if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { + ocfs2_error(sb, + "Invalid dinode #%llu: OCFS2_VALID_FL not set\n", + (unsigned long long)bh->b_blocknr); + goto bail; + } + + if (le32_to_cpu(di->i_fs_generation) != + OCFS2_SB(sb)->fs_generation) { + ocfs2_error(sb, + "Invalid dinode #%llu: fs_generation is %u\n", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(di->i_fs_generation)); + goto bail; + } + + rc = 0; + +bail: + return rc; +} + +int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, + int flags) +{ + int rc; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp, + 
flags); + if (rc) + goto out; + + if (!(flags & OCFS2_BH_READAHEAD)) { + rc = ocfs2_validate_inode_block(inode->i_sb, tmp); + if (rc) { + brelse(tmp); + goto out; + } + } + + /* If ocfs2_read_blocks() got us a new bh, pass it up. */ + if (!*bh) + *bh = tmp; + +out: + return rc; +} + +int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh) +{ + return ocfs2_read_inode_block_full(inode, bh, 0); +} diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 2f37af9bcc4..b79c371a9d2 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -128,8 +128,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags, int sysfile_type); int ocfs2_inode_init_private(struct inode *inode); int ocfs2_inode_revalidate(struct dentry *dentry); -int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, - int create_ino); +void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, + int create_ino); void ocfs2_read_inode(struct inode *inode); void ocfs2_read_inode2(struct inode *inode, void *opaque); ssize_t ocfs2_rw_direct(int rw, struct file *filp, char *buf, @@ -153,4 +153,16 @@ static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode) return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits); } +/* Validate that a bh contains a valid inode */ +int ocfs2_validate_inode_block(struct super_block *sb, + struct buffer_head *bh); +/* + * Read an inode block into *bh. If *bh is NULL, a bh will be allocated. + * This is a cached read. The inode will be validated with + * ocfs2_validate_inode_block(). 
+ */ +int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh); +/* The same, but can be passed OCFS2_BH_* flags */ +int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, + int flags); #endif /* OCFS2_INODE_H */ diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 99fe9d584f3..877aaa05e19 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1135,8 +1135,7 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb, } SET_INODE_JOURNAL(inode); - status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh, - OCFS2_BH_IGNORE_CACHE); + status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 687b28713c3..19cfb1b9ce0 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -248,8 +248,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) goto bail; } - status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, - &alloc_bh, OCFS2_BH_IGNORE_CACHE); + status = ocfs2_read_inode_block_full(inode, &alloc_bh, + OCFS2_BH_IGNORE_CACHE); if (status < 0) { mlog_errno(status); goto bail; @@ -459,8 +459,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, mutex_lock(&inode->i_mutex); - status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, - &alloc_bh, OCFS2_BH_IGNORE_CACHE); + status = ocfs2_read_inode_block_full(inode, &alloc_bh, + OCFS2_BH_IGNORE_CACHE); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 76551451209..0134bafdab9 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -531,15 +531,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, goto leave; } - if (ocfs2_populate_inode(inode, fe, 1) < 0) { - mlog(ML_ERROR, "populate inode failed! 
bh->b_blocknr=%llu, " - "i_blkno=%llu, i_ino=%lu\n", - (unsigned long long)(*new_fe_bh)->b_blocknr, - (unsigned long long)le64_to_cpu(fe->i_blkno), - inode->i_ino); - BUG(); - } - + ocfs2_populate_inode(inode, fe, 1); ocfs2_inode_set_new(osb, inode); if (!ocfs2_mount_local(osb)) { status = ocfs2_create_new_inode_locks(inode); @@ -1864,9 +1856,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); - status = ocfs2_read_block(orphan_dir_inode, - OCFS2_I(orphan_dir_inode)->ip_blkno, - &orphan_dir_bh); + status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh); if (status < 0) { mlog_errno(status); goto leave; diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index cbd03dfdc7b..ed0a0cfd68d 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -84,7 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode, mlog_entry_void(); - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh); + status = ocfs2_read_inode_block(inode, bh); if (status < 0) { mlog_errno(status); link = ERR_PTR(status); -- cgit v1.2.3-70-g09d2 From 10995aa2451afa20b721cc7de856cae1a13dba57 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:12 -0800 Subject: ocfs2: Morph the haphazard OCFS2_IS_VALID_DINODE() checks. Random places in the code would check a dinode bh to see if it was valid. Not only did they do different levels of validation, they handled errors in different ways. The previous commit unified inode block reads, validating all block reads in the same place. Thus, these haphazard checks are no longer necessary. Rather than eliminate them, however, we change them to BUG_ON() checks. This ensures the assumptions remain true. All of the code paths to these checks have been audited to ensure they come from a validated inode read. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 50 +++++++++++++++++++++----------------------------- fs/ocfs2/journal.c | 17 +++++------------ fs/ocfs2/ocfs2.h | 8 -------- fs/ocfs2/resize.c | 10 ++++------ fs/ocfs2/suballoc.c | 36 ++++++++++++++++-------------------- 5 files changed, 46 insertions(+), 75 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 9c598adc947..320545b9fe1 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -187,20 +187,12 @@ static int ocfs2_dinode_insert_check(struct inode *inode, static int ocfs2_dinode_sanity_check(struct inode *inode, struct ocfs2_extent_tree *et) { - int ret = 0; - struct ocfs2_dinode *di; + struct ocfs2_dinode *di = et->et_object; BUG_ON(et->et_ops != &ocfs2_dinode_et_ops); + BUG_ON(!OCFS2_IS_VALID_DINODE(di)); - di = et->et_object; - if (!OCFS2_IS_VALID_DINODE(di)) { - ret = -EIO; - ocfs2_error(inode->i_sb, - "Inode %llu has invalid path root", - (unsigned long long)OCFS2_I(inode)->ip_blkno); - } - - return ret; + return 0; } static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et) @@ -5380,13 +5372,13 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb, start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); di = (struct ocfs2_dinode *) tl_bh->b_data; - tl = &di->id2.i_dealloc; - if (!OCFS2_IS_VALID_DINODE(di)) { - OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); - status = -EIO; - goto bail; - } + /* tl_bh is loaded from ocfs2_truncate_log_init(). 
It's validated + * by the underlying call to ocfs2_read_inode_block(), so any + * corruption is a code bug */ + BUG_ON(!OCFS2_IS_VALID_DINODE(di)); + + tl = &di->id2.i_dealloc; tl_count = le16_to_cpu(tl->tl_count); mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || tl_count == 0, @@ -5536,13 +5528,13 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) BUG_ON(mutex_trylock(&tl_inode->i_mutex)); di = (struct ocfs2_dinode *) tl_bh->b_data; - tl = &di->id2.i_dealloc; - if (!OCFS2_IS_VALID_DINODE(di)) { - OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); - status = -EIO; - goto out; - } + /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated + * by the underlying call to ocfs2_read_inode_block(), so any + * corruption is a code bug */ + BUG_ON(!OCFS2_IS_VALID_DINODE(di)); + + tl = &di->id2.i_dealloc; num_to_flush = le16_to_cpu(tl->tl_used); mlog(0, "Flush %u records from truncate log #%llu\n", num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno); @@ -5697,13 +5689,13 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, } di = (struct ocfs2_dinode *) tl_bh->b_data; - tl = &di->id2.i_dealloc; - if (!OCFS2_IS_VALID_DINODE(di)) { - OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di); - status = -EIO; - goto bail; - } + /* tl_bh is loaded from ocfs2_get_truncate_log_info(). 
It's + * validated by the underlying call to ocfs2_read_inode_block(), + * so any corruption is a code bug */ + BUG_ON(!OCFS2_IS_VALID_DINODE(di)); + + tl = &di->id2.i_dealloc; if (le16_to_cpu(tl->tl_used)) { mlog(0, "We'll have %u logs to recover\n", le16_to_cpu(tl->tl_used)); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 877aaa05e19..9223bfcca3b 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -587,17 +587,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, mlog_entry_void(); fe = (struct ocfs2_dinode *)bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - /* This is called from startup/shutdown which will - * handle the errors in a specific manner, so no need - * to call ocfs2_error() here. */ - mlog(ML_ERROR, "Journal dinode %llu has invalid " - "signature: %.*s", - (unsigned long long)le64_to_cpu(fe->i_blkno), 7, - fe->i_signature); - status = -EIO; - goto out; - } + + /* The journal bh on the osb always comes from ocfs2_journal_init() + * and was validated there inside ocfs2_inode_lock_full(). It's a + * code bug if we mess it up. 
*/ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); flags = le32_to_cpu(fe->id1.journal1.ij_flags); if (dirty) @@ -613,7 +607,6 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, if (status < 0) mlog_errno(status); -out: mlog_exit(status); return status; } diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 25d07ff1d3c..467bdb6f71e 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -444,14 +444,6 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) #define OCFS2_IS_VALID_DINODE(ptr) \ (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) -#define OCFS2_RO_ON_INVALID_DINODE(__sb, __di) do { \ - typeof(__di) ____di = (__di); \ - ocfs2_error((__sb), \ - "Dinode # %llu has bad signature %.*s", \ - (unsigned long long)le64_to_cpu((____di)->i_blkno), 7, \ - (____di)->i_signature); \ -} while (0) - #define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \ (!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE)) diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index ffd48db229a..739d452f617 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -314,6 +314,10 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) fe = (struct ocfs2_dinode *)main_bm_bh->b_data; + /* main_bm_bh is validated by inode read inside ocfs2_inode_lock(), + * so any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); + if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != ocfs2_group_bitmap_size(osb->sb) * 8) { mlog(ML_ERROR, "The disk is too old and small. 
" @@ -322,12 +326,6 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) goto out_unlock; } - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe); - ret = -EIO; - goto out_unlock; - } - first_new_cluster = le32_to_cpu(fe->i_clusters); lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, first_new_cluster - 1); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index c5ff18b46b5..95d432b694e 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -441,11 +441,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, ac->ac_alloc_slot = slot; fe = (struct ocfs2_dinode *) bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); - status = -EIO; - goto bail; - } + + /* The bh was validated by the inode read inside + * ocfs2_inode_lock(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); + if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu", (unsigned long long)le64_to_cpu(fe->i_blkno)); @@ -931,11 +931,6 @@ static int ocfs2_relink_block_group(handle_t *handle, struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); - status = -EIO; - goto out; - } if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); status = -EIO; @@ -1392,11 +1387,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, BUG_ON(!ac->ac_bh); fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe); - status = -EIO; - goto bail; - } + + /* The bh was validated by the inode read during + * ocfs2_reserve_suballoc_bits(). Any corruption is a code bug. 
*/ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); + if (le32_to_cpu(fe->id1.bitmap1.i_used) >= le32_to_cpu(fe->id1.bitmap1.i_total)) { ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " @@ -1782,11 +1777,12 @@ int ocfs2_free_suballoc_bits(handle_t *handle, mlog_entry_void(); - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); - status = -EIO; - goto bail; - } + /* The alloc_bh comes from ocfs2_free_dinode() or + * ocfs2_free_clusters(). The callers have all locked the + * allocator and gotten alloc_bh from the lock call. This + * validates the dinode buffer. Any corruption that has happened + * is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", -- cgit v1.2.3-70-g09d2 From 57e3e7971136003c96766346049aa73b82cab079 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:13 -0800 Subject: ocfs2: Consolidate validation of group descriptors. Currently the validation of group descriptors is directly duplicated so that one version can error the filesystem and the other (resize) can just report the problem. Consolidate to one function that takes a boolean. Wrap that function with the old call for the old users. This is in preparation for lifting the read+validate step into a single function. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/resize.c | 40 ++++++----------------------- fs/ocfs2/suballoc.c | 74 +++++++++++++++++++++++++++++++---------------------- fs/ocfs2/suballoc.h | 20 ++++++++++++--- 3 files changed, 68 insertions(+), 66 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 739d452f617..a2de32a317a 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -396,41 +396,16 @@ static int ocfs2_check_new_group(struct inode *inode, struct buffer_head *group_bh) { int ret; - struct ocfs2_group_desc *gd; + struct ocfs2_group_desc *gd = + (struct ocfs2_group_desc *)group_bh->b_data; u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); - unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * - le16_to_cpu(di->id2.i_chain.cl_bpc); + ret = ocfs2_validate_group_descriptor(inode->i_sb, di, gd, 1); + if (ret) + goto out; - gd = (struct ocfs2_group_desc *)group_bh->b_data; - - ret = -EIO; - if (!OCFS2_IS_VALID_GROUP_DESC(gd)) - mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno)); - else if (di->i_blkno != gd->bg_parent_dinode) - mlog(ML_ERROR, "Group descriptor # %llu has bad parent " - "pointer (%llu, expected %llu)\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), - (unsigned long long)le64_to_cpu(di->i_blkno)); - else if (le16_to_cpu(gd->bg_bits) > max_bits) - mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits)); - else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) - mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " - "claims that %u are free\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - le16_to_cpu(gd->bg_free_bits_count)); - else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) - mlog(ML_ERROR, "Group 
descriptor # %llu has bit count %u but " - "max bitmap bits of %u\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - 8 * le16_to_cpu(gd->bg_size)); - else if (le16_to_cpu(gd->bg_chain) != input->chain) + ret = -EINVAL; + if (le16_to_cpu(gd->bg_chain) != input->chain) mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u " "while input has %u set.\n", (unsigned long long)le64_to_cpu(gd->bg_blkno), @@ -449,6 +424,7 @@ static int ocfs2_check_new_group(struct inode *inode, else ret = 0; +out: return ret; } diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 95d432b694e..ddba97dc06a 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -146,59 +146,71 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) } /* somewhat more expensive than our other checks, so use sparingly. */ -int ocfs2_check_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd) +int ocfs2_validate_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *gd, + int clean_error) { unsigned int max_bits; +#define do_error(fmt, ...) 
\ + do{ \ + if (clean_error) \ + mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \ + else \ + ocfs2_error(sb, fmt, ##__VA_ARGS__); \ + } while (0) + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd); - return -EIO; + do_error("Group Descriptor #%llu has bad signature %.*s", + (unsigned long long)le64_to_cpu(gd->bg_blkno), 7, + gd->bg_signature); + return -EINVAL; } if (di->i_blkno != gd->bg_parent_dinode) { - ocfs2_error(sb, "Group descriptor # %llu has bad parent " - "pointer (%llu, expected %llu)", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), - (unsigned long long)le64_to_cpu(di->i_blkno)); - return -EIO; + do_error("Group descriptor # %llu has bad parent " + "pointer (%llu, expected %llu)", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), + (unsigned long long)le64_to_cpu(di->i_blkno)); + return -EINVAL; } max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); if (le16_to_cpu(gd->bg_bits) > max_bits) { - ocfs2_error(sb, "Group descriptor # %llu has bit count of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits)); - return -EIO; + do_error("Group descriptor # %llu has bit count of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits)); + return -EINVAL; } if (le16_to_cpu(gd->bg_chain) >= le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { - ocfs2_error(sb, "Group descriptor # %llu has bad chain %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_chain)); - return -EIO; + do_error("Group descriptor # %llu has bad chain %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_chain)); + return -EINVAL; } if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { - ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " - "claims that %u are free", - (unsigned long 
long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - le16_to_cpu(gd->bg_free_bits_count)); - return -EIO; + do_error("Group descriptor # %llu has bit count %u but " + "claims that %u are free", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + le16_to_cpu(gd->bg_free_bits_count)); + return -EINVAL; } if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { - ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " - "max bitmap bits of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - 8 * le16_to_cpu(gd->bg_size)); - return -EIO; + do_error("Group descriptor # %llu has bit count %u but " + "max bitmap bits of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + 8 * le16_to_cpu(gd->bg_size)); + return -EINVAL; } +#undef do_error return 0; } diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 4df159d8f45..7adfcc478bd 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -165,9 +165,23 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); /* somewhat more expensive than our other checks, so use sparingly. */ -int ocfs2_check_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd); +/* + * By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it + * finds a problem. A caller that wants to check a group descriptor + * without going readonly passes a nonzero clean_error. This is only + * resize, really. 
+ */ +int ocfs2_validate_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *gd, + int clean_error); +static inline int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *gd) +{ + return ocfs2_validate_group_descriptor(sb, di, gd, 0); +} + int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters_to_add, u32 extents_to_split, struct ocfs2_alloc_context **data_ac, -- cgit v1.2.3-70-g09d2 From 68f64d471be38631d7196b938d9809802dd467fa Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:14 -0800 Subject: ocfs2: Wrap group descriptor reads in a dedicated function. We have a clean call for validating group descriptors, but every place that wants one always does a read_block()+validate() call pair. Create a toplevel ocfs2_read_group_descriptor() that does the right thing. This allows us to leverage the single call point later for fancier handling. We also add validation of gd->bg_generation against the superblock and gd->bg_blkno against the block we thought we read. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/resize.c | 12 ++---- fs/ocfs2/suballoc.c | 108 +++++++++++++++++++++++++++++++--------------------- fs/ocfs2/suballoc.h | 19 ++++----- 3 files changed, 78 insertions(+), 61 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index a2de32a317a..252baff5eb8 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -330,20 +330,14 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, first_new_cluster - 1); - ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh); + ret = ocfs2_read_group_descriptor(main_bm_inode, fe, lgd_blkno, + &group_bh); if (ret < 0) { mlog_errno(ret); goto out_unlock; } - group = (struct ocfs2_group_desc *)group_bh->b_data; - ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group); - if (ret) { - mlog_errno(ret); - goto out_unlock; - } - cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters > le16_to_cpu(fe->id2.i_chain.cl_cpg)) { @@ -400,7 +394,7 @@ static int ocfs2_check_new_group(struct inode *inode, (struct ocfs2_group_desc *)group_bh->b_data; u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); - ret = ocfs2_validate_group_descriptor(inode->i_sb, di, gd, 1); + ret = ocfs2_validate_group_descriptor(inode->i_sb, di, group_bh, 1); if (ret) goto out; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index ddba97dc06a..797f509d725 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -145,13 +145,13 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); } -/* somewhat more expensive than our other checks, so use sparingly. 
*/ int ocfs2_validate_group_descriptor(struct super_block *sb, struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd, + struct buffer_head *bh, int clean_error) { unsigned int max_bits; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; #define do_error(fmt, ...) \ do{ \ @@ -162,16 +162,32 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, } while (0) if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { - do_error("Group Descriptor #%llu has bad signature %.*s", - (unsigned long long)le64_to_cpu(gd->bg_blkno), 7, + do_error("Group descriptor #%llu has bad signature %.*s", + (unsigned long long)bh->b_blocknr, 7, gd->bg_signature); return -EINVAL; } + if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) { + do_error("Group descriptor #%llu has an invalid bg_blkno " + "of %llu", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(gd->bg_blkno)); + return -EINVAL; + } + + if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) { + do_error("Group descriptor #%llu has an invalid " + "fs_generation of #%u", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(gd->bg_generation)); + return -EINVAL; + } + if (di->i_blkno != gd->bg_parent_dinode) { - do_error("Group descriptor # %llu has bad parent " + do_error("Group descriptor #%llu has bad parent " "pointer (%llu, expected %llu)", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)bh->b_blocknr, (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), (unsigned long long)le64_to_cpu(di->i_blkno)); return -EINVAL; @@ -179,33 +195,33 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); if (le16_to_cpu(gd->bg_bits) > max_bits) { - do_error("Group descriptor # %llu has bit count of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + do_error("Group descriptor #%llu has bit count of %u", + (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_bits)); return -EINVAL; } 
if (le16_to_cpu(gd->bg_chain) >= le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { - do_error("Group descriptor # %llu has bad chain %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + do_error("Group descriptor #%llu has bad chain %u", + (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_chain)); return -EINVAL; } if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { - do_error("Group descriptor # %llu has bit count %u but " + do_error("Group descriptor #%llu has bit count %u but " "claims that %u are free", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_bits), le16_to_cpu(gd->bg_free_bits_count)); return -EINVAL; } if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { - do_error("Group descriptor # %llu has bit count %u but " + do_error("Group descriptor #%llu has bit count %u but " "max bitmap bits of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_bits), 8 * le16_to_cpu(gd->bg_size)); return -EINVAL; @@ -215,6 +231,30 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, return 0; } +int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, + u64 gd_blkno, struct buffer_head **bh) +{ + int rc; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_block(inode, gd_blkno, &tmp); + if (rc) + goto out; + + rc = ocfs2_validate_group_descriptor(inode->i_sb, di, tmp, 0); + if (rc) { + brelse(tmp); + goto out; + } + + /* If ocfs2_read_block() got us a new bh, pass it up. 
*/ + if (!*bh) + *bh = tmp; + +out: + return rc; +} + static int ocfs2_block_group_fill(handle_t *handle, struct inode *alloc_inode, struct buffer_head *bg_bh, @@ -1177,21 +1217,17 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, u16 found; struct buffer_head *group_bh = NULL; struct ocfs2_group_desc *gd; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data; struct inode *alloc_inode = ac->ac_inode; - ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh); + ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno, + &group_bh); if (ret < 0) { mlog_errno(ret); return ret; } gd = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd); - ret = -EIO; - goto out; - } - ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, ac->ac_max_block, bit_off, &found); if (ret < 0) { @@ -1248,19 +1284,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, bits_wanted, chain, (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); - status = ocfs2_read_block(alloc_inode, - le64_to_cpu(cl->cl_recs[chain].c_blkno), - &group_bh); + status = ocfs2_read_group_descriptor(alloc_inode, fe, + le64_to_cpu(cl->cl_recs[chain].c_blkno), + &group_bh); if (status < 0) { mlog_errno(status); goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); - if (status) { - mlog_errno(status); - goto bail; - } status = -ENOSPC; /* for now, the chain search is a bit simplistic. 
We just use @@ -1278,18 +1309,13 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, next_group = le64_to_cpu(bg->bg_next_group); prev_group_bh = group_bh; group_bh = NULL; - status = ocfs2_read_block(alloc_inode, - next_group, &group_bh); + status = ocfs2_read_group_descriptor(alloc_inode, fe, + next_group, &group_bh); if (status < 0) { mlog_errno(status); goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); - if (status) { - mlog_errno(status); - goto bail; - } } if (status < 0) { if (status != -ENOSPC) @@ -1801,18 +1827,14 @@ int ocfs2_free_suballoc_bits(handle_t *handle, (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, (unsigned long long)bg_blkno, start_bit); - status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh); + status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno, + &group_bh); if (status < 0) { mlog_errno(status); goto bail; } - group = (struct ocfs2_group_desc *) group_bh->b_data; - status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); - if (status) { - mlog_errno(status); - goto bail; - } + BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); status = ocfs2_block_group_clear_bits(handle, alloc_inode, diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 7adfcc478bd..43de4fd826d 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -164,23 +164,24 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); * and return that block offset. */ u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); -/* somewhat more expensive than our other checks, so use sparingly. */ /* * By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it * finds a problem. A caller that wants to check a group descriptor * without going readonly passes a nonzero clean_error. This is only - * resize, really. + * resize, really. Everyone else should be using + * ocfs2_read_group_descriptor(). 
*/ int ocfs2_validate_group_descriptor(struct super_block *sb, struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd, + struct buffer_head *bh, int clean_error); -static inline int ocfs2_check_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd) -{ - return ocfs2_validate_group_descriptor(sb, di, gd, 0); -} +/* + * Read a group descriptor block into *bh. If *bh is NULL, a bh will be + * allocated. This is a cached read. The descriptor will be validated with + * ocfs2_validate_group_descriptor(). + */ +int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, + u64 gd_blkno, struct buffer_head **bh); int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters_to_add, u32 extents_to_split, -- cgit v1.2.3-70-g09d2 From 4203530613280281868b3ca36c817530bca3825c Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:15 -0800 Subject: ocfs2: Morph the haphazard OCFS2_IS_VALID_GROUP_DESC() checks. Random places in the code would check a group descriptor bh to see if it was valid. The previous commit unified descriptor block reads, validating all block reads in the same place. Thus, these checks are no longer necessary. Rather than eliminate them, however, we change them to BUG_ON() checks. This ensures the assumptions remain true. All of the code paths to these checks have been audited to ensure they come from a validated descriptor read. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/ocfs2.h | 7 ------- fs/ocfs2/suballoc.c | 39 ++++++++++++++------------------------- 2 files changed, 14 insertions(+), 32 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 467bdb6f71e..82ba887afa0 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -458,13 +458,6 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) #define OCFS2_IS_VALID_GROUP_DESC(ptr) \ (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE)) -#define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd) do { \ - typeof(__gd) ____gd = (__gd); \ - ocfs2_error((__sb), \ - "Group Descriptor # %llu has bad signature %.*s", \ - (unsigned long long)le64_to_cpu((____gd)->bg_blkno), 7, \ - (____gd)->bg_signature); \ -} while (0) #define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \ (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE)) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 797f509d725..766a00b2644 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -842,10 +842,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, int offset, start, found, status = 0; struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg); - return -EIO; - } + /* Callers got this descriptor from + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); found = start = best_offset = best_size = 0; bitmap = bg->bg_bitmap; @@ -910,11 +909,9 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, mlog_entry_void(); - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; - goto bail; - } + /* All callers get the descriptor via + * ocfs2_read_group_descriptor(). Any corruption is a code bug. 
*/ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, @@ -983,16 +980,10 @@ static int ocfs2_relink_block_group(handle_t *handle, struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; - goto out; - } - if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg); - status = -EIO; - goto out; - } + /* The caller got these descriptors from + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg)); mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", (unsigned long long)le64_to_cpu(fe->i_blkno), chain, @@ -1055,7 +1046,7 @@ out_rollback: bg->bg_next_group = cpu_to_le64(bg_ptr); prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); } -out: + mlog_exit(status); return status; } @@ -1758,11 +1749,9 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, mlog_entry_void(); - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; - goto bail; - } + /* The caller got this descriptor from + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); mlog(0, "off = %u, num = %u\n", bit_off, num_bits); -- cgit v1.2.3-70-g09d2 From 5e96581a377fc6bd76e9b112da9aeb8a7ae8bf22 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:16 -0800 Subject: ocfs2: Wrap extent block reads in a dedicated function. We weren't consistently checking extent blocks after we read them. Most places checked the signature, but none checked h_blkno or h_fs_generation.
Create a toplevel ocfs2_read_extent_block() that does the read and the validation. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 151 ++++++++++++++++++++++++++++++++------------------ fs/ocfs2/alloc.h | 8 +++ fs/ocfs2/extent_map.c | 23 ++------ fs/ocfs2/ocfs2.h | 8 --- 4 files changed, 111 insertions(+), 79 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 320545b9fe1..f430cc6e0f3 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -678,6 +678,66 @@ struct ocfs2_merge_ctxt { int c_split_covers_rec; }; +static int ocfs2_validate_extent_block(struct super_block *sb, + struct buffer_head *bh) +{ + struct ocfs2_extent_block *eb = + (struct ocfs2_extent_block *)bh->b_data; + + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { + ocfs2_error(sb, + "Extent block #%llu has bad signature %.*s", + (unsigned long long)bh->b_blocknr, 7, + eb->h_signature); + return -EINVAL; + } + + if (le64_to_cpu(eb->h_blkno) != bh->b_blocknr) { + ocfs2_error(sb, + "Extent block #%llu has an invalid h_blkno " + "of %llu", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(eb->h_blkno)); + return -EINVAL; + } + + if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) { + ocfs2_error(sb, + "Extent block #%llu has an invalid " + "h_fs_generation of #%u", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(eb->h_fs_generation)); + return -EINVAL; + } + + return 0; +} + +int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, + struct buffer_head **bh) +{ + int rc; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_block(inode, eb_blkno, &tmp); + if (rc) + goto out; + + rc = ocfs2_validate_extent_block(inode->i_sb, tmp); + if (rc) { + brelse(tmp); + goto out; + } + + /* If ocfs2_read_block() got us a new bh, pass it up. */ + if (!*bh) + *bh = tmp; + +out: + return rc; +} + + /* * How many free extents have we got before we need more meta data? 
*/ @@ -697,8 +757,7 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb, last_eb_blk = ocfs2_et_get_last_eb_blk(et); if (last_eb_blk) { - retval = ocfs2_read_block(inode, last_eb_blk, - &eb_bh); + retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh); if (retval < 0) { mlog_errno(retval); goto bail; @@ -900,11 +959,8 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, for(i = 0; i < new_blocks; i++) { bh = new_eb_bhs[i]; eb = (struct ocfs2_extent_block *) bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - status = -EIO; - goto bail; - } + /* ocfs2_create_new_meta_bhs() should create it right! */ + BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); eb_el = &eb->h_list; status = ocfs2_journal_access(handle, inode, bh, @@ -1044,11 +1100,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, } eb = (struct ocfs2_extent_block *) new_eb_bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - status = -EIO; - goto bail; - } + /* ocfs2_create_new_meta_bhs() should create it right! 
*/ + BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); eb_el = &eb->h_list; root_el = et->et_root_el; @@ -1168,18 +1221,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, brelse(bh); bh = NULL; - status = ocfs2_read_block(inode, blkno, &bh); + status = ocfs2_read_extent_block(inode, blkno, &bh); if (status < 0) { mlog_errno(status); goto bail; } eb = (struct ocfs2_extent_block *) bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - status = -EIO; - goto bail; - } el = &eb->h_list; if (le16_to_cpu(el->l_next_free_rec) < @@ -1532,7 +1580,7 @@ static int __ocfs2_find_path(struct inode *inode, brelse(bh); bh = NULL; - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_extent_block(inode, blkno, &bh); if (ret) { mlog_errno(ret); goto out; @@ -1540,11 +1588,6 @@ static int __ocfs2_find_path(struct inode *inode, eb = (struct ocfs2_extent_block *) bh->b_data; el = &eb->h_list; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - ret = -EIO; - goto out; - } if (le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)) { @@ -4089,8 +4132,15 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, le16_to_cpu(new_el->l_count)) { bh = path_leaf_bh(left_path); eb = (struct ocfs2_extent_block *)bh->b_data; - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, - eb); + ocfs2_error(inode->i_sb, + "Extent block #%llu has an " + "invalid l_next_free_rec of " + "%d. 
It should have " + "matched the l_count of %d", + (unsigned long long)le64_to_cpu(eb->h_blkno), + le16_to_cpu(new_el->l_next_free_rec), + le16_to_cpu(new_el->l_count)); + status = -EINVAL; goto out; } rec = &new_el->l_recs[ @@ -4139,8 +4189,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { bh = path_leaf_bh(right_path); eb = (struct ocfs2_extent_block *)bh->b_data; - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, - eb); + ocfs2_error(inode->i_sb, + "Extent block #%llu has an " + "invalid l_next_free_rec of %d", + (unsigned long long)le64_to_cpu(eb->h_blkno), + le16_to_cpu(new_el->l_next_free_rec)); + status = -EINVAL; goto out; } rec = &new_el->l_recs[1]; @@ -4286,7 +4340,9 @@ static int ocfs2_figure_insert_type(struct inode *inode, * ocfs2_figure_insert_type() and ocfs2_add_branch() * may want it later. */ - ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh); + ret = ocfs2_read_extent_block(inode, + ocfs2_et_get_last_eb_blk(et), + &bh); if (ret) { mlog_exit(ret); goto out; @@ -4752,20 +4808,15 @@ static int __ocfs2_mark_extent_written(struct inode *inode, if (path->p_tree_depth) { struct ocfs2_extent_block *eb; - ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), - &last_eb_bh); + ret = ocfs2_read_extent_block(inode, + ocfs2_et_get_last_eb_blk(et), + &last_eb_bh); if (ret) { mlog_exit(ret); goto out; } eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - ret = -EROFS; - goto out; - } - rightmost_el = &eb->h_list; } else rightmost_el = path_root_el(path); @@ -4910,8 +4961,9 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et, depth = path->p_tree_depth; if (depth > 0) { - ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), - &last_eb_bh); + ret = ocfs2_read_extent_block(inode, + ocfs2_et_get_last_eb_blk(et), + &last_eb_bh); if 
(ret < 0) { mlog_errno(ret); goto out; @@ -6231,11 +6283,10 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode, eb = (struct ocfs2_extent_block *) bh->b_data; el = &eb->h_list; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - ret = -EROFS; - goto out; - } + + /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block(). + * Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); *new_last_eb = bh; get_bh(*new_last_eb); @@ -7140,20 +7191,14 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); if (fe->id2.i_list.l_tree_depth) { - status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk), - &last_eb_bh); + status = ocfs2_read_extent_block(inode, + le64_to_cpu(fe->i_last_eb_blk), + &last_eb_bh); if (status < 0) { mlog_errno(status); goto bail; } eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - - brelse(last_eb_bh); - status = -EIO; - goto bail; - } } (*tc)->tc_last_eb_bh = last_eb_bh; diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 0fbf8fc55a4..59d37d1b7d4 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -73,6 +73,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, struct buffer_head *bh, struct ocfs2_xattr_value_root *xv); +/* + * Read an extent block into *bh. If *bh is NULL, a bh will be + * allocated. This is a cached read. The extent block will be validated + * with ocfs2_validate_extent_block(). 
+ */ +int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, + struct buffer_head **bh); + struct ocfs2_alloc_context; int ocfs2_insert_extent(struct ocfs2_super *osb, handle_t *handle, diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index b686b31cf49..0bd9d9698a2 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode, struct ocfs2_extent_block *eb; struct ocfs2_extent_list *el; - ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh); + ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh); if (ret) { mlog_errno(ret); goto out; @@ -302,12 +302,6 @@ static int ocfs2_last_eb_is_empty(struct inode *inode, eb = (struct ocfs2_extent_block *) eb_bh->b_data; el = &eb->h_list; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - ret = -EROFS; - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - goto out; - } - if (el->l_tree_depth) { ocfs2_error(inode->i_sb, "Inode %lu has non zero tree depth in " @@ -381,23 +375,16 @@ static int ocfs2_figure_hole_clusters(struct inode *inode, if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) goto no_more_extents; - ret = ocfs2_read_block(inode, - le64_to_cpu(eb->h_next_leaf_blk), - &next_eb_bh); + ret = ocfs2_read_extent_block(inode, + le64_to_cpu(eb->h_next_leaf_blk), + &next_eb_bh); if (ret) { mlog_errno(ret); goto out; } - next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data; - - if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb)) { - ret = -EROFS; - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, next_eb); - goto out; - } + next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data; el = &next_eb->h_list; - i = ocfs2_search_for_hole_index(el, v_cluster); } diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 82ba887afa0..f04b229fc75 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -447,14 +447,6 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) #define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \ (!strcmp((ptr)->h_signature, 
OCFS2_EXTENT_BLOCK_SIGNATURE)) -#define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb) do { \ - typeof(__eb) ____eb = (__eb); \ - ocfs2_error((__sb), \ - "Extent Block # %llu has bad signature %.*s", \ - (unsigned long long)le64_to_cpu((____eb)->h_blkno), 7, \ - (____eb)->h_signature); \ -} while (0) - #define OCFS2_IS_VALID_GROUP_DESC(ptr) \ (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE)) -- cgit v1.2.3-70-g09d2 From a22305cc693254a2aa651e797875669112ef8635 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:17 -0800 Subject: ocfs2: Wrap dirblock reads in a dedicated function. We have ocfs2_bread() as a vestige of the original ext-based dir code. It's only used by directories, though. Turn it into ocfs2_read_dir_block(), with a prototype matching the other metadata read functions. It's set up to validate dirblocks when the time comes. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/dir.c | 150 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 88 insertions(+), 62 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 5777045f1a6..c2f3fd93be5 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -82,49 +82,6 @@ static int ocfs2_do_extend_dir(struct super_block *sb, struct ocfs2_alloc_context *meta_ac, struct buffer_head **new_bh); -static struct buffer_head *ocfs2_bread(struct inode *inode, - int block, int *err, int reada) -{ - struct buffer_head *bh = NULL; - int tmperr; - u64 p_blkno; - int readflags = 0; - - if (reada) - readflags |= OCFS2_BH_READAHEAD; - - if (((u64)block << inode->i_sb->s_blocksize_bits) >= - i_size_read(inode)) { - BUG_ON(!reada); - return NULL; - } - - down_read(&OCFS2_I(inode)->ip_alloc_sem); - tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, - NULL); - up_read(&OCFS2_I(inode)->ip_alloc_sem); - if (tmperr < 0) { - mlog_errno(tmperr); - goto fail; - } - - tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags); - if 
(tmperr < 0) - goto fail; - - tmperr = 0; - - *err = 0; - return bh; - -fail: - brelse(bh); - bh = NULL; - - *err = -EIO; - return NULL; -} - /* * bh passed here can be an inode block or a dir data block, depending * on the inode inline data flag. @@ -250,6 +207,76 @@ out: return NULL; } +static int ocfs2_validate_dir_block(struct super_block *sb, + struct buffer_head *bh) +{ + /* + * Nothing yet. We don't validate dirents here, that's handled + * in-place when the code walks them. + */ + + return 0; +} + +/* + * This function forces all errors to -EIO for consistency with its + * predecessor, ocfs2_bread(). We haven't audited what returning the + * real error codes would do to callers. We log the real codes with + * mlog_errno() before we squash them. + */ +static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, + struct buffer_head **bh, int flags) +{ + int rc = 0; + struct buffer_head *tmp = *bh; + u64 p_blkno; + + if (((u64)v_block << inode->i_sb->s_blocksize_bits) >= + i_size_read(inode)) { + BUG_ON(!(flags & OCFS2_BH_READAHEAD)); + goto out; + } + + down_read(&OCFS2_I(inode)->ip_alloc_sem); + rc = ocfs2_extent_map_get_blocks(inode, v_block, &p_blkno, NULL, + NULL); + up_read(&OCFS2_I(inode)->ip_alloc_sem); + if (rc) { + mlog_errno(rc); + goto out; + } + + if (!p_blkno) { + rc = -EIO; + mlog(ML_ERROR, + "Directory #%llu contains a hole at offset %llu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)v_block << inode->i_sb->s_blocksize_bits); + goto out; + } + + rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags); + if (rc) { + mlog_errno(rc); + goto out; + } + + if (!(flags & OCFS2_BH_READAHEAD)) { + rc = ocfs2_validate_dir_block(inode->i_sb, tmp); + if (rc) { + brelse(tmp); + goto out; + } + } + + /* If ocfs2_read_blocks() got us a new bh, pass it up. */ + if (!*bh) + *bh = tmp; + +out: + return rc ? 
-EIO : 0; +} + static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, struct inode *dir, struct ocfs2_dir_entry **res_dir) @@ -296,15 +323,17 @@ restart: } num++; - bh = ocfs2_bread(dir, b++, &err, 1); + bh = NULL; + err = ocfs2_read_dir_block(dir, b++, &bh, + OCFS2_BH_READAHEAD); bh_use[ra_max] = bh; } } if ((bh = bh_use[ra_ptr++]) == NULL) goto next; - if (ocfs2_read_block(dir, block, &bh)) { + if (ocfs2_read_dir_block(dir, block, &bh, 0)) { /* read error, skip block & hope for the best. - * ocfs2_read_block() has released the bh. */ + * ocfs2_read_dir_block() has released the bh. */ ocfs2_error(dir->i_sb, "reading directory %llu, " "offset %lu\n", (unsigned long long)OCFS2_I(dir)->ip_blkno, @@ -724,7 +753,6 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, int i, stored; struct buffer_head * bh, * tmp; struct ocfs2_dir_entry * de; - int err; struct super_block * sb = inode->i_sb; unsigned int ra_sectors = 16; @@ -735,12 +763,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, while (!error && !stored && *f_pos < i_size_read(inode)) { blk = (*f_pos) >> sb->s_blocksize_bits; - bh = ocfs2_bread(inode, blk, &err, 0); - if (!bh) { - mlog(ML_ERROR, - "directory #%llu contains a hole at offset %lld\n", - (unsigned long long)OCFS2_I(inode)->ip_blkno, - *f_pos); + if (ocfs2_read_dir_block(inode, blk, &bh, 0)) { + /* Skip the corrupt dirblock and keep trying */ *f_pos += sb->s_blocksize - offset; continue; } @@ -754,8 +778,10 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) { for (i = ra_sectors >> (sb->s_blocksize_bits - 9); i > 0; i--) { - tmp = ocfs2_bread(inode, ++blk, &err, 1); - brelse(tmp); + tmp = NULL; + if (!ocfs2_read_dir_block(inode, ++blk, &tmp, + OCFS2_BH_READAHEAD)) + brelse(tmp); } last_ra_blk = blk; ra_sectors = 8; @@ -828,6 +854,7 @@ revalidate: } offset = 0; brelse(bh); + bh = NULL; } stored = 0; @@ -1680,8 +1707,8 @@ static int 
ocfs2_find_dir_space_el(struct inode *dir, const char *name, struct super_block *sb = dir->i_sb; int status; - bh = ocfs2_bread(dir, 0, &status, 0); - if (!bh) { + status = ocfs2_read_dir_block(dir, 0, &bh, 0); + if (status) { mlog_errno(status); goto bail; } @@ -1702,11 +1729,10 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, status = -ENOSPC; goto bail; } - bh = ocfs2_bread(dir, - offset >> sb->s_blocksize_bits, - &status, - 0); - if (!bh) { + status = ocfs2_read_dir_block(dir, + offset >> sb->s_blocksize_bits, + &bh, 0); + if (status) { mlog_errno(status); goto bail; } -- cgit v1.2.3-70-g09d2 From 4ae1d69bedc8d174cb8a558694607e013157cde1 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:18 -0800 Subject: ocfs2: Wrap xattr block reads in a dedicated function We weren't consistently checking xattr blocks after we read them. Most places checked the signature, but none checked xb_blkno or xb_fs_generation. Create a toplevel ocfs2_read_xattr_block() that does the read and the validation.
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 94 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 24 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 3cc8385f973..ef4aa5482d0 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -314,6 +314,65 @@ static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, } } +static int ocfs2_validate_xattr_block(struct super_block *sb, + struct buffer_head *bh) +{ + struct ocfs2_xattr_block *xb = + (struct ocfs2_xattr_block *)bh->b_data; + + mlog(0, "Validating xattr block %llu\n", + (unsigned long long)bh->b_blocknr); + + if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { + ocfs2_error(sb, + "Extended attribute block #%llu has bad " + "signature %.*s", + (unsigned long long)bh->b_blocknr, 7, + xb->xb_signature); + return -EINVAL; + } + + if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { + ocfs2_error(sb, + "Extended attribute block #%llu has an " + "invalid xb_blkno of %llu", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(xb->xb_blkno)); + return -EINVAL; + } + + if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { + ocfs2_error(sb, + "Extended attribute block #%llu has an invalid " + "xb_fs_generation of #%u", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(xb->xb_fs_generation)); + return -EINVAL; + } + + return 0; +} + +static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, + struct buffer_head **bh) +{ + int rc; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_block(inode, xb_blkno, &tmp); + if (!rc) { + rc = ocfs2_validate_xattr_block(inode->i_sb, tmp); + if (rc) + brelse(tmp); + } + + /* If ocfs2_read_block() got us a new bh, pass it up. 
*/ + if (!rc && !*bh) + *bh = tmp; + + return rc; +} + static inline const char *ocfs2_xattr_prefix(int name_index) { struct xattr_handler *handler = NULL; @@ -739,18 +798,14 @@ static int ocfs2_xattr_block_list(struct inode *inode, if (!di->i_xattr_loc) return ret; - ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); + ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), + &blk_bh); if (ret < 0) { mlog_errno(ret); return ret; } xb = (struct ocfs2_xattr_block *)blk_bh->b_data; - if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { - ret = -EIO; - goto cleanup; - } - if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; ret = ocfs2_xattr_list_entries(inode, header, @@ -760,7 +815,7 @@ static int ocfs2_xattr_block_list(struct inode *inode, ret = ocfs2_xattr_tree_list_index_block(inode, xt, buffer, buffer_size); } -cleanup: + brelse(blk_bh); return ret; @@ -1693,24 +1748,19 @@ static int ocfs2_xattr_free_block(struct inode *inode, u64 blk, bg_blkno; u16 bit; - ret = ocfs2_read_block(inode, block, &blk_bh); + ret = ocfs2_read_xattr_block(inode, block, &blk_bh); if (ret < 0) { mlog_errno(ret); goto out; } - xb = (struct ocfs2_xattr_block *)blk_bh->b_data; - if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { - ret = -EIO; - goto out; - } - ret = ocfs2_xattr_block_remove(inode, blk_bh); if (ret < 0) { mlog_errno(ret); goto out; } + xb = (struct ocfs2_xattr_block *)blk_bh->b_data; blk = le64_to_cpu(xb->xb_blkno); bit = le16_to_cpu(xb->xb_suballoc_bit); bg_blkno = ocfs2_which_suballoc_group(blk, bit); @@ -1950,19 +2000,15 @@ static int ocfs2_xattr_block_find(struct inode *inode, if (!di->i_xattr_loc) return ret; - ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); + ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), + &blk_bh); if (ret < 0) { mlog_errno(ret); return ret; } - xb = (struct ocfs2_xattr_block *)blk_bh->b_data; - if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { - ret = 
-EIO; - goto cleanup; - } - xs->xattr_bh = blk_bh; + xb = (struct ocfs2_xattr_block *)blk_bh->b_data; if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { xs->header = &xb->xb_attrs.xb_header; @@ -2259,9 +2305,9 @@ meta_guess: /* calculate metadata allocation. */ if (di->i_xattr_loc) { if (!xbs->xattr_bh) { - ret = ocfs2_read_block(inode, - le64_to_cpu(di->i_xattr_loc), - &bh); + ret = ocfs2_read_xattr_block(inode, + le64_to_cpu(di->i_xattr_loc), + &bh); if (ret) { mlog_errno(ret); goto out; -- cgit v1.2.3-70-g09d2 From 970e4936d7d15f35d00fd15a14f5343ba78b2fc8 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:19 -0800 Subject: ocfs2: Validate metadata only when it's read from disk. Add an optional validation hook to ocfs2_read_blocks(). Now the validation function is only called when a block was actually read off of disk. It is not called when the buffer was in cache. We add a buffer state bit BH_NeedsValidate to flag these buffers. It must always be one higher than the last JBD2 buffer state bit. The dinode, dirblock, extent_block, and xattr_block validators are lifted to this scheme directly. The group_descriptor validator needs to be split into two pieces. The first part only needs the gd buffer and is passed to ocfs2_read_block(). The second part requires the dinode as well, and is called every time. It's only 3 compares, so it's tiny. This also allows us to clean up the non-fatal gd check used by resize.c. It now has no magic argument. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 17 ++++----- fs/ocfs2/buffer_head_io.c | 33 ++++++++++++++++- fs/ocfs2/buffer_head_io.h | 27 ++++++++------ fs/ocfs2/dir.c | 13 +++---- fs/ocfs2/inode.c | 18 +++------- fs/ocfs2/resize.c | 2 +- fs/ocfs2/slot_map.c | 4 +-- fs/ocfs2/suballoc.c | 91 +++++++++++++++++++++++++++++++++-------------- fs/ocfs2/suballoc.h | 15 ++++---- fs/ocfs2/xattr.c | 26 +++++++------- 10 files changed, 149 insertions(+), 97 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index f430cc6e0f3..e823a27ba34 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -684,6 +684,9 @@ static int ocfs2_validate_extent_block(struct super_block *sb, struct ocfs2_extent_block *eb = (struct ocfs2_extent_block *)bh->b_data; + mlog(0, "Validating extent block %llu\n", + (unsigned long long)bh->b_blocknr); + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { ocfs2_error(sb, "Extent block #%llu has bad signature %.*s", @@ -719,21 +722,13 @@ int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, int rc; struct buffer_head *tmp = *bh; - rc = ocfs2_read_block(inode, eb_blkno, &tmp); - if (rc) - goto out; - - rc = ocfs2_validate_extent_block(inode->i_sb, tmp); - if (rc) { - brelse(tmp); - goto out; - } + rc = ocfs2_read_block(inode, eb_blkno, &tmp, + ocfs2_validate_extent_block); /* If ocfs2_read_block() got us a new bh, pass it up. */ - if (!*bh) + if (!rc && !*bh) *bh = tmp; -out: return rc; } diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 3a178ec48d7..0e9eed0c223 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -39,6 +39,19 @@ #include "buffer_head_io.h" +/* + * Bits on bh->b_state used by ocfs2. + * + * These MUST be after the JBD2 bits. Currently BH_Unshadow is the last + * JBD2 bit. 
+ */ +enum ocfs2_state_bits { + BH_NeedsValidate = BH_Unshadow + 1, +}; + +/* Expand the magic b_state functions */ +BUFFER_FNS(NeedsValidate, needs_validate); + int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, struct inode *inode) { @@ -166,7 +179,9 @@ bail: } int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, - struct buffer_head *bhs[], int flags) + struct buffer_head *bhs[], int flags, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) { int status = 0; int i, ignore_cache = 0; @@ -298,6 +313,8 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ + if (validate) + set_buffer_needs_validate(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); continue; @@ -328,6 +345,20 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, bhs[i] = NULL; continue; } + + if (buffer_needs_validate(bh)) { + /* We never set NeedsValidate if the + * buffer was held by the journal, so + * that better not have changed */ + BUG_ON(buffer_jbd(bh)); + clear_buffer_needs_validate(bh); + status = validate(inode->i_sb, bh); + if (status) { + put_bh(bh); + bhs[i] = NULL; + continue; + } + } } /* Always set the buffer in the cache, even if it was diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index 75e1dcb1ade..c75d682dadd 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h @@ -31,21 +31,24 @@ void ocfs2_end_buffer_io_sync(struct buffer_head *bh, int uptodate); -static inline int ocfs2_read_block(struct inode *inode, - u64 off, - struct buffer_head **bh); - int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, struct inode *inode); -int ocfs2_read_blocks(struct inode *inode, - u64 block, - int nr, - struct buffer_head *bhs[], - int flags); int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, unsigned int nr, struct buffer_head *bhs[]); +/* + * If not NULL, validate() will be 
called on a buffer that is freshly + * read from disk. It will not be called if the buffer was in cache. + * Note that if validate() is being used for this buffer, it needs to + * be set even for a READAHEAD call, as it marks the buffer for later + * validation. + */ +int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, + struct buffer_head *bhs[], int flags, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)); + int ocfs2_write_super_or_backup(struct ocfs2_super *osb, struct buffer_head *bh); @@ -53,7 +56,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, #define OCFS2_BH_READAHEAD 8 static inline int ocfs2_read_block(struct inode *inode, u64 off, - struct buffer_head **bh) + struct buffer_head **bh, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) { int status = 0; @@ -63,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off, goto bail; } - status = ocfs2_read_blocks(inode, off, 1, bh, 0); + status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate); bail: return status; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index c2f3fd93be5..7e863d40380 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -214,6 +214,8 @@ static int ocfs2_validate_dir_block(struct super_block *sb, * Nothing yet. We don't validate dirents here, that's handled * in-place when the code walks them. */ + mlog(0, "Validating dirblock %llu\n", + (unsigned long long)bh->b_blocknr); return 0; } @@ -255,20 +257,13 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, goto out; } - rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags); + rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags, + ocfs2_validate_dir_block); if (rc) { mlog_errno(rc); goto out; } - if (!(flags & OCFS2_BH_READAHEAD)) { - rc = ocfs2_validate_dir_block(inode->i_sb, tmp); - if (rc) { - brelse(tmp); - goto out; - } - } - /* If ocfs2_read_blocks() got us a new bh, pass it up. 
*/ if (!*bh) *bh = tmp; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 9eb701b8646..ec3497bafda 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1255,6 +1255,9 @@ int ocfs2_validate_inode_block(struct super_block *sb, int rc = -EINVAL; struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; + mlog(0, "Validating dinode %llu\n", + (unsigned long long)bh->b_blocknr); + BUG_ON(!buffer_uptodate(bh)); if (!OCFS2_IS_VALID_DINODE(di)) { @@ -1300,23 +1303,12 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, struct buffer_head *tmp = *bh; rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp, - flags); - if (rc) - goto out; - - if (!(flags & OCFS2_BH_READAHEAD)) { - rc = ocfs2_validate_inode_block(inode->i_sb, tmp); - if (rc) { - brelse(tmp); - goto out; - } - } + flags, ocfs2_validate_inode_block); /* If ocfs2_read_blocks() got us a new bh, pass it up. */ - if (!*bh) + if (!rc && !*bh) *bh = tmp; -out: return rc; } diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 252baff5eb8..867de3ebfca 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -394,7 +394,7 @@ static int ocfs2_check_new_group(struct inode *inode, (struct ocfs2_group_desc *)group_bh->b_data; u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); - ret = ocfs2_validate_group_descriptor(inode->i_sb, di, group_bh, 1); + ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh); if (ret) goto out; diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index bdda2d8f850..40661e7824e 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -151,7 +151,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb) * this is not true, the read of -1 (UINT64_MAX) will fail. 
*/ ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh, - OCFS2_BH_IGNORE_CACHE); + OCFS2_BH_IGNORE_CACHE, NULL); if (ret == 0) { spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); @@ -405,7 +405,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, bh = NULL; /* Acquire a fresh bh */ status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh, - OCFS2_BH_IGNORE_CACHE); + OCFS2_BH_IGNORE_CACHE, NULL); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 766a00b2644..226fe21f260 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -145,14 +145,6 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); } -int ocfs2_validate_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct buffer_head *bh, - int clean_error) -{ - unsigned int max_bits; - struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; - #define do_error(fmt, ...) 
\ do{ \ if (clean_error) \ @@ -161,6 +153,12 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, ocfs2_error(sb, fmt, ##__VA_ARGS__); \ } while (0) +static int ocfs2_validate_gd_self(struct super_block *sb, + struct buffer_head *bh, + int clean_error) +{ + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { do_error("Group descriptor #%llu has bad signature %.*s", (unsigned long long)bh->b_blocknr, 7, @@ -184,6 +182,35 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, return -EINVAL; } + if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { + do_error("Group descriptor #%llu has bit count %u but " + "claims that %u are free", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(gd->bg_bits), + le16_to_cpu(gd->bg_free_bits_count)); + return -EINVAL; + } + + if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { + do_error("Group descriptor #%llu has bit count %u but " + "max bitmap bits of %u", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(gd->bg_bits), + 8 * le16_to_cpu(gd->bg_size)); + return -EINVAL; + } + + return 0; +} + +static int ocfs2_validate_gd_parent(struct super_block *sb, + struct ocfs2_dinode *di, + struct buffer_head *bh, + int clean_error) +{ + unsigned int max_bits; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; + if (di->i_blkno != gd->bg_parent_dinode) { do_error("Group descriptor #%llu has bad parent " "pointer (%llu, expected %llu)", @@ -209,26 +236,35 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, return -EINVAL; } - if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { - do_error("Group descriptor #%llu has bit count %u but " - "claims that %u are free", - (unsigned long long)bh->b_blocknr, - le16_to_cpu(gd->bg_bits), - le16_to_cpu(gd->bg_free_bits_count)); - return -EINVAL; - } + return 0; +} - if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { - do_error("Group 
descriptor #%llu has bit count %u but " - "max bitmap bits of %u", - (unsigned long long)bh->b_blocknr, - le16_to_cpu(gd->bg_bits), - 8 * le16_to_cpu(gd->bg_size)); - return -EINVAL; - } #undef do_error - return 0; +/* + * This version only prints errors. It does not fail the filesystem, and + * exists only for resize. + */ +int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct buffer_head *bh) +{ + int rc; + + rc = ocfs2_validate_gd_self(sb, bh, 1); + if (!rc) + rc = ocfs2_validate_gd_parent(sb, di, bh, 1); + + return rc; +} + +static int ocfs2_validate_group_descriptor(struct super_block *sb, + struct buffer_head *bh) +{ + mlog(0, "Validating group descriptor %llu\n", + (unsigned long long)bh->b_blocknr); + + return ocfs2_validate_gd_self(sb, bh, 0); } int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, @@ -237,11 +273,12 @@ int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, int rc; struct buffer_head *tmp = *bh; - rc = ocfs2_read_block(inode, gd_blkno, &tmp); + rc = ocfs2_read_block(inode, gd_blkno, &tmp, + ocfs2_validate_group_descriptor); if (rc) goto out; - rc = ocfs2_validate_group_descriptor(inode->i_sb, di, tmp, 0); + rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0); if (rc) { brelse(tmp); goto out; diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 43de4fd826d..e3c13c77f9e 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -165,16 +165,15 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); /* - * By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it + * By default, ocfs2_read_group_descriptor() calls ocfs2_error() when it * finds a problem. A caller that wants to check a group descriptor - * without going readonly passes a nonzero clean_error. This is only - * resize, really. Everyone else should be using - * ocfs2_read_group_descriptor(). 
+ * without going readonly should read the block with ocfs2_read_block[s]() + * and then checking it with this function. This is only resize, really. + * Everyone else should be using ocfs2_read_group_descriptor(). */ -int ocfs2_validate_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct buffer_head *bh, - int clean_error); +int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct buffer_head *bh); /* * Read a group descriptor block into *bh. If *bh is NULL, a bh will be * allocated. This is a cached read. The descriptor will be validated with diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index ef4aa5482d0..8af29b3bd6d 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -266,7 +266,8 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, int rc; rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno, - bucket->bu_blocks, bucket->bu_bhs, 0); + bucket->bu_blocks, bucket->bu_bhs, 0, + NULL); if (rc) ocfs2_xattr_bucket_relse(bucket); return rc; @@ -359,12 +360,8 @@ static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, int rc; struct buffer_head *tmp = *bh; - rc = ocfs2_read_block(inode, xb_blkno, &tmp); - if (!rc) { - rc = ocfs2_validate_xattr_block(inode->i_sb, tmp); - if (rc) - brelse(tmp); - } + rc = ocfs2_read_block(inode, xb_blkno, &tmp, + ocfs2_validate_xattr_block); /* If ocfs2_read_block() got us a new bh, pass it up. 
*/ if (!rc && !*bh) @@ -925,7 +922,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode, blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); /* Copy ocfs2_xattr_value */ for (i = 0; i < num_clusters * bpc; i++, blkno++) { - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_block(inode, blkno, &bh, NULL); if (ret) { mlog_errno(ret); goto out; @@ -1174,7 +1171,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); for (i = 0; i < num_clusters * bpc; i++, blkno++) { - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_block(inode, blkno, &bh, NULL); if (ret) { mlog_errno(ret); goto out; @@ -2206,7 +2203,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, base = xis->base; credits += OCFS2_INODE_UPDATE_CREDITS; } else { - int i, block_off; + int i, block_off = 0; xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; xe = xbs->here; name_offset = le16_to_cpu(xe->xe_name_offset); @@ -2840,6 +2837,7 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode, break; } + xe_name = bucket_block(bucket, block_off) + new_offset; if (!memcmp(name, xe_name, name_len)) { *xe_index = i; @@ -3598,7 +3596,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, goto out; } - ret = ocfs2_read_block(inode, prev_blkno, &old_bh); + ret = ocfs2_read_block(inode, prev_blkno, &old_bh, NULL); if (ret < 0) { mlog_errno(ret); brelse(new_bh); @@ -3990,7 +3988,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode, ocfs2_journal_dirty(handle, first_bh); /* update the new bucket header. 
*/ - ret = ocfs2_read_block(inode, to_blk_start, &bh); + ret = ocfs2_read_block(inode, to_blk_start, &bh, NULL); if (ret < 0) { mlog_errno(ret); goto out; @@ -4337,7 +4335,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, goto out; } - ret = ocfs2_read_block(inode, p_blkno, &first_bh); + ret = ocfs2_read_block(inode, p_blkno, &first_bh, NULL); if (ret) { mlog_errno(ret); goto out; @@ -4635,7 +4633,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); value_blk += header_bh->b_blocknr; - ret = ocfs2_read_block(inode, value_blk, &value_bh); + ret = ocfs2_read_block(inode, value_blk, &value_bh, NULL); if (ret) { mlog_errno(ret); goto out; -- cgit v1.2.3-70-g09d2 From a8549fb5abb2b372e46d5de0d23ff8b24f4a61af Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:20 -0800 Subject: ocfs2: Wrap virtual block reads in ocfs2_read_virt_blocks() The ocfs2_read_dir_block() function really maps an inode's virtual blocks to physical ones before calling ocfs2_read_blocks(). Let's extract that to common code, because other places might want to do that. Other than the block number being virtual, ocfs2_read_virt_blocks() takes the same arguments as ocfs2_read_blocks(). It converts those virtual block numbers to physical before calling ocfs2_read_blocks() directly. If the blocks asked for are discontiguous, this can mean multiple calls to ocfs2_read_blocks(), but this is mostly hidden from the caller. Like ocfs2_read_blocks(), the caller can pass in an existing buffer_head. This is usually done to pick up some readahead I/O. ocfs2_read_virt_blocks() checks the buffer_head's block number against the extent map - it must match. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/extent_map.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/extent_map.h | 24 +++++++++++++++++ 2 files changed, 95 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 0bd9d9698a2..f2bb1a04d25 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -806,3 +806,74 @@ out: return ret; } + +int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, + struct buffer_head *bhs[], int flags, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) +{ + int rc = 0; + u64 p_block, p_count; + int i, count, done = 0; + + mlog_entry("(inode = %p, v_block = %llu, nr = %d, bhs = %p, " + "flags = %x, validate = %p)\n", + inode, (unsigned long long)v_block, nr, bhs, flags, + validate); + + if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >= + i_size_read(inode)) { + BUG_ON(!(flags & OCFS2_BH_READAHEAD)); + goto out; + } + + while (done < nr) { + down_read(&OCFS2_I(inode)->ip_alloc_sem); + rc = ocfs2_extent_map_get_blocks(inode, v_block + done, + &p_block, &p_count, NULL); + up_read(&OCFS2_I(inode)->ip_alloc_sem); + if (rc) { + mlog_errno(rc); + break; + } + + if (!p_block) { + rc = -EIO; + mlog(ML_ERROR, + "Inode #%llu contains a hole at offset %llu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)(v_block + done) << + inode->i_sb->s_blocksize_bits); + break; + } + + count = nr - done; + if (p_count < count) + count = p_count; + + /* + * If the caller passed us bhs, they should have come + * from a previous readahead call to this function. Thus, + * they should have the right b_blocknr. 
+ */ + for (i = 0; i < count; i++) { + if (!bhs[done + i]) + continue; + BUG_ON(bhs[done + i]->b_blocknr != (p_block + i)); + } + + rc = ocfs2_read_blocks(inode, p_block, count, bhs + done, + flags, validate); + if (rc) { + mlog_errno(rc); + break; + } + done += count; + } + +out: + mlog_exit(rc); + return rc; +} + + diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h index 1c4aa8b06f3..b7dd9731b46 100644 --- a/fs/ocfs2/extent_map.h +++ b/fs/ocfs2/extent_map.h @@ -57,4 +57,28 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, u32 *num_clusters, struct ocfs2_extent_list *el); +int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, + struct buffer_head *bhs[], int flags, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)); +static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block, + struct buffer_head **bh, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) +{ + int status = 0; + + if (bh == NULL) { + printk("ocfs2: bh == NULL\n"); + status = -EINVAL; + goto bail; + } + + status = ocfs2_read_virt_blocks(inode, v_block, 1, bh, 0, validate); + +bail: + return status; +} + + #endif /* _EXTENT_MAP_H */ -- cgit v1.2.3-70-g09d2 From 511308d90b53479b194cd067715f44dc99d39b08 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:21 -0800 Subject: ocfs2: Convert ocfs2_read_dir_block() to ocfs2_read_virt_blocks() Now that we've centralized the ocfs2_read_virt_blocks() code, let's use it in ocfs2_read_dir_block(). 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/dir.c | 38 +++++--------------------------------- 1 file changed, 5 insertions(+), 33 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 7e863d40380..d83cff95759 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -231,44 +231,16 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, { int rc = 0; struct buffer_head *tmp = *bh; - u64 p_blkno; - if (((u64)v_block << inode->i_sb->s_blocksize_bits) >= - i_size_read(inode)) { - BUG_ON(!(flags & OCFS2_BH_READAHEAD)); - goto out; - } - - down_read(&OCFS2_I(inode)->ip_alloc_sem); - rc = ocfs2_extent_map_get_blocks(inode, v_block, &p_blkno, NULL, - NULL); - up_read(&OCFS2_I(inode)->ip_alloc_sem); - if (rc) { + rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags, + ocfs2_validate_dir_block); + if (rc) mlog_errno(rc); - goto out; - } - if (!p_blkno) { - rc = -EIO; - mlog(ML_ERROR, - "Directory #%llu contains a hole at offset %llu\n", - (unsigned long long)OCFS2_I(inode)->ip_blkno, - (unsigned long long)v_block << inode->i_sb->s_blocksize_bits); - goto out; - } - - rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags, - ocfs2_validate_dir_block); - if (rc) { - mlog_errno(rc); - goto out; - } - - /* If ocfs2_read_blocks() got us a new bh, pass it up. */ - if (!*bh) + /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */ + if (!rc && !*bh) *bh = tmp; -out: return rc ? -EIO : 0; } -- cgit v1.2.3-70-g09d2 From 53ef99cad9878f02f27bb30bc304fc42af8bdd6e Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 18 Nov 2008 16:53:43 -0800 Subject: ocfs2: Remove JBD compatibility layer JBD2 is fully backwards compatible with JBD and it's been tested enough with Ocfs2 that we can clean this code up now. 
Signed-off-by: Mark Fasheh --- fs/Kconfig | 10 ------ fs/ocfs2/alloc.c | 5 --- fs/ocfs2/aops.c | 24 ++----------- fs/ocfs2/journal.c | 14 -------- fs/ocfs2/journal.h | 11 +----- fs/ocfs2/ocfs2_jbd_compat.h | 82 --------------------------------------------- 6 files changed, 3 insertions(+), 143 deletions(-) delete mode 100644 fs/ocfs2/ocfs2_jbd_compat.h (limited to 'fs/ocfs2') diff --git a/fs/Kconfig b/fs/Kconfig index e8a47f74a83..b93425ad15d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -258,16 +258,6 @@ config OCFS2_DEBUG_FS this option for debugging only as it is likely to decrease performance of the filesystem. -config OCFS2_COMPAT_JBD - bool "Use JBD for compatibility" - depends on OCFS2_FS - default n - select JBD - help - The ocfs2 filesystem now uses JBD2 for its journalling. JBD2 - is backwards compatible with JBD. It is safe to say N here. - However, if you really want to use the original JBD, say Y here. - config OCFS2_FS_POSIX_ACL bool "OCFS2 POSIX Access Control Lists" depends on OCFS2_FS diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index e823a27ba34..69d67ab069b 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6638,11 +6638,6 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, mlog_errno(ret); else if (ocfs2_should_order_data(inode)) { ret = ocfs2_jbd2_file_inode(handle, inode); -#ifdef CONFIG_OCFS2_COMPAT_JBD - ret = walk_page_buffers(handle, page_buffers(page), - from, to, &partial, - ocfs2_journal_dirty_data); -#endif if (ret < 0) mlog_errno(ret); } diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index e219f8b546a..6af79adb2ec 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -474,12 +474,6 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode, if (ocfs2_should_order_data(inode)) { ret = ocfs2_jbd2_file_inode(handle, inode); -#ifdef CONFIG_OCFS2_COMPAT_JBD - ret = walk_page_buffers(handle, - page_buffers(page), - from, to, NULL, - ocfs2_journal_dirty_data); -#endif if (ret < 0) mlog_errno(ret); } @@ 
-1065,15 +1059,8 @@ static void ocfs2_write_failure(struct inode *inode, tmppage = wc->w_pages[i]; if (page_has_buffers(tmppage)) { - if (ocfs2_should_order_data(inode)) { + if (ocfs2_should_order_data(inode)) ocfs2_jbd2_file_inode(wc->w_handle, inode); -#ifdef CONFIG_OCFS2_COMPAT_JBD - walk_page_buffers(wc->w_handle, - page_buffers(tmppage), - from, to, NULL, - ocfs2_journal_dirty_data); -#endif - } block_commit_write(tmppage, from, to); } @@ -1912,15 +1899,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping, } if (page_has_buffers(tmppage)) { - if (ocfs2_should_order_data(inode)) { + if (ocfs2_should_order_data(inode)) ocfs2_jbd2_file_inode(wc->w_handle, inode); -#ifdef CONFIG_OCFS2_COMPAT_JBD - walk_page_buffers(wc->w_handle, - page_buffers(tmppage), - from, to, NULL, - ocfs2_journal_dirty_data); -#endif - } block_commit_write(tmppage, from, to); } } diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 9223bfcca3b..12b62a3cbf6 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -434,20 +434,6 @@ int ocfs2_journal_dirty(handle_t *handle, return status; } -#ifdef CONFIG_OCFS2_COMPAT_JBD -int ocfs2_journal_dirty_data(handle_t *handle, - struct buffer_head *bh) -{ - int err = journal_dirty_data(handle, bh); - if (err) - mlog_errno(err); - /* TODO: When we can handle it, abort the handle and go RO on - * error here. 
*/ - - return err; -} -#endif - #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) void ocfs2_set_journal_params(struct ocfs2_super *osb) diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index d4d14e9a3ce..8203980fefe 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -27,12 +27,7 @@ #define OCFS2_JOURNAL_H #include -#ifndef CONFIG_OCFS2_COMPAT_JBD -# include -#else -# include -# include "ocfs2_jbd_compat.h" -#endif +#include enum ocfs2_journal_state { OCFS2_JOURNAL_FREE = 0, @@ -273,10 +268,6 @@ int ocfs2_journal_access(handle_t *handle, */ int ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh); -#ifdef CONFIG_OCFS2_COMPAT_JBD -int ocfs2_journal_dirty_data(handle_t *handle, - struct buffer_head *bh); -#endif /* * Credit Macros: diff --git a/fs/ocfs2/ocfs2_jbd_compat.h b/fs/ocfs2/ocfs2_jbd_compat.h deleted file mode 100644 index b91c78f8f55..00000000000 --- a/fs/ocfs2/ocfs2_jbd_compat.h +++ /dev/null @@ -1,82 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * ocfs2_jbd_compat.h - * - * Compatibility defines for JBD. - * - * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- */ - -#ifndef OCFS2_JBD_COMPAT_H -#define OCFS2_JBD_COMPAT_H - -#ifndef CONFIG_OCFS2_COMPAT_JBD -# error Should not have been included -#endif - -struct jbd2_inode { - unsigned int dummy; -}; - -#define JBD2_BARRIER JFS_BARRIER -#define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE - -#define jbd2_journal_ack_err journal_ack_err -#define jbd2_journal_clear_err journal_clear_err -#define jbd2_journal_destroy journal_destroy -#define jbd2_journal_dirty_metadata journal_dirty_metadata -#define jbd2_journal_errno journal_errno -#define jbd2_journal_extend journal_extend -#define jbd2_journal_flush journal_flush -#define jbd2_journal_force_commit journal_force_commit -#define jbd2_journal_get_write_access journal_get_write_access -#define jbd2_journal_get_undo_access journal_get_undo_access -#define jbd2_journal_init_inode journal_init_inode -#define jbd2_journal_invalidatepage journal_invalidatepage -#define jbd2_journal_load journal_load -#define jbd2_journal_lock_updates journal_lock_updates -#define jbd2_journal_restart journal_restart -#define jbd2_journal_start journal_start -#define jbd2_journal_start_commit journal_start_commit -#define jbd2_journal_stop journal_stop -#define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers -#define jbd2_journal_unlock_updates journal_unlock_updates -#define jbd2_journal_wipe journal_wipe -#define jbd2_log_wait_commit log_wait_commit - -static inline int jbd2_journal_file_inode(handle_t *handle, - struct jbd2_inode *inode) -{ - return 0; -} - -static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, - loff_t new_size) -{ - return 0; -} - -static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, - struct inode *inode) -{ - return; -} - -static inline void jbd2_journal_release_jbd_inode(journal_t *journal, - struct jbd2_inode *jinode) -{ - return; -} - - -#endif /* OCFS2_JBD_COMPAT_H */ -- cgit v1.2.3-70-g09d2 From 97aff52ae13d3c11a074bbbfc80ad0b59cb8cdeb Mon Sep 17 
00:00:00 2001 From: Tao Ma Date: Wed, 19 Nov 2008 16:48:41 +0800 Subject: ocfs2/xattr: Fix a bug in xattr allocation estimation When we extend one xattr's value to a large size, the old value size might be smaller than the size of a value root. In those cases, we still need to guess the metadata allocation. Reported-by: Tiger Yang Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 8af29b3bd6d..d0b94edb966 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2270,6 +2270,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, value_size); xv = (struct ocfs2_xattr_value_root *) (base + name_offset + name_len); + value_size = OCFS2_XATTR_ROOT_SIZE; } else xv = &def_xv.xv; @@ -2283,7 +2284,8 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, &xv->xr_list, new_clusters - old_clusters); - goto out; + if (value_size >= OCFS2_XATTR_ROOT_SIZE) + goto out; } } else { /* -- cgit v1.2.3-70-g09d2 From 9f868f16e40e9ad8e39aebff94a4be0d96520734 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 19 Nov 2008 16:48:42 +0800 Subject: ocfs2/xattr: Restore not_found in xis During an xattr set, when we move a xattr which was stored in inode to the outside bucket, we have to delete it and it will use the old value of xis->not_found. xis->not_found is removed by ocfs2_calc_xattr_set_need though, so we must restore it. 
Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d0b94edb966..9cb71e1c7c6 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2414,7 +2414,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode, struct ocfs2_xattr_search *xbs, struct ocfs2_xattr_set_ctxt *ctxt) { - int ret = 0, credits; + int ret = 0, credits, old_found; if (!xi->value) { /* Remove existing extended attribute */ @@ -2433,6 +2433,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode, xi->value = NULL; xi->value_len = 0; + old_found = xis->not_found; xis->not_found = -ENODATA; ret = ocfs2_calc_xattr_set_need(inode, di, @@ -2442,6 +2443,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode, NULL, NULL, &credits); + xis->not_found = old_found; if (ret) { mlog_errno(ret); goto out; @@ -2462,6 +2464,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode, if (ret) goto out; + old_found = xis->not_found; xis->not_found = -ENODATA; ret = ocfs2_calc_xattr_set_need(inode, di, @@ -2471,6 +2474,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode, NULL, NULL, &credits); + xis->not_found = old_found; if (ret) { mlog_errno(ret); goto out; -- cgit v1.2.3-70-g09d2 From 90e86a63eadf1a3b2f19b68d82150dc63fe01443 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 27 Aug 2008 22:30:28 +0200 Subject: ocfs2: Support nested transactions OCFS2 can easily support nested transactions. We just have to take care not to spoil statistics or acquire the semaphore unnecessarily.
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/journal.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 12b62a3cbf6..11a1178d5ee 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -256,11 +256,9 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE); BUG_ON(max_buffs <= 0); - /* JBD might support this, but our journalling code doesn't yet. */ - if (journal_current_handle()) { - mlog(ML_ERROR, "Recursive transaction attempted!\n"); - BUG(); - } + /* Nested transaction? Just return the handle... */ + if (journal_current_handle()) + return jbd2_journal_start(journal, max_buffs); down_read(&osb->journal->j_trans_barrier); @@ -285,16 +283,18 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) int ocfs2_commit_trans(struct ocfs2_super *osb, handle_t *handle) { - int ret; + int ret, nested; struct ocfs2_journal *journal = osb->journal; BUG_ON(!handle); + nested = handle->h_ref > 1; ret = jbd2_journal_stop(handle); if (ret < 0) mlog_errno(ret); - up_read(&journal->j_trans_barrier); + if (!nested) + up_read(&journal->j_trans_barrier); return ret; } -- cgit v1.2.3-70-g09d2 From 1a224ad11eeb190da4a123e156601aad1bb67f24 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 20 Aug 2008 15:43:36 +0200 Subject: ocfs2: Assign feature bits and system inodes to quota feature and quota files Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/Kconfig | 2 ++ fs/ocfs2/inode.c | 2 ++ fs/ocfs2/ocfs2_fs.h | 21 ++++++++++++++++++--- fs/ocfs2/super.c | 17 +++++++++++++++++ 4 files changed, 39 insertions(+), 3 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/Kconfig b/fs/Kconfig index c1ce3d8831d..f9b6e2979aa 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -189,6 +189,8 @@ config OCFS2_FS select CONFIGFS_FS select JBD2 select CRC32 + select QUOTA + select QUOTA_TREE help 
OCFS2 is a general purpose extent based shared disk cluster file system with many similarities to ext3. It supports 64 bit inode diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index ec3497bafda..ec25d998419 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -283,6 +283,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); } else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) { OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; + } else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) { + inode->i_flags |= S_NOQUOTA; } else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) { mlog(0, "superblock inode: i_ino=%lu\n", inode->i_ino); /* we can't actually hit this as read_inode can't diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 5e0c0d0aef7..06e3bd632ff 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -94,7 +94,7 @@ | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ | OCFS2_FEATURE_INCOMPAT_XATTR) -#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN +#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN) /* * Heartbeat-only devices are missing journals and other files. The @@ -163,6 +163,12 @@ */ #define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001 +/* + * Maintain quota information for this filesystem + */ +#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002 +#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004 + /* The byte offset of the first backup block will be 1G. * The following will be 4G, 16G, 64G, 256G and 1T. 
*/ @@ -192,6 +198,7 @@ #define OCFS2_HEARTBEAT_FL (0x00000200) /* Heartbeat area */ #define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ #define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ +#define OCFS2_QUOTA_FL (0x00001000) /* Quota file */ /* * Flags on ocfs2_dinode.i_dyn_features @@ -329,13 +336,17 @@ enum { #define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE HEARTBEAT_SYSTEM_INODE, GLOBAL_BITMAP_SYSTEM_INODE, -#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GLOBAL_BITMAP_SYSTEM_INODE + USER_QUOTA_SYSTEM_INODE, + GROUP_QUOTA_SYSTEM_INODE, +#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE, EXTENT_ALLOC_SYSTEM_INODE, INODE_ALLOC_SYSTEM_INODE, JOURNAL_SYSTEM_INODE, LOCAL_ALLOC_SYSTEM_INODE, TRUNCATE_LOG_SYSTEM_INODE, + LOCAL_USER_QUOTA_SYSTEM_INODE, + LOCAL_GROUP_QUOTA_SYSTEM_INODE, NUM_SYSTEM_INODES }; @@ -349,6 +360,8 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { [SLOT_MAP_SYSTEM_INODE] = { "slot_map", 0, S_IFREG | 0644 }, [HEARTBEAT_SYSTEM_INODE] = { "heartbeat", OCFS2_HEARTBEAT_FL, S_IFREG | 0644 }, [GLOBAL_BITMAP_SYSTEM_INODE] = { "global_bitmap", 0, S_IFREG | 0644 }, + [USER_QUOTA_SYSTEM_INODE] = { "aquota.user", OCFS2_QUOTA_FL, S_IFREG | 0644 }, + [GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group", OCFS2_QUOTA_FL, S_IFREG | 0644 }, /* Slot-specific system inodes (one copy per slot) */ [ORPHAN_DIR_SYSTEM_INODE] = { "orphan_dir:%04d", 0, S_IFDIR | 0755 }, @@ -356,7 +369,9 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { [INODE_ALLOC_SYSTEM_INODE] = { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 }, [JOURNAL_SYSTEM_INODE] = { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 }, [LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 }, - [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 } + [TRUNCATE_LOG_SYSTEM_INODE] = { 
"truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 }, + [LOCAL_USER_QUOTA_SYSTEM_INODE] = { "aquota.user:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 }, + [LOCAL_GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 }, }; /* Parameter passed from mount.ocfs2 to module */ diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 9e7accc68b4..41bb0197cf4 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -225,6 +225,19 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait) return 0; } +static int ocfs2_need_system_inode(struct ocfs2_super *osb, int ino) +{ + if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA) + && (ino == USER_QUOTA_SYSTEM_INODE + || ino == LOCAL_USER_QUOTA_SYSTEM_INODE)) + return 0; + if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) + && (ino == GROUP_QUOTA_SYSTEM_INODE + || ino == LOCAL_GROUP_QUOTA_SYSTEM_INODE)) + return 0; + return 1; +} + static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) { struct inode *new = NULL; @@ -251,6 +264,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE; i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) { + if (!ocfs2_need_system_inode(osb, i)) + continue; new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); if (!new) { ocfs2_release_system_inodes(osb); @@ -281,6 +296,8 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb) for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; i < NUM_SYSTEM_INODES; i++) { + if (!ocfs2_need_system_inode(osb, i)) + continue; new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); if (!new) { ocfs2_release_system_inodes(osb); -- cgit v1.2.3-70-g09d2 From bbbd0eb34bf801dee01e345785959a75258f6567 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 Aug 2008 18:22:30 +0200 Subject: ocfs2: Mark system files as not subject to quota accounting Mark system files as not subject to quota accounting. 
This prevents possible recursions into quota code and thus deadlocks. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index ec25d998419..50dbc486ef7 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -275,8 +275,10 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, inode->i_nlink = le16_to_cpu(fe->i_links_count); - if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) + if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) { OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; + inode->i_flags |= S_NOQUOTA; + } if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; -- cgit v1.2.3-70-g09d2 From 9e33d69f553aaf11377307e8d6f82deb3385e351 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 25 Aug 2008 19:56:50 +0200 Subject: ocfs2: Implementation of local and global quota file handling For each quota type each node has a local quota file. In this file it stores changes users have made to disk usage via this node. Once in a while this information is synced to the global file (and thus with other nodes) so that limits enforcement at least approximately works. Global quota files contain all the information about usage and limits. It's mostly handled by the generic VFS code (which implements a trie of structures inside a quota file). We only have to provide functions to convert structures from on-disk format to in-memory one. We also have to provide wrappers for various quota functions starting transactions and acquiring necessary cluster locks before the actual IO is really started.
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/Makefile | 2 + fs/ocfs2/cluster/masklog.h | 1 + fs/ocfs2/dlmglue.c | 146 +++++++ fs/ocfs2/dlmglue.h | 19 + fs/ocfs2/file.c | 6 +- fs/ocfs2/file.h | 3 + fs/ocfs2/inode.h | 2 + fs/ocfs2/ocfs2_fs.h | 103 +++++ fs/ocfs2/ocfs2_lockid.h | 5 + fs/ocfs2/quota.h | 93 +++++ fs/ocfs2/quota_global.c | 919 +++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/quota_local.c | 833 ++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/super.c | 38 +- 13 files changed, 2165 insertions(+), 5 deletions(-) create mode 100644 fs/ocfs2/quota.h create mode 100644 fs/ocfs2/quota_global.c create mode 100644 fs/ocfs2/quota_local.c (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index e9ef5d162db..7e4b361b755 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -35,6 +35,8 @@ ocfs2-objs := \ sysfile.o \ uptodate.o \ ver.o \ + quota_local.o \ + quota_global.o \ xattr.o ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y) diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 57670c68047..7e72a81bc2d 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -113,6 +113,7 @@ #define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */ #define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ #define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ +#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ /* bits that are infrequently given and frequently matched in the high word */ #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 9f2a7f75d1b..058aa86490a 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -32,6 +32,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_DLM_GLUE #include @@ -51,6 +52,7 @@ #include "slot_map.h" #include "super.h" #include "uptodate.h" +#include 
"quota.h" #include "buffer_head_io.h" @@ -68,6 +70,7 @@ struct ocfs2_mask_waiter { static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); +static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres); /* * Return value from ->downconvert_worker functions. @@ -102,6 +105,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres); +static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) @@ -258,6 +262,12 @@ static struct ocfs2_lock_res_ops ocfs2_flock_lops = { .flags = 0, }; +static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { + .set_lvb = ocfs2_set_qinfo_lvb, + .get_osb = ocfs2_get_qinfo_osb, + .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, +}; + static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) { return lockres->l_type == OCFS2_LOCK_TYPE_META || @@ -279,6 +289,13 @@ static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res return (struct ocfs2_dentry_lock *)lockres->l_priv; } +static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres) +{ + BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO); + + return (struct ocfs2_mem_dqinfo *)lockres->l_priv; +} + static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) { if (lockres->l_ops->get_osb) @@ -507,6 +524,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) return OCFS2_SB(inode->i_sb); } +static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) +{ + struct ocfs2_mem_dqinfo *info = lockres->l_priv; + + return 
OCFS2_SB(info->dqi_gi.dqi_sb); +} + static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) { struct ocfs2_file_private *fp = lockres->l_priv; @@ -609,6 +633,17 @@ void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, lockres->l_flags |= OCFS2_LOCK_NOCACHE; } +void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, + struct ocfs2_mem_dqinfo *info) +{ + ocfs2_lock_res_init_once(lockres); + ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type, + 0, lockres->l_name); + ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, + OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, + info); +} + void ocfs2_lock_res_free(struct ocfs2_lock_res *res) { mlog_entry_void(); @@ -3445,6 +3480,117 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, return UNBLOCK_CONTINUE_POST; } +static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) +{ + struct ocfs2_qinfo_lvb *lvb; + struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); + struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, + oinfo->dqi_gi.dqi_type); + + mlog_entry_void(); + + lvb = (struct ocfs2_qinfo_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); + lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; + lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); + lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); + lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); + lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); + lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); + lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); + + mlog_exit_void(); +} + +void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) +{ + struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; + struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); + int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; + + mlog_entry_void(); + if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) + ocfs2_cluster_unlock(osb, lockres, level); + mlog_exit_void(); +} + +static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) +{ + struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, + oinfo->dqi_gi.dqi_type); + struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; + struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); + struct buffer_head *bh; + struct ocfs2_global_disk_dqinfo *gdinfo; + int status = 0; + + if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { + info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); + info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); + oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); + oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks); + oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk); + oinfo->dqi_gi.dqi_free_entry = + be32_to_cpu(lvb->lvb_free_entry); + } else { + bh = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &status); + if (!bh) { + mlog_errno(status); + goto bail; + } + gdinfo = (struct ocfs2_global_disk_dqinfo *) + (bh->b_data + OCFS2_GLOBAL_INFO_OFF); + info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace); + info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace); + oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms); + oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks); + oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk); + oinfo->dqi_gi.dqi_free_entry = + le32_to_cpu(gdinfo->dqi_free_entry); + brelse(bh); + ocfs2_track_lock_refresh(lockres); + } + +bail: + return status; +} + +/* Lock quota info, this function expects at least shared lock on the quota file + * so that we can safely refresh quota info from disk. */ +int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex) +{ + struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; + struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); + int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; + int status = 0; + + mlog_entry_void(); + + /* On RO devices, locking really isn't needed... */ + if (ocfs2_is_hard_readonly(osb)) { + if (ex) + status = -EROFS; + goto bail; + } + if (ocfs2_mount_local(osb)) + goto bail; + + status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); + if (status < 0) { + mlog_errno(status); + goto bail; + } + if (!ocfs2_should_refresh_lock_res(lockres)) + goto bail; + /* OK, we have the lock but we need to refresh the quota info */ + status = ocfs2_refresh_qinfo(oinfo); + if (status) + ocfs2_qinfo_unlock(oinfo, ex); + ocfs2_complete_lock_res_refresh(lockres, status); +bail: + mlog_exit(status); + return status; +} + /* * This is the filesystem locking protocol. It provides the lock handling * hooks for the underlying DLM. It has a maximum version number. diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 2bb01f09c1b..3f8d9986b8e 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -49,6 +49,19 @@ struct ocfs2_meta_lvb { __be32 lvb_reserved2; }; +#define OCFS2_QINFO_LVB_VERSION 1 + +struct ocfs2_qinfo_lvb { + __u8 lvb_version; + __u8 lvb_reserved[3]; + __be32 lvb_bgrace; + __be32 lvb_igrace; + __be32 lvb_syncms; + __be32 lvb_blocks; + __be32 lvb_free_blk; + __be32 lvb_free_entry; +}; + /* ocfs2_inode_lock_full() 'arg_flags' flags */ /* don't wait on recovery. 
*/ #define OCFS2_META_LOCK_RECOVERY (0x01) @@ -69,6 +82,9 @@ void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, struct ocfs2_file_private; void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, struct ocfs2_file_private *fp); +struct ocfs2_mem_dqinfo; +void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, + struct ocfs2_mem_dqinfo *info); void ocfs2_lock_res_free(struct ocfs2_lock_res *res); int ocfs2_create_new_inode_locks(struct inode *inode); int ocfs2_drop_inode_locks(struct inode *inode); @@ -103,6 +119,9 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex); void ocfs2_dentry_unlock(struct dentry *dentry, int ex); int ocfs2_file_lock(struct file *file, int ex, int trylock); void ocfs2_file_unlock(struct file *file); +int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex); +void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex); + void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 41001d515fa..372d96505a7 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -304,9 +304,9 @@ bail: return status; } -static int ocfs2_simple_size_update(struct inode *inode, - struct buffer_head *di_bh, - u64 new_i_size) +int ocfs2_simple_size_update(struct inode *inode, + struct buffer_head *di_bh, + u64 new_i_size) { int ret; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index e92382cbca5..172f9fbc9fc 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -51,6 +51,9 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb, struct ocfs2_alloc_context *data_ac, struct ocfs2_alloc_context *meta_ac, enum ocfs2_alloc_restarted *reason_ret); +int ocfs2_simple_size_update(struct inode *inode, + struct buffer_head *di_bh, + u64 new_i_size); int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to); int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index b79c371a9d2..eb3c302b38d 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -142,6 +142,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle, struct buffer_head *bh); int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb); int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb); +struct buffer_head *ocfs2_bread(struct inode *inode, + int block, int *err, int reada); void ocfs2_set_inode_flags(struct inode *inode); void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi); diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 06e3bd632ff..0a5ac790a62 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -883,6 +883,109 @@ static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe) return xe->xe_type & OCFS2_XATTR_TYPE_MASK; } +/* + * On disk structures for global quota file + */ + +/* Magic numbers and known versions for global quota files */ +#define OCFS2_GLOBAL_QMAGICS {\ + 0x0cf52470, /* USRQUOTA */ \ + 0x0cf52471 /* GRPQUOTA */ \ +} + +#define OCFS2_GLOBAL_QVERSIONS {\ + 0, \ + 0, \ +} + + +/* Each block of each quota file has a certain fixed number of bytes reserved + * for OCFS2 internal use at its end. OCFS2 can use it for things like + * checksums, etc. 
*/ +#define OCFS2_QBLK_RESERVED_SPACE 8 + +/* Generic header of all quota files */ +struct ocfs2_disk_dqheader { + __le32 dqh_magic; /* Magic number identifying file */ + __le32 dqh_version; /* Quota format version */ +}; + +#define OCFS2_GLOBAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader)) + +/* Information header of global quota file (immediately follows the generic + * header) */ +struct ocfs2_global_disk_dqinfo { +/*00*/ __le32 dqi_bgrace; /* Grace time for space softlimit excess */ + __le32 dqi_igrace; /* Grace time for inode softlimit excess */ + __le32 dqi_syncms; /* Time after which we sync local changes to + * global quota file */ + __le32 dqi_blocks; /* Number of blocks in quota file */ +/*10*/ __le32 dqi_free_blk; /* First free block in quota file */ + __le32 dqi_free_entry; /* First block with free dquot entry in quota + * file */ +}; + +/* Structure with global user / group information. We reserve some space + * for future use. */ +struct ocfs2_global_disk_dqblk { +/*00*/ __le32 dqb_id; /* ID the structure belongs to */ + __le32 dqb_use_count; /* Number of nodes having reference to this structure */ + __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */ +/*10*/ __le64 dqb_isoftlimit; /* preferred inode limit */ + __le64 dqb_curinodes; /* current # allocated inodes */ +/*20*/ __le64 dqb_bhardlimit; /* absolute limit on disk space */ + __le64 dqb_bsoftlimit; /* preferred limit on disk space */ +/*30*/ __le64 dqb_curspace; /* current space occupied */ + __le64 dqb_btime; /* time limit for excessive disk use */ +/*40*/ __le64 dqb_itime; /* time limit for excessive inode use */ + __le64 dqb_pad1; +/*50*/ __le64 dqb_pad2; +}; + +/* + * On-disk structures for local quota file + */ + +/* Magic numbers and known versions for local quota files */ +#define OCFS2_LOCAL_QMAGICS {\ + 0x0cf524c0, /* USRQUOTA */ \ + 0x0cf524c1 /* GRPQUOTA */ \ +} + +#define OCFS2_LOCAL_QVERSIONS {\ + 0, \ + 0, \ +} + +/* Quota flags in dqinfo header */ +#define OLQF_CLEAN 
0x0001 /* Quota file is empty (this should be after\ + * quota has been cleanly turned off) */ + +#define OCFS2_LOCAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader)) + +/* Information header of local quota file (immediately follows the generic + * header) */ +struct ocfs2_local_disk_dqinfo { + __le32 dqi_flags; /* Flags for quota file */ + __le32 dqi_chunks; /* Number of chunks of quota structures + * with a bitmap */ + __le32 dqi_blocks; /* Number of blocks allocated for quota file */ +}; + +/* Header of one chunk of a quota file */ +struct ocfs2_local_disk_chunk { + __le32 dqc_free; /* Number of free entries in the bitmap */ + u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding + * chunk of quota file */ +}; + +/* One entry in local quota file */ +struct ocfs2_local_disk_dqblk { +/*00*/ __le64 dqb_id; /* id this quota applies to */ + __le64 dqb_spacemod; /* Change in the amount of used space */ +/*10*/ __le64 dqb_inodemod; /* Change in the amount of used inodes */ +}; + #ifdef __KERNEL__ static inline int ocfs2_fast_symlink_chars(struct super_block *sb) { diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index 82c200f7a8f..eb6f50c9cec 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h @@ -46,6 +46,7 @@ enum ocfs2_lock_type { OCFS2_LOCK_TYPE_DENTRY, OCFS2_LOCK_TYPE_OPEN, OCFS2_LOCK_TYPE_FLOCK, + OCFS2_LOCK_TYPE_QINFO, OCFS2_NUM_LOCK_TYPES }; @@ -77,6 +78,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) case OCFS2_LOCK_TYPE_FLOCK: c = 'F'; break; + case OCFS2_LOCK_TYPE_QINFO: + c = 'Q'; + break; default: c = '\0'; } @@ -95,6 +99,7 @@ static char *ocfs2_lock_type_strings[] = { [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", [OCFS2_LOCK_TYPE_OPEN] = "Open", [OCFS2_LOCK_TYPE_FLOCK] = "Flock", + [OCFS2_LOCK_TYPE_QINFO] = "Quota", }; static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h new file mode 100644 index 00000000000..1f1c86311b3 --- /dev/null 
+++ b/fs/ocfs2/quota.h @@ -0,0 +1,93 @@ +/* + * quota.h for OCFS2 + * + * On disk quota structures for local and global quota file, in-memory + * structures. + * + */ + +#ifndef _OCFS2_QUOTA_H +#define _OCFS2_QUOTA_H + +#include +#include +#include +#include +#include + +#include "ocfs2.h" + +/* Common stuff */ +/* id number of quota format */ +#define QFMT_OCFS2 3 + +/* + * In-memory structures + */ +struct ocfs2_dquot { + struct dquot dq_dquot; /* Generic VFS dquot */ + loff_t dq_local_off; /* Offset in the local quota file */ + struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */ + unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */ + s64 dq_origspace; /* Last globally synced space usage */ + s64 dq_originodes; /* Last globally synced inode usage */ +}; + +/* In-memory structure with quota header information */ +struct ocfs2_mem_dqinfo { + unsigned int dqi_type; /* Quota type this structure describes */ + unsigned int dqi_chunks; /* Number of chunks in local quota file */ + unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ + unsigned int dqi_syncms; /* How often should we sync with other nodes */ + struct list_head dqi_chunk; /* List of chunks */ + struct inode *dqi_gqinode; /* Global quota file inode */ + struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ + struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */ + int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */ + struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */ + struct buffer_head *dqi_ibh; /* Buffer with information header */ + struct qtree_mem_dqinfo dqi_gi; /* Info about global file */ +}; + +static inline struct ocfs2_dquot *OCFS2_DQUOT(struct dquot *dquot) +{ + return container_of(dquot, struct ocfs2_dquot, dq_dquot); +} + +struct ocfs2_quota_chunk { + struct list_head qc_chunk; /* List of 
quotafile chunks */ + int qc_num; /* Number of quota chunk */ + struct buffer_head *qc_headerbh; /* Buffer head with chunk header */ +}; + +extern struct kmem_cache *ocfs2_dquot_cachep; +extern struct kmem_cache *ocfs2_qf_chunk_cachep; + +extern struct qtree_fmt_operations ocfs2_global_ops; + +ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off); +ssize_t ocfs2_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off); +int ocfs2_global_read_info(struct super_block *sb, int type); +int ocfs2_global_write_info(struct super_block *sb, int type); +int ocfs2_global_read_dquot(struct dquot *dquot); +int __ocfs2_sync_dquot(struct dquot *dquot, int freeing); +static inline int ocfs2_sync_dquot(struct dquot *dquot) +{ + return __ocfs2_sync_dquot(dquot, 0); +} +static inline int ocfs2_global_release_dquot(struct dquot *dquot) +{ + return __ocfs2_sync_dquot(dquot, 1); +} + +int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); +void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); +struct buffer_head *ocfs2_read_quota_block(struct inode *inode, + int block, int *err); + +extern struct dquot_operations ocfs2_quota_operations; +extern struct quota_format_type ocfs2_quota_format; + +#endif /* _OCFS2_QUOTA_H */ diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c new file mode 100644 index 00000000000..af8340c4536 --- /dev/null +++ b/fs/ocfs2/quota_global.c @@ -0,0 +1,919 @@ +/* + * Implementation of operations over global quota file + */ +#include +#include +#include +#include + +#define MLOG_MASK_PREFIX ML_QUOTA +#include + +#include "ocfs2_fs.h" +#include "ocfs2.h" +#include "alloc.h" +#include "inode.h" +#include "journal.h" +#include "file.h" +#include "sysfile.h" +#include "dlmglue.h" +#include "uptodate.h" +#include "quota.h" + +static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp) +{ + struct ocfs2_global_disk_dqblk *d = dp; + struct mem_dqblk *m 
= &dquot->dq_dqb; + + /* Update from disk only entries not set by the admin */ + if (!test_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags)) { + m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit); + m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit); + } + if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags)) + m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes); + if (!test_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags)) { + m->dqb_bhardlimit = le64_to_cpu(d->dqb_bhardlimit); + m->dqb_bsoftlimit = le64_to_cpu(d->dqb_bsoftlimit); + } + if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags)) + m->dqb_curspace = le64_to_cpu(d->dqb_curspace); + if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags)) + m->dqb_btime = le64_to_cpu(d->dqb_btime); + if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags)) + m->dqb_itime = le64_to_cpu(d->dqb_itime); + OCFS2_DQUOT(dquot)->dq_use_count = le32_to_cpu(d->dqb_use_count); +} + +static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot) +{ + struct ocfs2_global_disk_dqblk *d = dp; + struct mem_dqblk *m = &dquot->dq_dqb; + + d->dqb_id = cpu_to_le32(dquot->dq_id); + d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count); + d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); + d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); + d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes); + d->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit); + d->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit); + d->dqb_curspace = cpu_to_le64(m->dqb_curspace); + d->dqb_btime = cpu_to_le64(m->dqb_btime); + d->dqb_itime = cpu_to_le64(m->dqb_itime); +} + +static int ocfs2_global_is_id(void *dp, struct dquot *dquot) +{ + struct ocfs2_global_disk_dqblk *d = dp; + struct ocfs2_mem_dqinfo *oinfo = + sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; + + if (qtree_entry_unused(&oinfo->dqi_gi, dp)) + return 0; + return le32_to_cpu(d->dqb_id) == dquot->dq_id; +} + +struct qtree_fmt_operations ocfs2_global_ops = { + 
.mem2disk_dqblk = ocfs2_global_mem2diskdqb, + .disk2mem_dqblk = ocfs2_global_disk2memdqb, + .is_id = ocfs2_global_is_id, +}; + + +struct buffer_head *ocfs2_read_quota_block(struct inode *inode, + int block, int *err) +{ + struct buffer_head *tmp = NULL; + + *err = ocfs2_read_virt_blocks(inode, block, 1, &tmp, 0, NULL); + if (*err) + mlog_errno(*err); + + return tmp; +} + +static struct buffer_head *ocfs2_get_quota_block(struct inode *inode, + int block, int *err) +{ + u64 pblock, pcount; + struct buffer_head *bh; + + down_read(&OCFS2_I(inode)->ip_alloc_sem); + *err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount, + NULL); + up_read(&OCFS2_I(inode)->ip_alloc_sem); + if (*err) { + mlog_errno(*err); + return NULL; + } + bh = sb_getblk(inode->i_sb, pblock); + if (!bh) { + *err = -EIO; + mlog_errno(*err); + } + return bh; +} + +/* Read data from global quotafile - avoid pagecache and such because we cannot + * afford acquiring the locks... We use quota cluster lock to serialize + * operations. Caller is responsible for acquiring it. 
*/ +ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off) +{ + struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; + struct inode *gqinode = oinfo->dqi_gqinode; + loff_t i_size = i_size_read(gqinode); + int offset = off & (sb->s_blocksize - 1); + sector_t blk = off >> sb->s_blocksize_bits; + int err = 0; + struct buffer_head *bh; + size_t toread, tocopy; + + if (off > i_size) + return 0; + if (off + len > i_size) + len = i_size - off; + toread = len; + while (toread > 0) { + tocopy = min((size_t)(sb->s_blocksize - offset), toread); + bh = ocfs2_read_quota_block(gqinode, blk, &err); + if (!bh) { + mlog_errno(err); + return err; + } + memcpy(data, bh->b_data + offset, tocopy); + brelse(bh); + offset = 0; + toread -= tocopy; + data += tocopy; + blk++; + } + return len; +} + +/* Write to quotafile (we know the transaction is already started and has + * enough credits) */ +ssize_t ocfs2_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off) +{ + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + struct inode *gqinode = oinfo->dqi_gqinode; + int offset = off & (sb->s_blocksize - 1); + sector_t blk = off >> sb->s_blocksize_bits; + int err = 0, new = 0; + struct buffer_head *bh; + handle_t *handle = journal_current_handle(); + + if (!handle) { + mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled " + "because transaction was not started.\n", + (unsigned long long)off, (unsigned long long)len); + return -EIO; + } + if (len > sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset) { + WARN_ON(1); + len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; + } + + mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA); + if (gqinode->i_size < off + len) { + down_write(&OCFS2_I(gqinode)->ip_alloc_sem); + err = ocfs2_extend_no_holes(gqinode, off + len, off); + up_write(&OCFS2_I(gqinode)->ip_alloc_sem); + if (err < 0) + goto out; + err = 
ocfs2_simple_size_update(gqinode, + oinfo->dqi_gqi_bh, + off + len); + if (err < 0) + goto out; + new = 1; + } + /* Not rewriting whole block? */ + if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) && + !new) { + bh = ocfs2_read_quota_block(gqinode, blk, &err); + if (!bh) { + mlog_errno(err); + return err; + } + err = ocfs2_journal_access(handle, gqinode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); + } else { + bh = ocfs2_get_quota_block(gqinode, blk, &err); + if (!bh) { + mlog_errno(err); + return err; + } + err = ocfs2_journal_access(handle, gqinode, bh, + OCFS2_JOURNAL_ACCESS_CREATE); + } + if (err < 0) { + brelse(bh); + goto out; + } + lock_buffer(bh); + if (new) + memset(bh->b_data, 0, sb->s_blocksize); + memcpy(bh->b_data + offset, data, len); + flush_dcache_page(bh->b_page); + unlock_buffer(bh); + ocfs2_set_buffer_uptodate(gqinode, bh); + err = ocfs2_journal_dirty(handle, bh); + brelse(bh); + if (err < 0) + goto out; +out: + if (err) { + mutex_unlock(&gqinode->i_mutex); + mlog_errno(err); + return err; + } + gqinode->i_version++; + ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh); + mutex_unlock(&gqinode->i_mutex); + return len; +} + +int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) +{ + int status; + struct buffer_head *bh = NULL; + + status = ocfs2_inode_lock(oinfo->dqi_gqinode, &bh, ex); + if (status < 0) + return status; + spin_lock(&dq_data_lock); + if (!oinfo->dqi_gqi_count++) + oinfo->dqi_gqi_bh = bh; + else + WARN_ON(bh != oinfo->dqi_gqi_bh); + spin_unlock(&dq_data_lock); + return 0; +} + +void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) +{ + ocfs2_inode_unlock(oinfo->dqi_gqinode, ex); + brelse(oinfo->dqi_gqi_bh); + spin_lock(&dq_data_lock); + if (!--oinfo->dqi_gqi_count) + oinfo->dqi_gqi_bh = NULL; + spin_unlock(&dq_data_lock); +} + +/* Read information header from global quota file */ +int ocfs2_global_read_info(struct super_block *sb, int type) +{ + struct inode *gqinode = NULL; + unsigned int 
ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE, + GROUP_QUOTA_SYSTEM_INODE }; + struct ocfs2_global_disk_dqinfo dinfo; + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + int status; + + mlog_entry_void(); + + /* Read global header */ + gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type], + OCFS2_INVALID_SLOT); + if (!gqinode) { + mlog(ML_ERROR, "failed to get global quota inode (type=%d)\n", + type); + status = -EINVAL; + goto out_err; + } + oinfo->dqi_gi.dqi_sb = sb; + oinfo->dqi_gi.dqi_type = type; + ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo); + oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk); + oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops; + oinfo->dqi_gqi_bh = NULL; + oinfo->dqi_gqi_count = 0; + oinfo->dqi_gqinode = gqinode; + status = ocfs2_lock_global_qf(oinfo, 0); + if (status < 0) { + mlog_errno(status); + goto out_err; + } + status = sb->s_op->quota_read(sb, type, (char *)&dinfo, + sizeof(struct ocfs2_global_disk_dqinfo), + OCFS2_GLOBAL_INFO_OFF); + ocfs2_unlock_global_qf(oinfo, 0); + if (status != sizeof(struct ocfs2_global_disk_dqinfo)) { + mlog(ML_ERROR, "Cannot read global quota info (%d).\n", + status); + if (status >= 0) + status = -EIO; + mlog_errno(status); + goto out_err; + } + info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); + info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); + oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms); + oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); + oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); + oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); + oinfo->dqi_gi.dqi_blocksize_bits = sb->s_blocksize_bits; + oinfo->dqi_gi.dqi_usable_bs = sb->s_blocksize - + OCFS2_QBLK_RESERVED_SPACE; + oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); +out_err: + mlog_exit(status); + return status; +} + +/* Write information to global quota file. 
Expects exlusive lock on quota + * file inode and quota info */ +static int __ocfs2_global_write_info(struct super_block *sb, int type) +{ + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + struct ocfs2_global_disk_dqinfo dinfo; + ssize_t size; + + spin_lock(&dq_data_lock); + info->dqi_flags &= ~DQF_INFO_DIRTY; + dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace); + dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); + spin_unlock(&dq_data_lock); + dinfo.dqi_syncms = cpu_to_le32(oinfo->dqi_syncms); + dinfo.dqi_blocks = cpu_to_le32(oinfo->dqi_gi.dqi_blocks); + dinfo.dqi_free_blk = cpu_to_le32(oinfo->dqi_gi.dqi_free_blk); + dinfo.dqi_free_entry = cpu_to_le32(oinfo->dqi_gi.dqi_free_entry); + size = sb->s_op->quota_write(sb, type, (char *)&dinfo, + sizeof(struct ocfs2_global_disk_dqinfo), + OCFS2_GLOBAL_INFO_OFF); + if (size != sizeof(struct ocfs2_global_disk_dqinfo)) { + mlog(ML_ERROR, "Cannot write global quota info structure\n"); + if (size >= 0) + size = -EIO; + return size; + } + return 0; +} + +int ocfs2_global_write_info(struct super_block *sb, int type) +{ + int err; + struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; + + err = ocfs2_qinfo_lock(info, 1); + if (err < 0) + return err; + err = __ocfs2_global_write_info(sb, type); + ocfs2_qinfo_unlock(info, 1); + return err; +} + +/* Read in information from global quota file and acquire a reference to it. 
+ * dquot_acquire() has already started the transaction and locked quota file
+ *
+ * Takes the quota info lock shared, reads the dquot from the global tree,
+ * bumps its use count and records the current usage as the baseline
+ * (dq_origspace / dq_originodes).  Returns a negative error on failure. */
+int ocfs2_global_read_dquot(struct dquot *dquot)
+{
+	int err, err2, ex = 0;
+	struct ocfs2_mem_dqinfo *info =
+			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+
+	err = ocfs2_qinfo_lock(info, 0);
+	if (err < 0)
+		goto out;
+	err = qtree_read_dquot(&info->dqi_gi, dquot);
+	if (err < 0)
+		goto out_qlock;
+	OCFS2_DQUOT(dquot)->dq_use_count++;
+	OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
+	OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
+	if (!dquot->dq_off) {	/* No real quota entry? */
+		/* Upgrade to exclusive lock for allocation */
+		err = ocfs2_qinfo_lock(info, 1);
+		if (err < 0)
+			goto out_qlock;
+		ex = 1;
+	}
+	err = qtree_write_dquot(&info->dqi_gi, dquot);
+	/* Write the info header back too if it is dirty; keep the first error */
+	if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
+		err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
+		if (!err)
+			err = err2;
+	}
+out_qlock:
+	/* The exclusive lock, if taken, was stacked on top of the shared one */
+	if (ex)
+		ocfs2_qinfo_unlock(info, 1);
+	ocfs2_qinfo_unlock(info, 0);
+out:
+	if (err < 0)
+		mlog_errno(err);
+	return err;
+}
+
+/* Sync local information about quota modifications with global quota file.
+ * Caller must have started the transaction and obtained exclusive lock for
+ * global quota file inode
+ *
+ * @freeing is non-zero when the caller is also giving up its use of the
+ * dquot: the use count is dropped and, when it reaches zero, the entry is
+ * released from the global tree.  @freeing is also what is passed as the
+ * exclusive flag to ocfs2_qinfo_lock(). */
+int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
+{
+	int err, err2;
+	struct super_block *sb = dquot->dq_sb;
+	int type = dquot->dq_type;
+	struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
+	struct ocfs2_global_disk_dqblk dqblk;
+	s64 spacechange, inodechange;
+	time_t olditime, oldbtime;
+
+	err = sb->s_op->quota_read(sb, type, (char *)&dqblk,
+				   sizeof(struct ocfs2_global_disk_dqblk),
+				   dquot->dq_off);
+	if (err != sizeof(struct ocfs2_global_disk_dqblk)) {
+		if (err >= 0) {
+			mlog(ML_ERROR, "Short read from global quota file "
+				       "(%u read)\n", err);
+			err = -EIO;
+		}
+		goto out;
+	}
+
+	/* Update space and inode usage. Get also other information from
+	 * global quota file so that we don't overwrite any changes there.
+	 * We are */
+	spin_lock(&dq_data_lock);
+	/* Local delta since the last sync, computed before the in-memory
+	 * values are overwritten with the global ones */
+	spacechange = dquot->dq_dqb.dqb_curspace -
+					OCFS2_DQUOT(dquot)->dq_origspace;
+	inodechange = dquot->dq_dqb.dqb_curinodes -
+					OCFS2_DQUOT(dquot)->dq_originodes;
+	olditime = dquot->dq_dqb.dqb_itime;
+	oldbtime = dquot->dq_dqb.dqb_btime;
+	ocfs2_global_disk2memdqb(dquot, &dqblk);
+	mlog(0, "Syncing global dquot %d space %lld+%lld, inodes %lld+%lld\n",
+	     dquot->dq_id, dquot->dq_dqb.dqb_curspace, spacechange,
+	     dquot->dq_dqb.dqb_curinodes, inodechange);
+	/* Apply the local delta unless the corresponding LASTSET bit is set */
+	if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
+		dquot->dq_dqb.dqb_curspace += spacechange;
+	if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
+		dquot->dq_dqb.dqb_curinodes += inodechange;
+	/* Set properly space grace time... */
+	if (dquot->dq_dqb.dqb_bsoftlimit &&
+	    dquot->dq_dqb.dqb_curspace > dquot->dq_dqb.dqb_bsoftlimit) {
+		if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags) &&
+		    oldbtime > 0) {
+			/* Keep the earlier of the global and local grace time */
+			if (dquot->dq_dqb.dqb_btime > 0)
+				dquot->dq_dqb.dqb_btime =
+					min(dquot->dq_dqb.dqb_btime, oldbtime);
+			else
+				dquot->dq_dqb.dqb_btime = oldbtime;
+		}
+	} else {
+		dquot->dq_dqb.dqb_btime = 0;
+		clear_bit(DQ_BLKS_B, &dquot->dq_flags);
+	}
+	/* Set properly inode grace time... */
+	if (dquot->dq_dqb.dqb_isoftlimit &&
+	    dquot->dq_dqb.dqb_curinodes > dquot->dq_dqb.dqb_isoftlimit) {
+		if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags) &&
+		    olditime > 0) {
+			if (dquot->dq_dqb.dqb_itime > 0)
+				dquot->dq_dqb.dqb_itime =
+					min(dquot->dq_dqb.dqb_itime, olditime);
+			else
+				dquot->dq_dqb.dqb_itime = olditime;
+		}
+	} else {
+		dquot->dq_dqb.dqb_itime = 0;
+		clear_bit(DQ_INODES_B, &dquot->dq_flags);
+	}
+	/* All information is properly updated, clear the flags */
+	__clear_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
+	__clear_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
+	__clear_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
+	__clear_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
+	__clear_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
+	__clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
+	/* The merged values become the baseline for the next sync */
+	OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
+	OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
+	spin_unlock(&dq_data_lock);
+	err = ocfs2_qinfo_lock(info, freeing);
+	if (err < 0) {
+		mlog(ML_ERROR, "Failed to lock quota info, loosing quota write"
+			       " (type=%d, id=%u)\n", dquot->dq_type,
+			       (unsigned)dquot->dq_id);
+		goto out;
+	}
+	if (freeing)
+		OCFS2_DQUOT(dquot)->dq_use_count--;
+	err = qtree_write_dquot(&info->dqi_gi, dquot);
+	if (err < 0)
+		goto out_qlock;
+	if (freeing && !OCFS2_DQUOT(dquot)->dq_use_count) {
+		err = qtree_release_dquot(&info->dqi_gi, dquot);
+		if (info_dirty(sb_dqinfo(sb, type))) {
+			err2 = __ocfs2_global_write_info(sb, type);
+			if (!err)
+				err = err2;
+		}
+	}
+out_qlock:
+	ocfs2_qinfo_unlock(info, freeing);
+out:
+	if (err < 0)
+		mlog_errno(err);
+	return err;
+}
+
+/*
+ *  Wrappers for generic quota functions
+ */
+
+/* Run dquot_commit() inside its own transaction of OCFS2_QWRITE_CREDITS.
+ * Returns the transaction start error or the dquot_commit() result. */
+static int ocfs2_write_dquot(struct dquot *dquot)
+{
+	handle_t *handle;
+	struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
+	int status = 0;
+
+	mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+
+	handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out;
+	}
+	status = dquot_commit(dquot);
+	ocfs2_commit_trans(osb, handle);
+out:
+	mlog_exit(status);
+	return status;
+}
+
+/* Journal credits needed to release a dquot of the given type; 0 when the
+ * matching quota feature bit is not set on the filesystem. */
+int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
+{
+	struct ocfs2_mem_dqinfo *oinfo;
+	int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+				    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
+
+	if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
+		return 0;
+
+	oinfo = sb_dqinfo(sb, type)->dqi_priv;
+	/* We modify tree, leaf block, global info, local chunk header,
+	 * global and local inode */
+	return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
+	       2 * OCFS2_INODE_UPDATE_CREDITS;
+}
+
+/* Take the global quota file lock with ex=1, start a transaction sized by
+ * ocfs2_calc_qdel_credits() and run dquot_release() inside it. */
+static int ocfs2_release_dquot(struct dquot *dquot)
+{
+	handle_t *handle;
+	struct ocfs2_mem_dqinfo *oinfo =
+			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+	struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
+	int status = 0;
+
+	mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+
+	status = ocfs2_lock_global_qf(oinfo, 1);
+	if (status < 0)
+		goto out;
+	handle = ocfs2_start_trans(osb,
+		ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_type));
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out_ilock;
+	}
+	status = dquot_release(dquot);
+	ocfs2_commit_trans(osb, handle);
+out_ilock:
+	ocfs2_unlock_global_qf(oinfo, 1);
+out:
+	mlog_exit(status);
+	return status;
+}
+
+/* Journal credits needed to instantiate a dquot of the given type; 0 when
+ * the matching quota feature bit is not set.  Dereferences both cached inode
+ * buffer heads (dqi_gqi_bh, dqi_lqi_bh), so they must be set when this is
+ * called. */
+int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
+{
+	struct ocfs2_mem_dqinfo *oinfo;
+	int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+				    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
+	struct ocfs2_dinode *lfe, *gfe;
+
+	if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
+		return 0;
+
+	oinfo = sb_dqinfo(sb, type)->dqi_priv;
+	gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
+	lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
+	/* We can extend local file + global file. In local file we
+	 * can modify info, chunk header block and dquot block. In
+	 * global file we can modify info, tree and leaf block */
+	return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
+	       ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
+	       3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
+}
+
+/* Take the global quota file lock with ex=1, start a transaction sized by
+ * ocfs2_calc_qinit_credits() and run dquot_acquire() inside it. */
+static int ocfs2_acquire_dquot(struct dquot *dquot)
+{
+	handle_t *handle;
+	struct ocfs2_mem_dqinfo *oinfo =
+			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+	struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
+	int status = 0;
+
+	mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+	/* We need an exclusive lock, because we're going to update use count
+	 * and instantiate possibly new dquot structure */
+	status = ocfs2_lock_global_qf(oinfo, 1);
+	if (status < 0)
+		goto out;
+	handle = ocfs2_start_trans(osb,
+		ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out_ilock;
+	}
+	status = dquot_acquire(dquot);
+	ocfs2_commit_trans(osb, handle);
+out_ilock:
+	ocfs2_unlock_global_qf(oinfo, 1);
+out:
+	mlog_exit(status);
+	return status;
+}
+
+/* Mark the dquot dirty and write it out.  When any of the LASTSET bits in
+ * 'mask' is set, the dquot is first synced to the global quota file under
+ * the exclusive global lock and then committed locally; otherwise only the
+ * local write via ocfs2_write_dquot() is done. */
+static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
+{
+	unsigned long mask = (1 << (DQ_LASTSET_B + QIF_ILIMITS_B)) |
+			     (1 << (DQ_LASTSET_B + QIF_BLIMITS_B)) |
+			     (1 << (DQ_LASTSET_B + QIF_INODES_B)) |
+			     (1 << (DQ_LASTSET_B + QIF_SPACE_B)) |
+			     (1 << (DQ_LASTSET_B + QIF_BTIME_B)) |
+			     (1 << (DQ_LASTSET_B + QIF_ITIME_B));
+	int sync = 0;
+	int status;
+	struct super_block *sb = dquot->dq_sb;
+	int type = dquot->dq_type;
+	struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+	handle_t *handle;
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+
+	mlog_entry("id=%u, type=%d", dquot->dq_id, type);
+	dquot_mark_dquot_dirty(dquot);
+
+	/* In case user set some limits, sync dquot immediately to global
+	 * quota file so that information propagates quicker */
+	spin_lock(&dq_data_lock);
+	if (dquot->dq_flags & mask)
+		sync = 1;
+	spin_unlock(&dq_data_lock);
+	if (!sync) {
+		status = ocfs2_write_dquot(dquot);
+		goto out;
+	}
+	status = ocfs2_lock_global_qf(oinfo, 1);
+	if (status < 0)
+		goto out;
+	handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out_ilock;
+	}
+	status = ocfs2_sync_dquot(dquot);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_trans;
+	}
+	/* Now write updated local dquot structure */
+	status = dquot_commit(dquot);
+out_trans:
+	ocfs2_commit_trans(osb, handle);
+out_ilock:
+	ocfs2_unlock_global_qf(oinfo, 1);
+out:
+	mlog_exit(status);
+	return status;
+}
+
+/* This should happen only after set_dqinfo().
+ * Runs dquot_commit_info() in a transaction of OCFS2_QINFO_WRITE_CREDITS
+ * while holding the global quota file lock with ex=1. */
+static int ocfs2_write_info(struct super_block *sb, int type)
+{
+	handle_t *handle;
+	int status = 0;
+	struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+
+	mlog_entry_void();
+
+	status = ocfs2_lock_global_qf(oinfo, 1);
+	if (status < 0)
+		goto out;
+	handle = ocfs2_start_trans(OCFS2_SB(sb), OCFS2_QINFO_WRITE_CREDITS);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out_ilock;
+	}
+	status = dquot_commit_info(sb, type);
+	ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out_ilock:
+	ocfs2_unlock_global_qf(oinfo, 1);
+out:
+	mlog_exit(status);
+	return status;
+}
+
+/* This is difficult. We have to lock quota inode and start transaction
+ * in this function but we don't want to take the penalty of exclusive
+ * quota file lock when we are just going to use cached structures. So
+ * we just take read lock check whether we have dquot cached and if so,
+ * we don't have to take the write lock... 
*/
+static int ocfs2_dquot_initialize(struct inode *inode, int type)
+{
+	handle_t *handle = NULL;
+	int status = 0;
+	struct super_block *sb = inode->i_sb;
+	struct ocfs2_mem_dqinfo *oinfo;
+	int exclusive = 0;
+	int cnt;
+	qid_t id;
+
+	mlog_entry_void();
+
+	/* type == -1 means "all quota types"; inactive types are skipped */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (type != -1 && cnt != type)
+			continue;
+		if (!sb_has_quota_active(sb, cnt))
+			continue;
+		oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+		status = ocfs2_lock_global_qf(oinfo, 0);
+		if (status < 0)
+			goto out;
+		/* This is just a performance optimization not a reliable test.
+		 * Since we hold an inode lock, no one can actually release
+		 * the structure until we are finished with initialization. */
+		if (inode->i_dquot[cnt] != NODQUOT) {
+			ocfs2_unlock_global_qf(oinfo, 0);
+			continue;
+		}
+		/* When we have inode lock, we know that no dquot_release() can
+		 * run and thus we can safely check whether we need to
+		 * read+modify global file to get quota information or whether
+		 * our node already has it. */
+		if (cnt == USRQUOTA)
+			id = inode->i_uid;
+		else if (cnt == GRPQUOTA)
+			id = inode->i_gid;
+		else
+			BUG();
+		/* Obtain exclusion from quota off... */
+		down_write(&sb_dqopt(sb)->dqptr_sem);
+		exclusive = !dquot_is_cached(sb, id, cnt);
+		up_write(&sb_dqopt(sb)->dqptr_sem);
+		if (exclusive) {
+			/* Not cached: stack the exclusive lock on top of the
+			 * shared one and open a transaction */
+			status = ocfs2_lock_global_qf(oinfo, 1);
+			if (status < 0) {
+				/* Only the shared lock is held on this path */
+				exclusive = 0;
+				mlog_errno(status);
+				goto out_ilock;
+			}
+			handle = ocfs2_start_trans(OCFS2_SB(sb),
+					ocfs2_calc_qinit_credits(sb, cnt));
+			if (IS_ERR(handle)) {
+				status = PTR_ERR(handle);
+				mlog_errno(status);
+				goto out_ilock;
+			}
+		}
+		dquot_initialize(inode, cnt);
+		if (exclusive) {
+			ocfs2_commit_trans(OCFS2_SB(sb), handle);
+			ocfs2_unlock_global_qf(oinfo, 1);
+		}
+		ocfs2_unlock_global_qf(oinfo, 0);
+	}
+	mlog_exit(0);
+	return 0;
+out_ilock:
+	if (exclusive)
+		ocfs2_unlock_global_qf(oinfo, 1);
+	ocfs2_unlock_global_qf(oinfo, 0);
+out:
+	mlog_exit(status);
+	return status;
+}
+
+/*
+ * Slow path of ocfs2_dquot_drop(): take the global quota file lock with
+ * ex=1 for every active quota type, start one transaction sized for both
+ * types and call dquot_drop().
+ *
+ * Returns 0 on success or a negative error from locking / transaction start.
+ * Every lock that was obtained is released again before returning.
+ */
+static int ocfs2_dquot_drop_slow(struct inode *inode)
+{
+	/* Must start at 0: when no quota type is active the loop below never
+	 * assigns status, yet it is returned after a successful drop */
+	int status = 0;
+	int cnt;
+	int got_lock[MAXQUOTAS] = {0, 0};
+	handle_t *handle;
+	struct super_block *sb = inode->i_sb;
+	struct ocfs2_mem_dqinfo *oinfo;
+
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (!sb_has_quota_active(sb, cnt))
+			continue;
+		oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+		status = ocfs2_lock_global_qf(oinfo, 1);
+		if (status < 0)
+			goto out;
+		got_lock[cnt] = 1;
+	}
+	handle = ocfs2_start_trans(OCFS2_SB(sb),
+			ocfs2_calc_qinit_credits(sb, USRQUOTA) +
+			ocfs2_calc_qinit_credits(sb, GRPQUOTA));
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out;
+	}
+	dquot_drop(inode);
+	ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out:
+	/* Release exactly the locks recorded in got_lock[] */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+		if (got_lock[cnt]) {
+			oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+			ocfs2_unlock_global_qf(oinfo, 1);
+		}
+	return status;
+}
+
+/* See the comment before ocfs2_dquot_initialize. 
*/
+static int ocfs2_dquot_drop(struct inode *inode)
+{
+	int status = 0;
+	struct super_block *sb = inode->i_sb;
+	struct ocfs2_mem_dqinfo *oinfo;
+	int exclusive = 0;
+	int cnt;
+	int got_lock[MAXQUOTAS] = {0, 0};
+
+	mlog_entry_void();
+	/* Take the shared global lock for every active quota type */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (!sb_has_quota_active(sb, cnt))
+			continue;
+		oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+		status = ocfs2_lock_global_qf(oinfo, 0);
+		if (status < 0)
+			goto out;
+		got_lock[cnt] = 1;
+	}
+	/* Lock against anyone releasing references so that when we check
+	 * we know we are not going to be last ones to release dquot */
+	down_write(&sb_dqopt(sb)->dqptr_sem);
+	/* Urgh, this is a terrible hack :( */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (inode->i_dquot[cnt] != NODQUOT &&
+		    atomic_read(&inode->i_dquot[cnt]->dq_count) > 1) {
+			exclusive = 1;
+			break;
+		}
+	}
+	if (!exclusive)
+		dquot_drop_locked(inode);
+	up_write(&sb_dqopt(sb)->dqptr_sem);
+out:
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+		if (got_lock[cnt]) {
+			oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+			ocfs2_unlock_global_qf(oinfo, 0);
+		}
+	/* In case we bailed out because we had to do expensive locking
+	 * do it now... */
+	if (exclusive)
+		status = ocfs2_dquot_drop_slow(inode);
+	mlog_exit(status);
+	return status;
+}
+
+/* Allocate a zeroed ocfs2_dquot from ocfs2_dquot_cachep and hand out its
+ * embedded generic dquot; NULL when the allocation fails. */
+static struct dquot *ocfs2_alloc_dquot(struct super_block *sb, int type)
+{
+	struct ocfs2_dquot *dquot =
+				kmem_cache_zalloc(ocfs2_dquot_cachep, GFP_NOFS);
+
+	if (!dquot)
+		return NULL;
+	return &dquot->dq_dquot;
+}
+
+/* Free a dquot obtained from ocfs2_alloc_dquot().
+ * NOTE(review): frees the generic dquot pointer directly, which presumably
+ * relies on dq_dquot being the first member of struct ocfs2_dquot - confirm. */
+static void ocfs2_destroy_dquot(struct dquot *dquot)
+{
+	kmem_cache_free(ocfs2_dquot_cachep, dquot);
+}
+
+/* Quota operations table: ocfs2 wrappers where a cluster lock or a
+ * transaction is needed, generic dquot_* helpers for the rest. */
+struct dquot_operations ocfs2_quota_operations = {
+	.initialize	= ocfs2_dquot_initialize,
+	.drop		= ocfs2_dquot_drop,
+	.alloc_space	= dquot_alloc_space,
+	.alloc_inode	= dquot_alloc_inode,
+	.free_space	= dquot_free_space,
+	.free_inode	= dquot_free_inode,
+	.transfer	= dquot_transfer,
+	.write_dquot	= ocfs2_write_dquot,
+	.acquire_dquot	= ocfs2_acquire_dquot,
+	.release_dquot	= ocfs2_release_dquot,
+	.mark_dirty	= ocfs2_mark_dquot_dirty,
+	.write_info	= ocfs2_write_info,
+	.alloc_dquot	= ocfs2_alloc_dquot,
+	.destroy_dquot	= ocfs2_destroy_dquot,
+};
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
new file mode 100644
index 00000000000..55c3f2f98dc
--- /dev/null
+++ b/fs/ocfs2/quota_local.c
@@ -0,0 +1,833 @@
+/*
+ *  Implementation of operations over local quota file
+ */
+
+#include
+#include
+#include
+#include
+
+#define MLOG_MASK_PREFIX ML_QUOTA
+#include
+
+#include "ocfs2_fs.h"
+#include "ocfs2.h"
+#include "inode.h"
+#include "alloc.h"
+#include "file.h"
+#include "buffer_head_io.h"
+#include "journal.h"
+#include "sysfile.h"
+#include "dlmglue.h"
+#include "quota.h"
+
+/* Number of local quota structures per block */
+static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
+{
+	return ((sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) /
+		sizeof(struct ocfs2_local_disk_dqblk));
+}
+
+/* Number of blocks with entries in one chunk */
+static inline unsigned int ol_chunk_blocks(struct super_block *sb)
+{
+	/* "<< 3" turns the bitmap size in bytes into a number of bits */
+	return ((sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
+		 OCFS2_QBLK_RESERVED_SPACE) << 3) /
+	       ol_quota_entries_per_block(sb);
+}
+
+/* Number of entries in a chunk bitmap */
+static unsigned int ol_chunk_entries(struct super_block *sb)
+{
+	return ol_chunk_blocks(sb) * ol_quota_entries_per_block(sb);
+}
+
+/* Offset of the chunk in quota file (in blocks; this is the block holding
+ * the header of chunk number c) */
+static unsigned int ol_quota_chunk_block(struct super_block *sb, int c)
+{
+	/* 1 block for local quota file info, 1 block per chunk for chunk info */
+	return 1 + (ol_chunk_blocks(sb) + 1) * c;
+}
+
+/* Offset of the dquot structure in the quota file (in bytes), for entry
+ * number off within chunk c.
+ * NOTE(review): the block number is shifted in unsigned int arithmetic
+ * before being widened to loff_t - confirm this cannot overflow. */
+static loff_t ol_dqblk_off(struct super_block *sb, int c, int off)
+{
+	int epb = ol_quota_entries_per_block(sb);
+
+	return ((ol_quota_chunk_block(sb, c) + 1 + off / epb)
+		<< sb->s_blocksize_bits) +
+		(off % epb) * sizeof(struct ocfs2_local_disk_dqblk);
+}
+
+/* Compute block number from given offset */
+static inline unsigned int ol_dqblk_file_block(struct super_block *sb, loff_t off)
+{
+	return off >> sb->s_blocksize_bits;
+}
+
+/* Compute the byte offset inside its block of the given file offset */
+static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off)
+{
+	return off & ((1 << sb->s_blocksize_bits) - 1);
+}
+
+/* Compute offset in the chunk of a structure with the given offset
+ * (the inverse of ol_dqblk_off() for chunk c) */
+static int ol_dqblk_chunk_off(struct super_block *sb, int c, loff_t off)
+{
+	int epb = ol_quota_entries_per_block(sb);
+
+	return ((off >> sb->s_blocksize_bits) -
+			ol_quota_chunk_block(sb, c) - 1) * epb
+	       + ((unsigned int)(off & ((1 << sb->s_blocksize_bits) - 1))) /
+		 sizeof(struct ocfs2_local_disk_dqblk);
+}
+
+/* Write bufferhead into the fs
+ *
+ * Starts a one-credit transaction, gets journal write access to bh, calls
+ * modify(bh, private) with the buffer locked, dirties the buffer in the
+ * journal and commits.  Returns 0 or a negative error; the transaction is
+ * committed on the failure paths that occur after it was started. */
+static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
+		void (*modify)(struct buffer_head *, void *), void *private)
+{
+	struct super_block *sb = inode->i_sb;
+	handle_t *handle;
+	int status;
+
+	handle = ocfs2_start_trans(OCFS2_SB(sb), 1);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		return status;
+	}
+	status = ocfs2_journal_access(handle, inode, bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		mlog_errno(status);
+		ocfs2_commit_trans(OCFS2_SB(sb), handle);
+		return status;
+	}
+	lock_buffer(bh);
+	modify(bh, private);
+	unlock_buffer(bh);
+	status = ocfs2_journal_dirty(handle, bh);
+	if (status < 0) {
+		mlog_errno(status);
+		ocfs2_commit_trans(OCFS2_SB(sb), handle);
+		return status;
+	}
+	status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
+	if (status < 0) {
+		mlog_errno(status);
+		return status;
+	}
+	return 0;
+}
+
+/* Check whether we understand format of quota files
+ *
+ * Compares magic and version in block 0 of both the local and the global
+ * quota file against the expected values for @type.  Returns 1 when both
+ * match and 0 otherwise (including on read failures). */
+static int ocfs2_local_check_quota_file(struct super_block *sb, int type)
+{
+	unsigned int lmagics[MAXQUOTAS] = OCFS2_LOCAL_QMAGICS;
+	unsigned int lversions[MAXQUOTAS] = OCFS2_LOCAL_QVERSIONS;
+	unsigned int gmagics[MAXQUOTAS] = OCFS2_GLOBAL_QMAGICS;
+	unsigned int gversions[MAXQUOTAS] = OCFS2_GLOBAL_QVERSIONS;
+	unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE,
+					GROUP_QUOTA_SYSTEM_INODE };
+	struct buffer_head *bh;
+	struct inode *linode = sb_dqopt(sb)->files[type];
+	struct inode *ginode = NULL;
+	struct ocfs2_disk_dqheader *dqhead;
+	int status, ret = 0;
+
+	/* First check whether we understand local quota file */
+	bh = ocfs2_read_quota_block(linode, 0, &status);
+	if (!bh) {
+		mlog_errno(status);
+		mlog(ML_ERROR, "failed to read quota file header (type=%d)\n",
+			type);
+		goto out_err;
+	}
+	dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data);
+	if (le32_to_cpu(dqhead->dqh_magic) != lmagics[type]) {
+		mlog(ML_ERROR, "quota file magic does not match (%u != %u),"
+			" type=%d\n", le32_to_cpu(dqhead->dqh_magic),
+			lmagics[type], type);
+		goto out_err;
+	}
+	if (le32_to_cpu(dqhead->dqh_version) != lversions[type]) {
+		mlog(ML_ERROR, "quota file version does not match (%u != %u),"
+			" type=%d\n", le32_to_cpu(dqhead->dqh_version),
+			lversions[type], type);
+		goto out_err;
+	}
+	brelse(bh);
+	bh = NULL;
+
+	/* Next check whether we understand global quota file */
+	ginode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
+						OCFS2_INVALID_SLOT);
+	if (!ginode) {
+		mlog(ML_ERROR, "cannot get global quota file inode "
+				"(type=%d)\n", type);
+		goto out_err;
+	}
+	/* Since the header is read only, we don't care about locking */
+	bh = ocfs2_read_quota_block(ginode, 0, &status);
+	if (!bh) {
+		mlog_errno(status);
+		mlog(ML_ERROR, "failed to read global quota file header "
+				"(type=%d)\n", type);
+		goto out_err;
+	}
+	dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data);
+	if (le32_to_cpu(dqhead->dqh_magic) != gmagics[type]) {
+		mlog(ML_ERROR, "global quota file magic does not match "
+			"(%u != %u), type=%d\n",
+			le32_to_cpu(dqhead->dqh_magic), gmagics[type], type);
+		goto out_err;
+	}
+	if (le32_to_cpu(dqhead->dqh_version) != gversions[type]) {
+		mlog(ML_ERROR, "global quota file version does not match "
+			"(%u != %u), type=%d\n",
+			le32_to_cpu(dqhead->dqh_version), gversions[type],
+			type);
+		goto out_err;
+	}
+
+	ret = 1;
+out_err:
+	/* bh and ginode may be NULL here */
+	brelse(bh);
+	iput(ginode);
+	return ret;
+}
+
+/* Release given list of quota file chunks: unlink each chunk, drop its
+ * header buffer and free the chunk structure */
+static void ocfs2_release_local_quota_bitmaps(struct list_head *head)
+{
+	struct ocfs2_quota_chunk *pos, *next;
+
+	list_for_each_entry_safe(pos, next, head, qc_chunk) {
+		list_del(&pos->qc_chunk);
+		brelse(pos->qc_headerbh);
+		kmem_cache_free(ocfs2_qf_chunk_cachep, pos);
+	}
+}
+
+/* Load quota bitmaps into memory
+ *
+ * Builds on @head one ocfs2_quota_chunk per chunk counted in
+ * ldinfo->dqi_chunks, each holding the buffer head of its chunk header
+ * block.  On failure everything loaded so far is released again. */
+static int ocfs2_load_local_quota_bitmaps(struct inode *inode,
+			struct ocfs2_local_disk_dqinfo *ldinfo,
+			struct list_head *head)
+{
+	struct ocfs2_quota_chunk *newchunk;
+	int i, status;
+
+	INIT_LIST_HEAD(head);
+	for (i = 0; i < le32_to_cpu(ldinfo->dqi_chunks); i++) {
+		newchunk = kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS);
+		if (!newchunk) {
+			ocfs2_release_local_quota_bitmaps(head);
+			return -ENOMEM;
+		}
+		newchunk->qc_num = i;
+		newchunk->qc_headerbh = ocfs2_read_quota_block(inode,
+				ol_quota_chunk_block(inode->i_sb, i),
+				&status);
+		if (!newchunk->qc_headerbh) {
+			mlog_errno(status);
+			kmem_cache_free(ocfs2_qf_chunk_cachep, newchunk);
+			
ocfs2_release_local_quota_bitmaps(head);
+			return status;
+		}
+		list_add_tail(&newchunk->qc_chunk, head);
+	}
+	return 0;
+}
+
+/* ocfs2_modify_bh() callback: copy the in-memory flags and the chunk and
+ * block counts into the on-disk local info at OCFS2_LOCAL_INFO_OFF.
+ * @private is the struct mem_dqinfo being written. */
+static void olq_update_info(struct buffer_head *bh, void *private)
+{
+	struct mem_dqinfo *info = private;
+	struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+	struct ocfs2_local_disk_dqinfo *ldinfo;
+
+	ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+						OCFS2_LOCAL_INFO_OFF);
+	spin_lock(&dq_data_lock);
+	ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
+	ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks);
+	ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks);
+	spin_unlock(&dq_data_lock);
+}
+
+/* Read information header from quota file
+ *
+ * Allocates the ocfs2 private quota info, reads the global info, locks the
+ * local quota file inode with ex=1, reads the local header and chunk
+ * bitmaps and finally clears OLQF_CLEAN on disk to mark the file as in use.
+ * The inode lock and the buffers stay held on success.
+ *
+ * Returns 0 on success and -1 on any failure, including a local quota file
+ * that was not marked clean.  On failure everything acquired here is undone
+ * and the private info is freed. */
+static int ocfs2_local_read_info(struct super_block *sb, int type)
+{
+	struct ocfs2_local_disk_dqinfo *ldinfo;
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct ocfs2_mem_dqinfo *oinfo;
+	struct inode *lqinode = sb_dqopt(sb)->files[type];
+	int status;
+	struct buffer_head *bh = NULL;
+	int locked = 0;
+
+	info->dqi_maxblimit = 0x7fffffffffffffffLL;
+	info->dqi_maxilimit = 0x7fffffffffffffffLL;
+	oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS);
+	if (!oinfo) {
+		mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota"
+			       " info.");
+		goto out_err;
+	}
+	info->dqi_priv = oinfo;
+	oinfo->dqi_type = type;
+	INIT_LIST_HEAD(&oinfo->dqi_chunk);
+	/* kmalloc() does not zero: the error path must be able to tell
+	 * whether ocfs2_global_read_info() got as far as taking the global
+	 * inode and initializing the lock resource */
+	oinfo->dqi_gqinode = NULL;
+	oinfo->dqi_lqi_bh = NULL;
+	oinfo->dqi_ibh = NULL;
+
+	status = ocfs2_global_read_info(sb, type);
+	if (status < 0)
+		goto out_err;
+
+	status = ocfs2_inode_lock(lqinode, &oinfo->dqi_lqi_bh, 1);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_err;
+	}
+	locked = 1;
+
+	/* Now read local header */
+	bh = ocfs2_read_quota_block(lqinode, 0, &status);
+	if (!bh) {
+		mlog_errno(status);
+		mlog(ML_ERROR, "failed to read quota file info header "
+			"(type=%d)\n", type);
+		goto out_err;
+	}
+	ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+						OCFS2_LOCAL_INFO_OFF);
+	info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags);
+	oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks);
+	oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks);
+	oinfo->dqi_ibh = bh;
+
+	/* We crashed when using local quota file? */
+	if (!(info->dqi_flags & OLQF_CLEAN))
+		goto out_err;	/* So far we just bail out. Later we should resync here */
+
+	status = ocfs2_load_local_quota_bitmaps(sb_dqopt(sb)->files[type],
+						ldinfo,
+						&oinfo->dqi_chunk);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_err;
+	}
+
+	/* Now mark quota file as used */
+	info->dqi_flags &= ~OLQF_CLEAN;
+	status = ocfs2_modify_bh(lqinode, bh, olq_update_info, info);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_err;
+	}
+
+	return 0;
+out_err:
+	if (oinfo) {
+		/* ocfs2_global_read_info() sets dqi_gqinode only together
+		 * with initializing dqi_gqlock, so a NULL inode means neither
+		 * exists and must not be torn down */
+		if (oinfo->dqi_gqinode) {
+			iput(oinfo->dqi_gqinode);
+			ocfs2_simple_drop_lockres(OCFS2_SB(sb),
+						  &oinfo->dqi_gqlock);
+			ocfs2_lock_res_free(&oinfo->dqi_gqlock);
+		}
+		brelse(oinfo->dqi_lqi_bh);
+		if (locked)
+			ocfs2_inode_unlock(lqinode, 1);
+		ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
+		kfree(oinfo);
+	}
+	/* bh is the same buffer as oinfo->dqi_ibh; it is released once here */
+	brelse(bh);
+	return -1;
+}
+
+/* Write local info to quota file
+ * Returns 0 on success and -1 (not the underlying error) on failure. */
+static int ocfs2_local_write_info(struct super_block *sb, int type)
+{
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv)
+						->dqi_ibh;
+	int status;
+
+	status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info,
+				 info);
+	if (status < 0) {
+		mlog_errno(status);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Release info from memory */
+static int ocfs2_local_free_info(struct super_block *sb, int type)
+{
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+	struct ocfs2_quota_chunk *chunk;
+	struct ocfs2_local_disk_chunk *dchunk;
+	int mark_clean = 1, len;
+	int status;
+
+	iput(oinfo->dqi_gqinode);
+	ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
+	ocfs2_lock_res_free(&oinfo->dqi_gqlock);
+	list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) {
+		dchunk = (struct ocfs2_local_disk_chunk *)
+			
(chunk->qc_headerbh->b_data);
+		/* Every chunk but the last is full-sized; the last one covers
+		 * only the blocks that exist after its header */
+		if (chunk->qc_num < oinfo->dqi_chunks - 1) {
+			len = ol_chunk_entries(sb);
+		} else {
+			len = (oinfo->dqi_blocks -
+			       ol_quota_chunk_block(sb, chunk->qc_num) - 1)
+			      * ol_quota_entries_per_block(sb);
+		}
+		/* Not all entries free? Bug! */
+		if (le32_to_cpu(dchunk->dqc_free) != len) {
+			mlog(ML_ERROR, "releasing quota file with used "
+					"entries (type=%d)\n", type);
+			mark_clean = 0;
+		}
+	}
+	ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
+
+	/* A file with entries still in use is left without OLQF_CLEAN */
+	if (!mark_clean)
+		goto out;
+
+	/* Mark local file as clean */
+	info->dqi_flags |= OLQF_CLEAN;
+	status = ocfs2_modify_bh(sb_dqopt(sb)->files[type],
+				 oinfo->dqi_ibh,
+				 olq_update_info,
+				 info);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+
+out:
+	/* Always returns 0; a failed clean-marking is only logged */
+	ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1);
+	brelse(oinfo->dqi_ibh);
+	brelse(oinfo->dqi_lqi_bh);
+	kfree(oinfo);
+	return 0;
+}
+
+/* ocfs2_modify_bh() callback: store the id and the usage deltas (current
+ * value minus the dq_orig* baseline) of the ocfs2_dquot passed in @private
+ * into its slot in the local quota block bh. */
+static void olq_set_dquot(struct buffer_head *bh, void *private)
+{
+	struct ocfs2_dquot *od = private;
+	struct ocfs2_local_disk_dqblk *dqblk;
+	struct super_block *sb = od->dq_dquot.dq_sb;
+
+	dqblk = (struct ocfs2_local_disk_dqblk *)(bh->b_data
+		+ ol_dqblk_block_offset(sb, od->dq_local_off));
+
+	dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id);
+	spin_lock(&dq_data_lock);
+	dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace -
+					  od->dq_origspace);
+	dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes -
+					  od->dq_originodes);
+	spin_unlock(&dq_data_lock);
+	mlog(0, "Writing local dquot %u space %lld inodes %lld\n",
+	     od->dq_dquot.dq_id, dqblk->dqb_spacemod, dqblk->dqb_inodemod);
+}
+
+/* Write dquot to local quota file
+ * Reads the block containing od->dq_local_off and updates the entry through
+ * ocfs2_modify_bh().  Returns the status of the read or the modification. */
+static int ocfs2_local_write_dquot(struct dquot *dquot)
+{
+	struct super_block *sb = dquot->dq_sb;
+	struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
+	struct buffer_head *bh;
+	int status;
+
+	bh = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type],
+				    ol_dqblk_file_block(sb, od->dq_local_off),
+				    &status);
+	if (!bh) {
+		mlog_errno(status);
+		goto out;
+	}
+	status = ocfs2_modify_bh(sb_dqopt(sb)->files[dquot->dq_type], bh,
+				 olq_set_dquot, od);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+out:
+	/* bh is NULL when the read failed */
+	brelse(bh);
+	return status;
+}
+
+/* Find free entry in local quota file
+ *
+ * Returns the first chunk whose header reports free entries and stores the
+ * index of its first clear bitmap bit in *offset.  Returns NULL when no
+ * chunk has a free entry, or ERR_PTR(-EIO) when the free count and the
+ * bitmap disagree. */
+static struct ocfs2_quota_chunk *ocfs2_find_free_entry(struct super_block *sb,
+						       int type,
+						       int *offset)
+{
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+	struct ocfs2_quota_chunk *chunk;
+	struct ocfs2_local_disk_chunk *dchunk;
+	int found = 0, len;
+
+	list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) {
+		dchunk = (struct ocfs2_local_disk_chunk *)
+						chunk->qc_headerbh->b_data;
+		if (le32_to_cpu(dchunk->dqc_free) > 0) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found)
+		return NULL;
+
+	/* Number of valid bitmap bits: full for all but the last chunk */
+	if (chunk->qc_num < oinfo->dqi_chunks - 1) {
+		len = ol_chunk_entries(sb);
+	} else {
+		len = (oinfo->dqi_blocks -
+		       ol_quota_chunk_block(sb, chunk->qc_num) - 1)
+		      * ol_quota_entries_per_block(sb);
+	}
+
+	found = ocfs2_find_next_zero_bit(dchunk->dqc_bitmap, len, 0);
+	/* We failed? 
*/ + if (found == len) { + mlog(ML_ERROR, "Did not find empty entry in chunk %d with %u" + " entries free (type=%d)\n", chunk->qc_num, + le32_to_cpu(dchunk->dqc_free), type); + return ERR_PTR(-EIO); + } + *offset = found; + return chunk; +} + +/* Add new chunk to the local quota file */ +static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( + struct super_block *sb, + int type, + int *offset) +{ + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + struct inode *lqinode = sb_dqopt(sb)->files[type]; + struct ocfs2_quota_chunk *chunk = NULL; + struct ocfs2_local_disk_chunk *dchunk; + int status; + handle_t *handle; + struct buffer_head *bh = NULL; + u64 p_blkno; + + /* We are protected by dqio_sem so no locking needed */ + status = ocfs2_extend_no_holes(lqinode, + lqinode->i_size + 2 * sb->s_blocksize, + lqinode->i_size); + if (status < 0) { + mlog_errno(status); + goto out; + } + status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, + lqinode->i_size + 2 * sb->s_blocksize); + if (status < 0) { + mlog_errno(status); + goto out; + } + + chunk = kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS); + if (!chunk) { + status = -ENOMEM; + mlog_errno(status); + goto out; + } + + down_read(&OCFS2_I(lqinode)->ip_alloc_sem); + status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, + &p_blkno, NULL, NULL); + up_read(&OCFS2_I(lqinode)->ip_alloc_sem); + if (status < 0) { + mlog_errno(status); + goto out; + } + bh = sb_getblk(sb, p_blkno); + if (!bh) { + status = -ENOMEM; + mlog_errno(status); + goto out; + } + dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; + + handle = ocfs2_start_trans(OCFS2_SB(sb), 2); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out; + } + + status = ocfs2_journal_access(handle, lqinode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + lock_buffer(bh); + dchunk->dqc_free = 
ol_quota_entries_per_block(sb); + memset(dchunk->dqc_bitmap, 0, + sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - + OCFS2_QBLK_RESERVED_SPACE); + set_buffer_uptodate(bh); + unlock_buffer(bh); + status = ocfs2_journal_dirty(handle, bh); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + + oinfo->dqi_blocks += 2; + oinfo->dqi_chunks++; + status = ocfs2_local_write_info(sb, type); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + status = ocfs2_commit_trans(OCFS2_SB(sb), handle); + if (status < 0) { + mlog_errno(status); + goto out; + } + + list_add_tail(&chunk->qc_chunk, &oinfo->dqi_chunk); + chunk->qc_num = list_entry(chunk->qc_chunk.prev, + struct ocfs2_quota_chunk, + qc_chunk)->qc_num + 1; + chunk->qc_headerbh = bh; + *offset = 0; + return chunk; +out_trans: + ocfs2_commit_trans(OCFS2_SB(sb), handle); +out: + brelse(bh); + kmem_cache_free(ocfs2_qf_chunk_cachep, chunk); + return ERR_PTR(status); +} + +/* Find free entry in local quota file */ +static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( + struct super_block *sb, + int type, + int *offset) +{ + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + struct ocfs2_quota_chunk *chunk; + struct inode *lqinode = sb_dqopt(sb)->files[type]; + struct ocfs2_local_disk_chunk *dchunk; + int epb = ol_quota_entries_per_block(sb); + unsigned int chunk_blocks; + int status; + handle_t *handle; + + if (list_empty(&oinfo->dqi_chunk)) + return ocfs2_local_quota_add_chunk(sb, type, offset); + /* Is the last chunk full? 
*/ + chunk = list_entry(oinfo->dqi_chunk.prev, + struct ocfs2_quota_chunk, qc_chunk); + chunk_blocks = oinfo->dqi_blocks - + ol_quota_chunk_block(sb, chunk->qc_num) - 1; + if (ol_chunk_blocks(sb) == chunk_blocks) + return ocfs2_local_quota_add_chunk(sb, type, offset); + + /* We are protected by dqio_sem so no locking needed */ + status = ocfs2_extend_no_holes(lqinode, + lqinode->i_size + sb->s_blocksize, + lqinode->i_size); + if (status < 0) { + mlog_errno(status); + goto out; + } + status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, + lqinode->i_size + sb->s_blocksize); + if (status < 0) { + mlog_errno(status); + goto out; + } + handle = ocfs2_start_trans(OCFS2_SB(sb), 2); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out; + } + status = ocfs2_journal_access(handle, lqinode, chunk->qc_headerbh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + + dchunk = (struct ocfs2_local_disk_chunk *)chunk->qc_headerbh->b_data; + lock_buffer(chunk->qc_headerbh); + le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb)); + unlock_buffer(chunk->qc_headerbh); + status = ocfs2_journal_dirty(handle, chunk->qc_headerbh); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + oinfo->dqi_blocks++; + status = ocfs2_local_write_info(sb, type); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + + status = ocfs2_commit_trans(OCFS2_SB(sb), handle); + if (status < 0) { + mlog_errno(status); + goto out; + } + *offset = chunk_blocks * epb; + return chunk; +out_trans: + ocfs2_commit_trans(OCFS2_SB(sb), handle); +out: + return ERR_PTR(status); +} + +void olq_alloc_dquot(struct buffer_head *bh, void *private) +{ + int *offset = private; + struct ocfs2_local_disk_chunk *dchunk; + + dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; + ocfs2_set_bit(*offset, dchunk->dqc_bitmap); + le32_add_cpu(&dchunk->dqc_free, -1); +} + +/* Create dquot in the local file for given id */ 
+static int ocfs2_create_local_dquot(struct dquot *dquot) +{ + struct super_block *sb = dquot->dq_sb; + int type = dquot->dq_type; + struct inode *lqinode = sb_dqopt(sb)->files[type]; + struct ocfs2_quota_chunk *chunk; + struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); + int offset; + int status; + + chunk = ocfs2_find_free_entry(sb, type, &offset); + if (!chunk) { + chunk = ocfs2_extend_local_quota_file(sb, type, &offset); + if (IS_ERR(chunk)) + return PTR_ERR(chunk); + } else if (IS_ERR(chunk)) { + return PTR_ERR(chunk); + } + od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset); + od->dq_chunk = chunk; + + /* Initialize dquot structure on disk */ + status = ocfs2_local_write_dquot(dquot); + if (status < 0) { + mlog_errno(status); + goto out; + } + + /* Mark structure as allocated */ + status = ocfs2_modify_bh(lqinode, chunk->qc_headerbh, olq_alloc_dquot, + &offset); + if (status < 0) { + mlog_errno(status); + goto out; + } +out: + return status; +} + +/* Create entry in local file for dquot, load data from the global file */ +static int ocfs2_local_read_dquot(struct dquot *dquot) +{ + int status; + + mlog_entry("id=%u, type=%d\n", dquot->dq_id, dquot->dq_type); + + status = ocfs2_global_read_dquot(dquot); + if (status < 0) { + mlog_errno(status); + goto out_err; + } + + /* Now create entry in the local quota file */ + status = ocfs2_create_local_dquot(dquot); + if (status < 0) { + mlog_errno(status); + goto out_err; + } + mlog_exit(0); + return 0; +out_err: + mlog_exit(status); + return status; +} + +/* Release dquot structure from local quota file. ocfs2_release_dquot() has + * already started a transaction and obtained exclusive lock for global + * quota file. 
*/ +static int ocfs2_local_release_dquot(struct dquot *dquot) +{ + int status; + int type = dquot->dq_type; + struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); + struct super_block *sb = dquot->dq_sb; + struct ocfs2_local_disk_chunk *dchunk; + int offset; + handle_t *handle = journal_current_handle(); + + BUG_ON(!handle); + /* First write all local changes to global file */ + status = ocfs2_global_release_dquot(dquot); + if (status < 0) { + mlog_errno(status); + goto out; + } + + status = ocfs2_journal_access(handle, sb_dqopt(sb)->files[type], + od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto out; + } + offset = ol_dqblk_chunk_off(sb, od->dq_chunk->qc_num, + od->dq_local_off); + dchunk = (struct ocfs2_local_disk_chunk *) + (od->dq_chunk->qc_headerbh->b_data); + /* Mark structure as freed */ + lock_buffer(od->dq_chunk->qc_headerbh); + ocfs2_clear_bit(offset, dchunk->dqc_bitmap); + le32_add_cpu(&dchunk->dqc_free, 1); + unlock_buffer(od->dq_chunk->qc_headerbh); + status = ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); + if (status < 0) { + mlog_errno(status); + goto out; + } + status = 0; +out: + /* Clear the read bit so that next time someone uses this + * dquot he reads fresh info from disk and allocates local + * dquot structure */ + clear_bit(DQ_READ_B, &dquot->dq_flags); + return status; +} + +static struct quota_format_ops ocfs2_format_ops = { + .check_quota_file = ocfs2_local_check_quota_file, + .read_file_info = ocfs2_local_read_info, + .write_file_info = ocfs2_global_write_info, + .free_file_info = ocfs2_local_free_info, + .read_dqblk = ocfs2_local_read_dquot, + .commit_dqblk = ocfs2_local_write_dquot, + .release_dqblk = ocfs2_local_release_dquot, +}; + +struct quota_format_type ocfs2_quota_format = { + .qf_fmt_id = QFMT_OCFS2, + .qf_ops = &ocfs2_format_ops, + .qf_owner = THIS_MODULE +}; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 41bb0197cf4..7bb83e41581 100644 --- a/fs/ocfs2/super.c +++ 
b/fs/ocfs2/super.c @@ -65,10 +65,13 @@ #include "uptodate.h" #include "ver.h" #include "xattr.h" +#include "quota.h" #include "buffer_head_io.h" static struct kmem_cache *ocfs2_inode_cachep = NULL; +struct kmem_cache *ocfs2_dquot_cachep; +struct kmem_cache *ocfs2_qf_chunk_cachep; /* OCFS2 needs to schedule several differnt types of work which * require cluster locking, disk I/O, recovery waits, etc. Since these @@ -137,6 +140,8 @@ static const struct super_operations ocfs2_sops = { .put_super = ocfs2_put_super, .remount_fs = ocfs2_remount, .show_options = ocfs2_show_options, + .quota_read = ocfs2_quota_read, + .quota_write = ocfs2_quota_write, }; enum { @@ -1104,6 +1109,7 @@ static int __init ocfs2_init(void) ocfs2_set_locking_protocol(); + status = register_quota_format(&ocfs2_quota_format); leave: if (status < 0) { ocfs2_free_mem_caches(); @@ -1127,6 +1133,8 @@ static void __exit ocfs2_exit(void) destroy_workqueue(ocfs2_wq); } + unregister_quota_format(&ocfs2_quota_format); + debugfs_remove(ocfs2_debugfs_root); ocfs2_free_mem_caches(); @@ -1242,8 +1250,27 @@ static int ocfs2_initialize_mem_caches(void) (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), ocfs2_inode_init_once); - if (!ocfs2_inode_cachep) + ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache", + sizeof(struct ocfs2_dquot), + 0, + (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL); + ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache", + sizeof(struct ocfs2_quota_chunk), + 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), + NULL); + if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep || + !ocfs2_qf_chunk_cachep) { + if (ocfs2_inode_cachep) + kmem_cache_destroy(ocfs2_inode_cachep); + if (ocfs2_dquot_cachep) + kmem_cache_destroy(ocfs2_dquot_cachep); + if (ocfs2_qf_chunk_cachep) + kmem_cache_destroy(ocfs2_qf_chunk_cachep); return -ENOMEM; + } return 0; } @@ -1252,8 +1279,15 @@ static void ocfs2_free_mem_caches(void) { if (ocfs2_inode_cachep) 
kmem_cache_destroy(ocfs2_inode_cachep); - ocfs2_inode_cachep = NULL; + + if (ocfs2_dquot_cachep) + kmem_cache_destroy(ocfs2_dquot_cachep); + ocfs2_dquot_cachep = NULL; + + if (ocfs2_qf_chunk_cachep) + kmem_cache_destroy(ocfs2_qf_chunk_cachep); + ocfs2_qf_chunk_cachep = NULL; } static int ocfs2_get_sector(struct super_block *sb, -- cgit v1.2.3-70-g09d2 From a90714c150e3ce677c57a9dac3ab1ec342c75a95 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 9 Oct 2008 19:38:40 +0200 Subject: ocfs2: Add quota calls for allocation and freeing of inodes and space Add quota calls for allocation and freeing of inodes and space, also update estimates on number of needed credits for a transaction. Move out inode allocation from ocfs2_mknod_locked() because vfs_dq_init() must be called outside of a transaction. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 20 +++++++++++-- fs/ocfs2/aops.c | 16 +++++++++-- fs/ocfs2/dir.c | 24 ++++++++++++++-- fs/ocfs2/file.c | 72 ++++++++++++++++++++++++++++++++++++++++++---- fs/ocfs2/inode.c | 10 +++++-- fs/ocfs2/journal.h | 84 ++++++++++++++++++++++++++++++++++++++++++++---------- fs/ocfs2/namei.c | 44 +++++++++++++++++++++++++--- fs/ocfs2/xattr.c | 14 +++++---- 8 files changed, 245 insertions(+), 39 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 69d67ab069b..84a7bd4db5d 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -28,6 +28,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_DISK_ALLOC #include @@ -5322,7 +5323,7 @@ int ocfs2_remove_btree_range(struct inode *inode, } } - handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); + handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); @@ -6552,6 +6553,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, goto bail; } + vfs_dq_free_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_del)); 
spin_lock(&OCFS2_I(inode)->ip_lock); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - clusters_to_del; @@ -6860,6 +6863,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, struct page **pages = NULL; loff_t end = osb->s_clustersize; struct ocfs2_extent_tree et; + int did_quota = 0; has_data = i_size_read(inode) ? 1 : 0; @@ -6879,7 +6883,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, } } - handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS); + handle = ocfs2_start_trans(osb, + ocfs2_inline_to_extents_credits(osb->sb)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); @@ -6898,6 +6903,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, unsigned int page_end; u64 phys; + if (vfs_dq_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, 1))) { + ret = -EDQUOT; + goto out_commit; + } + did_quota = 1; + ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &num); if (ret) { @@ -6971,6 +6983,10 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, } out_commit: + if (ret < 0 && did_quota) + vfs_dq_free_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, 1)); + ocfs2_commit_trans(osb, handle); out_unlock: diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 6af79adb2ec..6b647ec87bb 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -27,6 +27,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_FILE_IO #include @@ -1730,6 +1731,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, wc->w_handle = handle; + if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) { + ret = -EDQUOT; + goto out_commit; + } /* * We don't want this to fail in ocfs2_write_end(), so do it * here. 
@@ -1738,7 +1744,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out_quota; } /* @@ -1751,14 +1757,14 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, mmap_page); if (ret) { mlog_errno(ret); - goto out_commit; + goto out_quota; } ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, len); if (ret) { mlog_errno(ret); - goto out_commit; + goto out_quota; } if (data_ac) @@ -1770,6 +1776,10 @@ success: *pagep = wc->w_target_page; *fsdata = wc; return 0; +out_quota: + if (clusters_to_alloc) + vfs_dq_free_space(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc)); out_commit: ocfs2_commit_trans(osb, handle); diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index d83cff95759..3708fe482e3 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -40,6 +40,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_NAMEI #include @@ -1210,9 +1211,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, unsigned int blocks_wanted, struct buffer_head **first_block_bh) { - int ret, credits = OCFS2_INLINE_TO_EXTENTS_CREDITS; u32 alloc, bit_off, len; struct super_block *sb = dir->i_sb; + int ret, credits = ocfs2_inline_to_extents_credits(sb); u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); struct ocfs2_inode_info *oi = OCFS2_I(dir); @@ -1221,6 +1222,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; handle_t *handle; struct ocfs2_extent_tree et; + int did_quota = 0; ocfs2_init_dinode_extent_tree(&et, dir, di_bh); @@ -1258,6 +1260,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, goto out_sem; } + if (vfs_dq_alloc_space_nodirty(dir, + ocfs2_clusters_to_bytes(osb->sb, alloc))) { + ret = -EDQUOT; + goto out_commit; + } + 
did_quota = 1; /* * Try to claim as many clusters as the bitmap can give though * if we only get one now, that's enough to continue. The rest @@ -1380,6 +1388,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, dirdata_bh = NULL; out_commit: + if (ret < 0 && did_quota) + vfs_dq_free_space_nodirty(dir, + ocfs2_clusters_to_bytes(osb->sb, 2)); ocfs2_commit_trans(osb, handle); out_sem: @@ -1404,7 +1415,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb, struct buffer_head **new_bh) { int status; - int extend; + int extend, did_quota = 0; u64 p_blkno, v_blkno; spin_lock(&OCFS2_I(dir)->ip_lock); @@ -1414,6 +1425,13 @@ static int ocfs2_do_extend_dir(struct super_block *sb, if (extend) { u32 offset = OCFS2_I(dir)->ip_clusters; + if (vfs_dq_alloc_space_nodirty(dir, + ocfs2_clusters_to_bytes(sb, 1))) { + status = -EDQUOT; + goto bail; + } + did_quota = 1; + status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset, 1, 0, parent_fe_bh, handle, data_ac, meta_ac, NULL); @@ -1439,6 +1457,8 @@ static int ocfs2_do_extend_dir(struct super_block *sb, } status = 0; bail: + if (did_quota && status < 0) + vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1)); mlog_exit(status); return status; } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 372d96505a7..9374d374a26 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -35,6 +35,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_INODE #include @@ -57,6 +58,7 @@ #include "super.h" #include "xattr.h" #include "acl.h" +#include "quota.h" #include "buffer_head_io.h" @@ -534,6 +536,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, enum ocfs2_alloc_restarted why; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_extent_tree et; + int did_quota = 0; mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); @@ -577,6 +580,13 @@ restart_all: } restarted_transaction: + if (vfs_dq_alloc_space_nodirty(inode, 
ocfs2_clusters_to_bytes(osb->sb, + clusters_to_add))) { + status = -EDQUOT; + goto leave; + } + did_quota = 1; + /* reserve a write to the file entry early on - that we if we * run out of credits in the allocation path, we can still * update i_size. */ @@ -614,6 +624,10 @@ restarted_transaction: spin_lock(&OCFS2_I(inode)->ip_lock); clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); spin_unlock(&OCFS2_I(inode)->ip_lock); + /* Release unused quota reservation */ + vfs_dq_free_space(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); + did_quota = 0; if (why != RESTART_NONE && clusters_to_add) { if (why == RESTART_META) { @@ -646,6 +660,9 @@ restarted_transaction: OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode)); leave: + if (status < 0 && did_quota) + vfs_dq_free_space(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); if (handle) { ocfs2_commit_trans(osb, handle); handle = NULL; @@ -877,6 +894,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) struct ocfs2_super *osb = OCFS2_SB(sb); struct buffer_head *bh = NULL; handle_t *handle = NULL; + int locked[MAXQUOTAS] = {0, 0}; + int credits, qtype; + struct ocfs2_mem_dqinfo *oinfo; mlog_entry("(0x%p, '%.*s')\n", dentry, dentry->d_name.len, dentry->d_name.name); @@ -947,11 +967,47 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) } } - handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (IS_ERR(handle)) { - status = PTR_ERR(handle); - mlog_errno(status); - goto bail_unlock; + if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || + (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + credits = OCFS2_INODE_UPDATE_CREDITS; + if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid + && OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { + oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv; + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) + goto bail_unlock; + credits += 
ocfs2_calc_qinit_credits(sb, USRQUOTA) + + ocfs2_calc_qdel_credits(sb, USRQUOTA); + locked[USRQUOTA] = 1; + } + if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid + && OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { + oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv; + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) + goto bail_unlock; + credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) + + ocfs2_calc_qdel_credits(sb, GRPQUOTA); + locked[GRPQUOTA] = 1; + } + handle = ocfs2_start_trans(osb, credits); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto bail_unlock; + } + status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; + if (status < 0) + goto bail_commit; + } else { + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto bail_unlock; + } } /* @@ -974,6 +1030,12 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) bail_commit: ocfs2_commit_trans(osb, handle); bail_unlock: + for (qtype = 0; qtype < MAXQUOTAS; qtype++) { + if (!locked[qtype]) + continue; + oinfo = sb_dqinfo(sb, qtype)->dqi_priv; + ocfs2_unlock_global_qf(oinfo, 1); + } ocfs2_inode_unlock(inode, 1); bail_unlock_rw: if (size_change) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 50dbc486ef7..288512c9dbc 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -603,7 +604,8 @@ static int ocfs2_remove_inode(struct inode *inode, goto bail; } - handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS); + handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS + + ocfs2_quota_trans_credits(inode->i_sb)); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); @@ -635,6 +637,7 @@ static int ocfs2_remove_inode(struct inode *inode, } ocfs2_remove_from_cache(inode, di_bh); + vfs_dq_free_inode(inode); status = ocfs2_free_dinode(handle, inode_alloc_inode, 
inode_alloc_bh, di); @@ -917,7 +920,10 @@ void ocfs2_delete_inode(struct inode *inode) mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); - if (is_bad_inode(inode)) { + /* When we fail in read_inode() we mark inode as bad. The second test + * catches the case when inode allocation fails before allocating + * a block for inode. */ + if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) { mlog(0, "Skipping delete of bad inode\n"); goto bail; } diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 8203980fefe..ee08e9c1fc1 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -284,6 +284,37 @@ int ocfs2_journal_dirty(handle_t *handle, /* extended attribute block update */ #define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 +/* global quotafile inode update, data block */ +#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) + +/* + * The two writes below can accidentally see global info dirty due + * to set_info() quotactl so make them prepared for the writes. + */ +/* quota data block, global info */ +/* Write to local quota file */ +#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1) + +/* global quota data block, local quota data block, global quota inode, + * global quota info */ +#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3) + +static inline int ocfs2_quota_trans_credits(struct super_block *sb) +{ + int credits = 0; + + if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) + credits += OCFS2_QWRITE_CREDITS; + if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) + credits += OCFS2_QWRITE_CREDITS; + return credits; +} + +/* Number of credits needed for removing quota structure from file */ +int ocfs2_calc_qdel_credits(struct super_block *sb, int type); +/* Number of credits needed for initialization of new quota structure */ +int ocfs2_calc_qinit_credits(struct super_block *sb, int type); + /* group extend. inode update and last group update. 
*/ #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) @@ -294,8 +325,11 @@ int ocfs2_journal_dirty(handle_t *handle, * prev. group desc. if we relink. */ #define OCFS2_SUBALLOC_ALLOC (3) -#define OCFS2_INLINE_TO_EXTENTS_CREDITS (OCFS2_SUBALLOC_ALLOC \ - + OCFS2_INODE_UPDATE_CREDITS) +static inline int ocfs2_inline_to_extents_credits(struct super_block *sb) +{ + return OCFS2_SUBALLOC_ALLOC + OCFS2_INODE_UPDATE_CREDITS + + ocfs2_quota_trans_credits(sb); +} /* dinode + group descriptor update. We don't relink on free yet. */ #define OCFS2_SUBALLOC_FREE (2) @@ -304,16 +338,23 @@ int ocfs2_journal_dirty(handle_t *handle, #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \ + OCFS2_TRUNCATE_LOG_UPDATE) -#define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS) +static inline int ocfs2_remove_extent_credits(struct super_block *sb) +{ + return OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS + + ocfs2_quota_trans_credits(sb); +} /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + * bitmap block for the new bit) */ #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) /* parent fe, parent block, new file entry, inode alloc fe, inode alloc - * group descriptor + mkdir/symlink blocks */ -#define OCFS2_MKNOD_CREDITS (3 + OCFS2_SUBALLOC_ALLOC \ - + OCFS2_DIR_LINK_ADDITIONAL_CREDITS) + * group descriptor + mkdir/symlink blocks + quota update */ +static inline int ocfs2_mknod_credits(struct super_block *sb) +{ + return 3 + OCFS2_SUBALLOC_ALLOC + OCFS2_DIR_LINK_ADDITIONAL_CREDITS + + ocfs2_quota_trans_credits(sb); +} /* local alloc metadata change + main bitmap updates */ #define OCFS2_WINDOW_MOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS \ @@ -323,13 +364,21 @@ int ocfs2_journal_dirty(handle_t *handle, * for the dinode, one for the new block. 
*/ #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) -/* file update (nlink, etc) + directory mtime/ctime + dir entry block */ -#define OCFS2_LINK_CREDITS (2*OCFS2_INODE_UPDATE_CREDITS + 1) +/* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota + * update on dir */ +static inline int ocfs2_link_credits(struct super_block *sb) +{ + return 2*OCFS2_INODE_UPDATE_CREDITS + 1 + + ocfs2_quota_trans_credits(sb); +} /* inode + dir inode (if we unlink a dir), + dir entry block + orphan * dir inode link */ -#define OCFS2_UNLINK_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 1 \ - + OCFS2_LINK_CREDITS) +static inline int ocfs2_unlink_credits(struct super_block *sb) +{ + /* The quota update from ocfs2_link_credits is unused here... */ + return 2 * OCFS2_INODE_UPDATE_CREDITS + 1 + ocfs2_link_credits(sb); +} /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + * inode alloc group descriptor */ @@ -338,8 +387,10 @@ int ocfs2_journal_dirty(handle_t *handle, /* dinode update, old dir dinode update, new dir dinode update, old * dir dir entry, new dir dir entry, dir entry update for renaming * directory + target unlink */ -#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \ - + OCFS2_UNLINK_CREDITS) +static inline int ocfs2_rename_credits(struct super_block *sb) +{ + return 3 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_unlink_credits(sb); +} /* global bitmap dinode, group desc., relinked group, * suballocator dinode, group desc., relinked group, @@ -377,18 +428,19 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb, * credit for the dinode there. 
*/ extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); - return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks; + return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + + ocfs2_quota_trans_credits(sb); } static inline int ocfs2_calc_symlink_credits(struct super_block *sb) { - int blocks = OCFS2_MKNOD_CREDITS; + int blocks = ocfs2_mknod_credits(sb); /* links can be longer than one block so we may update many * within our single allocated extent. */ blocks += ocfs2_clusters_to_blocks(sb, 1); - return blocks; + return blocks + ocfs2_quota_trans_credits(sb); } static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb, @@ -425,6 +477,8 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb, /* update to the truncate log. */ credits += OCFS2_TRUNCATE_LOG_UPDATE; + credits += ocfs2_quota_trans_credits(sb); + return credits; } diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 0134bafdab9..6173807ba23 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -40,6 +40,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_NAMEI #include @@ -212,6 +213,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode) } else inode->i_gid = current_fsgid(); inode->i_mode = mode; + vfs_dq_init(inode); return inode; } @@ -236,6 +238,7 @@ static int ocfs2_mknod(struct inode *dir, struct ocfs2_security_xattr_info si = { .enable = 1, }; + int did_quota_inode = 0; mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, (unsigned long)dev, dentry->d_name.len, @@ -323,7 +326,8 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } - handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS + xattr_credits); + handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb) + + xattr_credits); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; @@ -331,6 +335,15 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } + /* We don't use standard VFS wrapper because we don't want 
vfs_dq_init + * to be called. */ + if (sb_any_quota_active(osb->sb) && + osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { + status = -EDQUOT; + goto leave; + } + did_quota_inode = 1; + /* do the real work now. */ status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev, &new_fe_bh, parent_fe_bh, handle, @@ -399,6 +412,8 @@ static int ocfs2_mknod(struct inode *dir, d_instantiate(dentry, inode); status = 0; leave: + if (status < 0 && did_quota_inode) + vfs_dq_free_inode(inode); if (handle) ocfs2_commit_trans(osb, handle); @@ -641,7 +656,7 @@ static int ocfs2_link(struct dentry *old_dentry, goto out_unlock_inode; } - handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS); + handle = ocfs2_start_trans(osb, ocfs2_link_credits(osb->sb)); if (IS_ERR(handle)) { err = PTR_ERR(handle); handle = NULL; @@ -828,7 +843,7 @@ static int ocfs2_unlink(struct inode *dir, } } - handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS); + handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb)); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; @@ -1234,7 +1249,7 @@ static int ocfs2_rename(struct inode *old_dir, } } - handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS); + handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb)); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; @@ -1555,6 +1570,7 @@ static int ocfs2_symlink(struct inode *dir, struct ocfs2_security_xattr_info si = { .enable = 1, }; + int did_quota = 0, did_quota_inode = 0; mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, dentry, symname, dentry->d_name.len, dentry->d_name.name); @@ -1648,6 +1664,15 @@ static int ocfs2_symlink(struct inode *dir, goto bail; } + /* We don't use standard VFS wrapper because we don't want vfs_dq_init + * to be called. 
*/ + if (sb_any_quota_active(osb->sb) && + osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { + status = -EDQUOT; + goto bail; + } + did_quota_inode = 1; + status = ocfs2_mknod_locked(osb, dir, inode, dentry, 0, &new_fe_bh, parent_fe_bh, handle, inode_ac); @@ -1663,6 +1688,12 @@ static int ocfs2_symlink(struct inode *dir, u32 offset = 0; inode->i_op = &ocfs2_symlink_inode_operations; + if (vfs_dq_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, 1))) { + status = -EDQUOT; + goto bail; + } + did_quota = 1; status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, new_fe_bh, handle, data_ac, NULL, @@ -1728,6 +1759,11 @@ static int ocfs2_symlink(struct inode *dir, dentry->d_op = &ocfs2_dentry_ops; d_instantiate(dentry, inode); bail: + if (status < 0 && did_quota) + vfs_dq_free_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, 1)); + if (status < 0 && did_quota_inode) + vfs_dq_free_inode(inode); if (handle) ocfs2_commit_trans(osb, handle); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 9cb71e1c7c6..3b9634c7d29 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1665,7 +1665,8 @@ static int ocfs2_remove_value_outside(struct inode*inode, ocfs2_init_dealloc_ctxt(&ctxt.dealloc); - ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); + ctxt.handle = ocfs2_start_trans(osb, + ocfs2_remove_extent_credits(osb->sb)); if (IS_ERR(ctxt.handle)) { ret = PTR_ERR(ctxt.handle); mlog_errno(ret); @@ -2233,7 +2234,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, */ if (!xi->value) { if (!ocfs2_xattr_is_local(xe)) - credits += OCFS2_REMOVE_EXTENT_CREDITS; + credits += ocfs2_remove_extent_credits(inode->i_sb); goto out; } @@ -2250,7 +2251,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, */ if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { clusters_add += new_clusters; - credits += OCFS2_REMOVE_EXTENT_CREDITS + + credits += ocfs2_remove_extent_credits(inode->i_sb) + OCFS2_INODE_UPDATE_CREDITS; if 
(!ocfs2_xattr_is_local(xe)) credits += ocfs2_calc_extend_credits( @@ -2275,7 +2276,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, xv = &def_xv.xv; if (old_clusters >= new_clusters) { - credits += OCFS2_REMOVE_EXTENT_CREDITS; + credits += ocfs2_remove_extent_credits(inode->i_sb); goto out; } else { meta_add += ocfs2_extend_meta_needed(&xv->xr_list); @@ -4750,7 +4751,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode, } } - handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); + handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); if (IS_ERR(handle)) { ret = -ENOMEM; mlog_errno(ret); @@ -5109,7 +5110,8 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, ocfs2_init_dealloc_ctxt(&ctxt.dealloc); - ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); + ctxt.handle = ocfs2_start_trans(osb, + ocfs2_remove_extent_credits(osb->sb)); if (IS_ERR(ctxt.handle)) { ret = PTR_ERR(ctxt.handle); mlog_errno(ret); -- cgit v1.2.3-70-g09d2 From 171bf93ce11f4c9929fdce6ce63df8da2f3c4475 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Mon, 20 Oct 2008 15:36:47 +0200 Subject: ocfs2: Periodic quota syncing This patch creates a work queue for periodic syncing of locally cached quota information to the global quota files. We constantly queue a delayed work item, to get the periodic behavior. 
Signed-off-by: Mark Fasheh Acked-by: Jan Kara --- fs/ocfs2/quota.h | 5 +++ fs/ocfs2/quota_global.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/quota_local.c | 4 +++ fs/ocfs2/super.c | 7 ++++ 4 files changed, 101 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index 1f1c86311b3..e2233d51507 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h @@ -39,6 +39,7 @@ struct ocfs2_mem_dqinfo { unsigned int dqi_chunks; /* Number of chunks in local quota file */ unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ unsigned int dqi_syncms; /* How often should we sync with other nodes */ + unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */ struct list_head dqi_chunk; /* List of chunks */ struct inode *dqi_gqinode; /* Global quota file inode */ struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ @@ -47,6 +48,7 @@ struct ocfs2_mem_dqinfo { struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */ struct buffer_head *dqi_ibh; /* Buffer with information header */ struct qtree_mem_dqinfo dqi_gi; /* Info about global file */ + struct delayed_work dqi_sync_work; /* Work for syncing dquots */ }; static inline struct ocfs2_dquot *OCFS2_DQUOT(struct dquot *dquot) @@ -90,4 +92,7 @@ struct buffer_head *ocfs2_read_quota_block(struct inode *inode, extern struct dquot_operations ocfs2_quota_operations; extern struct quota_format_type ocfs2_quota_format; +int ocfs2_quota_setup(void); +void ocfs2_quota_shutdown(void); + #endif /* _OCFS2_QUOTA_H */ diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index af8340c4536..adf53508bdb 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -1,10 +1,14 @@ /* * Implementation of operations over global quota file */ +#include #include #include #include #include +#include +#include +#include #define MLOG_MASK_PREFIX ML_QUOTA #include @@ -20,6 +24,10 @@ #include "uptodate.h" 
#include "quota.h" +static struct workqueue_struct *ocfs2_quota_wq = NULL; + +static void qsync_work_fn(struct work_struct *work); + static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp) { struct ocfs2_global_disk_dqblk *d = dp; @@ -313,6 +321,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type) info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms); + oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms); oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); @@ -320,6 +329,10 @@ int ocfs2_global_read_info(struct super_block *sb, int type) oinfo->dqi_gi.dqi_usable_bs = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE; oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); + INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); + queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, + oinfo->dqi_syncjiff); + out_err: mlog_exit(status); return status; @@ -519,6 +532,61 @@ out: return err; } +/* + * Functions for periodic syncing of dquots with global file + */ +static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type) +{ + handle_t *handle; + struct super_block *sb = dquot->dq_sb; + struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; + struct ocfs2_super *osb = OCFS2_SB(sb); + int status = 0; + + mlog_entry("id=%u qtype=%u type=%lu device=%s\n", dquot->dq_id, + dquot->dq_type, type, sb->s_id); + if (type != dquot->dq_type) + goto out; + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) + goto out; + + handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out_ilock; + } + mutex_lock(&sb_dqopt(sb)->dqio_mutex); + status = ocfs2_sync_dquot(dquot); + mutex_unlock(&sb_dqopt(sb)->dqio_mutex); + 
if (status < 0) + mlog_errno(status); + /* We have to write local structure as well... */ + dquot_mark_dquot_dirty(dquot); + status = dquot_commit(dquot); + if (status < 0) + mlog_errno(status); + ocfs2_commit_trans(osb, handle); +out_ilock: + ocfs2_unlock_global_qf(oinfo, 1); +out: + mlog_exit(status); + return status; +} + +static void qsync_work_fn(struct work_struct *work) +{ + struct ocfs2_mem_dqinfo *oinfo = container_of(work, + struct ocfs2_mem_dqinfo, + dqi_sync_work.work); + struct super_block *sb = oinfo->dqi_gqinode->i_sb; + + dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); + queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, + oinfo->dqi_syncjiff); +} + /* * Wrappers for generic quota functions */ @@ -917,3 +985,20 @@ struct dquot_operations ocfs2_quota_operations = { .alloc_dquot = ocfs2_alloc_dquot, .destroy_dquot = ocfs2_destroy_dquot, }; + +int ocfs2_quota_setup(void) +{ + ocfs2_quota_wq = create_workqueue("o2quot"); + if (!ocfs2_quota_wq) + return -ENOMEM; + return 0; +} + +void ocfs2_quota_shutdown(void) +{ + if (ocfs2_quota_wq) { + flush_workqueue(ocfs2_quota_wq); + destroy_workqueue(ocfs2_quota_wq); + ocfs2_quota_wq = NULL; + } +} diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 55c3f2f98dc..40e82b48313 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -368,6 +368,10 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) int mark_clean = 1, len; int status; + /* At this point we know there are no more dquots and thus + * even if there's some sync in the pdflush queue, it won't + * find any dquots and return without doing anything */ + cancel_delayed_work_sync(&oinfo->dqi_sync_work); iput(oinfo->dqi_gqinode); ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock); ocfs2_lock_res_free(&oinfo->dqi_gqlock); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 7bb83e41581..60f1d29421a 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1107,11 +1107,16 @@ static 
int __init ocfs2_init(void) mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); } + status = ocfs2_quota_setup(); + if (status) + goto leave; + ocfs2_set_locking_protocol(); status = register_quota_format(&ocfs2_quota_format); leave: if (status < 0) { + ocfs2_quota_shutdown(); ocfs2_free_mem_caches(); exit_ocfs2_uptodate_cache(); } @@ -1128,6 +1133,8 @@ static void __exit ocfs2_exit(void) { mlog_entry_void(); + ocfs2_quota_shutdown(); + if (ocfs2_wq) { flush_workqueue(ocfs2_wq); destroy_workqueue(ocfs2_wq); -- cgit v1.2.3-70-g09d2 From 2205363dce7447b8e85f1ead14387664c1a98753 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 20 Oct 2008 23:50:38 +0200 Subject: ocfs2: Implement quota recovery Implement functions for recovery after a crash. Functions just read local quota file and sync info to global quota file. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/journal.c | 108 +++++++++--- fs/ocfs2/journal.h | 1 + fs/ocfs2/ocfs2.h | 4 +- fs/ocfs2/quota.h | 21 +++ fs/ocfs2/quota_global.c | 1 - fs/ocfs2/quota_local.c | 425 +++++++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 528 insertions(+), 32 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 11a1178d5ee..c60242018d9 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -45,6 +45,7 @@ #include "slot_map.h" #include "super.h" #include "sysfile.h" +#include "quota.h" #include "buffer_head_io.h" @@ -52,7 +53,7 @@ DEFINE_SPINLOCK(trans_inc_lock); static int ocfs2_force_read_journal(struct inode *inode); static int ocfs2_recover_node(struct ocfs2_super *osb, - int node_num); + int node_num, int slot_num); static int __ocfs2_recovery_thread(void *arg); static int ocfs2_commit_cache(struct ocfs2_super *osb); static int ocfs2_wait_on_mount(struct ocfs2_super *osb); @@ -857,6 +858,7 @@ struct ocfs2_la_recovery_item { int lri_slot; struct ocfs2_dinode *lri_la_dinode; struct ocfs2_dinode *lri_tl_dinode; + struct ocfs2_quota_recovery 
*lri_qrec; }; /* Does the second half of the recovery process. By this point, the @@ -877,6 +879,7 @@ void ocfs2_complete_recovery(struct work_struct *work) struct ocfs2_super *osb = journal->j_osb; struct ocfs2_dinode *la_dinode, *tl_dinode; struct ocfs2_la_recovery_item *item, *n; + struct ocfs2_quota_recovery *qrec; LIST_HEAD(tmp_la_list); mlog_entry_void(); @@ -922,6 +925,16 @@ void ocfs2_complete_recovery(struct work_struct *work) if (ret < 0) mlog_errno(ret); + qrec = item->lri_qrec; + if (qrec) { + mlog(0, "Recovering quota files"); + ret = ocfs2_finish_quota_recovery(osb, qrec, + item->lri_slot); + if (ret < 0) + mlog_errno(ret); + /* Recovery info is already freed now */ + } + kfree(item); } @@ -935,7 +948,8 @@ void ocfs2_complete_recovery(struct work_struct *work) static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, int slot_num, struct ocfs2_dinode *la_dinode, - struct ocfs2_dinode *tl_dinode) + struct ocfs2_dinode *tl_dinode, + struct ocfs2_quota_recovery *qrec) { struct ocfs2_la_recovery_item *item; @@ -950,6 +964,9 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, if (tl_dinode) kfree(tl_dinode); + if (qrec) + ocfs2_free_quota_recovery(qrec); + mlog_errno(-ENOMEM); return; } @@ -958,6 +975,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, item->lri_la_dinode = la_dinode; item->lri_slot = slot_num; item->lri_tl_dinode = tl_dinode; + item->lri_qrec = qrec; spin_lock(&journal->j_lock); list_add_tail(&item->lri_list, &journal->j_la_cleanups); @@ -977,6 +995,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) ocfs2_queue_recovery_completion(journal, osb->slot_num, osb->local_alloc_copy, + NULL, NULL); ocfs2_schedule_truncate_log_flush(osb, 0); @@ -985,11 +1004,26 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) } } +void ocfs2_complete_quota_recovery(struct ocfs2_super *osb) +{ + if (osb->quota_rec) { + ocfs2_queue_recovery_completion(osb->journal, + 
osb->slot_num, + NULL, + NULL, + osb->quota_rec); + osb->quota_rec = NULL; + } +} + static int __ocfs2_recovery_thread(void *arg) { - int status, node_num; + int status, node_num, slot_num; struct ocfs2_super *osb = arg; struct ocfs2_recovery_map *rm = osb->recovery_map; + int *rm_quota = NULL; + int rm_quota_used = 0, i; + struct ocfs2_quota_recovery *qrec; mlog_entry_void(); @@ -998,6 +1032,11 @@ static int __ocfs2_recovery_thread(void *arg) goto bail; } + rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS); + if (!rm_quota) { + status = -ENOMEM; + goto bail; + } restart: status = ocfs2_super_lock(osb, 1); if (status < 0) { @@ -1011,8 +1050,28 @@ restart: * clear it until ocfs2_recover_node() has succeeded. */ node_num = rm->rm_entries[0]; spin_unlock(&osb->osb_lock); - - status = ocfs2_recover_node(osb, node_num); + mlog(0, "checking node %d\n", node_num); + slot_num = ocfs2_node_num_to_slot(osb, node_num); + if (slot_num == -ENOENT) { + status = 0; + mlog(0, "no slot for this node, so no recovery" + "required.\n"); + goto skip_recovery; + } + mlog(0, "node %d was using slot %d\n", node_num, slot_num); + + /* It is a bit subtle with quota recovery. We cannot do it + * immediately because we have to obtain cluster locks from + * quota files and we also don't want to just skip it because + * then quota usage would be out of sync until some node takes + * the slot. So we remember which nodes need quota recovery + * and when everything else is done, we recover quotas. */ + for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++); + if (i == rm_quota_used) + rm_quota[rm_quota_used++] = slot_num; + + status = ocfs2_recover_node(osb, node_num, slot_num); +skip_recovery: if (!status) { ocfs2_recovery_map_clear(osb, node_num); } else { @@ -1034,13 +1093,27 @@ restart: if (status < 0) mlog_errno(status); + /* Now it is right time to recover quotas... 
We have to do this under + * superblock lock so that noone can start using the slot (and crash) + * before we recover it */ + for (i = 0; i < rm_quota_used; i++) { + qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); + if (IS_ERR(qrec)) { + status = PTR_ERR(qrec); + mlog_errno(status); + continue; + } + ocfs2_queue_recovery_completion(osb->journal, rm_quota[i], + NULL, NULL, qrec); + } + ocfs2_super_unlock(osb, 1); /* We always run recovery on our own orphan dir - the dead * node(s) may have disallowd a previos inode delete. Re-processing * is therefore required. */ ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, - NULL); + NULL, NULL); bail: mutex_lock(&osb->recovery_lock); @@ -1055,6 +1128,9 @@ bail: mutex_unlock(&osb->recovery_lock); + if (rm_quota) + kfree(rm_quota); + mlog_exit(status); /* no one is callint kthread_stop() for us so the kthread() api * requires that we call do_exit(). And it isn't exported, but @@ -1282,31 +1358,19 @@ done: * far less concerning. */ static int ocfs2_recover_node(struct ocfs2_super *osb, - int node_num) + int node_num, int slot_num) { int status = 0; - int slot_num; struct ocfs2_dinode *la_copy = NULL; struct ocfs2_dinode *tl_copy = NULL; - mlog_entry("(node_num=%d, osb->node_num = %d)\n", - node_num, osb->node_num); - - mlog(0, "checking node %d\n", node_num); + mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n", + node_num, slot_num, osb->node_num); /* Should not ever be called to recover ourselves -- in that * case we should've called ocfs2_journal_load instead. 
*/ BUG_ON(osb->node_num == node_num); - slot_num = ocfs2_node_num_to_slot(osb, node_num); - if (slot_num == -ENOENT) { - status = 0; - mlog(0, "no slot for this node, so no recovery required.\n"); - goto done; - } - - mlog(0, "node %d was using slot %d\n", node_num, slot_num); - status = ocfs2_replay_journal(osb, node_num, slot_num); if (status < 0) { if (status == -EBUSY) { @@ -1342,7 +1406,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, /* This will kfree the memory pointed to by la_copy and tl_copy */ ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy, - tl_copy); + tl_copy, NULL); status = 0; done: diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index ee08e9c1fc1..37013bf9ce2 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -168,6 +168,7 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num); int ocfs2_mark_dead_nodes(struct ocfs2_super *osb); void ocfs2_complete_mount_recovery(struct ocfs2_super *osb); +void ocfs2_complete_quota_recovery(struct ocfs2_super *osb); static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb) { diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index f04b229fc75..6b25b4aa720 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -206,6 +206,7 @@ enum ocfs2_mount_options struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; +struct ocfs2_quota_recovery; struct ocfs2_super { struct task_struct *commit_task; @@ -287,10 +288,11 @@ struct ocfs2_super char *local_alloc_debug_buf; #endif - /* Next two fields are for local node slot recovery during + /* Next three fields are for local node slot recovery during * mount. 
*/ int dirty; struct ocfs2_dinode *local_alloc_copy; + struct ocfs2_quota_recovery *quota_rec; struct ocfs2_alloc_stats alloc_stats; char dev_str[20]; /* "major,minor" of the device */ diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index e2233d51507..04872b45b99 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h @@ -33,6 +33,17 @@ struct ocfs2_dquot { s64 dq_originodes; /* Last globally synced inode usage */ }; +/* Description of one chunk to recover in memory */ +struct ocfs2_recovery_chunk { + struct list_head rc_list; /* List of chunks */ + int rc_chunk; /* Chunk number */ + unsigned long *rc_bitmap; /* Bitmap of entries to recover */ +}; + +struct ocfs2_quota_recovery { + struct list_head r_list[MAXQUOTAS]; /* List of chunks to recover */ +}; + /* In-memory structure with quota header information */ struct ocfs2_mem_dqinfo { unsigned int dqi_type; /* Quota type this structure describes */ @@ -49,6 +60,10 @@ struct ocfs2_mem_dqinfo { struct buffer_head *dqi_ibh; /* Buffer with information header */ struct qtree_mem_dqinfo dqi_gi; /* Info about global file */ struct delayed_work dqi_sync_work; /* Work for syncing dquots */ + struct ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery + * information, in case we + * enable quotas on file + * needing it */ }; static inline struct ocfs2_dquot *OCFS2_DQUOT(struct dquot *dquot) @@ -67,6 +82,12 @@ extern struct kmem_cache *ocfs2_qf_chunk_cachep; extern struct qtree_fmt_operations ocfs2_global_ops; +struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery( + struct ocfs2_super *osb, int slot_num); +int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, + struct ocfs2_quota_recovery *rec, + int slot_num); +void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec); ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); ssize_t ocfs2_quota_write(struct super_block *sb, int type, diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 
adf53508bdb..49b536a2190 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -87,7 +87,6 @@ struct qtree_fmt_operations ocfs2_global_ops = { .is_id = ocfs2_global_is_id, }; - struct buffer_head *ocfs2_read_quota_block(struct inode *inode, int block, int *err) { diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 40e82b48313..b98562174cd 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -49,14 +49,25 @@ static unsigned int ol_quota_chunk_block(struct super_block *sb, int c) return 1 + (ol_chunk_blocks(sb) + 1) * c; } -/* Offset of the dquot structure in the quota file */ -static loff_t ol_dqblk_off(struct super_block *sb, int c, int off) +static unsigned int ol_dqblk_block(struct super_block *sb, int c, int off) +{ + int epb = ol_quota_entries_per_block(sb); + + return ol_quota_chunk_block(sb, c) + 1 + off / epb; +} + +static unsigned int ol_dqblk_block_off(struct super_block *sb, int c, int off) { int epb = ol_quota_entries_per_block(sb); - return ((ol_quota_chunk_block(sb, c) + 1 + off / epb) - << sb->s_blocksize_bits) + - (off % epb) * sizeof(struct ocfs2_local_disk_dqblk); + return (off % epb) * sizeof(struct ocfs2_local_disk_dqblk); +} + +/* Offset of the dquot structure in the quota file */ +static loff_t ol_dqblk_off(struct super_block *sb, int c, int off) +{ + return (ol_dqblk_block(sb, c, off) << sb->s_blocksize_bits) + + ol_dqblk_block_off(sb, c, off); } /* Compute block number from given offset */ @@ -253,6 +264,379 @@ static void olq_update_info(struct buffer_head *bh, void *private) spin_unlock(&dq_data_lock); } +static int ocfs2_add_recovery_chunk(struct super_block *sb, + struct ocfs2_local_disk_chunk *dchunk, + int chunk, + struct list_head *head) +{ + struct ocfs2_recovery_chunk *rc; + + rc = kmalloc(sizeof(struct ocfs2_recovery_chunk), GFP_NOFS); + if (!rc) + return -ENOMEM; + rc->rc_chunk = chunk; + rc->rc_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS); + if (!rc->rc_bitmap) { + kfree(rc); + return 
-ENOMEM; + } + memcpy(rc->rc_bitmap, dchunk->dqc_bitmap, + (ol_chunk_entries(sb) + 7) >> 3); + list_add_tail(&rc->rc_list, head); + return 0; +} + +static void free_recovery_list(struct list_head *head) +{ + struct ocfs2_recovery_chunk *next; + struct ocfs2_recovery_chunk *rchunk; + + list_for_each_entry_safe(rchunk, next, head, rc_list) { + list_del(&rchunk->rc_list); + kfree(rchunk->rc_bitmap); + kfree(rchunk); + } +} + +void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec) +{ + int type; + + for (type = 0; type < MAXQUOTAS; type++) + free_recovery_list(&(rec->r_list[type])); + kfree(rec); +} + +/* Load entries in our quota file we have to recover*/ +static int ocfs2_recovery_load_quota(struct inode *lqinode, + struct ocfs2_local_disk_dqinfo *ldinfo, + int type, + struct list_head *head) +{ + struct super_block *sb = lqinode->i_sb; + struct buffer_head *hbh; + struct ocfs2_local_disk_chunk *dchunk; + int i, chunks = le32_to_cpu(ldinfo->dqi_chunks); + int status = 0; + + for (i = 0; i < chunks; i++) { + hbh = ocfs2_read_quota_block(lqinode, + ol_quota_chunk_block(sb, i), + &status); + if (!hbh) { + mlog_errno(status); + break; + } + dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data; + if (le32_to_cpu(dchunk->dqc_free) < ol_chunk_entries(sb)) + status = ocfs2_add_recovery_chunk(sb, dchunk, i, head); + brelse(hbh); + if (status < 0) + break; + } + if (status < 0) + free_recovery_list(head); + return status; +} + +static struct ocfs2_quota_recovery *ocfs2_alloc_quota_recovery(void) +{ + int type; + struct ocfs2_quota_recovery *rec; + + rec = kmalloc(sizeof(struct ocfs2_quota_recovery), GFP_NOFS); + if (!rec) + return NULL; + for (type = 0; type < MAXQUOTAS; type++) + INIT_LIST_HEAD(&(rec->r_list[type])); + return rec; +} + +/* Load information we need for quota recovery into memory */ +struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery( + struct ocfs2_super *osb, + int slot_num) +{ + unsigned int feature[MAXQUOTAS] = { 
OCFS2_FEATURE_RO_COMPAT_USRQUOTA, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; + unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, + LOCAL_GROUP_QUOTA_SYSTEM_INODE }; + struct super_block *sb = osb->sb; + struct ocfs2_local_disk_dqinfo *ldinfo; + struct inode *lqinode; + struct buffer_head *bh; + int type; + int status = 0; + struct ocfs2_quota_recovery *rec; + + mlog(ML_NOTICE, "Beginning quota recovery in slot %u\n", slot_num); + rec = ocfs2_alloc_quota_recovery(); + if (!rec) + return ERR_PTR(-ENOMEM); + /* First init... */ + + for (type = 0; type < MAXQUOTAS; type++) { + if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) + continue; + /* At this point, journal of the slot is already replayed so + * we can trust metadata and data of the quota file */ + lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num); + if (!lqinode) { + status = -ENOENT; + goto out; + } + status = ocfs2_inode_lock_full(lqinode, NULL, 1, + OCFS2_META_LOCK_RECOVERY); + if (status < 0) { + mlog_errno(status); + goto out_put; + } + /* Now read local header */ + bh = ocfs2_read_quota_block(lqinode, 0, &status); + if (!bh) { + mlog_errno(status); + mlog(ML_ERROR, "failed to read quota file info header " + "(slot=%d type=%d)\n", slot_num, type); + goto out_lock; + } + ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + + OCFS2_LOCAL_INFO_OFF); + status = ocfs2_recovery_load_quota(lqinode, ldinfo, type, + &rec->r_list[type]); + brelse(bh); +out_lock: + ocfs2_inode_unlock(lqinode, 1); +out_put: + iput(lqinode); + if (status < 0) + break; + } +out: + if (status < 0) { + ocfs2_free_quota_recovery(rec); + rec = ERR_PTR(status); + } + return rec; +} + +/* Sync changes in local quota file into global quota file and + * reinitialize local quota file. + * The function expects local quota file to be already locked and + * dqonoff_mutex locked. 
*/ +static int ocfs2_recover_local_quota_file(struct inode *lqinode, + int type, + struct ocfs2_quota_recovery *rec) +{ + struct super_block *sb = lqinode->i_sb; + struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; + struct ocfs2_local_disk_chunk *dchunk; + struct ocfs2_local_disk_dqblk *dqblk; + struct dquot *dquot; + handle_t *handle; + struct buffer_head *hbh = NULL, *qbh = NULL; + int status = 0; + int bit, chunk; + struct ocfs2_recovery_chunk *rchunk, *next; + qsize_t spacechange, inodechange; + + mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type); + + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) + goto out; + + list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) { + chunk = rchunk->rc_chunk; + hbh = ocfs2_read_quota_block(lqinode, + ol_quota_chunk_block(sb, chunk), + &status); + if (!hbh) { + mlog_errno(status); + break; + } + dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data; + for_each_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) { + qbh = ocfs2_read_quota_block(lqinode, + ol_dqblk_block(sb, chunk, bit), + &status); + if (!qbh) { + mlog_errno(status); + break; + } + dqblk = (struct ocfs2_local_disk_dqblk *)(qbh->b_data + + ol_dqblk_block_off(sb, chunk, bit)); + dquot = dqget(sb, le64_to_cpu(dqblk->dqb_id), type); + if (!dquot) { + status = -EIO; + mlog(ML_ERROR, "Failed to get quota structure " + "for id %u, type %d. Cannot finish quota " + "file recovery.\n", + (unsigned)le64_to_cpu(dqblk->dqb_id), + type); + goto out_put_bh; + } + handle = ocfs2_start_trans(OCFS2_SB(sb), + OCFS2_QSYNC_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out_put_dquot; + } + mutex_lock(&sb_dqopt(sb)->dqio_mutex); + spin_lock(&dq_data_lock); + /* Add usage from quota entry into quota changes + * of our node. 
Auxiliary variables are important + * due to signedness */ + spacechange = le64_to_cpu(dqblk->dqb_spacemod); + inodechange = le64_to_cpu(dqblk->dqb_inodemod); + dquot->dq_dqb.dqb_curspace += spacechange; + dquot->dq_dqb.dqb_curinodes += inodechange; + spin_unlock(&dq_data_lock); + /* We want to drop reference held by the crashed + * node. Since we have our own reference we know + * global structure actually won't be freed. */ + status = ocfs2_global_release_dquot(dquot); + if (status < 0) { + mlog_errno(status); + goto out_commit; + } + /* Release local quota file entry */ + status = ocfs2_journal_access(handle, lqinode, + qbh, OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto out_commit; + } + lock_buffer(qbh); + WARN_ON(!ocfs2_test_bit(bit, dchunk->dqc_bitmap)); + ocfs2_clear_bit(bit, dchunk->dqc_bitmap); + le32_add_cpu(&dchunk->dqc_free, 1); + unlock_buffer(qbh); + status = ocfs2_journal_dirty(handle, qbh); + if (status < 0) + mlog_errno(status); +out_commit: + mutex_unlock(&sb_dqopt(sb)->dqio_mutex); + ocfs2_commit_trans(OCFS2_SB(sb), handle); +out_put_dquot: + dqput(dquot); +out_put_bh: + brelse(qbh); + if (status < 0) + break; + } + brelse(hbh); + list_del(&rchunk->rc_list); + kfree(rchunk->rc_bitmap); + kfree(rchunk); + if (status < 0) + break; + } + ocfs2_unlock_global_qf(oinfo, 1); +out: + if (status < 0) + free_recovery_list(&(rec->r_list[type])); + mlog_exit(status); + return status; +} + +/* Recover local quota files for given node different from us */ +int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, + struct ocfs2_quota_recovery *rec, + int slot_num) +{ + unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, + LOCAL_GROUP_QUOTA_SYSTEM_INODE }; + struct super_block *sb = osb->sb; + struct ocfs2_local_disk_dqinfo *ldinfo; + struct buffer_head *bh; + handle_t *handle; + int type; + int status = 0; + struct inode *lqinode; + unsigned int flags; + + mlog(ML_NOTICE, "Finishing quota recovery in slot %u\n", 
slot_num); + mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); + for (type = 0; type < MAXQUOTAS; type++) { + if (list_empty(&(rec->r_list[type]))) + continue; + mlog(0, "Recovering quota in slot %d\n", slot_num); + lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num); + if (!lqinode) { + status = -ENOENT; + goto out; + } + status = ocfs2_inode_lock_full(lqinode, NULL, 1, + OCFS2_META_LOCK_NOQUEUE); + /* Someone else is holding the lock? Then he must be + * doing the recovery. Just skip the file... */ + if (status == -EAGAIN) { + mlog(ML_NOTICE, "skipping quota recovery for slot %d " + "because quota file is locked.\n", slot_num); + status = 0; + goto out_put; + } else if (status < 0) { + mlog_errno(status); + goto out_put; + } + /* Now read local header */ + bh = ocfs2_read_quota_block(lqinode, 0, &status); + if (!bh) { + mlog_errno(status); + mlog(ML_ERROR, "failed to read quota file info header " + "(slot=%d type=%d)\n", slot_num, type); + goto out_lock; + } + ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + + OCFS2_LOCAL_INFO_OFF); + /* Is recovery still needed? */ + flags = le32_to_cpu(ldinfo->dqi_flags); + if (!(flags & OLQF_CLEAN)) + status = ocfs2_recover_local_quota_file(lqinode, + type, + rec); + /* We don't want to mark file as clean when it is actually + * active */ + if (slot_num == osb->slot_num) + goto out_bh; + /* Mark quota file as clean if we are recovering quota file of + * some other node. 
*/ + handle = ocfs2_start_trans(osb, 1); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out_bh; + } + status = ocfs2_journal_access(handle, lqinode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + lock_buffer(bh); + ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN); + unlock_buffer(bh); + status = ocfs2_journal_dirty(handle, bh); + if (status < 0) + mlog_errno(status); +out_trans: + ocfs2_commit_trans(osb, handle); +out_bh: + brelse(bh); +out_lock: + ocfs2_inode_unlock(lqinode, 1); +out_put: + iput(lqinode); + if (status < 0) + break; + } +out: + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + kfree(rec); + return status; +} + /* Read information header from quota file */ static int ocfs2_local_read_info(struct super_block *sb, int type) { @@ -262,6 +646,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) struct inode *lqinode = sb_dqopt(sb)->files[type]; int status; struct buffer_head *bh = NULL; + struct ocfs2_quota_recovery *rec; int locked = 0; info->dqi_maxblimit = 0x7fffffffffffffffLL; @@ -275,6 +660,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) info->dqi_priv = oinfo; oinfo->dqi_type = type; INIT_LIST_HEAD(&oinfo->dqi_chunk); + oinfo->dqi_rec = NULL; oinfo->dqi_lqi_bh = NULL; oinfo->dqi_ibh = NULL; @@ -305,10 +691,27 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) oinfo->dqi_ibh = bh; /* We crashed when using local quota file? */ - if (!(info->dqi_flags & OLQF_CLEAN)) - goto out_err; /* So far we just bail out. 
Later we should resync here */ + if (!(info->dqi_flags & OLQF_CLEAN)) { + rec = OCFS2_SB(sb)->quota_rec; + if (!rec) { + rec = ocfs2_alloc_quota_recovery(); + if (!rec) { + status = -ENOMEM; + mlog_errno(status); + goto out_err; + } + OCFS2_SB(sb)->quota_rec = rec; + } - status = ocfs2_load_local_quota_bitmaps(sb_dqopt(sb)->files[type], + status = ocfs2_recovery_load_quota(lqinode, ldinfo, type, + &rec->r_list[type]); + if (status < 0) { + mlog_errno(status); + goto out_err; + } + } + + status = ocfs2_load_local_quota_bitmaps(lqinode, ldinfo, &oinfo->dqi_chunk); if (status < 0) { @@ -394,6 +797,12 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) } ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk); + /* dqonoff_mutex protects us against racing with recovery thread... */ + if (oinfo->dqi_rec) { + ocfs2_free_quota_recovery(oinfo->dqi_rec); + mark_clean = 0; + } + if (!mark_clean) goto out; -- cgit v1.2.3-70-g09d2 From 19ece546a418997226bd91552fbc41abcb05cea6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 Aug 2008 20:13:17 +0200 Subject: ocfs2: Enable quota accounting on mount, disable on umount Enable quota usage tracking on mount and disable it on umount. Also add support for quota on and quota off quotactls and usrquota and grpquota mount options. Add quota features among supported ones. 
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/journal.c | 20 ++++- fs/ocfs2/ocfs2.h | 3 + fs/ocfs2/ocfs2_fs.h | 4 +- fs/ocfs2/super.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 245 insertions(+), 4 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index c60242018d9..302f1144a70 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -56,7 +56,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, int node_num, int slot_num); static int __ocfs2_recovery_thread(void *arg); static int ocfs2_commit_cache(struct ocfs2_super *osb); -static int ocfs2_wait_on_mount(struct ocfs2_super *osb); +static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota); static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, int dirty, int replayed); static int ocfs2_trylock_journal(struct ocfs2_super *osb, @@ -65,6 +65,17 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, int slot); static int ocfs2_commit_thread(void *arg); +static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb) +{ + return __ocfs2_wait_on_mount(osb, 0); +} + +static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb) +{ + return __ocfs2_wait_on_mount(osb, 1); +} + + /* * The recovery_list is a simple linked list of node numbers to recover. @@ -895,6 +906,8 @@ void ocfs2_complete_recovery(struct work_struct *work) mlog(0, "Complete recovery for slot %d\n", item->lri_slot); + ocfs2_wait_on_quotas(osb); + la_dinode = item->lri_la_dinode; if (la_dinode) { mlog(0, "Clean up local alloc %llu\n", @@ -1701,13 +1714,14 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, return ret; } -static int ocfs2_wait_on_mount(struct ocfs2_super *osb) +static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota) { /* This check is good because ocfs2 will wait on our recovery * thread before changing it to something other than MOUNTED * or DISABLED. 
*/ wait_event(osb->osb_mount_event, - atomic_read(&osb->vol_state) == VOLUME_MOUNTED || + (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) || + atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS || atomic_read(&osb->vol_state) == VOLUME_DISABLED); /* If there's an error on mount, then we may never get to the diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 6b25b4aa720..5c777988042 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -161,6 +161,7 @@ enum ocfs2_vol_state { VOLUME_INIT = 0, VOLUME_MOUNTED, + VOLUME_MOUNTED_QUOTAS, VOLUME_DISMOUNTED, VOLUME_DISABLED }; @@ -196,6 +197,8 @@ enum ocfs2_mount_options OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */ + OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */ + OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ }; #define OCFS2_OSB_SOFT_RO 0x0001 diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 0a5ac790a62..359732e18e8 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -94,7 +94,9 @@ | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ | OCFS2_FEATURE_INCOMPAT_XATTR) -#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN) +#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ + | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ + | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) /* * Heartbeat-only devices are missing journals and other files. 
The diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 60f1d29421a..2eb657c3e7a 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -41,6 +41,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_SUPER #include @@ -127,6 +128,9 @@ static int ocfs2_get_sector(struct super_block *sb, static void ocfs2_write_super(struct super_block *sb); static struct inode *ocfs2_alloc_inode(struct super_block *sb); static void ocfs2_destroy_inode(struct inode *inode); +static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend); +static int ocfs2_enable_quotas(struct ocfs2_super *osb); +static void ocfs2_disable_quotas(struct ocfs2_super *osb); static const struct super_operations ocfs2_sops = { .statfs = ocfs2_statfs, @@ -165,6 +169,8 @@ enum { Opt_inode64, Opt_acl, Opt_noacl, + Opt_usrquota, + Opt_grpquota, Opt_err, }; @@ -189,6 +195,8 @@ static const match_table_t tokens = { {Opt_inode64, "inode64"}, {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, + {Opt_usrquota, "usrquota"}, + {Opt_grpquota, "grpquota"}, {Opt_err, NULL} }; @@ -452,6 +460,12 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) /* We're going to/from readonly mode. */ if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { + /* Disable quota accounting before remounting RO */ + if (*flags & MS_RDONLY) { + ret = ocfs2_susp_quotas(osb, 0); + if (ret < 0) + goto out; + } /* Lock here so the check of HARD_RO and the potential * setting of SOFT_RO is atomic. */ spin_lock(&osb->osb_lock); @@ -487,6 +501,21 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) } unlock_osb: spin_unlock(&osb->osb_lock); + /* Enable quota accounting after remounting RW */ + if (!ret && !(*flags & MS_RDONLY)) { + if (sb_any_quota_suspended(sb)) + ret = ocfs2_susp_quotas(osb, 1); + else + ret = ocfs2_enable_quotas(osb); + if (ret < 0) { + /* Return back changes... 
*/ + spin_lock(&osb->osb_lock); + sb->s_flags |= MS_RDONLY; + osb->osb_flags |= OCFS2_OSB_SOFT_RO; + spin_unlock(&osb->osb_lock); + goto out; + } + } } if (!ret) { @@ -647,6 +676,131 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, return 0; } +static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend) +{ + int type; + struct super_block *sb = osb->sb; + unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; + int status = 0; + + for (type = 0; type < MAXQUOTAS; type++) { + if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) + continue; + if (unsuspend) + status = vfs_quota_enable( + sb_dqopt(sb)->files[type], + type, QFMT_OCFS2, + DQUOT_SUSPENDED); + else + status = vfs_quota_disable(sb, type, + DQUOT_SUSPENDED); + if (status < 0) + break; + } + if (status < 0) + mlog(ML_ERROR, "Failed to suspend/unsuspend quotas on " + "remount (error = %d).\n", status); + return status; +} + +static int ocfs2_enable_quotas(struct ocfs2_super *osb) +{ + struct inode *inode[MAXQUOTAS] = { NULL, NULL }; + struct super_block *sb = osb->sb; + unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; + unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, + LOCAL_GROUP_QUOTA_SYSTEM_INODE }; + int status; + int type; + + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE; + for (type = 0; type < MAXQUOTAS; type++) { + if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) + continue; + inode[type] = ocfs2_get_system_file_inode(osb, ino[type], + osb->slot_num); + if (!inode[type]) { + status = -ENOENT; + goto out_quota_off; + } + status = vfs_quota_enable(inode[type], type, QFMT_OCFS2, + DQUOT_USAGE_ENABLED); + if (status < 0) + goto out_quota_off; + } + + for (type = 0; type < MAXQUOTAS; type++) + iput(inode[type]); + return 0; +out_quota_off: + ocfs2_disable_quotas(osb); + for (type = 0; type < MAXQUOTAS; type++) + iput(inode[type]); 
+ mlog_errno(status); + return status; +} + +static void ocfs2_disable_quotas(struct ocfs2_super *osb) +{ + int type; + struct inode *inode; + struct super_block *sb = osb->sb; + + /* We mostly ignore errors in this function because there's not much + * we can do when we see them */ + for (type = 0; type < MAXQUOTAS; type++) { + if (!sb_has_quota_loaded(sb, type)) + continue; + inode = igrab(sb->s_dquot.files[type]); + /* Turn off quotas. This will remove all dquot structures from + * memory and so they will be automatically synced to global + * quota files */ + vfs_quota_disable(sb, type, DQUOT_USAGE_ENABLED | + DQUOT_LIMITS_ENABLED); + if (!inode) + continue; + iput(inode); + } +} + +/* Handle quota on quotactl */ +static int ocfs2_quota_on(struct super_block *sb, int type, int format_id, + char *path, int remount) +{ + unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; + + if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) + return -EINVAL; + + if (remount) + return 0; /* Just ignore it has been handled in + * ocfs2_remount() */ + return vfs_quota_enable(sb_dqopt(sb)->files[type], type, + format_id, DQUOT_LIMITS_ENABLED); +} + +/* Handle quota off quotactl */ +static int ocfs2_quota_off(struct super_block *sb, int type, int remount) +{ + if (remount) + return 0; /* Ignore now and handle later in + * ocfs2_remount() */ + return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED); +} + +static struct quotactl_ops ocfs2_quotactl_ops = { + .quota_on = ocfs2_quota_on, + .quota_off = ocfs2_quota_off, + .quota_sync = vfs_quota_sync, + .get_info = vfs_get_dqinfo, + .set_info = vfs_set_dqinfo, + .get_dqblk = vfs_get_dqblk, + .set_dqblk = vfs_set_dqblk, +}; + static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) { struct dentry *root; @@ -689,6 +843,22 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->osb_commit_interval = parsed_options.commit_interval; 
osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); osb->local_alloc_bits = osb->local_alloc_default_bits; + if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA && + !OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { + status = -EINVAL; + mlog(ML_ERROR, "User quotas were requested, but this " + "filesystem does not have the feature enabled.\n"); + goto read_super_error; + } + if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA && + !OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { + status = -EINVAL; + mlog(ML_ERROR, "Group quotas were requested, but this " + "filesystem does not have the feature enabled.\n"); + goto read_super_error; + } status = ocfs2_verify_userspace_stack(osb, &parsed_options); if (status) @@ -793,6 +963,28 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) atomic_set(&osb->vol_state, VOLUME_MOUNTED); wake_up(&osb->osb_mount_event); + /* Now we can initialize quotas because we can afford to wait + * for cluster locks recovery now. That also means that truncation + * log recovery can happen but that waits for proper quota setup */ + if (!(sb->s_flags & MS_RDONLY)) { + status = ocfs2_enable_quotas(osb); + if (status < 0) { + /* We have to err-out specially here because + * s_root is already set */ + mlog_errno(status); + atomic_set(&osb->vol_state, VOLUME_DISABLED); + wake_up(&osb->osb_mount_event); + mlog_exit(status); + return status; + } + } + + ocfs2_complete_quota_recovery(osb); + + /* Now we wake up again for processes waiting for quotas */ + atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS); + wake_up(&osb->osb_mount_event); + mlog_exit(status); return status; @@ -980,6 +1172,28 @@ static int ocfs2_parse_options(struct super_block *sb, case Opt_inode64: mopt->mount_opt |= OCFS2_MOUNT_INODE64; break; + case Opt_usrquota: + /* We check only on remount, otherwise features + * aren't yet initialized. 
*/ + if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { + mlog(ML_ERROR, "User quota requested but " + "filesystem feature is not set\n"); + status = 0; + goto bail; + } + mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA; + break; + case Opt_grpquota: + if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { + mlog(ML_ERROR, "Group quota requested but " + "filesystem feature is not set\n"); + status = 0; + goto bail; + } + mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; + break; #ifdef CONFIG_OCFS2_FS_POSIX_ACL case Opt_acl: mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; @@ -1056,6 +1270,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) if (osb->osb_cluster_stack[0]) seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, osb->osb_cluster_stack); + if (opts & OCFS2_MOUNT_USRQUOTA) + seq_printf(s, ",usrquota"); + if (opts & OCFS2_MOUNT_GRPQUOTA) + seq_printf(s, ",grpquota"); if (opts & OCFS2_MOUNT_NOUSERXATTR) seq_printf(s, ",nouser_xattr"); @@ -1394,6 +1612,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) osb = OCFS2_SB(sb); BUG_ON(!osb); + ocfs2_disable_quotas(osb); + ocfs2_shutdown_local_alloc(osb); ocfs2_truncate_log_shutdown(osb); @@ -1504,6 +1724,8 @@ static int ocfs2_initialize_super(struct super_block *sb, sb->s_fs_info = osb; sb->s_op = &ocfs2_sops; sb->s_export_op = &ocfs2_export_ops; + sb->s_qcop = &ocfs2_quotactl_ops; + sb->dq_op = &ocfs2_quota_operations; sb->s_xattr = ocfs2_xattr_handlers; sb->s_time_gran = 1; sb->s_flags |= MS_NOATIME; -- cgit v1.2.3-70-g09d2 From b86c86fa1feb50221dc16071ae5b8a4acf3bd32c Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 18 Nov 2008 17:16:47 -0800 Subject: ocfs2: Use BH_JBDPrivateStart instead of BH_Unshadow This is safer. We no longer have to worry about tracking changes to jbd_state_bits. 
Signed-off-by: Mark Fasheh --- fs/ocfs2/buffer_head_io.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 0e9eed0c223..15c8e6deee2 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -42,11 +42,10 @@ /* * Bits on bh->b_state used by ocfs2. * - * These MUST be after the JBD2 bits. Currently BH_Unshadow is the last - * JBD2 bit. + * These MUST be after the JBD2 bits. Hence, we use BH_JBDPrivateStart. */ enum ocfs2_state_bits { - BH_NeedsValidate = BH_Unshadow + 1, + BH_NeedsValidate = BH_JBDPrivateStart, }; /* Expand the magic b_state functions */ -- cgit v1.2.3-70-g09d2 From 57a09a7b3d9445a17c78d544f1e49d4d7d61705a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Nov 2008 15:31:26 +0100 Subject: ocfs2: Add missing initialization Add missing variable initialization to ocfs2_dquot_drop_slow(). Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/quota_global.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 49b536a2190..10ecb33298d 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -870,7 +870,7 @@ out: static int ocfs2_dquot_drop_slow(struct inode *inode) { - int status; + int status = 0; int cnt; int got_lock[MAXQUOTAS] = {0, 0}; handle_t *handle; -- cgit v1.2.3-70-g09d2 From 85eb8b73d66530bb7b931789ae7a5ec9744eed34 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 25 Nov 2008 15:31:27 +0100 Subject: ocfs2: Fix ocfs2_read_quota_block() error handling. ocfs2_bread() has become ocfs2_read_virt_blocks(), with a prototype to match ocfs2_read_blocks(). The quota code, converting from ocfs2_bread(), wraps the call to ocfs2_read_virt_blocks() in ocfs2_read_quota_block(). Unfortunately, the prototype of ocfs2_read_quota_block() matches the old prototype of ocfs2_bread(). 
The problem is that ocfs2_bread() returned the buffer head, and callers assumed that a NULL pointer was indicative of error. It wasn't. This is why ocfs2_bread() took an int*err argument as well. The new prototype of ocfs2_read_virt_blocks() avoids this error handling confusion. Let's change ocfs2_read_quota_block() to match. Signed-off-by: Joel Becker Acked-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/dlmglue.c | 6 ++--- fs/ocfs2/quota.h | 4 ++-- fs/ocfs2/quota_global.c | 34 +++++++++++++++----------- fs/ocfs2/quota_local.c | 64 +++++++++++++++++++++++++++---------------------- 4 files changed, 60 insertions(+), 48 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 058aa86490a..b1c75911d8a 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3519,7 +3519,7 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) oinfo->dqi_gi.dqi_type); struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); - struct buffer_head *bh; + struct buffer_head *bh = NULL; struct ocfs2_global_disk_dqinfo *gdinfo; int status = 0; @@ -3532,8 +3532,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) oinfo->dqi_gi.dqi_free_entry = be32_to_cpu(lvb->lvb_free_entry); } else { - bh = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &status); - if (!bh) { + status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh); + if (status) { mlog_errno(status); goto bail; } diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index 04872b45b99..7365e2e0870 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h @@ -107,8 +107,8 @@ static inline int ocfs2_global_release_dquot(struct dquot *dquot) int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); -struct buffer_head *ocfs2_read_quota_block(struct inode *inode, - int block, int *err); +int ocfs2_read_quota_block(struct inode *inode, u64 
v_block, + struct buffer_head **bh); extern struct dquot_operations ocfs2_quota_operations; extern struct quota_format_type ocfs2_quota_format; diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 10ecb33298d..2bdcddd3f1c 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -87,16 +87,21 @@ struct qtree_fmt_operations ocfs2_global_ops = { .is_id = ocfs2_global_is_id, }; -struct buffer_head *ocfs2_read_quota_block(struct inode *inode, - int block, int *err) +int ocfs2_read_quota_block(struct inode *inode, u64 v_block, + struct buffer_head **bh) { - struct buffer_head *tmp = NULL; + int rc = 0; + struct buffer_head *tmp = *bh; - *err = ocfs2_read_virt_blocks(inode, block, 1, &tmp, 0, NULL); - if (*err) - mlog_errno(*err); + rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, NULL); + if (rc) + mlog_errno(rc); + + /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */ + if (!rc && !*bh) + *bh = tmp; - return tmp; + return rc; } static struct buffer_head *ocfs2_get_quota_block(struct inode *inode, @@ -143,8 +148,9 @@ ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data, toread = len; while (toread > 0) { tocopy = min((size_t)(sb->s_blocksize - offset), toread); - bh = ocfs2_read_quota_block(gqinode, blk, &err); - if (!bh) { + bh = NULL; + err = ocfs2_read_quota_block(gqinode, blk, &bh); + if (err) { mlog_errno(err); return err; } @@ -169,7 +175,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, int offset = off & (sb->s_blocksize - 1); sector_t blk = off >> sb->s_blocksize_bits; int err = 0, new = 0; - struct buffer_head *bh; + struct buffer_head *bh = NULL; handle_t *handle = journal_current_handle(); if (!handle) { @@ -200,13 +206,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, /* Not rewriting whole block? 
*/ if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) && !new) { - bh = ocfs2_read_quota_block(gqinode, blk, &err); - if (!bh) { + err = ocfs2_read_quota_block(gqinode, blk, &bh); + if (err) { mlog_errno(err); return err; } err = ocfs2_journal_access(handle, gqinode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + OCFS2_JOURNAL_ACCESS_WRITE); } else { bh = ocfs2_get_quota_block(gqinode, blk, &err); if (!bh) { @@ -214,7 +220,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, return err; } err = ocfs2_journal_access(handle, gqinode, bh, - OCFS2_JOURNAL_ACCESS_CREATE); + OCFS2_JOURNAL_ACCESS_CREATE); } if (err < 0) { brelse(bh); diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index b98562174cd..7053664f66a 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -139,15 +139,15 @@ static int ocfs2_local_check_quota_file(struct super_block *sb, int type) unsigned int gversions[MAXQUOTAS] = OCFS2_GLOBAL_QVERSIONS; unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE, GROUP_QUOTA_SYSTEM_INODE }; - struct buffer_head *bh; + struct buffer_head *bh = NULL; struct inode *linode = sb_dqopt(sb)->files[type]; struct inode *ginode = NULL; struct ocfs2_disk_dqheader *dqhead; int status, ret = 0; /* First check whether we understand local quota file */ - bh = ocfs2_read_quota_block(linode, 0, &status); - if (!bh) { + status = ocfs2_read_quota_block(linode, 0, &bh); + if (status) { mlog_errno(status); mlog(ML_ERROR, "failed to read quota file header (type=%d)\n", type); @@ -178,8 +178,8 @@ static int ocfs2_local_check_quota_file(struct super_block *sb, int type) goto out_err; } /* Since the header is read only, we don't care about locking */ - bh = ocfs2_read_quota_block(ginode, 0, &status); - if (!bh) { + status = ocfs2_read_quota_block(ginode, 0, &bh); + if (status) { mlog_errno(status); mlog(ML_ERROR, "failed to read global quota file header " "(type=%d)\n", type); @@ -235,10 +235,11 @@ static int ocfs2_load_local_quota_bitmaps(struct 
inode *inode, return -ENOMEM; } newchunk->qc_num = i; - newchunk->qc_headerbh = ocfs2_read_quota_block(inode, + newchunk->qc_headerbh = NULL; + status = ocfs2_read_quota_block(inode, ol_quota_chunk_block(inode->i_sb, i), - &status); - if (!newchunk->qc_headerbh) { + &newchunk->qc_headerbh); + if (status) { mlog_errno(status); kmem_cache_free(ocfs2_qf_chunk_cachep, newchunk); ocfs2_release_local_quota_bitmaps(head); @@ -320,10 +321,11 @@ static int ocfs2_recovery_load_quota(struct inode *lqinode, int status = 0; for (i = 0; i < chunks; i++) { - hbh = ocfs2_read_quota_block(lqinode, - ol_quota_chunk_block(sb, i), - &status); - if (!hbh) { + hbh = NULL; + status = ocfs2_read_quota_block(lqinode, + ol_quota_chunk_block(sb, i), + &hbh); + if (status) { mlog_errno(status); break; } @@ -392,8 +394,9 @@ struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery( goto out_put; } /* Now read local header */ - bh = ocfs2_read_quota_block(lqinode, 0, &status); - if (!bh) { + bh = NULL; + status = ocfs2_read_quota_block(lqinode, 0, &bh); + if (status) { mlog_errno(status); mlog(ML_ERROR, "failed to read quota file info header " "(slot=%d type=%d)\n", slot_num, type); @@ -447,19 +450,21 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) { chunk = rchunk->rc_chunk; - hbh = ocfs2_read_quota_block(lqinode, - ol_quota_chunk_block(sb, chunk), - &status); - if (!hbh) { + hbh = NULL; + status = ocfs2_read_quota_block(lqinode, + ol_quota_chunk_block(sb, chunk), + &hbh); + if (status) { mlog_errno(status); break; } dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data; for_each_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) { - qbh = ocfs2_read_quota_block(lqinode, + qbh = NULL; + status = ocfs2_read_quota_block(lqinode, ol_dqblk_block(sb, chunk, bit), - &status); - if (!qbh) { + &qbh); + if (status) { mlog_errno(status); break; } @@ -581,8 +586,9 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super 
*osb, goto out_put; } /* Now read local header */ - bh = ocfs2_read_quota_block(lqinode, 0, &status); - if (!bh) { + bh = NULL; + status = ocfs2_read_quota_block(lqinode, 0, &bh); + if (status) { mlog_errno(status); mlog(ML_ERROR, "failed to read quota file info header " "(slot=%d type=%d)\n", slot_num, type); @@ -676,8 +682,8 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) locked = 1; /* Now read local header */ - bh = ocfs2_read_quota_block(lqinode, 0, &status); - if (!bh) { + status = ocfs2_read_quota_block(lqinode, 0, &bh); + if (status) { mlog_errno(status); mlog(ML_ERROR, "failed to read quota file info header " "(type=%d)\n", type); @@ -850,13 +856,13 @@ static int ocfs2_local_write_dquot(struct dquot *dquot) { struct super_block *sb = dquot->dq_sb; struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); - struct buffer_head *bh; + struct buffer_head *bh = NULL; int status; - bh = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type], + status = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type], ol_dqblk_file_block(sb, od->dq_local_off), - &status); - if (!bh) { + &bh); + if (status) { mlog_errno(status); goto out; } -- cgit v1.2.3-70-g09d2 From af09e51b6810d3408db1c0e956b3b0687b0e3723 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Nov 2008 15:31:28 +0100 Subject: ocfs2: Fix oops when extending quota files We have to mark buffer as uptodate before calling ocfs2_journal_access() and ocfs2_set_buffer_uptodate() does not do this for us. 
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/quota_global.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 2bdcddd3f1c..8fceb0c49b3 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -174,7 +174,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, struct inode *gqinode = oinfo->dqi_gqinode; int offset = off & (sb->s_blocksize - 1); sector_t blk = off >> sb->s_blocksize_bits; - int err = 0, new = 0; + int err = 0, new = 0, ja_type; struct buffer_head *bh = NULL; handle_t *handle = journal_current_handle(); @@ -207,32 +207,28 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) && !new) { err = ocfs2_read_quota_block(gqinode, blk, &bh); - if (err) { - mlog_errno(err); - return err; - } - err = ocfs2_journal_access(handle, gqinode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ja_type = OCFS2_JOURNAL_ACCESS_WRITE; } else { bh = ocfs2_get_quota_block(gqinode, blk, &err); - if (!bh) { - mlog_errno(err); - return err; - } - err = ocfs2_journal_access(handle, gqinode, bh, - OCFS2_JOURNAL_ACCESS_CREATE); + ja_type = OCFS2_JOURNAL_ACCESS_CREATE; } - if (err < 0) { - brelse(bh); - goto out; + if (err) { + mlog_errno(err); + return err; } lock_buffer(bh); if (new) memset(bh->b_data, 0, sb->s_blocksize); memcpy(bh->b_data + offset, data, len); flush_dcache_page(bh->b_page); + set_buffer_uptodate(bh); unlock_buffer(bh); ocfs2_set_buffer_uptodate(gqinode, bh); + err = ocfs2_journal_access(handle, gqinode, bh, ja_type); + if (err < 0) { + brelse(bh); + goto out; + } err = ocfs2_journal_dirty(handle, bh); brelse(bh); if (err < 0) -- cgit v1.2.3-70-g09d2 From 53a3604610e92a5344cf8003c19975583e71a598 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Nov 2008 15:31:29 +0100 Subject: ocfs2: Make ocfs2_get_quota_block() consistent with 
ocfs2_read_quota_block() Make function return error status and not buffer pointer so that it's consistent with ocfs2_read_quota_block(). Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/quota_global.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 8fceb0c49b3..e527ec6e013 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -104,26 +104,25 @@ int ocfs2_read_quota_block(struct inode *inode, u64 v_block, return rc; } -static struct buffer_head *ocfs2_get_quota_block(struct inode *inode, - int block, int *err) +static int ocfs2_get_quota_block(struct inode *inode, int block, + struct buffer_head **bh) { u64 pblock, pcount; - struct buffer_head *bh; + int err; down_read(&OCFS2_I(inode)->ip_alloc_sem); - *err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount, - NULL); + err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount, NULL); up_read(&OCFS2_I(inode)->ip_alloc_sem); - if (*err) { - mlog_errno(*err); - return NULL; + if (err) { + mlog_errno(err); + return err; } - bh = sb_getblk(inode->i_sb, pblock); - if (!bh) { - *err = -EIO; - mlog_errno(*err); + *bh = sb_getblk(inode->i_sb, pblock); + if (!*bh) { + err = -EIO; + mlog_errno(err); } - return bh; + return err;; } /* Read data from global quotafile - avoid pagecache and such because we cannot @@ -209,7 +208,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, err = ocfs2_read_quota_block(gqinode, blk, &bh); ja_type = OCFS2_JOURNAL_ACCESS_WRITE; } else { - bh = ocfs2_get_quota_block(gqinode, blk, &err); + err = ocfs2_get_quota_block(gqinode, blk, &bh); ja_type = OCFS2_JOURNAL_ACCESS_CREATE; } if (err) { -- cgit v1.2.3-70-g09d2 From 9a2f3866c825c67c3a5806799cdc93fb7517f0c4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Nov 2008 15:31:30 +0100 Subject: ocfs2: Fix build warnings (64-bit types vs long long) 
fs/ocfs2/quota_local.c: In function 'olq_set_dquot': fs/ocfs2/quota_local.c:844: warning: format '%lld' expects type 'long long int', but argument 7 has type '__le64' fs/ocfs2/quota_local.c:844: warning: format '%lld' expects type 'long long int', but argument 8 has type '__le64' fs/ocfs2/quota_local.c:844: warning: format '%lld' expects type 'long long int', but argument 7 has type '__le64' fs/ocfs2/quota_local.c:844: warning: format '%lld' expects type 'long long int', but argument 8 has type '__le64' fs/ocfs2/quota_local.c:844: warning: format '%lld' expects type 'long long int', but argument 7 has type '__le64' fs/ocfs2/quota_local.c:844: warning: format '%lld' expects type 'long long int', but argument 8 has type '__le64' fs/ocfs2/quota_global.c: In function '__ocfs2_sync_dquot': fs/ocfs2/quota_global.c:457: warning: format '%lld' expects type 'long long int', but argument 8 has type 's64' fs/ocfs2/quota_global.c:457: warning: format '%lld' expects type 'long long int', but argument 10 has type 's64' fs/ocfs2/quota_global.c:457: warning: format '%lld' expects type 'long long int', but argument 8 has type 's64' fs/ocfs2/quota_global.c:457: warning: format '%lld' expects type 'long long int', but argument 10 has type 's64' fs/ocfs2/quota_global.c:457: warning: format '%lld' expects type 'long long int', but argument 8 has type 's64' fs/ocfs2/quota_global.c:457: warning: format '%lld' expects type 'long long int', but argument 10 has type 's64' Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/quota_global.c | 6 +++--- fs/ocfs2/quota_local.c | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index e527ec6e013..054d52bd825 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -457,9 +457,9 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) olditime = dquot->dq_dqb.dqb_itime; oldbtime = dquot->dq_dqb.dqb_btime; 
ocfs2_global_disk2memdqb(dquot, &dqblk); - mlog(0, "Syncing global dquot %d space %lld+%lld, inodes %lld+%lld\n", - dquot->dq_id, dquot->dq_dqb.dqb_curspace, spacechange, - dquot->dq_dqb.dqb_curinodes, inodechange); + mlog(0, "Syncing global dquot %u space %lld+%lld, inodes %lld+%lld\n", + dquot->dq_id, dquot->dq_dqb.dqb_curspace, (long long)spacechange, + dquot->dq_dqb.dqb_curinodes, (long long)inodechange); if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags)) dquot->dq_dqb.dqb_curspace += spacechange; if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags)) diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 7053664f66a..b5ddb22e627 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -848,7 +848,8 @@ static void olq_set_dquot(struct buffer_head *bh, void *private) od->dq_originodes); spin_unlock(&dq_data_lock); mlog(0, "Writing local dquot %u space %lld inodes %lld\n", - od->dq_dquot.dq_id, dqblk->dqb_spacemod, dqblk->dqb_inodemod); + od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod), + (long long)le64_to_cpu(dqblk->dqb_inodemod)); } /* Write dquot to local quota file */ -- cgit v1.2.3-70-g09d2 From e35ff98f7c37b7bc901b4b90a66a0287565e456c Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 26 Nov 2008 16:20:19 -0800 Subject: ocfs2: fix indentation in ocfs2_dquot_drop_slow Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/quota_global.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 054d52bd825..a10faebe88a 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -893,7 +893,7 @@ static int ocfs2_dquot_drop_slow(struct inode *inode) if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); - goto out; + goto out; } dquot_drop(inode); ocfs2_commit_trans(OCFS2_SB(sb), handle); -- cgit v1.2.3-70-g09d2 From df32b3343aa11e0c7f54783594b24321d17d376f Mon Sep 17 00:00:00 2001 From: Tao
Ma Date: Tue, 25 Nov 2008 07:21:36 +0800 Subject: ocfs2/quota: sparse fixes for quota Fix 2 minor things in quota. They are both found by sparse check. 1. an endian bug in ocfs2_local_quota_add_chunk. 2. change olq_alloc_dquot to static. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/quota_local.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index b5ddb22e627..d451b715aef 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -988,7 +988,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( goto out_trans; } lock_buffer(bh); - dchunk->dqc_free = ol_quota_entries_per_block(sb); + dchunk->dqc_free = cpu_to_le32(ol_quota_entries_per_block(sb)); memset(dchunk->dqc_bitmap, 0, sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - OCFS2_QBLK_RESERVED_SPACE); @@ -1110,7 +1110,7 @@ out: return ERR_PTR(status); } -void olq_alloc_dquot(struct buffer_head *bh, void *private) +static void olq_alloc_dquot(struct buffer_head *bh, void *private) { int *offset = private; struct ocfs2_local_disk_chunk *dchunk; -- cgit v1.2.3-70-g09d2 From 548b0f22bb7497ba76f91627b99f9fed53a91704 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Mon, 24 Nov 2008 19:32:13 -0800 Subject: ocfs2: Dirty the entire bucket in ocfs2_bucket_value_truncate() ocfs2_bucket_value_truncate() currently takes the first bh of the bucket, and magically plays around with the value bh - even though the bucket structure in the calling function already has it. In addition, future code wants to always dirty the entire bucket when it is changed. So let's pass the entire bucket into this function, skip any block reads (we have them), and add the access/dirty logic. ocfs2_xattr_value_update_size() is no longer necessary, as it only did one thing other than journal access/dirty.
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 74 +++++++++++++++++++++----------------------------------- 1 file changed, 28 insertions(+), 46 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 3b9634c7d29..6db68a23a29 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -4580,31 +4580,6 @@ out: return ret; } -static int ocfs2_xattr_value_update_size(struct inode *inode, - handle_t *handle, - struct buffer_head *xe_bh, - struct ocfs2_xattr_entry *xe, - u64 new_size) -{ - int ret; - - ret = ocfs2_journal_access(handle, inode, xe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret < 0) { - mlog_errno(ret); - goto out; - } - - xe->xe_value_size = cpu_to_le64(new_size); - - ret = ocfs2_journal_dirty(handle, xe_bh); - if (ret < 0) - mlog_errno(ret); - -out: - return ret; -} - /* * Truncate the specified xe_off entry in xattr bucket. * bucket is indicated by header_bh and len is the new length. @@ -4613,7 +4588,7 @@ out: * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. */ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, - struct buffer_head *header_bh, + struct ocfs2_xattr_bucket *bucket, int xe_off, int len, struct ocfs2_xattr_set_ctxt *ctxt) @@ -4623,8 +4598,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, struct buffer_head *value_bh = NULL; struct ocfs2_xattr_value_root *xv; struct ocfs2_xattr_entry *xe; - struct ocfs2_xattr_header *xh = - (struct ocfs2_xattr_header *)header_bh->b_data; + struct ocfs2_xattr_header *xh = bucket_xh(bucket); size_t blocksize = inode->i_sb->s_blocksize; xe = &xh->xh_entries[xe_off]; @@ -4638,34 +4612,41 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, /* We don't allow ocfs2_xattr_value to be stored in different block. 
*/ BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); - value_blk += header_bh->b_blocknr; - ret = ocfs2_read_block(inode, value_blk, &value_bh, NULL); - if (ret) { - mlog_errno(ret); - goto out; - } + value_bh = bucket->bu_bhs[value_blk]; + BUG_ON(!value_bh); xv = (struct ocfs2_xattr_value_root *) (value_bh->b_data + offset % blocksize); - mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n", - xe_off, (unsigned long long)header_bh->b_blocknr, len); - ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt); + ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_xattr_value_update_size(inode, ctxt->handle, - header_bh, xe, len); + /* + * From here on out we have to dirty the bucket. The generic + * value calls only modify one of the bucket's bhs, but we need + * to send the bucket at once. So if they error, they *could* have + * modified something. We have to assume they did, and dirty + * the whole bucket. This leaves us in a consistent state. 
+ */ + mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n", + xe_off, (unsigned long long)bucket_blkno(bucket), len); + ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt); if (ret) { mlog_errno(ret); - goto out; + goto out_dirty; } + xe->xe_value_size = cpu_to_le64(len); + +out_dirty: + ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); + out: - brelse(value_bh); return ret; } @@ -4681,7 +4662,7 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe)); offset = xe - xh->xh_entries; - ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket->bu_bhs[0], + ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket, offset, len, ctxt); if (ret) mlog_errno(ret); @@ -5107,11 +5088,13 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, struct ocfs2_xattr_entry *xe; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; + int credits = ocfs2_remove_extent_credits(osb->sb) + + ocfs2_blocks_per_xattr_bucket(inode->i_sb); + ocfs2_init_dealloc_ctxt(&ctxt.dealloc); - ctxt.handle = ocfs2_start_trans(osb, - ocfs2_remove_extent_credits(osb->sb)); + ctxt.handle = ocfs2_start_trans(osb, credits); if (IS_ERR(ctxt.handle)) { ret = PTR_ERR(ctxt.handle); mlog_errno(ret); @@ -5123,8 +5106,7 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, if (ocfs2_xattr_is_local(xe)) continue; - ret = ocfs2_xattr_bucket_value_truncate(inode, - bucket->bu_bhs[0], + ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, i, 0, &ctxt); if (ret) { mlog_errno(ret); -- cgit v1.2.3-70-g09d2 From 88c3b0622acf82c7c86fbc066e81e15edc7c1685 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 11 Dec 2008 08:54:11 +0800 Subject: ocfs2: Narrow the transaction for deleting xattrs from a bucket. We move the transaction into the loop because in ocfs2_remove_extent, we will double the credits in function ocfs2_extend_rotate_transaction. 
So if we have a large loop count, we will soon waste much of the journal space. Signed-off-by: Tao Ma Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 6db68a23a29..df53a2ce2de 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -5094,30 +5094,30 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, ocfs2_init_dealloc_ctxt(&ctxt.dealloc); - ctxt.handle = ocfs2_start_trans(osb, credits); - if (IS_ERR(ctxt.handle)) { - ret = PTR_ERR(ctxt.handle); - mlog_errno(ret); - goto out; - } - for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { xe = &xh->xh_entries[i]; if (ocfs2_xattr_is_local(xe)) continue; + ctxt.handle = ocfs2_start_trans(osb, credits); + if (IS_ERR(ctxt.handle)) { + ret = PTR_ERR(ctxt.handle); + mlog_errno(ret); + break; + } + ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, i, 0, &ctxt); + + ocfs2_commit_trans(osb, ctxt.handle); if (ret) { mlog_errno(ret); break; } } - ret = ocfs2_commit_trans(osb, ctxt.handle); ocfs2_schedule_truncate_log_flush(osb, 1); ocfs2_run_deallocs(osb, &ctxt.dealloc); -out: return ret; } -- cgit v1.2.3-70-g09d2 From 92de109ade7999084fb0bfcc65d603252504e0d0 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 25 Nov 2008 17:06:40 -0800 Subject: ocfs2: Dirty the entire first bucket in ocfs2_extend_xattr_bucket() ocfs2_extend_xattr_bucket() takes an extent of buckets and shifts some of them down to make room for a new xattr. It is passed the first bh of the first bucket, because that is where we store the number of buckets in the extent. However, future code wants to always dirty the entire bucket when it is changed. So let's pass the entire bucket into this function, skip any block reads (we have them), and add the access/dirty logic. We also can skip passing in the target bucket bh - we only need its block number.
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 85 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 30 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index df53a2ce2de..ed1e9596756 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3905,7 +3905,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, mlog_errno(ret); goto out; } - + ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); if (ret) goto out; @@ -4232,37 +4232,45 @@ leave: } /* - * Extend a new xattr bucket and move xattrs to the end one by one until - * We meet with start_bh. Only move half of the xattrs to the bucket after it. + * We are given an extent. 'first' is the bucket at the very front of + * the extent. The extent has space for an additional bucket past + * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number + * of the target bucket. We wish to shift every bucket past the target + * down one, filling in that additional space. When we get back to the + * target, we split the target between itself and the now-empty bucket + * at target+1 (aka, target_blkno + blks_per_bucket). 
*/ static int ocfs2_extend_xattr_bucket(struct inode *inode, handle_t *handle, - struct buffer_head *first_bh, - struct buffer_head *start_bh, + struct ocfs2_xattr_bucket *first, + u64 target_blk, u32 num_clusters) { int ret, credits; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - u64 start_blk = start_bh->b_blocknr, end_blk; - u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb); - struct ocfs2_xattr_header *first_xh = - (struct ocfs2_xattr_header *)first_bh->b_data; - u16 bucket = le16_to_cpu(first_xh->xh_num_buckets); + u64 end_blk; + u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); mlog(0, "extend xattr bucket in %llu, xattr extend rec starting " - "from %llu, len = %u\n", (unsigned long long)start_blk, - (unsigned long long)first_bh->b_blocknr, num_clusters); + "from %llu, len = %u\n", (unsigned long long)target_blk, + (unsigned long long)bucket_blkno(first), num_clusters); - BUG_ON(bucket >= num_buckets); + /* The extent must have room for an additional bucket */ + BUG_ON(new_bucket >= + (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); - end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket; + /* end_blk points to the last existing bucket */ + end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); /* - * We will touch all the buckets after the start_bh(include it). - * Then we add one more bucket. + * end_blk is the start of the last existing bucket. + * Thus, (end_blk - target_blk) covers the target bucket and + * every bucket after it up to, but not including, the last + * existing bucket. Then we add the last existing bucket, the + * new bucket, and the first bucket (3 * blk_per_bucket). 
*/ - credits = end_blk - start_blk + 3 * blk_per_bucket + 1 + + credits = (end_blk - target_blk) + (3 * blk_per_bucket) + handle->h_buffer_credits; ret = ocfs2_extend_trans(handle, credits); if (ret) { @@ -4270,14 +4278,14 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, goto out; } - ret = ocfs2_journal_access(handle, inode, first_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_xattr_bucket_journal_access(handle, first, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } - while (end_blk != start_blk) { + while (end_blk != target_blk) { ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, end_blk + blk_per_bucket, 0); if (ret) @@ -4285,12 +4293,12 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, end_blk -= blk_per_bucket; } - /* Move half of the xattr in start_blk to the next bucket. */ - ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk, - start_blk + blk_per_bucket, NULL, 0); + /* Move half of the xattr in target_blkno to the next bucket. 
*/ + ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, + target_blk + blk_per_bucket, NULL, 0); - le16_add_cpu(&first_xh->xh_num_buckets, 1); - ocfs2_journal_dirty(handle, first_bh); + le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); + ocfs2_xattr_bucket_journal_dirty(handle, first); out: return ret; @@ -4324,10 +4332,19 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, int ret, num_buckets, extend = 1; u64 p_blkno; u32 e_cpos, num_clusters; + /* The bucket at the front of the extent */ + struct ocfs2_xattr_bucket *first; mlog(0, "Add new xattr bucket starting form %llu\n", (unsigned long long)header_bh->b_blocknr); + first = ocfs2_xattr_bucket_new(inode); + if (!first) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + /* * Add refrence for header_bh here because it may be * changed in ocfs2_add_new_xattr_cluster and we need @@ -4367,17 +4384,25 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, } } - if (extend) + if (extend) { + /* These bucket reads should be cached */ + ret = ocfs2_read_xattr_bucket(first, first_bh->b_blocknr); + if (ret) { + mlog_errno(ret); + goto out; + } ret = ocfs2_extend_xattr_bucket(inode, ctxt->handle, - first_bh, - header_bh, + first, header_bh->b_blocknr, num_clusters); - if (ret) - mlog_errno(ret); + if (ret) + mlog_errno(ret); + } + out: brelse(first_bh); brelse(header_bh); + ocfs2_xattr_bucket_free(first); return ret; } -- cgit v1.2.3-70-g09d2 From 15d609293d1954465a4788b9b182214323c6a2a1 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 25 Nov 2008 18:36:42 -0800 Subject: ocfs2: Dirty the entire first bucket in ocfs2_cp_xattr_cluster(). ocfs2_cp_xattr_cluster() takes the last bucket of a full extent and copies it over to a new extent. It then updates the headers of both extents to reflect the new state. It is passed the first bh of the first bucket in order to update that first extent's bucket count. It reads and dirties the first bh of the new extent for the same reason. 
However, future code wants to always dirty the entire bucket when it is changed. So it is changed to read the entire bucket it is updating for both extents. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 80 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 32 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index ed1e9596756..4dba3475882 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3936,9 +3936,10 @@ out: } /* - * Copy one xattr cluster from src_blk to to_blk. - * The to_blk will become the first bucket header of the cluster, so its - * xh_num_buckets will be initialized as the bucket num in the cluster. + * src_blk points to the last cluster of an existing extent. to_blk + * points to a newly allocated extent. We copy the cluster over to the + * new extent, initializing its xh_num_buckets. The old extent's + * xh_num_buckets shrinks by the same amount. */ static int ocfs2_cp_xattr_cluster(struct inode *inode, handle_t *handle, @@ -3950,27 +3951,42 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode, int i, ret, credits; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); + int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); - struct buffer_head *bh = NULL; - struct ocfs2_xattr_header *xh; - u64 to_blk_start = to_blk; + struct ocfs2_xattr_bucket *old_first, *new_first; mlog(0, "cp xattrs from cluster %llu to %llu\n", (unsigned long long)src_blk, (unsigned long long)to_blk); + /* The first bucket of the original extent */ + old_first = ocfs2_xattr_bucket_new(inode); + /* The first bucket of the new extent */ + new_first = ocfs2_xattr_bucket_new(inode); + if (!old_first || !new_first) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_read_xattr_bucket(old_first, first_bh->b_blocknr); + if (ret) { + 
mlog_errno(ret); + goto out; + } + /* - * We need to update the new cluster and 1 more for the update of - * the 1st bucket of the previous extent rec. + * We need to update the first bucket of the old extent and the + * entire first cluster of the new extent. */ - credits = bpc + 1 + handle->h_buffer_credits; + credits = blks_per_bucket + bpc + handle->h_buffer_credits; ret = ocfs2_extend_trans(handle, credits); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_journal_access(handle, inode, first_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_xattr_bucket_journal_access(handle, old_first, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -3978,45 +3994,45 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode, for (i = 0; i < num_buckets; i++) { ret = ocfs2_cp_xattr_bucket(inode, handle, - src_blk, to_blk, 1); + src_blk + (i * blks_per_bucket), + to_blk + (i * blks_per_bucket), + 1); if (ret) { mlog_errno(ret); goto out; } - - src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb); - to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb); } - /* update the old bucket header. */ - xh = (struct ocfs2_xattr_header *)first_bh->b_data; - le16_add_cpu(&xh->xh_num_buckets, -num_buckets); - - ocfs2_journal_dirty(handle, first_bh); - - /* update the new bucket header. */ - ret = ocfs2_read_block(inode, to_blk_start, &bh, NULL); - if (ret < 0) { + /* + * Get the new bucket ready before we dirty anything + * (This actually shouldn't fail, because we already dirtied + * it once in ocfs2_cp_xattr_bucket()). 
+ */ + ret = ocfs2_read_xattr_bucket(new_first, to_blk); + if (ret) { mlog_errno(ret); goto out; } - - ret = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_xattr_bucket_journal_access(handle, new_first, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } - xh = (struct ocfs2_xattr_header *)bh->b_data; - xh->xh_num_buckets = cpu_to_le16(num_buckets); + /* Now update the headers */ + le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); + ocfs2_xattr_bucket_journal_dirty(handle, old_first); - ocfs2_journal_dirty(handle, bh); + bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); + ocfs2_xattr_bucket_journal_dirty(handle, new_first); if (first_hash) - *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); + *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); + out: - brelse(bh); + ocfs2_xattr_bucket_free(new_first); + ocfs2_xattr_bucket_free(old_first); return ret; } -- cgit v1.2.3-70-g09d2 From 2b656c1d6fc5ba7791a360766780a212faed5705 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 25 Nov 2008 19:00:15 -0800 Subject: ocfs2: Explain t_is_new in ocfs2_cp_xattr_cluster(). I was unsure of the JOURNAL_ACCESS parameters in ocfs2_cp_xattr_cluster(). They're based on the function argument 't_is_new', but I couldn't quite figure out how t_is_new mapped to allocation. ocfs2_cp_xattr_cluster() actually overwrites the target, regardless of t_is_new. Well, I just figured it out. So I'm adding a big fat comment for those who come after me. ocfs2_divide_xattr_cluster() has the same behavior. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 4dba3475882..5efcf4e85d7 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3747,6 +3747,11 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, goto out; } + /* + * Hey, if we're overwriting t_bucket, what difference does + * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the + * same part of ocfs2_cp_xattr_bucket(). + */ ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, new_bucket_head ? OCFS2_JOURNAL_ACCESS_CREATE : @@ -3918,6 +3923,18 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, if (ret) goto out; + /* + * Hey, if we're overwriting t_bucket, what difference does + * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new + * cluster to fill, we came here from ocfs2_cp_xattr_cluster(), and + * it is really new - ACCESS_CREATE is required. But we also + * might have moved data out of t_bucket before extending back + * into it. ocfs2_add_new_xattr_bucket() can do this - its call + * to ocfs2_add_new_xattr_cluster() may have created a new extent + * and copied out the end of the old extent. Then it re-extends + * the old extent back to create space for new xattrs. That's + * how we get here, and the bucket isn't really new. + */ ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, t_is_new ? OCFS2_JOURNAL_ACCESS_CREATE : -- cgit v1.2.3-70-g09d2 From b5c03e746959bb005b987e9d8511df46680c3daa Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 25 Nov 2008 19:58:16 -0800 Subject: ocfs2: Use ocfs2_cp_xattr_bucket() in ocfs2_mv_xattr_bucket_cross_cluster(). The buffer copy loop of ocfs2_mv_xattr_bucket_cross_cluster() actually looks a lot like ocfs2_cp_xattr_bucket(). Let's just use that instead. We also use bucket operations to update the buckets at the start of each extent. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 169 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 104 insertions(+), 65 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 5efcf4e85d7..5be99666f02 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -170,6 +170,11 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode, static int ocfs2_delete_xattr_index_block(struct inode *inode, struct buffer_head *xb_bh); +static int ocfs2_cp_xattr_bucket(struct inode *inode, + handle_t *handle, + u64 s_blkno, + u64 t_blkno, + int t_is_new); static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) { @@ -3526,13 +3531,21 @@ out: } /* - * Move half nums of the xattr bucket in the previous cluster to this new - * cluster. We only touch the last cluster of the previous extend record. + * prev_blkno points to the start of an existing extent. new_blkno + * points to a newly allocated extent. Because we know each of our + * clusters contains more than bucket, we can easily split one cluster + * at a bucket boundary. So we take the last cluster of the existing + * extent and split it down the middle. We move the last half of the + * buckets in the last cluster of the existing extent over to the new + * extent. + * + * first_bh is the buffer at prev_blkno so we can update the existing + * extent's bucket count. header_bh is the bucket were we were hoping + * to insert our xattr. If the bucket move places the target in the new + * extent, we'll update first_bh and header_bh after modifying the old + * extent. * - * first_bh is the first buffer_head of a series of bucket in the same - * extent rec and header_bh is the header of one bucket in this cluster. - * They will be updated if we move the data header_bh contains to the new - * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster. 
+ * first_hash will be set as the 1st xe's name_hash in the new extent. */ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, handle_t *handle, @@ -3545,105 +3558,131 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, { int i, ret, credits; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); - int blocksize = inode->i_sb->s_blocksize; - struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL; - struct ocfs2_xattr_header *new_xh; + int to_move = num_buckets / 2; + u64 last_cluster_blkno, src_blkno; struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)((*first_bh)->b_data); + struct ocfs2_xattr_bucket *old_first, *new_first; BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets); BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize); - prev_bh = *first_bh; - get_bh(prev_bh); - xh = (struct ocfs2_xattr_header *)prev_bh->b_data; - - prev_blkno += (num_clusters - 1) * bpc + bpc / 2; + last_cluster_blkno = prev_blkno + ((num_clusters - 1) * bpc); + src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); mlog(0, "move half of xattrs in cluster %llu to %llu\n", (unsigned long long)prev_blkno, (unsigned long long)new_blkno); + /* The first bucket of the original extent */ + old_first = ocfs2_xattr_bucket_new(inode); + /* The first bucket of the new extent */ + new_first = ocfs2_xattr_bucket_new(inode); + if (!old_first || !new_first) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_read_xattr_bucket(old_first, prev_blkno); + if (ret) { + mlog_errno(ret); + goto out; + } + /* - * We need to update the 1st half of the new cluster and - * 1 more for the update of the 1st bucket of the previous - * extent record. + * We need to update the 1st half of the new extent, and we + * need to update the first bucket of the old extent. 
*/ - credits = bpc / 2 + 1 + handle->h_buffer_credits; + credits = ((to_move + 1) * blks_per_bucket) + handle->h_buffer_credits; ret = ocfs2_extend_trans(handle, credits); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_journal_access(handle, inode, prev_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_xattr_bucket_journal_access(handle, old_first, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } - for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) { - old_bh = new_bh = NULL; - new_bh = sb_getblk(inode->i_sb, new_blkno); - if (!new_bh) { - ret = -EIO; + for (i = 0; i < to_move; i++) { + ret = ocfs2_cp_xattr_bucket(inode, handle, + src_blkno + (i * blks_per_bucket), + new_blkno + (i * blks_per_bucket), + 1); + if (ret) { mlog_errno(ret); goto out; } + } - ocfs2_set_new_buffer_uptodate(inode, new_bh); + /* + * Get the new bucket ready before we dirty anything + * (This actually shouldn't fail, because we already dirtied + * it once in ocfs2_cp_xattr_bucket()). 
+ */ + ret = ocfs2_read_xattr_bucket(new_first, new_blkno); + if (ret) { + mlog_errno(ret); + goto out; + } + ret = ocfs2_xattr_bucket_journal_access(handle, new_first, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } - ret = ocfs2_journal_access(handle, inode, new_bh, - OCFS2_JOURNAL_ACCESS_CREATE); - if (ret < 0) { - mlog_errno(ret); - brelse(new_bh); - goto out; - } + /* Now update the headers */ + le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -to_move); + ocfs2_xattr_bucket_journal_dirty(handle, old_first); - ret = ocfs2_read_block(inode, prev_blkno, &old_bh, NULL); - if (ret < 0) { - mlog_errno(ret); - brelse(new_bh); - goto out; - } + bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(to_move); + ocfs2_xattr_bucket_journal_dirty(handle, new_first); - memcpy(new_bh->b_data, old_bh->b_data, blocksize); + if (first_hash) + *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); - if (i == 0) { - new_xh = (struct ocfs2_xattr_header *)new_bh->b_data; - new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2); + /* + * If the target bucket is anywhere past src_blkno, we moved + * it to the new extent. We need to update first_bh and header_bh. + */ + if ((*header_bh)->b_blocknr >= src_blkno) { + /* We're done with old_first, so we can re-use it. */ + ocfs2_xattr_bucket_relse(old_first); - if (first_hash) - *first_hash = le32_to_cpu( - new_xh->xh_entries[0].xe_name_hash); - new_first_bh = new_bh; - get_bh(new_first_bh); - } + /* Find the block for the new target bucket */ + src_blkno = new_blkno + + ((*header_bh)->b_blocknr - src_blkno); - ocfs2_journal_dirty(handle, new_bh); + /* + * This shouldn't fail - the buffers are in the + * journal from ocfs2_cp_xattr_bucket(). 
+ */ + ret = ocfs2_read_xattr_bucket(old_first, src_blkno); + if (ret) { + mlog_errno(ret); + goto out; + } - if (*header_bh == old_bh) { - brelse(*header_bh); - *header_bh = new_bh; - get_bh(*header_bh); + brelse(*first_bh); + *first_bh = new_first->bu_bhs[0]; + get_bh(*first_bh); - brelse(*first_bh); - *first_bh = new_first_bh; - get_bh(*first_bh); - } - brelse(new_bh); - brelse(old_bh); + brelse(*header_bh); + *header_bh = old_first->bu_bhs[0]; + get_bh(*header_bh); } - le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2)); - - ocfs2_journal_dirty(handle, prev_bh); out: - brelse(prev_bh); - brelse(new_first_bh); + ocfs2_xattr_bucket_free(new_first); + ocfs2_xattr_bucket_free(old_first); + return ret; } -- cgit v1.2.3-70-g09d2 From 874d65af1c8b8f6456a934701e6828d3017be029 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 26 Nov 2008 13:02:18 -0800 Subject: ocfs2: Rename ocfs2_cp_xattr_cluster() to ocfs2_mv_xattr_buckets(). ocfs2_cp_xattr_cluster() takes the last cluster of an xattr extent, copies its buckets to the front of a new extent, and then shrinks the bucket count of the original extent. So it's really moving the data, not copying it. While we're here, the function doesn't need a buffer_head for the old extent, just the block number. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 5be99666f02..c1f2e069074 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3965,11 +3965,12 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, /* * Hey, if we're overwriting t_bucket, what difference does * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new - * cluster to fill, we came here from ocfs2_cp_xattr_cluster(), and - * it is really new - ACCESS_CREATE is required. 
But we also - * might have moved data out of t_bucket before extending back - * into it. ocfs2_add_new_xattr_bucket() can do this - its call - * to ocfs2_add_new_xattr_cluster() may have created a new extent + * cluster to fill, we came here from + * ocfs2_mv_xattr_buckets(), and it is really new - + * ACCESS_CREATE is required. But we also might have moved data + * out of t_bucket before extending back into it. + * ocfs2_add_new_xattr_bucket() can do this - its call to + * ocfs2_add_new_xattr_cluster() may have created a new extent * and copied out the end of the old extent. Then it re-extends * the old extent back to create space for new xattrs. That's * how we get here, and the bucket isn't really new. @@ -3992,17 +3993,16 @@ out: } /* - * src_blk points to the last cluster of an existing extent. to_blk - * points to a newly allocated extent. We copy the cluster over to the - * new extent, initializing its xh_num_buckets. The old extent's - * xh_num_buckets shrinks by the same amount. + * src_blk points to the start of an existing extent. last_blk points to + * last cluster in that extent. to_blk points to a newly allocated + * extent. We copy the buckets from cluster at last_blk to the new extent, + * initializing its xh_num_buckets. The old extent's xh_num_buckets + * shrinks by the same amount. 
*/ -static int ocfs2_cp_xattr_cluster(struct inode *inode, +static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, - struct buffer_head *first_bh, - u64 src_blk, - u64 to_blk, - u32 *first_hash) + u64 src_blk, u64 last_blk, + u64 to_blk, u32 *first_hash) { int i, ret, credits; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); @@ -4011,8 +4011,8 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode, int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); struct ocfs2_xattr_bucket *old_first, *new_first; - mlog(0, "cp xattrs from cluster %llu to %llu\n", - (unsigned long long)src_blk, (unsigned long long)to_blk); + mlog(0, "mv xattrs from cluster %llu to %llu\n", + (unsigned long long)last_blk, (unsigned long long)to_blk); /* The first bucket of the original extent */ old_first = ocfs2_xattr_bucket_new(inode); @@ -4024,7 +4024,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode, goto out; } - ret = ocfs2_read_xattr_bucket(old_first, first_bh->b_blocknr); + ret = ocfs2_read_xattr_bucket(old_first, src_blk); if (ret) { mlog_errno(ret); goto out; @@ -4050,7 +4050,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode, for (i = 0; i < num_buckets; i++) { ret = ocfs2_cp_xattr_bucket(inode, handle, - src_blk + (i * blks_per_bucket), + last_blk + (i * blks_per_bucket), to_blk + (i * blks_per_bucket), 1); if (ret) { @@ -4175,8 +4175,10 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, u64 last_blk = prev_blk + bpc * (prev_clusters - 1); if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk) - ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh, - last_blk, new_blk, + ret = ocfs2_mv_xattr_buckets(inode, handle, + (*first_bh)->b_blocknr, + last_blk, + new_blk, v_start); else { ret = ocfs2_divide_xattr_cluster(inode, handle, -- cgit v1.2.3-70-g09d2 From 54ecb6b6df54bf72befb359b21f3759b2952f9d9 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 26 Nov 2008 13:18:31 -0800 Subject: ocfs2: ocfs2_mv_xattr_buckets() can 
handle a partial cluster now. If you look at ocfs2_mv_xattr_bucket_cross_cluster(), you'll notice that two-thirds of the code is almost identical to ocfs2_mv_xattr_buckets(). The only difference is that ocfs2_mv_xattr_buckets() moves a whole cluster's worth, while ocfs2_mv_xattr_bucket_cross_cluster() moves half the cluster. We change ocfs2_mv_xattr_buckets() to allow moving partial clusters. The original caller of ocfs2_mv_xattr_buckets() still moves the whole cluster's worth - it just passes a start_bucket of 0. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index c1f2e069074..97340940cee 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3995,18 +3995,19 @@ out: /* * src_blk points to the start of an existing extent. last_blk points to * last cluster in that extent. to_blk points to a newly allocated - * extent. We copy the buckets from cluster at last_blk to the new extent, - * initializing its xh_num_buckets. The old extent's xh_num_buckets - * shrinks by the same amount. + * extent. We copy the buckets from the cluster at last_blk to the new + * extent. If start_bucket is non-zero, we skip that many buckets before + * we start copying. The new extent's xh_num_buckets gets set to the + * number of buckets we copied. The old extent's xh_num_buckets shrinks + * by the same amount. 
*/ -static int ocfs2_mv_xattr_buckets(struct inode *inode, - handle_t *handle, - u64 src_blk, u64 last_blk, - u64 to_blk, u32 *first_hash) +static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, + u64 src_blk, u64 last_blk, u64 to_blk, + unsigned int start_bucket, + u32 *first_hash) { int i, ret, credits; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); struct ocfs2_xattr_bucket *old_first, *new_first; @@ -4014,6 +4015,12 @@ static int ocfs2_mv_xattr_buckets(struct inode *inode, mlog(0, "mv xattrs from cluster %llu to %llu\n", (unsigned long long)last_blk, (unsigned long long)to_blk); + BUG_ON(start_bucket >= num_buckets); + if (start_bucket) { + num_buckets -= start_bucket; + last_blk += (start_bucket * blks_per_bucket); + } + /* The first bucket of the original extent */ old_first = ocfs2_xattr_bucket_new(inode); /* The first bucket of the new extent */ @@ -4031,10 +4038,11 @@ static int ocfs2_mv_xattr_buckets(struct inode *inode, } /* - * We need to update the first bucket of the old extent and the - * entire first cluster of the new extent. + * We need to update the first bucket of the old extent and all + * the buckets going to the new extent. 
*/ - credits = blks_per_bucket + bpc + handle->h_buffer_credits; + credits = ((num_buckets + 1) * blks_per_bucket) + + handle->h_buffer_credits; ret = ocfs2_extend_trans(handle, credits); if (ret) { mlog_errno(ret); @@ -4177,8 +4185,7 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk) ret = ocfs2_mv_xattr_buckets(inode, handle, (*first_bh)->b_blocknr, - last_blk, - new_blk, + last_blk, new_blk, 0, v_start); else { ret = ocfs2_divide_xattr_cluster(inode, handle, -- cgit v1.2.3-70-g09d2 From c58b6032f93358871361a92d7743dbc85d27084e Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 26 Nov 2008 13:36:24 -0800 Subject: ocfs2: Use ocfs2_mv_xattr_buckets() in ocfs2_mv_xattr_bucket_cross_cluster(). Now that ocfs2_mv_xattr_buckets() can move a partial cluster's worth of buckets, ocfs2_mv_xattr_bucket_cross_cluster() can use it. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 110 +++++++++++++++---------------------------------------- 1 file changed, 29 insertions(+), 81 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 97340940cee..c3189286679 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -170,11 +170,10 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode, static int ocfs2_delete_xattr_index_block(struct inode *inode, struct buffer_head *xb_bh); -static int ocfs2_cp_xattr_bucket(struct inode *inode, - handle_t *handle, - u64 s_blkno, - u64 t_blkno, - int t_is_new); +static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, + u64 src_blk, u64 last_blk, u64 to_blk, + unsigned int start_bucket, + u32 *first_hash); static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) { @@ -3556,115 +3555,64 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, u32 num_clusters, u32 *first_hash) { - int i, ret, credits; + int ret; struct ocfs2_super *osb = 
OCFS2_SB(inode->i_sb); int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); int to_move = num_buckets / 2; - u64 last_cluster_blkno, src_blkno; + u64 src_blkno; + u64 last_cluster_blkno = prev_blkno + + ((num_clusters - 1) * ocfs2_clusters_to_blocks(inode->i_sb, 1)); struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)((*first_bh)->b_data); - struct ocfs2_xattr_bucket *old_first, *new_first; + struct ocfs2_xattr_bucket *new_target, *new_first; BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets); BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize); - last_cluster_blkno = prev_blkno + ((num_clusters - 1) * bpc); - src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); - mlog(0, "move half of xattrs in cluster %llu to %llu\n", - (unsigned long long)prev_blkno, (unsigned long long)new_blkno); + (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno); - /* The first bucket of the original extent */ - old_first = ocfs2_xattr_bucket_new(inode); /* The first bucket of the new extent */ new_first = ocfs2_xattr_bucket_new(inode); - if (!old_first || !new_first) { + /* The target bucket if it was moved to the new extent */ + new_target = ocfs2_xattr_bucket_new(inode); + if (!new_target || !new_first) { ret = -ENOMEM; mlog_errno(ret); goto out; } - ret = ocfs2_read_xattr_bucket(old_first, prev_blkno); + ret = ocfs2_mv_xattr_buckets(inode, handle, prev_blkno, + last_cluster_blkno, new_blkno, + to_move, first_hash); if (ret) { mlog_errno(ret); goto out; } - /* - * We need to update the 1st half of the new extent, and we - * need to update the first bucket of the old extent. 
- */ - credits = ((to_move + 1) * blks_per_bucket) + handle->h_buffer_credits; - ret = ocfs2_extend_trans(handle, credits); - if (ret) { - mlog_errno(ret); - goto out; - } - - ret = ocfs2_xattr_bucket_journal_access(handle, old_first, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) { - mlog_errno(ret); - goto out; - } - - for (i = 0; i < to_move; i++) { - ret = ocfs2_cp_xattr_bucket(inode, handle, - src_blkno + (i * blks_per_bucket), - new_blkno + (i * blks_per_bucket), - 1); - if (ret) { - mlog_errno(ret); - goto out; - } - } - - /* - * Get the new bucket ready before we dirty anything - * (This actually shouldn't fail, because we already dirtied - * it once in ocfs2_cp_xattr_bucket()). - */ - ret = ocfs2_read_xattr_bucket(new_first, new_blkno); - if (ret) { - mlog_errno(ret); - goto out; - } - ret = ocfs2_xattr_bucket_journal_access(handle, new_first, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) { - mlog_errno(ret); - goto out; - } - - /* Now update the headers */ - le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -to_move); - ocfs2_xattr_bucket_journal_dirty(handle, old_first); - - bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(to_move); - ocfs2_xattr_bucket_journal_dirty(handle, new_first); - - if (first_hash) - *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); + /* This is the first bucket that got moved */ + src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); /* - * If the target bucket is anywhere past src_blkno, we moved - * it to the new extent. We need to update first_bh and header_bh. + * If the target bucket was part of the moved buckets, we need to + * update first_bh and header_bh. */ if ((*header_bh)->b_blocknr >= src_blkno) { - /* We're done with old_first, so we can re-use it. 
*/ - ocfs2_xattr_bucket_relse(old_first); - /* Find the block for the new target bucket */ src_blkno = new_blkno + ((*header_bh)->b_blocknr - src_blkno); /* - * This shouldn't fail - the buffers are in the + * These shouldn't fail - the buffers are in the * journal from ocfs2_cp_xattr_bucket(). */ - ret = ocfs2_read_xattr_bucket(old_first, src_blkno); + ret = ocfs2_read_xattr_bucket(new_first, new_blkno); + if (ret) { + mlog_errno(ret); + goto out; + } + ret = ocfs2_read_xattr_bucket(new_target, src_blkno); if (ret) { mlog_errno(ret); goto out; @@ -3675,13 +3623,13 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, get_bh(*first_bh); brelse(*header_bh); - *header_bh = old_first->bu_bhs[0]; + *header_bh = new_target->bu_bhs[0]; get_bh(*header_bh); } out: ocfs2_xattr_bucket_free(new_first); - ocfs2_xattr_bucket_free(old_first); + ocfs2_xattr_bucket_free(new_target); return ret; } -- cgit v1.2.3-70-g09d2 From 92cf3adf48097b7561a3c83f800ed3b2b25b18d4 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 26 Nov 2008 14:12:09 -0800 Subject: ocfs2: Start using buckets in ocfs2_adjust_xattr_cross_cluster(). We want to be passing around buckets instead of buffer_heads. Let's get them into ocfs2_adjust_xattr_cross_cluster. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 44 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index c3189286679..975ba3653fe 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -4111,28 +4111,54 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, u32 *v_start, int *extend) { - int ret = 0; - int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); + int ret; + struct ocfs2_xattr_bucket *first, *target; mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", (unsigned long long)prev_blk, prev_clusters, (unsigned long long)new_blk); + /* The first bucket of the original extent */ + first = ocfs2_xattr_bucket_new(inode); + /* The target bucket for insert */ + target = ocfs2_xattr_bucket_new(inode); + if (!first || !target) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + BUG_ON(prev_blk != (*first_bh)->b_blocknr); + ret = ocfs2_read_xattr_bucket(first, prev_blk); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_read_xattr_bucket(target, (*header_bh)->b_blocknr); + if (ret) { + mlog_errno(ret); + goto out; + } + if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, handle, first_bh, header_bh, new_blk, - prev_blk, + bucket_blkno(first), prev_clusters, v_start); else { - u64 last_blk = prev_blk + bpc * (prev_clusters - 1); + /* The start of the last cluster in the first extent */ + u64 last_blk = bucket_blkno(first) + + ((prev_clusters - 1) * + ocfs2_clusters_to_blocks(inode->i_sb, 1)); - if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk) + if (prev_clusters > 1 && bucket_blkno(target) != last_blk) ret = ocfs2_mv_xattr_buckets(inode, handle, - (*first_bh)->b_blocknr, + bucket_blkno(first), last_blk, new_blk, 0, v_start); else { @@ -4140,11 +4166,15 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode 
*inode, last_blk, new_blk, v_start); - if ((*header_bh)->b_blocknr == last_blk && extend) + if ((bucket_blkno(target) == last_blk) && extend) *extend = 0; } } +out: + ocfs2_xattr_bucket_free(first); + ocfs2_xattr_bucket_free(target); + return ret; } -- cgit v1.2.3-70-g09d2 From 41cb814866110b6e35dad7569ecf96163c3bb824 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 26 Nov 2008 14:25:21 -0800 Subject: ocfs2: Pass buckets into ocfs2_mv_xattr_bucket_cross_cluster(). Now that ocfs2_adjust_xattr_cross_cluster() has buckets, it can pass them into ocfs2_mv_xattr_bucket_cross_cluster(). It no longer has to care about buffer_heads. The manipulation of first_bh and header_bh moves up to ocfs2_adjust_xattr_cross_cluster(). Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 84 +++++++++++++++++++++++++------------------------------- 1 file changed, 37 insertions(+), 47 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 975ba3653fe..2f16f50ebcb 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3548,42 +3548,28 @@ out: */ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, handle_t *handle, - struct buffer_head **first_bh, - struct buffer_head **header_bh, + struct ocfs2_xattr_bucket *first, + struct ocfs2_xattr_bucket *target, u64 new_blkno, - u64 prev_blkno, u32 num_clusters, u32 *first_hash) { int ret; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); + struct super_block *sb = inode->i_sb; + int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); + int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); int to_move = num_buckets / 2; u64 src_blkno; - u64 last_cluster_blkno = prev_blkno + - ((num_clusters - 1) * ocfs2_clusters_to_blocks(inode->i_sb, 1)); - struct ocfs2_xattr_header *xh = - (struct ocfs2_xattr_header *)((*first_bh)->b_data); - 
struct ocfs2_xattr_bucket *new_target, *new_first; + u64 last_cluster_blkno = bucket_blkno(first) + + ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); - BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets); - BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize); + BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); + BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); mlog(0, "move half of xattrs in cluster %llu to %llu\n", (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno); - /* The first bucket of the new extent */ - new_first = ocfs2_xattr_bucket_new(inode); - /* The target bucket if it was moved to the new extent */ - new_target = ocfs2_xattr_bucket_new(inode); - if (!new_target || !new_first) { - ret = -ENOMEM; - mlog_errno(ret); - goto out; - } - - ret = ocfs2_mv_xattr_buckets(inode, handle, prev_blkno, + ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), last_cluster_blkno, new_blkno, to_move, first_hash); if (ret) { @@ -3596,41 +3582,32 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, /* * If the target bucket was part of the moved buckets, we need to - * update first_bh and header_bh. + * update first and target. */ - if ((*header_bh)->b_blocknr >= src_blkno) { + if (bucket_blkno(target) >= src_blkno) { /* Find the block for the new target bucket */ src_blkno = new_blkno + - ((*header_bh)->b_blocknr - src_blkno); + (bucket_blkno(target) - src_blkno); + + ocfs2_xattr_bucket_relse(first); + ocfs2_xattr_bucket_relse(target); /* * These shouldn't fail - the buffers are in the * journal from ocfs2_cp_xattr_bucket(). 
*/ - ret = ocfs2_read_xattr_bucket(new_first, new_blkno); + ret = ocfs2_read_xattr_bucket(first, new_blkno); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_read_xattr_bucket(new_target, src_blkno); - if (ret) { + ret = ocfs2_read_xattr_bucket(target, src_blkno); + if (ret) mlog_errno(ret); - goto out; - } - brelse(*first_bh); - *first_bh = new_first->bu_bhs[0]; - get_bh(*first_bh); - - brelse(*header_bh); - *header_bh = new_target->bu_bhs[0]; - get_bh(*header_bh); } out: - ocfs2_xattr_bucket_free(new_first); - ocfs2_xattr_bucket_free(new_target); - return ret; } @@ -4141,16 +4118,29 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, goto out; } - if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) + if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, handle, - first_bh, - header_bh, + first, target, new_blk, - bucket_blkno(first), prev_clusters, v_start); - else { + if (ret) { + mlog_errno(ret); + goto out; + } + + /* Did first+target get moved? */ + if (prev_blk != bucket_blkno(first)) { + brelse(*first_bh); + *first_bh = first->bu_bhs[0]; + get_bh(*first_bh); + + brelse(*header_bh); + *header_bh = target->bu_bhs[0]; + get_bh(*header_bh); + } + } else { /* The start of the last cluster in the first extent */ u64 last_blk = bucket_blkno(first) + ((prev_clusters - 1) * -- cgit v1.2.3-70-g09d2 From 012ee910876e251621705e8dea7c353fd4914e19 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 26 Nov 2008 14:43:31 -0800 Subject: ocfs2: Move buckets up into ocfs2_add_new_xattr_cluster(). Lift the buckets from ocfs2_adjust_xattr_cross_cluster() up into ocfs2_add_new_xattr_cluster(). Now ocfs2_adjust_xattr_cross_cluster() doesn't deal with buffer_heads. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 100 +++++++++++++++++++++++++++---------------------------- 1 file changed, 49 insertions(+), 51 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 2f16f50ebcb..4b247047b7a 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -4080,44 +4080,19 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode, */ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, handle_t *handle, - struct buffer_head **first_bh, - struct buffer_head **header_bh, + struct ocfs2_xattr_bucket *first, + struct ocfs2_xattr_bucket *target, u64 new_blk, - u64 prev_blk, u32 prev_clusters, u32 *v_start, int *extend) { int ret; - struct ocfs2_xattr_bucket *first, *target; mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", - (unsigned long long)prev_blk, prev_clusters, + (unsigned long long)bucket_blkno(first), prev_clusters, (unsigned long long)new_blk); - /* The first bucket of the original extent */ - first = ocfs2_xattr_bucket_new(inode); - /* The target bucket for insert */ - target = ocfs2_xattr_bucket_new(inode); - if (!first || !target) { - ret = -ENOMEM; - mlog_errno(ret); - goto out; - } - - BUG_ON(prev_blk != (*first_bh)->b_blocknr); - ret = ocfs2_read_xattr_bucket(first, prev_blk); - if (ret) { - mlog_errno(ret); - goto out; - } - - ret = ocfs2_read_xattr_bucket(target, (*header_bh)->b_blocknr); - if (ret) { - mlog_errno(ret); - goto out; - } - if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, handle, @@ -4125,46 +4100,33 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, new_blk, prev_clusters, v_start); - if (ret) { + if (ret) mlog_errno(ret); - goto out; - } - - /* Did first+target get moved? 
*/ - if (prev_blk != bucket_blkno(first)) { - brelse(*first_bh); - *first_bh = first->bu_bhs[0]; - get_bh(*first_bh); - - brelse(*header_bh); - *header_bh = target->bu_bhs[0]; - get_bh(*header_bh); - } } else { /* The start of the last cluster in the first extent */ u64 last_blk = bucket_blkno(first) + ((prev_clusters - 1) * ocfs2_clusters_to_blocks(inode->i_sb, 1)); - if (prev_clusters > 1 && bucket_blkno(target) != last_blk) + if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), last_blk, new_blk, 0, v_start); - else { + if (ret) + mlog_errno(ret); + } else { ret = ocfs2_divide_xattr_cluster(inode, handle, last_blk, new_blk, v_start); + if (ret) + mlog_errno(ret); if ((bucket_blkno(target) == last_blk) && extend) *extend = 0; } } -out: - ocfs2_xattr_bucket_free(first); - ocfs2_xattr_bucket_free(target); - return ret; } @@ -4202,6 +4164,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, handle_t *handle = ctxt->handle; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_extent_tree et; + struct ocfs2_xattr_bucket *first, *target; mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, " "previous xattr blkno = %llu\n", @@ -4210,6 +4173,29 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); + /* The first bucket of the original extent */ + first = ocfs2_xattr_bucket_new(inode); + /* The target bucket for insert */ + target = ocfs2_xattr_bucket_new(inode); + if (!first || !target) { + ret = -ENOMEM; + mlog_errno(ret); + goto leave; + } + + BUG_ON(prev_blkno != (*first_bh)->b_blocknr); + ret = ocfs2_read_xattr_bucket(first, prev_blkno); + if (ret) { + mlog_errno(ret); + goto leave; + } + + ret = ocfs2_read_xattr_bucket(target, (*header_bh)->b_blocknr); + if (ret) { + mlog_errno(ret); + goto leave; + } + ret = ocfs2_journal_access(handle, inode, root_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) 
{ @@ -4250,10 +4236,9 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, } else { ret = ocfs2_adjust_xattr_cross_cluster(inode, handle, - first_bh, - header_bh, + first, + target, block, - prev_blkno, prev_clusters, &v_start, extend); @@ -4261,6 +4246,17 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, mlog_errno(ret); goto leave; } + + /* Did first+target get moved? */ + if (prev_blkno != bucket_blkno(first)) { + brelse(*first_bh); + *first_bh = first->bu_bhs[0]; + get_bh(*first_bh); + + brelse(*header_bh); + *header_bh = target->bu_bhs[0]; + get_bh(*header_bh); + } } mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", @@ -4277,6 +4273,8 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, mlog_errno(ret); leave: + ocfs2_xattr_bucket_free(first); + ocfs2_xattr_bucket_free(target); return ret; } -- cgit v1.2.3-70-g09d2 From ed29c0ca14871021fc8aced74650648dcb2c6e81 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 26 Nov 2008 15:08:44 -0800 Subject: ocfs2: Move buckets up into ocfs2_add_new_xattr_bucket(). Lift the buckets from ocfs2_add_new_xattr_cluster() up into ocfs2_add_new_xattr_bucket(). Now ocfs2_add_new_xattr_cluster() doesn't deal with buffer_heads. In fact, we no longer have to play get_bh() tricks at all. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 105 +++++++++++++++++-------------------------------------- 1 file changed, 32 insertions(+), 73 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 4b247047b7a..5a5a1bd7eed 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -4148,11 +4148,10 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, */ static int ocfs2_add_new_xattr_cluster(struct inode *inode, struct buffer_head *root_bh, - struct buffer_head **first_bh, - struct buffer_head **header_bh, + struct ocfs2_xattr_bucket *first, + struct ocfs2_xattr_bucket *target, u32 *num_clusters, u32 prev_cpos, - u64 prev_blkno, int *extend, struct ocfs2_xattr_set_ctxt *ctxt) { @@ -4164,38 +4163,14 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, handle_t *handle = ctxt->handle; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_extent_tree et; - struct ocfs2_xattr_bucket *first, *target; mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, " "previous xattr blkno = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, - prev_cpos, (unsigned long long)prev_blkno); + prev_cpos, (unsigned long long)bucket_blkno(first)); ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); - /* The first bucket of the original extent */ - first = ocfs2_xattr_bucket_new(inode); - /* The target bucket for insert */ - target = ocfs2_xattr_bucket_new(inode); - if (!first || !target) { - ret = -ENOMEM; - mlog_errno(ret); - goto leave; - } - - BUG_ON(prev_blkno != (*first_bh)->b_blocknr); - ret = ocfs2_read_xattr_bucket(first, prev_blkno); - if (ret) { - mlog_errno(ret); - goto leave; - } - - ret = ocfs2_read_xattr_bucket(target, (*header_bh)->b_blocknr); - if (ret) { - mlog_errno(ret); - goto leave; - } - ret = ocfs2_journal_access(handle, inode, root_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { @@ -4217,7 +4192,7 @@ static int ocfs2_add_new_xattr_cluster(struct 
inode *inode, mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n", num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); - if (prev_blkno + prev_clusters * bpc == block && + if (bucket_blkno(first) + (prev_clusters * bpc) == block && (prev_clusters + num_bits) << osb->s_clustersize_bits <= OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { /* @@ -4246,17 +4221,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, mlog_errno(ret); goto leave; } - - /* Did first+target get moved? */ - if (prev_blkno != bucket_blkno(first)) { - brelse(*first_bh); - *first_bh = first->bu_bhs[0]; - get_bh(*first_bh); - - brelse(*header_bh); - *header_bh = target->bu_bhs[0]; - get_bh(*header_bh); - } } mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", @@ -4273,8 +4237,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, mlog_errno(ret); leave: - ocfs2_xattr_bucket_free(first); - ocfs2_xattr_bucket_free(target); return ret; } @@ -4357,16 +4319,16 @@ out: * We will move all the buckets starting from header_bh to the next place. As * for this one, half num of its xattrs will be moved to the next one. * - * We will allocate a new cluster if current cluster is full and adjust - * header_bh and first_bh if the insert place is moved to the new cluster. + * We will allocate a new cluster if current cluster is full. The + * underlying calls will make sure that there is space at the target + * bucket, shifting buckets around if necessary. 'target' may be updated + * by those calls. 
*/ static int ocfs2_add_new_xattr_bucket(struct inode *inode, struct buffer_head *xb_bh, struct buffer_head *header_bh, struct ocfs2_xattr_set_ctxt *ctxt) { - struct ocfs2_xattr_header *first_xh = NULL; - struct buffer_head *first_bh = NULL; struct ocfs2_xattr_block *xb = (struct ocfs2_xattr_block *)xb_bh->b_data; struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; @@ -4374,31 +4336,26 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)header_bh->b_data; u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); - struct super_block *sb = inode->i_sb; - struct ocfs2_super *osb = OCFS2_SB(sb); + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int ret, num_buckets, extend = 1; u64 p_blkno; u32 e_cpos, num_clusters; /* The bucket at the front of the extent */ - struct ocfs2_xattr_bucket *first; + struct ocfs2_xattr_bucket *first, *target; mlog(0, "Add new xattr bucket starting form %llu\n", (unsigned long long)header_bh->b_blocknr); + /* The first bucket of the original extent */ first = ocfs2_xattr_bucket_new(inode); - if (!first) { + /* The target bucket for insert */ + target = ocfs2_xattr_bucket_new(inode); + if (!first || !target) { ret = -ENOMEM; mlog_errno(ret); goto out; } - /* - * Add refrence for header_bh here because it may be - * changed in ocfs2_add_new_xattr_cluster and we need - * to free it in the end. 
- */ - get_bh(header_bh); - ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, &num_clusters, el); if (ret) { @@ -4406,23 +4363,30 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, goto out; } - ret = ocfs2_read_block(inode, p_blkno, &first_bh, NULL); + ret = ocfs2_read_xattr_bucket(first, p_blkno); if (ret) { mlog_errno(ret); goto out; } - num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; - first_xh = (struct ocfs2_xattr_header *)first_bh->b_data; + ret = ocfs2_read_xattr_bucket(target, header_bh->b_blocknr); + if (ret) { + mlog_errno(ret); + goto out; + } - if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) { + num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; + if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { + /* + * This can move first+target if the target bucket moves + * to the new extent. + */ ret = ocfs2_add_new_xattr_cluster(inode, xb_bh, - &first_bh, - &header_bh, + first, + target, &num_clusters, e_cpos, - p_blkno, &extend, ctxt); if (ret) { @@ -4432,24 +4396,19 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, } if (extend) { - /* These bucket reads should be cached */ - ret = ocfs2_read_xattr_bucket(first, first_bh->b_blocknr); - if (ret) { - mlog_errno(ret); - goto out; - } ret = ocfs2_extend_xattr_bucket(inode, ctxt->handle, - first, header_bh->b_blocknr, + first, + bucket_blkno(target), num_clusters); if (ret) mlog_errno(ret); } out: - brelse(first_bh); - brelse(header_bh); ocfs2_xattr_bucket_free(first); + ocfs2_xattr_bucket_free(target); + return ret; } -- cgit v1.2.3-70-g09d2 From 91f2033fa997aa92607470ed1ef90685b9d77a8c Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 26 Nov 2008 15:25:41 -0800 Subject: ocfs2: Pass xs->bucket into ocfs2_add_new_xattr_bucket(). Pass the actual target bucket for insert through to ocfs2_add_new_xattr_bucket(). Now growing a bucket has no buffer_head knowledge. 
ocfs2_add_new_xattr_bucket() leaves xs->bucket in the proper state for insert. However, it doesn't update the rest of the search fields in xs, so we still have to relse() and re-find. That's OK, because everything is cached. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 52 +++++++++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 27 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 5a5a1bd7eed..dfc51c305bb 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -4314,43 +4314,42 @@ out: } /* - * Add new xattr bucket in an extent record and adjust the buckets accordingly. - * xb_bh is the ocfs2_xattr_block. - * We will move all the buckets starting from header_bh to the next place. As - * for this one, half num of its xattrs will be moved to the next one. + * Add new xattr bucket in an extent record and adjust the buckets + * accordingly. xb_bh is the ocfs2_xattr_block, and target is the + * bucket we want to insert into. * - * We will allocate a new cluster if current cluster is full. The - * underlying calls will make sure that there is space at the target - * bucket, shifting buckets around if necessary. 'target' may be updated - * by those calls. + * In the easy case, we will move all the buckets after target down by + * one. Half of target's xattrs will be moved to the next bucket. + * + * If current cluster is full, we'll allocate a new one. This may not + * be contiguous. The underlying calls will make sure that there is + * space for the insert, shifting buckets around if necessary. + * 'target' may be moved by those calls. 
*/ static int ocfs2_add_new_xattr_bucket(struct inode *inode, struct buffer_head *xb_bh, - struct buffer_head *header_bh, + struct ocfs2_xattr_bucket *target, struct ocfs2_xattr_set_ctxt *ctxt) { struct ocfs2_xattr_block *xb = (struct ocfs2_xattr_block *)xb_bh->b_data; struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; struct ocfs2_extent_list *el = &xb_root->xt_list; - struct ocfs2_xattr_header *xh = - (struct ocfs2_xattr_header *)header_bh->b_data; - u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); + u32 name_hash = + le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int ret, num_buckets, extend = 1; u64 p_blkno; u32 e_cpos, num_clusters; /* The bucket at the front of the extent */ - struct ocfs2_xattr_bucket *first, *target; + struct ocfs2_xattr_bucket *first; - mlog(0, "Add new xattr bucket starting form %llu\n", - (unsigned long long)header_bh->b_blocknr); + mlog(0, "Add new xattr bucket starting from %llu\n", + (unsigned long long)bucket_blkno(target)); /* The first bucket of the original extent */ first = ocfs2_xattr_bucket_new(inode); - /* The target bucket for insert */ - target = ocfs2_xattr_bucket_new(inode); - if (!first || !target) { + if (!first) { ret = -ENOMEM; mlog_errno(ret); goto out; @@ -4369,12 +4368,6 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, goto out; } - ret = ocfs2_read_xattr_bucket(target, header_bh->b_blocknr); - if (ret) { - mlog_errno(ret); - goto out; - } - num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { /* @@ -4407,7 +4400,6 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, out: ocfs2_xattr_bucket_free(first); - ocfs2_xattr_bucket_free(target); return ret; } @@ -5083,15 +5075,21 @@ try_again: ret = ocfs2_add_new_xattr_bucket(inode, xs->xattr_bh, - xs->bucket->bu_bhs[0], + xs->bucket, ctxt); if (ret) { mlog_errno(ret); goto out; } + /* + 
* ocfs2_add_new_xattr_bucket() will have updated + * xs->bucket if it moved, but it will not have updated + * any of the other search fields. Thus, we drop it and + * re-search. Everything should be cached, so it'll be + * quick. + */ ocfs2_xattr_bucket_relse(xs->bucket); - ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, xi->name_index, xi->name, xs); -- cgit v1.2.3-70-g09d2 From 754938c142ae0c28360426c43f965ddc5164b21e Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 15 Dec 2008 06:03:41 +0800 Subject: ocfs2/quota: Add QUOTA in mlog_attribute. A new mlog mask has to be added into mlog_attribute before it can be really used in mlog. ML_QUOTA is only added in masklog.h, so add it to the array to enable it. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/masklog.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index d8a0cb92cef..96df5416993 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c @@ -110,6 +110,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = { define_mask(QUORUM), define_mask(EXPORT), define_mask(XATTR), + define_mask(QUOTA), define_mask(ERROR), define_mask(NOTICE), define_mask(KTHREAD), -- cgit v1.2.3-70-g09d2 From ab552d54673f262d7f70014003d3928d29270f22 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 16 Oct 2008 17:50:30 -0700 Subject: ocfs2: Add the on-disk structures for metadata checksums. Define struct ocfs2_block_check, an 8-byte structure containing a 32bit crc32_le and a 16bit hamming code ecc. This will be used for metadata checksums. Add the structure to free spaces in the various metadata structures. Add the OCFS2_FEATURE_INCOMPAT_META_ECC bit. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/ocfs2_fs.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 5 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 359732e18e8..290fa26fba6 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -149,6 +149,9 @@ /* Support for extended attributes */ #define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200 +/* Metadata checksum and error correction */ +#define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800 + /* * backup superblock flag is used to indicate that this volume * has backup superblocks. @@ -426,6 +429,22 @@ static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = { */ #define OCFS2_RAW_SB(dinode) (&((dinode)->id2.i_super)) +/* + * Block checking structure. This is used in metadata to validate the + * contents. If OCFS2_FEATURE_INCOMPAT_META_ECC is not set, it is all + * zeros. + */ +struct ocfs2_block_check { +/*00*/ __le32 bc_crc32e; /* 802.3 Ethernet II CRC32 */ + __le16 bc_ecc; /* Single-error-correction parity vector. + This is a simple Hamming code dependant + on the blocksize. OCFS2's maximum + blocksize, 4K, requires 16 parity bits, + so we fit in __le16. */ + __le16 bc_reserved1; +/*08*/ +}; + /* * On disk extent record for OCFS2 * It describes a range of clusters on disk. 
@@ -513,7 +532,7 @@ struct ocfs2_truncate_log { struct ocfs2_extent_block { /*00*/ __u8 h_signature[8]; /* Signature for verification */ - __le64 h_reserved1; + struct ocfs2_block_check h_check; /* Error checking */ /*10*/ __le16 h_suballoc_slot; /* Slot suballocator this extent_header belongs to */ __le16 h_suballoc_bit; /* Bit offset in suballocator @@ -683,7 +702,8 @@ struct ocfs2_dinode { was set in i_flags */ __le16 i_dyn_features; __le64 i_xattr_loc; -/*80*/ __le64 i_reserved2[7]; +/*80*/ struct ocfs2_block_check i_check; /* Error checking */ +/*88*/ __le64 i_reserved2[6]; /*B8*/ union { __le64 i_pad1; /* Generic way to refer to this 64bit union */ @@ -750,7 +770,8 @@ struct ocfs2_group_desc /*20*/ __le64 bg_parent_dinode; /* dinode which owns me, in blocks */ __le64 bg_blkno; /* Offset on disk, in blocks */ -/*30*/ __le64 bg_reserved2[2]; +/*30*/ struct ocfs2_block_check bg_check; /* Error checking */ + __le64 bg_reserved2; /*40*/ __u8 bg_bitmap[0]; }; @@ -793,7 +814,12 @@ struct ocfs2_xattr_header { in this extent record, only valid in the first bucket. */ - __le64 xh_csum; + struct ocfs2_block_check xh_check; /* Error checking + (Note, this is only + used for xattr + buckets. A block uses + xb_check and sets + this field to zero.) */ struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ }; @@ -844,7 +870,7 @@ struct ocfs2_xattr_block { block group */ __le32 xb_fs_generation; /* Must match super block */ /*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */ - __le64 xb_csum; + struct ocfs2_block_check xb_check; /* Error checking */ /*20*/ __le16 xb_flags; /* Indicates whether this block contains real xattr or a xattr tree. */ __le16 xb_reserved0; @@ -988,6 +1014,25 @@ struct ocfs2_local_disk_dqblk { /*10*/ __le64 dqb_inodemod; /* Change in the amount of used inodes */ }; + +/* + * The quota trailer lives at the end of each quota block. 
+ */ + +struct ocfs2_disk_dqtrailer { +/*00*/ struct ocfs2_block_check dq_check; /* Error checking */ +/*08*/ /* Cannot be larger than OCFS2_QBLK_RESERVED_SPACE */ +}; + +static inline struct ocfs2_disk_dqtrailer *ocfs2_block_dqtrailer(int blocksize, + void *buf) +{ + char *ptr = buf; + ptr += blocksize - OCFS2_QBLK_RESERVED_SPACE; + + return (struct ocfs2_disk_dqtrailer *)ptr; +} + #ifdef __KERNEL__ static inline int ocfs2_fast_symlink_chars(struct super_block *sb) { -- cgit v1.2.3-70-g09d2 From 70ad1ba7b48364d758a112df0823edc5ca6632aa Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 16 Oct 2008 17:54:25 -0700 Subject: ocfs2: Add the underlying blockcheck code. This is the code that computes crc32 and ecc for ocfs2 metadata blocks. There are high-level functions that check whether the filesystem has the ecc feature, mid-level functions that work on a single block or array of buffer_heads, and the low-level ecc hamming code that can handle multiple buffers like crc32_le(). It's not hooked up to the filesystem yet. 
Signed-off-by: Joel Becker Cc: Christoph Hellwig Signed-off-by: Mark Fasheh --- fs/ocfs2/Makefile | 1 + fs/ocfs2/blockcheck.c | 480 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/blockcheck.h | 82 +++++++++ fs/ocfs2/ocfs2.h | 8 + 4 files changed, 571 insertions(+) create mode 100644 fs/ocfs2/blockcheck.c create mode 100644 fs/ocfs2/blockcheck.h (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 7e4b361b755..01596079dd6 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_OCFS2_FS_USERSPACE_CLUSTER) += ocfs2_stack_user.o ocfs2-objs := \ alloc.o \ aops.o \ + blockcheck.o \ buffer_head_io.o \ dcache.o \ dir.o \ diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c new file mode 100644 index 00000000000..2bf3d7f61ae --- /dev/null +++ b/fs/ocfs2/blockcheck.c @@ -0,0 +1,480 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * blockcheck.c + * + * Checksum and ECC codes for the OCFS2 userspace library. + * + * Copyright (C) 2006, 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License, version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "ocfs2.h" + +#include "blockcheck.h" + + + +/* + * We use the following conventions: + * + * d = # data bits + * p = # parity bits + * c = # total code bits (d + p) + */ +static int calc_parity_bits(unsigned int d) +{ + unsigned int p; + + /* + * Bits required for Single Error Correction is as follows: + * + * d + p + 1 <= 2^p + * + * We're restricting ourselves to 31 bits of parity, that should be + * sufficient. + */ + for (p = 1; p < 32; p++) + { + if ((d + p + 1) <= (1 << p)) + return p; + } + + return 0; +} + +/* + * Calculate the bit offset in the hamming code buffer based on the bit's + * offset in the data buffer. Since the hamming code reserves all + * power-of-two bits for parity, the data bit number and the code bit + * number are offset by all the parity bits beforehand. + * + * Recall that bit numbers in hamming code are 1-based. This function + * takes the 0-based data bit from the caller. + * + * An example. Take bit 1 of the data buffer. 1 is a power of two (2^0), + * so it's a parity bit. 2 is a power of two (2^1), so it's a parity bit. + * 3 is not a power of two. So bit 1 of the data buffer ends up as bit 3 + * in the code buffer. + */ +static unsigned int calc_code_bit(unsigned int i) +{ + unsigned int b, p; + + /* + * Data bits are 0-based, but we're talking code bits, which + * are 1-based. + */ + b = i + 1; + + /* + * For every power of two below our bit number, bump our bit. + * + * We compare with (b + 1) because we have to compare with what b + * would be _if_ it were bumped up by the parity bit. Capice? + */ + for (p = 0; (1 << p) < (b + 1); p++) + b++; + + return b; +} + +/* + * This is the low level encoder function. It can be called across + * multiple hunks just like the crc32 code. 'd' is the number of bits + * _in_this_hunk_. nr is the bit offset of this hunk.
So, if you had + * two 512B buffers, you would do it like so: + * + * parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0); + * parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8); + * + * If you just have one buffer, use ocfs2_hamming_encode_block(). + */ +u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr) +{ + unsigned int p = calc_parity_bits(nr + d); + unsigned int i, j, b; + + BUG_ON(!p); + + /* + * b is the hamming code bit number. Hamming code specifies a + * 1-based array, but C uses 0-based. So 'i' is for C, and 'b' is + * for the algorithm. + * + * The i++ in the for loop is so that the start offset passed + * to ocfs2_find_next_bit() is one greater than the previously + * found bit. + */ + for (i = 0; (i = ocfs2_find_next_bit(data, d, i)) < d; i++) + { + /* + * i is the offset in this hunk, nr + i is the total bit + * offset. + */ + b = calc_code_bit(nr + i); + + for (j = 0; j < p; j++) + { + /* + * Data bits in the resultant code are checked by + * parity bits that are part of the bit number + * representation. Huh? + * + * + * In other words, the parity bit at position 2^k + * checks bits in positions having bit k set in + * their binary representation. Conversely, for + * instance, bit 13, i.e. 1101(2), is checked by + * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1. + * + * + * Note that 'k' is the _code_ bit number. 'b' in + * our loop. + */ + if (b & (1 << j)) + parity ^= (1 << j); + } + } + + /* While the data buffer was treated as little endian, the + * return value is in host endian. */ + return parity; +} + +u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize) +{ + return ocfs2_hamming_encode(0, data, blocksize * 8, 0); +} + +/* + * Like ocfs2_hamming_encode(), this can handle hunks. nr is the bit + * offset of the current hunk. If bit to be fixed is not part of the + * current hunk, this does nothing. + * + * If you only have one hunk, use ocfs2_hamming_fix_block().
+ */ +void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr, + unsigned int fix) +{ + unsigned int p = calc_parity_bits(nr + d); + unsigned int i, b; + + BUG_ON(!p); + + /* + * If the bit to fix has an hweight of 1, it's a parity bit. One + * busted parity bit is its own error. Nothing to do here. + */ + if (hweight32(fix) == 1) + return; + + /* + * nr + d is the bit right past the data hunk we're looking at. + * If fix is after that, nothing to do + */ + if (fix >= calc_code_bit(nr + d)) + return; + + /* + * nr is the offset in the data hunk we're starting at. Let's + * start b at the offset in the code buffer. See ocfs2_hamming_encode() + * for a more detailed description of 'b'. + */ + b = calc_code_bit(nr); + /* If the fix is before this hunk, nothing to do */ + if (fix < b) + return; + + for (i = 0; i < d; i++, b++) + { + /* Skip past parity bits */ + while (hweight32(b) == 1) + b++; + + /* + * i is the offset in this data hunk. + * nr + i is the offset in the total data buffer. + * b is the offset in the total code buffer. + * + * Thus, when b == fix, bit i in the current hunk needs + * fixing. + */ + if (b == fix) + { + if (ocfs2_test_bit(i, data)) + ocfs2_clear_bit(i, data); + else + ocfs2_set_bit(i, data); + break; + } + } +} + +void ocfs2_hamming_fix_block(void *data, unsigned int blocksize, + unsigned int fix) +{ + ocfs2_hamming_fix(data, blocksize * 8, 0, fix); +} + +/* + * This function generates check information for a block. + * data is the block to be checked. bc is a pointer to the + * ocfs2_block_check structure describing the crc32 and the ecc. + * + * bc should be a pointer inside data, as the function will + * take care of zeroing it before calculating the check information. If + * bc does not point inside data, the caller must make sure any inline + * ocfs2_block_check structures are zeroed. + * + * The data buffer must be in on-disk endian (little endian for ocfs2).
+ * bc will be filled with little-endian values and will be ready to go to + * disk. + */ +void ocfs2_block_check_compute(void *data, size_t blocksize, + struct ocfs2_block_check *bc) +{ + u32 crc; + u32 ecc; + + memset(bc, 0, sizeof(struct ocfs2_block_check)); + + crc = crc32_le(~0, data, blocksize); + ecc = ocfs2_hamming_encode_block(data, blocksize); + + /* + * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no + * larger than 16 bits. + */ + BUG_ON(ecc > USHORT_MAX); + + bc->bc_crc32e = cpu_to_le32(crc); + bc->bc_ecc = cpu_to_le16((u16)ecc); +} + +/* + * This function validates existing check information. Like _compute, + * the function will take care of zeroing bc before calculating check codes. + * If bc is not a pointer inside data, the caller must have zeroed any + * inline ocfs2_block_check structures. + * + * Again, the data passed in should be the on-disk endian. + */ +int ocfs2_block_check_validate(void *data, size_t blocksize, + struct ocfs2_block_check *bc) +{ + int rc = 0; + struct ocfs2_block_check check; + u32 crc, ecc; + + check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); + check.bc_ecc = le16_to_cpu(bc->bc_ecc); + + memset(bc, 0, sizeof(struct ocfs2_block_check)); + + /* Fast path - if the crc32 validates, we're good to go */ + crc = crc32_le(~0, data, blocksize); + if (crc == check.bc_crc32e) + goto out; + + /* Ok, try ECC fixups */ + ecc = ocfs2_hamming_encode_block(data, blocksize); + ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc); + + /* And check the crc32 again */ + crc = crc32_le(~0, data, blocksize); + if (crc == check.bc_crc32e) + goto out; + + rc = -EIO; + +out: + bc->bc_crc32e = cpu_to_le32(check.bc_crc32e); + bc->bc_ecc = cpu_to_le16(check.bc_ecc); + + return rc; +} + +/* + * This function generates check information for a list of buffer_heads. + * bhs is the blocks to be checked. bc is a pointer to the + * ocfs2_block_check structure describing the crc32 and the ecc. 
+ * + * bc should be a pointer inside data, as the function will + * take care of zeroing it before calculating the check information. If + * bc does not point inside data, the caller must make sure any inline + * ocfs2_block_check structures are zeroed. + * + * The data buffer must be in on-disk endian (little endian for ocfs2). + * bc will be filled with little-endian values and will be ready to go to + * disk. + */ +void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr, + struct ocfs2_block_check *bc) +{ + int i; + u32 crc, ecc; + + BUG_ON(nr < 0); + + if (!nr) + return; + + memset(bc, 0, sizeof(struct ocfs2_block_check)); + + for (i = 0, crc = ~0, ecc = 0; i < nr; i++) { + crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); + /* + * The number of bits in a buffer is obviously b_size*8. + * The offset of this buffer is b_size*i, so the bit offset + * of this buffer is b_size*8*i. + */ + ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data, + bhs[i]->b_size * 8, + bhs[i]->b_size * 8 * i); + } + + /* + * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no + * larger than 16 bits. + */ + BUG_ON(ecc > USHORT_MAX); + + bc->bc_crc32e = cpu_to_le32(crc); + bc->bc_ecc = cpu_to_le16((u16)ecc); +} + +/* + * This function validates existing check information on a list of + * buffer_heads. Like _compute_bhs, the function will take care of + * zeroing bc before calculating check codes. If bc is not a pointer + * inside data, the caller must have zeroed any inline + * ocfs2_block_check structures. + * + * Again, the data passed in should be the on-disk endian. 
+ */ +int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, + struct ocfs2_block_check *bc) +{ + int i, rc = 0; + struct ocfs2_block_check check; + u32 crc, ecc, fix; + + BUG_ON(nr < 0); + + if (!nr) + return 0; + + check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); + check.bc_ecc = le16_to_cpu(bc->bc_ecc); + + memset(bc, 0, sizeof(struct ocfs2_block_check)); + + /* Fast path - if the crc32 validates, we're good to go */ + for (i = 0, crc = ~0; i < nr; i++) + crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); + if (crc == check.bc_crc32e) + goto out; + + mlog(ML_ERROR, + "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", + (unsigned int)check.bc_crc32e, (unsigned int)crc); + + /* Ok, try ECC fixups */ + for (i = 0, ecc = 0; i < nr; i++) { + /* + * The number of bits in a buffer is obviously b_size*8. + * The offset of this buffer is b_size*i, so the bit offset + * of this buffer is b_size*8*i. + */ + ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data, + bhs[i]->b_size * 8, + bhs[i]->b_size * 8 * i); + } + fix = ecc ^ check.bc_ecc; + for (i = 0; i < nr; i++) { + /* + * Try the fix against each buffer. It will only affect + * one of them. + */ + ocfs2_hamming_fix(bhs[i]->b_data, bhs[i]->b_size * 8, + bhs[i]->b_size * 8 * i, fix); + } + + /* And check the crc32 again */ + for (i = 0, crc = ~0; i < nr; i++) + crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); + if (crc == check.bc_crc32e) + goto out; + + mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", + (unsigned int)check.bc_crc32e, (unsigned int)crc); + + rc = -EIO; + +out: + bc->bc_crc32e = cpu_to_le32(check.bc_crc32e); + bc->bc_ecc = cpu_to_le16(check.bc_ecc); + + return rc; +} + +/* + * These are the main API. They check the superblock flag before + * calling the underlying operations. + * + * They expect the buffer(s) to be in disk format. 
+ */ +void ocfs2_compute_meta_ecc(struct super_block *sb, void *data, + struct ocfs2_block_check *bc) +{ + if (ocfs2_meta_ecc(OCFS2_SB(sb))) + ocfs2_block_check_compute(data, sb->s_blocksize, bc); +} + +int ocfs2_validate_meta_ecc(struct super_block *sb, void *data, + struct ocfs2_block_check *bc) +{ + int rc = 0; + + if (ocfs2_meta_ecc(OCFS2_SB(sb))) + rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc); + + return rc; +} + +void ocfs2_compute_meta_ecc_bhs(struct super_block *sb, + struct buffer_head **bhs, int nr, + struct ocfs2_block_check *bc) +{ + if (ocfs2_meta_ecc(OCFS2_SB(sb))) + ocfs2_block_check_compute_bhs(bhs, nr, bc); +} + +int ocfs2_validate_meta_ecc_bhs(struct super_block *sb, + struct buffer_head **bhs, int nr, + struct ocfs2_block_check *bc) +{ + int rc = 0; + + if (ocfs2_meta_ecc(OCFS2_SB(sb))) + rc = ocfs2_block_check_validate_bhs(bhs, nr, bc); + + return rc; +} + diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h new file mode 100644 index 00000000000..70ec3feda32 --- /dev/null +++ b/fs/ocfs2/blockcheck.h @@ -0,0 +1,82 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * blockcheck.h + * + * Checksum and ECC codes for the OCFS2 userspace library. + * + * Copyright (C) 2004, 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License, version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#ifndef OCFS2_BLOCKCHECK_H +#define OCFS2_BLOCKCHECK_H + + +/* High level block API */ +void ocfs2_compute_meta_ecc(struct super_block *sb, void *data, + struct ocfs2_block_check *bc); +int ocfs2_validate_meta_ecc(struct super_block *sb, void *data, + struct ocfs2_block_check *bc); +void ocfs2_compute_meta_ecc_bhs(struct super_block *sb, + struct buffer_head **bhs, int nr, + struct ocfs2_block_check *bc); +int ocfs2_validate_meta_ecc_bhs(struct super_block *sb, + struct buffer_head **bhs, int nr, + struct ocfs2_block_check *bc); + +/* Lower level API */ +void ocfs2_block_check_compute(void *data, size_t blocksize, + struct ocfs2_block_check *bc); +int ocfs2_block_check_validate(void *data, size_t blocksize, + struct ocfs2_block_check *bc); +void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr, + struct ocfs2_block_check *bc); +int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, + struct ocfs2_block_check *bc); + +/* + * Hamming code functions + */ + +/* + * Encoding hamming code parity bits for a buffer. + * + * This is the low level encoder function. It can be called across + * multiple hunks just like the crc32 code. 'd' is the number of bits + * _in_this_hunk_. nr is the bit offset of this hunk. So, if you had + * two 512B buffers, you would do it like so: + * + * parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0); + * parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8); + * + * If you just have one buffer, use ocfs2_hamming_encode_block(). + */ +u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, + unsigned int nr); +/* + * Fix a buffer with a bit error. The 'fix' is the original parity + * xor'd with the parity calculated now. + * + * Like ocfs2_hamming_encode(), this can handle hunks. nr is the bit + * offset of the current hunk. If bit to be fixed is not part of the + * current hunk, this does nothing. + * + * If you only have one buffer, use ocfs2_hamming_fix_block(). 
+ */ +void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr, + unsigned int fix); + +/* Convenience wrappers for a single buffer of data */ +extern u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize); +extern void ocfs2_hamming_fix_block(void *data, unsigned int blocksize, + unsigned int fix); +#endif diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 5c777988042..2bb389fe739 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -382,6 +382,13 @@ static inline int ocfs2_supports_xattr(struct ocfs2_super *osb) return 0; } +static inline int ocfs2_meta_ecc(struct ocfs2_super *osb) +{ + if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_META_ECC) + return 1; + return 0; +} + /* set / clear functions because cluster events can make these happen * in parallel so we want the transitions to be atomic. this also * means that any future flags osb_flags must be protected by spinlock @@ -615,5 +622,6 @@ static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb) #define ocfs2_clear_bit ext2_clear_bit #define ocfs2_test_bit ext2_test_bit #define ocfs2_find_next_zero_bit ext2_find_next_zero_bit +#define ocfs2_find_next_bit ext2_find_next_bit #endif /* OCFS2_H */ -- cgit v1.2.3-70-g09d2 From 684ef278377725d505aa23259ee673dab9b11851 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 2 Dec 2008 17:44:05 -0800 Subject: ocfs2: Add a validation hook for quota block reads. Add a currently-returns-success hook for quota block reads. We'll be adding checks to this. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/quota_global.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index a10faebe88a..7dbcfd7f65e 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -87,13 +87,25 @@ struct qtree_fmt_operations ocfs2_global_ops = { .is_id = ocfs2_global_is_id, }; +static int ocfs2_validate_quota_block(struct super_block *sb, + struct buffer_head *bh) +{ + struct ocfs2_disk_dqtrailer *dqt = ocfs2_dq_trailer(sb, bh->b_data); + + mlog(0, "Validating quota block %llu\n", + (unsigned long long)bh->b_blocknr); + + return 0; +} + int ocfs2_read_quota_block(struct inode *inode, u64 v_block, struct buffer_head **bh) { int rc = 0; struct buffer_head *tmp = *bh; - rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, NULL); + rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, + ocfs2_validate_quota_block); if (rc) mlog_errno(rc); -- cgit v1.2.3-70-g09d2 From d6b32bbb3eae3fb787f1c33bf9f767ca1ddeb208 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 17 Oct 2008 14:55:01 -0700 Subject: ocfs2: block read meta ecc. Add block check calls to the read_block validate functions. This is almost all of the read-side checking of metaecc. xattr buckets are not checked yet. Writes are also unchecked, and so a read-write mount will quickly fail.
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 17 +++++++++++++++++ fs/ocfs2/blockcheck.c | 9 +++++++++ fs/ocfs2/inode.c | 18 +++++++++++++++++- fs/ocfs2/quota_global.c | 13 +++++++++++-- fs/ocfs2/suballoc.c | 31 ++++++++++++++++++++++++++++++- fs/ocfs2/xattr.c | 17 +++++++++++++++++ 6 files changed, 101 insertions(+), 4 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 84a7bd4db5d..6b27f74bb34 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -37,6 +37,7 @@ #include "alloc.h" #include "aops.h" +#include "blockcheck.h" #include "dlmglue.h" #include "extent_map.h" #include "inode.h" @@ -682,12 +683,28 @@ struct ocfs2_merge_ctxt { static int ocfs2_validate_extent_block(struct super_block *sb, struct buffer_head *bh) { + int rc; struct ocfs2_extent_block *eb = (struct ocfs2_extent_block *)bh->b_data; mlog(0, "Validating extent block %llu\n", (unsigned long long)bh->b_blocknr); + BUG_ON(!buffer_uptodate(bh)); + + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. + */ + rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &eb->h_check); + if (rc) + return rc; + + /* + * Errors after here are fatal. + */ + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { ocfs2_error(sb, "Extent block #%llu has bad signature %.*s", diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index 2bf3d7f61ae..2ce6ae5e4b8 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -24,6 +24,8 @@ #include #include +#include + #include "ocfs2.h" #include "blockcheck.h" @@ -292,6 +294,10 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, if (crc == check.bc_crc32e) goto out; + mlog(ML_ERROR, + "CRC32 failed: stored: %u, computed %u. 
Applying ECC.\n", + (unsigned int)check.bc_crc32e, (unsigned int)crc); + /* Ok, try ECC fixups */ ecc = ocfs2_hamming_encode_block(data, blocksize); ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc); @@ -301,6 +307,9 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, if (crc == check.bc_crc32e) goto out; + mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", + (unsigned int)check.bc_crc32e, (unsigned int)crc); + rc = -EIO; out: diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 288512c9dbc..9370b652ab9 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -38,6 +38,7 @@ #include "ocfs2.h" #include "alloc.h" +#include "blockcheck.h" #include "dlmglue.h" #include "extent_map.h" #include "file.h" @@ -1262,7 +1263,7 @@ void ocfs2_refresh_inode(struct inode *inode, int ocfs2_validate_inode_block(struct super_block *sb, struct buffer_head *bh) { - int rc = -EINVAL; + int rc; struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; mlog(0, "Validating dinode %llu\n", @@ -1270,6 +1271,21 @@ int ocfs2_validate_inode_block(struct super_block *sb, BUG_ON(!buffer_uptodate(bh)); + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. + */ + rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check); + if (rc) + goto bail; + + /* + * Errors after here are fatal. 
+ */ + + rc = -EINVAL; + if (!OCFS2_IS_VALID_DINODE(di)) { ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n", (unsigned long long)bh->b_blocknr, 7, diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 7dbcfd7f65e..a0b8b14cca8 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -16,6 +16,7 @@ #include "ocfs2_fs.h" #include "ocfs2.h" #include "alloc.h" +#include "blockcheck.h" #include "inode.h" #include "journal.h" #include "file.h" @@ -90,12 +91,20 @@ struct qtree_fmt_operations ocfs2_global_ops = { static int ocfs2_validate_quota_block(struct super_block *sb, struct buffer_head *bh) { - struct ocfs2_disk_dqtrailer *dqt = ocfs2_dq_trailer(sb, bh->b_data); + struct ocfs2_disk_dqtrailer *dqt = + ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data); mlog(0, "Validating quota block %llu\n", (unsigned long long)bh->b_blocknr); - return 0; + BUG_ON(!buffer_uptodate(bh)); + + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. + */ + return ocfs2_validate_meta_ecc(sb, bh->b_data, &dqt->dq_check); } int ocfs2_read_quota_block(struct inode *inode, u64 v_block, diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 226fe21f260..78755766c32 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -35,6 +35,7 @@ #include "ocfs2.h" #include "alloc.h" +#include "blockcheck.h" #include "dlmglue.h" #include "inode.h" #include "journal.h" @@ -250,8 +251,18 @@ int ocfs2_check_group_descriptor(struct super_block *sb, struct buffer_head *bh) { int rc; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; + + BUG_ON(!buffer_uptodate(bh)); - rc = ocfs2_validate_gd_self(sb, bh, 1); + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. 
+ */ + rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check); + if (!rc) + rc = ocfs2_validate_gd_self(sb, bh, 1); if (!rc) rc = ocfs2_validate_gd_parent(sb, di, bh, 1); @@ -261,9 +272,27 @@ int ocfs2_check_group_descriptor(struct super_block *sb, static int ocfs2_validate_group_descriptor(struct super_block *sb, struct buffer_head *bh) { + int rc; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; + mlog(0, "Validating group descriptor %llu\n", (unsigned long long)bh->b_blocknr); + BUG_ON(!buffer_uptodate(bh)); + + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. + */ + rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check); + if (rc) + return rc; + + /* + * Errors after here are fatal. + */ + return ocfs2_validate_gd_self(sb, bh, 0); } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index dfc51c305bb..bc822d6ba54 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -42,6 +42,7 @@ #include "ocfs2.h" #include "alloc.h" +#include "blockcheck.h" #include "dlmglue.h" #include "file.h" #include "symlink.h" @@ -322,12 +323,28 @@ static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, static int ocfs2_validate_xattr_block(struct super_block *sb, struct buffer_head *bh) { + int rc; struct ocfs2_xattr_block *xb = (struct ocfs2_xattr_block *)bh->b_data; mlog(0, "Validating xattr block %llu\n", (unsigned long long)bh->b_blocknr); + BUG_ON(!buffer_uptodate(bh)); + + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. 
+ */ + rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); + if (rc) + return rc; + + /* + * Errors after here are fatal + */ + if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { ocfs2_error(sb, "Extended attribute block #%llu has bad " -- cgit v1.2.3-70-g09d2 From 50655ae9e91d272d48997bada59efe166aa5e343 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 11 Sep 2008 15:53:07 -0700 Subject: ocfs2: Add journal_access functions with jbd2 triggers. We create wrappers for ocfs2_journal_access() that are specific to the type of metadata block. This allows us to associate jbd2 commit triggers with the block. The triggers will compute metadata ecc in a future commit. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/journal.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++-- fs/ocfs2/journal.h | 31 +++++++++-- 2 files changed, 181 insertions(+), 9 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 302f1144a70..2daa5848faf 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -35,6 +35,7 @@ #include "ocfs2.h" #include "alloc.h" +#include "blockcheck.h" #include "dir.h" #include "dlmglue.h" #include "extent_map.h" @@ -369,10 +370,110 @@ bail: return status; } -int ocfs2_journal_access(handle_t *handle, - struct inode *inode, - struct buffer_head *bh, - int type) +struct ocfs2_triggers { + struct jbd2_buffer_trigger_type ot_triggers; + int ot_offset; +}; + +static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers) +{ + return container_of(triggers, struct ocfs2_triggers, ot_triggers); +} + +static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, + struct buffer_head *bh, + void *data, size_t size) +{ + struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers); + + /* + * We aren't guaranteed to have the superblock here, so we + * must unconditionally compute the ecc data. 
+ * __ocfs2_journal_access() will only set the triggers if + * metaecc is enabled. + */ + ocfs2_block_check_compute(data, size, data + ot->ot_offset); +} + +/* + * Quota blocks have their own trigger because the struct ocfs2_block_check + * offset depends on the blocksize. + */ +static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, + struct buffer_head *bh, + void *data, size_t size) +{ + struct ocfs2_disk_dqtrailer *dqt = + ocfs2_block_dqtrailer(size, data); + + /* + * We aren't guaranteed to have the superblock here, so we + * must unconditionally compute the ecc data. + * __ocfs2_journal_access() will only set the triggers if + * metaecc is enabled. + */ + ocfs2_block_check_compute(data, size, &dqt->dq_check); +} + +static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers, + struct buffer_head *bh) +{ + mlog(ML_ERROR, + "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, " + "bh->b_blocknr = %llu\n", + (unsigned long)bh, + (unsigned long long)bh->b_blocknr); + + /* We aren't guaranteed to have the superblock here - but if we + * don't, it'll just crash. 
*/ + ocfs2_error(bh->b_assoc_map->host->i_sb, + "JBD2 has aborted our journal, ocfs2 cannot continue\n"); +} + +static struct ocfs2_triggers di_triggers = { + .ot_triggers = { + .t_commit = ocfs2_commit_trigger, + .t_abort = ocfs2_abort_trigger, + }, + .ot_offset = offsetof(struct ocfs2_dinode, i_check), +}; + +static struct ocfs2_triggers eb_triggers = { + .ot_triggers = { + .t_commit = ocfs2_commit_trigger, + .t_abort = ocfs2_abort_trigger, + }, + .ot_offset = offsetof(struct ocfs2_extent_block, h_check), +}; + +static struct ocfs2_triggers gd_triggers = { + .ot_triggers = { + .t_commit = ocfs2_commit_trigger, + .t_abort = ocfs2_abort_trigger, + }, + .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), +}; + +static struct ocfs2_triggers xb_triggers = { + .ot_triggers = { + .t_commit = ocfs2_commit_trigger, + .t_abort = ocfs2_abort_trigger, + }, + .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), +}; + +static struct ocfs2_triggers dq_triggers = { + .ot_triggers = { + .t_commit = ocfs2_dq_commit_trigger, + .t_abort = ocfs2_abort_trigger, + }, +}; + +static int __ocfs2_journal_access(handle_t *handle, + struct inode *inode, + struct buffer_head *bh, + struct ocfs2_triggers *triggers, + int type) { int status; @@ -418,6 +519,8 @@ int ocfs2_journal_access(handle_t *handle, status = -EINVAL; mlog(ML_ERROR, "Uknown access type!\n"); } + if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers) + jbd2_journal_set_triggers(bh, &triggers->ot_triggers); mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); if (status < 0) @@ -428,6 +531,54 @@ int ocfs2_journal_access(handle_t *handle, return status; } +int ocfs2_journal_access_di(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type) +{ + return __ocfs2_journal_access(handle, inode, bh, &di_triggers, + type); +} + +int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type) +{ + return __ocfs2_journal_access(handle, inode, bh, &eb_triggers, + 
type); +} + +int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type) +{ + return __ocfs2_journal_access(handle, inode, bh, &gd_triggers, + type); +} + +int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type) +{ + /* Right now, nothing for dirblocks */ + return __ocfs2_journal_access(handle, inode, bh, NULL, type); +} + +int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type) +{ + return __ocfs2_journal_access(handle, inode, bh, &xb_triggers, + type); +} + +int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type) +{ + return __ocfs2_journal_access(handle, inode, bh, &dq_triggers, + type); +} + +int ocfs2_journal_access(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type) +{ + return __ocfs2_journal_access(handle, inode, bh, NULL, type); +} + int ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh) { diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 37013bf9ce2..bca370dab02 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -212,9 +212,12 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode) * ocfs2_extend_trans - Extend a handle by nblocks credits. This may * commit the handle to disk in the process, but will * not release any locks taken during the transaction. - * ocfs2_journal_access - Notify the handle that we want to journal this + * ocfs2_journal_access* - Notify the handle that we want to journal this * buffer. Will have to call ocfs2_journal_dirty once * we've actually dirtied it. Type is one of . or . + * Always call the specific flavor of + * ocfs2_journal_access_*() unless you intend to + * manage the checksum by hand. * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data. * ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before * the current handle commits. 
@@ -244,10 +247,28 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks); #define OCFS2_JOURNAL_ACCESS_WRITE 1 #define OCFS2_JOURNAL_ACCESS_UNDO 2 -int ocfs2_journal_access(handle_t *handle, - struct inode *inode, - struct buffer_head *bh, - int type); +/* ocfs2_inode */ +int ocfs2_journal_access_di(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type); +/* ocfs2_extent_block */ +int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type); +/* ocfs2_group_desc */ +int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type); +/* ocfs2_xattr_block */ +int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type); +/* quota blocks */ +int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type); +/* dirblock */ +int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type); +/* Anything that has no ecc */ +int ocfs2_journal_access(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type); + /* * A word about the journal_access/journal_dirty "dance". It is * entirely legal to journal_access a buffer more than once (as long -- cgit v1.2.3-70-g09d2 From ffdd7a54631f07918b75e324d86713a08c11ec06 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 17 Oct 2008 22:32:01 -0700 Subject: ocfs2: Wrap up the common use cases of ocfs2_new_path(). The majority of ocfs2_new_path() calls are: ocfs2_new_path(path_root_bh(otherpath), path_root_el(otherpath)); Let's call that ocfs2_new_path_from_path(). The rest do similar things from struct ocfs2_extent_tree. Let's call those ocfs2_new_path_from_et(). This will make the next change easier. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 6b27f74bb34..c22ff49b5e3 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -532,6 +532,16 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, return path; } +static struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path) +{ + return ocfs2_new_path(path_root_bh(path), path_root_el(path)); +} + +static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et) +{ + return ocfs2_new_path(et->et_root_bh, et->et_root_el); +} + /* * Convenience function to journal all components in a path. */ @@ -2150,8 +2160,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode, *ret_left_path = NULL; - left_path = ocfs2_new_path(path_root_bh(right_path), - path_root_el(right_path)); + left_path = ocfs2_new_path_from_path(right_path); if (!left_path) { ret = -ENOMEM; mlog_errno(ret); @@ -2692,8 +2701,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, goto out; } - left_path = ocfs2_new_path(path_root_bh(path), - path_root_el(path)); + left_path = ocfs2_new_path_from_path(path); if (!left_path) { ret = -ENOMEM; mlog_errno(ret); @@ -2702,8 +2710,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, ocfs2_cp_path(left_path, path); - right_path = ocfs2_new_path(path_root_bh(path), - path_root_el(path)); + right_path = ocfs2_new_path_from_path(path); if (!right_path) { ret = -ENOMEM; mlog_errno(ret); @@ -2833,8 +2840,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, * We have a path to the left of this one - it needs * an update too. 
*/ - left_path = ocfs2_new_path(path_root_bh(path), - path_root_el(path)); + left_path = ocfs2_new_path_from_path(path); if (!left_path) { ret = -ENOMEM; mlog_errno(ret); @@ -3075,8 +3081,7 @@ static int ocfs2_get_right_path(struct inode *inode, /* This function shouldn't be called for the rightmost leaf. */ BUG_ON(right_cpos == 0); - right_path = ocfs2_new_path(path_root_bh(left_path), - path_root_el(left_path)); + right_path = ocfs2_new_path_from_path(left_path); if (!right_path) { ret = -ENOMEM; mlog_errno(ret); @@ -3247,8 +3252,7 @@ static int ocfs2_get_left_path(struct inode *inode, /* This function shouldn't be called for the leftmost leaf. */ BUG_ON(left_cpos == 0); - left_path = ocfs2_new_path(path_root_bh(right_path), - path_root_el(right_path)); + left_path = ocfs2_new_path_from_path(right_path); if (!left_path) { ret = -ENOMEM; mlog_errno(ret); @@ -3780,8 +3784,7 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, * leftmost leaf. */ if (left_cpos) { - left_path = ocfs2_new_path(path_root_bh(right_path), - path_root_el(right_path)); + left_path = ocfs2_new_path_from_path(right_path); if (!left_path) { ret = -ENOMEM; mlog_errno(ret); @@ -4018,7 +4021,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, goto out_update_clusters; } - right_path = ocfs2_new_path(et->et_root_bh, et->et_root_el); + right_path = ocfs2_new_path_from_et(et); if (!right_path) { ret = -ENOMEM; mlog_errno(ret); @@ -4130,8 +4133,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, goto out; if (left_cpos != 0) { - left_path = ocfs2_new_path(path_root_bh(path), - path_root_el(path)); + left_path = ocfs2_new_path_from_path(path); if (!left_path) goto out; @@ -4187,8 +4189,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, if (right_cpos == 0) goto out; - right_path = ocfs2_new_path(path_root_bh(path), - path_root_el(path)); + right_path = ocfs2_new_path_from_path(path); if (!right_path) goto 
out; @@ -4381,7 +4382,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, return 0; } - path = ocfs2_new_path(et->et_root_bh, et->et_root_el); + path = ocfs2_new_path_from_et(et); if (!path) { ret = -ENOMEM; mlog_errno(ret); @@ -4910,7 +4911,7 @@ int ocfs2_mark_extent_written(struct inode *inode, if (et->et_ops == &ocfs2_dinode_et_ops) ocfs2_extent_map_trunc(inode, 0); - left_path = ocfs2_new_path(et->et_root_bh, et->et_root_el); + left_path = ocfs2_new_path_from_et(et); if (!left_path) { ret = -ENOMEM; mlog_errno(ret); @@ -5082,8 +5083,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, } if (left_cpos && le16_to_cpu(el->l_next_free_rec) > 1) { - left_path = ocfs2_new_path(path_root_bh(path), - path_root_el(path)); + left_path = ocfs2_new_path_from_path(path); if (!left_path) { ret = -ENOMEM; mlog_errno(ret); @@ -5192,7 +5192,7 @@ int ocfs2_remove_extent(struct inode *inode, ocfs2_extent_map_trunc(inode, 0); - path = ocfs2_new_path(et->et_root_bh, et->et_root_el); + path = ocfs2_new_path_from_et(et); if (!path) { ret = -ENOMEM; mlog_errno(ret); -- cgit v1.2.3-70-g09d2 From 13723d00e374c2a6d6ccb5af6de965e89c3e1b01 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 17 Oct 2008 19:25:01 -0700 Subject: ocfs2: Use metadata-specific ocfs2_journal_access_*() functions. The per-metadata-type ocfs2_journal_access_*() functions hook up jbd2 commit triggers and allow us to compute metadata ecc right before the buffers are written out. This commit provides ecc for inodes, extent blocks, group descriptors, and quota blocks. It is not safe to use extended attributes and metaecc at the same time yet. The ocfs2_extent_tree and ocfs2_path abstractions in alloc.c both hide the type of block at their root. Before, it didn't matter, but now the root block must use the appropriate ocfs2_journal_access_*() function. To keep this abstract, the structures now have a pointer to the matching journal_access function and a wrapper call to call it.
A few places use naked ocfs2_write_block() calls instead of adding the blocks to the journal. We make sure to calculate their checksum and ecc before the write. Since we pass around the journal_access functions, let's typedef them in ocfs2.h. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 233 ++++++++++++++++++++++++++++-------------------- fs/ocfs2/alloc.h | 5 +- fs/ocfs2/aops.c | 8 +- fs/ocfs2/dir.c | 48 ++++++---- fs/ocfs2/file.c | 16 ++-- fs/ocfs2/inode.c | 17 ++-- fs/ocfs2/journal.c | 2 + fs/ocfs2/journal.h | 3 +- fs/ocfs2/localalloc.c | 18 ++-- fs/ocfs2/namei.c | 38 ++++---- fs/ocfs2/ocfs2.h | 4 + fs/ocfs2/quota_global.c | 2 +- fs/ocfs2/quota_local.c | 18 ++-- fs/ocfs2/resize.c | 16 ++-- fs/ocfs2/suballoc.c | 58 ++++++------ 15 files changed, 280 insertions(+), 206 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index c22ff49b5e3..6e58fd557e5 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -298,11 +298,13 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = { static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, struct inode *inode, struct buffer_head *bh, + ocfs2_journal_access_func access, void *obj, struct ocfs2_extent_tree_operations *ops) { et->et_ops = ops; et->et_root_bh = bh; + et->et_root_journal_access = access; if (!obj) obj = (void *)bh->b_data; et->et_object = obj; @@ -318,15 +320,16 @@ void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, struct inode *inode, struct buffer_head *bh) { - __ocfs2_init_extent_tree(et, inode, bh, NULL, &ocfs2_dinode_et_ops); + __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_di, + NULL, &ocfs2_dinode_et_ops); } void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, struct inode *inode, struct buffer_head *bh) { - __ocfs2_init_extent_tree(et, inode, bh, NULL, - &ocfs2_xattr_tree_et_ops); + __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_xb, + NULL,
&ocfs2_xattr_tree_et_ops); } void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, @@ -334,7 +337,7 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, struct buffer_head *bh, struct ocfs2_xattr_value_root *xv) { - __ocfs2_init_extent_tree(et, inode, bh, xv, + __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access, xv, &ocfs2_xattr_value_et_ops); } @@ -356,6 +359,15 @@ static inline void ocfs2_et_update_clusters(struct inode *inode, et->et_ops->eo_update_clusters(inode, et, clusters); } +static inline int ocfs2_et_root_journal_access(handle_t *handle, + struct inode *inode, + struct ocfs2_extent_tree *et, + int type) +{ + return et->et_root_journal_access(handle, inode, et->et_root_bh, + type); +} + static inline int ocfs2_et_insert_check(struct inode *inode, struct ocfs2_extent_tree *et, struct ocfs2_extent_rec *rec) @@ -396,12 +408,14 @@ struct ocfs2_path_item { #define OCFS2_MAX_PATH_DEPTH 5 struct ocfs2_path { - int p_tree_depth; - struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH]; + int p_tree_depth; + ocfs2_journal_access_func p_root_access; + struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH]; }; #define path_root_bh(_path) ((_path)->p_node[0].bh) #define path_root_el(_path) ((_path)->p_node[0].el) +#define path_root_access(_path)((_path)->p_root_access) #define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh) #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el) #define path_num_items(_path) ((_path)->p_tree_depth + 1) @@ -434,6 +448,8 @@ static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root) */ if (keep_root) depth = le16_to_cpu(path_root_el(path)->l_tree_depth); + else + path_root_access(path) = NULL; path->p_tree_depth = depth; } @@ -459,6 +475,7 @@ static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src) BUG_ON(path_root_bh(dest) != path_root_bh(src)); BUG_ON(path_root_el(dest) != path_root_el(src)); + BUG_ON(path_root_access(dest) != 
path_root_access(src)); ocfs2_reinit_path(dest, 1); @@ -480,6 +497,7 @@ static void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src) int i; BUG_ON(path_root_bh(dest) != path_root_bh(src)); + BUG_ON(path_root_access(dest) != path_root_access(src)); for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) { brelse(dest->p_node[i].bh); @@ -515,7 +533,8 @@ static inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index, } static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, - struct ocfs2_extent_list *root_el) + struct ocfs2_extent_list *root_el, + ocfs2_journal_access_func access) { struct ocfs2_path *path; @@ -527,6 +546,7 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, get_bh(root_bh); path_root_bh(path) = root_bh; path_root_el(path) = root_el; + path_root_access(path) = access; } return path; @@ -534,12 +554,38 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, static struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path) { - return ocfs2_new_path(path_root_bh(path), path_root_el(path)); + return ocfs2_new_path(path_root_bh(path), path_root_el(path), + path_root_access(path)); } static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et) { - return ocfs2_new_path(et->et_root_bh, et->et_root_el); + return ocfs2_new_path(et->et_root_bh, et->et_root_el, + et->et_root_journal_access); +} + +/* + * Journal the buffer at depth idx. All idx>0 are extent_blocks, + * otherwise it's the root_access function. + * + * I don't like the way this function's name looks next to + * ocfs2_journal_access_path(), but I don't have a better one. 
+ */ +static int ocfs2_path_bh_journal_access(handle_t *handle, + struct inode *inode, + struct ocfs2_path *path, + int idx) +{ + ocfs2_journal_access_func access = path_root_access(path); + + if (!access) + access = ocfs2_journal_access; + + if (idx) + access = ocfs2_journal_access_eb; + + return access(handle, inode, path->p_node[idx].bh, + OCFS2_JOURNAL_ACCESS_WRITE); } /* @@ -554,8 +600,7 @@ static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, goto out; for(i = 0; i < path_num_items(path); i++) { - ret = ocfs2_journal_access(handle, inode, path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, path, i); if (ret < 0) { mlog_errno(ret); goto out; @@ -708,8 +753,11 @@ static int ocfs2_validate_extent_block(struct super_block *sb, * local to this block. */ rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &eb->h_check); - if (rc) + if (rc) { + mlog(ML_ERROR, "Checksum failed for extent block %llu\n", + (unsigned long long)bh->b_blocknr); return rc; + } /* * Errors after here are fatal. 
@@ -842,8 +890,8 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, } ocfs2_set_new_buffer_uptodate(inode, bhs[i]); - status = ocfs2_journal_access(handle, inode, bhs[i], - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_eb(handle, inode, bhs[i], + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto bail; @@ -986,8 +1034,8 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); eb_el = &eb->h_list; - status = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_eb(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto bail; @@ -1026,21 +1074,21 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, * journal_dirty erroring as it won't unless we've aborted the * handle (in which case we would never be here) so reserving * the write with journal_access is all we need to do. */ - status = ocfs2_journal_access(handle, inode, *last_eb_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_eb(handle, inode, *last_eb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } - status = ocfs2_journal_access(handle, inode, et->et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } if (eb_bh) { - status = ocfs2_journal_access(handle, inode, eb_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_eb(handle, inode, eb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -1129,8 +1177,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, eb_el = &eb->h_list; root_el = et->et_root_el; - status = ocfs2_journal_access(handle, inode, new_eb_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_eb(handle, inode, new_eb_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 
0) { mlog_errno(status); goto bail; @@ -1148,8 +1196,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, goto bail; } - status = ocfs2_journal_access(handle, inode, et->et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -1918,25 +1966,23 @@ static int ocfs2_rotate_subtree_right(struct inode *inode, root_bh = left_path->p_node[subtree_index].bh; BUG_ON(root_bh != right_path->p_node[subtree_index].bh); - ret = ocfs2_journal_access(handle, inode, root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, right_path, + subtree_index); if (ret) { mlog_errno(ret); goto out; } for(i = subtree_index + 1; i < path_num_items(right_path); i++) { - ret = ocfs2_journal_access(handle, inode, - right_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + right_path, i); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_journal_access(handle, inode, - left_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + left_path, i); if (ret) { mlog_errno(ret); goto out; @@ -2455,9 +2501,9 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, return -EAGAIN; if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) { - ret = ocfs2_journal_access(handle, inode, - path_leaf_bh(right_path), - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_eb(handle, inode, + path_leaf_bh(right_path), + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -2474,8 +2520,8 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, * We have to update i_last_eb_blk during the meta * data delete. 
*/ - ret = ocfs2_journal_access(handle, inode, et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -2490,25 +2536,23 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, */ BUG_ON(right_has_empty && !del_right_subtree); - ret = ocfs2_journal_access(handle, inode, root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, right_path, + subtree_index); if (ret) { mlog_errno(ret); goto out; } for(i = subtree_index + 1; i < path_num_items(right_path); i++) { - ret = ocfs2_journal_access(handle, inode, - right_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + right_path, i); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_journal_access(handle, inode, - left_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + left_path, i); if (ret) { mlog_errno(ret); goto out; @@ -2653,16 +2697,17 @@ out: static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode, handle_t *handle, - struct buffer_head *bh, - struct ocfs2_extent_list *el) + struct ocfs2_path *path) { int ret; + struct buffer_head *bh = path_leaf_bh(path); + struct ocfs2_extent_list *el = path_leaf_el(path); if (!ocfs2_is_empty_extent(&el->l_recs[0])) return 0; - ret = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, path, + path_num_items(path) - 1); if (ret) { mlog_errno(ret); goto out; @@ -2744,9 +2789,8 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, * Caller might still want to make changes to the * tree root, so re-add it to the journal here. 
*/ - ret = ocfs2_journal_access(handle, inode, - path_root_bh(left_path), - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + left_path, 0); if (ret) { mlog_errno(ret); goto out; @@ -2929,8 +2973,7 @@ rightmost_no_delete: * it up front. */ ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, - path_leaf_bh(path), - path_leaf_el(path)); + path); if (ret) mlog_errno(ret); goto out; @@ -3164,8 +3207,8 @@ static int ocfs2_merge_rec_right(struct inode *inode, root_bh = left_path->p_node[subtree_index].bh; BUG_ON(root_bh != right_path->p_node[subtree_index].bh); - ret = ocfs2_journal_access(handle, inode, root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, right_path, + subtree_index); if (ret) { mlog_errno(ret); goto out; @@ -3173,17 +3216,15 @@ static int ocfs2_merge_rec_right(struct inode *inode, for (i = subtree_index + 1; i < path_num_items(right_path); i++) { - ret = ocfs2_journal_access(handle, inode, - right_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + right_path, i); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_journal_access(handle, inode, - left_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + left_path, i); if (ret) { mlog_errno(ret); goto out; @@ -3195,8 +3236,8 @@ static int ocfs2_merge_rec_right(struct inode *inode, right_rec = &el->l_recs[index + 1]; } - ret = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, left_path, + path_num_items(left_path) - 1); if (ret) { mlog_errno(ret); goto out; @@ -3335,8 +3376,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, root_bh = left_path->p_node[subtree_index].bh; BUG_ON(root_bh != right_path->p_node[subtree_index].bh); - ret = ocfs2_journal_access(handle, inode, root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = 
ocfs2_path_bh_journal_access(handle, inode, right_path, + subtree_index); if (ret) { mlog_errno(ret); goto out; @@ -3344,17 +3385,15 @@ static int ocfs2_merge_rec_left(struct inode *inode, for (i = subtree_index + 1; i < path_num_items(right_path); i++) { - ret = ocfs2_journal_access(handle, inode, - right_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + right_path, i); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_journal_access(handle, inode, - left_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + left_path, i); if (ret) { mlog_errno(ret); goto out; @@ -3366,8 +3405,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, has_empty_extent = 1; } - ret = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, left_path, + path_num_items(left_path) - 1); if (ret) { mlog_errno(ret); goto out; @@ -4009,8 +4048,8 @@ static int ocfs2_do_insert_extent(struct inode *inode, el = et->et_root_el; - ret = ocfs2_journal_access(handle, inode, et->et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -4071,8 +4110,8 @@ static int ocfs2_do_insert_extent(struct inode *inode, * ocfs2_rotate_tree_right() might have extended the * transaction without re-journaling our tree root. 
*/ - ret = ocfs2_journal_access(handle, inode, et->et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -4593,9 +4632,9 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, BUG_ON(num_bits > clusters_to_add); - /* reserve our write early -- insert_extent may update the inode */ - status = ocfs2_journal_access(handle, inode, et->et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + /* reserve our write early -- insert_extent may update the tree root */ + status = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; @@ -5347,8 +5386,8 @@ int ocfs2_remove_btree_range(struct inode *inode, goto out; } - ret = ocfs2_journal_access(handle, inode, et->et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -5461,8 +5500,8 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb, goto bail; } - status = ocfs2_journal_access(handle, tl_inode, tl_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, tl_inode, tl_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -5523,8 +5562,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, while (i >= 0) { /* Caller has given us at least enough credits to * update the truncate log dinode */ - status = ocfs2_journal_access(handle, tl_inode, tl_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, tl_inode, tl_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -5780,6 +5819,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, * tl_used. 
*/ tl->tl_used = 0; + ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check); status = ocfs2_write_block(osb, tl_bh, tl_inode); if (status < 0) { mlog_errno(status); @@ -6546,8 +6586,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, } if (last_eb_bh) { - status = ocfs2_journal_access(handle, inode, last_eb_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_eb(handle, inode, last_eb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -6908,8 +6948,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, goto out_unlock; } - ret = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; @@ -7043,7 +7083,8 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, i_size_read(inode)); - path = ocfs2_new_path(fe_bh, &di->id2.i_list); + path = ocfs2_new_path(fe_bh, &di->id2.i_list, + ocfs2_journal_access_di); if (!path) { status = -ENOMEM; mlog_errno(status); @@ -7276,8 +7317,8 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, goto out; } - ret = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 59d37d1b7d4..4b6fea22748 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -45,7 +45,9 @@ * * ocfs2_extent_tree contains info for the root of the b-tree, it must have a * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree - * functions. + * functions. With metadata ecc, we now call different journal_access + * functions for each type of metadata, so it must have the + * root_journal_access function. 
* ocfs2_extent_tree_operations abstract the normal operations we do for * the root of extent b-tree. */ @@ -54,6 +56,7 @@ struct ocfs2_extent_tree { struct ocfs2_extent_tree_operations *et_ops; struct buffer_head *et_root_bh; struct ocfs2_extent_list *et_root_el; + ocfs2_journal_access_func et_root_journal_access; void *et_object; unsigned int et_max_leaf_clusters; }; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 6b647ec87bb..a067a6cffb0 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1512,8 +1512,8 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, goto out; } - ret = ocfs2_journal_access(handle, inode, wc->w_di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { ocfs2_commit_trans(osb, handle); @@ -1740,8 +1740,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, * We don't want this to fail in ocfs2_write_end(), so do it * here. */ - ret = ocfs2_journal_access(handle, inode, wc->w_di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_quota; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 3708fe482e3..45e4e03d8f7 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -378,14 +378,18 @@ int ocfs2_update_entry(struct inode *dir, handle_t *handle, struct inode *new_entry_inode) { int ret; + ocfs2_journal_access_func access = ocfs2_journal_access_db; /* * The same code works fine for both inline-data and extent - * based directories, so no need to split this up. + * based directories, so no need to split this up. The only + * difference is the journal_access function. 
*/ - ret = ocfs2_journal_access(handle, dir, de_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) + access = ocfs2_journal_access_di; + + ret = access(handle, dir, de_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -407,9 +411,13 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, { struct ocfs2_dir_entry *de, *pde; int i, status = -ENOENT; + ocfs2_journal_access_func access = ocfs2_journal_access_db; mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh); + if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) + access = ocfs2_journal_access_di; + i = 0; pde = NULL; de = (struct ocfs2_dir_entry *) first_de; @@ -420,8 +428,8 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, goto bail; } if (de == de_del) { - status = ocfs2_journal_access(handle, dir, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = access(handle, dir, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { status = -EIO; mlog_errno(status); @@ -581,8 +589,14 @@ int __ocfs2_add_entry(handle_t *handle, goto bail; } - status = ocfs2_journal_access(handle, dir, insert_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + if (insert_bh == parent_fe_bh) + status = ocfs2_journal_access_di(handle, dir, + insert_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + else + status = ocfs2_journal_access_db(handle, dir, + insert_bh, + OCFS2_JOURNAL_ACCESS_WRITE); /* By now the buffer is marked for journaling */ offset += le16_to_cpu(de->rec_len); if (le64_to_cpu(de->inode)) { @@ -1081,8 +1095,8 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb, struct ocfs2_inline_data *data = &di->id2.i_data; unsigned int size = le16_to_cpu(data->id_count); - ret = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -1129,8 +1143,8 @@ static int ocfs2_fill_new_dir_el(struct 
ocfs2_super *osb, ocfs2_set_new_buffer_uptodate(inode, new_bh); - status = ocfs2_journal_access(handle, inode, new_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_db(handle, inode, new_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto bail; @@ -1292,8 +1306,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, ocfs2_set_new_buffer_uptodate(dir, dirdata_bh); - ret = ocfs2_journal_access(handle, dir, dirdata_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + ret = ocfs2_journal_access_db(handle, dir, dirdata_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (ret) { mlog_errno(ret); goto out_commit; @@ -1319,8 +1333,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, * We let the later dirent insert modify c/mtime - to the user * the data hasn't changed. */ - ret = ocfs2_journal_access(handle, dir, di_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + ret = ocfs2_journal_access_di(handle, dir, di_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (ret) { mlog_errno(ret); goto out_commit; @@ -1583,8 +1597,8 @@ do_extend: ocfs2_set_new_buffer_uptodate(dir, new_bh); - status = ocfs2_journal_access(handle, dir, new_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_db(handle, dir, new_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9374d374a26..e8f795f978a 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -256,8 +256,8 @@ int ocfs2_update_inode_atime(struct inode *inode, goto out; } - ret = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; @@ -353,8 +353,8 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, goto out; } - status = ocfs2_journal_access(handle, inode, fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = 
ocfs2_journal_access_di(handle, inode, fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_commit; @@ -590,8 +590,8 @@ restarted_transaction: /* reserve a write to the file entry early on - that we if we * run out of credits in the allocation path, we can still * update i_size. */ - status = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; @@ -1121,8 +1121,8 @@ static int __ocfs2_write_remove_suid(struct inode *inode, goto out; } - ret = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out_trans; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 9370b652ab9..229e707bc05 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -537,8 +537,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, goto out; } - status = ocfs2_journal_access(handle, inode, fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out; @@ -621,8 +621,8 @@ static int ocfs2_remove_inode(struct inode *inode, } /* set the inodes dtime */ - status = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail_commit; @@ -1190,8 +1190,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle, mlog_entry("(inode %llu)\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); - status = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; @@ -1277,8 +1277,11 @@ int 
ocfs2_validate_inode_block(struct super_block *sb, * local to this block. */ rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check); - if (rc) + if (rc) { + mlog(ML_ERROR, "Checksum failed for dinode %llu\n", + (unsigned long long)bh->b_blocknr); goto bail; + } /* * Errors after here are fatal. diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 2daa5848faf..3b54dba0f74 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -752,6 +752,7 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, if (replayed) ocfs2_bump_recovery_generation(fe); + ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); status = ocfs2_write_block(osb, bh, journal->j_inode); if (status < 0) mlog_errno(status); @@ -1486,6 +1487,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, osb->slot_recovery_generations[slot_num] = ocfs2_get_recovery_generation(fe); + ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); status = ocfs2_write_block(osb, bh, inode); if (status < 0) mlog_errno(status); diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index bca370dab02..3c3532e1307 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -247,9 +247,10 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks); #define OCFS2_JOURNAL_ACCESS_WRITE 1 #define OCFS2_JOURNAL_ACCESS_UNDO 2 + /* ocfs2_inode */ int ocfs2_journal_access_di(handle_t *handle, struct inode *inode, - struct buffer_head *bh, int type); + struct buffer_head *bh, int type); /* ocfs2_extent_block */ int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode, struct buffer_head *bh, int type); diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 19cfb1b9ce0..ec70cdbe77f 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -36,6 +36,7 @@ #include "ocfs2.h" #include "alloc.h" +#include "blockcheck.h" #include "dlmglue.h" #include "inode.h" #include "journal.h" @@ -382,8 +383,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) } memcpy(alloc_copy, 
alloc, bh->b_size); - status = ocfs2_journal_access(handle, local_alloc_inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, local_alloc_inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_commit; @@ -476,6 +477,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, alloc = (struct ocfs2_dinode *) alloc_bh->b_data; ocfs2_clear_local_alloc(alloc); + ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); status = ocfs2_write_block(osb, alloc_bh, inode); if (status < 0) mlog_errno(status); @@ -762,9 +764,9 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, * delete bits from it! */ *num_bits = bits_wanted; - status = ocfs2_journal_access(handle, local_alloc_inode, - osb->local_alloc_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, local_alloc_inode, + osb->local_alloc_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -1240,9 +1242,9 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, } memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size); - status = ocfs2_journal_access(handle, local_alloc_inode, - osb->local_alloc_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, local_alloc_inode, + osb->local_alloc_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 6173807ba23..084aba86c3b 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -361,8 +361,8 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } - status = ocfs2_journal_access(handle, dir, parent_fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, dir, parent_fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; @@ -493,8 +493,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, } ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh); - status = 
ocfs2_journal_access(handle, inode, *new_fe_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_di(handle, inode, *new_fe_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto leave; @@ -664,8 +664,8 @@ static int ocfs2_link(struct dentry *old_dentry, goto out_unlock_inode; } - err = ocfs2_journal_access(handle, inode, fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + err = ocfs2_journal_access_di(handle, inode, fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (err < 0) { mlog_errno(err); goto out_commit; @@ -851,8 +851,8 @@ static int ocfs2_unlink(struct inode *dir, goto leave; } - status = ocfs2_journal_access(handle, inode, fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; @@ -1265,8 +1265,8 @@ static int ocfs2_rename(struct inode *old_dir, goto bail; } } - status = ocfs2_journal_access(handle, new_inode, newfe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, new_inode, newfe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -1312,8 +1312,8 @@ static int ocfs2_rename(struct inode *old_dir, old_inode->i_ctime = CURRENT_TIME; mark_inode_dirty(old_inode); - status = ocfs2_journal_access(handle, old_inode, old_inode_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, old_inode, old_inode_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status >= 0) { old_di = (struct ocfs2_dinode *) old_inode_bh->b_data; @@ -1389,9 +1389,9 @@ static int ocfs2_rename(struct inode *old_dir, (int)old_dir_nlink, old_dir->i_nlink); } else { struct ocfs2_dinode *fe; - status = ocfs2_journal_access(handle, old_dir, - old_dir_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, old_dir, + old_dir_bh, + OCFS2_JOURNAL_ACCESS_WRITE); fe = (struct ocfs2_dinode *) old_dir_bh->b_data; fe->i_links_count = cpu_to_le16(old_dir->i_nlink); status = 
ocfs2_journal_dirty(handle, old_dir_bh); @@ -1898,8 +1898,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, goto leave; } - status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, orphan_dir_inode, orphan_dir_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; @@ -1986,8 +1986,8 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, goto leave; } - status = ocfs2_journal_access(handle,orphan_dir_inode, orphan_dir_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle,orphan_dir_inode, orphan_dir_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 2bb389fe739..bad87d0a03c 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -339,6 +339,10 @@ struct ocfs2_super #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) +/* Useful typedef for passing around journal access functions */ +typedef int (*ocfs2_journal_access_func)(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type); + static inline int ocfs2_should_order_data(struct inode *inode) { if (!S_ISREG(inode->i_mode)) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index a0b8b14cca8..444aa5a467f 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -244,7 +244,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, set_buffer_uptodate(bh); unlock_buffer(bh); ocfs2_set_buffer_uptodate(gqinode, bh); - err = ocfs2_journal_access(handle, gqinode, bh, ja_type); + err = ocfs2_journal_access_dq(handle, gqinode, bh, ja_type); if (err < 0) { brelse(bh); goto out; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index d451b715aef..07deec5e972 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -106,8 +106,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh, mlog_errno(status); 
return status; } - status = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_dq(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); ocfs2_commit_trans(OCFS2_SB(sb), handle); @@ -506,7 +506,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, goto out_commit; } /* Release local quota file entry */ - status = ocfs2_journal_access(handle, lqinode, + status = ocfs2_journal_access_dq(handle, lqinode, qbh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); @@ -614,8 +614,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, mlog_errno(status); goto out_bh; } - status = ocfs2_journal_access(handle, lqinode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_dq(handle, lqinode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_trans; @@ -981,8 +981,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( goto out; } - status = ocfs2_journal_access(handle, lqinode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_dq(handle, lqinode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_trans; @@ -1074,7 +1074,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( mlog_errno(status); goto out; } - status = ocfs2_journal_access(handle, lqinode, chunk->qc_headerbh, + status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); @@ -1207,7 +1207,7 @@ static int ocfs2_local_release_dquot(struct dquot *dquot) goto out; } - status = ocfs2_journal_access(handle, sb_dqopt(sb)->files[type], + status = ocfs2_journal_access_dq(handle, sb_dqopt(sb)->files[type], od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 867de3ebfca..424adaa5f90 100644 --- a/fs/ocfs2/resize.c +++ 
b/fs/ocfs2/resize.c @@ -106,8 +106,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n", new_clusters, first_new_cluster); - ret = ocfs2_journal_access(handle, bm_inode, group_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_gd(handle, bm_inode, group_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out; @@ -141,8 +141,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, } /* update the inode accordingly. */ - ret = ocfs2_journal_access(handle, bm_inode, bm_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, bm_inode, bm_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out_rollback; @@ -536,8 +536,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) cl = &fe->id2.i_chain; cr = &cl->cl_recs[input->chain]; - ret = ocfs2_journal_access(handle, main_bm_inode, group_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_gd(handle, main_bm_inode, group_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out_commit; @@ -552,8 +552,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) goto out_commit; } - ret = ocfs2_journal_access(handle, main_bm_inode, main_bm_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, main_bm_inode, main_bm_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out_commit; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 78755766c32..a69628603e1 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -261,7 +261,11 @@ int ocfs2_check_group_descriptor(struct super_block *sb, * local to this block. 
*/ rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check); - if (!rc) + if (rc) { + mlog(ML_ERROR, + "Checksum failed for group descriptor %llu\n", + (unsigned long long)bh->b_blocknr); + } else rc = ocfs2_validate_gd_self(sb, bh, 1); if (!rc) rc = ocfs2_validate_gd_parent(sb, di, bh, 1); @@ -343,10 +347,10 @@ static int ocfs2_block_group_fill(handle_t *handle, goto bail; } - status = ocfs2_journal_access(handle, - alloc_inode, - bg_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_gd(handle, + alloc_inode, + bg_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto bail; @@ -476,8 +480,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, bg = (struct ocfs2_group_desc *) bg_bh->b_data; - status = ocfs2_journal_access(handle, alloc_inode, - bh, OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, alloc_inode, + bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -986,10 +990,10 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, if (ocfs2_is_cluster_bitmap(alloc_inode)) journal_type = OCFS2_JOURNAL_ACCESS_UNDO; - status = ocfs2_journal_access(handle, - alloc_inode, - group_bh, - journal_type); + status = ocfs2_journal_access_gd(handle, + alloc_inode, + group_bh, + journal_type); if (status < 0) { mlog_errno(status); goto bail; @@ -1060,8 +1064,8 @@ static int ocfs2_relink_block_group(handle_t *handle, bg_ptr = le64_to_cpu(bg->bg_next_group); prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); - status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_gd(handle, alloc_inode, prev_bg_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_rollback; @@ -1075,8 +1079,8 @@ static int ocfs2_relink_block_group(handle_t *handle, goto out_rollback; } - status = ocfs2_journal_access(handle, alloc_inode, bg_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = 
ocfs2_journal_access_gd(handle, alloc_inode, bg_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_rollback; @@ -1090,8 +1094,8 @@ static int ocfs2_relink_block_group(handle_t *handle, goto out_rollback; } - status = ocfs2_journal_access(handle, alloc_inode, fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, alloc_inode, fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_rollback; @@ -1242,8 +1246,8 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode, struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; - ret = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out; @@ -1414,10 +1418,10 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, /* Ok, claim our bits now: set the info on dinode, chainlist * and then the group */ - status = ocfs2_journal_access(handle, - alloc_inode, - ac->ac_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, + alloc_inode, + ac->ac_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -1824,8 +1828,8 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, if (ocfs2_is_cluster_bitmap(alloc_inode)) journal_type = OCFS2_JOURNAL_ACCESS_UNDO; - status = ocfs2_journal_access(handle, alloc_inode, group_bh, - journal_type); + status = ocfs2_journal_access_gd(handle, alloc_inode, group_bh, + journal_type); if (status < 0) { mlog_errno(status); goto bail; @@ -1900,8 +1904,8 @@ int ocfs2_free_suballoc_bits(handle_t *handle, goto bail; } - status = ocfs2_journal_access(handle, alloc_inode, alloc_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, alloc_inode, alloc_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if 
(status < 0) { mlog_errno(status); goto bail; -- cgit v1.2.3-70-g09d2 From 4d0e214ee83185fcaa2cb97cd026d32bdc5c994a Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 5 Dec 2008 11:19:37 -0800 Subject: ocfs2: Add ecc and checksums to ocfs2 xattr buckets. The xattr bucket can span multiple blocks on disk. We have wrappers for this structure in the code. We use the new multi-block ecc calls to calculate and validate the bucket. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index bc822d6ba54..7c2f4c9d1bd 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -273,6 +273,15 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno, bucket->bu_blocks, bucket->bu_bhs, 0, NULL); + if (!rc) { + rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, + bucket->bu_bhs, + bucket->bu_blocks, + &bucket_xh(bucket)->xh_check); + if (rc) + mlog_errno(rc); + } + if (rc) ocfs2_xattr_bucket_relse(bucket); return rc; @@ -301,6 +310,10 @@ static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, { int i; + ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, + bucket->bu_bhs, bucket->bu_blocks, + &bucket_xh(bucket)->xh_check); + for (i = 0; i < bucket->bu_blocks; i++) ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); } -- cgit v1.2.3-70-g09d2 From 2a50a743bdaab104155bd9e988d2ba3bb4177263 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 9 Dec 2008 14:24:33 -0800 Subject: ocfs2: Create ocfs2_xattr_value_buf. When an ocfs2 extended attribute is large enough to require its own allocation tree, we root it with an ocfs2_xattr_value_root. However, these roots can be a part of inodes, xattr blocks, or xattr buckets. Thus, they need a different journal access function for each container. 
We wrap the bh, its journal access function, and the value root (xv) in a structure called ocfs2_xattr_value_buf. This is a package that can be passed around. In this first pass, we simply pass it to the extent tree code. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 25 +++++++++++-------------- fs/ocfs2/alloc.h | 4 ++-- fs/ocfs2/xattr.c | 34 ++++++++++++++++++++++------------ fs/ocfs2/xattr.h | 14 ++++++++++++++ 4 files changed, 49 insertions(+), 28 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 6e58fd557e5..874c0bd9e1c 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -48,6 +48,7 @@ #include "file.h" #include "super.h" #include "uptodate.h" +#include "xattr.h" #include "buffer_head_io.h" @@ -207,36 +208,33 @@ static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et) static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et) { - struct ocfs2_xattr_value_root *xv = et->et_object; + struct ocfs2_xattr_value_buf *vb = et->et_object; - et->et_root_el = &xv->xr_list; + et->et_root_el = &vb->vb_xv->xr_list; } static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et, u64 blkno) { - struct ocfs2_xattr_value_root *xv = - (struct ocfs2_xattr_value_root *)et->et_object; + struct ocfs2_xattr_value_buf *vb = et->et_object; - xv->xr_last_eb_blk = cpu_to_le64(blkno); + vb->vb_xv->xr_last_eb_blk = cpu_to_le64(blkno); } static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et) { - struct ocfs2_xattr_value_root *xv = - (struct ocfs2_xattr_value_root *) et->et_object; + struct ocfs2_xattr_value_buf *vb = et->et_object; - return le64_to_cpu(xv->xr_last_eb_blk); + return le64_to_cpu(vb->vb_xv->xr_last_eb_blk); } static void ocfs2_xattr_value_update_clusters(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters) { - struct ocfs2_xattr_value_root *xv = - (struct ocfs2_xattr_value_root *)et->et_object; + struct ocfs2_xattr_value_buf *vb = 
et->et_object; - le32_add_cpu(&xv->xr_clusters, clusters); + le32_add_cpu(&vb->vb_xv->xr_clusters, clusters); } static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = { @@ -334,10 +332,9 @@ void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, struct inode *inode, - struct buffer_head *bh, - struct ocfs2_xattr_value_root *xv) + struct ocfs2_xattr_value_buf *vb) { - __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access, xv, + __ocfs2_init_extent_tree(et, inode, vb->vb_bh, vb->vb_access, vb, &ocfs2_xattr_value_et_ops); } diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 4b6fea22748..cceff5c37f4 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -71,10 +71,10 @@ void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, struct inode *inode, struct buffer_head *bh); +struct ocfs2_xattr_value_buf; void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, struct inode *inode, - struct buffer_head *bh, - struct ocfs2_xattr_value_root *xv); + struct ocfs2_xattr_value_buf *vb); /* * Read an extent block into *bh. 
If *bh is NULL, a bh will be diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 7c2f4c9d1bd..123d378aba9 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -581,21 +581,26 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, handle_t *handle = ctxt->handle; enum ocfs2_alloc_restarted why; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters); + struct ocfs2_xattr_value_buf vb = { + .vb_bh = xattr_bh, + .vb_xv = xv, + .vb_access = ocfs2_journal_access, + }; + u32 prev_clusters, logical_start = le32_to_cpu(vb.vb_xv->xr_clusters); struct ocfs2_extent_tree et; mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add); - ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv); + ocfs2_init_xattr_value_extent_tree(&et, inode, &vb); - status = ocfs2_journal_access(handle, inode, xattr_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = vb.vb_access(handle, inode, vb.vb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; } - prev_clusters = le32_to_cpu(xv->xr_clusters); + prev_clusters = le32_to_cpu(vb.vb_xv->xr_clusters); status = ocfs2_add_clusters_in_btree(osb, inode, &logical_start, @@ -611,13 +616,13 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, goto leave; } - status = ocfs2_journal_dirty(handle, xattr_bh); + status = ocfs2_journal_dirty(handle, vb.vb_bh); if (status < 0) { mlog_errno(status); goto leave; } - clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters; + clusters_to_add -= le32_to_cpu(vb.vb_xv->xr_clusters) - prev_clusters; /* * We should have already allocated enough space before the transaction, @@ -640,11 +645,16 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); handle_t *handle = ctxt->handle; struct ocfs2_extent_tree et; + struct ocfs2_xattr_value_buf vb = { + .vb_bh = root_bh, + .vb_xv = xv, + .vb_access = ocfs2_journal_access, 
+ }; - ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv); + ocfs2_init_xattr_value_extent_tree(&et, inode, &vb); - ret = ocfs2_journal_access(handle, inode, root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = vb.vb_access(handle, inode, vb.vb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -657,9 +667,9 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, goto out; } - le32_add_cpu(&xv->xr_clusters, -len); + le32_add_cpu(&vb.vb_xv->xr_clusters, -len); - ret = ocfs2_journal_dirty(handle, root_bh); + ret = ocfs2_journal_dirty(handle, vb.vb_bh); if (ret) { mlog_errno(ret); goto out; diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index 9a67e7d8f81..5a1ebc789f7 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -70,4 +70,18 @@ int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *, int, struct ocfs2_security_xattr_info *, int *, int *, struct ocfs2_alloc_context **); +/* + * xattrs can live inside an inode, as part of an external xattr block, + * or inside an xattr bucket, which is the leaf of a tree rooted in an + * xattr block. Some of the xattr calls, especially the value setting + * functions, want to treat each of these locations as equal. Let's wrap + * them in a structure that we can pass around instead of raw buffer_heads. + */ +struct ocfs2_xattr_value_buf { + struct buffer_head *vb_bh; + ocfs2_journal_access_func vb_access; + struct ocfs2_xattr_value_root *vb_xv; +}; + + #endif /* OCFS2_XATTR_H */ -- cgit v1.2.3-70-g09d2 From d72cc72d57ecaf9047da51269dabd6880c1399ac Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 9 Dec 2008 14:30:41 -0800 Subject: ocfs2: Pull ocfs2_xattr_value_buf up from __ocfs2_remove_xattr_range(). Place an ocfs2_xattr_value_buf in ocfs2_xattr_shrink_size() and pass it down to __ocfs2_remove_xattr_range(). 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 123d378aba9..3b059cf2eb4 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -636,8 +636,7 @@ leave: } static int __ocfs2_remove_xattr_range(struct inode *inode, - struct buffer_head *root_bh, - struct ocfs2_xattr_value_root *xv, + struct ocfs2_xattr_value_buf *vb, u32 cpos, u32 phys_cpos, u32 len, struct ocfs2_xattr_set_ctxt *ctxt) { @@ -645,16 +644,11 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); handle_t *handle = ctxt->handle; struct ocfs2_extent_tree et; - struct ocfs2_xattr_value_buf vb = { - .vb_bh = root_bh, - .vb_xv = xv, - .vb_access = ocfs2_journal_access, - }; - ocfs2_init_xattr_value_extent_tree(&et, inode, &vb); + ocfs2_init_xattr_value_extent_tree(&et, inode, vb); - ret = vb.vb_access(handle, inode, vb.vb_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = vb->vb_access(handle, inode, vb->vb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -667,9 +661,9 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, goto out; } - le32_add_cpu(&vb.vb_xv->xr_clusters, -len); + le32_add_cpu(&vb->vb_xv->xr_clusters, -len); - ret = ocfs2_journal_dirty(handle, vb.vb_bh); + ret = ocfs2_journal_dirty(handle, vb->vb_bh); if (ret) { mlog_errno(ret); goto out; @@ -693,6 +687,11 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, int ret = 0; u32 trunc_len, cpos, phys_cpos, alloc_size; u64 block; + struct ocfs2_xattr_value_buf vb = { + .vb_bh = root_bh, + .vb_xv = xv, + .vb_access = ocfs2_journal_access, + }; if (old_clusters <= new_clusters) return 0; @@ -701,7 +700,8 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, trunc_len = old_clusters - new_clusters; while (trunc_len) { ret = 
ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, - &alloc_size, &xv->xr_list); + &alloc_size, + &vb.vb_xv->xr_list); if (ret) { mlog_errno(ret); goto out; @@ -710,7 +710,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, if (alloc_size > trunc_len) alloc_size = trunc_len; - ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos, + ret = __ocfs2_remove_xattr_range(inode, &vb, cpos, phys_cpos, alloc_size, ctxt); if (ret) { -- cgit v1.2.3-70-g09d2 From 19b801f45fa5e4840b9be3dcf1e73b08f35b04d9 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 9 Dec 2008 14:36:50 -0800 Subject: ocfs2: Pull ocfs2_xattr_value_buf up into ocfs2_xattr_value_truncate(). Place an ocfs2_xattr_value_buf in ocfs2_xattr_value_truncate() and pass it down to ocfs2_xattr_shrink_size(). We can also pass it into ocfs2_xattr_extend_allocation(), replacing its ocfs2_xattr_value_buf. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 3b059cf2eb4..4ce8019f0ef 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -573,34 +573,28 @@ int ocfs2_calc_xattr_init(struct inode *dir, static int ocfs2_xattr_extend_allocation(struct inode *inode, u32 clusters_to_add, - struct buffer_head *xattr_bh, - struct ocfs2_xattr_value_root *xv, + struct ocfs2_xattr_value_buf *vb, struct ocfs2_xattr_set_ctxt *ctxt) { int status = 0; handle_t *handle = ctxt->handle; enum ocfs2_alloc_restarted why; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct ocfs2_xattr_value_buf vb = { - .vb_bh = xattr_bh, - .vb_xv = xv, - .vb_access = ocfs2_journal_access, - }; - u32 prev_clusters, logical_start = le32_to_cpu(vb.vb_xv->xr_clusters); + u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); struct ocfs2_extent_tree et; mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add); - 
ocfs2_init_xattr_value_extent_tree(&et, inode, &vb); + ocfs2_init_xattr_value_extent_tree(&et, inode, vb); - status = vb.vb_access(handle, inode, vb.vb_bh, + status = vb->vb_access(handle, inode, vb->vb_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; } - prev_clusters = le32_to_cpu(vb.vb_xv->xr_clusters); + prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); status = ocfs2_add_clusters_in_btree(osb, inode, &logical_start, @@ -616,13 +610,13 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, goto leave; } - status = ocfs2_journal_dirty(handle, vb.vb_bh); + status = ocfs2_journal_dirty(handle, vb->vb_bh); if (status < 0) { mlog_errno(status); goto leave; } - clusters_to_add -= le32_to_cpu(vb.vb_xv->xr_clusters) - prev_clusters; + clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters; /* * We should have already allocated enough space before the transaction, @@ -680,18 +674,12 @@ out: static int ocfs2_xattr_shrink_size(struct inode *inode, u32 old_clusters, u32 new_clusters, - struct buffer_head *root_bh, - struct ocfs2_xattr_value_root *xv, + struct ocfs2_xattr_value_buf *vb, struct ocfs2_xattr_set_ctxt *ctxt) { int ret = 0; u32 trunc_len, cpos, phys_cpos, alloc_size; u64 block; - struct ocfs2_xattr_value_buf vb = { - .vb_bh = root_bh, - .vb_xv = xv, - .vb_access = ocfs2_journal_access, - }; if (old_clusters <= new_clusters) return 0; @@ -701,7 +689,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, while (trunc_len) { ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, &alloc_size, - &vb.vb_xv->xr_list); + &vb->vb_xv->xr_list); if (ret) { mlog_errno(ret); goto out; @@ -710,7 +698,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, if (alloc_size > trunc_len) alloc_size = trunc_len; - ret = __ocfs2_remove_xattr_range(inode, &vb, cpos, + ret = __ocfs2_remove_xattr_range(inode, vb, cpos, phys_cpos, alloc_size, ctxt); if (ret) { @@ -738,6 +726,11 @@ static int 
ocfs2_xattr_value_truncate(struct inode *inode, int ret; u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); u32 old_clusters = le32_to_cpu(xv->xr_clusters); + struct ocfs2_xattr_value_buf vb = { + .vb_bh = root_bh, + .vb_xv = xv, + .vb_access = ocfs2_journal_access, + }; if (new_clusters == old_clusters) return 0; @@ -745,11 +738,11 @@ static int ocfs2_xattr_value_truncate(struct inode *inode, if (new_clusters > old_clusters) ret = ocfs2_xattr_extend_allocation(inode, new_clusters - old_clusters, - root_bh, xv, ctxt); + &vb, ctxt); else ret = ocfs2_xattr_shrink_size(inode, old_clusters, new_clusters, - root_bh, xv, ctxt); + &vb, ctxt); return ret; } -- cgit v1.2.3-70-g09d2 From b3e5d37905730dc5ddff717f55ed830caa80ea0e Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 9 Dec 2008 15:01:04 -0800 Subject: ocfs2: Pass ocfs2_xattr_value_buf into ocfs2_xattr_value_truncate(). The callers of ocfs2_xattr_value_truncate() now pass in ocfs2_xattr_value_bufs. These callers are the ones that calculated the xv location, so they are the right starting point. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 66 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 32 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 4ce8019f0ef..409f9eeec70 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -718,19 +718,13 @@ out: } static int ocfs2_xattr_value_truncate(struct inode *inode, - struct buffer_head *root_bh, - struct ocfs2_xattr_value_root *xv, + struct ocfs2_xattr_value_buf *vb, int len, struct ocfs2_xattr_set_ctxt *ctxt) { int ret; u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); - u32 old_clusters = le32_to_cpu(xv->xr_clusters); - struct ocfs2_xattr_value_buf vb = { - .vb_bh = root_bh, - .vb_xv = xv, - .vb_access = ocfs2_journal_access, - }; + u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); if (new_clusters == old_clusters) return 0; @@ -738,11 +732,11 @@ static int ocfs2_xattr_value_truncate(struct inode *inode, if (new_clusters > old_clusters) ret = ocfs2_xattr_extend_allocation(inode, new_clusters - old_clusters, - &vb, ctxt); + vb, ctxt); else ret = ocfs2_xattr_shrink_size(inode, old_clusters, new_clusters, - &vb, ctxt); + vb, ctxt); return ret; } @@ -1330,6 +1324,10 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode, struct ocfs2_xattr_value_root *xv = NULL; size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; int ret = 0; + struct ocfs2_xattr_value_buf vb = { + .vb_bh = xs->xattr_bh, + .vb_access = ocfs2_journal_access + }; memset(val, 0, size); memcpy(val, xi->name, name_len); @@ -1340,9 +1338,9 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode, xv->xr_list.l_tree_depth = 0; xv->xr_list.l_count = cpu_to_le16(1); xv->xr_list.l_next_free_rec = 0; + vb.vb_xv = xv; - ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv, - xi->value_len, ctxt); + ret = ocfs2_xattr_value_truncate(inode, &vb, xi->value_len, ctxt); if (ret < 0) { mlog_errno(ret); 
return ret; @@ -1352,7 +1350,7 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode, mlog_errno(ret); return ret; } - ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, xv, + ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb.vb_xv, xi->value, xi->value_len); if (ret < 0) mlog_errno(ret); @@ -1550,9 +1548,12 @@ static int ocfs2_xattr_set_entry(struct inode *inode, goto out; } else if (!ocfs2_xattr_is_local(xs->here)) { /* For existing xattr which has value outside */ - struct ocfs2_xattr_value_root *xv = NULL; - xv = (struct ocfs2_xattr_value_root *)(val + - OCFS2_XATTR_SIZE(name_len)); + struct ocfs2_xattr_value_buf vb = { + .vb_bh = xs->xattr_bh, + .vb_xv = (struct ocfs2_xattr_value_root *) + (val + OCFS2_XATTR_SIZE(name_len)), + .vb_access = ocfs2_journal_access, + }; if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { /* @@ -1561,8 +1562,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, * then set new value with set_value_outside(). */ ret = ocfs2_xattr_value_truncate(inode, - xs->xattr_bh, - xv, + &vb, xi->value_len, ctxt); if (ret < 0) { @@ -1582,7 +1582,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, ret = __ocfs2_xattr_set_value_outside(inode, handle, - xv, + vb.vb_xv, xi->value, xi->value_len); if (ret < 0) @@ -1594,8 +1594,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, * just trucate old value to zero. 
*/ ret = ocfs2_xattr_value_truncate(inode, - xs->xattr_bh, - xv, + &vb, 0, ctxt); if (ret < 0) @@ -1714,15 +1713,17 @@ static int ocfs2_remove_value_outside(struct inode*inode, struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; if (!ocfs2_xattr_is_local(entry)) { - struct ocfs2_xattr_value_root *xv; + struct ocfs2_xattr_value_buf vb = { + .vb_bh = bh, + .vb_access = ocfs2_journal_access, + }; void *val; val = (void *)header + le16_to_cpu(entry->xe_name_offset); - xv = (struct ocfs2_xattr_value_root *) + vb.vb_xv = (struct ocfs2_xattr_value_root *) (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); - ret = ocfs2_xattr_value_truncate(inode, bh, xv, - 0, &ctxt); + ret = ocfs2_xattr_value_truncate(inode, &vb, 0, &ctxt); if (ret < 0) { mlog_errno(ret); break; @@ -4651,11 +4652,12 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, { int ret, offset; u64 value_blk; - struct buffer_head *value_bh = NULL; - struct ocfs2_xattr_value_root *xv; struct ocfs2_xattr_entry *xe; struct ocfs2_xattr_header *xh = bucket_xh(bucket); size_t blocksize = inode->i_sb->s_blocksize; + struct ocfs2_xattr_value_buf vb = { + .vb_access = ocfs2_journal_access, + }; xe = &xh->xh_entries[xe_off]; @@ -4669,11 +4671,11 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, /* We don't allow ocfs2_xattr_value to be stored in different block. 
*/ BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); - value_bh = bucket->bu_bhs[value_blk]; - BUG_ON(!value_bh); + vb.vb_bh = bucket->bu_bhs[value_blk]; + BUG_ON(!vb.vb_bh); - xv = (struct ocfs2_xattr_value_root *) - (value_bh->b_data + offset % blocksize); + vb.vb_xv = (struct ocfs2_xattr_value_root *) + (vb.vb_bh->b_data + offset % blocksize); ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, OCFS2_JOURNAL_ACCESS_WRITE); @@ -4691,7 +4693,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, */ mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n", xe_off, (unsigned long long)bucket_blkno(bucket), len); - ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt); + ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); if (ret) { mlog_errno(ret); goto out_dirty; -- cgit v1.2.3-70-g09d2 From 0c748e95327d00e9eb19d0f34b32147ecbc02137 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 9 Dec 2008 15:46:15 -0800 Subject: ocfs2: Pass value buf to ocfs2_xattr_update_entry(). ocfs2_xattr_update_entry() updates the entry portion of an xattr buffer. This can be part of multiple metadata block types, so pass the buffer in via an ocfs2_xattr_value_buf. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 409f9eeec70..6a056122771 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1282,12 +1282,13 @@ static int ocfs2_xattr_update_entry(struct inode *inode, handle_t *handle, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_value_buf *vb, size_t offs) { int ret; - ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = vb->vb_access(handle, inode, vb->vb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -1301,7 +1302,7 @@ static int ocfs2_xattr_update_entry(struct inode *inode, ocfs2_xattr_set_local(xs->here, 0); ocfs2_xattr_hash_entry(inode, xs->header, xs->here); - ret = ocfs2_journal_dirty(handle, xs->xattr_bh); + ret = ocfs2_journal_dirty(handle, vb->vb_bh); if (ret < 0) mlog_errno(ret); out: @@ -1345,7 +1346,7 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode, mlog_errno(ret); return ret; } - ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, offs); + ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, &vb, offs); if (ret < 0) { mlog_errno(ret); return ret; @@ -1574,6 +1575,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, handle, xi, xs, + &vb, offs); if (ret < 0) { mlog_errno(ret); -- cgit v1.2.3-70-g09d2 From 512620f44df85df87348fc9a6fc54fcaa254b8d3 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 9 Dec 2008 15:58:35 -0800 Subject: ocfs2: Use ocfs2_xattr_value_buf in ocfs2_xattr_set_entry(). ocfs2_xattr_set_entry is the function that knows what type of block it is setting into. This is what we wanted from ocfs2_xattr_value_buf. Plus, moving the value buf up into ocfs2_xattr_set_entry() allows us to pass it into ocfs2_xattr_set_value_outside() and ocfs2_xattr_cleanup(). 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 53 +++++++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 24 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 6a056122771..c08b5e8746c 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1252,6 +1252,7 @@ static int ocfs2_xattr_cleanup(struct inode *inode, handle_t *handle, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_value_buf *vb, size_t offs) { int ret = 0; @@ -1259,8 +1260,8 @@ static int ocfs2_xattr_cleanup(struct inode *inode, void *val = xs->base + offs; size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; - ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = vb->vb_access(handle, inode, vb->vb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -1271,7 +1272,7 @@ static int ocfs2_xattr_cleanup(struct inode *inode, memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry)); memset(val, 0, size); - ret = ocfs2_journal_dirty(handle, xs->xattr_bh); + ret = ocfs2_journal_dirty(handle, vb->vb_bh); if (ret < 0) mlog_errno(ret); out: @@ -1318,6 +1319,7 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, struct ocfs2_xattr_set_ctxt *ctxt, + struct ocfs2_xattr_value_buf *vb, size_t offs) { size_t name_len = strlen(xi->name); @@ -1325,10 +1327,6 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode, struct ocfs2_xattr_value_root *xv = NULL; size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; int ret = 0; - struct ocfs2_xattr_value_buf vb = { - .vb_bh = xs->xattr_bh, - .vb_access = ocfs2_journal_access - }; memset(val, 0, size); memcpy(val, xi->name, name_len); @@ -1339,19 +1337,19 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode, xv->xr_list.l_tree_depth = 0; xv->xr_list.l_count = 
cpu_to_le16(1); xv->xr_list.l_next_free_rec = 0; - vb.vb_xv = xv; + vb->vb_xv = xv; - ret = ocfs2_xattr_value_truncate(inode, &vb, xi->value_len, ctxt); + ret = ocfs2_xattr_value_truncate(inode, vb, xi->value_len, ctxt); if (ret < 0) { mlog_errno(ret); return ret; } - ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, &vb, offs); + ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs); if (ret < 0) { mlog_errno(ret); return ret; } - ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb.vb_xv, + ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb->vb_xv, xi->value, xi->value_len); if (ret < 0) mlog_errno(ret); @@ -1488,6 +1486,16 @@ static int ocfs2_xattr_set_entry(struct inode *inode, .value = xi->value, .value_len = xi->value_len, }; + struct ocfs2_xattr_value_buf vb = { + .vb_bh = xs->xattr_bh, + .vb_access = ocfs2_journal_access_di, + }; + + if (!(flag & OCFS2_INLINE_XATTR_FL)) { + BUG_ON(xs->xattr_bh == xs->inode_bh); + vb.vb_access = ocfs2_journal_access_xb; + } else + BUG_ON(xs->xattr_bh != xs->inode_bh); /* Compute min_offs, last and free space. 
*/ last = xs->header->xh_entries; @@ -1543,18 +1551,14 @@ static int ocfs2_xattr_set_entry(struct inode *inode, if (ocfs2_xattr_is_local(xs->here) && size == size_l) { /* Replace existing local xattr with tree root */ ret = ocfs2_xattr_set_value_outside(inode, xi, xs, - ctxt, offs); + ctxt, &vb, offs); if (ret < 0) mlog_errno(ret); goto out; } else if (!ocfs2_xattr_is_local(xs->here)) { /* For existing xattr which has value outside */ - struct ocfs2_xattr_value_buf vb = { - .vb_bh = xs->xattr_bh, - .vb_xv = (struct ocfs2_xattr_value_root *) - (val + OCFS2_XATTR_SIZE(name_len)), - .vb_access = ocfs2_journal_access, - }; + vb.vb_xv = (struct ocfs2_xattr_value_root *) + (val + OCFS2_XATTR_SIZE(name_len)); if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { /* @@ -1605,16 +1609,16 @@ static int ocfs2_xattr_set_entry(struct inode *inode, } } - ret = ocfs2_journal_access(handle, inode, xs->inode_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } if (!(flag & OCFS2_INLINE_XATTR_FL)) { - ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = vb.vb_access(handle, inode, vb.vb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -1674,7 +1678,8 @@ static int ocfs2_xattr_set_entry(struct inode *inode, * This is the second step for value size > INLINE_SIZE. */ size_t offs = le16_to_cpu(xs->here->xe_name_offset); - ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, offs); + ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, + &vb, offs); if (ret < 0) { int ret2; @@ -1684,7 +1689,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, * the junk tree root we have already set in local. 
*/ ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle, - xi, xs, offs); + xi, xs, &vb, offs); if (ret2 < 0) mlog_errno(ret2); } -- cgit v1.2.3-70-g09d2 From 4311901daabe1d0f22cfcf86c57ad450f14b4e9f Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 9 Dec 2008 16:24:43 -0800 Subject: ocfs2: Pass value buf to ocfs2_remove_value_outside(). ocfs2_remove_value_outside() needs to know the type of buffer it is looking at. Pass in an ocfs2_xattr_value_buf. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index c08b5e8746c..d2760e64475 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1699,7 +1699,7 @@ out: } static int ocfs2_remove_value_outside(struct inode*inode, - struct buffer_head *bh, + struct ocfs2_xattr_value_buf *vb, struct ocfs2_xattr_header *header) { int ret = 0, i; @@ -1720,17 +1720,13 @@ static int ocfs2_remove_value_outside(struct inode*inode, struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; if (!ocfs2_xattr_is_local(entry)) { - struct ocfs2_xattr_value_buf vb = { - .vb_bh = bh, - .vb_access = ocfs2_journal_access, - }; void *val; val = (void *)header + le16_to_cpu(entry->xe_name_offset); - vb.vb_xv = (struct ocfs2_xattr_value_root *) + vb->vb_xv = (struct ocfs2_xattr_value_root *) (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); - ret = ocfs2_xattr_value_truncate(inode, &vb, 0, &ctxt); + ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); if (ret < 0) { mlog_errno(ret); break; @@ -1752,12 +1748,16 @@ static int ocfs2_xattr_ibody_remove(struct inode *inode, struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_xattr_header *header; int ret; + struct ocfs2_xattr_value_buf vb = { + .vb_bh = di_bh, + .vb_access = ocfs2_journal_access_di, + }; header = (struct ocfs2_xattr_header *) ((void *)di + inode->i_sb->s_blocksize - 
le16_to_cpu(di->i_xattr_inline_size)); - ret = ocfs2_remove_value_outside(inode, di_bh, header); + ret = ocfs2_remove_value_outside(inode, &vb, header); return ret; } @@ -1767,11 +1767,15 @@ static int ocfs2_xattr_block_remove(struct inode *inode, { struct ocfs2_xattr_block *xb; int ret = 0; + struct ocfs2_xattr_value_buf vb = { + .vb_bh = blk_bh, + .vb_access = ocfs2_journal_access_xb, + }; xb = (struct ocfs2_xattr_block *)blk_bh->b_data; if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); - ret = ocfs2_remove_value_outside(inode, blk_bh, header); + ret = ocfs2_remove_value_outside(inode, &vb, header); } else ret = ocfs2_delete_xattr_index_block(inode, blk_bh); -- cgit v1.2.3-70-g09d2 From 84008972491ca91b240f106191519781dabb8016 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 9 Dec 2008 16:11:49 -0800 Subject: ocfs2: Use proper journal_access function in xattr.c Change the rest of the naked ocfs2_journal_access() calls in fs/ocfs2/xattr.c to use the appropriate ocfs2_journal_access_*() call for their metadata type. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d2760e64475..17028aa7bc2 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1894,8 +1894,8 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) mlog_errno(ret); goto out; } - ret = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; @@ -2103,8 +2103,8 @@ static int ocfs2_xattr_block_set(struct inode *inode, int ret; if (!xs->xattr_bh) { - ret = ocfs2_journal_access(handle, inode, xs->inode_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (ret < 0) { mlog_errno(ret); goto end; @@ -2121,8 +2121,8 @@ static int ocfs2_xattr_block_set(struct inode *inode, new_bh = sb_getblk(inode->i_sb, first_blkno); ocfs2_set_new_buffer_uptodate(inode, new_bh); - ret = ocfs2_journal_access(handle, inode, new_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + ret = ocfs2_journal_access_xb(handle, inode, new_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (ret < 0) { mlog_errno(ret); goto end; @@ -3377,8 +3377,8 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, */ down_write(&oi->ip_alloc_sem); - ret = ocfs2_journal_access(handle, inode, xb_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_xb(handle, inode, xb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -4216,8 +4216,8 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); - ret = ocfs2_journal_access(handle, inode, root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_xb(handle, inode, root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret 
< 0) { mlog_errno(ret); goto leave; @@ -4808,8 +4808,8 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode, goto out; } - ret = ocfs2_journal_access(handle, inode, root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_xb(handle, inode, root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; -- cgit v1.2.3-70-g09d2 From 87d35a74b15ec703910a63e0667692fb5e267be0 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 10 Dec 2008 17:36:25 -0800 Subject: ocfs2: Add directory block trailers. Future ocfs2 features metaecc and indexed directories need to store a little bit of data in each dirblock. For compatibility, we place this in a trailer at the end of the dirblock. The trailer plays itself as an empty dirent, so that if the features are turned off, it can be reused without requiring a tunefs scan. This code adds the trailer and validates it when the block is read in. [ Mark is the original author, but I reinserted this code before his dir index work. -- Joel ] Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/dir.c | 197 ++++++++++++++++++++++++++++++++++++++++++++++++---- fs/ocfs2/ocfs2.h | 3 + fs/ocfs2/ocfs2_fs.h | 29 ++++++++ 3 files changed, 215 insertions(+), 14 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 45e4e03d8f7..1efd0ab680c 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -83,6 +83,63 @@ static int ocfs2_do_extend_dir(struct super_block *sb, struct ocfs2_alloc_context *meta_ac, struct buffer_head **new_bh); +/* + * These are distinct checks because future versions of the file system will + * want to have a trailing dirent structure independent of indexing. 
+ */ +static int ocfs2_dir_has_trailer(struct inode *dir) +{ + if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) + return 0; + + return ocfs2_meta_ecc(OCFS2_SB(dir->i_sb)); +} + +static int ocfs2_supports_dir_trailer(struct ocfs2_super *osb) +{ + return ocfs2_meta_ecc(osb); +} + +static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb) +{ + return sb->s_blocksize - sizeof(struct ocfs2_dir_block_trailer); +} + +#define ocfs2_trailer_from_bh(_bh, _sb) ((struct ocfs2_dir_block_trailer *) ((_bh)->b_data + ocfs2_dir_trailer_blk_off((_sb)))) + +/* + * XXX: This is executed once on every dirent. We should consider optimizing + * it. + */ +static int ocfs2_skip_dir_trailer(struct inode *dir, + struct ocfs2_dir_entry *de, + unsigned long offset, + unsigned long blklen) +{ + unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer); + + if (!ocfs2_dir_has_trailer(dir)) + return 0; + + if (offset != toff) + return 0; + + return 1; +} + +static void ocfs2_init_dir_trailer(struct inode *inode, + struct buffer_head *bh) +{ + struct ocfs2_dir_block_trailer *trailer; + + trailer = ocfs2_trailer_from_bh(bh, inode->i_sb); + strcpy(trailer->db_signature, OCFS2_DIR_TRAILER_SIGNATURE); + trailer->db_compat_rec_len = + cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer)); + trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno); + trailer->db_blkno = cpu_to_le64(bh->b_blocknr); +} + /* * bh passed here can be an inode block or a dir data block, depending * on the inode inline data flag. @@ -232,16 +289,60 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, { int rc = 0; struct buffer_head *tmp = *bh; + struct ocfs2_dir_block_trailer *trailer; rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags, ocfs2_validate_dir_block); - if (rc) + if (rc) { mlog_errno(rc); + goto out; + } + + /* + * We check the trailer here rather than in + * ocfs2_validate_dir_block() because that function doesn't have + * the inode to test. 
+ */ + if (!(flags & OCFS2_BH_READAHEAD) && + ocfs2_dir_has_trailer(inode)) { + trailer = ocfs2_trailer_from_bh(tmp, inode->i_sb); + if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) { + rc = -EINVAL; + ocfs2_error(inode->i_sb, + "Invalid dirblock #%llu: " + "signature = %.*s\n", + (unsigned long long)tmp->b_blocknr, 7, + trailer->db_signature); + goto out; + } + if (le64_to_cpu(trailer->db_blkno) != tmp->b_blocknr) { + rc = -EINVAL; + ocfs2_error(inode->i_sb, + "Directory block #%llu has an invalid " + "db_blkno of %llu", + (unsigned long long)tmp->b_blocknr, + (unsigned long long)le64_to_cpu(trailer->db_blkno)); + goto out; + } + if (le64_to_cpu(trailer->db_parent_dinode) != + OCFS2_I(inode)->ip_blkno) { + rc = -EINVAL; + ocfs2_error(inode->i_sb, + "Directory block #%llu on dinode " + "#%llu has an invalid parent_dinode " + "of %llu", + (unsigned long long)tmp->b_blocknr, + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)le64_to_cpu(trailer->db_blkno)); + goto out; + } + } /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */ - if (!rc && !*bh) + if (!*bh) *bh = tmp; +out: return rc ? -EIO : 0; } @@ -581,6 +682,16 @@ int __ocfs2_add_entry(handle_t *handle, goto bail; } + /* We're guaranteed that we should have space, so we + * can't possibly have hit the trailer...right? */ + mlog_bug_on_msg(ocfs2_skip_dir_trailer(dir, de, offset, size), + "Hit dir trailer trying to insert %.*s " + "(namelen %d) into directory %llu. 
" + "offset is %lu, trailer offset is %d\n", + namelen, name, namelen, + (unsigned long long)parent_fe_bh->b_blocknr, + offset, ocfs2_dir_trailer_blk_off(dir->i_sb)); + if (ocfs2_dirent_would_fit(de, rec_len)) { dir->i_mtime = dir->i_ctime = CURRENT_TIME; retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh); @@ -622,6 +733,7 @@ int __ocfs2_add_entry(handle_t *handle, retval = 0; goto bail; } + offset += le16_to_cpu(de->rec_len); de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len)); } @@ -1059,9 +1171,15 @@ int ocfs2_empty_dir(struct inode *inode) return !priv.seen_other; } -static void ocfs2_fill_initial_dirents(struct inode *inode, - struct inode *parent, - char *start, unsigned int size) +/* + * Fills "." and ".." dirents in a new directory block. Returns dirent for + * "..", which might be used during creation of a directory with a trailing + * header. It is otherwise safe to ignore the return code. + */ +static struct ocfs2_dir_entry *ocfs2_fill_initial_dirents(struct inode *inode, + struct inode *parent, + char *start, + unsigned int size) { struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start; @@ -1078,6 +1196,8 @@ static void ocfs2_fill_initial_dirents(struct inode *inode, de->name_len = 2; strcpy(de->name, ".."); ocfs2_set_de_type(de, S_IFDIR); + + return de; } /* @@ -1130,10 +1250,15 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, struct ocfs2_alloc_context *data_ac) { int status; + unsigned int size = osb->sb->s_blocksize; struct buffer_head *new_bh = NULL; + struct ocfs2_dir_entry *de; mlog_entry_void(); + if (ocfs2_supports_dir_trailer(osb)) + size = ocfs2_dir_trailer_blk_off(parent->i_sb); + status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, data_ac, NULL, &new_bh); if (status < 0) { @@ -1151,8 +1276,9 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, } memset(new_bh->b_data, 0, osb->sb->s_blocksize); - ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, - osb->sb->s_blocksize); + 
de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size); + if (ocfs2_supports_dir_trailer(osb)) + ocfs2_init_dir_trailer(inode, new_bh); status = ocfs2_journal_dirty(handle, new_bh); if (status < 0) { @@ -1193,13 +1319,27 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb, data_ac); } +/* + * Expand rec_len of the rightmost dirent in a directory block so that it + * contains the end of our valid space for dirents. We do this during + * expansion from an inline directory to one with extents. The first dir block + * in that case is taken from the inline data portion of the inode block. + * + * We add the dir trailer if this filesystem wants it. + */ static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, - unsigned int new_size) + struct super_block *sb) { struct ocfs2_dir_entry *de; struct ocfs2_dir_entry *prev_de; char *de_buf, *limit; - unsigned int bytes = new_size - old_size; + unsigned int new_size = sb->s_blocksize; + unsigned int bytes; + + if (ocfs2_supports_dir_trailer(OCFS2_SB(sb))) + new_size = ocfs2_dir_trailer_blk_off(sb); + + bytes = new_size - old_size; limit = start + old_size; de_buf = start; @@ -1316,8 +1456,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir)); memset(dirdata_bh->b_data + i_size_read(dir), 0, sb->s_blocksize - i_size_read(dir)); - ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), - sb->s_blocksize); + ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), sb); + if (ocfs2_supports_dir_trailer(osb)) + ocfs2_init_dir_trailer(dir, dirdata_bh); ret = ocfs2_journal_dirty(handle, dirdata_bh); if (ret) { @@ -1604,9 +1745,15 @@ do_extend: goto bail; } memset(new_bh->b_data, 0, sb->s_blocksize); + de = (struct ocfs2_dir_entry *) new_bh->b_data; de->inode = 0; - de->rec_len = cpu_to_le16(sb->s_blocksize); + if (ocfs2_dir_has_trailer(dir)) { + de->rec_len = 
cpu_to_le16(ocfs2_dir_trailer_blk_off(sb)); + ocfs2_init_dir_trailer(dir, new_bh); + } else { + de->rec_len = cpu_to_le16(sb->s_blocksize); + } status = ocfs2_journal_dirty(handle, new_bh); if (status < 0) { mlog_errno(status); @@ -1648,11 +1795,21 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, unsigned int *blocks_wanted) { int ret; + struct super_block *sb = dir->i_sb; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_dir_entry *de, *last_de = NULL; char *de_buf, *limit; unsigned long offset = 0; - unsigned int rec_len, new_rec_len; + unsigned int rec_len, new_rec_len, free_space = dir->i_sb->s_blocksize; + + /* + * This calculates how many free bytes we'd have in block zero, should + * this function force expansion to an extent tree. + */ + if (ocfs2_supports_dir_trailer(OCFS2_SB(sb))) + free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir); + else + free_space = dir->i_sb->s_blocksize - i_size_read(dir); de_buf = di->id2.i_data.id_data; limit = de_buf + i_size_read(dir); @@ -1669,6 +1826,11 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, ret = -EEXIST; goto out; } + /* + * No need to check for a trailing dirent record here as + * they're not used for inline dirs. + */ + if (ocfs2_dirent_would_fit(de, rec_len)) { /* Ok, we found a spot. Return this bh and let * the caller actually fill it in. */ @@ -1689,7 +1851,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, * dirent can be found. 
*/ *blocks_wanted = 1; - new_rec_len = le16_to_cpu(last_de->rec_len) + (dir->i_sb->s_blocksize - i_size_read(dir)); + new_rec_len = le16_to_cpu(last_de->rec_len) + free_space; if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len))) *blocks_wanted = 2; @@ -1707,6 +1869,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, struct ocfs2_dir_entry *de; struct super_block *sb = dir->i_sb; int status; + int blocksize = dir->i_sb->s_blocksize; status = ocfs2_read_dir_block(dir, 0, &bh, 0); if (status) { @@ -1748,6 +1911,11 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, status = -EEXIST; goto bail; } + + if (ocfs2_skip_dir_trailer(dir, de, offset % blocksize, + blocksize)) + goto next; + if (ocfs2_dirent_would_fit(de, rec_len)) { /* Ok, we found a spot. Return this bh and let * the caller actually fill it in. */ @@ -1756,6 +1924,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, status = 0; goto bail; } +next: offset += le16_to_cpu(de->rec_len); de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len)); } diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index bad87d0a03c..ad5c24a29ed 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -470,6 +470,9 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) #define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \ (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE)) +#define OCFS2_IS_VALID_DIR_TRAILER(ptr) \ + (!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE)) + static inline unsigned long ino_from_blkno(struct super_block *sb, u64 blkno) { diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 290fa26fba6..af0013b9c17 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -65,6 +65,7 @@ #define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01" #define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" #define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01" +#define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1" /* Compatibility 
flags */ #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ @@ -751,6 +752,34 @@ struct ocfs2_dir_entry { /* Actual on-disk length specified by rec_len */ } __attribute__ ((packed)); +/* + * Per-block record for the unindexed directory btree. This is carefully + * crafted so that the rec_len and name_len records of an ocfs2_dir_entry are + * mirrored. That way, the directory manipulation code needs a minimal amount + * of update. + * + * NOTE: Keep this structure aligned to a multiple of 4 bytes. + */ +struct ocfs2_dir_block_trailer { +/*00*/ __le64 db_compat_inode; /* Always zero. Was inode */ + + __le16 db_compat_rec_len; /* Backwards compatible with + * ocfs2_dir_entry. */ + __u8 db_compat_name_len; /* Always zero. Was name_len */ + __u8 db_reserved0; + __le16 db_reserved1; + __le16 db_free_rec_len; /* Size of largest empty hole + * in this block. (unused) */ +/*10*/ __u8 db_signature[8]; /* Signature for verification */ + __le64 db_reserved2; + __le64 db_free_next; /* Next block in list (unused) */ +/*20*/ __le64 db_blkno; /* Offset on disk, in blocks */ + __le64 db_parent_dinode; /* dinode which owns me, in + blocks */ +/*30*/ __le64 db_check; /* Error checking */ +/*40*/ +}; + /* * On disk allocator group structure for OCFS2 */ -- cgit v1.2.3-70-g09d2 From c175a518b4a1d514483abf61813ce5d855917164 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 10 Dec 2008 17:58:22 -0800 Subject: ocfs2: Checksum and ECC for directory blocks. Use the db_check field of ocfs2_dir_block_trailer to crc/ecc the dirblocks. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/dir.c | 37 +++++++++++++++++++++++++++++++++++-- fs/ocfs2/dir.h | 2 ++ fs/ocfs2/journal.c | 31 +++++++++++++++++++++++++++++-- fs/ocfs2/ocfs2_fs.h | 2 +- 4 files changed, 67 insertions(+), 5 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 1efd0ab680c..f2c4098cf33 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -48,6 +48,7 @@ #include "ocfs2.h" #include "alloc.h" +#include "blockcheck.h" #include "dir.h" #include "dlmglue.h" #include "extent_map.h" @@ -107,6 +108,17 @@ static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb) #define ocfs2_trailer_from_bh(_bh, _sb) ((struct ocfs2_dir_block_trailer *) ((_bh)->b_data + ocfs2_dir_trailer_blk_off((_sb)))) +/* XXX ocfs2_block_dqtrailer() is similar but not quite - can we make + * them more consistent? */ +struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize, + void *data) +{ + char *p = data; + + p += blocksize - sizeof(struct ocfs2_dir_block_trailer); + return (struct ocfs2_dir_block_trailer *)p; +} + /* * XXX: This is executed once on every dirent. We should consider optimizing * it. @@ -268,14 +280,35 @@ out: static int ocfs2_validate_dir_block(struct super_block *sb, struct buffer_head *bh) { + int rc; + struct ocfs2_dir_block_trailer *trailer = + ocfs2_trailer_from_bh(bh, sb); + + /* - * Nothing yet. We don't validate dirents here, that's handled + * We don't validate dirents here, that's handled * in-place when the code walks them. */ mlog(0, "Validating dirblock %llu\n", (unsigned long long)bh->b_blocknr); - return 0; + BUG_ON(!buffer_uptodate(bh)); + + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. + * + * Note that we are safe to call this even if the directory + * doesn't have a trailer. 
Filesystems without metaecc will do + * nothing, and filesystems with it will have one. + */ + rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &trailer->db_check); + if (rc) + mlog(ML_ERROR, "Checksum failed for dinode %llu\n", + (unsigned long long)bh->b_blocknr); + + return rc; } /* diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h index ce48b9080d8..c511e2e18e9 100644 --- a/fs/ocfs2/dir.h +++ b/fs/ocfs2/dir.h @@ -83,4 +83,6 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb, struct buffer_head *fe_bh, struct ocfs2_alloc_context *data_ac); +struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize, + void *data); #endif /* OCFS2_DIR_H */ diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 3b54dba0f74..57d7d25a2b9 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -415,6 +415,26 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, ocfs2_block_check_compute(data, size, &dqt->dq_check); } +/* + * Directory blocks also have their own trigger because the + * struct ocfs2_block_check offset depends on the blocksize. + */ +static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers, + struct buffer_head *bh, + void *data, size_t size) +{ + struct ocfs2_dir_block_trailer *trailer = + ocfs2_dir_trailer_from_size(size, data); + + /* + * We aren't guaranteed to have the superblock here, so we + * must unconditionally compute the ecc data. + * __ocfs2_journal_access() will only set the triggers if + * metaecc is enabled. 
+ */ + ocfs2_block_check_compute(data, size, &trailer->db_check); +} + static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh) { @@ -454,6 +474,13 @@ static struct ocfs2_triggers gd_triggers = { .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), }; +static struct ocfs2_triggers db_triggers = { + .ot_triggers = { + .t_commit = ocfs2_db_commit_trigger, + .t_abort = ocfs2_abort_trigger, + }, +}; + static struct ocfs2_triggers xb_triggers = { .ot_triggers = { .t_commit = ocfs2_commit_trigger, @@ -555,8 +582,8 @@ int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode, int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, struct buffer_head *bh, int type) { - /* Right now, nothing for dirblocks */ - return __ocfs2_journal_access(handle, inode, bh, NULL, type); + return __ocfs2_journal_access(handle, inode, bh, &db_triggers, + type); } int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode, diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index af0013b9c17..698ef3d2712 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -776,7 +776,7 @@ struct ocfs2_dir_block_trailer { /*20*/ __le64 db_blkno; /* Offset on disk, in blocks */ __le64 db_parent_dinode; /* dinode which owns me, in blocks */ -/*30*/ __le64 db_check; /* Error checking */ +/*30*/ struct ocfs2_block_check db_check; /* Error checking */ /*40*/ }; -- cgit v1.2.3-70-g09d2 From d030cc978e9e636dc39ce9a9e8282d48698a3b30 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 11 Dec 2008 15:04:14 -0800 Subject: ocfs2: Validate superblock with checksum and ecc. The superblock is read via a raw call. Validate it after we find it from its signature. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/super.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 2eb657c3e7a..43ed11345b5 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -52,6 +52,7 @@ #include "ocfs1_fs_compat.h" #include "alloc.h" +#include "blockcheck.h" #include "dlmglue.h" #include "export.h" #include "extent_map.h" @@ -1989,6 +1990,15 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di, if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE, strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) { + /* We have to do a raw check of the feature here */ + if (le32_to_cpu(di->id2.i_super.s_feature_incompat) & + OCFS2_FEATURE_INCOMPAT_META_ECC) { + status = ocfs2_block_check_validate(bh->b_data, + bh->b_size, + &di->i_check); + if (status) + goto out; + } status = -EINVAL; if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) { mlog(ML_ERROR, "found superblock with incorrect block " @@ -2030,6 +2040,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di, } } +out: mlog_exit(status); return status; } -- cgit v1.2.3-70-g09d2 From 9d28cfb73f3abccce001daf2d247b16bf20e2248 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 16 Oct 2008 17:53:29 -0700 Subject: ocfs2: Enable metadata checksums. Add OCFS2_FEATURE_INCOMPAT_META_ECC to the list of supported features. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/ocfs2_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 698ef3d2712..c7ae45aaa36 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -94,7 +94,8 @@ | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ - | OCFS2_FEATURE_INCOMPAT_XATTR) + | OCFS2_FEATURE_INCOMPAT_XATTR \ + | OCFS2_FEATURE_INCOMPAT_META_ECC) #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) -- cgit v1.2.3-70-g09d2 From e798b3f8a920c82a8e556dd54df97f0d3d0f9144 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Mon, 15 Dec 2008 17:13:48 -0800 Subject: ocfs2: Don't hand-code xor in ocfs2_hamming_encode(). When I wrote ocfs2_hamming_encode(), I was following documentation of the algorithm and didn't have quite the (possibly still imperfect) grasp of it I do now. As part of this, I literally hand-coded xor. I would test a bit, and then add that bit via xor to the parity word. I can, of course, just do a single xor of the parity word and the source word (the code buffer bit offset). This cuts CPU usage by 53% on a mostly populated buffer (an inode containing utmp.h inline). 
Joel Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/blockcheck.c | 67 +++++++++++++++------------------------------------ 1 file changed, 20 insertions(+), 47 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index 2ce6ae5e4b8..1d5083cef3a 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -31,7 +31,6 @@ #include "blockcheck.h" - /* * We use the following conventions: * @@ -39,26 +38,6 @@ * p = # parity bits * c = # total code bits (d + p) */ -static int calc_parity_bits(unsigned int d) -{ - unsigned int p; - - /* - * Bits required for Single Error Correction is as follows: - * - * d + p + 1 <= 2^p - * - * We're restricting ourselves to 31 bits of parity, that should be - * sufficient. - */ - for (p = 1; p < 32; p++) - { - if ((d + p + 1) <= (1 << p)) - return p; - } - - return 0; -} /* * Calculate the bit offset in the hamming code buffer based on the bit's @@ -109,10 +88,9 @@ static unsigned int calc_code_bit(unsigned int i) */ u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr) { - unsigned int p = calc_parity_bits(nr + d); - unsigned int i, j, b; + unsigned int i, b; - BUG_ON(!p); + BUG_ON(!d); /* * b is the hamming code bit number. Hamming code specifies a @@ -131,27 +109,23 @@ u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr */ b = calc_code_bit(nr + i); - for (j = 0; j < p; j++) - { - /* - * Data bits in the resultant code are checked by - * parity bits that are part of the bit number - * representation. Huh? - * - * - * In other words, the parity bit at position 2^k - * checks bits in positions having bit k set in - * their binary representation. Conversely, for - * instance, bit 13, i.e. 1101(2), is checked by - * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1. - * - * - * Note that 'k' is the _code_ bit number. 'b' in - * our loop. 
- */ - if (b & (1 << j)) - parity ^= (1 << j); - } + /* + * Data bits in the resultant code are checked by + * parity bits that are part of the bit number + * representation. Huh? + * + * + * In other words, the parity bit at position 2^k + * checks bits in positions having bit k set in + * their binary representation. Conversely, for + * instance, bit 13, i.e. 1101(2), is checked by + * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1. + * + * + * Note that 'k' is the _code_ bit number. 'b' in + * our loop. + */ + parity ^= b; } /* While the data buffer was treated as little endian, the @@ -174,10 +148,9 @@ u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize) void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr, unsigned int fix) { - unsigned int p = calc_parity_bits(nr + d); unsigned int i, b; - BUG_ON(!p); + BUG_ON(!d); /* * If the bit to fix has an hweight of 1, it's a parity bit. One -- cgit v1.2.3-70-g09d2 From 7bb458a58588f397068e4166c615e9fcc7480c16 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Mon, 15 Dec 2008 18:24:33 -0800 Subject: ocfs2: Another hamming code optimization. In the calc_code_bit() function, we must find all powers of two beneath the code bit number, *after* it's shifted by those powers of two. This requires a loop to see where it ends up. We can optimize it by starting at its most significant bit. This shaves 32% off the time, for a total of 67.6% shaved off of the original, naive implementation. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/blockcheck.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index 1d5083cef3a..f102ec939c9 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -39,6 +39,35 @@ * c = # total code bits (d + p) */ + +/* + * Find the log base 2 of 32-bit v. 
+ * + * Algorithm found on http://graphics.stanford.edu/~seander/bithacks.html, + * by Sean Eron Anderson. Code on the page is in the public domain unless + * otherwise noted. + * + * This particular algorithm is credited to Eric Cole. + */ +static int find_highest_bit_set(unsigned int v) +{ + + static const int MultiplyDeBruijnBitPosition[32] = + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + + v |= v >> 1; /* first round down to power of 2 */ + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v = (v >> 1) + 1; + + return MultiplyDeBruijnBitPosition[(u32)(v * 0x077CB531UL) >> 27]; +} + /* * Calculate the bit offset in the hamming code buffer based on the bit's * offset in the data buffer. Since the hamming code reserves all @@ -63,13 +92,22 @@ static unsigned int calc_code_bit(unsigned int i) */ b = i + 1; + /* + * As a cheat, we know that all bits below b's highest bit must be + * parity bits, so we can start there. + */ + p = find_highest_bit_set(b); + b += p; + /* * For every power of two below our bit number, bump our bit. * * We compare with (b + 1) becuase we have to compare with what b * would be _if_ it were bumped up by the parity bit. Capice? + * + * We start p at 2^p because of the cheat above. */ - for (p = 0; (1 << p) < (b + 1); p++) + for (p = (1 << p); p < (b + 1); p <<= 1) b++; return b; -- cgit v1.2.3-70-g09d2 From 58896c4d0e5868360ea0693c607d5bf74f79da6b Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 16 Dec 2008 13:54:40 -0800 Subject: ocfs2: One more hamming code optimization. The previous optimization used a fast find-highest-bit-set operation to give us a good starting point in calc_code_bit(). This version lets the caller cache the previous code buffer bit offset. Thus, the next call always starts where the last one left off. This reduces the calculation another 39%, for a total 80% reduction from the original, naive implementation. 
At least, on my machine. This also brings the parity calculation to within an order of magnitude of the crc32 calculation. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/blockcheck.c | 61 ++++++++++++++++----------------------------------- 1 file changed, 19 insertions(+), 42 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index f102ec939c9..2a947c44e59 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -40,34 +40,6 @@ */ -/* - * Find the log base 2 of 32-bit v. - * - * Algorithm found on http://graphics.stanford.edu/~seander/bithacks.html, - * by Sean Eron Anderson. Code on the page is in the public domain unless - * otherwise noted. - * - * This particular algorithm is credited to Eric Cole. - */ -static int find_highest_bit_set(unsigned int v) -{ - - static const int MultiplyDeBruijnBitPosition[32] = - { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 - }; - - v |= v >> 1; /* first round down to power of 2 */ - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v = (v >> 1) + 1; - - return MultiplyDeBruijnBitPosition[(u32)(v * 0x077CB531UL) >> 27]; -} - /* * Calculate the bit offset in the hamming code buffer based on the bit's * offset in the data buffer. Since the hamming code reserves all @@ -81,10 +53,14 @@ static int find_highest_bit_set(unsigned int v) * so it's a parity bit. 2 is a power of two (2^1), so it's a parity bit. * 3 is not a power of two. So bit 1 of the data buffer ends up as bit 3 * in the code buffer. + * + * The caller can pass in *p if it wants to keep track of the most recent + * number of parity bits added. This allows the function to start the + * calculation at the last place. 
*/ -static unsigned int calc_code_bit(unsigned int i) +static unsigned int calc_code_bit(unsigned int i, unsigned int *p_cache) { - unsigned int b, p; + unsigned int b, p = 0; /* * Data bits are 0-based, but we're talking code bits, which @@ -92,24 +68,25 @@ static unsigned int calc_code_bit(unsigned int i) */ b = i + 1; - /* - * As a cheat, we know that all bits below b's highest bit must be - * parity bits, so we can start there. - */ - p = find_highest_bit_set(b); + /* Use the cache if it is there */ + if (p_cache) + p = *p_cache; b += p; /* * For every power of two below our bit number, bump our bit. * - * We compare with (b + 1) becuase we have to compare with what b + * We compare with (b + 1) because we have to compare with what b * would be _if_ it were bumped up by the parity bit. Capice? * - * We start p at 2^p because of the cheat above. + * p is set above. */ - for (p = (1 << p); p < (b + 1); p <<= 1) + for (; (1 << p) < (b + 1); p++) b++; + if (p_cache) + *p_cache = p; + return b; } @@ -126,7 +103,7 @@ static unsigned int calc_code_bit(unsigned int i) */ u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr) { - unsigned int i, b; + unsigned int i, b, p = 0; BUG_ON(!d); @@ -145,7 +122,7 @@ u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr * i is the offset in this hunk, nr + i is the total bit * offset. */ - b = calc_code_bit(nr + i); + b = calc_code_bit(nr + i, &p); /* * Data bits in the resultant code are checked by @@ -201,7 +178,7 @@ void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr, * nr + d is the bit right past the data hunk we're looking at. * If fix after that, nothing to do */ - if (fix >= calc_code_bit(nr + d)) + if (fix >= calc_code_bit(nr + d, NULL)) return; /* @@ -209,7 +186,7 @@ void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr, * start b at the offset in the code buffer. See hamming_encode() * for a more detailed description of 'b'. 
*/ - b = calc_code_bit(nr); + b = calc_code_bit(nr, NULL); /* If the fix is before this hunk, nothing to do */ if (fix < b) return; -- cgit v1.2.3-70-g09d2 From 2b83256407687613e906bee93d98a25339128a4d Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Tue, 16 Dec 2008 15:49:19 -0800 Subject: ocfs2/dlm: Fix a race between migrate request and exit domain This patch addresses a race between a migrate request message and an exit domain message. Instead of blocking exit domains for the duration of the migrate, we ignore failure to deliver that message. This is because an exiting domain should not have any active locks and thus has no role to play in the migration. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmmaster.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 44f87caf368..92fd1d7d612 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2949,7 +2949,7 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, struct dlm_node_iter *iter) { struct dlm_migrate_request migrate; - int ret, status = 0; + int ret, skip, status = 0; int nodenum; memset(&migrate, 0, sizeof(migrate)); @@ -2966,12 +2966,27 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, nodenum == new_master) continue; + /* We could race exit domain. If exited, skip. 
*/ + spin_lock(&dlm->spinlock); + skip = (!test_bit(nodenum, dlm->domain_map)); + spin_unlock(&dlm->spinlock); + if (skip) { + clear_bit(nodenum, iter->node_map); + continue; + } + ret = o2net_send_message(DLM_MIGRATE_REQUEST_MSG, dlm->key, &migrate, sizeof(migrate), nodenum, &status); - if (ret < 0) - mlog_errno(ret); - else if (status < 0) { + if (ret < 0) { + mlog(0, "migrate_request returned %d!\n", ret); + if (!dlm_is_host_down(ret)) { + mlog(ML_ERROR, "unhandled error=%d!\n", ret); + BUG(); + } + clear_bit(nodenum, iter->node_map); + ret = 0; + } else if (status < 0) { mlog(0, "migrate request (node %u) returned %d!\n", nodenum, status); ret = status; -- cgit v1.2.3-70-g09d2 From 57dff2676eb68d805883a2204faaa5339ac44e03 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Tue, 16 Dec 2008 15:49:20 -0800 Subject: ocfs2/dlm: Clean up errors in dlm_proxy_ast_handler() This patch cleans up the errors printed in dlm_proxy_ast_handler(). The errors now include the node number that sent the (b)ast. It also reduces the number of endian swaps of the cookie. 
Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmast.c | 52 ++++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 25 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 644bee55d8b..d07ddbe4b28 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -275,6 +275,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, struct list_head *iter, *head=NULL; u64 cookie; u32 flags; + u8 node; if (!dlm_grab(dlm)) { dlm_error(DLM_REJECTED); @@ -286,18 +287,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, name = past->name; locklen = past->namelen; - cookie = be64_to_cpu(past->cookie); + cookie = past->cookie; flags = be32_to_cpu(past->flags); + node = past->node_idx; if (locklen > DLM_LOCKID_NAME_MAX) { ret = DLM_IVBUFLEN; - mlog(ML_ERROR, "Invalid name length in proxy ast handler!\n"); + mlog(ML_ERROR, "Invalid name length (%d) in proxy ast " + "handler!\n", locklen); goto leave; } if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) == (LKM_PUT_LVB|LKM_GET_LVB)) { - mlog(ML_ERROR, "both PUT and GET lvb specified\n"); + mlog(ML_ERROR, "Both PUT and GET lvb specified, (0x%x)\n", + flags); ret = DLM_BADARGS; goto leave; } @@ -310,22 +314,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, if (past->type != DLM_AST && past->type != DLM_BAST) { mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu" - "name=%.*s\n", past->type, - dlm_get_lock_cookie_node(cookie), - dlm_get_lock_cookie_seq(cookie), - locklen, name); + "name=%.*s, node=%u\n", past->type, + dlm_get_lock_cookie_node(be64_to_cpu(cookie)), + dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), + locklen, name, node); ret = DLM_IVLOCKID; goto leave; } res = dlm_lookup_lockres(dlm, name, locklen); if (!res) { - mlog(0, "got %sast for unknown lockres! " - "cookie=%u:%llu, name=%.*s, namelen=%u\n", - past->type == DLM_AST ? 
"" : "b", - dlm_get_lock_cookie_node(cookie), - dlm_get_lock_cookie_seq(cookie), - locklen, name, locklen); + mlog(0, "Got %sast for unknown lockres! cookie=%u:%llu, " + "name=%.*s, node=%u\n", (past->type == DLM_AST ? "" : "b"), + dlm_get_lock_cookie_node(be64_to_cpu(cookie)), + dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), + locklen, name, node); ret = DLM_IVLOCKID; goto leave; } @@ -337,12 +340,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, spin_lock(&res->spinlock); if (res->state & DLM_LOCK_RES_RECOVERING) { - mlog(0, "responding with DLM_RECOVERING!\n"); + mlog(0, "Responding with DLM_RECOVERING!\n"); ret = DLM_RECOVERING; goto unlock_out; } if (res->state & DLM_LOCK_RES_MIGRATING) { - mlog(0, "responding with DLM_MIGRATING!\n"); + mlog(0, "Responding with DLM_MIGRATING!\n"); ret = DLM_MIGRATING; goto unlock_out; } @@ -351,7 +354,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, lock = NULL; list_for_each(iter, head) { lock = list_entry (iter, struct dlm_lock, list); - if (be64_to_cpu(lock->ml.cookie) == cookie) + if (lock->ml.cookie == cookie) goto do_ast; } @@ -363,15 +366,15 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, list_for_each(iter, head) { lock = list_entry (iter, struct dlm_lock, list); - if (be64_to_cpu(lock->ml.cookie) == cookie) + if (lock->ml.cookie == cookie) goto do_ast; } - mlog(0, "got %sast for unknown lock! cookie=%u:%llu, " - "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", - dlm_get_lock_cookie_node(cookie), - dlm_get_lock_cookie_seq(cookie), - locklen, name, locklen); + mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, " + "node=%u\n", past->type == DLM_AST ? "" : "b", + dlm_get_lock_cookie_node(be64_to_cpu(cookie)), + dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), + locklen, name, node); ret = DLM_NORMAL; unlock_out: @@ -383,8 +386,8 @@ do_ast: if (past->type == DLM_AST) { /* do not alter lock refcount. switching lists. 
*/ list_move_tail(&lock->list, &res->granted); - mlog(0, "ast: adding to granted list... type=%d, " - "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); + mlog(0, "ast: Adding to granted list... type=%d, " + "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); if (lock->ml.convert_type != LKM_IVMODE) { lock->ml.type = lock->ml.convert_type; lock->ml.convert_type = LKM_IVMODE; @@ -408,7 +411,6 @@ do_ast: dlm_do_local_bast(dlm, res, lock, past->blocked_type); leave: - if (res) dlm_lockres_put(res); -- cgit v1.2.3-70-g09d2 From d4f7e650e55af6b235871126f747da88600e8040 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Tue, 16 Dec 2008 15:49:21 -0800 Subject: ocfs2/dlm: Hold off sending lockres drop ref message while lockres is migrating During lockres purge, o2dlm sends a drop reference message to the lockres master. This patch delays the message if the lockres is being migrated. Fixes oss bugzilla#1012 http://oss.oracle.com/bugzilla/show_bug.cgi?id=1012 Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmthread.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 4060bb328bc..d1295203029 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -181,7 +181,8 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, spin_lock(&res->spinlock); /* This ensures that clear refmap is sent after the set */ - __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); + __dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_SETREF_INPROG | + DLM_LOCK_RES_MIGRATING)); spin_unlock(&res->spinlock); /* clear our bit from the master's refmap, ignore errors */ -- cgit v1.2.3-70-g09d2 From b0d4f817ba5de8adb875ace594554a96d7737710 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Tue, 16 Dec 2008 15:49:22 -0800 Subject: ocfs2/dlm: Fix race in adding/removing lockres' to/from the tracking list This patch adds a new lock, dlm->tracking_lock, 
to protect adding/removing lockres' to/from the dlm->tracking_list. We were previously using dlm->spinlock for the same, but that proved inadequate as we could be freeing a lockres from a context that did not hold that lock. As the new lock only protects this list, we can explicitly take it when removing the lockres from the tracking list. This bug was exposed when testing multiple processes concurrently flock() the same file. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmcommon.h | 3 +++ fs/ocfs2/dlm/dlmdebug.c | 53 ++++++++++++++++++++++-------------------------- fs/ocfs2/dlm/dlmdomain.c | 1 + fs/ocfs2/dlm/dlmmaster.c | 10 +++++++++ 4 files changed, 38 insertions(+), 29 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index d5a86fb81a4..bb53714813a 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -140,6 +140,7 @@ struct dlm_ctxt unsigned int purge_count; spinlock_t spinlock; spinlock_t ast_lock; + spinlock_t track_lock; char *name; u8 node_num; u32 key; @@ -316,6 +317,8 @@ struct dlm_lock_resource * put on a list for the dlm thread to run. 
*/ unsigned long last_used; + struct dlm_ctxt *dlm; + unsigned migration_pending:1; atomic_t asts_reserved; spinlock_t spinlock; diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 1b81dcba175..b32f60a5acf 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -630,43 +630,38 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) { struct debug_lockres *dl = m->private; struct dlm_ctxt *dlm = dl->dl_ctxt; + struct dlm_lock_resource *oldres = dl->dl_res; struct dlm_lock_resource *res = NULL; + struct list_head *track_list; - spin_lock(&dlm->spinlock); + spin_lock(&dlm->track_lock); + if (oldres) + track_list = &oldres->tracking; + else + track_list = &dlm->tracking_list; - if (dl->dl_res) { - list_for_each_entry(res, &dl->dl_res->tracking, tracking) { - if (dl->dl_res) { - dlm_lockres_put(dl->dl_res); - dl->dl_res = NULL; - } - if (&res->tracking == &dlm->tracking_list) { - mlog(0, "End of list found, %p\n", res); - dl = NULL; - break; - } + list_for_each_entry(res, track_list, tracking) { + if (&res->tracking == &dlm->tracking_list) + res = NULL; + else dlm_lockres_get(res); - dl->dl_res = res; - break; - } - } else { - if (!list_empty(&dlm->tracking_list)) { - list_for_each_entry(res, &dlm->tracking_list, tracking) - break; - dlm_lockres_get(res); - dl->dl_res = res; - } else - dl = NULL; + break; } + spin_unlock(&dlm->track_lock); - if (dl) { - spin_lock(&dl->dl_res->spinlock); - dump_lockres(dl->dl_res, dl->dl_buf, dl->dl_len - 1); - spin_unlock(&dl->dl_res->spinlock); - } + if (oldres) + dlm_lockres_put(oldres); - spin_unlock(&dlm->spinlock); + dl->dl_res = res; + + if (res) { + spin_lock(&res->spinlock); + dump_lockres(res, dl->dl_buf, dl->dl_len - 1); + spin_unlock(&res->spinlock); + } else + dl = NULL; + /* passed to seq_show */ return dl; } diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 63f8125824e..d8d578f4561 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1550,6 
+1550,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, spin_lock_init(&dlm->spinlock); spin_lock_init(&dlm->master_lock); spin_lock_init(&dlm->ast_lock); + spin_lock_init(&dlm->track_lock); INIT_LIST_HEAD(&dlm->list); INIT_LIST_HEAD(&dlm->dirty_list); INIT_LIST_HEAD(&dlm->reco.resources); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 92fd1d7d612..cbf3abe24cd 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -505,8 +505,10 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm, static void dlm_lockres_release(struct kref *kref) { struct dlm_lock_resource *res; + struct dlm_ctxt *dlm; res = container_of(kref, struct dlm_lock_resource, refs); + dlm = res->dlm; /* This should not happen -- all lockres' have a name * associated with them at init time. */ @@ -515,6 +517,7 @@ static void dlm_lockres_release(struct kref *kref) mlog(0, "destroying lockres %.*s\n", res->lockname.len, res->lockname.name); + spin_lock(&dlm->track_lock); if (!list_empty(&res->tracking)) list_del_init(&res->tracking); else { @@ -522,6 +525,9 @@ static void dlm_lockres_release(struct kref *kref) res->lockname.len, res->lockname.name); dlm_print_one_lock_resource(res); } + spin_unlock(&dlm->track_lock); + + dlm_put(dlm); if (!hlist_unhashed(&res->hash_node) || !list_empty(&res->granted) || @@ -595,6 +601,10 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, res->migration_pending = 0; res->inflight_locks = 0; + /* put in dlm_lockres_release */ + dlm_grab(dlm); + res->dlm = dlm; + kref_init(&res->refs); /* just for consistency */ -- cgit v1.2.3-70-g09d2 From 7b791d68562e4ce5ab57cbacb10a1ad4ee33956e Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Tue, 16 Dec 2008 15:49:23 -0800 Subject: ocfs2/dlm: Fix race during lockres mastery dlm_get_lock_resource() is supposed to return a lock resource with a proper master. 
If multiple concurrent threads attempt to look up the lockres for the same lockid while the lock mastery is underway, one or more threads are likely to return a lockres without a proper master. This patch makes the threads wait in dlm_get_lock_resource() while the mastery is underway, ensuring all threads return the lockres with a proper master. This issue is known to be limited to users using the flock() syscall. For all other fs operations, the ocfs2 dlmglue layer serializes the dlm op for each lockid. Users encountering this bug will see flock() return EINVAL and dmesg have the following error: ERROR: Dlm error "DLM_BADARGS" while calling dlmlock on resource : bad api args Reported-by: Coly Li Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmmaster.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index cbf3abe24cd..54e182a27ca 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -732,14 +732,21 @@ lookup: if (tmpres) { int dropping_ref = 0; + spin_unlock(&dlm->spinlock); + spin_lock(&tmpres->spinlock); + /* We wait for the other thread that is mastering the resource */ + if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { + __dlm_wait_on_lockres(tmpres); + BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN); + } + if (tmpres->owner == dlm->node_num) { BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF); dlm_lockres_grab_inflight_ref(dlm, tmpres); } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) dropping_ref = 1; spin_unlock(&tmpres->spinlock); - spin_unlock(&dlm->spinlock); /* wait until done messaging the master, drop our ref to allow * the lockres to be purged, start over. 
*/ -- cgit v1.2.3-70-g09d2 From 71d548a6af36fe98c95fbd0522147f842bd5f054 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Fri, 5 Dec 2008 06:20:54 +0800 Subject: ocfs2/xattr: Remove extend_trans call and add its credits from the beginning Actually, when setting a new xattr value, we know it from the very beginning, and it isn't like the extension of bucket in which case we can't figure it out. So remove ocfs2_extend_trans in that function and calculate it before the transaction. It also relieve acl operation from the worry about the side effect of ocfs2_extend_trans. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 17028aa7bc2..93a1ab4fe1d 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1169,7 +1169,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, const void *value, int value_len) { - int ret = 0, i, cp_len, credits; + int ret = 0, i, cp_len; u16 blocksize = inode->i_sb->s_blocksize; u32 p_cluster, num_clusters; u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); @@ -1179,18 +1179,6 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); - /* - * In __ocfs2_xattr_set_value_outside has already been dirtied, - * so we don't need to worry about whether ocfs2_extend_trans - * will create a new transactio for us or not. - */ - credits = clusters * bpc; - ret = ocfs2_extend_trans(handle, credits); - if (ret) { - mlog_errno(ret); - goto out; - } - while (cpos < clusters) { ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, &num_clusters, &xv->xr_list); @@ -2233,6 +2221,15 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, xi->value_len); u64 value_size; + /* + * Calculate the clusters we need to write. 
+ * No matter whether we replace an old one or add a new one, + * we need this for writing. + */ + if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) + credits += new_clusters * + ocfs2_clusters_to_blocks(inode->i_sb, 1); + if (xis->not_found && xbs->not_found) { credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); -- cgit v1.2.3-70-g09d2 From 4b3f6209bf9eec46fe5ebb168718fef5c443c157 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Fri, 5 Dec 2008 06:20:55 +0800 Subject: ocfs2/xattr: Always updating ctime during xattr set. In xattr set, we should always update ctime if the operation succeeds. The old code mistakenly put the update in ocfs2_xattr_set_entry, which is only called when we set xattr in inode or xattr block. The side benefit is that it resolves bug 1052, since in that scenario ocfs2_calc_xattr_set_need only calculates the xattr set credits while ocfs2_xattr_set_entry also updates the inode, which isn't concerned with the process of xattr set. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 93a1ab4fe1d..3e2e92d7059 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1651,10 +1651,6 @@ static int ocfs2_xattr_set_entry(struct inode *inode, oi->ip_dyn_features |= flag; di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); spin_unlock(&oi->ip_lock); - /* Update inode ctime */ - inode->i_ctime = CURRENT_TIME; - di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); - di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); ret = ocfs2_journal_dirty(handle, xs->inode_bh); if (ret < 0) @@ -2574,6 +2570,20 @@ static int __ocfs2_xattr_set_handle(struct inode *inode, } } + if (!ret) { + /* Update inode ctime. 
*/ + ret = ocfs2_journal_access(ctxt->handle, inode, xis->inode_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + inode->i_ctime = CURRENT_TIME; + di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + ocfs2_journal_dirty(ctxt->handle, xis->inode_bh); + } out: return ret; } @@ -2750,6 +2760,8 @@ int ocfs2_xattr_set(struct inode *inode, goto cleanup; } + /* we need to update inode's ctime field, so add credit for it. */ + credits += OCFS2_INODE_UPDATE_CREDITS; ctxt.handle = ocfs2_start_trans(osb, credits); if (IS_ERR(ctxt.handle)) { ret = PTR_ERR(ctxt.handle); -- cgit v1.2.3-70-g09d2 From 90cb546cada68bb8c2278afdb4b65c2ac11f2877 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Fri, 5 Dec 2008 06:20:56 +0800 Subject: ocfs2/xattr: fix credits calculation during index create When creating a xattr index block, the old calculation forget to add credits for the meta change of the alloc file. So add more credits and more comments to explain it. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 3e2e92d7059..73fb9f76251 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2359,13 +2359,21 @@ meta_guess: } else xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; + /* + * If there is already an xattr tree, good, we can calculate + * like other b-trees. Otherwise we may have the chance of + * create a tree, the credit calculation is borrowed from + * ocfs2_calc_extend_credits with root_el = NULL. And the + * new tree will be cluster based, so no meta is needed. 
+ */ if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; meta_add += ocfs2_extend_meta_needed(el); credits += ocfs2_calc_extend_credits(inode->i_sb, el, 1); - } + } else + credits += OCFS2_SUBALLOC_ALLOC + 1; /* * This cluster will be used either for new bucket or for -- cgit v1.2.3-70-g09d2 From 0e445b6fe93c723fe8093fd04ddfeb11ae2de082 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Tue, 9 Dec 2008 16:42:51 +0800 Subject: ocfs2: calculate and reserve credits for xattr value in mknod Previously we extended the credits for xattr's large value in set_value_outside; this can give rise to a credits issue when we set one security entry and two acl entries during mknod. As we removed extend_trans from set_value_outside, we must calculate and reserve the credits for xattr's large value in mknod. Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 73fb9f76251..e5be470e750 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -490,9 +490,14 @@ int ocfs2_calc_security_init(struct inode *dir, } /* reserve clusters for xattr value which will be set in B tree*/ - if (si->value_len > OCFS2_XATTR_INLINE_SIZE) - *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, - si->value_len); + if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { + int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, + si->value_len); + + *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, + new_clusters); + *want_clusters += new_clusters; + } return ret; } @@ -506,9 +511,7 @@ int ocfs2_calc_xattr_init(struct inode *dir, { int ret = 0; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); - int s_size = 0; - int a_size = 0; - int acl_len = 0; + int s_size = 0, a_size = 0, acl_len = 0, new_clusters; if (si->enable) s_size = 
ocfs2_xattr_entry_real_size(strlen(si->name), @@ -556,16 +559,25 @@ int ocfs2_calc_xattr_init(struct inode *dir, *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); } - /* reserve clusters for xattr value which will be set in B tree*/ - if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) - *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, - si->value_len); + /* + * reserve credits and clusters for xattrs which has large value + * and have to be set outside + */ + if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { + new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, + si->value_len); + *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, + new_clusters); + *want_clusters += new_clusters; + } if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && acl_len > OCFS2_XATTR_INLINE_SIZE) { - *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, acl_len); - if (S_ISDIR(mode)) - *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, - acl_len); + /* for directory, it has DEFAULT and ACCESS two types of acls */ + new_clusters = (S_ISDIR(mode) ? 2 : 1) * + ocfs2_clusters_for_bytes(dir->i_sb, acl_len); + *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, + new_clusters); + *want_clusters += new_clusters; } return ret; -- cgit v1.2.3-70-g09d2 From 008aafaf0b4aa0476da483e3c6e3edbe951811ff Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Tue, 9 Dec 2008 16:43:08 +0800 Subject: ocfs2: alloc xattr bucket in ocfs2_xattr_set_handle In extreme situation, may need xattr bucket for setting security entry and acl entries during mknod. This only happens when block size is too small. 
Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index e5be470e750..095b0bb6e59 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2611,9 +2611,7 @@ out: /* * This function only called duing creating inode * for init security/acl xattrs of the new inode. - * The xattrs could be put into ibody or extent block, - * xattr bucket would not be use in this case. - * transanction credits also be reserved in here. + * All transanction credits have been reserved in mknod. */ int ocfs2_xattr_set_handle(handle_t *handle, struct inode *inode, @@ -2653,6 +2651,19 @@ int ocfs2_xattr_set_handle(handle_t *handle, if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) return -EOPNOTSUPP; + /* + * In extreme situation, may need xattr bucket when + * block size is too small. And we have already reserved + * the credits for bucket in mknod. + */ + if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) { + xbs.bucket = ocfs2_xattr_bucket_new(inode); + if (!xbs.bucket) { + mlog_errno(-ENOMEM); + return -ENOMEM; + } + } + xis.inode_bh = xbs.inode_bh = di_bh; di = (struct ocfs2_dinode *)di_bh->b_data; @@ -2672,6 +2683,7 @@ int ocfs2_xattr_set_handle(handle_t *handle, cleanup: up_write(&OCFS2_I(inode)->ip_xattr_sem); brelse(xbs.xattr_bh); + ocfs2_xattr_bucket_free(xbs.bucket); return ret; } -- cgit v1.2.3-70-g09d2 From 38d59ef61c11cafc50a66787bdbbe80d58bbd9c0 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Wed, 17 Dec 2008 10:22:56 +0800 Subject: ocfs2: Add xattr support checking in init_security We must check whether ocfs2 volume support xattr in init_security, if not support xattr and security is enable, would cause failure of mknod. 
Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 095b0bb6e59..e1d638af6ac 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -5324,6 +5324,9 @@ int ocfs2_init_security_get(struct inode *inode, struct inode *dir, struct ocfs2_security_xattr_info *si) { + /* check whether ocfs2 support feature xattr */ + if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) + return -EOPNOTSUPP; return security_inode_init_security(inode, dir, &si->name, &si->value, &si->value_len); } -- cgit v1.2.3-70-g09d2 From a641dc2a5a1445eb4cb491080dfc41c42a9eb37d Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 24 Dec 2008 16:03:48 -0800 Subject: ocfs2: remove unneeded lvb casts dlmglue.c has lots of code which casts the return value of ocfs2_dlm_lvb(). This is pointless however, as ocfs2_dlm_lvb() returns void *. Signed-off-by: Mark Fasheh --- fs/ocfs2/dlmglue.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index b1c75911d8a..f731ab49179 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -115,8 +115,7 @@ static void ocfs2_dump_meta_lvb_info(u64 level, unsigned int line, struct ocfs2_lock_res *lockres) { - struct ocfs2_meta_lvb *lvb = - (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); + struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); mlog(level, "LVB information for %s (called from %s:%u):\n", lockres->l_name, function, line); @@ -1864,7 +1863,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) mlog_entry_void(); - lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); + lvb = ocfs2_dlm_lvb(&lockres->l_lksb); /* * Invalidate the LVB of a deleted inode - this way other @@ -1916,7 +1915,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) mlog_meta_lvb(0, lockres); - lvb = (struct 
ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); + lvb = ocfs2_dlm_lvb(&lockres->l_lksb); /* We're safe here without the lockres lock... */ spin_lock(&oi->ip_lock); @@ -1951,8 +1950,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, struct ocfs2_lock_res *lockres) { - struct ocfs2_meta_lvb *lvb = - (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); + struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); if (lvb->lvb_version == OCFS2_LVB_VERSION && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) @@ -3489,7 +3487,7 @@ static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) mlog_entry_void(); - lvb = (struct ocfs2_qinfo_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); + lvb = ocfs2_dlm_lvb(&lockres->l_lksb); lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); -- cgit v1.2.3-70-g09d2 From dad7d975e4bd893c79fd122105b37b9a1776816a Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 24 Dec 2008 16:33:08 -0800 Subject: ocfs2: use min_t in ocfs2_quota_read() This is preferred to min(). 
Signed-off-by: Mark Fasheh --- fs/ocfs2/quota_global.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 444aa5a467f..6aff8f2d3e4 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -167,7 +167,7 @@ ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data, len = i_size - off; toread = len; while (toread > 0) { - tocopy = min((size_t)(sb->s_blocksize - offset), toread); + tocopy = min_t(size_t, (sb->s_blocksize - offset), toread); bh = NULL; err = ocfs2_read_quota_block(gqinode, blk, &bh); if (err) { -- cgit v1.2.3-70-g09d2 From 9047beabb8a396f0b18de1e4a9ab920cf92054af Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 5 Jan 2009 14:45:24 +0800 Subject: ocfs2: Access the right buffer_head in ocfs2_merge_rec_left. In commit "ocfs2: Use metadata-specific ocfs2_journal_access_*() functions", the wrong buffer_head is accessed. So change it to the right buffer_head. Signed-off-by: Tao Ma Acked-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 874c0bd9e1c..54ff4c77aaa 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -3402,8 +3402,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, has_empty_extent = 1; } - ret = ocfs2_path_bh_journal_access(handle, inode, left_path, - path_num_items(left_path) - 1); + ret = ocfs2_path_bh_journal_access(handle, inode, right_path, + path_num_items(right_path) - 1); if (ret) { mlog_errno(ret); goto out; -- cgit v1.2.3-70-g09d2 From 56ff5efad96182f4d3cb3dc6b07396762c658f16 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 9 Dec 2008 09:34:39 -0500 Subject: zero i_uid/i_gid on inode allocation ... and don't bother in callers. Don't bother with zeroing i_blocks, while we are at it - it's already been zeroed. 
i_mode is not worth the effort; it has no common default value. Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/inode.c | 1 - arch/s390/hypfs/inode.c | 1 - drivers/infiniband/hw/ipath/ipath_fs.c | 3 --- drivers/isdn/capi/capifs.c | 2 -- drivers/misc/ibmasm/ibmasmfs.c | 2 -- drivers/oprofile/oprofilefs.c | 3 --- drivers/usb/core/inode.c | 1 - drivers/usb/gadget/inode.c | 1 - fs/autofs/inode.c | 2 -- fs/autofs4/inode.c | 4 ---- fs/binfmt_misc.c | 3 --- fs/configfs/inode.c | 3 --- fs/cramfs/inode.c | 2 -- fs/debugfs/inode.c | 3 --- fs/devpts/inode.c | 4 ---- fs/hugetlbfs/inode.c | 1 - fs/inode.c | 2 ++ fs/libfs.c | 5 ----- fs/ocfs2/dlm/dlmfs.c | 2 -- fs/omfs/inode.c | 1 - fs/openpromfs/inode.c | 3 --- fs/proc/base.c | 4 ---- fs/proc/proc_sysctl.c | 1 - fs/ramfs/inode.c | 1 - fs/romfs/inode.c | 1 - fs/sysfs/inode.c | 3 --- ipc/mqueue.c | 1 - kernel/cgroup.c | 1 - net/sunrpc/rpc_pipe.c | 2 -- security/inode.c | 3 --- security/selinux/selinuxfs.c | 2 -- 31 files changed, 2 insertions(+), 66 deletions(-) (limited to 'fs/ocfs2') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 6296bfd9cb0..e309ef70a53 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -97,7 +97,6 @@ spufs_new_inode(struct super_block *sb, int mode) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; out: return inode; diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 9d4f8e6c080..5a805df216b 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -106,7 +106,6 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode) ret->i_mode = mode; ret->i_uid = hypfs_info->uid; ret->i_gid = hypfs_info->gid; - ret->i_blocks = 0; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; if (mode & S_IFDIR) ret->i_nlink = 2; diff 
--git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 53912c327bf..8dc2bb78160 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -57,9 +57,6 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, } inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_private = data; if ((mode & S_IFMT) == S_IFDIR) { diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c index 0aa66ec4cbd..b129409925a 100644 --- a/drivers/isdn/capi/capifs.c +++ b/drivers/isdn/capi/capifs.c @@ -111,8 +111,6 @@ capifs_fill_super(struct super_block *s, void *data, int silent) goto fail; inode->i_ino = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_blocks = 0; - inode->i_uid = inode->i_gid = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index 22a7e8ba211..de966a6fb7e 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -146,8 +146,6 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode) if (ret) { ret->i_mode = mode; - ret->i_uid = ret->i_gid = 0; - ret->i_blocks = 0; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; } return ret; diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index ddc4c59f02d..b7e4cee2426 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -29,9 +29,6 @@ static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode) if (inode) { inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; } return inode; diff --git a/drivers/usb/core/inode.c 
b/drivers/usb/core/inode.c index 185be760833..2a129cb7bb5 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -279,7 +279,6 @@ static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t de inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { default: diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index eeb26c0f88e..317b48fdbf0 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -2001,7 +2001,6 @@ gadgetfs_make_inode (struct super_block *sb, inode->i_mode = mode; inode->i_uid = default_uid; inode->i_gid = default_gid; - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_private = data; diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index c773680d5c6..e1734f2d6e2 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -251,13 +251,11 @@ struct inode *autofs_iget(struct super_block *sb, unsigned long ino) inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; inode->i_nlink = 2; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_blocks = 0; if (ino == AUTOFS_ROOT_INO) { inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &autofs_root_inode_operations; inode->i_fop = &autofs_root_operations; - inode->i_uid = inode->i_gid = 0; /* Changed in read_super */ goto done; } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 7b19802cfef..cfc23e53b6f 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -455,11 +455,7 @@ struct inode *autofs4_get_inode(struct super_block *sb, if (sb->s_root) { inode->i_uid = sb->s_root->d_inode->i_uid; inode->i_gid = sb->s_root->d_inode->i_gid; - } else { - inode->i_uid = 0; - inode->i_gid = 0; } - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; if (S_ISDIR(inf->mode)) { diff --git 
a/fs/binfmt_misc.c b/fs/binfmt_misc.c index f2744ab4e5b..e1158cb4fbd 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -496,9 +496,6 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) if (inode) { inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); } diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 4803ccc9448..5d349d38e05 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -117,8 +117,6 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr) static inline void set_default_inode_attr(struct inode * inode, mode_t mode) { inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; } @@ -136,7 +134,6 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd) { struct inode * inode = new_inode(configfs_sb); if (inode) { - inode->i_blocks = 0; inode->i_mapping->a_ops = &configfs_aops; inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; inode->i_op = &configfs_inode_operations; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index f40423eb1a1..a07338d2d14 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -83,8 +83,6 @@ static struct inode *get_cramfs_inode(struct super_block *sb, inode->i_op = &page_symlink_inode_operations; inode->i_data.a_ops = &cramfs_aops; } else { - inode->i_size = 0; - inode->i_blocks = 0; init_special_inode(inode, inode->i_mode, old_decode_dev(cramfs_inode->size)); } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 3dbe2169cf3..81ae9ea3c6e 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -37,9 +37,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d if (inode) { inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 
switch (mode & S_IFMT) { default: diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index fff96e152c0..5f3231b9633 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -189,8 +189,6 @@ static int mknod_ptmx(struct super_block *sb) } inode->i_ino = 2; - inode->i_uid = inode->i_gid = 0; - inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; mode = S_IFCHR|opts->ptmxmode; @@ -300,8 +298,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent) goto free_fsi; inode->i_ino = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_blocks = 0; - inode->i_uid = inode->i_gid = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7d479ce3ace..0ab0c6f5f43 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -506,7 +506,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode->i_mode = mode; inode->i_uid = uid; inode->i_gid = gid; - inode->i_blocks = 0; inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; diff --git a/fs/inode.c b/fs/inode.c index 7de1cda9248..bd48e5e6d3e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -131,6 +131,8 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) inode->i_op = &empty_iops; inode->i_fop = &empty_fops; inode->i_nlink = 1; + inode->i_uid = 0; + inode->i_gid = 0; atomic_set(&inode->i_writecount, 0); inode->i_size = 0; inode->i_blocks = 0; diff --git a/fs/libfs.c b/fs/libfs.c index e960a832190..7de05f7ce74 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -231,7 +231,6 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, */ root->i_ino = 1; root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; - root->i_uid = root->i_gid = 0; root->i_atime = 
root->i_mtime = root->i_ctime = CURRENT_TIME; dentry = d_alloc(NULL, &d_name); if (!dentry) { @@ -436,8 +435,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files */ inode->i_ino = 1; inode->i_mode = S_IFDIR | 0755; - inode->i_uid = inode->i_gid = 0; - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; @@ -464,8 +461,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files if (!inode) goto out; inode->i_mode = S_IFREG | files->mode; - inode->i_uid = inode->i_gid = 0; - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_fop = files->ops; inode->i_ino = i; diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 6f7a77d5402..1c9efb406a9 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -341,7 +341,6 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inc_nlink(inode); @@ -367,7 +366,6 @@ static struct inode *dlmfs_get_inode(struct inode *parent, inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 6afe57c84f8..633e9dc972b 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -39,7 +39,6 @@ struct inode *omfs_new_inode(struct inode *dir, int mode) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_blocks = 0; inode->i_mapping->a_ops = &omfs_aops; inode->i_atime = inode->i_mtime = inode->i_ctime = 
CURRENT_TIME; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index d41bdc784de..ffcd04f0012 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -256,9 +256,6 @@ found: break; } - inode->i_gid = 0; - inode->i_uid = 0; - d_add(dentry, inode); return NULL; } diff --git a/fs/proc/base.c b/fs/proc/base.c index cad92c1ac2b..10fd5223d60 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1426,8 +1426,6 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st if (!ei->pid) goto out_unlock; - inode->i_uid = 0; - inode->i_gid = 0; if (task_dumpable(task)) { rcu_read_lock(); cred = __task_cred(task); @@ -2349,8 +2347,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir, if (!ei->pid) goto out_iput; - inode->i_uid = 0; - inode->i_gid = 0; inode->i_mode = p->mode; if (S_ISDIR(inode->i_mode)) inode->i_nlink = 2; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 06ed10b7da9..94fcfff6863 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -31,7 +31,6 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */ inode->i_mode = table->mode; - inode->i_uid = inode->i_gid = 0; if (!table->child) { inode->i_mode |= S_IFREG; inode->i_op = &proc_sys_inode_operations; diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index a83a3518ae3..b7e6ac706b8 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -57,7 +57,6 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_blocks = 0; inode->i_mapping->a_ops = &ramfs_aops; inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 60d2f822e87..c97d4c93171 100644 --- 
a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -524,7 +524,6 @@ romfs_iget(struct super_block *sb, unsigned long ino) i->i_size = be32_to_cpu(ri.size); i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0; i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0; - i->i_uid = i->i_gid = 0; /* Precalculate the data offset */ ino = romfs_strnlen(i, ino+ROMFH_SIZE, ROMFS_MAXFN); diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index eb53c632f85..dfa3d94cfc7 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -107,8 +107,6 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) static inline void set_default_inode_attr(struct inode * inode, mode_t mode) { inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; } @@ -149,7 +147,6 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) { struct bin_attribute *bin_attr; - inode->i_blocks = 0; inode->i_mapping->a_ops = &sysfs_aops; inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; inode->i_op = &sysfs_inode_operations; diff --git a/ipc/mqueue.c b/ipc/mqueue.c index d9393f8e4c3..41b72f02fa7 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -120,7 +120,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode, inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_blocks = 0; inode->i_mtime = inode->i_ctime = inode->i_atime = CURRENT_TIME; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 48348dde6d8..f7c5099a057 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -573,7 +573,6 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info; } diff --git a/net/sunrpc/rpc_pipe.c 
b/net/sunrpc/rpc_pipe.c index 19245324887..577385a4a5d 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -522,8 +522,6 @@ rpc_get_inode(struct super_block *sb, int mode) if (!inode) return NULL; inode->i_mode = mode; - inode->i_uid = inode->i_gid = 0; - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch(mode & S_IFMT) { case S_IFDIR: diff --git a/security/inode.c b/security/inode.c index efea5a60546..007ef252dde 100644 --- a/security/inode.c +++ b/security/inode.c @@ -61,9 +61,6 @@ static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev) if (inode) { inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { default: diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index e5520996a75..8f612c8becb 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -847,8 +847,6 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode) if (ret) { ret->i_mode = mode; - ret->i_uid = ret->i_gid = 0; - ret->i_blocks = 0; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; } return ret; -- cgit v1.2.3-70-g09d2 From 025dfdafe77f20b3890981a394774baab7b9c827 Mon Sep 17 00:00:00 2001 From: Frederik Schwarzer Date: Thu, 16 Oct 2008 19:02:37 +0200 Subject: trivial: fix then -> than typos in comments and documentation - (better, more, bigger ...) then -> (...) 
than Signed-off-by: Frederik Schwarzer Signed-off-by: Jiri Kosina --- Documentation/hwmon/abituguru-datasheet | 6 +++--- Documentation/networking/rxrpc.txt | 2 +- Documentation/scsi/ChangeLog.lpfc | 2 +- arch/blackfin/kernel/kgdb.c | 2 +- arch/ia64/kernel/kprobes.c | 2 +- arch/m68k/Kconfig | 2 +- arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c | 2 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/oprofile/cell/spu_profiler.c | 2 +- arch/s390/Kconfig | 2 +- arch/s390/kernel/kprobes.c | 2 +- arch/sparc/kernel/kprobes.c | 2 +- arch/x86/kernel/kprobes.c | 2 +- arch/x86/kernel/mfgpt_32.c | 2 +- drivers/hwmon/fschmd.c | 2 +- drivers/infiniband/hw/mlx4/cq.c | 2 +- drivers/message/i2o/i2o_scsi.c | 2 +- drivers/mtd/devices/pmc551.c | 2 +- drivers/mtd/ubi/eba.c | 2 +- drivers/mtd/ubi/io.c | 2 +- drivers/mtd/ubi/scan.c | 2 +- drivers/mtd/ubi/ubi-media.h | 4 ++-- drivers/mtd/ubi/vtbl.c | 2 +- drivers/mtd/ubi/wl.c | 4 ++-- drivers/net/bnx2x_link.c | 2 +- drivers/net/e1000/e1000_hw.c | 4 ++-- drivers/net/slip.h | 2 +- drivers/net/tehuti.c | 4 ++-- drivers/net/tokenring/smctr.c | 2 +- drivers/net/wireless/ipw2x00/ipw2100.c | 2 +- drivers/net/wireless/rt2x00/rt2x00crypto.c | 4 ++-- drivers/net/wireless/strip.c | 2 +- drivers/s390/block/dasd_eer.c | 4 ++-- drivers/s390/char/vmlogrdr.c | 4 ++-- drivers/scsi/lpfc/lpfc_hbadisc.c | 4 ++-- drivers/scsi/lpfc/lpfc_sli.c | 10 +++++----- drivers/serial/crisv10.c | 4 ++-- drivers/video/console/vgacon.c | 2 +- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/proc/task_nommu.c | 2 +- fs/ubifs/Kconfig | 2 +- fs/ubifs/budget.c | 4 ++-- fs/ubifs/gc.c | 2 +- fs/ubifs/journal.c | 2 +- fs/ubifs/shrinker.c | 2 +- fs/xfs/linux-2.6/xfs_super.c | 2 +- include/linux/mtd/mtd.h | 2 +- include/linux/spi/spi.h | 4 ++-- include/mtd/ubi-user.h | 2 +- kernel/pid.c | 2 +- kernel/time/jiffies.c | 2 +- net/sctp/auth.c | 4 ++-- net/sctp/sm_statefuns.c | 6 +++--- net/sctp/socket.c | 2 +- net/sctp/tsnmap.c | 2 +- sound/usb/usx2y/usbusx2y.c | 2 +- 56 files changed, 76 
insertions(+), 76 deletions(-) (limited to 'fs/ocfs2') diff --git a/Documentation/hwmon/abituguru-datasheet b/Documentation/hwmon/abituguru-datasheet index aef5a9b3684..4d184f2db0e 100644 --- a/Documentation/hwmon/abituguru-datasheet +++ b/Documentation/hwmon/abituguru-datasheet @@ -74,7 +74,7 @@ a sensor. Notice that some banks have both a read and a write address this is how the uGuru determines if a read from or a write to the bank is taking place, thus when reading you should always use the read address and when writing the -write address. The write address is always one (1) more then the read address. +write address. The write address is always one (1) more than the read address. uGuru ready @@ -224,7 +224,7 @@ Bit 3: Beep if alarm (RW) Bit 4: 1 if alarm cause measured temp is over the warning threshold (R) Bit 5: 1 if alarm cause measured volt is over the max threshold (R) Bit 6: 1 if alarm cause measured volt is under the min threshold (R) -Bit 7: Volt sensor: Shutdown if alarm persist for more then 4 seconds (RW) +Bit 7: Volt sensor: Shutdown if alarm persist for more than 4 seconds (RW) Temp sensor: Shutdown if temp is over the shutdown threshold (RW) * This bit is only honored/used by the uGuru if a temp sensor is connected @@ -293,7 +293,7 @@ Byte 0: Alarm behaviour for the selected sensor. A 1 enables the described behaviour. Bit 0: Give an alarm if measured rpm is under the min threshold (RW) Bit 3: Beep if alarm (RW) -Bit 7: Shutdown if alarm persist for more then 4 seconds (RW) +Bit 7: Shutdown if alarm persist for more than 4 seconds (RW) Byte 1: min threshold (scale as bank 0x26) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index c3669a3fb4a..60d05eb77c6 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -540,7 +540,7 @@ A client would issue an operation by: MSG_MORE should be set in msghdr::msg_flags on all but the last part of the request. 
Multiple requests may be made simultaneously. - If a call is intended to go to a destination other then the default + If a call is intended to go to a destination other than the default specified through connect(), then msghdr::msg_name should be set on the first request message of that call. diff --git a/Documentation/scsi/ChangeLog.lpfc b/Documentation/scsi/ChangeLog.lpfc index ae3f962a7cf..ff19a52fe00 100644 --- a/Documentation/scsi/ChangeLog.lpfc +++ b/Documentation/scsi/ChangeLog.lpfc @@ -733,7 +733,7 @@ Changes from 20040920 to 20041018 I/O completion path a little more, especially taking care of fast-pathing the non-error case. Also removes tons of dead members and defines from lpfc_scsi.h - e.g. lpfc_target is down - to nothing more then the lpfc_nodelist pointer. + to nothing more than the lpfc_nodelist pointer. * Added binary sysfs file to issue mbox commands * Replaced #if __BIG_ENDIAN with #if __BIG_ENDIAN_BITFIELD for compatibility with the user space applications. diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c index b795a207742..1c5afaeb950 100644 --- a/arch/blackfin/kernel/kgdb.c +++ b/arch/blackfin/kernel/kgdb.c @@ -105,7 +105,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) * Extracts ebp, esp and eip values understandable by gdb from the values * saved by switch_to. * thread.esp points to ebp. flags and ebp are pushed in switch_to hence esp - * prior to entering switch_to is 8 greater then the value that is saved. + * prior to entering switch_to is 8 greater than the value that is saved. * If switch_to changes, change following code appropriately. 
*/ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index f07688da947..0017b9de2dd 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -434,7 +434,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) /* * It is possible to have multiple instances associated with a given * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more then one return + * have a return probe installed on them, and/or more than one return * return probe was registered for a target function. * * We can handle this because: diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index c825bde17cb..fb87c08c6b5 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -303,7 +303,7 @@ config M68KFPU_EMU_EXTRAPREC correct rounding, the emulator can (often) do the same but this extra calculation can cost quite some time, so you can disable it here. The emulator will then "only" calculate with a 64 bit - mantissa and round slightly incorrect, what is more then enough + mantissa and round slightly incorrect, what is more than enough for normal usage. 
config M68KFPU_EMU_ONLY diff --git a/arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c b/arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c index 97862f45496..caf5e9a0acc 100644 --- a/arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c +++ b/arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c @@ -148,7 +148,7 @@ int read_eeprom(char *buffer, int eeprom_size, int size) send_byte(W_HEADER); recv_ack(); - /* EEPROM with size of more then 2K need two byte addressing */ + /* EEPROM with size of more than 2K need two byte addressing */ if (eeprom_size > 2048) { send_byte(0x00); recv_ack(); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index de79915452c..b29005a5a8f 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -316,7 +316,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, /* * It is possible to have multiple instances associated with a given * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more then one return + * have a return probe installed on them, and/or more than one return * return probe was registered for a target function. * * We can handle this because: diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c index dd499c3e9da..83faa958b9d 100644 --- a/arch/powerpc/oprofile/cell/spu_profiler.c +++ b/arch/powerpc/oprofile/cell/spu_profiler.c @@ -49,7 +49,7 @@ void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_rese * of precision. This is close enough for the purpose at hand. * * The value of the timeout should be small enough that the hw - * trace buffer will not get more then about 1/3 full for the + * trace buffer will not get more than about 1/3 full for the * maximum user specified (the LFSR value) hw sampling frequency. * This is to ensure the trace buffer will never fill even if the * kernel thread scheduling varies under a heavy system load. 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 19577aeffd7..a94a3c3ae93 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -299,7 +299,7 @@ config WARN_STACK This option enables the compiler options -mwarn-framesize and -mwarn-dynamicstack. If the compiler supports these options it will generate warnings for function which either use alloca or - create a stack frame bigger then CONFIG_WARN_STACK_SIZE. + create a stack frame bigger than CONFIG_WARN_STACK_SIZE. Say N if you are unsure. diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 569079ec4ff..267f6698680 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -381,7 +381,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, /* * It is possible to have multiple instances associated with a given * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more then one return + * have a return probe installed on them, and/or more than one return * return probe was registered for a target function. * * We can handle this because: diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c index 201a6e547e4..3bc6527c95a 100644 --- a/arch/sparc/kernel/kprobes.c +++ b/arch/sparc/kernel/kprobes.c @@ -517,7 +517,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) /* * It is possible to have multiple instances associated with a given * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more then one return + * have a return probe installed on them, and/or more than one return * return probe was registered for a target function. 
* * We can handle this because: diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 6c27679ec6a..a116e6d5726 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -694,7 +694,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) /* * It is possible to have multiple instances associated with a given * task either because multiple functions in the call path have - * return probes installed on them, and/or more then one + * return probes installed on them, and/or more than one * return probe was registered for a target function. * * We can handle this because: diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index c12314c9e86..8815f3c7fec 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -252,7 +252,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); /* * The MFPGT timers on the CS5536 provide us with suitable timers to use * as clock event sources - not as good as a HPET or APIC, but certainly - * better then the PIT. This isn't a general purpose MFGPT driver, but + * better than the PIT. This isn't a general purpose MFGPT driver, but * a simplified one designed specifically to act as a clock event source. * For full details about the MFGPT, please consult the CS5536 data sheet. */ diff --git a/drivers/hwmon/fschmd.c b/drivers/hwmon/fschmd.c index 96717036893..8b2d756595d 100644 --- a/drivers/hwmon/fschmd.c +++ b/drivers/hwmon/fschmd.c @@ -75,7 +75,7 @@ static const u8 FSCHMD_REG_VOLT[3] = { 0x45, 0x42, 0x48 }; /* minimum pwm at which the fan is driven (pwm can by increased depending on the temp. Notice that for the scy some fans share there minimum speed. - Also notice that with the scy the sensor order is different then with the + Also notice that with the scy the sensor order is different than with the other chips, this order was in the 2.4 driver and kept for consistency. 
*/ static const u8 FSCHMD_REG_FAN_MIN[5][6] = { { 0x55, 0x65 }, /* pos */ diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index a3c5af1d7ec..de5263beab4 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -367,7 +367,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) if (err) goto out; } else { - /* Can't be smaller then the number of outstanding CQEs */ + /* Can't be smaller than the number of outstanding CQEs */ outst_cqe = mlx4_ib_get_outstanding_cqes(cq); if (entries < outst_cqe + 1) { err = 0; diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c index 1bcdbbb9e7d..3d45817e6dc 100644 --- a/drivers/message/i2o/i2o_scsi.c +++ b/drivers/message/i2o/i2o_scsi.c @@ -390,7 +390,7 @@ static int i2o_scsi_reply(struct i2o_controller *c, u32 m, * @i2o_dev: the I2O device which was added * * If a I2O device is added we catch the notification, because I2O classes - * other then SCSI peripheral will not be received through + * other than SCSI peripheral will not be received through * i2o_scsi_probe(). */ static void i2o_scsi_notify_device_add(struct i2o_device *i2o_dev) diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c index d38bca64bb1..d2fd550f7e0 100644 --- a/drivers/mtd/devices/pmc551.c +++ b/drivers/mtd/devices/pmc551.c @@ -34,7 +34,7 @@ * aperture size, not the dram size, and the V370PDC supplies no * other method for memory size discovery. This problem is * mostly only relevant when compiled as a module, as the - * unloading of the module with an aperture size smaller then + * unloading of the module with an aperture size smaller than * the ram will cause the driver to detect the onboard memory * size to be equal to the aperture size when the module is * reloaded. 
Soooo, to help, the module supports an msize diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 048a606cebd..25def348e5b 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -717,7 +717,7 @@ write_error: * to the real data size, although the @buf buffer has to contain the * alignment. In all other cases, @len has to be aligned. * - * It is prohibited to write more then once to logical eraseblocks of static + * It is prohibited to write more than once to logical eraseblocks of static * volumes. This function returns zero in case of success and a negative error * code in case of failure. */ diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index a74118c0574..fe81039f2a7 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -465,7 +465,7 @@ out: * This function synchronously erases physical eraseblock @pnum. If @torture * flag is not zero, the physical eraseblock is checked by means of writing * different patterns to it and reading them back. If the torturing is enabled, - * the physical eraseblock is erased more then once. + * the physical eraseblock is erased more than once. * * This function returns the number of erasures made in case of success, %-EIO * if the erasure failed or the torturing test failed, and other negative error diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 41d47e1cf15..ecde202a5a1 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -478,7 +478,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, return 0; } else { /* - * This logical eraseblock is older then the one found + * This logical eraseblock is older than the one found * previously. 
*/ if (cmp_res & 4) diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h index 2ad94040905..8419fdccc79 100644 --- a/drivers/mtd/ubi/ubi-media.h +++ b/drivers/mtd/ubi/ubi-media.h @@ -135,7 +135,7 @@ enum { * The erase counter header takes 64 bytes and has a plenty of unused space for * future usage. The unused fields are zeroed. The @version field is used to * indicate the version of UBI implementation which is supposed to be able to - * work with this UBI image. If @version is greater then the current UBI + * work with this UBI image. If @version is greater than the current UBI * version, the image is rejected. This may be useful in future if something * is changed radically. This field is duplicated in the volume identifier * header. @@ -187,7 +187,7 @@ struct ubi_ec_hdr { * (sequence number) is used to distinguish between older and newer versions of * logical eraseblocks. * - * There are 2 situations when there may be more then one physical eraseblock + * There are 2 situations when there may be more than one physical eraseblock * corresponding to the same logical eraseblock, i.e., having the same @vol_id * and @lnum values in the volume identifier header. Suppose we have a logical * eraseblock L and it is mapped to the physical eraseblock P. 
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 333c8941552..1afc61e7455 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -577,7 +577,7 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) { /* Auto re-size flag may be set only for one volume */ if (ubi->autoresize_vol_id != -1) { - ubi_err("more then one auto-resize volume (%d " + ubi_err("more than one auto-resize volume (%d " "and %d)", ubi->autoresize_vol_id, i); kfree(vol); return -EINVAL; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 14901cb82c1..891534f8210 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -128,7 +128,7 @@ * situation when the picked physical eraseblock is constantly erased after the * data is written to it. So, we have a constant which limits the highest erase * counter of the free physical eraseblock to pick. Namely, the WL sub-system - * does not pick eraseblocks with erase counter greater then the lowest erase + * does not pick eraseblocks with erase counter greater than the lowest erase * counter plus %WL_FREE_MAX_DIFF. */ #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) @@ -917,7 +917,7 @@ static int ensure_wear_leveling(struct ubi_device *ubi) /* * We schedule wear-leveling only if the difference between the * lowest erase counter of used physical eraseblocks and a high - * erase counter of free physical eraseblocks is greater then + * erase counter of free physical eraseblocks is greater than * %UBI_WL_THRESHOLD. 
*/ e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); diff --git a/drivers/net/bnx2x_link.c b/drivers/net/bnx2x_link.c index 67de94f1f30..fefa6ab1306 100644 --- a/drivers/net/bnx2x_link.c +++ b/drivers/net/bnx2x_link.c @@ -3359,7 +3359,7 @@ static u8 bnx2x_format_ver(u32 num, u8 *str, u16 len) u8 shift = 8*4; u8 digit; if (len < 10) { - /* Need more then 10chars for this format */ + /* Need more than 10chars for this format */ *str_ptr = '\0'; return -EINVAL; } diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c index d04eef53571..e1a3fc1303e 100644 --- a/drivers/net/e1000/e1000_hw.c +++ b/drivers/net/e1000/e1000_hw.c @@ -6758,7 +6758,7 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length, * returns: - E1000_ERR_XXX * E1000_SUCCESS * - * For phy's older then IGP, this function simply reads the polarity bit in the + * For phy's older than IGP, this function simply reads the polarity bit in the * Phy Status register. For IGP phy's, this bit is valid only if link speed is * 10 Mbps. If the link speed is 100 Mbps there is no polarity so this bit will * return 0. If the link speed is 1000 Mbps the polarity status is in the @@ -6834,7 +6834,7 @@ static s32 e1000_check_polarity(struct e1000_hw *hw, * returns: - E1000_ERR_XXX * E1000_SUCCESS * - * For phy's older then IGP, this function reads the Downshift bit in the Phy + * For phy's older than IGP, this function reads the Downshift bit in the Phy * Specific Status register. For IGP phy's, it reads the Downgrade bit in the * Link Health register. In IGP this bit is latched high, so the driver must * read it immediately after link is established. 
diff --git a/drivers/net/slip.h b/drivers/net/slip.h index 853e0f6ec71..9ea5c11287d 100644 --- a/drivers/net/slip.h +++ b/drivers/net/slip.h @@ -75,7 +75,7 @@ struct slip { unsigned long tx_errors; /* Planned stuff */ unsigned long rx_dropped; /* No memory for skb */ unsigned long tx_dropped; /* When MTU change */ - unsigned long rx_over_errors; /* Frame bigger then SLIP buf. */ + unsigned long rx_over_errors; /* Frame bigger than SLIP buf. */ #ifdef SL_INCLUDE_CSLIP unsigned long tx_compressed; unsigned long rx_compressed; diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index a10a83a11d9..a7a4dc4d631 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -1004,7 +1004,7 @@ static inline void bdx_rxdb_free_elem(struct rxdb *db, int n) * skb for rx. It assumes that Rx is desabled in HW * funcs are grouped for better cache usage * - * RxD fifo is smaller then RxF fifo by design. Upon high load, RxD will be + * RxD fifo is smaller than RxF fifo by design. Upon high load, RxD will be * filled and packets will be dropped by nic without getting into host or * cousing interrupt. Anyway, in that condition, host has no chance to proccess * all packets, but dropping in nic is cheaper, since it takes 0 cpu cycles @@ -1826,7 +1826,7 @@ static void bdx_tx_free(struct bdx_priv *priv) * * Pushes desc to TxD fifo and overlaps it if needed. * NOTE: this func does not check for available space. this is responsibility - * of the caller. Neither does it check that data size is smaller then + * of the caller. Neither does it check that data size is smaller than * fifo size. */ static void bdx_tx_push_desc(struct bdx_priv *priv, void *data, int size) diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c index a011666342f..50eb29ce3c8 100644 --- a/drivers/net/tokenring/smctr.c +++ b/drivers/net/tokenring/smctr.c @@ -3064,7 +3064,7 @@ static int smctr_load_node_addr(struct net_device *dev) * will consequently cause a timeout. 
* * NOTE 1: If the monitor_state is MS_BEACON_TEST_STATE, all transmit - * queues other then the one used for the lobe_media_test should be + * queues other than the one used for the lobe_media_test should be * disabled.!? * * NOTE 2: If the monitor_state is MS_BEACON_TEST_STATE and the receive_mask diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index 1667065b86a..753de1a9c4b 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -1332,7 +1332,7 @@ static int ipw2100_power_cycle_adapter(struct ipw2100_priv *priv) IPW_AUX_HOST_RESET_REG_STOP_MASTER); /* Step 2. Wait for stop Master Assert - * (not more then 50us, otherwise ret error */ + * (not more than 50us, otherwise ret error */ i = 5; do { udelay(IPW_WAIT_RESET_MASTER_ASSERT_COMPLETE_DELAY); diff --git a/drivers/net/wireless/rt2x00/rt2x00crypto.c b/drivers/net/wireless/rt2x00/rt2x00crypto.c index 37ad0d2fb64..aee9cba13eb 100644 --- a/drivers/net/wireless/rt2x00/rt2x00crypto.c +++ b/drivers/net/wireless/rt2x00/rt2x00crypto.c @@ -184,8 +184,8 @@ void rt2x00crypto_rx_insert_iv(struct sk_buff *skb, unsigned int align, * Make room for new data, note that we increase both * headsize and tailsize when required. The tailsize is * only needed when ICV data needs to be inserted and - * the padding is smaller then the ICV data. - * When alignment requirements is greater then the + * the padding is smaller than the ICV data. + * When alignment requirements is greater than the * ICV data we must trim the skb to the correct size * because we need to remove the extra bytes. 
*/ diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index dd0de3a9ed4..7015f248055 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -236,7 +236,7 @@ struct strip { unsigned long tx_errors; /* Planned stuff */ unsigned long rx_dropped; /* No memory for skb */ unsigned long tx_dropped; /* When MTU change */ - unsigned long rx_over_errors; /* Frame bigger then STRIP buf. */ + unsigned long rx_over_errors; /* Frame bigger than STRIP buf. */ unsigned long pps_timer; /* Timer to determine pps */ unsigned long rx_pps_count; /* Counter to determine pps */ diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index 892e2878d61..f8e05ce9862 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -535,8 +535,8 @@ static int dasd_eer_open(struct inode *inp, struct file *filp) eerb->buffer_page_count > INT_MAX / PAGE_SIZE) { kfree(eerb); MESSAGE(KERN_WARNING, "can't open device since module " - "parameter eer_pages is smaller then 1 or" - " bigger then %d", (int)(INT_MAX / PAGE_SIZE)); + "parameter eer_pages is smaller than 1 or" + " bigger than %d", (int)(INT_MAX / PAGE_SIZE)); unlock_kernel(); return -EINVAL; } diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index aabbeb909cc..d8a2289fcb6 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -427,7 +427,7 @@ static int vmlogrdr_receive_data(struct vmlogrdr_priv_t *priv) buffer = priv->buffer + sizeof(int); } /* - * If the record is bigger then our buffer, we receive only + * If the record is bigger than our buffer, we receive only * a part of it. We can get the rest later. 
*/ if (iucv_data_count > NET_BUFFER_SIZE) @@ -437,7 +437,7 @@ static int vmlogrdr_receive_data(struct vmlogrdr_priv_t *priv) 0, buffer, iucv_data_count, &priv->residual_length); spin_unlock_bh(&priv->priv_lock); - /* An rc of 5 indicates that the record was bigger then + /* An rc of 5 indicates that the record was bigger than * the buffer, which is OK for us. A 9 indicates that the * record was purged befor we could receive it. */ diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 8c64494444b..311ed6dea72 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1964,10 +1964,10 @@ lpfc_set_disctmo(struct lpfc_vport *vport) uint32_t tmo; if (vport->port_state == LPFC_LOCAL_CFG_LINK) { - /* For FAN, timeout should be greater then edtov */ + /* For FAN, timeout should be greater than edtov */ tmo = (((phba->fc_edtov + 999) / 1000) + 1); } else { - /* Normal discovery timeout should be > then ELS/CT timeout + /* Normal discovery timeout should be > than ELS/CT timeout * FC spec states we need 3 * ratov for CT requests */ tmo = ((phba->fc_ratov * 3) + 3); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 01dfdc8696f..a36a120561e 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -420,7 +420,7 @@ lpfc_sli_next_iocb_slot (struct lpfc_hba *phba, struct lpfc_sli_ring *pring) if (unlikely(pring->local_getidx >= max_cmd_idx)) { lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0315 Ring %d issue: portCmdGet %d " - "is bigger then cmd ring %d\n", + "is bigger than cmd ring %d\n", pring->ringno, pring->local_getidx, max_cmd_idx); @@ -1628,12 +1628,12 @@ lpfc_sli_rsp_pointers_error(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) { struct lpfc_pgp *pgp = &phba->port_gp[pring->ringno]; /* - * Ring handler: portRspPut is bigger then + * Ring handler: portRspPut is bigger than * rsp ring */ lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0312 Ring %d handler: 
portRspPut %d " - "is bigger then rsp ring %d\n", + "is bigger than rsp ring %d\n", pring->ringno, le32_to_cpu(pgp->rspPutInx), pring->numRiocb); @@ -2083,12 +2083,12 @@ lpfc_sli_handle_slow_ring_event(struct lpfc_hba *phba, portRspPut = le32_to_cpu(pgp->rspPutInx); if (portRspPut >= portRspMax) { /* - * Ring handler: portRspPut is bigger then + * Ring handler: portRspPut is bigger than * rsp ring */ lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0303 Ring %d handler: portRspPut %d " - "is bigger then rsp ring %d\n", + "is bigger than rsp ring %d\n", pring->ringno, portRspPut, portRspMax); phba->link_state = LPFC_HBA_ERROR; diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c index 8b2c619a09f..e642c22c80e 100644 --- a/drivers/serial/crisv10.c +++ b/drivers/serial/crisv10.c @@ -1203,7 +1203,7 @@ static void e100_disable_txdma_channel(struct e100_serial *info) unsigned long flags; /* Disable output DMA channel for the serial port in question - * ( set to something other then serialX) + * ( set to something other than serialX) */ local_irq_save(flags); DFLOW(DEBUG_LOG(info->line, "disable_txdma_channel %i\n", info->line)); @@ -1266,7 +1266,7 @@ static void e100_disable_rxdma_channel(struct e100_serial *info) unsigned long flags; /* Disable input DMA channel for the serial port in question - * ( set to something other then serialX) + * ( set to something other than serialX) */ local_irq_save(flags); if (info->line == 0) { diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index e6210725b9a..d012edda6d1 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -1332,7 +1332,7 @@ static void vgacon_save_screen(struct vc_data *c) c->vc_y = screen_info.orig_y; } - /* We can't copy in more then the size of the video buffer, + /* We can't copy in more than the size of the video buffer, * or we'll be copying in VGA BIOS */ if (!vga_is_gfx) diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 
6ebaa58e2c0..04697ba7f73 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -854,7 +854,7 @@ static int o2hb_thread(void *data) while (!kthread_should_stop() && !reg->hr_unclean_stop) { /* We track the time spent inside - * o2hb_do_disk_heartbeat so that we avoid more then + * o2hb_do_disk_heartbeat so that we avoid more than * hr_timeout_ms between disk writes. On busy systems * this should result in a heartbeat which is less * likely to time itself out. */ diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 219bd79ea89..d4a8be32b90 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -9,7 +9,7 @@ /* * Logic: we've got two memory sums for each process, "shared", and - * "non-shared". Shared memory may get counted more then once, for + * "non-shared". Shared memory may get counted more than once, for * each process that owns it. Non-shared memory is counted * accurately. */ diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index 91ceeda7e5b..e35b54d5059 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -40,7 +40,7 @@ config UBIFS_FS_ZLIB depends on UBIFS_FS default y help - Zlib copresses better then LZO but it is slower. Say 'Y' if unsure. + Zlib compresses better than LZO but it is slower. Say 'Y' if unsure. # Debugging-related stuff config UBIFS_FS_DEBUG diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 0e5e54d8292..175f9c590b7 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -142,7 +142,7 @@ static long long get_liability(struct ubifs_info *c) * * This function is called when an operation cannot be budgeted because there * is supposedly no free space. 
But in most cases there is some free space: - * o budgeting is pessimistic, so it always budgets more then it is actually + * o budgeting is pessimistic, so it always budgets more than it is actually * needed, so shrinking the liability is one way to make free space - the * cached data will take less space then it was budgeted for; * o GC may turn some dark space into free space (budgeting treats dark space @@ -606,7 +606,7 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) * @c: UBIFS file-system description object * * This function converts budget which was allocated for a new page of data to - * the budget of changing an existing page of data. The latter is smaller then + * the budget of changing an existing page of data. The latter is smaller than * the former, so this function only does simple re-calculation and does not * involve any write-back. */ diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 0bef6501d58..9832f9abe28 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -45,7 +45,7 @@ #define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ /* - * GC may need to move more then one LEB to make progress. The below constants + * GC may need to move more than one LEB to make progress. The below constants * define "soft" and "hard" limits on the number of LEBs the garbage collector * may move. */ diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 10ae25b7d1d..9b7c54e0cd2 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -191,7 +191,7 @@ again: if (wbuf->lnum != -1 && avail >= len) { /* * Someone else has switched the journal head and we have - * enough space now. This happens when more then one process is + * enough space now. This happens when more than one process is * trying to write to the same journal head at the same time. 
*/ dbg_jnl("return LEB %d back, already have LEB %d:%d", diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index f248533841a..e7bab52a141 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -151,7 +151,7 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) * @contention: if any contention, this is set to %1 * * This function walks the list of mounted UBIFS file-systems and frees clean - * znodes which are older then @age, until at least @nr znodes are freed. + * znodes which are older than @age, until at least @nr znodes are freed. * Returns the number of freed znodes. */ static int shrink_tnc_trees(int nr, int age, int *contention) diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 36f6cc703ef..be846d606ae 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1348,7 +1348,7 @@ xfs_finish_flags( { int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); - /* Fail a mount where the logbuf is smaller then the log stripe */ + /* Fail a mount where the logbuf is smaller than the log stripe */ if (xfs_sb_version_haslogv2(&mp->m_sb)) { if (mp->m_logbsize <= 0 && mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index eae26bb6430..64433eb411d 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -83,7 +83,7 @@ typedef enum { * @datbuf: data buffer - if NULL only oob data are read/written * @oobbuf: oob data buffer * - * Note, it is allowed to read more then one OOB area at one go, but not write. + * Note, it is allowed to read more than one OOB area at one go, but not write. * The interface assumes that the OOB write requests program only one page's * OOB area. 
*/ diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 82229317753..68bb1c501d0 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -327,9 +327,9 @@ extern struct spi_master *spi_busnum_to_master(u16 busnum); * @tx_dma: DMA address of tx_buf, if @spi_message.is_dma_mapped * @rx_dma: DMA address of rx_buf, if @spi_message.is_dma_mapped * @len: size of rx and tx buffers (in bytes) - * @speed_hz: Select a speed other then the device default for this + * @speed_hz: Select a speed other than the device default for this * transfer. If 0 the default (from @spi_device) is used. - * @bits_per_word: select a bits_per_word other then the device default + * @bits_per_word: select a bits_per_word other than the device default * for this transfer. If 0 the default (from @spi_device) is used. * @cs_change: affects chipselect after this transfer completes * @delay_usecs: microseconds to delay after this transfer before diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h index ccdc562e444..2dc2eb2b8e2 100644 --- a/include/mtd/ubi-user.h +++ b/include/mtd/ubi-user.h @@ -253,7 +253,7 @@ struct ubi_mkvol_req { * * Re-sizing is possible for both dynamic and static volumes. But while dynamic * volumes may be re-sized arbitrarily, static volumes cannot be made to be - * smaller then the number of bytes they bear. To arbitrarily shrink a static + * smaller than the number of bytes they bear. To arbitrarily shrink a static * volume, it must be wiped out first (by means of volume update operation with * zero number of bytes). */ diff --git a/kernel/pid.c b/kernel/pid.c index 064e76afa50..af9224cdd6c 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -475,7 +475,7 @@ pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) EXPORT_SYMBOL(task_session_nr_ns); /* - * Used by proc to find the first pid that is greater then or equal to nr. + * Used by proc to find the first pid that is greater than or equal to nr. 
* * If there is a pid at nr this function is exactly the same as find_pid_ns. */ diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 1ca99557e92..06f197560f3 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -45,7 +45,7 @@ * * The value 8 is somewhat carefully chosen, as anything * larger can result in overflows. NSEC_PER_JIFFY grows as - * HZ shrinks, so values greater then 8 overflow 32bits when + * HZ shrinks, so values greater than 8 overflow 32bits when * HZ=100. */ #define JIFFIES_SHIFT 8 diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 52db5f60daa..20c576f530f 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -141,8 +141,8 @@ void sctp_auth_destroy_keys(struct list_head *keys) /* Compare two byte vectors as numbers. Return values * are: * 0 - vectors are equal - * < 0 - vector 1 is smaller then vector2 - * > 0 - vector 1 is greater then vector2 + * < 0 - vector 1 is smaller than vector2 + * > 0 - vector 1 is greater than vector2 * * Algorithm is: * This is performed by selecting the numerically smaller key vector... diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 1c4e5d6c29c..3a0cd075914 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4268,9 +4268,9 @@ nomem: /* * Handle a protocol violation when the chunk length is invalid. - * "Invalid" length is identified as smaller then the minimal length a + * "Invalid" length is identified as smaller than the minimal length a * given chunk can be. For example, a SACK chunk has invalid length - * if it's length is set to be smaller then the size of sctp_sack_chunk_t. + * if its length is set to be smaller than the size of sctp_sack_chunk_t. * * We inform the other end by sending an ABORT with a Protocol Violation * error code. @@ -4300,7 +4300,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( /* * Handle a protocol violation when the parameter length is invalid. 
- * "Invalid" length is identified as smaller then the minimal length a + * "Invalid" length is identified as smaller than the minimal length a * given parameter can be. */ static sctp_disposition_t sctp_sf_violation_paramlen( diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b14a8f33e42..ff0a8f88de0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2717,7 +2717,7 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, int o paths++; } - /* Only validate asocmaxrxt if we have more then + /* Only validate asocmaxrxt if we have more than * one path/transport. We do this because path * retransmissions are only counted when we have more * then one path. diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 35c73e82553..9bd64565021 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -227,7 +227,7 @@ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn) */ bitmap_zero(map->tsn_map, map->len); } else { - /* If the gap is smaller then the map size, + /* If the gap is smaller than the map size, * shift the map by 'gap' bits and update further. */ bitmap_shift_right(map->tsn_map, map->tsn_map, gap, map->len); diff --git a/sound/usb/usx2y/usbusx2y.c b/sound/usb/usx2y/usbusx2y.c index ca26c532e77..11639bd72a5 100644 --- a/sound/usb/usx2y/usbusx2y.c +++ b/sound/usb/usx2y/usbusx2y.c @@ -238,7 +238,7 @@ static void i_usX2Y_In04Int(struct urb *urb) send = 0; for (j = 0; j < URBS_AsyncSeq && !err; ++j) if (0 == usX2Y->AS04.urb[j]->status) { - struct us428_p4out *p4out = us428ctls->p4out + send; // FIXME if more then 1 p4out is new, 1 gets lost. + struct us428_p4out *p4out = us428ctls->p4out + send; // FIXME if more than 1 p4out is new, 1 gets lost. usb_fill_bulk_urb(usX2Y->AS04.urb[j], usX2Y->chip.dev, usb_sndbulkpipe(usX2Y->chip.dev, 0x04), &p4out->val.vol, p4out->type == eLT_Light ? 
sizeof(struct us428_lights) : 5, -- cgit v1.2.3-70-g09d2 From c19a28e1193a6c854738d609ae9b2fe2f6e6bea4 Mon Sep 17 00:00:00 2001 From: Fernando Carrijo Date: Wed, 7 Jan 2009 18:09:08 -0800 Subject: remove lots of double-semicolons Cc: Ingo Molnar Cc: Thomas Gleixner Acked-by: Theodore Ts'o Acked-by: Mark Fasheh Acked-by: David S. Miller Cc: James Morris Acked-by: Casey Schaufler Acked-by: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/cpufreq/longhaul.c | 2 +- fs/ocfs2/alloc.c | 2 +- fs/ocfs2/file.c | 2 +- net/ipv6/route.c | 2 +- net/ipv6/sysctl_net_ipv6.c | 2 +- net/sched/sch_sfq.c | 2 +- security/smack/smackfs.c | 2 +- sound/soc/au1x/dbdma2.c | 2 +- sound/soc/davinci/davinci-pcm.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/ocfs2') diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index b0461856acf..a4cff5d6e38 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -982,7 +982,7 @@ static int __init longhaul_init(void) case 10: printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); default: - ;; + ; } return -ENODEV; diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 54ff4c77aaa..d861096c9d8 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -3868,7 +3868,7 @@ static void ocfs2_split_record(struct inode *inode, struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el; struct ocfs2_extent_rec *rec, *tmprec; - right_el = path_leaf_el(right_path);; + right_el = path_leaf_el(right_path); if (left_path) left_el = path_leaf_el(left_path); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index e8f795f978a..a5887df2cd8 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1605,7 +1605,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd, struct ocfs2_space_resv *sr) { struct inode *inode = file->f_path.dentry->d_inode; - struct ocfs2_super *osb = 
OCFS2_SB(inode->i_sb);; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && !ocfs2_writes_unwritten_extents(osb)) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 76f06b94ab9..c4a59824ac2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2752,7 +2752,7 @@ int __init ip6_route_init(void) kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, SLAB_HWCACHE_ALIGN, NULL); if (!ip6_dst_ops_template.kmem_cachep) - goto out;; + goto out; ret = register_pernet_subsys(&ip6_route_net_ops); if (ret) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 9048fe7e7ea..a031034720b 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -128,7 +128,7 @@ static struct ctl_table_header *ip6_header; int ipv6_sysctl_register(void) { - int err = -ENOMEM;; + int err = -ENOMEM; ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_table); if (ip6_header == NULL) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index f3965df0055..33133d27b53 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -435,7 +435,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) int i; q->perturb_timer.function = sfq_perturbation; - q->perturb_timer.data = (unsigned long)sch;; + q->perturb_timer.data = (unsigned long)sch; init_timer_deferrable(&q->perturb_timer); for (i = 0; i < SFQ_HASH_DIVISOR; i++) diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index bf107a389ac..71e2b914363 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -569,7 +569,7 @@ static ssize_t smk_write_cipso(struct file *file, const char __user *buf, if (skp == NULL) goto out; - rule += SMK_LABELLEN;; + rule += SMK_LABELLEN; ret = sscanf(rule, "%d", &maplevel); if (ret != 1 || maplevel > SMACK_CIPSO_MAXLEVEL) goto out; diff --git a/sound/soc/au1x/dbdma2.c b/sound/soc/au1x/dbdma2.c index 74c823d60f9..bc8d654576c 100644 --- a/sound/soc/au1x/dbdma2.c 
+++ b/sound/soc/au1x/dbdma2.c @@ -187,7 +187,7 @@ static int au1x_pcm_dbdma_realloc(struct au1xpsc_audio_dmadata *pcd, au1x_pcm_dmatx_cb, (void *)pcd); if (!pcd->ddma_chan) - return -ENOMEM;; + return -ENOMEM; au1xxx_dbdma_set_devwidth(pcd->ddma_chan, msbits); au1xxx_dbdma_ring_alloc(pcd->ddma_chan, 2); diff --git a/sound/soc/davinci/davinci-pcm.c b/sound/soc/davinci/davinci-pcm.c index 74abc9b4f1c..366049d8578 100644 --- a/sound/soc/davinci/davinci-pcm.c +++ b/sound/soc/davinci/davinci-pcm.c @@ -212,7 +212,7 @@ davinci_pcm_pointer(struct snd_pcm_substream *substream) if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) count = src - runtime->dma_addr; else - count = dst - runtime->dma_addr;; + count = dst - runtime->dma_addr; spin_unlock(&prtd->lock); -- cgit v1.2.3-70-g09d2 From 73ac36ea14fd18ea3dc057e41b16ff31a3c0bd5a Mon Sep 17 00:00:00 2001 From: Coly Li Date: Wed, 7 Jan 2009 18:09:16 -0800 Subject: fix similar typos to successfull When I review ocfs2 code, find there are 2 typos to "successfull". After doing grep "successfull " in kernel tree, 22 typos found totally -- great minds always think alike :) This patch fixes all the similar typos. Thanks for Randy's ack and comments. 
Signed-off-by: Coly Li Acked-by: Randy Dunlap Acked-by: Roland Dreier Cc: Jeremy Kerr Cc: Jeff Garzik Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Theodore Ts'o Cc: Mark Fasheh Cc: Vlad Yasevich Cc: Sridhar Samudrala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/hwmon/abituguru-datasheet | 4 ++-- Documentation/scsi/scsi_fc_transport.txt | 4 ++-- arch/powerpc/platforms/cell/spufs/spufs.h | 2 +- drivers/infiniband/hw/nes/nes_cm.c | 2 +- drivers/isdn/hardware/eicon/debuglib.h | 2 +- drivers/isdn/hardware/eicon/os_4bri.c | 2 +- drivers/isdn/hardware/eicon/os_bri.c | 2 +- drivers/isdn/hardware/eicon/os_pri.c | 2 +- drivers/mtd/ubi/kapi.c | 2 +- drivers/net/wireless/ath5k/dma.c | 2 +- drivers/net/wireless/zd1211rw/zd_mac.c | 2 +- drivers/s390/block/dasd_3990_erp.c | 2 +- drivers/s390/block/dasd_int.h | 2 +- drivers/s390/char/tape_3590.c | 2 +- drivers/s390/cio/cio.c | 2 +- drivers/s390/cio/qdio_main.c | 2 +- fs/ext4/extents.c | 2 +- fs/ocfs2/dlmglue.c | 4 ++-- net/sctp/auth.c | 2 +- 19 files changed, 22 insertions(+), 22 deletions(-) (limited to 'fs/ocfs2') diff --git a/Documentation/hwmon/abituguru-datasheet b/Documentation/hwmon/abituguru-datasheet index 4d184f2db0e..d9251efdcec 100644 --- a/Documentation/hwmon/abituguru-datasheet +++ b/Documentation/hwmon/abituguru-datasheet @@ -121,7 +121,7 @@ Once all bytes have been read data will hold 0x09, but there is no reason to test for this. Notice that the number of bytes is bank address dependent see above and below. -After completing a successfull read it is advised to put the uGuru back in +After completing a successful read it is advised to put the uGuru back in ready mode, so that it is ready for the next read / write cycle. This way if your program / driver is unloaded and later loaded again the detection algorithm described above will still work. @@ -141,7 +141,7 @@ don't ask why this is the way it is. Once DATA holds 0x01 read CMD it should hold 0xAC now. 
-After completing a successfull write it is advised to put the uGuru back in +After completing a successful write it is advised to put the uGuru back in ready mode, so that it is ready for the next read / write cycle. This way if your program / driver is unloaded and later loaded again the detection algorithm described above will still work. diff --git a/Documentation/scsi/scsi_fc_transport.txt b/Documentation/scsi/scsi_fc_transport.txt index 38d324d62b2..e5b071d4661 100644 --- a/Documentation/scsi/scsi_fc_transport.txt +++ b/Documentation/scsi/scsi_fc_transport.txt @@ -191,7 +191,7 @@ Vport States: This is equivalent to a driver "attach" on an adapter, which is independent of the adapter's link state. - Instantiation of the vport on the FC link via ELS traffic, etc. - This is equivalent to a "link up" and successfull link initialization. + This is equivalent to a "link up" and successful link initialization. Further information can be found in the interfaces section below for Vport Creation. @@ -320,7 +320,7 @@ Vport Creation: This is equivalent to a driver "attach" on an adapter, which is independent of the adapter's link state. - Instantiation of the vport on the FC link via ELS traffic, etc. - This is equivalent to a "link up" and successfull link initialization. + This is equivalent to a "link up" and successful link initialization. The LLDD's vport_create() function will not synchronously wait for both parts to be fully completed before returning. It must validate that the diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 15c62d3ca12..3bf908e2873 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -314,7 +314,7 @@ extern char *isolated_loader; * we need to call spu_release(ctx) before sleeping, and * then spu_acquire(ctx) when awoken. 
* - * Returns with state_mutex re-acquired when successfull or + * Returns with state_mutex re-acquired when successful or * with -ERESTARTSYS and the state_mutex dropped when interrupted. */ diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index a812db24347..6ba57e91d7a 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2705,7 +2705,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) sizeof(struct ietf_mpa_frame)); - /* notify OF layer that accept event was successfull */ + /* notify OF layer that accept event was successful */ cm_id->add_ref(cm_id); cm_event.event = IW_CM_EVENT_ESTABLISHED; diff --git a/drivers/isdn/hardware/eicon/debuglib.h b/drivers/isdn/hardware/eicon/debuglib.h index 016410cf227..8ea587783e1 100644 --- a/drivers/isdn/hardware/eicon/debuglib.h +++ b/drivers/isdn/hardware/eicon/debuglib.h @@ -235,7 +235,7 @@ typedef void ( * DbgOld) (unsigned short, char *, va_list) ; typedef void ( * DbgEv) (unsigned short, unsigned long, va_list) ; typedef void ( * DbgIrq) (unsigned short, int, char *, va_list) ; typedef struct _DbgHandle_ -{ char Registered ; /* driver successfull registered */ +{ char Registered ; /* driver successfully registered */ #define DBG_HANDLE_REG_NEW 0x01 /* this (new) structure */ #define DBG_HANDLE_REG_OLD 0x7f /* old structure (see below) */ char Version; /* version of this structure */ diff --git a/drivers/isdn/hardware/eicon/os_4bri.c b/drivers/isdn/hardware/eicon/os_4bri.c index 7b4ec3f60db..c964b8d91ad 100644 --- a/drivers/isdn/hardware/eicon/os_4bri.c +++ b/drivers/isdn/hardware/eicon/os_4bri.c @@ -997,7 +997,7 @@ diva_4bri_start_adapter(PISDN_ADAPTER IoAdapter, diva_xdi_display_adapter_features(IoAdapter->ANum); for (i = 0; i < IoAdapter->tasks; i++) { - DBG_LOG(("A(%d) %s adapter successfull started", + DBG_LOG(("A(%d) %s adapter successfully started", IoAdapter->QuadroList->QuadroAdapter[i]->ANum, (IoAdapter->tasks 
== 1) ? "BRI 2.0" : "4BRI")) diva_xdi_didd_register_adapter(IoAdapter->QuadroList->QuadroAdapter[i]->ANum); diff --git a/drivers/isdn/hardware/eicon/os_bri.c b/drivers/isdn/hardware/eicon/os_bri.c index f31bba5b16f..08f01993f46 100644 --- a/drivers/isdn/hardware/eicon/os_bri.c +++ b/drivers/isdn/hardware/eicon/os_bri.c @@ -736,7 +736,7 @@ diva_bri_start_adapter(PISDN_ADAPTER IoAdapter, IoAdapter->Properties.Features = (word) features; diva_xdi_display_adapter_features(IoAdapter->ANum); - DBG_LOG(("A(%d) BRI adapter successfull started", IoAdapter->ANum)) + DBG_LOG(("A(%d) BRI adapter successfully started", IoAdapter->ANum)) /* Register with DIDD */ diff --git a/drivers/isdn/hardware/eicon/os_pri.c b/drivers/isdn/hardware/eicon/os_pri.c index 903356547b7..5d65405c75f 100644 --- a/drivers/isdn/hardware/eicon/os_pri.c +++ b/drivers/isdn/hardware/eicon/os_pri.c @@ -513,7 +513,7 @@ diva_pri_start_adapter(PISDN_ADAPTER IoAdapter, diva_xdi_display_adapter_features(IoAdapter->ANum); - DBG_LOG(("A(%d) PRI adapter successfull started", IoAdapter->ANum)) + DBG_LOG(("A(%d) PRI adapter successfully started", IoAdapter->ANum)) /* Register with DIDD */ diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 5d9bcf109c1..4abbe573fa4 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -564,7 +564,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_unmap); * @dtype: expected data type * * This function maps an un-mapped logical eraseblock @lnum to a physical - * eraseblock. This means, that after a successfull invocation of this + * eraseblock. This means, that after a successful invocation of this * function the logical eraseblock @lnum will be empty (contain only %0xFF * bytes) and be mapped to a physical eraseblock, even if an unclean reboot * happens. 
diff --git a/drivers/net/wireless/ath5k/dma.c b/drivers/net/wireless/ath5k/dma.c index 7e2b1a67e5d..b65b4feb2d2 100644 --- a/drivers/net/wireless/ath5k/dma.c +++ b/drivers/net/wireless/ath5k/dma.c @@ -594,7 +594,7 @@ int ath5k_hw_get_isr(struct ath5k_hw *ah, enum ath5k_int *interrupt_mask) * XXX: BMISS interrupts may occur after association. * I found this on 5210 code but it needs testing. If this is * true we should disable them before assoc and re-enable them - * after a successfull assoc + some jiffies. + * after a successful assoc + some jiffies. interrupt_mask &= ~AR5K_INT_BMISS; */ } diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 9caa96a1358..a611ad85798 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -287,7 +287,7 @@ static void zd_op_stop(struct ieee80211_hw *hw) * @skb - a sk-buffer * @flags: extra flags to set in the TX status info * @ackssi: ACK signal strength - * @success - True for successfull transmission of the frame + * @success - True for successful transmission of the frame * * This information calls ieee80211_tx_status_irqsafe() if required by the * control information. 
It copies the control information into the status diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index b8f9c00633f..d82aad5224f 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -2621,7 +2621,7 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr) } } - /* double-check if current erp/cqr was successfull */ + /* double-check if current erp/cqr was successful */ if ((cqr->irb.scsw.cmd.cstat == 0x00) && (cqr->irb.scsw.cmd.dstat == (DEV_STAT_CHN_END | DEV_STAT_DEV_END))) { diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 05a14536c36..4a39084d9c9 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -199,7 +199,7 @@ struct dasd_ccw_req { #define DASD_CQR_ERROR 0x82 /* request is completed with error */ #define DASD_CQR_CLEAR_PENDING 0x83 /* request is clear pending */ #define DASD_CQR_CLEARED 0x84 /* request was cleared */ -#define DASD_CQR_SUCCESS 0x85 /* request was successfull */ +#define DASD_CQR_SUCCESS 0x85 /* request was successful */ /* per dasd_ccw_req flags */ diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index 4005c44a404..71605a179d6 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -801,7 +801,7 @@ tape_3590_done(struct tape_device *device, struct tape_request *request) static inline int tape_3590_erp_succeded(struct tape_device *device, struct tape_request *request) { - DBF_EVENT(3, "Error Recovery successfull for %s\n", + DBF_EVENT(3, "Error Recovery successful for %s\n", tape_op_verbose[request->op]); return tape_3590_done(device, request); } diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 06b71823f39..659f8a79165 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -379,7 +379,7 @@ int cio_commit_config(struct subchannel *sch) if (ccode < 0) /* -EIO if msch gets a program check. 
*/ return ccode; switch (ccode) { - case 0: /* successfull */ + case 0: /* successful */ if (stsch(sch->schid, &schib) || !css_sch_is_valid(&schib)) return -ENODEV; diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 744f928a59e..10cb0f8726e 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -114,7 +114,7 @@ static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq) * @count: count of buffers to examine * @auto_ack: automatically acknowledge buffers * - * Returns the number of successfull extracted equal buffer states. + * Returns the number of successfully extracted equal buffer states. * Stops processing if a state is different from the last buffers state. */ static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ea2ce3c0ae6..3f54db31cdc 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2536,7 +2536,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, */ newdepth = ext_depth(inode); /* - * update the extent length after successfull insert of the + * update the extent length after successful insert of the * split extent */ orig_ex.ee_len = cpu_to_le16(ee_len - diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index f731ab49179..b0c4cadd4c4 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1324,7 +1324,7 @@ again: goto out; } - mlog(0, "lock %s, successfull return from ocfs2_dlm_lock\n", + mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n", lockres->l_name); /* At this point we've gone inside the dlm and need to @@ -2951,7 +2951,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, ocfs2_dlm_dump_lksb(&lockres->l_lksb); BUG(); } - mlog(0, "lock %s, successfull return from ocfs2_dlm_unlock\n", + mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n", lockres->l_name); ocfs2_wait_on_busy_lock(lockres); diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 20c576f530f..56935bbc149 100644 
--- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -489,7 +489,7 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) return 0; out_err: - /* Clean up any successfull allocations */ + /* Clean up any successful allocations */ sctp_auth_destroy_hmacs(ep->auth_hmacs); return -ENOMEM; } -- cgit v1.2.3-70-g09d2