From 03c1c29053f678234dbd51bf3d65f3b7529021de Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 5 Sep 2012 01:21:50 -0400 Subject: ext4: ignore last group w/o enough space when resizing instead of BUG'ing If the last group does not have enough space for group tables, ignore it instead of calling BUG_ON(). Reported-by: Daniel Drake Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/resize.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 41f6ef68e2e..28031c4e15e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -200,8 +200,11 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) * be a partial of a flex group. * * @sb: super block of fs to which the groups belongs + * + * Returns 0 on a successful allocation of the metadata blocks in the + * block group. */ -static void ext4_alloc_group_tables(struct super_block *sb, +static int ext4_alloc_group_tables(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, int flexbg_size) { @@ -226,6 +229,8 @@ static void ext4_alloc_group_tables(struct super_block *sb, (last_group & ~(flexbg_size - 1)))); next_group: group = group_data[0].group; + if (src_group >= group_data[0].group + flex_gd->count) + return -ENOSPC; start_blk = ext4_group_first_block_no(sb, src_group); last_blk = start_blk + group_data[src_group - group].blocks_count; @@ -235,7 +240,6 @@ next_group: start_blk += overhead; - BUG_ON(src_group >= group_data[0].group + flex_gd->count); /* We collect contiguous blocks as much as possible. 
*/ src_group++; for (; src_group <= last_group; src_group++) @@ -300,6 +304,7 @@ next_group: group_data[i].free_blocks_count); } } + return 0; } static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, @@ -1729,7 +1734,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) */ while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, flexbg_size)) { - ext4_alloc_group_tables(sb, flex_gd, flexbg_size); + if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0) + break; err = ext4_flex_group_add(sb, resize_inode, flex_gd); if (unlikely(err)) break; -- cgit v1.2.3-70-g09d2 From d7574ad08bac1ef89cb679d2c76c91ff9281c2e2 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 5 Sep 2012 01:23:50 -0400 Subject: ext4: report the original old blocks count in a debug message when resizing Avoid changing o_blocks_count, since it is used later when reporting old blocks count in debug mode. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 28031c4e15e..591f4bda114 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1719,8 +1719,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) es->s_log_groups_per_flex) flexbg_size = 1 << es->s_log_groups_per_flex; - o_blocks_count = ext4_blocks_count(es); - if (o_blocks_count == n_blocks_count) + if (ext4_blocks_count(es) == n_blocks_count) goto out; flex_gd = alloc_flex_gd(flexbg_size); -- cgit v1.2.3-70-g09d2 From 6df935ad2fced9033ab52078825fcaf6365f34b7 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 5 Sep 2012 01:25:50 -0400 Subject: ext4: don't copy non-existent gdt blocks when resizing The resize code was copying blocks at the beginning of each block group in order to copy the superblock and block group descriptor table (gdt) blocks. 
This was, unfortunately, being done even for block groups that did not have super blocks or gdt blocks. This is a complete waste of perfectly good I/O bandwidth, so skip writing those blocks for sparse bg's. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/resize.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 591f4bda114..a0ee26c23dd 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -456,6 +456,9 @@ static int setup_new_flex_group_blocks(struct super_block *sb, gdblocks = ext4_bg_num_gdb(sb, group); start = ext4_group_first_block_no(sb, group); + if (!ext4_bg_has_super(sb, group)) + goto handle_itb; + /* Copy all of the GDT blocks into the backup in this group */ for (j = 0, block = start + 1; j < gdblocks; j++, block++) { struct buffer_head *gdb; @@ -498,6 +501,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb, goto out; } +handle_itb: /* Initialize group tables of the grop @group */ if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) goto handle_bb; -- cgit v1.2.3-70-g09d2 From 2ebd1704ded88a8ae29b5f3998b13959c715c4be Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 5 Sep 2012 01:27:50 -0400 Subject: ext4: avoid duplicate writes of the backup bg descriptor blocks The resize code was needlessly writing the backup block group descriptor blocks multiple times (once per block group) during an online resize. 
Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/resize.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index a0ee26c23dd..365d800ff8c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1358,13 +1358,15 @@ exit_journal: err = err2; if (!err) { - int i; + int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); + int gdb_num_end = ((group + flex_gd->count - 1) / + EXT4_DESC_PER_BLOCK(sb)); + update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, sizeof(struct ext4_super_block)); - for (i = 0; i < flex_gd->count; i++, group++) { + for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; - int gdb_num; - gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb); + gdb_bh = sbi->s_group_desc[gdb_num]; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, gdb_bh->b_size); -- cgit v1.2.3-70-g09d2 From 117fff10d7f140e12dd43df20d3f9fda80577460 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 5 Sep 2012 01:29:50 -0400 Subject: ext4: grow the s_flex_groups array as needed when resizing Previously, we allocated the s_flex_groups array to the maximum size that the file system could be resized. There were two problems with this approach. First, it wasted memory in the common case where the file system was not resized. Secondly, once we start allowing online resizing using the meta_bg scheme, there is no maximum size that the file system can be resized. So instead, we need to grow the s_flex_groups at online resize time. 
Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +++ fs/ext4/resize.c | 14 +++++++++----- fs/ext4/super.c | 48 +++++++++++++++++++++++++++++++++++------------- 3 files changed, 47 insertions(+), 18 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0df5ee102b6..464cff711ed 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1276,6 +1276,7 @@ struct ext4_sb_info { unsigned int s_log_groups_per_flex; struct flex_groups *s_flex_groups; + ext4_group_t s_flex_groups_allocated; /* workqueue for dio unwritten */ struct workqueue_struct *dio_unwritten_wq; @@ -2055,6 +2056,8 @@ extern void ext4_superblock_csum_set(struct super_block *sb, extern void *ext4_kvmalloc(size_t size, gfp_t flags); extern void *ext4_kvzalloc(size_t size, gfp_t flags); extern void ext4_kvfree(void *ptr); +extern int ext4_alloc_flex_bg_array(struct super_block *sb, + ext4_group_t ngroup); extern __printf(4, 5) void __ext4_error(struct super_block *, const char *, unsigned int, const char *, ...); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 365d800ff8c..3f5c67bf13a 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1503,6 +1503,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (err) goto out; + err = ext4_alloc_flex_bg_array(sb, input->group + 1); + if (err) + return err; + flex_gd.count = 1; flex_gd.groups = input; flex_gd.bg_flags = &bg_flags; @@ -1662,7 +1666,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) unsigned long n_desc_blocks; unsigned long o_desc_blocks; unsigned long desc_blocks; - int err = 0, flexbg_size = 1; + int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; o_blocks_count = ext4_blocks_count(es); @@ -1721,13 +1725,13 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) goto out; } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && - es->s_log_groups_per_flex) - flexbg_size = 1 << 
es->s_log_groups_per_flex; - if (ext4_blocks_count(es) == n_blocks_count) goto out; + err = ext4_alloc_flex_bg_array(sb, n_group + 1); + if (err) + return err; + flex_gd = alloc_flex_gd(flexbg_size); if (flex_gd == NULL) { err = -ENOMEM; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b875ff55586..b8de488889d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1925,15 +1925,45 @@ done: return res; } +int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct flex_groups *new_groups; + int size; + + if (!sbi->s_log_groups_per_flex) + return 0; + + size = ext4_flex_group(sbi, ngroup - 1) + 1; + if (size <= sbi->s_flex_groups_allocated) + return 0; + + size = roundup_pow_of_two(size * sizeof(struct flex_groups)); + new_groups = ext4_kvzalloc(size, GFP_KERNEL); + if (!new_groups) { + ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", + size / (int) sizeof(struct flex_groups)); + return -ENOMEM; + } + + if (sbi->s_flex_groups) { + memcpy(new_groups, sbi->s_flex_groups, + (sbi->s_flex_groups_allocated * + sizeof(struct flex_groups))); + ext4_kvfree(sbi->s_flex_groups); + } + sbi->s_flex_groups = new_groups; + sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); + return 0; +} + static int ext4_fill_flex_info(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = NULL; - ext4_group_t flex_group_count; ext4_group_t flex_group; unsigned int groups_per_flex = 0; - size_t size; - int i; + int i, err; sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { @@ -1942,17 +1972,9 @@ static int ext4_fill_flex_info(struct super_block *sb) } groups_per_flex = 1 << sbi->s_log_groups_per_flex; - /* We allocate both existing and potentially added groups */ - flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + - 
((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << - EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; - size = flex_group_count * sizeof(struct flex_groups); - sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL); - if (sbi->s_flex_groups == NULL) { - ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups", - flex_group_count); + err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); + if (err) goto failed; - } for (i = 0; i < sbi->s_groups_count; i++) { gdp = ext4_get_group_desc(sb, i, NULL); -- cgit v1.2.3-70-g09d2 From 28623c2f5b0dca3c3ea34fd6108940661352e276 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 5 Sep 2012 01:31:50 -0400 Subject: ext4: grow the s_group_info array as needed Previously we allocated the s_group_info array with enough space for any future possible growth of the file system via online resize. This is unfortunate because it wastes memory, and it doesn't work for the meta_bg scheme, since there is no limit based on the number of reserved gdt blocks. So add the code to grow the s_group_info array as needed. 
Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +++ fs/ext4/mballoc.c | 79 +++++++++++++++++++++++++++---------------------------- fs/ext4/resize.c | 8 ++++++ 3 files changed, 50 insertions(+), 40 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 464cff711ed..8b6902c4d7b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1233,6 +1233,7 @@ struct ext4_sb_info { spinlock_t s_md_lock; unsigned short *s_mb_offsets; unsigned int *s_mb_maxs; + unsigned int s_group_info_size; /* tunables */ unsigned long s_stripe; @@ -1971,6 +1972,8 @@ extern void ext4_exit_mballoc(void); extern void ext4_free_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t block, unsigned long count, int flags); +extern int ext4_mb_alloc_groupinfo(struct super_block *sb, + ext4_group_t ngroups); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6873571c9f4..2102c20f7e9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -24,6 +24,7 @@ #include "ext4_jbd2.h" #include "mballoc.h" #include +#include #include #include @@ -2163,6 +2164,39 @@ static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) return cachep; } +/* + * Allocate the top-level s_group_info array for the specified number + * of groups + */ +int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + unsigned size; + struct ext4_group_info ***new_groupinfo; + + size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> + EXT4_DESC_PER_BLOCK_BITS(sb); + if (size <= sbi->s_group_info_size) + return 0; + + size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size); + new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL); + if (!new_groupinfo) { + ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); + return 
-ENOMEM; + } + if (sbi->s_group_info) { + memcpy(new_groupinfo, sbi->s_group_info, + sbi->s_group_info_size * sizeof(*sbi->s_group_info)); + ext4_kvfree(sbi->s_group_info); + } + sbi->s_group_info = new_groupinfo; + sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); + ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", + sbi->s_group_info_size); + return 0; +} + /* Create and initialize ext4_group_info data for the given group. */ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, struct ext4_group_desc *desc) @@ -2252,49 +2286,14 @@ static int ext4_mb_init_backend(struct super_block *sb) ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = sbi->s_es; - int num_meta_group_infos; - int num_meta_group_infos_max; - int array_size; + int err; struct ext4_group_desc *desc; struct kmem_cache *cachep; - /* This is the number of blocks used by GDT */ - num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); - - /* - * This is the total number of blocks used by GDT including - * the number of reserved blocks for GDT. - * The s_group_info array is allocated with this value - * to allow a clean online resize without a complex - * manipulation of pointer. - * The drawback is the unused memory when no resize - * occurs but it's very low in terms of pages - * (see comments below) - * Need to handle this properly when META_BG resizing is allowed - */ - num_meta_group_infos_max = num_meta_group_infos + - le16_to_cpu(es->s_reserved_gdt_blocks); + err = ext4_mb_alloc_groupinfo(sb, ngroups); + if (err) + return err; - /* - * array_size is the size of s_group_info array. We round it - * to the next power of two because this approximation is done - * internally by kmalloc so we can have some more memory - * for free here (e.g. may be used for META_BG resize). 
- */ - array_size = 1; - while (array_size < sizeof(*sbi->s_group_info) * - num_meta_group_infos_max) - array_size = array_size << 1; - /* An 8TB filesystem with 64-bit pointers requires a 4096 byte - * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. - * So a two level scheme suffices for now. */ - sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL); - if (sbi->s_group_info == NULL) { - ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); - return -ENOMEM; - } sbi->s_buddy_cache = new_inode(sb); if (sbi->s_buddy_cache == NULL) { ext4_msg(sb, KERN_ERR, "can't get new inode"); @@ -2322,7 +2321,7 @@ err_freebuddy: cachep = get_groupinfo_cache(sb->s_blocksize_bits); while (i-- > 0) kmem_cache_free(cachep, ext4_get_group_info(sb, i)); - i = num_meta_group_infos; + i = sbi->s_group_info_size; while (i-- > 0) kfree(sbi->s_group_info[i]); iput(sbi->s_buddy_cache); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3f5c67bf13a..f288933bf4c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1507,6 +1507,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (err) return err; + err = ext4_mb_alloc_groupinfo(sb, input->group + 1); + if (err) + goto out; + flex_gd.count = 1; flex_gd.groups = input; flex_gd.bg_flags = &bg_flags; @@ -1732,6 +1736,10 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) if (err) return err; + err = ext4_mb_alloc_groupinfo(sb, n_group + 1); + if (err) + goto out; + flex_gd = alloc_flex_gd(flexbg_size); if (flex_gd == NULL) { err = -ENOMEM; -- cgit v1.2.3-70-g09d2 From 01f795f9e0d67adeccc61a8b20c28acb45fa5fd8 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 5 Sep 2012 01:33:50 -0400 Subject: ext4: add online resizing support for meta_bg and 64-bit file systems This patch adds support for resizing file systems with the meta_bg and 64bit features. [ Added a fix by tytso to fix a divide by zero when resizing a filesystem from 14 TB to 18TB. 
Also fixed overhead accounting for meta_bg file systems.] Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ioctl.c | 15 ---- fs/ext4/resize.c | 215 ++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 165 insertions(+), 65 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7f7dad78760..8b84fe28cca 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -365,26 +365,11 @@ group_add_out: return -EOPNOTSUPP; } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_META_BG)) { - ext4_msg(sb, KERN_ERR, - "Online resizing not (yet) supported with meta_bg"); - return -EOPNOTSUPP; - } - if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, sizeof(__u64))) { return -EFAULT; } - if (n_blocks_count > MAX_32_NUM && - !EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_64BIT)) { - ext4_msg(sb, KERN_ERR, - "File system only supports 32-bit block numbers"); - return -EOPNOTSUPP; - } - err = ext4_resize_begin(sb); if (err) return err; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f288933bf4c..7adc0885458 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -45,6 +45,28 @@ void ext4_resize_end(struct super_block *sb) smp_mb__after_clear_bit(); } +static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb, + ext4_group_t group) { + return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) << + EXT4_DESC_PER_BLOCK_BITS(sb); +} + +static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb, + ext4_group_t group) { + group = ext4_meta_bg_first_group(sb, group); + return ext4_group_first_block_no(sb, group); +} + +static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb, + ext4_group_t group) { + ext4_grpblk_t overhead; + overhead = ext4_bg_num_gdb(sb, group); + if (ext4_bg_has_super(sb, group)) + overhead += 1 + + le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); + return overhead; +} + #define outside(b, first, last) ((b) < (first) || (b) >= 
(last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last)) @@ -57,9 +79,7 @@ static int verify_group_input(struct super_block *sb, ext4_fsblk_t end = start + input->blocks_count; ext4_group_t group = input->group; ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; - unsigned overhead = ext4_bg_has_super(sb, group) ? - (1 + ext4_bg_num_gdb(sb, group) + - le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; + unsigned overhead = ext4_group_overhead_blocks(sb, group); ext4_fsblk_t metaend = start + overhead; struct buffer_head *bh = NULL; ext4_grpblk_t free_blocks_count, offset; @@ -209,7 +229,6 @@ static int ext4_alloc_group_tables(struct super_block *sb, int flexbg_size) { struct ext4_new_group_data *group_data = flex_gd->groups; - struct ext4_super_block *es = EXT4_SB(sb)->s_es; ext4_fsblk_t start_blk; ext4_fsblk_t last_blk; ext4_group_t src_group; @@ -234,19 +253,19 @@ next_group: start_blk = ext4_group_first_block_no(sb, src_group); last_blk = start_blk + group_data[src_group - group].blocks_count; - overhead = ext4_bg_has_super(sb, src_group) ? - (1 + ext4_bg_num_gdb(sb, src_group) + - le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; + overhead = ext4_group_overhead_blocks(sb, src_group); start_blk += overhead; /* We collect contiguous blocks as much as possible. 
*/ src_group++; - for (; src_group <= last_group; src_group++) - if (!ext4_bg_has_super(sb, src_group)) + for (; src_group <= last_group; src_group++) { + overhead = ext4_group_overhead_blocks(sb, src_group); + if (overhead != 0) last_blk += group_data[src_group - group].blocks_count; else break; + } /* Allocate block bitmaps */ for (; bb_index < flex_gd->count; bb_index++) { @@ -438,11 +457,13 @@ static int setup_new_flex_group_blocks(struct super_block *sb, ext4_group_t group, count; struct buffer_head *bh = NULL; int reserved_gdb, i, j, err = 0, err2; + int meta_bg; BUG_ON(!flex_gd->count || !group_data || group_data[0].group != sbi->s_groups_count); reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); + meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); /* This transaction may be extended/restarted along the way */ handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); @@ -452,15 +473,25 @@ static int setup_new_flex_group_blocks(struct super_block *sb, group = group_data[0].group; for (i = 0; i < flex_gd->count; i++, group++) { unsigned long gdblocks; + ext4_grpblk_t overhead; gdblocks = ext4_bg_num_gdb(sb, group); start = ext4_group_first_block_no(sb, group); - if (!ext4_bg_has_super(sb, group)) + if (meta_bg == 0 && !ext4_bg_has_super(sb, group)) goto handle_itb; + if (meta_bg == 1) { + ext4_group_t first_group; + first_group = ext4_meta_bg_first_group(sb, group); + if (first_group != group + 1 && + first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1) + goto handle_itb; + } + + block = start + ext4_bg_has_super(sb, group); /* Copy all of the GDT blocks into the backup in this group */ - for (j = 0, block = start + 1; j < gdblocks; j++, block++) { + for (j = 0; j < gdblocks; j++, block++) { struct buffer_head *gdb; ext4_debug("update backup group %#04llx\n", block); @@ -530,11 +561,11 @@ handle_bb: err = PTR_ERR(bh); goto out; } - if (ext4_bg_has_super(sb, group)) { + overhead = ext4_group_overhead_blocks(sb, group); + if (overhead != 
0) { ext4_debug("mark backup superblock %#04llx (+0)\n", start); - ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + - 1); + ext4_set_bits(bh->b_data, 0, overhead); } ext4_mark_bitmap_end(group_data[i].blocks_count, sb->s_blocksize * 8, bh->b_data); @@ -830,6 +861,45 @@ exit_bh: return err; } +/* + * add_new_gdb_meta_bg is the sister of add_new_gdb. + */ +static int add_new_gdb_meta_bg(struct super_block *sb, + handle_t *handle, ext4_group_t group) { + ext4_fsblk_t gdblock; + struct buffer_head *gdb_bh; + struct buffer_head **o_group_desc, **n_group_desc; + unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); + int err; + + gdblock = ext4_meta_bg_first_block_no(sb, group) + + ext4_bg_has_super(sb, group); + gdb_bh = sb_bread(sb, gdblock); + if (!gdb_bh) + return -EIO; + n_group_desc = ext4_kvmalloc((gdb_num + 1) * + sizeof(struct buffer_head *), + GFP_NOFS); + if (!n_group_desc) { + err = -ENOMEM; + ext4_warning(sb, "not enough memory for %lu groups", + gdb_num + 1); + return err; + } + + o_group_desc = EXT4_SB(sb)->s_group_desc; + memcpy(n_group_desc, o_group_desc, + EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + n_group_desc[gdb_num] = gdb_bh; + EXT4_SB(sb)->s_group_desc = n_group_desc; + EXT4_SB(sb)->s_gdb_count++; + ext4_kvfree(o_group_desc); + err = ext4_journal_get_write_access(handle, gdb_bh); + if (unlikely(err)) + brelse(gdb_bh); + return err; +} + /* * Called when we are adding a new group which has a backup copy of each of * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. @@ -958,16 +1028,16 @@ exit_free: * do not copy the full number of backups at this time. The resize * which changed s_groups_count will backup again. 
*/ -static void update_backups(struct super_block *sb, - int blk_off, char *data, int size) +static void update_backups(struct super_block *sb, int blk_off, char *data, + int size, int meta_bg) { struct ext4_sb_info *sbi = EXT4_SB(sb); - const ext4_group_t last = sbi->s_groups_count; + ext4_group_t last; const int bpg = EXT4_BLOCKS_PER_GROUP(sb); unsigned three = 1; unsigned five = 5; unsigned seven = 7; - ext4_group_t group; + ext4_group_t group = 0; int rest = sb->s_blocksize - size; handle_t *handle; int err = 0, err2; @@ -981,8 +1051,17 @@ static void update_backups(struct super_block *sb, ext4_superblock_csum_set(sb, (struct ext4_super_block *)data); - while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { + if (meta_bg == 0) { + group = ext4_list_backups(sb, &three, &five, &seven); + last = sbi->s_groups_count; + } else { + group = ext4_meta_bg_first_group(sb, group) + 1; + last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2); + } + + while (group < sbi->s_groups_count) { struct buffer_head *bh; + ext4_fsblk_t backup_block; /* Out of journal space, and can't get more - abort - so sad */ if (ext4_handle_valid(handle) && @@ -991,13 +1070,20 @@ static void update_backups(struct super_block *sb, (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) break; - bh = sb_getblk(sb, group * bpg + blk_off); + if (meta_bg == 0) + backup_block = group * bpg + blk_off; + else + backup_block = (ext4_group_first_block_no(sb, group) + + ext4_bg_has_super(sb, group)); + + bh = sb_getblk(sb, backup_block); if (!bh) { err = -EIO; break; } - ext4_debug("update metadata backup %#04lx\n", - (unsigned long)bh->b_blocknr); + ext4_debug("update metadata backup %llu(+%llu)\n", + backup_block, backup_block - + ext4_group_first_block_no(sb, group)); if ((err = ext4_journal_get_write_access(handle, bh))) break; lock_buffer(bh); @@ -1010,6 +1096,13 @@ static void update_backups(struct super_block *sb, if (unlikely(err)) ext4_std_error(sb, err); brelse(bh); + + if 
(meta_bg == 0) + group = ext4_list_backups(sb, &three, &five, &seven); + else if (group == last) + break; + else + group = last; } if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; @@ -1052,7 +1145,9 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, struct ext4_super_block *es = sbi->s_es; struct buffer_head *gdb_bh; int i, gdb_off, gdb_num, err = 0; + int meta_bg; + meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); for (i = 0; i < count; i++, group++) { int reserved_gdb = ext4_bg_has_super(sb, group) ? le16_to_cpu(es->s_reserved_gdt_blocks) : 0; @@ -1072,8 +1167,11 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) err = reserve_backup_gdb(handle, resize_inode, group); - } else + } else if (meta_bg != 0) { + err = add_new_gdb_meta_bg(sb, handle, group); + } else { err = add_new_gdb(handle, resize_inode, group); + } if (err) break; } @@ -1225,7 +1323,7 @@ static void ext4_update_super(struct super_block *sb, } reserved_blocks = ext4_r_blocks_count(es) * 100; - do_div(reserved_blocks, ext4_blocks_count(es)); + reserved_blocks = div64_u64(reserved_blocks, ext4_blocks_count(es)); reserved_blocks *= blocks_count; do_div(reserved_blocks, 100); @@ -1236,6 +1334,7 @@ static void ext4_update_super(struct super_block *sb, le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * flex_gd->count); + ext4_debug("free blocks count %llu", ext4_free_blocks_count(es)); /* * We need to protect s_groups_count against other CPUs seeing * inconsistent state in the superblock. 
@@ -1270,6 +1369,8 @@ static void ext4_update_super(struct super_block *sb, percpu_counter_add(&sbi->s_freeinodes_counter, EXT4_INODES_PER_GROUP(sb) * flex_gd->count); + ext4_debug("free blocks count %llu", + percpu_counter_read(&sbi->s_freeclusters_counter)); if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && sbi->s_log_groups_per_flex) { @@ -1361,15 +1462,17 @@ exit_journal: int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int gdb_num_end = ((group + flex_gd->count - 1) / EXT4_DESC_PER_BLOCK(sb)); + int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, + EXT4_FEATURE_INCOMPAT_META_BG); update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext4_super_block)); + sizeof(struct ext4_super_block), 0); for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; gdb_bh = sbi->s_group_desc[gdb_num]; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, - gdb_bh->b_size); + gdb_bh->b_size, meta_bg); } } exit: @@ -1413,9 +1516,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, group_data[i].group = group + i; group_data[i].blocks_count = blocks_per_group; - overhead = ext4_bg_has_super(sb, group + i) ? 
- (1 + ext4_bg_num_gdb(sb, group + i) + - le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; + overhead = ext4_group_overhead_blocks(sb, group + i); group_data[i].free_blocks_count = blocks_per_group - overhead; if (ext4_has_group_desc_csum(sb)) flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | @@ -1563,11 +1664,13 @@ errout: err = err2; if (!err) { + ext4_fsblk_t first_block; + first_block = ext4_group_first_block_no(sb, 0); if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: extended group to %llu " "blocks\n", ext4_blocks_count(es)); - update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext4_super_block)); + update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block, + (char *)es, sizeof(struct ext4_super_block), 0); } return err; } @@ -1662,15 +1765,16 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct buffer_head *bh; - struct inode *resize_inode; - ext4_fsblk_t o_blocks_count; - ext4_group_t o_group; - ext4_group_t n_group; - ext4_grpblk_t offset, add; + struct inode *resize_inode = NULL; + ext4_grpblk_t add, offset; unsigned long n_desc_blocks; unsigned long o_desc_blocks; unsigned long desc_blocks; + ext4_group_t o_group; + ext4_group_t n_group; + ext4_fsblk_t o_blocks_count; int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; + int meta_bg; o_blocks_count = ext4_blocks_count(es); @@ -1692,22 +1796,33 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / - EXT4_DESC_PER_BLOCK(sb); + EXT4_DESC_PER_BLOCK(sb); o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / - EXT4_DESC_PER_BLOCK(sb); + EXT4_DESC_PER_BLOCK(sb); desc_blocks = n_desc_blocks - o_desc_blocks; - if (desc_blocks && - (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) || - 
le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) { - ext4_warning(sb, "No reserved GDT blocks, can't resize"); - return -EPERM; - } + meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); - resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); - if (IS_ERR(resize_inode)) { - ext4_warning(sb, "Error opening resize inode"); - return PTR_ERR(resize_inode); + if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) { + if (meta_bg) { + ext4_error(sb, "resize_inode and meta_bg enabled " + "simultaneously"); + return -EINVAL; + } + if (le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks) { + ext4_warning(sb, + "No reserved GDT blocks, can't resize"); + return -EPERM; + } + resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); + if (IS_ERR(resize_inode)) { + ext4_warning(sb, "Error opening resize inode"); + return PTR_ERR(resize_inode); + } + } else if (!meta_bg) { + ext4_warning(sb, "File system features do not permit " + "online resize"); + return -EPERM; } /* See if the device is actually as big as what was requested */ @@ -1761,8 +1876,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) out: if (flex_gd) free_flex_gd(flex_gd); - - iput(resize_inode); + if (resize_inode != NULL) + iput(resize_inode); if (test_opt(sb, DEBUG)) ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu " "upto %llu blocks", o_blocks_count, n_blocks_count); -- cgit v1.2.3-70-g09d2 From 93f9052643409c13b3b5f76833865087351f55b8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 12 Sep 2012 14:32:42 -0400 Subject: ext4: set bg_itable_unused when resizing Set bg_itable_unused for file systems that have uninit_bg enabled. This will speed up the first e2fsck run after the file system is resized. 
Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 7adc0885458..a5be589c85b 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1268,6 +1268,9 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, ext4_free_group_clusters_set(sb, gdp, EXT4_B2C(sbi, group_data->free_blocks_count)); ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); + if (ext4_has_group_desc_csum(sb)) + ext4_itable_unused_set(sb, gdp, + EXT4_INODES_PER_GROUP(sb)); gdp->bg_flags = cpu_to_le16(*bg_flags); ext4_group_desc_csum_set(sb, group, gdp); -- cgit v1.2.3-70-g09d2 From 1c6bd7173d66b3dfdefcedb38cabc1fb03997509 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 13 Sep 2012 10:19:24 -0400 Subject: ext4: convert file system to meta_bg if needed during resizing If we have run out of reserved gdt blocks, then clear the resize_inode feature and enable the meta_bg feature, so that we can continue resizing the file system seamlessly. 
Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 133 insertions(+), 17 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index a5be589c85b..5932ab5ca53 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1756,6 +1756,99 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, return err; } /* ext4_group_extend */ + +static int num_desc_blocks(struct super_block *sb, ext4_group_t groups) +{ + return (groups + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); +} + +/* + * Release the resize inode and drop the resize_inode feature if there + * are no more reserved gdt blocks, and then convert the file system + * to enable meta_bg + */ +static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) +{ + handle_t *handle; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + struct ext4_inode_info *ei = 0; + ext4_fsblk_t nr; + int i, ret, err = 0; + int credits = 1; + + ext4_msg(sb, KERN_INFO, "Converting file system to meta_bg"); + if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) { + if (es->s_reserved_gdt_blocks) { + ext4_error(sb, "Unexpected non-zero " + "s_reserved_gdt_blocks"); + return -EPERM; + } + if (!inode) { + ext4_error(sb, "Unexpected NULL resize_inode"); + return -EPERM; + } + ei = EXT4_I(inode); + + /* Do a quick sanity check of the resize inode */ + if (inode->i_blocks != 1 << (inode->i_blkbits - 9)) + goto invalid_resize_inode; + for (i = 0; i < EXT4_N_BLOCKS; i++) { + if (i == EXT4_DIND_BLOCK) { + if (ei->i_data[i]) + continue; + else + goto invalid_resize_inode; + } + if (ei->i_data[i]) + goto invalid_resize_inode; + } + credits += 3; /* block bitmap, bg descriptor, resize inode */ + } + + handle = ext4_journal_start_sb(sb, credits); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + err = ext4_journal_get_write_access(handle, 
sbi->s_sbh); + if (err) + goto errout; + + EXT4_CLEAR_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE); + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + sbi->s_es->s_first_meta_bg = + cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count)); + + err = ext4_handle_dirty_super(handle, sb); + if (err) { + ext4_std_error(sb, err); + goto errout; + } + + if (inode) { + nr = le32_to_cpu(ei->i_data[EXT4_DIND_BLOCK]); + ext4_free_blocks(handle, inode, NULL, nr, 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); + ei->i_data[EXT4_DIND_BLOCK] = 0; + inode->i_blocks = 0; + + err = ext4_mark_inode_dirty(handle, inode); + if (err) + ext4_std_error(sb, err); + } + +errout: + ret = ext4_journal_stop(handle); + if (!err) + err = ret; + return ret; + +invalid_resize_inode: + ext4_error(sb, "corrupted/inconsistent resize inode"); + return -EINVAL; +} + /* * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count * @@ -1772,13 +1865,14 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_grpblk_t add, offset; unsigned long n_desc_blocks; unsigned long o_desc_blocks; - unsigned long desc_blocks; ext4_group_t o_group; ext4_group_t n_group; ext4_fsblk_t o_blocks_count; + ext4_fsblk_t n_blocks_count_retry = 0; int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; int meta_bg; +retry: o_blocks_count = ext4_blocks_count(es); if (test_opt(sb, DEBUG)) @@ -1798,11 +1892,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); - n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / - EXT4_DESC_PER_BLOCK(sb); - o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / - EXT4_DESC_PER_BLOCK(sb); - desc_blocks = n_desc_blocks - o_desc_blocks; + n_desc_blocks = num_desc_blocks(sb, n_group + 1); + o_desc_blocks = num_desc_blocks(sb, 
sbi->s_groups_count); meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); @@ -1812,20 +1903,37 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) "simultaneously"); return -EINVAL; } - if (le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks) { - ext4_warning(sb, - "No reserved GDT blocks, can't resize"); - return -EPERM; + if (n_desc_blocks > o_desc_blocks + + le16_to_cpu(es->s_reserved_gdt_blocks)) { + n_blocks_count_retry = n_blocks_count; + n_desc_blocks = o_desc_blocks + + le16_to_cpu(es->s_reserved_gdt_blocks); + n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); + n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb); + n_group--; /* set to last group number */ } - resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); + + if (!resize_inode) + resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); if (IS_ERR(resize_inode)) { ext4_warning(sb, "Error opening resize inode"); return PTR_ERR(resize_inode); } - } else if (!meta_bg) { - ext4_warning(sb, "File system features do not permit " - "online resize"); - return -EPERM; + } + + if ((!resize_inode && !meta_bg) || n_group == o_group) { + err = ext4_convert_meta_bg(sb, resize_inode); + if (err) + goto out; + if (resize_inode) { + iput(resize_inode); + resize_inode = NULL; + } + if (n_blocks_count_retry) { + n_blocks_count = n_blocks_count_retry; + n_blocks_count_retry = 0; + goto retry; + } } /* See if the device is actually as big as what was requested */ @@ -1876,13 +1984,21 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) break; } + if (!err && n_blocks_count_retry) { + n_blocks_count = n_blocks_count_retry; + n_blocks_count_retry = 0; + free_flex_gd(flex_gd); + flex_gd = NULL; + goto retry; + } + out: if (flex_gd) free_flex_gd(flex_gd); if (resize_inode != NULL) iput(resize_inode); if (test_opt(sb, DEBUG)) - ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu " - "upto %llu blocks", o_blocks_count, n_blocks_count); + ext4_msg(sb, KERN_DEBUG, "resized 
filesystem to %llu", + n_blocks_count); return err; } -- cgit v1.2.3-70-g09d2 From 4da4a56e4f83f52d71e2c5fa86fb1ad77be09753 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 13 Sep 2012 10:24:21 -0400 Subject: ext4: log a resize update to the console every 10 seconds For very long online resizes, a periodic update to the console log is helpful for debugging and for progress reporting. Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 5932ab5ca53..3c9367b9beb 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1869,6 +1869,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_group_t n_group; ext4_fsblk_t o_blocks_count; ext4_fsblk_t n_blocks_count_retry = 0; + unsigned long last_update_time = 0; int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; int meta_bg; @@ -1977,6 +1978,13 @@ retry: */ while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, flexbg_size)) { + if (jiffies - last_update_time > HZ * 10) { + if (last_update_time) + ext4_msg(sb, KERN_INFO, + "resized to %llu blocks", + ext4_blocks_count(es)); + last_update_time = jiffies; + } if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0) break; err = ext4_flex_group_add(sb, resize_inode, flex_gd); -- cgit v1.2.3-70-g09d2 From 59e31c156a24d483bbd2ea07d4dc96043a55b6bc Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 19 Sep 2012 00:55:56 -0400 Subject: ext4: fix online resizing when the # of block groups is constant Commit 1c6bd7173d66b3 introduced a regression where an online resize operation which did not change the number of block groups would fail, i.e: mke2fs -t /dev/vdc 60000 mount /dev/vdc resize2fs /dev/vdc 60001 This was due to a bug in the logic regarding when to try converting the filesystem to use meta_bg. 
Also fix up a number of other minor issues with the online resizing code: (a) Fix a sparse warning; (b) only check to make sure the device is large enough once, instead of multiple times through the resize loop. Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3c9367b9beb..ee985ca0510 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1772,23 +1772,18 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) handle_t *handle; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - struct ext4_inode_info *ei = 0; + struct ext4_inode_info *ei = EXT4_I(inode); ext4_fsblk_t nr; int i, ret, err = 0; int credits = 1; ext4_msg(sb, KERN_INFO, "Converting file system to meta_bg"); - if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) { + if (inode) { if (es->s_reserved_gdt_blocks) { ext4_error(sb, "Unexpected non-zero " "s_reserved_gdt_blocks"); return -EPERM; } - if (!inode) { - ext4_error(sb, "Unexpected NULL resize_inode"); - return -EPERM; - } - ei = EXT4_I(inode); /* Do a quick sanity check of the resize inode */ if (inode->i_blocks != 1 << (inode->i_blkbits - 9)) @@ -1873,12 +1868,19 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; int meta_bg; + /* See if the device is actually as big as what was requested */ + bh = sb_bread(sb, n_blocks_count - 1); + if (!bh) { + ext4_warning(sb, "can't read last block, resize aborted"); + return -ENOSPC; + } + brelse(bh); + retry: o_blocks_count = ext4_blocks_count(es); - if (test_opt(sb, DEBUG)) - ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu " - "to %llu blocks", o_blocks_count, n_blocks_count); + ext4_msg(sb, KERN_INFO, "resizing filesystem from %llu " + "to %llu blocks", o_blocks_count, 
n_blocks_count); if (n_blocks_count < o_blocks_count) { /* On-line shrinking not supported */ @@ -1922,7 +1924,7 @@ retry: } } - if ((!resize_inode && !meta_bg) || n_group == o_group) { + if ((!resize_inode && !meta_bg) || n_blocks_count == o_blocks_count) { err = ext4_convert_meta_bg(sb, resize_inode); if (err) goto out; @@ -1937,14 +1939,6 @@ retry: } } - /* See if the device is actually as big as what was requested */ - bh = sb_bread(sb, n_blocks_count - 1); - if (!bh) { - ext4_warning(sb, "can't read last block, resize aborted"); - return -ENOSPC; - } - brelse(bh); - /* extend the last group */ if (n_group == o_group) add = n_blocks_count - o_blocks_count; @@ -2005,8 +1999,6 @@ out: free_flex_gd(flex_gd); if (resize_inode != NULL) iput(resize_inode); - if (test_opt(sb, DEBUG)) - ext4_msg(sb, KERN_DEBUG, "resized filesystem to %llu", - n_blocks_count); + ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count); return err; } -- cgit v1.2.3-70-g09d2 From bef53b01faeb791e27605cba1a71ba21364cb23e Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 20 Sep 2012 11:35:38 -0400 Subject: ext4: remove erroneous ext4_superblock_csum_set() in update_backups() The update_backups() function is used to backup all the metadata blocks, so we should not take it for granted that 'data' is pointed to a super block and use ext4_superblock_csum_set to calculate the checksum there. In case where the data is a group descriptor block, it will corrupt the last group descriptor, and then e2fsck will complain about it. As all the metadata checksums should already be OK when we do the backup, remove the wrong ext4_superblock_csum_set and it should be just fine. 
Reported-by: "Theodore Ts'o" Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/resize.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ee985ca0510..9f821ce3980 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1049,8 +1049,6 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, goto exit_err; } - ext4_superblock_csum_set(sb, (struct ext4_super_block *)data); - if (meta_bg == 0) { group = ext4_list_backups(sb, &three, &five, &seven); last = sbi->s_groups_count; -- cgit v1.2.3-70-g09d2 From 7f1468d1d50d368097ab13596dc08eaba7eace7f Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Tue, 25 Sep 2012 23:19:25 -0400 Subject: ext4: fix double unlock buffer mess during fs-resize bh_submit_read() is responsible for unlock bh on endio. In addition, we need to use bh_uptodate_or_lock() to avoid races. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9f821ce3980..f21fdbf5c75 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1181,17 +1181,12 @@ static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) struct buffer_head *bh = sb_getblk(sb, block); if (!bh) return NULL; - - if (bitmap_uptodate(bh)) - return bh; - - lock_buffer(bh); - if (bh_submit_read(bh) < 0) { - unlock_buffer(bh); - brelse(bh); - return NULL; + if (!bh_uptodate_or_lock(bh)) { + if (bh_submit_read(bh) < 0) { + brelse(bh); + return NULL; + } } - unlock_buffer(bh); return bh; } -- cgit v1.2.3-70-g09d2 From 0acdb8876fead922c9ffa6768c5675a37485c48c Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 26 Sep 2012 00:08:57 -0400 Subject: ext4: don't call update_backups() multiple times for the same bg When performing an online resize, we add a bunch of 
groups at one time in ext4_flex_group_add, so in most cases a lot of group descriptors will be in the same group block. But in the end of this function, update_backups will be called for every group descriptor and the same block will be copied and journalled again and again. It is really a waste. Fix things so we only update a particular bg descriptor block once and skip subsequent updates of the same block. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f21fdbf5c75..7a75e108696 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1460,6 +1460,7 @@ exit_journal: EXT4_DESC_PER_BLOCK(sb)); int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + sector_t old_gdb = 0; update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, sizeof(struct ext4_super_block), 0); @@ -1467,8 +1468,11 @@ exit_journal: struct buffer_head *gdb_bh; gdb_bh = sbi->s_group_desc[gdb_num]; + if (old_gdb == gdb_bh->b_blocknr) + continue; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, gdb_bh->b_size, meta_bg); + old_gdb = gdb_bh->b_blocknr; } } exit: -- cgit v1.2.3-70-g09d2