From 0390131ba84fd3f726f9e24fc4553828125700bb Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Wed, 7 Jan 2009 00:06:22 -0500 Subject: ext4: Allow ext4 to run without a journal A few weeks ago I posted a patch for discussion that allowed ext4 to run without a journal. Since that time I've integrated the excellent comments from Andreas and fixed several serious bugs. We're currently running with this patch and generating some performance numbers against both ext2 (with backported reservations code) and ext4 with and without a journal. It just so happens that running without a journal is slightly faster for most everything. We did iozone -T -t 4 s 2g -r 256k -T -I -i0 -i1 -i2 which creates 4 threads, each of which create and do reads and writes on a 2G file, with a buffer size of 256K, using O_DIRECT for all file opens to bypass the page cache. Results: ext2 ext4, default ext4, no journal initial writes 13.0 MB/s 15.4 MB/s 15.7 MB/s rewrites 13.1 MB/s 15.6 MB/s 15.9 MB/s reads 15.2 MB/s 16.9 MB/s 17.2 MB/s re-reads 15.3 MB/s 16.9 MB/s 17.2 MB/s random readers 5.6 MB/s 5.6 MB/s 5.7 MB/s random writers 5.1 MB/s 5.3 MB/s 5.4 MB/s So it seems that, so far, this was a useful exercise. Signed-off-by: Frank Mayhar Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'fs/ext4/ialloc.c') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 6e6052879aa..9dd21b75f4b 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -253,12 +253,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) spin_unlock(sb_bgl_lock(sbi, flex_group)); } } - BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh2); + BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, NULL, bh2); if (!fatal) fatal = err; } - BUFFER_TRACE(bitmap_bh, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bitmap_bh); + BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!fatal) fatal = err; sb->s_dirt = 1; @@ -656,15 +656,16 @@ repeat_in_this_group: ino, bitmap_bh->b_data)) { /* we won it */ BUFFER_TRACE(bitmap_bh, - "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, + "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, + inode, bitmap_bh); if (err) goto fail; goto got; } /* we lost it */ - jbd2_journal_release_buffer(handle, bitmap_bh); + ext4_handle_release_buffer(handle, bitmap_bh); if (++ino < EXT4_INODES_PER_GROUP(sb)) goto repeat_in_this_group; @@ -726,7 +727,8 @@ got: /* Don't need to dirty bitmap block if we didn't change it */ if (free) { BUFFER_TRACE(block_bh, "dirty block bitmap"); - err = ext4_journal_dirty_metadata(handle, block_bh); + err = ext4_handle_dirty_metadata(handle, + NULL, block_bh); } brelse(block_bh); @@ -771,8 +773,8 @@ got: } gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); spin_unlock(sb_bgl_lock(sbi, group)); - BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh2); + BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, NULL, bh2); if (err) goto fail; percpu_counter_dec(&sbi->s_freeinodes_counter); @@ -825,7 +827,7 @@ got: ext4_set_inode_flags(inode); if (IS_DIRSYNC(inode)) - handle->h_sync = 1; + ext4_handle_sync(handle); if (insert_inode_locked(inode) < 0) { err = -EINVAL; goto fail_drop; @@ -1028,4 +1030,3 @@ unsigned long ext4_count_dirs(struct super_block * sb) } return count; } - -- cgit v1.2.3-70-g09d2 From fde4d95ad8711c84a36735a17136c45b19746af9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 5 Jan 2009 22:17:35 -0500 Subject: ext4: remove extraneous newlines from calls to ext4_error() and ext4_warning() This removes annoying blank syslog entries emitted by ext4_error() or ext4_warning(), since these functions add their own newline. Signed-off-by: Nick Warne Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 2 +- fs/ext4/ialloc.c | 2 +- fs/ext4/mballoc.c | 24 ++++++++++++------------ fs/ext4/resize.c | 7 +++---- fs/ext4/super.c | 4 ++-- 5 files changed, 19 insertions(+), 20 deletions(-) (limited to 'fs/ext4/ialloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 31ebeb5e7b0..0cb1c4572f5 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -100,7 +100,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, * essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { ext4_error(sb, __func__, - "Checksum bad for group %lu\n", block_group); + "Checksum bad for group %lu", block_group); gdp->bg_free_blocks_count = 0; gdp->bg_free_inodes_count = 0; gdp->bg_itable_unused = 0; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 9dd21b75f4b..4794d2ce613 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -74,7 +74,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, /* If checksum is bad mark all blocks and inodes use to prevent * allocation, essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { - ext4_error(sb, __func__, "Checksum bad for group %lu\n", + ext4_error(sb, __func__, "Checksum bad for group %lu", block_group); gdp->bg_free_blocks_count = 0; gdp->bg_free_inodes_count = 0; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index edb512b2ec4..48d606cd740 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -447,7 +447,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); ext4_error(sb, __func__, "double-free of inode" - " %lu's block %llu(bit %u in group %lu)\n", + " %lu's block %llu(bit %u in group %lu)", inode ? inode->i_ino : 0, blocknr, first + i, e4b->bd_group); } @@ -691,7 +691,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, if (free != grp->bb_free) { ext4_error(sb, __func__, - "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", + "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd", group, free, grp->bb_free); /* * If we intent to continue, we consider group descritor @@ -1096,7 +1096,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); ext4_unlock_group(sb, e4b->bd_group); ext4_error(sb, __func__, "double-free of inode" - " %lu's block %llu(bit %u in group %lu)\n", + " %lu's block %llu(bit %u in group %lu)", inode ? inode->i_ino : 0, blocknr, block, e4b->bd_group); ext4_lock_group(sb, e4b->bd_group); @@ -1576,7 +1576,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, * we have free blocks */ ext4_error(sb, __func__, "%d free blocks as per " - "group info. But bitmap says 0\n", + "group info. But bitmap says 0", free); break; } @@ -1585,7 +1585,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, BUG_ON(ex.fe_len <= 0); if (free < ex.fe_len) { ext4_error(sb, __func__, "%d free blocks as per " - "group info. But got %d blocks\n", + "group info. But got %d blocks", free, ex.fe_len); /* * The number of free blocks differs. This mostly @@ -3629,7 +3629,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, pa, (unsigned long) pa->pa_lstart, (unsigned long) pa->pa_pstart, (unsigned long) pa->pa_len); - ext4_error(sb, __func__, "free %u, pa_free %u\n", + ext4_error(sb, __func__, "free %u, pa_free %u", free, pa->pa_free); /* * pa is already deleted so we use the value obtained @@ -3703,14 +3703,14 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, bitmap_bh = ext4_read_block_bitmap(sb, group); if (bitmap_bh == NULL) { ext4_error(sb, __func__, "Error in reading block " - "bitmap for %lu\n", group); + "bitmap for %lu", group); return 0; } err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { ext4_error(sb, __func__, "Error in loading buddy " - "information for %lu\n", group); + "information for %lu", group); put_bh(bitmap_bh); return 0; } @@ -3877,14 +3877,14 @@ repeat: err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { ext4_error(sb, __func__, "Error in loading buddy " - "information for %lu\n", group); + "information for %lu", group); continue; } bitmap_bh = ext4_read_block_bitmap(sb, group); if (bitmap_bh == NULL) { ext4_error(sb, __func__, "Error in reading block " - "bitmap for %lu\n", group); + "bitmap for %lu", group); ext4_mb_release_desc(&e4b); continue; } @@ -4149,7 +4149,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); if (ext4_mb_load_buddy(sb, group, &e4b)) { ext4_error(sb, __func__, "Error in loading buddy " - "information for %lu\n", group); + "information for %lu", group); continue; } ext4_lock_group(sb, group); @@ -4446,7 +4446,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, else { ext4_unlock_group(sb, group); ext4_error(sb, __func__, - "Double free of blocks %d (%d %d)\n", + "Double free of blocks %d (%d %d)", block, entry->start_blk, entry->count); return 0; } diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 1665aa131d1..41133811744 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -762,13 +762,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (ext4_blocks_count(es) + input->blocks_count < ext4_blocks_count(es)) { - ext4_warning(sb, __func__, "blocks_count overflow\n"); + ext4_warning(sb, __func__, "blocks_count overflow"); return -EINVAL; } if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < le32_to_cpu(es->s_inodes_count)) { - ext4_warning(sb, __func__, "inodes_count overflow\n"); + ext4_warning(sb, __func__, "inodes_count overflow"); return -EINVAL; } @@ -999,8 +999,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, " too large to resize to %llu blocks safely\n", sb->s_id, n_blocks_count); if (sizeof(sector_t) < 8) - ext4_warning(sb, __func__, - "CONFIG_LBD not enabled\n"); + ext4_warning(sb, __func__, "CONFIG_LBD not enabled"); return -EINVAL; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9b9076d9c4f..dc27d4c613c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1309,7 +1309,7 @@ set_qf_format: EXT4_FEATURE_INCOMPAT_EXTENTS)) { ext4_warning(sb, __func__, "extents feature not enabled " - "on this filesystem, use tune2fs\n"); + "on this filesystem, use tune2fs"); return 0; } set_opt(sbi->s_mount_opt, EXTENTS); @@ -1993,7 +1993,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) else ext4_warning(sb, __func__, "extents feature not enabled on this filesystem, " - "use tune2fs.\n"); + "use tune2fs."); /* * enable delayed allocation by default -- cgit v1.2.3-70-g09d2 From a9df9a49102f3578909cba7bd33784eb3b9caaa4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 5 Jan 2009 22:18:16 -0500 Subject: ext4: Make ext4_group_t be an unsigned int Nearly all places in the ext3/4 code which uses "unsigned long" is probably a bug, since on 32-bit systems a ulong a 32-bits, which means we are wasting stack space on 64-bit systems. Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 10 +++++----- fs/ext4/ext4.h | 2 +- fs/ext4/ext4_i.h | 2 +- fs/ext4/ialloc.c | 8 ++++---- fs/ext4/mballoc.c | 58 +++++++++++++++++++++++++++---------------------------- fs/ext4/resize.c | 4 ++-- fs/ext4/super.c | 14 +++++++------- 7 files changed, 49 insertions(+), 49 deletions(-) (limited to 'fs/ext4/ialloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 0cb1c4572f5..a711898923f 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -100,7 +100,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, * essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { ext4_error(sb, __func__, - "Checksum bad for group %lu", block_group); + "Checksum bad for group %u", block_group); gdp->bg_free_blocks_count = 0; gdp->bg_free_inodes_count = 0; gdp->bg_itable_unused = 0; @@ -213,7 +213,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, if (block_group >= sbi->s_groups_count) { ext4_error(sb, "ext4_get_group_desc", "block_group >= groups_count - " - "block_group = %lu, groups_count = %lu", + "block_group = %u, groups_count = %u", block_group, sbi->s_groups_count); return NULL; @@ -225,7 +225,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, if (!sbi->s_group_desc[group_desc]) { ext4_error(sb, "ext4_get_group_desc", "Group descriptor not loaded - " - "block_group = %lu, group_desc = %lu, desc = %lu", + "block_group = %u, group_desc = %lu, desc = %lu", block_group, group_desc, offset); return NULL; } @@ -315,7 +315,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) if (unlikely(!bh)) { ext4_error(sb, __func__, "Cannot read block bitmap - " - "block_group = %lu, block_bitmap = %llu", + "block_group = %u, block_bitmap = %llu", block_group, bitmap_blk); return NULL; } @@ -337,7 +337,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) put_bh(bh); ext4_error(sb, __func__, "Cannot read block bitmap - " - "block_group = %lu, block_bitmap = %llu", + "block_group = %u, block_bitmap = %llu", block_group, bitmap_blk); return NULL; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9ba9fd6d14d..e9aacecfbf4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -965,7 +965,7 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) #define ERR_BAD_DX_DIR -75000 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, - unsigned long *blockgrpp, ext4_grpblk_t *offsetp); + ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp); extern struct proc_dir_entry *ext4_proc_root; diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h index acc0b726d8a..0a9ebe58019 100644 --- a/fs/ext4/ext4_i.h +++ b/fs/ext4/ext4_i.h @@ -31,7 +31,7 @@ typedef unsigned long long ext4_fsblk_t; typedef __u32 ext4_lblk_t; /* data type for block group number */ -typedef unsigned long ext4_group_t; +typedef unsigned int ext4_group_t; #define rsv_start rsv_window._rsv_start #define rsv_end rsv_window._rsv_end diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 4794d2ce613..cac3617ec78 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -74,7 +74,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, /* If checksum is bad mark all blocks and inodes use to prevent * allocation, essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { - ext4_error(sb, __func__, "Checksum bad for group %lu", + ext4_error(sb, __func__, "Checksum bad for group %u", block_group); gdp->bg_free_blocks_count = 0; gdp->bg_free_inodes_count = 0; @@ -111,7 +111,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) if (unlikely(!bh)) { ext4_error(sb, __func__, "Cannot read inode bitmap - " - "block_group = %lu, inode_bitmap = %llu", + "block_group = %u, inode_bitmap = %llu", block_group, bitmap_blk); return NULL; } @@ -133,7 +133,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) put_bh(bh); ext4_error(sb, __func__, "Cannot read inode bitmap - " - "block_group = %lu, inode_bitmap = %llu", + "block_group = %u, inode_bitmap = %llu", block_group, bitmap_blk); return NULL; } @@ -690,7 +690,7 @@ got: ino > EXT4_INODES_PER_GROUP(sb)) { ext4_error(sb, __func__, "reserved inode or inode > inodes count - " - "block_group = %lu, inode=%lu", group, + "block_group = %u, inode=%lu", group, ino + group * EXT4_INODES_PER_GROUP(sb)); err = -EIO; goto fail; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6dea637b020..6cfe68a7e07 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -447,7 +447,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); ext4_error(sb, __func__, "double-free of inode" - " %lu's block %llu(bit %u in group %lu)", + " %lu's block %llu(bit %u in group %u)", inode ? inode->i_ino : 0, blocknr, first + i, e4b->bd_group); } @@ -477,7 +477,7 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) b2 = (unsigned char *) bitmap; for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { if (b1[i] != b2[i]) { - printk(KERN_ERR "corruption in group %lu " + printk(KERN_ERR "corruption in group %u " "at byte %u(%u): %x in copy != %x " "on disk/prealloc\n", e4b->bd_group, i, i * 8, b1[i], b2[i]); @@ -691,7 +691,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, if (free != grp->bb_free) { ext4_error(sb, __func__, - "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd", + "EXT4-fs: group %u: %u blocks in bitmap, %u in gd", group, free, grp->bb_free); /* * If we intent to continue, we consider group descritor @@ -800,7 +800,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) get_bh(bh[i]); bh[i]->b_end_io = end_buffer_read_sync; submit_bh(READ, bh[i]); - mb_debug("read bitmap for group %lu\n", first_group + i); + mb_debug("read bitmap for group %u\n", first_group + i); } /* wait for I/O completion */ @@ -895,7 +895,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, struct page *page; int ret; - mb_debug("load group %lu\n", group); + mb_debug("load group %u\n", group); blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; @@ -1096,7 +1096,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); ext4_unlock_group(sb, e4b->bd_group); ext4_error(sb, __func__, "double-free of inode" - " %lu's block %llu(bit %u in group %lu)", + " %lu's block %llu(bit %u in group %u)", inode ? inode->i_ino : 0, blocknr, block, e4b->bd_group); ext4_lock_group(sb, e4b->bd_group); @@ -1934,13 +1934,13 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) if (hs->op == EXT4_MB_HISTORY_ALLOC) { fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " "%-5u %-5s %-5u %-6u\n"; - sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group, + sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, hs->result.fe_start, hs->result.fe_len, hs->result.fe_logical); - sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group, + sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group, hs->orig.fe_start, hs->orig.fe_len, hs->orig.fe_logical); - sprintf(buf3, "%lu/%d/%u@%u", hs->goal.fe_group, + sprintf(buf3, "%u/%d/%u@%u", hs->goal.fe_group, hs->goal.fe_start, hs->goal.fe_len, hs->goal.fe_logical); seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2, @@ -1949,20 +1949,20 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) hs->buddy ? 1 << hs->buddy : 0); } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) { fmt = "%-5u %-8u %-23s %-23s %-23s\n"; - sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group, + sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, hs->result.fe_start, hs->result.fe_len, hs->result.fe_logical); - sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group, + sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group, hs->orig.fe_start, hs->orig.fe_len, hs->orig.fe_logical); seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2); } else if (hs->op == EXT4_MB_HISTORY_DISCARD) { - sprintf(buf2, "%lu/%d/%u", hs->result.fe_group, + sprintf(buf2, "%u/%d/%u", hs->result.fe_group, hs->result.fe_start, hs->result.fe_len); seq_printf(seq, "%-5u %-8u %-23s discard\n", hs->pid, hs->ino, buf2); } else if (hs->op == EXT4_MB_HISTORY_FREE) { - sprintf(buf2, "%lu/%d/%u", hs->result.fe_group, + sprintf(buf2, "%u/%d/%u", hs->result.fe_group, hs->result.fe_start, hs->result.fe_len); seq_printf(seq, "%-5u %-8u %-23s free\n", hs->pid, hs->ino, buf2); @@ -2075,7 +2075,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) return NULL; group = *pos + 1; - return (void *) group; + return (void *) ((unsigned long) group); } static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) @@ -2088,13 +2088,13 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) if (*pos < 0 || *pos >= sbi->s_groups_count) return NULL; group = *pos + 1; - return (void *) group;; + return (void *) ((unsigned long) group); } static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) { struct super_block *sb = seq->private; - long group = (long) v; + ext4_group_t group = (ext4_group_t) ((unsigned long) v); int i; int err; struct ext4_buddy e4b; @@ -2116,7 +2116,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) sizeof(struct ext4_group_info); err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { - seq_printf(seq, "#%-5lu: I/O error\n", group); + seq_printf(seq, "#%-5u: I/O error\n", group); return 0; } ext4_lock_group(sb, group); @@ -2124,7 +2124,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) ext4_unlock_group(sb, group); ext4_mb_release_desc(&e4b); - seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free, + seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, sg.info.bb_fragments, sg.info.bb_first_free); for (i = 0; i <= 13; i++) seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? @@ -2459,7 +2459,7 @@ static int ext4_mb_init_backend(struct super_block *sb) desc = ext4_get_group_desc(sb, i, NULL); if (desc == NULL) { printk(KERN_ERR - "EXT4-fs: can't read descriptor %lu\n", i); + "EXT4-fs: can't read descriptor %u\n", i); goto err_freebuddy; } if (ext4_mb_add_groupinfo(sb, i, desc) != 0) @@ -2657,7 +2657,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) list_for_each_safe(l, ltmp, &txn->t_private_list) { entry = list_entry(l, struct ext4_free_data, list); - mb_debug("gonna free %u blocks in group %lu (0x%p):", + mb_debug("gonna free %u blocks in group %u (0x%p):", entry->count, entry->group, entry); err = ext4_mb_load_buddy(sb, entry->group, &e4b); @@ -2829,7 +2829,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (!gdp) goto out_err; - ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group, + ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, gdp->bg_free_blocks_count); err = ext4_journal_get_write_access(handle, gdp_bh); @@ -3351,7 +3351,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, preallocated += len; count++; } - mb_debug("prellocated %u for group %lu\n", preallocated, group); + mb_debug("prellocated %u for group %u\n", preallocated, group); } static void ext4_mb_pa_callback(struct rcu_head *head) @@ -3368,7 +3368,7 @@ static void ext4_mb_pa_callback(struct rcu_head *head) static void ext4_mb_put_pa(struct ext4_allocation_context *ac, struct super_block *sb, struct ext4_prealloc_space *pa) { - unsigned long grp; + ext4_group_t grp; if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) return; @@ -3697,7 +3697,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, int busy = 0; int free = 0; - mb_debug("discard preallocation for group %lu\n", group); + mb_debug("discard preallocation for group %u\n", group); if (list_empty(&grp->bb_prealloc_list)) return 0; @@ -3705,14 +3705,14 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, bitmap_bh = ext4_read_block_bitmap(sb, group); if (bitmap_bh == NULL) { ext4_error(sb, __func__, "Error in reading block " - "bitmap for %lu", group); + "bitmap for %u", group); return 0; } err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { ext4_error(sb, __func__, "Error in loading buddy " - "information for %lu", group); + "information for %u", group); put_bh(bitmap_bh); return 0; } @@ -3879,14 +3879,14 @@ repeat: err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { ext4_error(sb, __func__, "Error in loading buddy " - "information for %lu", group); + "information for %u", group); continue; } bitmap_bh = ext4_read_block_bitmap(sb, group); if (bitmap_bh == NULL) { ext4_error(sb, __func__, "Error in reading block " - "bitmap for %lu", group); + "bitmap for %u", group); ext4_mb_release_desc(&e4b); continue; } @@ -4151,7 +4151,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); if (ext4_mb_load_buddy(sb, group, &e4b)) { ext4_error(sb, __func__, "Error in loading buddy " - "information for %lu", group); + "information for %u", group); continue; } ext4_lock_group(sb, group); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 41133811744..1865d6a53de 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -50,7 +50,7 @@ static int verify_group_input(struct super_block *sb, ext4_get_group_no_and_offset(sb, start, NULL, &offset); if (group != sbi->s_groups_count) ext4_warning(sb, __func__, - "Cannot add at group %u (only %lu groups)", + "Cannot add at group %u (only %u groups)", input->group, sbi->s_groups_count); else if (offset != 0) ext4_warning(sb, __func__, "Last group not full"); @@ -716,7 +716,7 @@ static void update_backups(struct super_block *sb, exit_err: if (err) { ext4_warning(sb, __func__, - "can't update backup for group %lu (err %d), " + "can't update backup for group %u (err %d), " "forcing fsck on next reboot", group, err); sbi->s_mount_state &= ~EXT4_VALID_FS; sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index da377f9521b..8fa57be5040 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1470,7 +1470,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, ext4_commit_super(sb, es, 1); if (test_opt(sb, DEBUG)) - printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, " + printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " "bpg=%lu, ipg=%lu, mo=%04lx]\n", sb->s_blocksize, sbi->s_groups_count, @@ -1514,7 +1514,7 @@ static int ext4_fill_flex_info(struct super_block *sb) sizeof(struct flex_groups), GFP_KERNEL); if (sbi->s_flex_groups == NULL) { printk(KERN_ERR "EXT4-fs: not enough memory for " - "%lu flex groups\n", flex_group_count); + "%u flex groups\n", flex_group_count); goto failed; } @@ -1599,14 +1599,14 @@ static int ext4_check_descriptors(struct super_block *sb) block_bitmap = ext4_block_bitmap(sb, gdp); if (block_bitmap < first_block || block_bitmap > last_block) { printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " - "Block bitmap for group %lu not in group " + "Block bitmap for group %u not in group " "(block %llu)!\n", i, block_bitmap); return 0; } inode_bitmap = ext4_inode_bitmap(sb, gdp); if (inode_bitmap < first_block || inode_bitmap > last_block) { printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " - "Inode bitmap for group %lu not in group " + "Inode bitmap for group %u not in group " "(block %llu)!\n", i, inode_bitmap); return 0; } @@ -1614,14 +1614,14 @@ static int ext4_check_descriptors(struct super_block *sb) if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " - "Inode table for group %lu not in group " + "Inode table for group %u not in group " "(block %llu)!\n", i, inode_table); return 0; } spin_lock(sb_bgl_lock(sbi, i)); if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " - "Checksum for group %lu failed (%u!=%u)\n", + "Checksum for group %u failed (%u!=%u)\n", i, le16_to_cpu(ext4_group_desc_csum(sbi, i, gdp)), le16_to_cpu(gdp->bg_checksum)); if (!(sb->s_flags & MS_RDONLY)) { @@ -3154,7 +3154,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { printk(KERN_ERR "EXT4-fs: ext4_remount: " - "Checksum for group %lu failed (%u!=%u)\n", + "Checksum for group %u failed (%u!=%u)\n", g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), le16_to_cpu(gdp->bg_checksum)); err = -EINVAL; -- cgit v1.2.3-70-g09d2 From 560671a0d3c9ad2d647fa6d09375a262e1f19c4f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 5 Jan 2009 22:20:24 -0500 Subject: ext4: Use high 16 bits of the block group descriptor's free counts fields Rename the lower bits with suffix _lo and add helper to access the values. Also rename bg_itable_unused_hi to bg_pad as in e2fsprogs. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 13 +++++---- fs/ext4/ext4.h | 26 +++++++++++++---- fs/ext4/ialloc.c | 83 +++++++++++++++++++++++++++++-------------------------- fs/ext4/inode.c | 2 +- fs/ext4/mballoc.c | 15 +++++----- fs/ext4/resize.c | 4 +-- fs/ext4/super.c | 68 +++++++++++++++++++++++++++++++++++++++++++-- 7 files changed, 149 insertions(+), 62 deletions(-) (limited to 'fs/ext4/ialloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 404d81cc915..902bf66c8df 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -102,9 +102,9 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { ext4_error(sb, __func__, "Checksum bad for group %u", block_group); - gdp->bg_free_blocks_count = 0; - gdp->bg_free_inodes_count = 0; - gdp->bg_itable_unused = 0; + ext4_free_blks_set(sb, gdp, 0); + ext4_free_inodes_set(sb, gdp, 0); + ext4_itable_unused_set(sb, gdp, 0); memset(bh->b_data, 0xff, sb->s_blocksize); return 0; } @@ -372,7 +372,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, struct ext4_group_desc *desc; struct ext4_super_block *es; struct ext4_sb_info *sbi; - int err = 0, ret; + int err = 0, ret, blk_free_count; ext4_grpblk_t blocks_freed; struct ext4_group_info *grp; @@ -444,7 +444,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, } } spin_lock(sb_bgl_lock(sbi, block_group)); - le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed); + blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); + ext4_free_blks_set(sb, desc, blk_free_count); desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); @@ -685,7 +686,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += le16_to_cpu(gdp->bg_free_blocks_count); + desc_count += ext4_free_blks_count(sb, gdp); } return desc_count; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f0b1db6acf8..ec862f4ca89 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -156,12 +156,12 @@ struct ext4_group_desc __le32 bg_block_bitmap_lo; /* Blocks bitmap block */ __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */ __le32 bg_inode_table_lo; /* Inodes table block */ - __le16 bg_free_blocks_count; /* Free blocks count */ - __le16 bg_free_inodes_count; /* Free inodes count */ - __le16 bg_used_dirs_count; /* Directories count */ + __le16 bg_free_blocks_count_lo;/* Free blocks count */ + __le16 bg_free_inodes_count_lo;/* Free inodes count */ + __le16 bg_used_dirs_count_lo; /* Directories count */ __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ - __le16 bg_itable_unused; /* Unused inodes count */ + __le16 bg_itable_unused_lo; /* Unused inodes count */ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ @@ -169,7 +169,7 @@ struct ext4_group_desc __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */ __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ __le16 bg_used_dirs_count_hi; /* Directories count MSB */ - __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ + __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ __u32 bg_reserved2[3]; }; @@ -1142,12 +1142,28 @@ extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, struct ext4_group_desc *bg); extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, struct ext4_group_desc *bg); +extern __u32 ext4_free_blks_count(struct super_block *sb, + struct ext4_group_desc *bg); +extern __u32 ext4_free_inodes_count(struct super_block *sb, + struct ext4_group_desc *bg); +extern __u32 ext4_used_dirs_count(struct super_block *sb, + struct ext4_group_desc *bg); +extern __u32 ext4_itable_unused_count(struct super_block *sb, + struct ext4_group_desc *bg); extern void ext4_block_bitmap_set(struct super_block *sb, struct ext4_group_desc *bg, ext4_fsblk_t blk); extern void ext4_inode_bitmap_set(struct super_block *sb, struct ext4_group_desc *bg, ext4_fsblk_t blk); extern void ext4_inode_table_set(struct super_block *sb, struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_free_blks_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count); +extern void ext4_free_inodes_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count); +extern void ext4_used_dirs_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count); +extern void ext4_itable_unused_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count); static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) { diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index cac3617ec78..11c4f6f5bd6 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -76,9 +76,9 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { ext4_error(sb, __func__, "Checksum bad for group %u", block_group); - gdp->bg_free_blocks_count = 0; - gdp->bg_free_inodes_count = 0; - gdp->bg_itable_unused = 0; + ext4_free_blks_set(sb, gdp, 0); + ext4_free_inodes_set(sb, gdp, 0); + ext4_itable_unused_set(sb, gdp, 0); memset(bh->b_data, 0xff, sb->s_blocksize); return 0; } @@ -168,7 +168,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) struct ext4_group_desc *gdp; struct ext4_super_block *es; struct ext4_sb_info *sbi; - int fatal = 0, err; + int fatal = 0, err, count; ext4_group_t flex_group; if (atomic_read(&inode->i_count) > 1) { @@ -236,9 +236,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) if (gdp) { spin_lock(sb_bgl_lock(sbi, block_group)); - le16_add_cpu(&gdp->bg_free_inodes_count, 1); - if (is_directory) - le16_add_cpu(&gdp->bg_used_dirs_count, -1); + count = ext4_free_inodes_count(sb, gdp) + 1; + ext4_free_inodes_set(sb, gdp, count); + if (is_directory) { + count = ext4_used_dirs_count(sb, gdp) - 1; + ext4_used_dirs_set(sb, gdp, count); + } gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); @@ -291,13 +294,13 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, for (group = 0; group < ngroups; group++) { desc = ext4_get_group_desc(sb, group, NULL); - if (!desc || !desc->bg_free_inodes_count) + if (!desc || !ext4_free_inodes_count(sb, desc)) continue; - if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) + if (ext4_free_inodes_count(sb, desc) < avefreei) continue; if (!best_desc || - (le16_to_cpu(desc->bg_free_blocks_count) > - le16_to_cpu(best_desc->bg_free_blocks_count))) { + (ext4_free_blks_count(sb, desc) > + ext4_free_blks_count(sb, best_desc))) { *best_group = group; best_desc = desc; ret = 0; @@ -369,7 +372,7 @@ found_flexbg: for (i = best_flex * flex_size; i < ngroups && i < (best_flex + 1) * flex_size; i++) { desc = ext4_get_group_desc(sb, i, &bh); - if (le16_to_cpu(desc->bg_free_inodes_count)) { + if (ext4_free_inodes_count(sb, desc)) { *best_group = i; goto out; } @@ -443,17 +446,17 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, for (i = 0; i < ngroups; i++) { grp = (parent_group + i) % ngroups; desc = ext4_get_group_desc(sb, grp, NULL); - if (!desc || !desc->bg_free_inodes_count) + if (!desc || !ext4_free_inodes_count(sb, desc)) continue; - if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) + if (ext4_used_dirs_count(sb, desc) >= best_ndir) continue; - if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) + if (ext4_free_inodes_count(sb, desc) < avefreei) continue; - if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) + if (ext4_free_blks_count(sb, desc) < avefreeb) continue; *group = grp; ret = 0; - best_ndir = le16_to_cpu(desc->bg_used_dirs_count); + best_ndir = ext4_used_dirs_count(sb, desc); } if (ret == 0) return ret; @@ -479,13 +482,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, for (i = 0; i < ngroups; i++) { *group = (parent_group + i) % ngroups; desc = ext4_get_group_desc(sb, *group, NULL); - if (!desc || !desc->bg_free_inodes_count) + if (!desc || !ext4_free_inodes_count(sb, desc)) continue; - if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) + if (ext4_used_dirs_count(sb, desc) >= max_dirs) continue; - if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) + if (ext4_free_inodes_count(sb, desc) < min_inodes) continue; - if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) + if (ext4_free_blks_count(sb, desc) < min_blocks) continue; return 0; } @@ -494,8 +497,8 @@ fallback: for (i = 0; i < ngroups; i++) { *group = (parent_group + i) % ngroups; desc = ext4_get_group_desc(sb, *group, NULL); - if (desc && desc->bg_free_inodes_count && - le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) + if (desc && ext4_free_inodes_count(sb, desc) && + ext4_free_inodes_count(sb, desc) >= avefreei) return 0; } @@ -524,8 +527,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent, */ *group = parent_group; desc = ext4_get_group_desc(sb, *group, NULL); - if (desc && le16_to_cpu(desc->bg_free_inodes_count) && - le16_to_cpu(desc->bg_free_blocks_count)) + if (desc && ext4_free_inodes_count(sb, desc) && + ext4_free_blks_count(sb, desc)) return 0; /* @@ -548,8 +551,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent, if (*group >= ngroups) *group -= ngroups; desc = ext4_get_group_desc(sb, *group, NULL); - if (desc && le16_to_cpu(desc->bg_free_inodes_count) && - le16_to_cpu(desc->bg_free_blocks_count)) + if (desc && ext4_free_inodes_count(sb, desc) && + ext4_free_blks_count(sb, desc)) return 0; } @@ -562,7 +565,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, if (++*group >= ngroups) *group = 0; desc = ext4_get_group_desc(sb, *group, NULL); - if (desc && le16_to_cpu(desc->bg_free_inodes_count)) + if (desc && ext4_free_inodes_count(sb, desc)) return 0; } @@ -591,7 +594,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) struct ext4_super_block *es; struct ext4_inode_info *ei; struct ext4_sb_info *sbi; - int ret2, err = 0; + int ret2, err = 0, count; struct inode *ret; ext4_group_t i; int free = 0; @@ -718,7 +721,7 @@ got: if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); free = ext4_free_blocks_after_init(sb, group, gdp); - gdp->bg_free_blocks_count = cpu_to_le16(free); + ext4_free_blks_set(sb, gdp, free); gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); } @@ -753,7 +756,7 @@ got: free = 0; } else { free = EXT4_INODES_PER_GROUP(sb) - - le16_to_cpu(gdp->bg_itable_unused); + ext4_itable_unused_count(sb, gdp); } /* @@ -763,13 +766,15 @@ got: * */ if (ino > free) - gdp->bg_itable_unused = - cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); + ext4_itable_unused_set(sb, gdp, + (EXT4_INODES_PER_GROUP(sb) - ino)); } - le16_add_cpu(&gdp->bg_free_inodes_count, -1); + count = ext4_free_inodes_count(sb, gdp) - 1; + ext4_free_inodes_set(sb, gdp, count); if (S_ISDIR(mode)) { - le16_add_cpu(&gdp->bg_used_dirs_count, 1); + count = ext4_used_dirs_count(sb, gdp) + 1; + ext4_used_dirs_set(sb, gdp, count); } gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); spin_unlock(sb_bgl_lock(sbi, group)); @@ -987,7 +992,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += le16_to_cpu(gdp->bg_free_inodes_count); + desc_count += ext4_free_inodes_count(sb, gdp); brelse(bitmap_bh); bitmap_bh = ext4_read_inode_bitmap(sb, i); if (!bitmap_bh) @@ -995,7 +1000,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_inodes_count), x); + i, ext4_free_inodes_count(sb, gdp), x); bitmap_count += x; } brelse(bitmap_bh); @@ -1009,7 +1014,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += le16_to_cpu(gdp->bg_free_inodes_count); + desc_count += ext4_free_inodes_count(sb, gdp); cond_resched(); } return desc_count; @@ -1026,7 +1031,7 @@ unsigned long ext4_count_dirs(struct super_block * sb) struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - count += le16_to_cpu(gdp->bg_used_dirs_count); + count += ext4_used_dirs_count(sb, gdp); } return count; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bcd5ffa76c0..56142accf5c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4014,7 +4014,7 @@ make_io: num = EXT4_INODES_PER_GROUP(sb); if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) - num -= le16_to_cpu(gdp->bg_itable_unused); + num -= ext4_itable_unused_count(sb, gdp); table += num / inodes_per_block; if (end > table) end = table; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d559a03f3eb..3809a9348f2 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2515,7 +2515,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, ext4_free_blocks_after_init(sb, group, desc); } else { meta_group_info[i]->bb_free = - le16_to_cpu(desc->bg_free_blocks_count); + ext4_free_blks_count(sb, desc); } INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); @@ -3046,12 +3046,12 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); - gdp->bg_free_blocks_count = - cpu_to_le16(ext4_free_blocks_after_init(sb, - ac->ac_b_ex.fe_group, - gdp)); + ext4_free_blks_set(sb, gdp, + ext4_free_blocks_after_init(sb, + ac->ac_b_ex.fe_group, gdp)); } - le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); + len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; + ext4_free_blks_set(sb, gdp, len); gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); @@ -4823,7 +4823,8 @@ do_more: } spin_lock(sb_bgl_lock(sbi, block_group)); - le16_add_cpu(&gdp->bg_free_blocks_count, count); + ret = ext4_free_blks_count(sb, gdp) + count; + ext4_free_blks_set(sb, gdp, ret); gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, count); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 92034d2c8a7..2d24f423fd8 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -866,8 +866,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ - gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); - gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); + ext4_free_blks_set(sb, gdp, input->free_blocks_count); + ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2415e2b0970..a3321bf2231 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -93,6 +93,38 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb, (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); } +__u32 ext4_free_blks_count(struct super_block *sb, + struct ext4_group_desc *bg) +{ + return le16_to_cpu(bg->bg_free_blocks_count_lo) | + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? + (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); +} + +__u32 ext4_free_inodes_count(struct super_block *sb, + struct ext4_group_desc *bg) +{ + return le16_to_cpu(bg->bg_free_inodes_count_lo) | + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? + (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); +} + +__u32 ext4_used_dirs_count(struct super_block *sb, + struct ext4_group_desc *bg) +{ + return le16_to_cpu(bg->bg_used_dirs_count_lo) | + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? + (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); +} + +__u32 ext4_itable_unused_count(struct super_block *sb, + struct ext4_group_desc *bg) +{ + return le16_to_cpu(bg->bg_itable_unused_lo) | + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? + (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); +} + void ext4_block_bitmap_set(struct super_block *sb, struct ext4_group_desc *bg, ext4_fsblk_t blk) { @@ -117,6 +149,38 @@ void ext4_inode_table_set(struct super_block *sb, bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); } +void ext4_free_blks_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count) +{ + bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) + bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); +} + +void ext4_free_inodes_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count) +{ + bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) + bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); +} + +void ext4_used_dirs_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count) +{ + bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) + bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); +} + +void ext4_itable_unused_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count) +{ + bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) + bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); +} + /* * Wrappers for jbd2_journal_start/end. * @@ -1561,9 +1625,9 @@ static int ext4_fill_flex_info(struct super_block *sb) flex_group = ext4_flex_group(sbi, i); sbi->s_flex_groups[flex_group].free_inodes += - le16_to_cpu(gdp->bg_free_inodes_count); + ext4_free_inodes_count(sb, gdp); sbi->s_flex_groups[flex_group].free_blocks += - le16_to_cpu(gdp->bg_free_blocks_count); + ext4_free_blks_count(sb, gdp); } return 1; -- cgit v1.2.3-70-g09d2 From 3300beda523136f9f87821e4fba85c5c9e319645 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 3 Jan 2009 22:33:39 -0500 Subject: ext4: code cleanup Rename some variables. We also unlock locks in the reverse order we acquired as a part of cleanup. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 2 +- fs/ext4/ialloc.c | 65 ++++++++++++++++++++++++++++++------------------------- fs/ext4/mballoc.c | 2 +- 3 files changed, 37 insertions(+), 32 deletions(-) (limited to 'fs/ext4/ialloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 902bf66c8df..1b26b68aa42 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -329,8 +329,8 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh, block_group, desc); set_buffer_uptodate(bh); - unlock_buffer(bh); spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + unlock_buffer(bh); return bh; } spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 11c4f6f5bd6..b47427a21f1 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -124,8 +124,8 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { ext4_init_inode_bitmap(sb, bh, block_group, desc); set_buffer_uptodate(bh); - unlock_buffer(bh); spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + unlock_buffer(bh); return bh; } spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); @@ -585,8 +585,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent, struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) { struct super_block *sb; - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *bh2; + struct buffer_head *inode_bitmap_bh = NULL; + struct buffer_head *group_desc_bh; ext4_group_t group = 0; unsigned long ino = 0; struct inode *inode; @@ -634,41 +634,44 @@ got_group: for (i = 0; i < sbi->s_groups_count; i++) { err = -EIO; - gdp = ext4_get_group_desc(sb, group, &bh2); + gdp = ext4_get_group_desc(sb, group, &group_desc_bh); if (!gdp) goto fail; - brelse(bitmap_bh); - bitmap_bh = ext4_read_inode_bitmap(sb, group); - if (!bitmap_bh) + brelse(inode_bitmap_bh); + inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); + if (!inode_bitmap_bh) goto fail; ino = 0; repeat_in_this_group: ino = ext4_find_next_zero_bit((unsigned long *) - bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino); + inode_bitmap_bh->b_data, + EXT4_INODES_PER_GROUP(sb), ino); + if (ino < EXT4_INODES_PER_GROUP(sb)) { - BUFFER_TRACE(bitmap_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, bitmap_bh); + BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, + inode_bitmap_bh); if (err) goto fail; if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group), - ino, bitmap_bh->b_data)) { + ino, inode_bitmap_bh->b_data)) { /* we won it */ - BUFFER_TRACE(bitmap_bh, + BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, - inode, - bitmap_bh); + inode, + inode_bitmap_bh); if (err) goto fail; goto got; } /* we lost it */ - ext4_handle_release_buffer(handle, bitmap_bh); + ext4_handle_release_buffer(handle, inode_bitmap_bh); if (++ino < EXT4_INODES_PER_GROUP(sb)) goto repeat_in_this_group; @@ -699,19 +702,21 @@ got: goto fail; } - BUFFER_TRACE(bh2, "get_write_access"); - err = ext4_journal_get_write_access(handle, bh2); - if (err) goto fail; + BUFFER_TRACE(group_desc_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, group_desc_bh); + if (err) + goto fail; /* We may have to initialize the block bitmap if it isn't already */ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { - struct buffer_head *block_bh = ext4_read_block_bitmap(sb, group); + struct buffer_head *block_bitmap_bh; - BUFFER_TRACE(block_bh, "get block bitmap access"); - err = ext4_journal_get_write_access(handle, block_bh); + block_bitmap_bh = ext4_read_block_bitmap(sb, group); + BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); + err = ext4_journal_get_write_access(handle, block_bitmap_bh); if (err) { - brelse(block_bh); + brelse(block_bitmap_bh); goto fail; } @@ -719,8 +724,8 @@ got: spin_lock(sb_bgl_lock(sbi, group)); /* recheck and clear flag under lock if we still need to */ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); free = ext4_free_blocks_after_init(sb, group, gdp); + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_blks_set(sb, gdp, free); gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); @@ -729,12 +734,12 @@ got: /* Don't need to dirty bitmap block if we didn't change it */ if (free) { - BUFFER_TRACE(block_bh, "dirty block bitmap"); + BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); err = ext4_handle_dirty_metadata(handle, - NULL, block_bh); + NULL, block_bitmap_bh); } - brelse(block_bh); + brelse(block_bitmap_bh); if (err) goto fail; } @@ -778,8 +783,8 @@ got: } gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); spin_unlock(sb_bgl_lock(sbi, group)); - BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, NULL, bh2); + BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); if (err) goto fail; percpu_counter_dec(&sbi->s_freeinodes_counter); @@ -881,7 +886,7 @@ out: iput(inode); ret = ERR_PTR(err); really_out: - brelse(bitmap_bh); + brelse(inode_bitmap_bh); return ret; fail_free_drop: @@ -893,7 +898,7 @@ fail_drop: inode->i_nlink = 0; unlock_new_inode(inode); iput(inode); - brelse(bitmap_bh); + brelse(inode_bitmap_bh); return ERR_PTR(err); } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3809a9348f2..aac33590ac6 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -804,8 +804,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore) ext4_init_block_bitmap(sb, bh[i], first_group + i, desc); set_buffer_uptodate(bh[i]); - unlock_buffer(bh[i]); spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); + unlock_buffer(bh[i]); continue; } spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); -- cgit v1.2.3-70-g09d2 From 393418676a7602e1d7d3f6e560159c65c8cbd50e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 5 Jan 2009 21:38:14 -0500 Subject: ext4: Fix the race between read_inode_bitmap() and ext4_new_inode() We need to make sure we update the inode bitmap and clear EXT4_BG_INODE_UNINIT flag with sb_bgl_lock held, since ext4_read_inode_bitmap() looks at EXT4_BG_INODE_UNINIT to decide whether to initialize the inode bitmap each time it is called. (introduced by commit c806e68f.) ext4_read_inode_bitmap does: spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { ext4_init_inode_bitmap(sb, bh, block_group, desc); and ext4_new_inode does if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group), ino, inode_bitmap_bh->b_data)) ...... ... spin_lock(sb_bgl_lock(sbi, group)); gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); i.e., on allocation we update the bitmap then we take the sb_bgl_lock and clear the EXT4_BG_INODE_UNINIT flag. What can happen is a parallel ext4_read_inode_bitmap can zero out the bitmap in between the above ext4_set_bit_atomic and spin_lock(sb_bg_lock..) The race results in below user visible errors EXT4-fs error (device sdb1): ext4_free_inode: bit already cleared for inode 168449 EXT4-fs warning (device sdb1): ext4_unlink: Deleting nonexistent file ... EXT4-fs warning (device sdb1): ext4_rmdir: empty directory has too many links ... # ls -al /mnt/tmp/f/p369/d3/d6/d39/db2/dee/d10f/d3f/l71 ls: /mnt/tmp/f/p369/d3/d6/d39/db2/dee/d10f/d3f/l71: Stale NFS file handle Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/ialloc.c | 146 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 86 insertions(+), 60 deletions(-) (limited to 'fs/ext4/ialloc.c') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b47427a21f1..d4e544f30be 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -572,6 +572,79 @@ static int find_group_other(struct super_block *sb, struct inode *parent, return -1; } +/* + * claim the inode from the inode bitmap. If the group + * is uninit we need to take the groups's sb_bgl_lock + * and clear the uninit flag. The inode bitmap update + * and group desc uninit flag clear should be done + * after holding sb_bgl_lock so that ext4_read_inode_bitmap + * doesn't race with the ext4_claim_inode + */ +static int ext4_claim_inode(struct super_block *sb, + struct buffer_head *inode_bitmap_bh, + unsigned long ino, ext4_group_t group, int mode) +{ + int free = 0, retval = 0, count; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); + + spin_lock(sb_bgl_lock(sbi, group)); + if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { + /* not a free inode */ + retval = 1; + goto err_ret; + } + ino++; + if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || + ino > EXT4_INODES_PER_GROUP(sb)) { + spin_unlock(sb_bgl_lock(sbi, group)); + ext4_error(sb, __func__, + "reserved inode or inode > inodes count - " + "block_group = %u, inode=%lu", group, + ino + group * EXT4_INODES_PER_GROUP(sb)); + return 1; + } + /* If we didn't allocate from within the initialized part of the inode + * table then we need to initialize up to this inode. */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); + /* When marking the block group with + * ~EXT4_BG_INODE_UNINIT we don't want to depend + * on the value of bg_itable_unused even though + * mke2fs could have initialized the same for us. + * Instead we calculated the value below + */ + + free = 0; + } else { + free = EXT4_INODES_PER_GROUP(sb) - + ext4_itable_unused_count(sb, gdp); + } + + /* + * Check the relative inode number against the last used + * relative inode number in this group. if it is greater + * we need to update the bg_itable_unused count + * + */ + if (ino > free) + ext4_itable_unused_set(sb, gdp, + (EXT4_INODES_PER_GROUP(sb) - ino)); + } + count = ext4_free_inodes_count(sb, gdp) - 1; + ext4_free_inodes_set(sb, gdp, count); + if (S_ISDIR(mode)) { + count = ext4_used_dirs_count(sb, gdp) + 1; + ext4_used_dirs_set(sb, gdp, count); + } + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); +err_ret: + spin_unlock(sb_bgl_lock(sbi, group)); + return retval; +} + /* * There are two policies for allocating an inode. If the new inode is * a directory, then a forward search is made for a block group with both @@ -594,7 +667,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) struct ext4_super_block *es; struct ext4_inode_info *ei; struct ext4_sb_info *sbi; - int ret2, err = 0, count; + int ret2, err = 0; struct inode *ret; ext4_group_t i; int free = 0; @@ -658,8 +731,13 @@ repeat_in_this_group: if (err) goto fail; - if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group), - ino, inode_bitmap_bh->b_data)) { + BUFFER_TRACE(group_desc_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, + group_desc_bh); + if (err) + goto fail; + if (!ext4_claim_inode(sb, inode_bitmap_bh, + ino, group, mode)) { /* we won it */ BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); @@ -668,10 +746,13 @@ repeat_in_this_group: inode_bitmap_bh); if (err) goto fail; + /* zero bit is inode number 1*/ + ino++; goto got; } /* we lost it */ ext4_handle_release_buffer(handle, inode_bitmap_bh); + ext4_handle_release_buffer(handle, group_desc_bh); if (++ino < EXT4_INODES_PER_GROUP(sb)) goto repeat_in_this_group; @@ -691,22 +772,6 @@ repeat_in_this_group: goto out; got: - ino++; - if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || - ino > EXT4_INODES_PER_GROUP(sb)) { - ext4_error(sb, __func__, - "reserved inode or inode > inodes count - " - "block_group = %u, inode=%lu", group, - ino + group * EXT4_INODES_PER_GROUP(sb)); - err = -EIO; - goto fail; - } - - BUFFER_TRACE(group_desc_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, group_desc_bh); - if (err) - goto fail; - /* We may have to initialize the block bitmap if it isn't already */ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { @@ -743,49 +808,10 @@ got: if (err) goto fail; } - - spin_lock(sb_bgl_lock(sbi, group)); - /* If we didn't allocate from within the initialized part of the inode - * table then we need to initialize up to this inode. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); - - /* When marking the block group with - * ~EXT4_BG_INODE_UNINIT we don't want to depend - * on the value of bg_itable_unused even though - * mke2fs could have initialized the same for us. - * Instead we calculated the value below - */ - - free = 0; - } else { - free = EXT4_INODES_PER_GROUP(sb) - - ext4_itable_unused_count(sb, gdp); - } - - /* - * Check the relative inode number against the last used - * relative inode number in this group. if it is greater - * we need to update the bg_itable_unused count - * - */ - if (ino > free) - ext4_itable_unused_set(sb, gdp, - (EXT4_INODES_PER_GROUP(sb) - ino)); - } - - count = ext4_free_inodes_count(sb, gdp) - 1; - ext4_free_inodes_set(sb, gdp, count); - if (S_ISDIR(mode)) { - count = ext4_used_dirs_count(sb, gdp) + 1; - ext4_used_dirs_set(sb, gdp, count); - } - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); - spin_unlock(sb_bgl_lock(sbi, group)); BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); - if (err) goto fail; + if (err) + goto fail; percpu_counter_dec(&sbi->s_freeinodes_counter); if (S_ISDIR(mode)) -- cgit v1.2.3-70-g09d2 From 2ccb5fb9f113dae969d1ae9b6c10e80fa34f8cd3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 5 Jan 2009 21:49:55 -0500 Subject: ext4: Use new buffer_head flag to check uninit group bitmaps initialization For uninit block group, the on-disk bitmap is not initialized. That implies we cannot depend on the uptodate flag on the bitmap buffer_head to find bitmap validity. Use a new buffer_head flag which would be set after we properly initialize the bitmap. This also prevents (re-)initializing the uninit group bitmap every time we call ext4_read_block_bitmap(). Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/balloc.c | 25 +++++++++++++++++++++++-- fs/ext4/ext4.h | 18 ++++++++++++++++++ fs/ext4/ialloc.c | 24 ++++++++++++++++++++++-- fs/ext4/mballoc.c | 24 ++++++++++++++++++++++-- 4 files changed, 85 insertions(+), 6 deletions(-) (limited to 'fs/ext4/ialloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1b26b68aa42..6bba06b09dd 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -320,20 +320,41 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) block_group, bitmap_blk); return NULL; } - if (buffer_uptodate(bh) && - !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) + + if (bitmap_uptodate(bh)) return bh; lock_buffer(bh); + if (bitmap_uptodate(bh)) { + unlock_buffer(bh); + return bh; + } spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh, block_group, desc); + set_bitmap_uptodate(bh); set_buffer_uptodate(bh); spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); unlock_buffer(bh); return bh; } spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + if (buffer_uptodate(bh)) { + /* + * if not uninit if bh is uptodate, + * bitmap is also uptodate + */ + set_bitmap_uptodate(bh); + unlock_buffer(bh); + return bh; + } + /* + * submit the buffer_head for read. We can + * safely mark the bitmap as uptodate now. + * We do it here so the bitmap uptodate bit + * get set with buffer lock held. + */ + set_bitmap_uptodate(bh); if (bh_submit_read(bh) < 0) { put_bh(bh); ext4_error(sb, __func__, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ec862f4ca89..695b45cc34e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -19,6 +19,7 @@ #include #include #include +#include #include "ext4_i.h" /* @@ -1352,6 +1353,23 @@ extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); +/* + * Add new method to test wether block and inode bitmaps are properly + * initialized. With uninit_bg reading the block from disk is not enough + * to mark the bitmap uptodate. We need to also zero-out the bitmap + */ +#define BH_BITMAP_UPTODATE BH_JBDPrivateStart + +static inline int bitmap_uptodate(struct buffer_head *bh) +{ + return (buffer_uptodate(bh) && + test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state)); +} +static inline void set_bitmap_uptodate(struct buffer_head *bh) +{ + set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); +} + #endif /* __KERNEL__ */ #endif /* _EXT4_H */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index d4e544f30be..7b12aedc531 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -115,20 +115,40 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) block_group, bitmap_blk); return NULL; } - if (buffer_uptodate(bh) && - !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) + if (bitmap_uptodate(bh)) return bh; lock_buffer(bh); + if (bitmap_uptodate(bh)) { + unlock_buffer(bh); + return bh; + } spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { ext4_init_inode_bitmap(sb, bh, block_group, desc); + set_bitmap_uptodate(bh); set_buffer_uptodate(bh); spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); unlock_buffer(bh); return bh; } spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + if (buffer_uptodate(bh)) { + /* + * if not uninit if bh is uptodate, + * bitmap is also uptodate + */ + set_bitmap_uptodate(bh); + unlock_buffer(bh); + return bh; + } + /* + * submit the buffer_head for read. We can + * safely mark the bitmap as uptodate now. + * We do it here so the bitmap uptodate bit + * get set with buffer lock held. + */ + set_bitmap_uptodate(bh); if (bh_submit_read(bh) < 0) { put_bh(bh); ext4_error(sb, __func__, diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index aac33590ac6..18a52d39d09 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -794,22 +794,42 @@ static int ext4_mb_init_cache(struct page *page, char *incore) if (bh[i] == NULL) goto out; - if (buffer_uptodate(bh[i]) && - !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) + if (bitmap_uptodate(bh[i])) continue; lock_buffer(bh[i]); + if (bitmap_uptodate(bh[i])) { + unlock_buffer(bh[i]); + continue; + } spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh[i], first_group + i, desc); + set_bitmap_uptodate(bh[i]); set_buffer_uptodate(bh[i]); spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); unlock_buffer(bh[i]); continue; } spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); + if (buffer_uptodate(bh[i])) { + /* + * if not uninit if bh is uptodate, + * bitmap is also uptodate + */ + set_bitmap_uptodate(bh[i]); + unlock_buffer(bh[i]); + continue; + } get_bh(bh[i]); + /* + * submit the buffer_head for read. We can + * safely mark the bitmap as uptodate now. + * We do it here so the bitmap uptodate bit + * get set with buffer lock held. + */ + set_bitmap_uptodate(bh[i]); bh[i]->b_end_io = end_buffer_read_sync; submit_bh(READ, bh[i]); mb_debug("read bitmap for group %u\n", first_group + i); -- cgit v1.2.3-70-g09d2 From 648f5879f5892dddd3ba71cd0d285599f40f2512 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 5 Jan 2009 21:46:04 -0500 Subject: ext4: mark the blocks/inode bitmap beyond end of group as used We need to mark the block/inode bitmap beyond the end of the group with '1'. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/ialloc.c | 2 +- fs/ext4/mballoc.c | 4 ++-- fs/ext4/resize.c | 6 ++---- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'fs/ext4/ialloc.c') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 7b12aedc531..e3aa3fa3860 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -84,7 +84,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, } memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); - mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); return EXT4_INODES_PER_GROUP(sb); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 18a52d39d09..7d7f6f91d55 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3038,8 +3038,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, in_range(block + len - 1, ext4_inode_table(sb, gdp), EXT4_SB(sb)->s_itb_per_group)) { ext4_error(sb, __func__, - "Allocating block in system zone - block = %llu", - block); + "Allocating block %llu in system zone of %d group\n", + block, ac->ac_b_ex.fe_group); /* File system mounted not to panic on error * Fix the bitmap and repeat the block allocation * We leak some of the blocks here. diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 2d24f423fd8..c328be5d688 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -284,11 +284,9 @@ static int setup_new_group_blocks(struct super_block *sb, if ((err = extend_or_restart_transaction(handle, 2, bh))) goto exit_bh; - mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb), - bh->b_data); + mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); - /* Mark unused entries in inode bitmap used */ ext4_debug("clear inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, input->inode_bitmap - start); @@ -297,7 +295,7 @@ static int setup_new_group_blocks(struct super_block *sb, goto exit_journal; } - mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); ext4_handle_dirty_metadata(handle, NULL, bh); exit_bh: -- cgit v1.2.3-70-g09d2 From ba80b1019aa722b24506db1ee755e0bb2f513022 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Jan 2009 20:03:21 -0500 Subject: ext4: Add markers for better debuggability Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 9 +++++++++ fs/ext4/inode.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ext4/mballoc.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 116 insertions(+), 3 deletions(-) (limited to 'fs/ext4/ialloc.c') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index e3aa3fa3860..369c34c6429 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -210,6 +210,11 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) ino = inode->i_ino; ext4_debug("freeing inode %lu\n", ino); + trace_mark(ext4_free_inode, + "dev %s ino %lu mode %d uid %lu gid %lu bocks %llu", + sb->s_id, inode->i_ino, inode->i_mode, + (unsigned long) inode->i_uid, (unsigned long) inode->i_gid, + (unsigned long long) inode->i_blocks); /* * Note: we must free any quota before locking the superblock, @@ -698,6 +703,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) return ERR_PTR(-EPERM); sb = dir->i_sb; + trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, + dir->i_ino, mode); inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); @@ -925,6 +932,8 @@ got: } ext4_debug("allocating inode %lu\n", inode->i_ino); + trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d", + sb->s_id, inode->i_ino, dir->i_ino, mode); goto really_out; fail: ext4_std_error(sb, err); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 56142accf5c..4cac8da4e0c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1351,6 +1351,10 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, pgoff_t index; unsigned from, to; + trace_mark(ext4_write_begin, + "dev %s ino %lu pos %llu len %u flags %u", + inode->i_sb->s_id, inode->i_ino, + (unsigned long long) pos, len, flags); index = pos >> PAGE_CACHE_SHIFT; from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; @@ -1422,6 +1426,10 @@ static int ext4_ordered_write_end(struct file *file, struct inode *inode = mapping->host; int ret = 0, ret2; + trace_mark(ext4_ordered_write_end, + "dev %s ino %lu pos %llu len %u copied %u", + inode->i_sb->s_id, inode->i_ino, + (unsigned long long) pos, len, copied); ret = ext4_jbd2_file_inode(handle, inode); if (ret == 0) { @@ -1460,6 +1468,10 @@ static int ext4_writeback_write_end(struct file *file, int ret = 0, ret2; loff_t new_i_size; + trace_mark(ext4_writeback_write_end, + "dev %s ino %lu pos %llu len %u copied %u", + inode->i_sb->s_id, inode->i_ino, + (unsigned long long) pos, len, copied); new_i_size = pos + copied; if (new_i_size > EXT4_I(inode)->i_disksize) { ext4_update_i_disksize(inode, new_i_size); @@ -1495,6 +1507,10 @@ static int ext4_journalled_write_end(struct file *file, unsigned from, to; loff_t new_i_size; + trace_mark(ext4_journalled_write_end, + "dev %s ino %lu pos %llu len %u copied %u", + inode->i_sb->s_id, inode->i_ino, + (unsigned long long) pos, len, copied); from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; @@ -2311,6 +2327,9 @@ static int ext4_da_writepage(struct page *page, struct buffer_head *page_bufs; struct inode *inode = page->mapping->host; + trace_mark(ext4_da_writepage, + "dev %s ino %lu page_index %lu", + inode->i_sb->s_id, inode->i_ino, page->index); size = i_size_read(inode); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; @@ -2421,6 +2440,20 @@ static int ext4_da_writepages(struct address_space *mapping, int needed_blocks, ret = 0, nr_to_writebump = 0; struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); + trace_mark(ext4_da_writepages, + "dev %s ino %lu nr_t_write %ld " + "pages_skipped %ld range_start %llu " + "range_end %llu nonblocking %d " + "for_kupdate %d for_reclaim %d " + "for_writepages %d range_cyclic %d", + inode->i_sb->s_id, inode->i_ino, + wbc->nr_to_write, wbc->pages_skipped, + (unsigned long long) wbc->range_start, + (unsigned long long) wbc->range_end, + wbc->nonblocking, wbc->for_kupdate, + wbc->for_reclaim, wbc->for_writepages, + wbc->range_cyclic); + /* * No pages to write? This is mainly a kludge to avoid starting * a transaction for special inodes like journal inode on last iput() @@ -2539,6 +2572,14 @@ out_writepages: if (!no_nrwrite_index_update) wbc->no_nrwrite_index_update = 0; wbc->nr_to_write -= nr_to_writebump; + trace_mark(ext4_da_writepage_result, + "dev %s ino %lu ret %d pages_written %d " + "pages_skipped %ld congestion %d " + "more_io %d no_nrwrite_index_update %d", + inode->i_sb->s_id, inode->i_ino, ret, + pages_written, wbc->pages_skipped, + wbc->encountered_congestion, wbc->more_io, + wbc->no_nrwrite_index_update); return ret; } @@ -2590,6 +2631,11 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, len, flags, pagep, fsdata); } *fsdata = (void *)0; + + trace_mark(ext4_da_write_begin, + "dev %s ino %lu pos %llu len %u flags %u", + inode->i_sb->s_id, inode->i_ino, + (unsigned long long) pos, len, flags); retry: /* * With delayed allocation, we don't log the i_disksize update @@ -2679,6 +2725,10 @@ static int ext4_da_write_end(struct file *file, } } + trace_mark(ext4_da_write_end, + "dev %s ino %lu pos %llu len %u copied %u", + inode->i_sb->s_id, inode->i_ino, + (unsigned long long) pos, len, copied); start = pos & (PAGE_CACHE_SIZE - 1); end = start + copied - 1; @@ -2892,6 +2942,9 @@ static int ext4_normal_writepage(struct page *page, loff_t size = i_size_read(inode); loff_t len; + trace_mark(ext4_normal_writepage, + "dev %s ino %lu page_index %lu", + inode->i_sb->s_id, inode->i_ino, page->index); J_ASSERT(PageLocked(page)); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; @@ -2977,6 +3030,9 @@ static int ext4_journalled_writepage(struct page *page, loff_t size = i_size_read(inode); loff_t len; + trace_mark(ext4_journalled_writepage, + "dev %s ino %lu page_index %lu", + inode->i_sb->s_id, inode->i_ino, page->index); J_ASSERT(PageLocked(page)); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 05d9f81956c..918aec0c8a1 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2878,8 +2878,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) + entry->start_blk + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id, - (unsigned long long) discard_block, entry->count); + trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", + sb->s_id, (unsigned long long) discard_block, + entry->count); sb_issue_discard(sb, discard_block, entry->count); kmem_cache_free(ext4_free_ext_cachep, entry); @@ -3697,6 +3698,10 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) mb_debug("new inode pa %p: %llu/%u for %u\n", pa, pa->pa_pstart, pa->pa_len, pa->pa_lstart); + trace_mark(ext4_mb_new_inode_pa, + "dev %s ino %lu pstart %llu len %u lstart %u", + sb->s_id, ac->ac_inode->i_ino, + pa->pa_pstart, pa->pa_len, pa->pa_lstart); ext4_mb_use_inode_pa(ac, pa); atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); @@ -3754,7 +3759,9 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) pa->pa_linear = 1; mb_debug("new group pa %p: %llu/%u for %u\n", pa, - pa->pa_pstart, pa->pa_len, pa->pa_lstart); + pa->pa_pstart, pa->pa_len, pa->pa_lstart); + trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u", + sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart); ext4_mb_use_group_pa(ac, pa); atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); @@ -3807,12 +3814,14 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, unsigned int next; ext4_group_t group; ext4_grpblk_t bit; + unsigned long long grp_blk_start; sector_t start; int err = 0; int free = 0; BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); + grp_blk_start = pa->pa_pstart - bit; BUG_ON(group != e4b->bd_group && pa->pa_len != 0); end = bit + pa->pa_len; @@ -3842,6 +3851,10 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, ext4_mb_store_history(ac); } + trace_mark(ext4_mb_release_inode_pa, + "dev %s ino %lu block %llu count %u", + sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit, + next - bit); mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); bit = next + 1; } @@ -3875,6 +3888,8 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, if (ac) ac->ac_op = EXT4_MB_HISTORY_DISCARD; + trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d", + sb->s_id, pa->pa_pstart, pa->pa_len); BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); BUG_ON(group != e4b->bd_group && pa->pa_len != 0); @@ -4040,6 +4055,8 @@ void ext4_discard_preallocations(struct inode *inode) } mb_debug("discard preallocation for inode %lu\n", inode->i_ino); + trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id, + inode->i_ino); INIT_LIST_HEAD(&list); @@ -4492,6 +4509,8 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) int ret; int freed = 0; + trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", + sb->s_id, needed); for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { ret = ext4_mb_discard_group_preallocations(sb, i, needed); freed += ret; @@ -4520,6 +4539,18 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, sb = ar->inode->i_sb; sbi = EXT4_SB(sb); + trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu " + "lblk %llu goal %llu lleft %llu lright %llu " + "pleft %llu pright %llu ", + sb->s_id, ar->flags, ar->len, + ar->inode ? ar->inode->i_ino : 0, + (unsigned long long) ar->logical, + (unsigned long long) ar->goal, + (unsigned long long) ar->lleft, + (unsigned long long) ar->lright, + (unsigned long long) ar->pleft, + (unsigned long long) ar->pright); + if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { /* * With delalloc we already reserved the blocks @@ -4622,6 +4653,19 @@ out3: reserv_blks); } + trace_mark(ext4_allocate_blocks, + "dev %s block %llu flags %u len %u ino %lu " + "logical %llu goal %llu lleft %llu lright %llu " + "pleft %llu pright %llu ", + sb->s_id, (unsigned long long) block, + ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0, + (unsigned long long) ar->logical, + (unsigned long long) ar->goal, + (unsigned long long) ar->lleft, + (unsigned long long) ar->lright, + (unsigned long long) ar->pleft, + (unsigned long long) ar->pright); + return block; } @@ -4755,6 +4799,10 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, } ext4_debug("freeing block %lu\n", block); + trace_mark(ext4_free_blocks, + "dev %s block %llu count %lu metadata %d ino %lu", + sb->s_id, (unsigned long long) block, count, metadata, + inode ? inode->i_ino : 0); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); if (ac) { -- cgit v1.2.3-70-g09d2 From 83982b6f47201c4c7767210d24d7d8c99567a0b3 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 6 Jan 2009 14:53:16 -0500 Subject: ext4: Remove "extents" mount option This mount option is largely superfluous, and in fact the way it was implemented was buggy; if a filesystem which did not have the extents feature flag was mounted -o extents, the filesystem would attempt to create and use extents-based file even though the extents feature flag was not eabled. The simplest thing to do is to nuke the mount option entirely. It's not all that useful to force the non-creation of new extent-based files if the filesystem can support it. Signed-off-by: "Theodore Ts'o" --- Documentation/filesystems/ext4.txt | 5 ---- fs/ext4/ext4.h | 1 - fs/ext4/ext4_jbd2.h | 4 ++-- fs/ext4/extents.c | 4 ++-- fs/ext4/ialloc.c | 2 +- fs/ext4/migrate.c | 14 +++++------ fs/ext4/super.c | 48 ++------------------------------------ 7 files changed, 14 insertions(+), 64 deletions(-) (limited to 'fs/ext4/ialloc.c') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 8938949b201..cec829bc729 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -131,11 +131,6 @@ ro Mount filesystem read only. Note that ext4 will mount options "ro,noload" can be used to prevent writes to the filesystem. -extents (*) ext4 will use extents to address file data. The - file system will no longer be mountable by ext3. - -noextents ext4 will not use extents for newly created files - journal_checksum Enable checksumming of the journal transactions. This will allow the recovery code in e2fsck and the kernel to detect corruption in the kernel. It is a diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 695b45cc34e..db1718833f5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -536,7 +536,6 @@ do { \ #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ -#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 663197adae5..be2f426f680 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -32,8 +32,8 @@ * 5 levels of tree + root which are stored in the inode. */ #define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ - (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ - || test_opt(sb, EXTENTS) ? 27U : 8U) + (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ + ? 27U : 8U) /* Extended attribute operations touch at most two data buffers, * two bitmap buffers, and two group summaries, in addition to the inode diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c64080e4949..240cf0daad4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2247,7 +2247,7 @@ void ext4_ext_init(struct super_block *sb) * possible initialization would be here */ - if (test_opt(sb, EXTENTS)) { + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { printk(KERN_INFO "EXT4-fs: file extents enabled"); #ifdef AGGRESSIVE_TEST printk(", aggressive tests"); @@ -2272,7 +2272,7 @@ void ext4_ext_init(struct super_block *sb) */ void ext4_ext_release(struct super_block *sb) { - if (!test_opt(sb, EXTENTS)) + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) return; #ifdef EXTENTS_STATS diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 369c34c6429..4fb86a0061d 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -917,7 +917,7 @@ got: if (err) goto fail_free_drop; - if (test_opt(sb, EXTENTS)) { + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { /* set extent flag only for directory, file and normal symlink*/ if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index e7cd488da4b..734abca25e3 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -459,13 +459,13 @@ int ext4_ext_migrate(struct inode *inode) struct list_blocks_struct lb; unsigned long max_entries; - if (!test_opt(inode->i_sb, EXTENTS)) - /* - * if mounted with noextents we don't allow the migrate - */ - return -EINVAL; - - if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + /* + * If the filesystem does not support extents, or the inode + * already is extent-based, error out. + */ + if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_INCOMPAT_EXTENTS) || + (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) return -EINVAL; if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b69d0920386..acb69c00fd4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -829,8 +829,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",journal_async_commit"); if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); - if (!test_opt(sb, EXTENTS)) - seq_puts(seq, ",noextents"); if (test_opt(sb, I_VERSION)) seq_puts(seq, ",i_version"); if (!test_opt(sb, DELALLOC)) @@ -1011,7 +1009,7 @@ enum { Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, + Opt_grpquota, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_inode_readahead_blks, Opt_journal_ioprio }; @@ -1066,8 +1064,6 @@ static const match_table_t tokens = { {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, - {Opt_extents, "extents"}, - {Opt_noextents, "noextents"}, {Opt_i_version, "i_version"}, {Opt_stripe, "stripe=%u"}, {Opt_resize, "resize"}, @@ -1115,7 +1111,6 @@ static int parse_options(char *options, struct super_block *sb, int qtype, qfmt; char *qname; #endif - ext4_fsblk_t last_block; if (!options) return 1; @@ -1445,33 +1440,6 @@ set_qf_format: case Opt_bh: clear_opt(sbi->s_mount_opt, NOBH); break; - case Opt_extents: - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_EXTENTS)) { - ext4_warning(sb, __func__, - "extents feature not enabled " - "on this filesystem, use tune2fs"); - return 0; - } - set_opt(sbi->s_mount_opt, EXTENTS); - break; - case Opt_noextents: - /* - * When e2fsprogs support resizing an already existing - * ext3 file system to greater than 2**32 we need to - * add support to block allocator to handle growing - * already existing block mapped inode so that blocks - * allocated for them fall within 2**32 - */ - last_block = ext4_blocks_count(sbi->s_es) - 1; - if (last_block > 0xffffffffULL) { - printk(KERN_ERR "EXT4-fs: Filesystem too " - "large to mount with " - "-o noextents options\n"); - return 0; - } - clear_opt(sbi->s_mount_opt, EXTENTS); - break; case Opt_i_version: set_opt(sbi->s_mount_opt, I_VERSION); sb->s_flags |= MS_I_VERSION; @@ -2135,18 +2103,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, RESERVATION); set_opt(sbi->s_mount_opt, BARRIER); - /* - * turn on extents feature by default in ext4 filesystem - * only if feature flag already set by mkfs or tune2fs. - * Use -o noextents to turn it off - */ - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) - set_opt(sbi->s_mount_opt, EXTENTS); - else - ext4_warning(sb, __func__, - "extents feature not enabled on this filesystem, " - "use tune2fs."); - /* * enable delayed allocation by default * Use -o nodelalloc to turn it off @@ -3825,7 +3781,7 @@ static void __exit exit_ext4_fs(void) } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); -MODULE_DESCRIPTION("Fourth Extended Filesystem with extents"); +MODULE_DESCRIPTION("Fourth Extended Filesystem"); MODULE_LICENSE("GPL"); module_init(init_ext4_fs) module_exit(exit_ext4_fs) -- cgit v1.2.3-70-g09d2