summaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Kconfig79
-rw-r--r--fs/ext4/Makefile10
-rw-r--r--fs/ext4/acl.h12
-rw-r--r--fs/ext4/balloc.c1469
-rw-r--r--fs/ext4/bitmap.c6
-rw-r--r--fs/ext4/dir.c64
-rw-r--r--fs/ext4/ext4.h134
-rw-r--r--fs/ext4/ext4_extents.h15
-rw-r--r--fs/ext4/ext4_i.h39
-rw-r--r--fs/ext4/ext4_sb.h28
-rw-r--r--fs/ext4/extents.c281
-rw-r--r--fs/ext4/file.c10
-rw-r--r--fs/ext4/fsync.c7
-rw-r--r--fs/ext4/hash.c8
-rw-r--r--fs/ext4/ialloc.c71
-rw-r--r--fs/ext4/inode.c747
-rw-r--r--fs/ext4/ioctl.c96
-rw-r--r--fs/ext4/mballoc.c483
-rw-r--r--fs/ext4/mballoc.h32
-rw-r--r--fs/ext4/migrate.c10
-rw-r--r--fs/ext4/namei.c402
-rw-r--r--fs/ext4/resize.c33
-rw-r--r--fs/ext4/super.c437
-rw-r--r--fs/ext4/symlink.c8
-rw-r--r--fs/ext4/xattr.c14
-rw-r--r--fs/ext4/xattr.h12
26 files changed, 1873 insertions, 2634 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
new file mode 100644
index 00000000000..7505482a08f
--- /dev/null
+++ b/fs/ext4/Kconfig
@@ -0,0 +1,79 @@
+config EXT4_FS
+ tristate "The Extended 4 (ext4) filesystem"
+ select JBD2
+ select CRC16
+ help
+ This is the next generation of the ext3 filesystem.
+
+ Unlike the change from ext2 filesystem to ext3 filesystem,
+ the on-disk format of ext4 is not forwards compatible with
+ ext3; it is based on extent maps and it supports 48-bit
+ physical block numbers. The ext4 filesystem also supports delayed
+ allocation, persistent preallocation, high resolution time stamps,
+ and a number of other features to improve performance and speed
+ up fsck time. For more information, please see the web pages at
+ http://ext4.wiki.kernel.org.
+
+ The ext4 filesystem will support mounting an ext3
+ filesystem; while there will be some performance gains from
+ the delayed allocation and inode table readahead, the best
+ performance gains will require enabling ext4 features in the
+ filesystem, or formating a new filesystem as an ext4
+ filesystem initially.
+
+ To compile this file system support as a module, choose M here. The
+ module will be called ext4.
+
+ If unsure, say N.
+
+config EXT4DEV_COMPAT
+ bool "Enable ext4dev compatibility"
+ depends on EXT4_FS
+ help
+ Starting with 2.6.28, the name of the ext4 filesystem was
+ renamed from ext4dev to ext4. Unfortunately there are some
+ legacy userspace programs (such as klibc's fstype) have
+ "ext4dev" hardcoded.
+
+ To enable backwards compatibility so that systems that are
+ still expecting to mount ext4 filesystems using ext4dev,
+ chose Y here. This feature will go away by 2.6.31, so
+ please arrange to get your userspace programs fixed!
+
+config EXT4_FS_XATTR
+ bool "Ext4 extended attributes"
+ depends on EXT4_FS
+ default y
+ help
+ Extended attributes are name:value pairs associated with inodes by
+ the kernel or by users (see the attr(5) manual page, or visit
+ <http://acl.bestbits.at/> for details).
+
+ If unsure, say N.
+
+ You need this for POSIX ACL support on ext4.
+
+config EXT4_FS_POSIX_ACL
+ bool "Ext4 POSIX Access Control Lists"
+ depends on EXT4_FS_XATTR
+ select FS_POSIX_ACL
+ help
+ POSIX Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the POSIX ACLs for
+ Linux website <http://acl.bestbits.at/>.
+
+ If you don't know what Access Control Lists are, say N
+
+config EXT4_FS_SECURITY
+ bool "Ext4 Security Labels"
+ depends on EXT4_FS_XATTR
+ help
+ Security labels support alternative access control models
+ implemented by security modules like SELinux. This option
+ enables an extended attribute handler for file security
+ labels in the ext4 filesystem.
+
+ If you are not using a security module that requires using
+ extended attributes for file security labels, say N.
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index ac6fa8ca0a2..a8ff003a00f 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -2,12 +2,12 @@
# Makefile for the linux ext4-filesystem routines.
#
-obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
+obj-$(CONFIG_EXT4_FS) += ext4.o
-ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
ext4_jbd2.o migrate.o mballoc.o
-ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
-ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
-ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o
+ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
+ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index cd2b855a07d..cb45257a246 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -51,18 +51,18 @@ static inline int ext4_acl_count(size_t size)
}
}
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
if the ACL has not been cached */
#define EXT4_ACL_NOT_CACHED ((void *)-1)
/* acl.c */
-extern int ext4_permission (struct inode *, int);
-extern int ext4_acl_chmod (struct inode *);
-extern int ext4_init_acl (handle_t *, struct inode *, struct inode *);
+extern int ext4_permission(struct inode *, int);
+extern int ext4_acl_chmod(struct inode *);
+extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
-#else /* CONFIG_EXT4DEV_FS_POSIX_ACL */
+#else /* CONFIG_EXT4_FS_POSIX_ACL */
#include <linux/sched.h>
#define ext4_permission NULL
@@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
return 0;
}
-#endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */
+#endif /* CONFIG_EXT4_FS_POSIX_ACL */
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e9fa960ba6d..b9821be709b 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -83,6 +83,7 @@ static int ext4_group_used_meta_blocks(struct super_block *sb,
}
return used_blocks;
}
+
/* Initializes an uninitialized block bitmap if given, and returns the
* number of blocks free in the group. */
unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
@@ -132,7 +133,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
*/
group_blocks = ext4_blocks_count(sbi->s_es) -
le32_to_cpu(sbi->s_es->s_first_data_block) -
- (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1));
+ (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
} else {
group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
}
@@ -200,20 +201,20 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
* @bh: pointer to the buffer head to store the block
* group descriptor
*/
-struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
+struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
ext4_group_t block_group,
- struct buffer_head ** bh)
+ struct buffer_head **bh)
{
unsigned long group_desc;
unsigned long offset;
- struct ext4_group_desc * desc;
+ struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (block_group >= sbi->s_groups_count) {
- ext4_error (sb, "ext4_get_group_desc",
- "block_group >= groups_count - "
- "block_group = %lu, groups_count = %lu",
- block_group, sbi->s_groups_count);
+ ext4_error(sb, "ext4_get_group_desc",
+ "block_group >= groups_count - "
+ "block_group = %lu, groups_count = %lu",
+ block_group, sbi->s_groups_count);
return NULL;
}
@@ -222,10 +223,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
if (!sbi->s_group_desc[group_desc]) {
- ext4_error (sb, "ext4_get_group_desc",
- "Group descriptor not loaded - "
- "block_group = %lu, group_desc = %lu, desc = %lu",
- block_group, group_desc, offset);
+ ext4_error(sb, "ext4_get_group_desc",
+ "Group descriptor not loaded - "
+ "block_group = %lu, group_desc = %lu, desc = %lu",
+ block_group, group_desc, offset);
return NULL;
}
@@ -302,8 +303,8 @@ err_out:
struct buffer_head *
ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
{
- struct ext4_group_desc * desc;
- struct buffer_head * bh = NULL;
+ struct ext4_group_desc *desc;
+ struct buffer_head *bh = NULL;
ext4_fsblk_t bitmap_blk;
desc = ext4_get_group_desc(sb, block_group, NULL);
@@ -318,9 +319,11 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
block_group, bitmap_blk);
return NULL;
}
- if (bh_uptodate_or_lock(bh))
+ if (buffer_uptodate(bh) &&
+ !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
return bh;
+ lock_buffer(bh);
spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh, block_group, desc);
@@ -345,301 +348,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
*/
return bh;
}
-/*
- * The reservation window structure operations
- * --------------------------------------------
- * Operations include:
- * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
- *
- * We use a red-black tree to represent per-filesystem reservation
- * windows.
- *
- */
-
-/**
- * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
- * @rb_root: root of per-filesystem reservation rb tree
- * @verbose: verbose mode
- * @fn: function which wishes to dump the reservation map
- *
- * If verbose is turned on, it will print the whole block reservation
- * windows(start, end). Otherwise, it will only print out the "bad" windows,
- * those windows that overlap with their immediate neighbors.
- */
-#if 1
-static void __rsv_window_dump(struct rb_root *root, int verbose,
- const char *fn)
-{
- struct rb_node *n;
- struct ext4_reserve_window_node *rsv, *prev;
- int bad;
-
-restart:
- n = rb_first(root);
- bad = 0;
- prev = NULL;
-
- printk("Block Allocation Reservation Windows Map (%s):\n", fn);
- while (n) {
- rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
- if (verbose)
- printk("reservation window 0x%p "
- "start: %llu, end: %llu\n",
- rsv, rsv->rsv_start, rsv->rsv_end);
- if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
- printk("Bad reservation %p (start >= end)\n",
- rsv);
- bad = 1;
- }
- if (prev && prev->rsv_end >= rsv->rsv_start) {
- printk("Bad reservation %p (prev->end >= start)\n",
- rsv);
- bad = 1;
- }
- if (bad) {
- if (!verbose) {
- printk("Restarting reservation walk in verbose mode\n");
- verbose = 1;
- goto restart;
- }
- }
- n = rb_next(n);
- prev = rsv;
- }
- printk("Window map complete.\n");
- BUG_ON(bad);
-}
-#define rsv_window_dump(root, verbose) \
- __rsv_window_dump((root), (verbose), __func__)
-#else
-#define rsv_window_dump(root, verbose) do {} while (0)
-#endif
-
-/**
- * goal_in_my_reservation()
- * @rsv: inode's reservation window
- * @grp_goal: given goal block relative to the allocation block group
- * @group: the current allocation block group
- * @sb: filesystem super block
- *
- * Test if the given goal block (group relative) is within the file's
- * own block reservation window range.
- *
- * If the reservation window is outside the goal allocation group, return 0;
- * grp_goal (given goal block) could be -1, which means no specific
- * goal block. In this case, always return 1.
- * If the goal block is within the reservation window, return 1;
- * otherwise, return 0;
- */
-static int
-goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
- ext4_group_t group, struct super_block *sb)
-{
- ext4_fsblk_t group_first_block, group_last_block;
-
- group_first_block = ext4_group_first_block_no(sb, group);
- group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
-
- if ((rsv->_rsv_start > group_last_block) ||
- (rsv->_rsv_end < group_first_block))
- return 0;
- if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start)
- || (grp_goal + group_first_block > rsv->_rsv_end)))
- return 0;
- return 1;
-}
-
-/**
- * search_reserve_window()
- * @rb_root: root of reservation tree
- * @goal: target allocation block
- *
- * Find the reserved window which includes the goal, or the previous one
- * if the goal is not in any window.
- * Returns NULL if there are no windows or if all windows start after the goal.
- */
-static struct ext4_reserve_window_node *
-search_reserve_window(struct rb_root *root, ext4_fsblk_t goal)
-{
- struct rb_node *n = root->rb_node;
- struct ext4_reserve_window_node *rsv;
-
- if (!n)
- return NULL;
-
- do {
- rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
-
- if (goal < rsv->rsv_start)
- n = n->rb_left;
- else if (goal > rsv->rsv_end)
- n = n->rb_right;
- else
- return rsv;
- } while (n);
- /*
- * We've fallen off the end of the tree: the goal wasn't inside
- * any particular node. OK, the previous node must be to one
- * side of the interval containing the goal. If it's the RHS,
- * we need to back up one.
- */
- if (rsv->rsv_start > goal) {
- n = rb_prev(&rsv->rsv_node);
- rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
- }
- return rsv;
-}
-
-/**
- * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree.
- * @sb: super block
- * @rsv: reservation window to add
- *
- * Must be called with rsv_lock hold.
- */
-void ext4_rsv_window_add(struct super_block *sb,
- struct ext4_reserve_window_node *rsv)
-{
- struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root;
- struct rb_node *node = &rsv->rsv_node;
- ext4_fsblk_t start = rsv->rsv_start;
-
- struct rb_node ** p = &root->rb_node;
- struct rb_node * parent = NULL;
- struct ext4_reserve_window_node *this;
-
- while (*p)
- {
- parent = *p;
- this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node);
-
- if (start < this->rsv_start)
- p = &(*p)->rb_left;
- else if (start > this->rsv_end)
- p = &(*p)->rb_right;
- else {
- rsv_window_dump(root, 1);
- BUG();
- }
- }
-
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
-}
-
-/**
- * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree
- * @sb: super block
- * @rsv: reservation window to remove
- *
- * Mark the block reservation window as not allocated, and unlink it
- * from the filesystem reservation window rb tree. Must be called with
- * rsv_lock hold.
- */
-static void rsv_window_remove(struct super_block *sb,
- struct ext4_reserve_window_node *rsv)
-{
- rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
- rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
- rsv->rsv_alloc_hit = 0;
- rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root);
-}
-
-/*
- * rsv_is_empty() -- Check if the reservation window is allocated.
- * @rsv: given reservation window to check
- *
- * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
- */
-static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
-{
- /* a valid reservation end block could not be 0 */
- return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
-}
-
-/**
- * ext4_init_block_alloc_info()
- * @inode: file inode structure
- *
- * Allocate and initialize the reservation window structure, and
- * link the window to the ext4 inode structure at last
- *
- * The reservation window structure is only dynamically allocated
- * and linked to ext4 inode the first time the open file
- * needs a new block. So, before every ext4_new_block(s) call, for
- * regular files, we should check whether the reservation window
- * structure exists or not. In the latter case, this function is called.
- * Fail to do so will result in block reservation being turned off for that
- * open file.
- *
- * This function is called from ext4_get_blocks_handle(), also called
- * when setting the reservation window size through ioctl before the file
- * is open for write (needs block allocation).
- *
- * Needs down_write(i_data_sem) protection prior to call this function.
- */
-void ext4_init_block_alloc_info(struct inode *inode)
-{
- struct ext4_inode_info *ei = EXT4_I(inode);
- struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
- struct super_block *sb = inode->i_sb;
-
- block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
- if (block_i) {
- struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node;
-
- rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
- rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
-
- /*
- * if filesystem is mounted with NORESERVATION, the goal
- * reservation window size is set to zero to indicate
- * block reservation is off
- */
- if (!test_opt(sb, RESERVATION))
- rsv->rsv_goal_size = 0;
- else
- rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS;
- rsv->rsv_alloc_hit = 0;
- block_i->last_alloc_logical_block = 0;
- block_i->last_alloc_physical_block = 0;
- }
- ei->i_block_alloc_info = block_i;
-}
-
-/**
- * ext4_discard_reservation()
- * @inode: inode
- *
- * Discard(free) block reservation window on last file close, or truncate
- * or at last iput().
- *
- * It is being called in three cases:
- * ext4_release_file(): last writer close the file
- * ext4_clear_inode(): last iput(), when nobody link to this file.
- * ext4_truncate(): when the block indirect map is about to change.
- *
- */
-void ext4_discard_reservation(struct inode *inode)
-{
- struct ext4_inode_info *ei = EXT4_I(inode);
- struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
- struct ext4_reserve_window_node *rsv;
- spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;
-
- ext4_mb_discard_inode_preallocations(inode);
-
- if (!block_i)
- return;
-
- rsv = &block_i->rsv_window_node;
- if (!rsv_is_empty(&rsv->rsv_window)) {
- spin_lock(rsv_lock);
- if (!rsv_is_empty(&rsv->rsv_window))
- rsv_window_remove(inode->i_sb, rsv);
- spin_unlock(rsv_lock);
- }
-}
/**
* ext4_free_blocks_sb() -- Free given blocks and update quota
@@ -648,6 +356,13 @@ void ext4_discard_reservation(struct inode *inode)
* @block: start physcial block to free
* @count: number of blocks to free
* @pdquot_freed_blocks: pointer to quota
+ *
+ * XXX This function is only used by the on-line resizing code, which
+ * should probably be fixed up to call the mballoc variant. There
+ * this needs to be cleaned up later; in fact, I'm not convinced this
+ * is 100% correct in the face of the mballoc code. The online resizing
+ * code needs to be fixed up to more tightly (and correctly) interlock
+ * with the mballoc code.
*/
void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count,
@@ -659,8 +374,8 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
ext4_grpblk_t bit;
unsigned long i;
unsigned long overflow;
- struct ext4_group_desc * desc;
- struct ext4_super_block * es;
+ struct ext4_group_desc *desc;
+ struct ext4_super_block *es;
struct ext4_sb_info *sbi;
int err = 0, ret;
ext4_grpblk_t group_freed;
@@ -671,13 +386,13 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
if (block < le32_to_cpu(es->s_first_data_block) ||
block + count < block ||
block + count > ext4_blocks_count(es)) {
- ext4_error (sb, "ext4_free_blocks",
- "Freeing blocks not in datazone - "
- "block = %llu, count = %lu", block, count);
+ ext4_error(sb, "ext4_free_blocks",
+ "Freeing blocks not in datazone - "
+ "block = %llu, count = %lu", block, count);
goto error_return;
}
- ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1);
+ ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1);
do_more:
overflow = 0;
@@ -694,7 +409,7 @@ do_more:
bitmap_bh = ext4_read_block_bitmap(sb, block_group);
if (!bitmap_bh)
goto error_return;
- desc = ext4_get_group_desc (sb, block_group, &gd_bh);
+ desc = ext4_get_group_desc(sb, block_group, &gd_bh);
if (!desc)
goto error_return;
@@ -703,10 +418,10 @@ do_more:
in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
in_range(block + count - 1, ext4_inode_table(sb, desc),
sbi->s_itb_per_group)) {
- ext4_error (sb, "ext4_free_blocks",
- "Freeing blocks in system zones - "
- "Block = %llu, count = %lu",
- block, count);
+ ext4_error(sb, "ext4_free_blocks",
+ "Freeing blocks in system zones - "
+ "Block = %llu, count = %lu",
+ block, count);
goto error_return;
}
@@ -848,759 +563,71 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t block, unsigned long count,
int metadata)
{
- struct super_block * sb;
+ struct super_block *sb;
unsigned long dquot_freed_blocks;
/* this isn't the right place to decide whether block is metadata
* inode.c/extents.c knows better, but for safety ... */
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
- ext4_should_journal_data(inode))
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ metadata = 1;
+
+ /* We need to make sure we don't reuse
+ * block released untill the transaction commit.
+ * writeback mode have weak data consistency so
+ * don't force data as metadata when freeing block
+ * for writeback mode.
+ */
+ if (metadata == 0 && !ext4_should_writeback_data(inode))
metadata = 1;
sb = inode->i_sb;
- if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info)
- ext4_free_blocks_sb(handle, sb, block, count,
- &dquot_freed_blocks);
- else
- ext4_mb_free_blocks(handle, inode, block, count,
- metadata, &dquot_freed_blocks);
+ ext4_mb_free_blocks(handle, inode, block, count,
+ metadata, &dquot_freed_blocks);
if (dquot_freed_blocks)
DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
return;
}
-/**
- * ext4_test_allocatable()
- * @nr: given allocation block group
- * @bh: bufferhead contains the bitmap of the given block group
- *
- * For ext4 allocations, we must not reuse any blocks which are
- * allocated in the bitmap buffer's "last committed data" copy. This
- * prevents deletes from freeing up the page for reuse until we have
- * committed the delete transaction.
- *
- * If we didn't do this, then deleting something and reallocating it as
- * data would allow the old block to be overwritten before the
- * transaction committed (because we force data to disk before commit).
- * This would lead to corruption if we crashed between overwriting the
- * data and committing the delete.
- *
- * @@@ We may want to make this allocation behaviour conditional on
- * data-writes at some point, and disable it for metadata allocations or
- * sync-data inodes.
- */
-static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh)
+int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
+ s64 nblocks)
{
- int ret;
- struct journal_head *jh = bh2jh(bh);
-
- if (ext4_test_bit(nr, bh->b_data))
- return 0;
-
- jbd_lock_bh_state(bh);
- if (!jh->b_committed_data)
- ret = 1;
- else
- ret = !ext4_test_bit(nr, jh->b_committed_data);
- jbd_unlock_bh_state(bh);
- return ret;
-}
+ s64 free_blocks, dirty_blocks;
+ s64 root_blocks = 0;
+ struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
+ struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
-/**
- * bitmap_search_next_usable_block()
- * @start: the starting block (group relative) of the search
- * @bh: bufferhead contains the block group bitmap
- * @maxblocks: the ending block (group relative) of the reservation
- *
- * The bitmap search --- search forward alternately through the actual
- * bitmap on disk and the last-committed copy in journal, until we find a
- * bit free in both bitmaps.
- */
-static ext4_grpblk_t
-bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
- ext4_grpblk_t maxblocks)
-{
- ext4_grpblk_t next;
- struct journal_head *jh = bh2jh(bh);
-
- while (start < maxblocks) {
- next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start);
- if (next >= maxblocks)
- return -1;
- if (ext4_test_allocatable(next, bh))
- return next;
- jbd_lock_bh_state(bh);
- if (jh->b_committed_data)
- start = ext4_find_next_zero_bit(jh->b_committed_data,
- maxblocks, next);
- jbd_unlock_bh_state(bh);
- }
- return -1;
-}
+ free_blocks = percpu_counter_read_positive(fbc);
+ dirty_blocks = percpu_counter_read_positive(dbc);
-/**
- * find_next_usable_block()
- * @start: the starting block (group relative) to find next
- * allocatable block in bitmap.
- * @bh: bufferhead contains the block group bitmap
- * @maxblocks: the ending block (group relative) for the search
- *
- * Find an allocatable block in a bitmap. We honor both the bitmap and
- * its last-committed copy (if that exists), and perform the "most
- * appropriate allocation" algorithm of looking for a free block near
- * the initial goal; then for a free byte somewhere in the bitmap; then
- * for any free bit in the bitmap.
- */
-static ext4_grpblk_t
-find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
- ext4_grpblk_t maxblocks)
-{
- ext4_grpblk_t here, next;
- char *p, *r;
-
- if (start > 0) {
- /*
- * The goal was occupied; search forward for a free
- * block within the next XX blocks.
- *
- * end_goal is more or less random, but it has to be
- * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the
- * next 64-bit boundary is simple..
- */
- ext4_grpblk_t end_goal = (start + 63) & ~63;
- if (end_goal > maxblocks)
- end_goal = maxblocks;
- here = ext4_find_next_zero_bit(bh->b_data, end_goal, start);
- if (here < end_goal && ext4_test_allocatable(here, bh))
- return here;
- ext4_debug("Bit not found near goal\n");
- }
-
- here = start;
- if (here < 0)
- here = 0;
-
- p = ((char *)bh->b_data) + (here >> 3);
- r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
- next = (r - ((char *)bh->b_data)) << 3;
-
- if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh))
- return next;
-
- /*
- * The bitmap search --- search forward alternately through the actual
- * bitmap and the last-committed copy until we find a bit free in
- * both
- */
- here = bitmap_search_next_usable_block(here, bh, maxblocks);
- return here;
-}
-
-/**
- * claim_block()
- * @block: the free block (group relative) to allocate
- * @bh: the bufferhead containts the block group bitmap
- *
- * We think we can allocate this block in this bitmap. Try to set the bit.
- * If that succeeds then check that nobody has allocated and then freed the
- * block since we saw that is was not marked in b_committed_data. If it _was_
- * allocated and freed then clear the bit in the bitmap again and return
- * zero (failure).
- */
-static inline int
-claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh)
-{
- struct journal_head *jh = bh2jh(bh);
- int ret;
-
- if (ext4_set_bit_atomic(lock, block, bh->b_data))
- return 0;
- jbd_lock_bh_state(bh);
- if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) {
- ext4_clear_bit_atomic(lock, block, bh->b_data);
- ret = 0;
- } else {
- ret = 1;
- }
- jbd_unlock_bh_state(bh);
- return ret;
-}
-
-/**
- * ext4_try_to_allocate()
- * @sb: superblock
- * @handle: handle to this transaction
- * @group: given allocation block group
- * @bitmap_bh: bufferhead holds the block bitmap
- * @grp_goal: given target block within the group
- * @count: target number of blocks to allocate
- * @my_rsv: reservation window
- *
- * Attempt to allocate blocks within a give range. Set the range of allocation
- * first, then find the first free bit(s) from the bitmap (within the range),
- * and at last, allocate the blocks by claiming the found free bit as allocated.
- *
- * To set the range of this allocation:
- * if there is a reservation window, only try to allocate block(s) from the
- * file's own reservation window;
- * Otherwise, the allocation range starts from the give goal block, ends at
- * the block group's last block.
- *
- * If we failed to allocate the desired block then we may end up crossing to a
- * new bitmap. In that case we must release write access to the old one via
- * ext4_journal_release_buffer(), else we'll run out of credits.
- */
-static ext4_grpblk_t
-ext4_try_to_allocate(struct super_block *sb, handle_t *handle,
- ext4_group_t group, struct buffer_head *bitmap_bh,
- ext4_grpblk_t grp_goal, unsigned long *count,
- struct ext4_reserve_window *my_rsv)
-{
- ext4_fsblk_t group_first_block;
- ext4_grpblk_t start, end;
- unsigned long num = 0;
-
- /* we do allocation within the reservation window if we have a window */
- if (my_rsv) {
- group_first_block = ext4_group_first_block_no(sb, group);
- if (my_rsv->_rsv_start >= group_first_block)
- start = my_rsv->_rsv_start - group_first_block;
- else
- /* reservation window cross group boundary */
- start = 0;
- end = my_rsv->_rsv_end - group_first_block + 1;
- if (end > EXT4_BLOCKS_PER_GROUP(sb))
- /* reservation window crosses group boundary */
- end = EXT4_BLOCKS_PER_GROUP(sb);
- if ((start <= grp_goal) && (grp_goal < end))
- start = grp_goal;
- else
- grp_goal = -1;
- } else {
- if (grp_goal > 0)
- start = grp_goal;
- else
- start = 0;
- end = EXT4_BLOCKS_PER_GROUP(sb);
- }
-
- BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb));
-
-repeat:
- if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) {
- grp_goal = find_next_usable_block(start, bitmap_bh, end);
- if (grp_goal < 0)
- goto fail_access;
- if (!my_rsv) {
- int i;
-
- for (i = 0; i < 7 && grp_goal > start &&
- ext4_test_allocatable(grp_goal - 1,
- bitmap_bh);
- i++, grp_goal--)
- ;
- }
- }
- start = grp_goal;
-
- if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group),
- grp_goal, bitmap_bh)) {
- /*
- * The block was allocated by another thread, or it was
- * allocated and then freed by another thread
- */
- start++;
- grp_goal++;
- if (start >= end)
- goto fail_access;
- goto repeat;
- }
- num++;
- grp_goal++;
- while (num < *count && grp_goal < end
- && ext4_test_allocatable(grp_goal, bitmap_bh)
- && claim_block(sb_bgl_lock(EXT4_SB(sb), group),
- grp_goal, bitmap_bh)) {
- num++;
- grp_goal++;
- }
- *count = num;
- return grp_goal - num;
-fail_access:
- *count = num;
- return -1;
-}
-
-/**
- * find_next_reservable_window():
- * find a reservable space within the given range.
- * It does not allocate the reservation window for now:
- * alloc_new_reservation() will do the work later.
- *
- * @search_head: the head of the searching list;
- * This is not necessarily the list head of the whole filesystem
- *
- * We have both head and start_block to assist the search
- * for the reservable space. The list starts from head,
- * but we will shift to the place where start_block is,
- * then start from there, when looking for a reservable space.
- *
- * @size: the target new reservation window size
- *
- * @group_first_block: the first block we consider to start
- * the real search from
- *
- * @last_block:
- * the maximum block number that our goal reservable space
- * could start from. This is normally the last block in this
- * group. The search will end when we found the start of next
- * possible reservable space is out of this boundary.
- * This could handle the cross boundary reservation window
- * request.
- *
- * basically we search from the given range, rather than the whole
- * reservation double linked list, (start_block, last_block)
- * to find a free region that is of my size and has not
- * been reserved.
- *
- */
-static int find_next_reservable_window(
- struct ext4_reserve_window_node *search_head,
- struct ext4_reserve_window_node *my_rsv,
- struct super_block * sb,
- ext4_fsblk_t start_block,
- ext4_fsblk_t last_block)
-{
- struct rb_node *next;
- struct ext4_reserve_window_node *rsv, *prev;
- ext4_fsblk_t cur;
- int size = my_rsv->rsv_goal_size;
-
- /* TODO: make the start of the reservation window byte-aligned */
- /* cur = *start_block & ~7;*/
- cur = start_block;
- rsv = search_head;
- if (!rsv)
- return -1;
-
- while (1) {
- if (cur <= rsv->rsv_end)
- cur = rsv->rsv_end + 1;
-
- /* TODO?
- * in the case we could not find a reservable space
- * that is what is expected, during the re-search, we could
- * remember what's the largest reservable space we could have
- * and return that one.
- *
- * For now it will fail if we could not find the reservable
- * space with expected-size (or more)...
- */
- if (cur > last_block)
- return -1; /* fail */
-
- prev = rsv;
- next = rb_next(&rsv->rsv_node);
- rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node);
+ if (!capable(CAP_SYS_RESOURCE) &&
+ sbi->s_resuid != current->fsuid &&
+ (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
+ root_blocks = ext4_r_blocks_count(sbi->s_es);
- /*
- * Reached the last reservation, we can just append to the
- * previous one.
- */
- if (!next)
- break;
-
- if (cur + size <= rsv->rsv_start) {
- /*
- * Found a reserveable space big enough. We could
- * have a reservation across the group boundary here
- */
- break;
+ if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
+ EXT4_FREEBLOCKS_WATERMARK) {
+ free_blocks = percpu_counter_sum(fbc);
+ dirty_blocks = percpu_counter_sum(dbc);
+ if (dirty_blocks < 0) {
+ printk(KERN_CRIT "Dirty block accounting "
+ "went wrong %lld\n",
+ dirty_blocks);
}
}
- /*
- * we come here either :
- * when we reach the end of the whole list,
- * and there is empty reservable space after last entry in the list.
- * append it to the end of the list.
- *
- * or we found one reservable space in the middle of the list,
- * return the reservation window that we could append to.
- * succeed.
+ /* Check whether we have space after
+ * accounting for current dirty blocks
*/
+ if (free_blocks < ((root_blocks + nblocks) + dirty_blocks))
+ /* we don't have free space */
+ return -ENOSPC;
- if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
- rsv_window_remove(sb, my_rsv);
-
- /*
- * Let's book the whole avaliable window for now. We will check the
- * disk bitmap later and then, if there are free blocks then we adjust
- * the window size if it's larger than requested.
- * Otherwise, we will remove this node from the tree next time
- * call find_next_reservable_window.
- */
- my_rsv->rsv_start = cur;
- my_rsv->rsv_end = cur + size - 1;
- my_rsv->rsv_alloc_hit = 0;
-
- if (prev != my_rsv)
- ext4_rsv_window_add(sb, my_rsv);
-
+ /* Add the blocks to nblocks */
+ percpu_counter_add(dbc, nblocks);
return 0;
}
/**
- * alloc_new_reservation()--allocate a new reservation window
- *
- * To make a new reservation, we search part of the filesystem
- * reservation list (the list that inside the group). We try to
- * allocate a new reservation window near the allocation goal,
- * or the beginning of the group, if there is no goal.
- *
- * We first find a reservable space after the goal, then from
- * there, we check the bitmap for the first free block after
- * it. If there is no free block until the end of group, then the
- * whole group is full, we failed. Otherwise, check if the free
- * block is inside the expected reservable space, if so, we
- * succeed.
- * If the first free block is outside the reservable space, then
- * start from the first free block, we search for next available
- * space, and go on.
- *
- * on succeed, a new reservation will be found and inserted into the list
- * It contains at least one free block, and it does not overlap with other
- * reservation windows.
- *
- * failed: we failed to find a reservation window in this group
- *
- * @rsv: the reservation
- *
- * @grp_goal: The goal (group-relative). It is where the search for a
- * free reservable space should start from.
- * if we have a grp_goal(grp_goal >0 ), then start from there,
- * no grp_goal(grp_goal = -1), we start from the first block
- * of the group.
- *
- * @sb: the super block
- * @group: the group we are trying to allocate in
- * @bitmap_bh: the block group block bitmap
- *
- */
-static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
- ext4_grpblk_t grp_goal, struct super_block *sb,
- ext4_group_t group, struct buffer_head *bitmap_bh)
-{
- struct ext4_reserve_window_node *search_head;
- ext4_fsblk_t group_first_block, group_end_block, start_block;
- ext4_grpblk_t first_free_block;
- struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root;
- unsigned long size;
- int ret;
- spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
-
- group_first_block = ext4_group_first_block_no(sb, group);
- group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
-
- if (grp_goal < 0)
- start_block = group_first_block;
- else
- start_block = grp_goal + group_first_block;
-
- size = my_rsv->rsv_goal_size;
-
- if (!rsv_is_empty(&my_rsv->rsv_window)) {
- /*
- * if the old reservation is cross group boundary
- * and if the goal is inside the old reservation window,
- * we will come here when we just failed to allocate from
- * the first part of the window. We still have another part
- * that belongs to the next group. In this case, there is no
- * point to discard our window and try to allocate a new one
- * in this group(which will fail). we should
- * keep the reservation window, just simply move on.
- *
- * Maybe we could shift the start block of the reservation
- * window to the first block of next group.
- */
-
- if ((my_rsv->rsv_start <= group_end_block) &&
- (my_rsv->rsv_end > group_end_block) &&
- (start_block >= my_rsv->rsv_start))
- return -1;
-
- if ((my_rsv->rsv_alloc_hit >
- (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
- /*
- * if the previously allocation hit ratio is
- * greater than 1/2, then we double the size of
- * the reservation window the next time,
- * otherwise we keep the same size window
- */
- size = size * 2;
- if (size > EXT4_MAX_RESERVE_BLOCKS)
- size = EXT4_MAX_RESERVE_BLOCKS;
- my_rsv->rsv_goal_size= size;
- }
- }
-
- spin_lock(rsv_lock);
- /*
- * shift the search start to the window near the goal block
- */
- search_head = search_reserve_window(fs_rsv_root, start_block);
-
- /*
- * find_next_reservable_window() simply finds a reservable window
- * inside the given range(start_block, group_end_block).
- *
- * To make sure the reservation window has a free bit inside it, we
- * need to check the bitmap after we found a reservable window.
- */
-retry:
- ret = find_next_reservable_window(search_head, my_rsv, sb,
- start_block, group_end_block);
-
- if (ret == -1) {
- if (!rsv_is_empty(&my_rsv->rsv_window))
- rsv_window_remove(sb, my_rsv);
- spin_unlock(rsv_lock);
- return -1;
- }
-
- /*
- * On success, find_next_reservable_window() returns the
- * reservation window where there is a reservable space after it.
- * Before we reserve this reservable space, we need
- * to make sure there is at least a free block inside this region.
- *
- * searching the first free bit on the block bitmap and copy of
- * last committed bitmap alternatively, until we found a allocatable
- * block. Search start from the start block of the reservable space
- * we just found.
- */
- spin_unlock(rsv_lock);
- first_free_block = bitmap_search_next_usable_block(
- my_rsv->rsv_start - group_first_block,
- bitmap_bh, group_end_block - group_first_block + 1);
-
- if (first_free_block < 0) {
- /*
- * no free block left on the bitmap, no point
- * to reserve the space. return failed.
- */
- spin_lock(rsv_lock);
- if (!rsv_is_empty(&my_rsv->rsv_window))
- rsv_window_remove(sb, my_rsv);
- spin_unlock(rsv_lock);
- return -1; /* failed */
- }
-
- start_block = first_free_block + group_first_block;
- /*
- * check if the first free block is within the
- * free space we just reserved
- */
- if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
- return 0; /* success */
- /*
- * if the first free bit we found is out of the reservable space
- * continue search for next reservable space,
- * start from where the free block is,
- * we also shift the list head to where we stopped last time
- */
- search_head = my_rsv;
- spin_lock(rsv_lock);
- goto retry;
-}
-
-/**
- * try_to_extend_reservation()
- * @my_rsv: given reservation window
- * @sb: super block
- * @size: the delta to extend
- *
- * Attempt to expand the reservation window large enough to have
- * required number of free blocks
- *
- * Since ext4_try_to_allocate() will always allocate blocks within
- * the reservation window range, if the window size is too small,
- * multiple blocks allocation has to stop at the end of the reservation
- * window. To make this more efficient, given the total number of
- * blocks needed and the current size of the window, we try to
- * expand the reservation window size if necessary on a best-effort
- * basis before ext4_new_blocks() tries to allocate blocks,
- */
-static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
- struct super_block *sb, int size)
-{
- struct ext4_reserve_window_node *next_rsv;
- struct rb_node *next;
- spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
-
- if (!spin_trylock(rsv_lock))
- return;
-
- next = rb_next(&my_rsv->rsv_node);
-
- if (!next)
- my_rsv->rsv_end += size;
- else {
- next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node);
-
- if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
- my_rsv->rsv_end += size;
- else
- my_rsv->rsv_end = next_rsv->rsv_start - 1;
- }
- spin_unlock(rsv_lock);
-}
-
-/**
- * ext4_try_to_allocate_with_rsv()
- * @sb: superblock
- * @handle: handle to this transaction
- * @group: given allocation block group
- * @bitmap_bh: bufferhead holds the block bitmap
- * @grp_goal: given target block within the group
- * @count: target number of blocks to allocate
- * @my_rsv: reservation window
- * @errp: pointer to store the error code
- *
- * This is the main function used to allocate a new block and its reservation
- * window.
- *
- * Each time when a new block allocation is need, first try to allocate from
- * its own reservation. If it does not have a reservation window, instead of
- * looking for a free bit on bitmap first, then look up the reservation list to
- * see if it is inside somebody else's reservation window, we try to allocate a
- * reservation window for it starting from the goal first. Then do the block
- * allocation within the reservation window.
- *
- * This will avoid keeping on searching the reservation list again and
- * again when somebody is looking for a free block (without
- * reservation), and there are lots of free blocks, but they are all
- * being reserved.
- *
- * We use a red-black tree for the per-filesystem reservation list.
- *
- */
-static ext4_grpblk_t
-ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
- ext4_group_t group, struct buffer_head *bitmap_bh,
- ext4_grpblk_t grp_goal,
- struct ext4_reserve_window_node * my_rsv,
- unsigned long *count, int *errp)
-{
- ext4_fsblk_t group_first_block, group_last_block;
- ext4_grpblk_t ret = 0;
- int fatal;
- unsigned long num = *count;
-
- *errp = 0;
-
- /*
- * Make sure we use undo access for the bitmap, because it is critical
- * that we do the frozen_data COW on bitmap buffers in all cases even
- * if the buffer is in BJ_Forget state in the committing transaction.
- */
- BUFFER_TRACE(bitmap_bh, "get undo access for new block");
- fatal = ext4_journal_get_undo_access(handle, bitmap_bh);
- if (fatal) {
- *errp = fatal;
- return -1;
- }
-
- /*
- * we don't deal with reservation when
- * filesystem is mounted without reservation
- * or the file is not a regular file
- * or last attempt to allocate a block with reservation turned on failed
- */
- if (my_rsv == NULL ) {
- ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
- grp_goal, count, NULL);
- goto out;
- }
- /*
- * grp_goal is a group relative block number (if there is a goal)
- * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
- * first block is a filesystem wide block number
- * first block is the block number of the first block in this group
- */
- group_first_block = ext4_group_first_block_no(sb, group);
- group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
-
- /*
- * Basically we will allocate a new block from inode's reservation
- * window.
- *
- * We need to allocate a new reservation window, if:
- * a) inode does not have a reservation window; or
- * b) last attempt to allocate a block from existing reservation
- * failed; or
- * c) we come here with a goal and with a reservation window
- *
- * We do not need to allocate a new reservation window if we come here
- * at the beginning with a goal and the goal is inside the window, or
- * we don't have a goal but already have a reservation window.
- * then we could go to allocate from the reservation window directly.
- */
- while (1) {
- if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
- !goal_in_my_reservation(&my_rsv->rsv_window,
- grp_goal, group, sb)) {
- if (my_rsv->rsv_goal_size < *count)
- my_rsv->rsv_goal_size = *count;
- ret = alloc_new_reservation(my_rsv, grp_goal, sb,
- group, bitmap_bh);
- if (ret < 0)
- break; /* failed */
-
- if (!goal_in_my_reservation(&my_rsv->rsv_window,
- grp_goal, group, sb))
- grp_goal = -1;
- } else if (grp_goal >= 0) {
- int curr = my_rsv->rsv_end -
- (grp_goal + group_first_block) + 1;
-
- if (curr < *count)
- try_to_extend_reservation(my_rsv, sb,
- *count - curr);
- }
-
- if ((my_rsv->rsv_start > group_last_block) ||
- (my_rsv->rsv_end < group_first_block)) {
- rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1);
- BUG();
- }
- ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
- grp_goal, &num, &my_rsv->rsv_window);
- if (ret >= 0) {
- my_rsv->rsv_alloc_hit += num;
- *count = num;
- break; /* succeed */
- }
- num = *count;
- }
-out:
- if (ret >= 0) {
- BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
- "bitmap block");
- fatal = ext4_journal_dirty_metadata(handle, bitmap_bh);
- if (fatal) {
- *errp = fatal;
- return -1;
- }
- return ret;
- }
-
- BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
- ext4_journal_release_buffer(handle, bitmap_bh);
- return ret;
-}
-
-/**
* ext4_has_free_blocks()
* @sbi: in-core super block structure.
* @nblocks: number of neeed blocks
@@ -1610,29 +637,34 @@ out:
* On success, return nblocks
*/
ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
- ext4_fsblk_t nblocks)
+ s64 nblocks)
{
- ext4_fsblk_t free_blocks;
- ext4_fsblk_t root_blocks = 0;
+ s64 free_blocks, dirty_blocks;
+ s64 root_blocks = 0;
+ struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
+ struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
- free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+ free_blocks = percpu_counter_read_positive(fbc);
+ dirty_blocks = percpu_counter_read_positive(dbc);
if (!capable(CAP_SYS_RESOURCE) &&
sbi->s_resuid != current->fsuid &&
(sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
root_blocks = ext4_r_blocks_count(sbi->s_es);
-#ifdef CONFIG_SMP
- if (free_blocks - root_blocks < FBC_BATCH)
- free_blocks =
- percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
-#endif
- if (free_blocks <= root_blocks)
+
+ if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
+ EXT4_FREEBLOCKS_WATERMARK) {
+ free_blocks = percpu_counter_sum(fbc);
+ dirty_blocks = percpu_counter_sum(dbc);
+ }
+ if (free_blocks <= (root_blocks + dirty_blocks))
/* we don't have free space */
return 0;
- if (free_blocks - root_blocks < nblocks)
- return free_blocks - root_blocks;
+
+ if (free_blocks - (root_blocks + dirty_blocks) < nblocks)
+ return free_blocks - (root_blocks + dirty_blocks);
return nblocks;
- }
+}
/**
@@ -1657,303 +689,6 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
}
-/**
- * ext4_old_new_blocks() -- core block bitmap based block allocation function
- *
- * @handle: handle to this transaction
- * @inode: file inode
- * @goal: given target block(filesystem wide)
- * @count: target number of blocks to allocate
- * @errp: error code
- *
- * ext4_old_new_blocks uses a goal block to assist allocation and look up
- * the block bitmap directly to do block allocation. It tries to
- * allocate block(s) from the block group contains the goal block first. If
- * that fails, it will try to allocate block(s) from other block groups
- * without any specific goal block.
- *
- * This function is called when -o nomballoc mount option is enabled
- *
- */
-ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
- ext4_fsblk_t goal, unsigned long *count, int *errp)
-{
- struct buffer_head *bitmap_bh = NULL;
- struct buffer_head *gdp_bh;
- ext4_group_t group_no;
- ext4_group_t goal_group;
- ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */
- ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/
- ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */
- ext4_group_t bgi; /* blockgroup iteration index */
- int fatal = 0, err;
- int performed_allocation = 0;
- ext4_grpblk_t free_blocks; /* number of free blocks in a group */
- struct super_block *sb;
- struct ext4_group_desc *gdp;
- struct ext4_super_block *es;
- struct ext4_sb_info *sbi;
- struct ext4_reserve_window_node *my_rsv = NULL;
- struct ext4_block_alloc_info *block_i;
- unsigned short windowsz = 0;
- ext4_group_t ngroups;
- unsigned long num = *count;
-
- sb = inode->i_sb;
- if (!sb) {
- *errp = -ENODEV;
- printk("ext4_new_block: nonexistent device");
- return 0;
- }
-
- sbi = EXT4_SB(sb);
- if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
- /*
- * With delalloc we already reserved the blocks
- */
- *count = ext4_has_free_blocks(sbi, *count);
- }
- if (*count == 0) {
- *errp = -ENOSPC;
- return 0; /*return with ENOSPC error */
- }
- num = *count;
-
- /*
- * Check quota for allocation of this block.
- */
- if (DQUOT_ALLOC_BLOCK(inode, num)) {
- *errp = -EDQUOT;
- return 0;
- }
-
- sbi = EXT4_SB(sb);
- es = EXT4_SB(sb)->s_es;
- ext4_debug("goal=%llu.\n", goal);
- /*
- * Allocate a block from reservation only when
- * filesystem is mounted with reservation(default,-o reservation), and
- * it's a regular file, and
- * the desired window size is greater than 0 (One could use ioctl
- * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off
- * reservation on that particular file)
- */
- block_i = EXT4_I(inode)->i_block_alloc_info;
- if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
- my_rsv = &block_i->rsv_window_node;
-
- /*
- * First, test whether the goal block is free.
- */
- if (goal < le32_to_cpu(es->s_first_data_block) ||
- goal >= ext4_blocks_count(es))
- goal = le32_to_cpu(es->s_first_data_block);
- ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk);
- goal_group = group_no;
-retry_alloc:
- gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
- if (!gdp)
- goto io_error;
-
- free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
- /*
- * if there is not enough free blocks to make a new resevation
- * turn off reservation for this allocation
- */
- if (my_rsv && (free_blocks < windowsz)
- && (rsv_is_empty(&my_rsv->rsv_window)))
- my_rsv = NULL;
-
- if (free_blocks > 0) {
- bitmap_bh = ext4_read_block_bitmap(sb, group_no);
- if (!bitmap_bh)
- goto io_error;
- grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
- group_no, bitmap_bh, grp_target_blk,
- my_rsv, &num, &fatal);
- if (fatal)
- goto out;
- if (grp_alloc_blk >= 0)
- goto allocated;
- }
-
- ngroups = EXT4_SB(sb)->s_groups_count;
- smp_rmb();
-
- /*
- * Now search the rest of the groups. We assume that
- * group_no and gdp correctly point to the last group visited.
- */
- for (bgi = 0; bgi < ngroups; bgi++) {
- group_no++;
- if (group_no >= ngroups)
- group_no = 0;
- gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
- if (!gdp)
- goto io_error;
- free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
- /*
- * skip this group if the number of
- * free blocks is less than half of the reservation
- * window size.
- */
- if (free_blocks <= (windowsz/2))
- continue;
-
- brelse(bitmap_bh);
- bitmap_bh = ext4_read_block_bitmap(sb, group_no);
- if (!bitmap_bh)
- goto io_error;
- /*
- * try to allocate block(s) from this group, without a goal(-1).
- */
- grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
- group_no, bitmap_bh, -1, my_rsv,
- &num, &fatal);
- if (fatal)
- goto out;
- if (grp_alloc_blk >= 0)
- goto allocated;
- }
- /*
- * We may end up a bogus ealier ENOSPC error due to
- * filesystem is "full" of reservations, but
- * there maybe indeed free blocks avaliable on disk
- * In this case, we just forget about the reservations
- * just do block allocation as without reservations.
- */
- if (my_rsv) {
- my_rsv = NULL;
- windowsz = 0;
- group_no = goal_group;
- goto retry_alloc;
- }
- /* No space left on the device */
- *errp = -ENOSPC;
- goto out;
-
-allocated:
-
- ext4_debug("using block group %lu(%d)\n",
- group_no, gdp->bg_free_blocks_count);
-
- BUFFER_TRACE(gdp_bh, "get_write_access");
- fatal = ext4_journal_get_write_access(handle, gdp_bh);
- if (fatal)
- goto out;
-
- ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no);
-
- if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) ||
- in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) ||
- in_range(ret_block, ext4_inode_table(sb, gdp),
- EXT4_SB(sb)->s_itb_per_group) ||
- in_range(ret_block + num - 1, ext4_inode_table(sb, gdp),
- EXT4_SB(sb)->s_itb_per_group)) {
- ext4_error(sb, "ext4_new_block",
- "Allocating block in system zone - "
- "blocks from %llu, length %lu",
- ret_block, num);
- /*
- * claim_block marked the blocks we allocated
- * as in use. So we may want to selectively
- * mark some of the blocks as free
- */
- goto retry_alloc;
- }
-
- performed_allocation = 1;
-
-#ifdef CONFIG_JBD2_DEBUG
- {
- struct buffer_head *debug_bh;
-
- /* Record bitmap buffer state in the newly allocated block */
- debug_bh = sb_find_get_block(sb, ret_block);
- if (debug_bh) {
- BUFFER_TRACE(debug_bh, "state when allocated");
- BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
- brelse(debug_bh);
- }
- }
- jbd_lock_bh_state(bitmap_bh);
- spin_lock(sb_bgl_lock(sbi, group_no));
- if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
- int i;
-
- for (i = 0; i < num; i++) {
- if (ext4_test_bit(grp_alloc_blk+i,
- bh2jh(bitmap_bh)->b_committed_data)) {
- printk("%s: block was unexpectedly set in "
- "b_committed_data\n", __func__);
- }
- }
- }
- ext4_debug("found bit %d\n", grp_alloc_blk);
- spin_unlock(sb_bgl_lock(sbi, group_no));
- jbd_unlock_bh_state(bitmap_bh);
-#endif
-
- if (ret_block + num - 1 >= ext4_blocks_count(es)) {
- ext4_error(sb, "ext4_new_block",
- "block(%llu) >= blocks count(%llu) - "
- "block_group = %lu, es == %p ", ret_block,
- ext4_blocks_count(es), group_no, es);
- goto out;
- }
-
- /*
- * It is up to the caller to add the new buffer to a journal
- * list of some description. We don't know in advance whether
- * the caller wants to use it as metadata or data.
- */
- spin_lock(sb_bgl_lock(sbi, group_no));
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
- le16_add_cpu(&gdp->bg_free_blocks_count, -num);
- gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
- spin_unlock(sb_bgl_lock(sbi, group_no));
- if (!EXT4_I(inode)->i_delalloc_reserved_flag)
- percpu_counter_sub(&sbi->s_freeblocks_counter, num);
-
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
- spin_lock(sb_bgl_lock(sbi, flex_group));
- sbi->s_flex_groups[flex_group].free_blocks -= num;
- spin_unlock(sb_bgl_lock(sbi, flex_group));
- }
-
- BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
- err = ext4_journal_dirty_metadata(handle, gdp_bh);
- if (!fatal)
- fatal = err;
-
- sb->s_dirt = 1;
- if (fatal)
- goto out;
-
- *errp = 0;
- brelse(bitmap_bh);
- DQUOT_FREE_BLOCK(inode, *count-num);
- *count = num;
- return ret_block;
-
-io_error:
- *errp = -EIO;
-out:
- if (fatal) {
- *errp = fatal;
- ext4_std_error(sb, fatal);
- }
- /*
- * Undo the block allocation
- */
- if (!performed_allocation)
- DQUOT_FREE_BLOCK(inode, *count);
- brelse(bitmap_bh);
- return 0;
-}
-
#define EXT4_META_BLOCK 0x1
static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
@@ -1963,10 +698,6 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
struct ext4_allocation_request ar;
ext4_fsblk_t ret;
- if (!test_opt(inode->i_sb, MBALLOC)) {
- return ext4_old_new_blocks(handle, inode, goal, count, errp);
- }
-
memset(&ar, 0, sizeof(ar));
/* Fill with neighbour allocated blocks */
@@ -2008,7 +739,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
/*
* Account for the allocated meta blocks
*/
- if (!(*errp)) {
+ if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
EXT4_I(inode)->i_allocated_meta_blocks += *count;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -2093,10 +824,9 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
bitmap_count += x;
}
brelse(bitmap_bh);
- printk("ext4_count_free_blocks: stored = %llu"
- ", computed = %llu, %llu\n",
- ext4_free_blocks_count(es),
- desc_count, bitmap_count);
+ printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu"
+ ", computed = %llu, %llu\n", ext4_free_blocks_count(es),
+ desc_count, bitmap_count);
return bitmap_count;
#else
desc_count = 0;
@@ -2183,8 +913,9 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) ||
metagroup < first_meta_bg)
- return ext4_bg_num_gdb_nometa(sb,group);
+ return ext4_bg_num_gdb_nometa(sb, group);
return ext4_bg_num_gdb_meta(sb,group);
}
+
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index d37ea675045..0a7a6663c19 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -15,17 +15,17 @@
static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
-unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars)
+unsigned long ext4_count_free(struct buffer_head *map, unsigned int numchars)
{
unsigned int i;
unsigned long sum = 0;
if (!map)
- return (0);
+ return 0;
for (i = 0; i < numchars; i++)
sum += nibblemap[map->b_data[i] & 0xf] +
nibblemap[(map->b_data[i] >> 4) & 0xf];
- return (sum);
+ return sum;
}
#endif /* EXT4FS_DEBUG */
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ec8e33b4521..3ca6a2b7632 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -33,10 +33,10 @@ static unsigned char ext4_filetype_table[] = {
};
static int ext4_readdir(struct file *, void *, filldir_t);
-static int ext4_dx_readdir(struct file * filp,
- void * dirent, filldir_t filldir);
-static int ext4_release_dir (struct inode * inode,
- struct file * filp);
+static int ext4_dx_readdir(struct file *filp,
+ void *dirent, filldir_t filldir);
+static int ext4_release_dir(struct inode *inode,
+ struct file *filp);
const struct file_operations ext4_dir_operations = {
.llseek = generic_file_llseek,
@@ -61,12 +61,12 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
}
-int ext4_check_dir_entry (const char * function, struct inode * dir,
- struct ext4_dir_entry_2 * de,
- struct buffer_head * bh,
- unsigned long offset)
+int ext4_check_dir_entry(const char *function, struct inode *dir,
+ struct ext4_dir_entry_2 *de,
+ struct buffer_head *bh,
+ unsigned long offset)
{
- const char * error_msg = NULL;
+ const char *error_msg = NULL;
const int rlen = ext4_rec_len_from_disk(de->rec_len);
if (rlen < EXT4_DIR_REC_LEN(1))
@@ -82,7 +82,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
error_msg = "inode out of bounds";
if (error_msg != NULL)
- ext4_error (dir->i_sb, function,
+ ext4_error(dir->i_sb, function,
"bad entry in directory #%lu: %s - "
"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
dir->i_ino, error_msg, offset,
@@ -91,8 +91,8 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
return error_msg == NULL ? 1 : 0;
}
-static int ext4_readdir(struct file * filp,
- void * dirent, filldir_t filldir)
+static int ext4_readdir(struct file *filp,
+ void *dirent, filldir_t filldir)
{
int error = 0;
unsigned long offset;
@@ -102,6 +102,7 @@ static int ext4_readdir(struct file * filp,
int err;
struct inode *inode = filp->f_path.dentry->d_inode;
int ret = 0;
+ int dir_has_error = 0;
sb = inode->i_sb;
@@ -148,9 +149,13 @@ static int ext4_readdir(struct file * filp,
* of recovering data when there's a bad sector
*/
if (!bh) {
- ext4_error (sb, "ext4_readdir",
- "directory #%lu contains a hole at offset %lu",
- inode->i_ino, (unsigned long)filp->f_pos);
+ if (!dir_has_error) {
+ ext4_error(sb, __func__, "directory #%lu "
+ "contains a hole at offset %Lu",
+ inode->i_ino,
+ (unsigned long long) filp->f_pos);
+ dir_has_error = 1;
+ }
/* corrupt size? Maybe no more blocks to read */
if (filp->f_pos > inode->i_blocks << 9)
break;
@@ -187,14 +192,14 @@ revalidate:
while (!error && filp->f_pos < inode->i_size
&& offset < sb->s_blocksize) {
de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
- if (!ext4_check_dir_entry ("ext4_readdir", inode, de,
- bh, offset)) {
+ if (!ext4_check_dir_entry("ext4_readdir", inode, de,
+ bh, offset)) {
/*
* On error, skip the f_pos to the next block
*/
filp->f_pos = (filp->f_pos |
(sb->s_blocksize - 1)) + 1;
- brelse (bh);
+ brelse(bh);
ret = stored;
goto out;
}
@@ -218,12 +223,12 @@ revalidate:
break;
if (version != filp->f_version)
goto revalidate;
- stored ++;
+ stored++;
}
filp->f_pos += ext4_rec_len_from_disk(de->rec_len);
}
offset = 0;
- brelse (bh);
+ brelse(bh);
}
out:
return ret;
@@ -290,9 +295,9 @@ static void free_rb_tree_fname(struct rb_root *root)
parent = rb_parent(n);
fname = rb_entry(n, struct fname, rb_hash);
while (fname) {
- struct fname * old = fname;
+ struct fname *old = fname;
fname = fname->next;
- kfree (old);
+ kfree(old);
}
if (!parent)
root->rb_node = NULL;
@@ -331,7 +336,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
struct ext4_dir_entry_2 *dirent)
{
struct rb_node **p, *parent = NULL;
- struct fname * fname, *new_fn;
+ struct fname *fname, *new_fn;
struct dir_private_info *info;
int len;
@@ -388,19 +393,20 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
* for all entres on the fname linked list. (Normally there is only
* one entry on the linked list, unless there are 62 bit hash collisions.)
*/
-static int call_filldir(struct file * filp, void * dirent,
+static int call_filldir(struct file *filp, void *dirent,
filldir_t filldir, struct fname *fname)
{
struct dir_private_info *info = filp->private_data;
loff_t curr_pos;
struct inode *inode = filp->f_path.dentry->d_inode;
- struct super_block * sb;
+ struct super_block *sb;
int error;
sb = inode->i_sb;
if (!fname) {
- printk("call_filldir: called with null fname?!?\n");
+ printk(KERN_ERR "ext4: call_filldir: called with "
+ "null fname?!?\n");
return 0;
}
curr_pos = hash2pos(fname->hash, fname->minor_hash);
@@ -419,8 +425,8 @@ static int call_filldir(struct file * filp, void * dirent,
return 0;
}
-static int ext4_dx_readdir(struct file * filp,
- void * dirent, filldir_t filldir)
+static int ext4_dx_readdir(struct file *filp,
+ void *dirent, filldir_t filldir)
{
struct dir_private_info *info = filp->private_data;
struct inode *inode = filp->f_path.dentry->d_inode;
@@ -511,7 +517,7 @@ finished:
return 0;
}
-static int ext4_release_dir (struct inode * inode, struct file * filp)
+static int ext4_release_dir(struct inode *inode, struct file *filp)
{
if (filp->private_data)
ext4_htree_free_dir_info(filp->private_data);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 295003241d3..4880cc3e672 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -44,9 +44,9 @@
#ifdef EXT4FS_DEBUG
#define ext4_debug(f, a...) \
do { \
- printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
+ printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
__FILE__, __LINE__, __func__); \
- printk (KERN_DEBUG f, ## a); \
+ printk(KERN_DEBUG f, ## a); \
} while (0)
#else
#define ext4_debug(f, a...) do {} while (0)
@@ -128,7 +128,7 @@ struct ext4_allocation_request {
#else
# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
#endif
-#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32))
+#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
#else
@@ -245,7 +245,7 @@ struct flex_groups {
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
+#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */
/*
* Inode dynamic state flags
@@ -291,8 +291,6 @@ struct ext4_new_group_data {
#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
-#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
-#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
#ifdef CONFIG_JBD2_DEBUG
@@ -300,7 +298,10 @@ struct ext4_new_group_data {
#endif
#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
-#define EXT4_IOC_MIGRATE _IO('f', 7)
+#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
+#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
+#define EXT4_IOC_MIGRATE _IO('f', 9)
+ /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
/*
* ioctl commands in 32 bit emulation
@@ -510,7 +511,6 @@ do { \
/*
* Mount flags
*/
-#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */
#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
@@ -538,8 +538,9 @@ do { \
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
-#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
+#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
+
/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
@@ -667,7 +668,7 @@ struct ext4_super_block {
};
#ifdef __KERNEL__
-static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb)
+static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
{
return sb->s_fs_info;
}
@@ -725,11 +726,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
*/
#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
- ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
+ (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask))
#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
- ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
+ (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask))
#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
- ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
+ (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask))
#define EXT4_SET_COMPAT_FEATURE(sb,mask) \
EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
@@ -789,6 +790,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_DEF_RESUID 0
#define EXT4_DEF_RESGID 0
+#define EXT4_DEF_INODE_READAHEAD_BLKS 32
+
/*
* Default mount options
*/
@@ -954,6 +957,24 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
+extern struct proc_dir_entry *ext4_proc_root;
+
+#ifdef CONFIG_PROC_FS
+extern const struct file_operations ext4_ui_proc_fops;
+
+#define EXT4_PROC_HANDLER(name, var) \
+do { \
+ proc = proc_create_data(name, mode, sbi->s_proc, \
+ &ext4_ui_proc_fops, &sbi->s_##var); \
+ if (proc == NULL) { \
+ printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \
+ goto err_out; \
+ } \
+} while (0)
+#else
+#define EXT4_PROC_HANDLER(name, var)
+#endif
+
/*
* Function prototypes
*/
@@ -981,23 +1002,20 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, ext4_fsblk_t goal,
unsigned long *count, int *errp);
-extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
- ext4_fsblk_t goal, unsigned long *count, int *errp);
+extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
- ext4_fsblk_t nblocks);
-extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
+ s64 nblocks);
+extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t block, unsigned long count, int metadata);
-extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
- ext4_fsblk_t block, unsigned long count,
+extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
+ ext4_fsblk_t block, unsigned long count,
unsigned long *pdquot_freed_blocks);
-extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *);
-extern void ext4_check_blocks_bitmap (struct super_block *);
+extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
+extern void ext4_check_blocks_bitmap(struct super_block *);
extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
ext4_group_t block_group,
struct buffer_head ** bh);
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
-extern void ext4_init_block_alloc_info(struct inode *);
-extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
/* dir.c */
extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1009,20 +1027,20 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
extern void ext4_htree_free_dir_info(struct dir_private_info *p);
/* fsync.c */
-extern int ext4_sync_file (struct file *, struct dentry *, int);
+extern int ext4_sync_file(struct file *, struct dentry *, int);
/* hash.c */
extern int ext4fs_dirhash(const char *name, int len, struct
dx_hash_info *hinfo);
/* ialloc.c */
-extern struct inode * ext4_new_inode (handle_t *, struct inode *, int);
-extern void ext4_free_inode (handle_t *, struct inode *);
-extern struct inode * ext4_orphan_get (struct super_block *, unsigned long);
-extern unsigned long ext4_count_free_inodes (struct super_block *);
-extern unsigned long ext4_count_dirs (struct super_block *);
-extern void ext4_check_inodes_bitmap (struct super_block *);
-extern unsigned long ext4_count_free (struct buffer_head *, unsigned);
+extern struct inode * ext4_new_inode(handle_t *, struct inode *, int);
+extern void ext4_free_inode(handle_t *, struct inode *);
+extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
+extern unsigned long ext4_count_free_inodes(struct super_block *);
+extern unsigned long ext4_count_dirs(struct super_block *);
+extern void ext4_check_inodes_bitmap(struct super_block *);
+extern unsigned long ext4_count_free(struct buffer_head *, unsigned);
/* mballoc.c */
extern long ext4_mb_stats;
@@ -1032,7 +1050,7 @@ extern int ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
extern int ext4_mb_reserve_blocks(struct super_block *, int);
-extern void ext4_mb_discard_inode_preallocations(struct inode *);
+extern void ext4_discard_preallocations(struct inode *);
extern int __init init_ext4_mballoc(void);
extern void exit_ext4_mballoc(void);
extern void ext4_mb_free_blocks(handle_t *, struct inode *,
@@ -1050,24 +1068,25 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
struct buffer_head *ext4_bread(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
+int ext4_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, unsigned long maxblocks,
struct buffer_head *bh_result,
int create, int extend_disksize);
extern struct inode *ext4_iget(struct super_block *, unsigned long);
-extern int ext4_write_inode (struct inode *, int);
-extern int ext4_setattr (struct dentry *, struct iattr *);
+extern int ext4_write_inode(struct inode *, int);
+extern int ext4_setattr(struct dentry *, struct iattr *);
extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
-extern void ext4_delete_inode (struct inode *);
-extern int ext4_sync_inode (handle_t *, struct inode *);
-extern void ext4_discard_reservation (struct inode *);
+extern void ext4_delete_inode(struct inode *);
+extern int ext4_sync_inode(handle_t *, struct inode *);
extern void ext4_dirty_inode(struct inode *);
extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern int ext4_can_truncate(struct inode *inode);
-extern void ext4_truncate (struct inode *);
+extern void ext4_truncate(struct inode *);
extern void ext4_set_inode_flags(struct inode *);
extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern void ext4_set_aops(struct inode *inode);
@@ -1080,11 +1099,10 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
-extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
+extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
/* migrate.c */
-extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int,
- unsigned long);
+extern int ext4_ext_migrate(struct inode *);
/* namei.c */
extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
@@ -1099,14 +1117,14 @@ extern int ext4_group_extend(struct super_block *sb,
ext4_fsblk_t n_blocks_count);
/* super.c */
-extern void ext4_error (struct super_block *, const char *, const char *, ...)
+extern void ext4_error(struct super_block *, const char *, const char *, ...)
__attribute__ ((format (printf, 3, 4)));
-extern void __ext4_std_error (struct super_block *, const char *, int);
-extern void ext4_abort (struct super_block *, const char *, const char *, ...)
+extern void __ext4_std_error(struct super_block *, const char *, int);
+extern void ext4_abort(struct super_block *, const char *, const char *, ...)
__attribute__ ((format (printf, 3, 4)));
-extern void ext4_warning (struct super_block *, const char *, const char *, ...)
+extern void ext4_warning(struct super_block *, const char *, const char *, ...)
__attribute__ ((format (printf, 3, 4)));
-extern void ext4_update_dynamic_rev (struct super_block *sb);
+extern void ext4_update_dynamic_rev(struct super_block *sb);
extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
__u32 compat);
extern int ext4_update_rocompat_feature(handle_t *handle,
@@ -1179,7 +1197,7 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
static inline
struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
- ext4_group_t group)
+ ext4_group_t group)
{
struct ext4_group_info ***grp_info;
long indexv, indexh;
@@ -1207,6 +1225,28 @@ do { \
__ext4_std_error((sb), __func__, (errno)); \
} while (0)
+#ifdef CONFIG_SMP
+/* Each CPU can accumulate FBC_BATCH blocks in their local
+ * counters. So we need to make sure we have free blocks more
+ * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
+ */
+#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
+#else
+#define EXT4_FREEBLOCKS_WATERMARK 0
+#endif
+
+static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
+{
+ /*
+ * XXX: replace with spinlock if seen contended -bzzz
+ */
+ down_write(&EXT4_I(inode)->i_data_sem);
+ if (newsize > EXT4_I(inode)->i_disksize)
+ EXT4_I(inode)->i_disksize = newsize;
+ up_write(&EXT4_I(inode)->i_data_sem);
+ return ;
+}
+
/*
* Inodes and files operations
*/
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index d33dc56d698..bec7ce59fc0 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -124,6 +124,19 @@ struct ext4_ext_path {
#define EXT4_EXT_CACHE_GAP 1
#define EXT4_EXT_CACHE_EXTENT 2
+/*
+ * to be called by ext4_ext_walk_space()
+ * negative retcode - error
+ * positive retcode - signal for ext4_ext_walk_space(), see below
+ * callback must return valid extent (passed or newly created)
+ */
+typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
+ struct ext4_ext_cache *,
+ struct ext4_extent *, void *);
+
+#define EXT_CONTINUE 0
+#define EXT_BREAK 1
+#define EXT_REPEAT 2
#define EXT_MAX_BLOCK 0xffffffff
@@ -224,6 +237,8 @@ extern int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_extent *);
extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
+extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t,
+ ext_prepare_callback, void *);
extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
struct ext4_ext_path *);
extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
index ef7409f0e7e..5c124c0ac6d 100644
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -33,38 +33,6 @@ typedef __u32 ext4_lblk_t;
/* data type for block group number */
typedef unsigned long ext4_group_t;
-struct ext4_reserve_window {
- ext4_fsblk_t _rsv_start; /* First byte reserved */
- ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */
-};
-
-struct ext4_reserve_window_node {
- struct rb_node rsv_node;
- __u32 rsv_goal_size;
- __u32 rsv_alloc_hit;
- struct ext4_reserve_window rsv_window;
-};
-
-struct ext4_block_alloc_info {
- /* information about reservation window */
- struct ext4_reserve_window_node rsv_window_node;
- /*
- * was i_next_alloc_block in ext4_inode_info
- * is the logical (file-relative) number of the
- * most-recently-allocated block in this file.
- * We use this for detecting linearly ascending allocation requests.
- */
- ext4_lblk_t last_alloc_logical_block;
- /*
- * Was i_next_alloc_goal in ext4_inode_info
- * is the *physical* companion to i_next_alloc_block.
- * it the physical block number of the block which was most-recentl
- * allocated to this file. This give us the goal (target) for the next
- * allocation when we detect linearly ascending requests.
- */
- ext4_fsblk_t last_alloc_physical_block;
-};
-
#define rsv_start rsv_window._rsv_start
#define rsv_end rsv_window._rsv_end
@@ -97,11 +65,8 @@ struct ext4_inode_info {
ext4_group_t i_block_group;
__u32 i_state; /* Dynamic state flags for ext4 */
- /* block reservation info */
- struct ext4_block_alloc_info *i_block_alloc_info;
-
ext4_lblk_t i_dir_start_lookup;
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
/*
* Extended attributes can be read independently of the main file
* data. Taking i_mutex even when reading would cause contention
@@ -111,7 +76,7 @@ struct ext4_inode_info {
*/
struct rw_semaphore xattr_sem;
#endif
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
struct posix_acl *i_acl;
struct posix_acl *i_default_acl;
#endif
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 6300226d553..445fde603df 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -40,8 +40,8 @@ struct ext4_sb_info {
unsigned long s_blocks_last; /* Last seen block count */
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
struct buffer_head * s_sbh; /* Buffer containing the super block */
- struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */
- struct buffer_head ** s_group_desc;
+ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
+ struct buffer_head **s_group_desc;
unsigned long s_mount_opt;
ext4_fsblk_t s_sb_block;
uid_t s_resuid;
@@ -52,6 +52,7 @@ struct ext4_sb_info {
int s_desc_per_block_bits;
int s_inode_size;
int s_first_ino;
+ unsigned int s_inode_readahead_blks;
spinlock_t s_next_gen_lock;
u32 s_next_generation;
u32 s_hash_seed[4];
@@ -59,16 +60,17 @@ struct ext4_sb_info {
struct percpu_counter s_freeblocks_counter;
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
+ struct percpu_counter s_dirtyblocks_counter;
struct blockgroup_lock s_blockgroup_lock;
+ struct proc_dir_entry *s_proc;
/* root of the per fs reservation window tree */
spinlock_t s_rsv_window_lock;
struct rb_root s_rsv_window_root;
- struct ext4_reserve_window_node s_rsv_window_head;
/* Journaling */
- struct inode * s_journal_inode;
- struct journal_s * s_journal;
+ struct inode *s_journal_inode;
+ struct journal_s *s_journal;
struct list_head s_orphan;
unsigned long s_commit_interval;
struct block_device *journal_bdev;
@@ -97,21 +99,18 @@ struct ext4_sb_info {
struct inode *s_buddy_cache;
long s_blocks_reserved;
spinlock_t s_reserve_lock;
- struct list_head s_active_transaction;
- struct list_head s_closed_transaction;
- struct list_head s_committed_transaction;
spinlock_t s_md_lock;
tid_t s_last_transaction;
unsigned short *s_mb_offsets, *s_mb_maxs;
/* tunables */
unsigned long s_stripe;
- unsigned long s_mb_stream_request;
- unsigned long s_mb_max_to_scan;
- unsigned long s_mb_min_to_scan;
- unsigned long s_mb_stats;
- unsigned long s_mb_order2_reqs;
- unsigned long s_mb_group_prealloc;
+ unsigned int s_mb_stream_request;
+ unsigned int s_mb_max_to_scan;
+ unsigned int s_mb_min_to_scan;
+ unsigned int s_mb_stats;
+ unsigned int s_mb_order2_reqs;
+ unsigned int s_mb_group_prealloc;
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
unsigned long s_mb_last_start;
@@ -121,7 +120,6 @@ struct ext4_sb_info {
int s_mb_history_cur;
int s_mb_history_max;
int s_mb_history_num;
- struct proc_dir_entry *s_mb_proc;
spinlock_t s_mb_history_lock;
int s_mb_history_filter;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index b24d3c53f20..ea2ce3c0ae6 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -40,6 +40,7 @@
#include <linux/slab.h>
#include <linux/falloc.h>
#include <asm/uaccess.h>
+#include <linux/fiemap.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"
@@ -383,8 +384,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
ext_debug("\n");
}
#else
-#define ext4_ext_show_path(inode,path)
-#define ext4_ext_show_leaf(inode,path)
+#define ext4_ext_show_path(inode, path)
+#define ext4_ext_show_leaf(inode, path)
#endif
void ext4_ext_drop_refs(struct ext4_ext_path *path)
@@ -440,9 +441,10 @@ ext4_ext_binsearch_idx(struct inode *inode,
for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
if (k != 0 &&
le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
- printk("k=%d, ix=0x%p, first=0x%p\n", k,
- ix, EXT_FIRST_INDEX(eh));
- printk("%u <= %u\n",
+ printk(KERN_DEBUG "k=%d, ix=0x%p, "
+ "first=0x%p\n", k,
+ ix, EXT_FIRST_INDEX(eh));
+ printk(KERN_DEBUG "%u <= %u\n",
le32_to_cpu(ix->ei_block),
le32_to_cpu(ix[-1].ei_block));
}
@@ -1475,7 +1477,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *newext)
{
- struct ext4_extent_header * eh;
+ struct ext4_extent_header *eh;
struct ext4_extent *ex, *fex;
struct ext4_extent *nearex; /* nearest extent */
struct ext4_ext_path *npath = NULL;
@@ -1625,6 +1627,113 @@ cleanup:
return err;
}
+int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
+ ext4_lblk_t num, ext_prepare_callback func,
+ void *cbdata)
+{
+ struct ext4_ext_path *path = NULL;
+ struct ext4_ext_cache cbex;
+ struct ext4_extent *ex;
+ ext4_lblk_t next, start = 0, end = 0;
+ ext4_lblk_t last = block + num;
+ int depth, exists, err = 0;
+
+ BUG_ON(func == NULL);
+ BUG_ON(inode == NULL);
+
+ while (block < last && block != EXT_MAX_BLOCK) {
+ num = last - block;
+ /* find extent for this block */
+ path = ext4_ext_find_extent(inode, block, path);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ path = NULL;
+ break;
+ }
+
+ depth = ext_depth(inode);
+ BUG_ON(path[depth].p_hdr == NULL);
+ ex = path[depth].p_ext;
+ next = ext4_ext_next_allocated_block(path);
+
+ exists = 0;
+ if (!ex) {
+ /* there is no extent yet, so try to allocate
+ * all requested space */
+ start = block;
+ end = block + num;
+ } else if (le32_to_cpu(ex->ee_block) > block) {
+ /* need to allocate space before found extent */
+ start = block;
+ end = le32_to_cpu(ex->ee_block);
+ if (block + num < end)
+ end = block + num;
+ } else if (block >= le32_to_cpu(ex->ee_block)
+ + ext4_ext_get_actual_len(ex)) {
+ /* need to allocate space after found extent */
+ start = block;
+ end = block + num;
+ if (end >= next)
+ end = next;
+ } else if (block >= le32_to_cpu(ex->ee_block)) {
+ /*
+ * some part of requested space is covered
+ * by found extent
+ */
+ start = block;
+ end = le32_to_cpu(ex->ee_block)
+ + ext4_ext_get_actual_len(ex);
+ if (block + num < end)
+ end = block + num;
+ exists = 1;
+ } else {
+ BUG();
+ }
+ BUG_ON(end <= start);
+
+ if (!exists) {
+ cbex.ec_block = start;
+ cbex.ec_len = end - start;
+ cbex.ec_start = 0;
+ cbex.ec_type = EXT4_EXT_CACHE_GAP;
+ } else {
+ cbex.ec_block = le32_to_cpu(ex->ee_block);
+ cbex.ec_len = ext4_ext_get_actual_len(ex);
+ cbex.ec_start = ext_pblock(ex);
+ cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
+ }
+
+ BUG_ON(cbex.ec_len == 0);
+ err = func(inode, path, &cbex, ex, cbdata);
+ ext4_ext_drop_refs(path);
+
+ if (err < 0)
+ break;
+
+ if (err == EXT_REPEAT)
+ continue;
+ else if (err == EXT_BREAK) {
+ err = 0;
+ break;
+ }
+
+ if (ext_depth(inode) != depth) {
+ /* depth was changed. we have to realloc path */
+ kfree(path);
+ path = NULL;
+ }
+
+ block = cbex.ec_block + cbex.ec_len;
+ }
+
+ if (path) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ }
+
+ return err;
+}
+
static void
ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
__u32 len, ext4_fsblk_t start, int type)
@@ -2142,7 +2251,7 @@ void ext4_ext_init(struct super_block *sb)
*/
if (test_opt(sb, EXTENTS)) {
- printk("EXT4-fs: file extents enabled");
+ printk(KERN_INFO "EXT4-fs: file extents enabled");
#ifdef AGGRESSIVE_TEST
printk(", aggressive tests");
#endif
@@ -2696,11 +2805,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
goto out2;
}
/*
- * Okay, we need to do block allocation. Lazily initialize the block
- * allocation info here if necessary.
+ * Okay, we need to do block allocation.
*/
- if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
- ext4_init_block_alloc_info(inode);
/* find neighbour allocated blocks */
ar.lleft = iblock;
@@ -2760,7 +2866,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* free data blocks we just allocated */
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
- ext4_mb_discard_inode_preallocations(inode);
+ ext4_discard_preallocations(inode);
ext4_free_blocks(handle, inode, ext_pblock(&newex),
ext4_ext_get_actual_len(&newex), 0);
goto out2;
@@ -2824,7 +2930,7 @@ void ext4_ext_truncate(struct inode *inode)
down_write(&EXT4_I(inode)->i_data_sem);
ext4_ext_invalidate_cache(inode);
- ext4_discard_reservation(inode);
+ ext4_discard_preallocations(inode);
/*
* TODO: optimization is possible here.
@@ -2877,10 +2983,11 @@ static void ext4_falloc_update_inode(struct inode *inode,
* Update only when preallocation was requested beyond
* the file size.
*/
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- new_size > i_size_read(inode)) {
- i_size_write(inode, new_size);
- EXT4_I(inode)->i_disksize = new_size;
+ if (!(mode & FALLOC_FL_KEEP_SIZE)) {
+ if (new_size > i_size_read(inode))
+ i_size_write(inode, new_size);
+ if (new_size > EXT4_I(inode)->i_disksize)
+ ext4_update_i_disksize(inode, new_size);
}
}
@@ -2972,3 +3079,143 @@ retry:
mutex_unlock(&inode->i_mutex);
return ret > 0 ? ret2 : ret;
}
+
+/*
+ * Callback function called for each extent to gather FIEMAP information.
+ */
+int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
+ struct ext4_ext_cache *newex, struct ext4_extent *ex,
+ void *data)
+{
+ struct fiemap_extent_info *fieinfo = data;
+ unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
+ __u64 logical;
+ __u64 physical;
+ __u64 length;
+ __u32 flags = 0;
+ int error;
+
+ logical = (__u64)newex->ec_block << blksize_bits;
+
+ if (newex->ec_type == EXT4_EXT_CACHE_GAP) {
+ pgoff_t offset;
+ struct page *page;
+ struct buffer_head *bh = NULL;
+
+ offset = logical >> PAGE_SHIFT;
+ page = find_get_page(inode->i_mapping, offset);
+ if (!page || !page_has_buffers(page))
+ return EXT_CONTINUE;
+
+ bh = page_buffers(page);
+
+ if (!bh)
+ return EXT_CONTINUE;
+
+ if (buffer_delay(bh)) {
+ flags |= FIEMAP_EXTENT_DELALLOC;
+ page_cache_release(page);
+ } else {
+ page_cache_release(page);
+ return EXT_CONTINUE;
+ }
+ }
+
+ physical = (__u64)newex->ec_start << blksize_bits;
+ length = (__u64)newex->ec_len << blksize_bits;
+
+ if (ex && ext4_ext_is_uninitialized(ex))
+ flags |= FIEMAP_EXTENT_UNWRITTEN;
+
+ /*
+ * If this extent reaches EXT_MAX_BLOCK, it must be last.
+ *
+ * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
+ * this also indicates no more allocated blocks.
+ *
+ * XXX this might miss a single-block extent at EXT_MAX_BLOCK
+ */
+ if (logical + length - 1 == EXT_MAX_BLOCK ||
+ ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
+ flags |= FIEMAP_EXTENT_LAST;
+
+ error = fiemap_fill_next_extent(fieinfo, logical, physical,
+ length, flags);
+ if (error < 0)
+ return error;
+ if (error == 1)
+ return EXT_BREAK;
+
+ return EXT_CONTINUE;
+}
+
+/* fiemap flags we can handle specified here */
+#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
+
+int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
+{
+ __u64 physical = 0;
+ __u64 length;
+ __u32 flags = FIEMAP_EXTENT_LAST;
+ int blockbits = inode->i_sb->s_blocksize_bits;
+ int error = 0;
+
+ /* in-inode? */
+ if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) {
+ struct ext4_iloc iloc;
+ int offset; /* offset of xattr in inode */
+
+ error = ext4_get_inode_loc(inode, &iloc);
+ if (error)
+ return error;
+ physical = iloc.bh->b_blocknr << blockbits;
+ offset = EXT4_GOOD_OLD_INODE_SIZE +
+ EXT4_I(inode)->i_extra_isize;
+ physical += offset;
+ length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
+ flags |= FIEMAP_EXTENT_DATA_INLINE;
+ } else { /* external block */
+ physical = EXT4_I(inode)->i_file_acl << blockbits;
+ length = inode->i_sb->s_blocksize;
+ }
+
+ if (physical)
+ error = fiemap_fill_next_extent(fieinfo, 0, physical,
+ length, flags);
+ return (error < 0 ? error : 0);
+}
+
+int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ __u64 start, __u64 len)
+{
+ ext4_lblk_t start_blk;
+ ext4_lblk_t len_blks;
+ int error = 0;
+
+ /* fallback to generic here if not in extents fmt */
+ if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+ return generic_block_fiemap(inode, fieinfo, start, len,
+ ext4_get_block);
+
+ if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
+ return -EBADR;
+
+ if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
+ error = ext4_xattr_fiemap(inode, fieinfo);
+ } else {
+ start_blk = start >> inode->i_sb->s_blocksize_bits;
+ len_blks = len >> inode->i_sb->s_blocksize_bits;
+
+ /*
+ * Walk the extent tree gathering extent information.
+ * ext4_ext_fiemap_cb will push extents back to user.
+ */
+ down_write(&EXT4_I(inode)->i_data_sem);
+ error = ext4_ext_walk_space(inode, start_blk, len_blks,
+ ext4_ext_fiemap_cb, fieinfo);
+ up_write(&EXT4_I(inode)->i_data_sem);
+ }
+
+ return error;
+}
+
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 430eb7978db..6bd11fba71f 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -31,14 +31,14 @@
* from ext4_file_open: open gets called at every open, but release
* gets called only when /all/ the files are closed.
*/
-static int ext4_release_file (struct inode * inode, struct file * filp)
+static int ext4_release_file(struct inode *inode, struct file *filp)
{
/* if we are the last writer on the inode, drop the block reservation */
if ((filp->f_mode & FMODE_WRITE) &&
(atomic_read(&inode->i_writecount) == 1))
{
down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_reservation(inode);
+ ext4_discard_preallocations(inode);
up_write(&EXT4_I(inode)->i_data_sem);
}
if (is_dx(inode) && filp->private_data)
@@ -140,6 +140,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
+extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ __u64 start, __u64 len);
+
const struct file_operations ext4_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -162,7 +165,7 @@ const struct inode_operations ext4_file_inode_operations = {
.truncate = ext4_truncate,
.setattr = ext4_setattr,
.getattr = ext4_getattr,
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
@@ -170,5 +173,6 @@ const struct inode_operations ext4_file_inode_operations = {
#endif
.permission = ext4_permission,
.fallocate = ext4_fallocate,
+ .fiemap = ext4_fiemap,
};
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index a45c3737ad3..5afe4370840 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -28,6 +28,7 @@
#include <linux/writeback.h>
#include <linux/jbd2.h>
#include <linux/blkdev.h>
+#include <linux/marker.h>
#include "ext4.h"
#include "ext4_jbd2.h"
@@ -43,7 +44,7 @@
* inode to disk.
*/
-int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
+int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
{
struct inode *inode = dentry->d_inode;
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
@@ -51,6 +52,10 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
J_ASSERT(ext4_journal_current_handle() == NULL);
+ trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
+ inode->i_sb->s_id, datasync, inode->i_ino,
+ dentry->d_parent->d_inode->i_ino);
+
/*
* data=writeback:
* The caller's filemap_fdatawrite()/wait will sync the data.
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 1d6329dbe39..556ca8eba3d 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -27,7 +27,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[])
sum += DELTA;
b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
- } while(--n);
+ } while (--n);
buf[0] += b0;
buf[1] += b1;
@@ -35,7 +35,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[])
/* The old legacy hash */
-static __u32 dx_hack_hash (const char *name, int len)
+static __u32 dx_hack_hash(const char *name, int len)
{
__u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
while (len--) {
@@ -59,7 +59,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
val = pad;
if (len > num*4)
len = num * 4;
- for (i=0; i < len; i++) {
+ for (i = 0; i < len; i++) {
if ((i % 4) == 0)
val = pad;
val = msg[i] + (val << 8);
@@ -104,7 +104,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
/* Check to see if the seed is all zero's */
if (hinfo->seed) {
- for (i=0; i < 4; i++) {
+ for (i = 0; i < 4; i++) {
if (hinfo->seed[i])
break;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f344834bbf5..fe34d74cfb1 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -115,9 +115,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
block_group, bitmap_blk);
return NULL;
}
- if (bh_uptodate_or_lock(bh))
+ if (buffer_uptodate(bh) &&
+ !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
return bh;
+ lock_buffer(bh);
spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
ext4_init_inode_bitmap(sb, bh, block_group, desc);
@@ -154,39 +156,40 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
* though), and then we'd have two inodes sharing the
* same inode number and space on the harddisk.
*/
-void ext4_free_inode (handle_t *handle, struct inode * inode)
+void ext4_free_inode(handle_t *handle, struct inode *inode)
{
- struct super_block * sb = inode->i_sb;
+ struct super_block *sb = inode->i_sb;
int is_directory;
unsigned long ino;
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *bh2;
ext4_group_t block_group;
unsigned long bit;
- struct ext4_group_desc * gdp;
- struct ext4_super_block * es;
+ struct ext4_group_desc *gdp;
+ struct ext4_super_block *es;
struct ext4_sb_info *sbi;
int fatal = 0, err;
ext4_group_t flex_group;
if (atomic_read(&inode->i_count) > 1) {
- printk ("ext4_free_inode: inode has count=%d\n",
- atomic_read(&inode->i_count));
+ printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
+ atomic_read(&inode->i_count));
return;
}
if (inode->i_nlink) {
- printk ("ext4_free_inode: inode has nlink=%d\n",
- inode->i_nlink);
+ printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
+ inode->i_nlink);
return;
}
if (!sb) {
- printk("ext4_free_inode: inode on nonexistent device\n");
+ printk(KERN_ERR "ext4_free_inode: inode on "
+ "nonexistent device\n");
return;
}
sbi = EXT4_SB(sb);
ino = inode->i_ino;
- ext4_debug ("freeing inode %lu\n", ino);
+ ext4_debug("freeing inode %lu\n", ino);
/*
* Note: we must free any quota before locking the superblock,
@@ -200,12 +203,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
is_directory = S_ISDIR(inode->i_mode);
/* Do this BEFORE marking the inode not in use or returning an error */
- clear_inode (inode);
+ clear_inode(inode);
es = EXT4_SB(sb)->s_es;
if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
- ext4_error (sb, "ext4_free_inode",
- "reserved or nonexistent inode %lu", ino);
+ ext4_error(sb, "ext4_free_inode",
+ "reserved or nonexistent inode %lu", ino);
goto error_return;
}
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
@@ -222,10 +225,10 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
/* Ok, now we can actually update the inode bitmaps.. */
if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
bit, bitmap_bh->b_data))
- ext4_error (sb, "ext4_free_inode",
- "bit already cleared for inode %lu", ino);
+ ext4_error(sb, "ext4_free_inode",
+ "bit already cleared for inode %lu", ino);
else {
- gdp = ext4_get_group_desc (sb, block_group, &bh2);
+ gdp = ext4_get_group_desc(sb, block_group, &bh2);
BUFFER_TRACE(bh2, "get_write_access");
fatal = ext4_journal_get_write_access(handle, bh2);
@@ -287,7 +290,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
avefreei = freei / ngroups;
for (group = 0; group < ngroups; group++) {
- desc = ext4_get_group_desc (sb, group, NULL);
+ desc = ext4_get_group_desc(sb, group, NULL);
if (!desc || !desc->bg_free_inodes_count)
continue;
if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
@@ -576,16 +579,16 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
* For other inodes, search forward from the parent directory's block
* group to find a free inode.
*/
-struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
+struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
{
struct super_block *sb;
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *bh2;
ext4_group_t group = 0;
unsigned long ino = 0;
- struct inode * inode;
- struct ext4_group_desc * gdp = NULL;
- struct ext4_super_block * es;
+ struct inode *inode;
+ struct ext4_group_desc *gdp = NULL;
+ struct ext4_super_block *es;
struct ext4_inode_info *ei;
struct ext4_sb_info *sbi;
int ret2, err = 0;
@@ -613,7 +616,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
}
if (S_ISDIR(mode)) {
- if (test_opt (sb, OLDALLOC))
+ if (test_opt(sb, OLDALLOC))
ret2 = find_group_dir(sb, dir, &group);
else
ret2 = find_group_orlov(sb, dir, &group);
@@ -783,7 +786,7 @@ got:
}
inode->i_uid = current->fsuid;
- if (test_opt (sb, GRPID))
+ if (test_opt(sb, GRPID))
inode->i_gid = dir->i_gid;
else if (dir->i_mode & S_ISGID) {
inode->i_gid = dir->i_gid;
@@ -816,7 +819,6 @@ got:
ei->i_flags &= ~EXT4_DIRSYNC_FL;
ei->i_file_acl = 0;
ei->i_dtime = 0;
- ei->i_block_alloc_info = NULL;
ei->i_block_group = group;
ext4_set_inode_flags(inode);
@@ -832,7 +834,7 @@ got:
ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
ret = inode;
- if(DQUOT_ALLOC_INODE(inode)) {
+ if (DQUOT_ALLOC_INODE(inode)) {
err = -EDQUOT;
goto fail_drop;
}
@@ -841,7 +843,7 @@ got:
if (err)
goto fail_free_drop;
- err = ext4_init_security(handle,inode, dir);
+ err = ext4_init_security(handle, inode, dir);
if (err)
goto fail_free_drop;
@@ -959,7 +961,7 @@ error:
return ERR_PTR(err);
}
-unsigned long ext4_count_free_inodes (struct super_block * sb)
+unsigned long ext4_count_free_inodes(struct super_block *sb)
{
unsigned long desc_count;
struct ext4_group_desc *gdp;
@@ -974,7 +976,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
bitmap_count = 0;
gdp = NULL;
for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
- gdp = ext4_get_group_desc (sb, i, NULL);
+ gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
@@ -989,13 +991,14 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
bitmap_count += x;
}
brelse(bitmap_bh);
- printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n",
- le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
+ printk(KERN_DEBUG "ext4_count_free_inodes: "
+ "stored = %u, computed = %lu, %lu\n",
+ le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
return desc_count;
#else
desc_count = 0;
for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
- gdp = ext4_get_group_desc (sb, i, NULL);
+ gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
@@ -1006,13 +1009,13 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
}
/* Called at mount-time, super-block is locked */
-unsigned long ext4_count_dirs (struct super_block * sb)
+unsigned long ext4_count_dirs(struct super_block * sb)
{
unsigned long count = 0;
ext4_group_t i;
for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
- struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL);
+ struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
count += le16_to_cpu(gdp->bg_used_dirs_count);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7e91913e325..8dbf6953845 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -190,7 +190,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
/*
* Called at the last iput() if i_nlink is zero.
*/
-void ext4_delete_inode (struct inode * inode)
+void ext4_delete_inode(struct inode *inode)
{
handle_t *handle;
int err;
@@ -330,11 +330,11 @@ static int ext4_block_to_path(struct inode *inode,
int final = 0;
if (i_block < 0) {
- ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0");
+ ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0");
} else if (i_block < direct_blocks) {
offsets[n++] = i_block;
final = direct_blocks;
- } else if ( (i_block -= direct_blocks) < indirect_blocks) {
+ } else if ((i_block -= direct_blocks) < indirect_blocks) {
offsets[n++] = EXT4_IND_BLOCK;
offsets[n++] = i_block;
final = ptrs;
@@ -400,14 +400,14 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
*err = 0;
/* i_data is not going away, no lock needed */
- add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets);
+ add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
if (!p->key)
goto no_block;
while (--depth) {
bh = sb_bread(sb, le32_to_cpu(p->key));
if (!bh)
goto failure;
- add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
+ add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
/* Reader: end */
if (!p->key)
goto no_block;
@@ -443,7 +443,7 @@ no_block:
static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
{
struct ext4_inode_info *ei = EXT4_I(inode);
- __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
+ __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
__le32 *p;
ext4_fsblk_t bg_start;
ext4_fsblk_t last_block;
@@ -486,18 +486,9 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
Indirect *partial)
{
- struct ext4_block_alloc_info *block_i;
-
- block_i = EXT4_I(inode)->i_block_alloc_info;
-
/*
- * try the heuristic for sequential allocation,
- * failing that at least try to get decent locality.
+ * XXX need to get goal block from mballoc's data structures
*/
- if (block_i && (block == block_i->last_alloc_logical_block + 1)
- && (block_i->last_alloc_physical_block != 0)) {
- return block_i->last_alloc_physical_block + 1;
- }
return ext4_find_near(inode, partial);
}
@@ -630,7 +621,7 @@ allocated:
*err = 0;
return ret;
failed_out:
- for (i = 0; i <index; i++)
+ for (i = 0; i < index; i++)
ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
return ret;
}
@@ -703,7 +694,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
branch[n].p = (__le32 *) bh->b_data + offsets[n];
branch[n].key = cpu_to_le32(new_blocks[n]);
*branch[n].p = branch[n].key;
- if ( n == indirect_blks) {
+ if (n == indirect_blks) {
current_block = new_blocks[n];
/*
* End of chain, update the last new metablock of
@@ -730,7 +721,7 @@ failed:
BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
ext4_journal_forget(handle, branch[i].bh);
}
- for (i = 0; i <indirect_blks; i++)
+ for (i = 0; i < indirect_blks; i++)
ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
@@ -757,10 +748,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
{
int i;
int err = 0;
- struct ext4_block_alloc_info *block_i;
ext4_fsblk_t current_block;
- block_i = EXT4_I(inode)->i_block_alloc_info;
/*
* If we're splicing into a [td]indirect block (as opposed to the
* inode) then we need to get write access to the [td]indirect block
@@ -783,18 +772,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
if (num == 0 && blks > 1) {
current_block = le32_to_cpu(where->key) + 1;
for (i = 1; i < blks; i++)
- *(where->p + i ) = cpu_to_le32(current_block++);
- }
-
- /*
- * update the most recently allocated logical & physical block
- * in i_block_alloc_info, to assist find the proper goal block for next
- * allocation
- */
- if (block_i) {
- block_i->last_alloc_logical_block = block + blks - 1;
- block_i->last_alloc_physical_block =
- le32_to_cpu(where[num].key) + blks - 1;
+ *(where->p + i) = cpu_to_le32(current_block++);
}
/* We are done with atomic stuff, now do the rest of housekeeping */
@@ -914,12 +892,8 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
goto cleanup;
/*
- * Okay, we need to do block allocation. Lazily initialize the block
- * allocation info here if necessary
+ * Okay, we need to do block allocation.
*/
- if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
- ext4_init_block_alloc_info(inode);
-
goal = ext4_find_goal(inode, iblock, partial);
/* the number of blocks need to allocate for [d,t]indirect blocks */
@@ -1030,19 +1004,20 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
- /* Account for allocated meta_blocks */
- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+ if (mdb_free) {
+ /* Account for allocated meta_blocks */
+ mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
- /* update fs free blocks counter for truncate case */
- percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
+ /* update fs dirty blocks counter */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
+ EXT4_I(inode)->i_allocated_meta_blocks = 0;
+ EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+ }
/* update per-inode reservations */
BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
EXT4_I(inode)->i_reserved_data_blocks -= used;
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
- EXT4_I(inode)->i_allocated_meta_blocks = 0;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}
@@ -1160,8 +1135,8 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
/* Maximum number of blocks we map for direct IO at once. */
#define DIO_MAX_BLOCKS 4096
-static int ext4_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+int ext4_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
{
handle_t *handle = ext4_journal_current_handle();
int ret = 0, started = 0;
@@ -1241,7 +1216,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
BUFFER_TRACE(bh, "call get_create_access");
fatal = ext4_journal_get_create_access(handle, bh);
if (!fatal && !buffer_uptodate(bh)) {
- memset(bh->b_data,0,inode->i_sb->s_blocksize);
+ memset(bh->b_data, 0, inode->i_sb->s_blocksize);
set_buffer_uptodate(bh);
}
unlock_buffer(bh);
@@ -1266,7 +1241,7 @@ err:
struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
ext4_lblk_t block, int create, int *err)
{
- struct buffer_head * bh;
+ struct buffer_head *bh;
bh = ext4_getblk(handle, inode, block, create, err);
if (!bh)
@@ -1282,13 +1257,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
return NULL;
}
-static int walk_page_buffers( handle_t *handle,
- struct buffer_head *head,
- unsigned from,
- unsigned to,
- int *partial,
- int (*fn)( handle_t *handle,
- struct buffer_head *bh))
+static int walk_page_buffers(handle_t *handle,
+ struct buffer_head *head,
+ unsigned from,
+ unsigned to,
+ int *partial,
+ int (*fn)(handle_t *handle,
+ struct buffer_head *bh))
{
struct buffer_head *bh;
unsigned block_start, block_end;
@@ -1296,9 +1271,9 @@ static int walk_page_buffers( handle_t *handle,
int err, ret = 0;
struct buffer_head *next;
- for ( bh = head, block_start = 0;
- ret == 0 && (bh != head || !block_start);
- block_start = block_end, bh = next)
+ for (bh = head, block_start = 0;
+ ret == 0 && (bh != head || !block_start);
+ block_start = block_end, bh = next)
{
next = bh->b_this_page;
block_end = block_start + blocksize;
@@ -1351,23 +1326,23 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- struct inode *inode = mapping->host;
+ struct inode *inode = mapping->host;
int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
handle_t *handle;
int retries = 0;
- struct page *page;
+ struct page *page;
pgoff_t index;
- unsigned from, to;
+ unsigned from, to;
index = pos >> PAGE_CACHE_SHIFT;
- from = pos & (PAGE_CACHE_SIZE - 1);
- to = from + len;
+ from = pos & (PAGE_CACHE_SIZE - 1);
+ to = from + len;
retry:
- handle = ext4_journal_start(inode, needed_blocks);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- goto out;
+ handle = ext4_journal_start(inode, needed_blocks);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out;
}
page = __grab_cache_page(mapping, index);
@@ -1387,9 +1362,16 @@ retry:
}
if (ret) {
- unlock_page(page);
+ unlock_page(page);
ext4_journal_stop(handle);
- page_cache_release(page);
+ page_cache_release(page);
+ /*
+ * block_write_begin may have instantiated a few blocks
+ * outside i_size. Trim these off again. Don't need
+ * i_size_read because we hold i_mutex.
+ */
+ if (pos + len > inode->i_size)
+ vmtruncate(inode, inode->i_size);
}
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -1426,16 +1408,18 @@ static int ext4_ordered_write_end(struct file *file,
ret = ext4_jbd2_file_inode(handle, inode);
if (ret == 0) {
- /*
- * generic_write_end() will run mark_inode_dirty() if i_size
- * changes. So let's piggyback the i_disksize mark_inode_dirty
- * into that.
- */
loff_t new_i_size;
new_i_size = pos + copied;
- if (new_i_size > EXT4_I(inode)->i_disksize)
- EXT4_I(inode)->i_disksize = new_i_size;
+ if (new_i_size > EXT4_I(inode)->i_disksize) {
+ ext4_update_i_disksize(inode, new_i_size);
+ /* We need to mark inode dirty even if
+ * new_i_size is less that inode->i_size
+ * bu greater than i_disksize.(hint delalloc)
+ */
+ ext4_mark_inode_dirty(handle, inode);
+ }
+
ret2 = generic_write_end(file, mapping, pos, len, copied,
page, fsdata);
copied = ret2;
@@ -1460,8 +1444,14 @@ static int ext4_writeback_write_end(struct file *file,
loff_t new_i_size;
new_i_size = pos + copied;
- if (new_i_size > EXT4_I(inode)->i_disksize)
- EXT4_I(inode)->i_disksize = new_i_size;
+ if (new_i_size > EXT4_I(inode)->i_disksize) {
+ ext4_update_i_disksize(inode, new_i_size);
+ /* We need to mark inode dirty even if
+ * new_i_size is less that inode->i_size
+ * bu greater than i_disksize.(hint delalloc)
+ */
+ ext4_mark_inode_dirty(handle, inode);
+ }
ret2 = generic_write_end(file, mapping, pos, len, copied,
page, fsdata);
@@ -1486,6 +1476,7 @@ static int ext4_journalled_write_end(struct file *file,
int ret = 0, ret2;
int partial = 0;
unsigned from, to;
+ loff_t new_i_size;
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -1500,11 +1491,12 @@ static int ext4_journalled_write_end(struct file *file,
to, &partial, write_end_fn);
if (!partial)
SetPageUptodate(page);
- if (pos+copied > inode->i_size)
+ new_i_size = pos + copied;
+ if (new_i_size > inode->i_size)
i_size_write(inode, pos+copied);
EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
- if (inode->i_size > EXT4_I(inode)->i_disksize) {
- EXT4_I(inode)->i_disksize = inode->i_size;
+ if (new_i_size > EXT4_I(inode)->i_disksize) {
+ ext4_update_i_disksize(inode, new_i_size);
ret2 = ext4_mark_inode_dirty(handle, inode);
if (!ret)
ret = ret2;
@@ -1521,6 +1513,7 @@ static int ext4_journalled_write_end(struct file *file,
static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
{
+ int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
unsigned long md_needed, mdblocks, total = 0;
@@ -1529,6 +1522,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
* in order to allocate nrblocks
* worse case is one extent per block
*/
+repeat:
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
mdblocks = ext4_calc_metadata_amount(inode, total);
@@ -1537,13 +1531,14 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
total = md_needed + nrblocks;
- if (ext4_has_free_blocks(sbi, total) < total) {
+ if (ext4_claim_free_blocks(sbi, total)) {
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+ yield();
+ goto repeat;
+ }
return -ENOSPC;
}
- /* reduce fs free blocks counter */
- percpu_counter_sub(&sbi->s_freeblocks_counter, total);
-
EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
@@ -1585,8 +1580,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
release = to_free + mdb_free;
- /* update fs free blocks counter for truncate case */
- percpu_counter_add(&sbi->s_freeblocks_counter, release);
+ /* update fs dirty blocks counter for truncate case */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
/* update per-inode reservations */
BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
@@ -1630,6 +1625,7 @@ struct mpage_da_data {
struct writeback_control *wbc;
int io_done;
long pages_written;
+ int retval;
};
/*
@@ -1652,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
int ret = 0, err, nr_pages, i;
unsigned long index, end;
struct pagevec pvec;
+ long pages_skipped;
BUG_ON(mpd->next_page <= mpd->first_page);
pagevec_init(&pvec, 0);
@@ -1659,20 +1656,30 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
end = mpd->next_page - 1;
while (index <= end) {
- /* XXX: optimize tail */
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+ /*
+ * We can use PAGECACHE_TAG_DIRTY lookup here because
+ * even though we have cleared the dirty flag on the page
+ * We still keep the page in the radix tree with tag
+ * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
+ * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
+ * which is called via the below writepage callback.
+ */
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+ PAGECACHE_TAG_DIRTY,
+ min(end - index,
+ (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- index = page->index;
- if (index > end)
- break;
- index++;
-
+ pages_skipped = mpd->wbc->pages_skipped;
err = mapping->a_ops->writepage(page, mpd->wbc);
- if (!err)
+ if (!err && (pages_skipped == mpd->wbc->pages_skipped))
+ /*
+ * have successfully written the page
+ * without skipping the same
+ */
mpd->pages_written++;
/*
* In error case, we have to continue because
@@ -1783,6 +1790,57 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
unmap_underlying_metadata(bdev, bh->b_blocknr + i);
}
+static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
+ sector_t logical, long blk_cnt)
+{
+ int nr_pages, i;
+ pgoff_t index, end;
+ struct pagevec pvec;
+ struct inode *inode = mpd->inode;
+ struct address_space *mapping = inode->i_mapping;
+
+ index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ end = (logical + blk_cnt - 1) >>
+ (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ while (index <= end) {
+ nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+ if (nr_pages == 0)
+ break;
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+ index = page->index;
+ if (index > end)
+ break;
+ index++;
+
+ BUG_ON(!PageLocked(page));
+ BUG_ON(PageWriteback(page));
+ block_invalidatepage(page, 0);
+ ClearPageUptodate(page);
+ unlock_page(page);
+ }
+ }
+ return;
+}
+
+static void ext4_print_free_blocks(struct inode *inode)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ printk(KERN_EMERG "Total free blocks count %lld\n",
+ ext4_count_free_blocks(inode->i_sb));
+ printk(KERN_EMERG "Free/Dirty block details\n");
+ printk(KERN_EMERG "free_blocks=%lld\n",
+ percpu_counter_sum(&sbi->s_freeblocks_counter));
+ printk(KERN_EMERG "dirty_blocks=%lld\n",
+ percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+ printk(KERN_EMERG "Block reservation details\n");
+ printk(KERN_EMERG "i_reserved_data_blocks=%lu\n",
+ EXT4_I(inode)->i_reserved_data_blocks);
+ printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n",
+ EXT4_I(inode)->i_reserved_meta_blocks);
+ return;
+}
+
/*
* mpage_da_map_blocks - go through given space
*
@@ -1792,32 +1850,69 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
* The function skips space we know is already mapped to disk blocks.
*
*/
-static void mpage_da_map_blocks(struct mpage_da_data *mpd)
+static int mpage_da_map_blocks(struct mpage_da_data *mpd)
{
int err = 0;
- struct buffer_head *lbh = &mpd->lbh;
- sector_t next = lbh->b_blocknr;
struct buffer_head new;
+ struct buffer_head *lbh = &mpd->lbh;
+ sector_t next;
/*
* We consider only non-mapped and non-allocated blocks
*/
if (buffer_mapped(lbh) && !buffer_delay(lbh))
- return;
-
+ return 0;
new.b_state = lbh->b_state;
new.b_blocknr = 0;
new.b_size = lbh->b_size;
-
+ next = lbh->b_blocknr;
/*
* If we didn't accumulate anything
* to write simply return
*/
if (!new.b_size)
- return;
+ return 0;
err = mpd->get_block(mpd->inode, next, &new, 1);
- if (err)
- return;
+ if (err) {
+
+ /* If get block returns with error
+ * we simply return. Later writepage
+ * will redirty the page and writepages
+ * will find the dirty page again
+ */
+ if (err == -EAGAIN)
+ return 0;
+
+ if (err == -ENOSPC &&
+ ext4_count_free_blocks(mpd->inode->i_sb)) {
+ mpd->retval = err;
+ return 0;
+ }
+
+ /*
+ * get block failure will cause us
+ * to loop in writepages. Because
+ * a_ops->writepage won't be able to
+ * make progress. The page will be redirtied
+ * by writepage and writepages will again
+ * try to write the same.
+ */
+ printk(KERN_EMERG "%s block allocation failed for inode %lu "
+ "at logical offset %llu with max blocks "
+ "%zd with error %d\n",
+ __func__, mpd->inode->i_ino,
+ (unsigned long long)next,
+ lbh->b_size >> mpd->inode->i_blkbits, err);
+ printk(KERN_EMERG "This should not happen.!! "
+ "Data will be lost\n");
+ if (err == -ENOSPC) {
+ ext4_print_free_blocks(mpd->inode);
+ }
+ /* invlaidate all the pages */
+ ext4_da_block_invalidatepages(mpd, next,
+ lbh->b_size >> mpd->inode->i_blkbits);
+ return err;
+ }
BUG_ON(new.b_size == 0);
if (buffer_new(&new))
@@ -1830,7 +1925,7 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
if (buffer_delay(lbh) || buffer_unwritten(lbh))
mpage_put_bnr_to_bhs(mpd, next, &new);
- return;
+ return 0;
}
#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -1899,8 +1994,8 @@ flush_it:
* We couldn't merge the block to our extent, so we
* need to flush current extent and start new one
*/
- mpage_da_map_blocks(mpd);
- mpage_da_submit_io(mpd);
+ if (mpage_da_map_blocks(mpd) == 0)
+ mpage_da_submit_io(mpd);
mpd->io_done = 1;
return;
}
@@ -1942,8 +2037,8 @@ static int __mpage_da_writepage(struct page *page,
* and start IO on them using writepage()
*/
if (mpd->next_page != mpd->first_page) {
- mpage_da_map_blocks(mpd);
- mpage_da_submit_io(mpd);
+ if (mpage_da_map_blocks(mpd) == 0)
+ mpage_da_submit_io(mpd);
/*
* skip rest of the page in the page_vec
*/
@@ -2018,39 +2113,34 @@ static int __mpage_da_writepage(struct page *page,
*/
static int mpage_da_writepages(struct address_space *mapping,
struct writeback_control *wbc,
- get_block_t get_block)
+ struct mpage_da_data *mpd)
{
- struct mpage_da_data mpd;
- long to_write;
int ret;
- if (!get_block)
+ if (!mpd->get_block)
return generic_writepages(mapping, wbc);
- mpd.wbc = wbc;
- mpd.inode = mapping->host;
- mpd.lbh.b_size = 0;
- mpd.lbh.b_state = 0;
- mpd.lbh.b_blocknr = 0;
- mpd.first_page = 0;
- mpd.next_page = 0;
- mpd.get_block = get_block;
- mpd.io_done = 0;
- mpd.pages_written = 0;
-
- to_write = wbc->nr_to_write;
-
- ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
+ mpd->lbh.b_size = 0;
+ mpd->lbh.b_state = 0;
+ mpd->lbh.b_blocknr = 0;
+ mpd->first_page = 0;
+ mpd->next_page = 0;
+ mpd->io_done = 0;
+ mpd->pages_written = 0;
+ mpd->retval = 0;
+ ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
/*
* Handle last extent of pages
*/
- if (!mpd.io_done && mpd.next_page != mpd.first_page) {
- mpage_da_map_blocks(&mpd);
- mpage_da_submit_io(&mpd);
- }
+ if (!mpd->io_done && mpd->next_page != mpd->first_page) {
+ if (mpage_da_map_blocks(mpd) == 0)
+ mpage_da_submit_io(mpd);
- wbc->nr_to_write = to_write - mpd.pages_written;
+ mpd->io_done = 1;
+ ret = MPAGE_DA_EXTENT_TAIL;
+ }
+ wbc->nr_to_write -= mpd->pages_written;
return ret;
}
@@ -2103,18 +2193,24 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
handle_t *handle = NULL;
handle = ext4_journal_current_handle();
- if (!handle) {
- ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
- bh_result, 0, 0, 0);
- BUG_ON(!ret);
- } else {
- ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
- bh_result, create, 0, EXT4_DELALLOC_RSVED);
- }
-
+ BUG_ON(!handle);
+ ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+ bh_result, create, 0, EXT4_DELALLOC_RSVED);
if (ret > 0) {
+
bh_result->b_size = (ret << inode->i_blkbits);
+ if (ext4_should_order_data(inode)) {
+ int retval;
+ retval = ext4_jbd2_file_inode(handle, inode);
+ if (retval)
+ /*
+ * Failed to add inode for ordered
+ * mode. Don't update file size
+ */
+ return retval;
+ }
+
/*
* Update on-disk size along with block allocation
* we don't use 'extend_disksize' as size may change
@@ -2124,18 +2220,9 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
if (disksize > i_size_read(inode))
disksize = i_size_read(inode);
if (disksize > EXT4_I(inode)->i_disksize) {
- /*
- * XXX: replace with spinlock if seen contended -bzzz
- */
- down_write(&EXT4_I(inode)->i_data_sem);
- if (disksize > EXT4_I(inode)->i_disksize)
- EXT4_I(inode)->i_disksize = disksize;
- up_write(&EXT4_I(inode)->i_data_sem);
-
- if (EXT4_I(inode)->i_disksize == disksize) {
- ret = ext4_mark_inode_dirty(handle, inode);
- return ret;
- }
+ ext4_update_i_disksize(inode, disksize);
+ ret = ext4_mark_inode_dirty(handle, inode);
+ return ret;
}
ret = 0;
}
@@ -2282,11 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
static int ext4_da_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
+ pgoff_t index;
+ int range_whole = 0;
handle_t *handle = NULL;
- loff_t range_start = 0;
+ struct mpage_da_data mpd;
struct inode *inode = mapping->host;
+ int no_nrwrite_index_update;
+ long pages_written = 0, pages_skipped;
int needed_blocks, ret = 0, nr_to_writebump = 0;
- long to_write, pages_skipped = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
/*
@@ -2306,20 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping,
nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
wbc->nr_to_write = sbi->s_mb_stream_request;
}
+ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+ range_whole = 1;
- if (!wbc->range_cyclic)
- /*
- * If range_cyclic is not set force range_cont
- * and save the old writeback_index
- */
- wbc->range_cont = 1;
+ if (wbc->range_cyclic)
+ index = mapping->writeback_index;
+ else
+ index = wbc->range_start >> PAGE_CACHE_SHIFT;
+
+ mpd.wbc = wbc;
+ mpd.inode = mapping->host;
- range_start = wbc->range_start;
+ /*
+ * we don't want write_cache_pages to update
+ * nr_to_write and writeback_index
+ */
+ no_nrwrite_index_update = wbc->no_nrwrite_index_update;
+ wbc->no_nrwrite_index_update = 1;
pages_skipped = wbc->pages_skipped;
-restart_loop:
- to_write = wbc->nr_to_write;
- while (!ret && to_write > 0) {
+ while (!ret && wbc->nr_to_write > 0) {
/*
* we insert one extent at a time. So we need
@@ -2340,57 +2436,83 @@ restart_loop:
dump_stack();
goto out_writepages;
}
- if (ext4_should_order_data(inode)) {
- /*
- * With ordered mode we need to add
- * the inode to the journal handl
- * when we do block allocation.
- */
- ret = ext4_jbd2_file_inode(handle, inode);
- if (ret) {
- ext4_journal_stop(handle);
- goto out_writepages;
- }
- }
+ mpd.get_block = ext4_da_get_block_write;
+ ret = mpage_da_writepages(mapping, wbc, &mpd);
- to_write -= wbc->nr_to_write;
- ret = mpage_da_writepages(mapping, wbc,
- ext4_da_get_block_write);
ext4_journal_stop(handle);
- if (ret == MPAGE_DA_EXTENT_TAIL) {
+
+ if (mpd.retval == -ENOSPC) {
+ /* commit the transaction which would
+ * free blocks released in the transaction
+ * and try again
+ */
+ jbd2_journal_force_commit_nested(sbi->s_journal);
+ wbc->pages_skipped = pages_skipped;
+ ret = 0;
+ } else if (ret == MPAGE_DA_EXTENT_TAIL) {
/*
* got one extent now try with
* rest of the pages
*/
- to_write += wbc->nr_to_write;
+ pages_written += mpd.pages_written;
+ wbc->pages_skipped = pages_skipped;
ret = 0;
- } else if (wbc->nr_to_write) {
+ } else if (wbc->nr_to_write)
/*
* There is no more writeout needed
* or we requested for a noblocking writeout
* and we found the device congested
*/
- to_write += wbc->nr_to_write;
break;
- }
- wbc->nr_to_write = to_write;
- }
-
- if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
- /* We skipped pages in this loop */
- wbc->range_start = range_start;
- wbc->nr_to_write = to_write +
- wbc->pages_skipped - pages_skipped;
- wbc->pages_skipped = pages_skipped;
- goto restart_loop;
}
+ if (pages_skipped != wbc->pages_skipped)
+ printk(KERN_EMERG "This should not happen leaving %s "
+ "with nr_to_write = %ld ret = %d\n",
+ __func__, wbc->nr_to_write, ret);
+
+ /* Update index */
+ index += pages_written;
+ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+ /*
+ * set the writeback_index so that range_cyclic
+ * mode will write it back later
+ */
+ mapping->writeback_index = index;
out_writepages:
- wbc->nr_to_write = to_write - nr_to_writebump;
- wbc->range_start = range_start;
+ if (!no_nrwrite_index_update)
+ wbc->no_nrwrite_index_update = 0;
+ wbc->nr_to_write -= nr_to_writebump;
return ret;
}
+#define FALL_BACK_TO_NONDELALLOC 1
+static int ext4_nonda_switch(struct super_block *sb)
+{
+ s64 free_blocks, dirty_blocks;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ /*
+ * switch to non delalloc mode if we are running low
+ * on free block. The free block accounting via percpu
+ * counters can get slightly wrong with FBC_BATCH getting
+ * accumulated on each CPU without updating global counters
+ * Delalloc need an accurate free block accounting. So switch
+ * to non delalloc when we are near to error range.
+ */
+ free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+ dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
+ if (2 * free_blocks < 3 * dirty_blocks ||
+ free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
+ /*
+ * free block count is less that 150% of dirty blocks
+ * or free blocks is less that watermark
+ */
+ return 1;
+ }
+ return 0;
+}
+
static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -2406,6 +2528,12 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
+ if (ext4_nonda_switch(inode->i_sb)) {
+ *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
+ return ext4_write_begin(file, mapping, pos,
+ len, flags, pagep, fsdata);
+ }
+ *fsdata = (void *)0;
retry:
/*
* With delayed allocation, we don't log the i_disksize update
@@ -2433,6 +2561,13 @@ retry:
unlock_page(page);
ext4_journal_stop(handle);
page_cache_release(page);
+ /*
+ * block_write_begin may have instantiated a few blocks
+ * outside i_size. Trim these off again. Don't need
+ * i_size_read because we hold i_mutex.
+ */
+ if (pos + len > inode->i_size)
+ vmtruncate(inode, inode->i_size);
}
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2456,7 +2591,7 @@ static int ext4_da_should_update_i_disksize(struct page *page,
bh = page_buffers(page);
idx = offset >> inode->i_blkbits;
- for (i=0; i < idx; i++)
+ for (i = 0; i < idx; i++)
bh = bh->b_this_page;
if (!buffer_mapped(bh) || (buffer_delay(bh)))
@@ -2474,9 +2609,22 @@ static int ext4_da_write_end(struct file *file,
handle_t *handle = ext4_journal_current_handle();
loff_t new_i_size;
unsigned long start, end;
+ int write_mode = (int)(unsigned long)fsdata;
+
+ if (write_mode == FALL_BACK_TO_NONDELALLOC) {
+ if (ext4_should_order_data(inode)) {
+ return ext4_ordered_write_end(file, mapping, pos,
+ len, copied, page, fsdata);
+ } else if (ext4_should_writeback_data(inode)) {
+ return ext4_writeback_write_end(file, mapping, pos,
+ len, copied, page, fsdata);
+ } else {
+ BUG();
+ }
+ }
start = pos & (PAGE_CACHE_SIZE - 1);
- end = start + copied -1;
+ end = start + copied - 1;
/*
* generic_write_end() will run mark_inode_dirty() if i_size
@@ -2500,6 +2648,11 @@ static int ext4_da_write_end(struct file *file,
EXT4_I(inode)->i_disksize = new_i_size;
}
up_write(&EXT4_I(inode)->i_data_sem);
+ /* We need to mark inode dirty even if
+ * new_i_size is less that inode->i_size
+ * bu greater than i_disksize.(hint delalloc)
+ */
+ ext4_mark_inode_dirty(handle, inode);
}
}
ret2 = generic_write_end(file, mapping, pos, len, copied,
@@ -2591,7 +2744,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
return 0;
}
- return generic_block_bmap(mapping,block,ext4_get_block);
+ return generic_block_bmap(mapping, block, ext4_get_block);
}
static int bget_one(handle_t *handle, struct buffer_head *bh)
@@ -3197,7 +3350,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
if (!partial->key && *partial->p)
/* Writer: end */
goto no_top;
- for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--)
+ for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
;
/*
* OK, we've found the last block that must survive. The rest of our
@@ -3216,7 +3369,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
}
/* Writer: end */
- while(partial > p) {
+ while (partial > p) {
brelse(partial->bh);
partial--;
}
@@ -3408,9 +3561,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
/* This zaps the entire block. Bottom up. */
BUFFER_TRACE(bh, "free child branches");
ext4_free_branches(handle, inode, bh,
- (__le32*)bh->b_data,
- (__le32*)bh->b_data + addr_per_block,
- depth);
+ (__le32 *) bh->b_data,
+ (__le32 *) bh->b_data + addr_per_block,
+ depth);
/*
* We've probably journalled the indirect block several
@@ -3578,7 +3731,7 @@ void ext4_truncate(struct inode *inode)
*/
down_write(&ei->i_data_sem);
- ext4_discard_reservation(inode);
+ ext4_discard_preallocations(inode);
/*
* The orphan list entry will now protect us from any crash which
@@ -3673,41 +3826,6 @@ out_stop:
ext4_journal_stop(handle);
}
-static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
- unsigned long ino, struct ext4_iloc *iloc)
-{
- ext4_group_t block_group;
- unsigned long offset;
- ext4_fsblk_t block;
- struct ext4_group_desc *gdp;
-
- if (!ext4_valid_inum(sb, ino)) {
- /*
- * This error is already checked for in namei.c unless we are
- * looking at an NFS filehandle, in which case no error
- * report is needed
- */
- return 0;
- }
-
- block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
- gdp = ext4_get_group_desc(sb, block_group, NULL);
- if (!gdp)
- return 0;
-
- /*
- * Figure out the offset within the block group inode table
- */
- offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
- EXT4_INODE_SIZE(sb);
- block = ext4_inode_table(sb, gdp) +
- (offset >> EXT4_BLOCK_SIZE_BITS(sb));
-
- iloc->block_group = block_group;
- iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
- return block;
-}
-
/*
* ext4_get_inode_loc returns with an extra refcount against the inode's
* underlying buffer_head on success. If 'in_mem' is true, we have all
@@ -3717,19 +3835,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
static int __ext4_get_inode_loc(struct inode *inode,
struct ext4_iloc *iloc, int in_mem)
{
- ext4_fsblk_t block;
- struct buffer_head *bh;
+ struct ext4_group_desc *gdp;
+ struct buffer_head *bh;
+ struct super_block *sb = inode->i_sb;
+ ext4_fsblk_t block;
+ int inodes_per_block, inode_offset;
+
+ iloc->bh = 0;
+ if (!ext4_valid_inum(sb, inode->i_ino))
+ return -EIO;
- block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc);
- if (!block)
+ iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
+ gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
+ if (!gdp)
return -EIO;
- bh = sb_getblk(inode->i_sb, block);
+ /*
+ * Figure out the offset within the block group inode table
+ */
+ inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
+ inode_offset = ((inode->i_ino - 1) %
+ EXT4_INODES_PER_GROUP(sb));
+ block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
+ iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
+
+ bh = sb_getblk(sb, block);
if (!bh) {
- ext4_error (inode->i_sb, "ext4_get_inode_loc",
- "unable to read inode block - "
- "inode=%lu, block=%llu",
- inode->i_ino, block);
+ ext4_error(sb, "ext4_get_inode_loc", "unable to read "
+ "inode block - inode=%lu, block=%llu",
+ inode->i_ino, block);
return -EIO;
}
if (!buffer_uptodate(bh)) {
@@ -3757,28 +3891,12 @@ static int __ext4_get_inode_loc(struct inode *inode,
*/
if (in_mem) {
struct buffer_head *bitmap_bh;
- struct ext4_group_desc *desc;
- int inodes_per_buffer;
- int inode_offset, i;
- ext4_group_t block_group;
- int start;
-
- block_group = (inode->i_ino - 1) /
- EXT4_INODES_PER_GROUP(inode->i_sb);
- inodes_per_buffer = bh->b_size /
- EXT4_INODE_SIZE(inode->i_sb);
- inode_offset = ((inode->i_ino - 1) %
- EXT4_INODES_PER_GROUP(inode->i_sb));
- start = inode_offset & ~(inodes_per_buffer - 1);
+ int i, start;
- /* Is the inode bitmap in cache? */
- desc = ext4_get_group_desc(inode->i_sb,
- block_group, NULL);
- if (!desc)
- goto make_io;
+ start = inode_offset & ~(inodes_per_block - 1);
- bitmap_bh = sb_getblk(inode->i_sb,
- ext4_inode_bitmap(inode->i_sb, desc));
+ /* Is the inode bitmap in cache? */
+ bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
if (!bitmap_bh)
goto make_io;
@@ -3791,14 +3909,14 @@ static int __ext4_get_inode_loc(struct inode *inode,
brelse(bitmap_bh);
goto make_io;
}
- for (i = start; i < start + inodes_per_buffer; i++) {
+ for (i = start; i < start + inodes_per_block; i++) {
if (i == inode_offset)
continue;
if (ext4_test_bit(i, bitmap_bh->b_data))
break;
}
brelse(bitmap_bh);
- if (i == start + inodes_per_buffer) {
+ if (i == start + inodes_per_block) {
/* all other inodes are free, so skip I/O */
memset(bh->b_data, 0, bh->b_size);
set_buffer_uptodate(bh);
@@ -3809,6 +3927,36 @@ static int __ext4_get_inode_loc(struct inode *inode,
make_io:
/*
+ * If we need to do any I/O, try to pre-readahead extra
+ * blocks from the inode table.
+ */
+ if (EXT4_SB(sb)->s_inode_readahead_blks) {
+ ext4_fsblk_t b, end, table;
+ unsigned num;
+
+ table = ext4_inode_table(sb, gdp);
+ /* Make sure s_inode_readahead_blks is a power of 2 */
+ while (EXT4_SB(sb)->s_inode_readahead_blks &
+ (EXT4_SB(sb)->s_inode_readahead_blks-1))
+ EXT4_SB(sb)->s_inode_readahead_blks =
+ (EXT4_SB(sb)->s_inode_readahead_blks &
+ (EXT4_SB(sb)->s_inode_readahead_blks-1));
+ b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
+ if (table > b)
+ b = table;
+ end = b + EXT4_SB(sb)->s_inode_readahead_blks;
+ num = EXT4_INODES_PER_GROUP(sb);
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+ num -= le16_to_cpu(gdp->bg_itable_unused);
+ table += num / inodes_per_block;
+ if (end > table)
+ end = table;
+ while (b <= end)
+ sb_breadahead(sb, b++);
+ }
+
+ /*
* There are other valid inodes in the buffer, this inode
* has in-inode xattrs, or we don't have this inode in memory.
* Read the block from disk.
@@ -3818,10 +3966,9 @@ make_io:
submit_bh(READ_META, bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
- ext4_error(inode->i_sb, "ext4_get_inode_loc",
- "unable to read inode block - "
- "inode=%lu, block=%llu",
- inode->i_ino, block);
+ ext4_error(sb, __func__,
+ "unable to read inode block - inode=%lu, "
+ "block=%llu", inode->i_ino, block);
brelse(bh);
return -EIO;
}
@@ -3913,11 +4060,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
return inode;
ei = EXT4_I(inode);
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
ei->i_acl = EXT4_ACL_NOT_CACHED;
ei->i_default_acl = EXT4_ACL_NOT_CACHED;
#endif
- ei->i_block_alloc_info = NULL;
ret = __ext4_get_inode_loc(inode, &iloc, 0);
if (ret < 0)
@@ -3927,7 +4073,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
- if(!(test_opt (inode->i_sb, NO_UID32))) {
+ if (!(test_opt(inode->i_sb, NO_UID32))) {
inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
@@ -3945,7 +4091,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
if (inode->i_mode == 0 ||
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
/* this inode is deleted */
- brelse (bh);
+ brelse(bh);
ret = -ESTALE;
goto bad_inode;
}
@@ -3978,7 +4124,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
EXT4_INODE_SIZE(inode->i_sb)) {
- brelse (bh);
+ brelse(bh);
ret = -EIO;
goto bad_inode;
}
@@ -4031,7 +4177,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
init_special_inode(inode, inode->i_mode,
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
}
- brelse (iloc.bh);
+ brelse(iloc.bh);
ext4_set_inode_flags(inode);
unlock_new_inode(inode);
return inode;
@@ -4048,7 +4194,6 @@ static int ext4_inode_blocks_set(handle_t *handle,
struct inode *inode = &(ei->vfs_inode);
u64 i_blocks = inode->i_blocks;
struct super_block *sb = inode->i_sb;
- int err = 0;
if (i_blocks <= ~0U) {
/*
@@ -4058,36 +4203,27 @@ static int ext4_inode_blocks_set(handle_t *handle,
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
raw_inode->i_blocks_high = 0;
ei->i_flags &= ~EXT4_HUGE_FILE_FL;
- } else if (i_blocks <= 0xffffffffffffULL) {
+ return 0;
+ }
+ if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
+ return -EFBIG;
+
+ if (i_blocks <= 0xffffffffffffULL) {
/*
* i_blocks can be represented in a 48 bit variable
* as multiple of 512 bytes
*/
- err = ext4_update_rocompat_feature(handle, sb,
- EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
- if (err)
- goto err_out;
- /* i_block is stored in the split 48 bit fields */
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
ei->i_flags &= ~EXT4_HUGE_FILE_FL;
} else {
- /*
- * i_blocks should be represented in a 48 bit variable
- * as multiple of file system block size
- */
- err = ext4_update_rocompat_feature(handle, sb,
- EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
- if (err)
- goto err_out;
ei->i_flags |= EXT4_HUGE_FILE_FL;
/* i_block is stored in file system block size */
i_blocks = i_blocks >> (inode->i_blkbits - 9);
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
}
-err_out:
- return err;
+ return 0;
}
/*
@@ -4113,14 +4249,14 @@ static int ext4_do_update_inode(handle_t *handle,
ext4_get_inode_flags(ei);
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
- if(!(test_opt(inode->i_sb, NO_UID32))) {
+ if (!(test_opt(inode->i_sb, NO_UID32))) {
raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
/*
* Fix up interoperability with old kernels. Otherwise, old inodes get
* re-used with the upper 16 bits of the uid/gid intact
*/
- if(!ei->i_dtime) {
+ if (!ei->i_dtime) {
raw_inode->i_uid_high =
cpu_to_le16(high_16_bits(inode->i_uid));
raw_inode->i_gid_high =
@@ -4208,7 +4344,7 @@ static int ext4_do_update_inode(handle_t *handle,
ei->i_state &= ~EXT4_STATE_NEW;
out_brelse:
- brelse (bh);
+ brelse(bh);
ext4_std_error(inode->i_sb, err);
return err;
}
@@ -4811,6 +4947,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
loff_t size;
unsigned long len;
int ret = -EINVAL;
+ void *fsdata;
struct file *file = vma->vm_file;
struct inode *inode = file->f_path.dentry->d_inode;
struct address_space *mapping = inode->i_mapping;
@@ -4849,11 +4986,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
* on the same page though
*/
ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
- len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+ len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
if (ret < 0)
goto out_unlock;
ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
- len, len, page, NULL);
+ len, len, page, fsdata);
if (ret < 0)
goto out_unlock;
ret = 0;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 7a6c2f1faba..dc99b4776d5 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -23,9 +23,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct inode *inode = filp->f_dentry->d_inode;
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int flags;
- unsigned short rsv_window_size;
- ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg);
+ ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
switch (cmd) {
case EXT4_IOC_GETFLAGS:
@@ -34,7 +33,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return put_user(flags, (int __user *) arg);
case EXT4_IOC_SETFLAGS: {
handle_t *handle = NULL;
- int err;
+ int err, migrate = 0;
struct ext4_iloc iloc;
unsigned int oldflags;
unsigned int jflag;
@@ -82,6 +81,17 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (!capable(CAP_SYS_RESOURCE))
goto flags_out;
}
+ if (oldflags & EXT4_EXTENTS_FL) {
+ /* We don't support clearning extent flags */
+ if (!(flags & EXT4_EXTENTS_FL)) {
+ err = -EOPNOTSUPP;
+ goto flags_out;
+ }
+ } else if (flags & EXT4_EXTENTS_FL) {
+ /* migrate the file */
+ migrate = 1;
+ flags &= ~EXT4_EXTENTS_FL;
+ }
handle = ext4_journal_start(inode, 1);
if (IS_ERR(handle)) {
@@ -109,6 +119,10 @@ flags_err:
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
err = ext4_change_inode_journal_flag(inode, jflag);
+ if (err)
+ goto flags_out;
+ if (migrate)
+ err = ext4_ext_migrate(inode);
flags_out:
mutex_unlock(&inode->i_mutex);
mnt_drop_write(filp->f_path.mnt);
@@ -175,53 +189,10 @@ setversion_out:
return ret;
}
#endif
- case EXT4_IOC_GETRSVSZ:
- if (test_opt(inode->i_sb, RESERVATION)
- && S_ISREG(inode->i_mode)
- && ei->i_block_alloc_info) {
- rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size;
- return put_user(rsv_window_size, (int __user *)arg);
- }
- return -ENOTTY;
- case EXT4_IOC_SETRSVSZ: {
- int err;
-
- if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
- return -ENOTTY;
-
- if (!is_owner_or_cap(inode))
- return -EACCES;
-
- if (get_user(rsv_window_size, (int __user *)arg))
- return -EFAULT;
-
- err = mnt_want_write(filp->f_path.mnt);
- if (err)
- return err;
-
- if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS)
- rsv_window_size = EXT4_MAX_RESERVE_BLOCKS;
-
- /*
- * need to allocate reservation structure for this inode
- * before set the window size
- */
- down_write(&ei->i_data_sem);
- if (!ei->i_block_alloc_info)
- ext4_init_block_alloc_info(inode);
-
- if (ei->i_block_alloc_info){
- struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
- rsv->rsv_goal_size = rsv_window_size;
- }
- up_write(&ei->i_data_sem);
- mnt_drop_write(filp->f_path.mnt);
- return 0;
- }
case EXT4_IOC_GROUP_EXTEND: {
ext4_fsblk_t n_blocks_count;
struct super_block *sb = inode->i_sb;
- int err;
+ int err, err2;
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
@@ -235,8 +206,10 @@ setversion_out:
err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+ err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ if (err == 0)
+ err = err2;
mnt_drop_write(filp->f_path.mnt);
return err;
@@ -244,7 +217,7 @@ setversion_out:
case EXT4_IOC_GROUP_ADD: {
struct ext4_new_group_data input;
struct super_block *sb = inode->i_sb;
- int err;
+ int err, err2;
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
@@ -259,15 +232,36 @@ setversion_out:
err = ext4_group_add(sb, &input);
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+ err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ if (err == 0)
+ err = err2;
mnt_drop_write(filp->f_path.mnt);
return err;
}
case EXT4_IOC_MIGRATE:
- return ext4_ext_migrate(inode, filp, cmd, arg);
+ {
+ int err;
+ if (!is_owner_or_cap(inode))
+ return -EACCES;
+
+ err = mnt_want_write(filp->f_path.mnt);
+ if (err)
+ return err;
+ /*
+ * inode_mutex prevent write and truncate on the file.
+ * Read still goes through. We take i_data_sem in
+ * ext4_ext_swap_inode_data before we switch the
+ * inode format to prevent read.
+ */
+ mutex_lock(&(inode->i_mutex));
+ err = ext4_ext_migrate(inode);
+ mutex_unlock(&(inode->i_mutex));
+ mnt_drop_write(filp->f_path.mnt);
+ return err;
+ }
default:
return -ENOTTY;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e0e3a5eb1dd..dfe17a13405 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -477,9 +477,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
b2 = (unsigned char *) bitmap;
for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
if (b1[i] != b2[i]) {
- printk("corruption in group %lu at byte %u(%u):"
- " %x in copy != %x on disk/prealloc\n",
- e4b->bd_group, i, i * 8, b1[i], b2[i]);
+ printk(KERN_ERR "corruption in group %lu "
+ "at byte %u(%u): %x in copy != %x "
+ "on disk/prealloc\n",
+ e4b->bd_group, i, i * 8, b1[i], b2[i]);
BUG();
}
}
@@ -533,9 +534,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
void *buddy;
void *buddy2;
- if (!test_opt(sb, MBALLOC))
- return 0;
-
{
static int mb_check_counter;
if (mb_check_counter++ % 100 != 0)
@@ -784,9 +782,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
if (bh[i] == NULL)
goto out;
- if (bh_uptodate_or_lock(bh[i]))
+ if (buffer_uptodate(bh[i]) &&
+ !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
continue;
+ lock_buffer(bh[i]);
spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh[i],
@@ -2169,9 +2169,10 @@ static void ext4_mb_history_release(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- remove_proc_entry("mb_groups", sbi->s_mb_proc);
- remove_proc_entry("mb_history", sbi->s_mb_proc);
-
+ if (sbi->s_proc != NULL) {
+ remove_proc_entry("mb_groups", sbi->s_proc);
+ remove_proc_entry("mb_history", sbi->s_proc);
+ }
kfree(sbi->s_mb_history);
}
@@ -2180,10 +2181,10 @@ static void ext4_mb_history_init(struct super_block *sb)
struct ext4_sb_info *sbi = EXT4_SB(sb);
int i;
- if (sbi->s_mb_proc != NULL) {
- proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc,
+ if (sbi->s_proc != NULL) {
+ proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
&ext4_mb_seq_history_fops, sb);
- proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc,
+ proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
&ext4_mb_seq_groups_fops, sb);
}
@@ -2299,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
}
INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+ meta_group_info[i]->bb_free_root.rb_node = NULL;;
#ifdef DOUBLE_CHECK
{
@@ -2485,19 +2487,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
unsigned max;
int ret;
- if (!test_opt(sb, MBALLOC))
- return 0;
-
i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short);
sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_offsets == NULL) {
- clear_opt(sbi->s_mount_opt, MBALLOC);
return -ENOMEM;
}
sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_maxs == NULL) {
- clear_opt(sbi->s_mount_opt, MBALLOC);
kfree(sbi->s_mb_maxs);
return -ENOMEM;
}
@@ -2520,16 +2517,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
/* init file for buddy data */
ret = ext4_mb_init_backend(sb);
if (ret != 0) {
- clear_opt(sbi->s_mount_opt, MBALLOC);
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
return ret;
}
spin_lock_init(&sbi->s_md_lock);
- INIT_LIST_HEAD(&sbi->s_active_transaction);
- INIT_LIST_HEAD(&sbi->s_closed_transaction);
- INIT_LIST_HEAD(&sbi->s_committed_transaction);
spin_lock_init(&sbi->s_bal_lock);
sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
@@ -2540,17 +2533,15 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
- i = sizeof(struct ext4_locality_group) * nr_cpu_ids;
- sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
+ sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
if (sbi->s_locality_groups == NULL) {
- clear_opt(sbi->s_mount_opt, MBALLOC);
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
return -ENOMEM;
}
- for (i = 0; i < nr_cpu_ids; i++) {
+ for_each_possible_cpu(i) {
struct ext4_locality_group *lg;
- lg = &sbi->s_locality_groups[i];
+ lg = per_cpu_ptr(sbi->s_locality_groups, i);
mutex_init(&lg->lg_mutex);
for (j = 0; j < PREALLOC_TB_SIZE; j++)
INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
@@ -2560,7 +2551,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
ext4_mb_init_per_dev_proc(sb);
ext4_mb_history_init(sb);
- printk("EXT4-fs: mballoc enabled\n");
+ sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+
+ printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
return 0;
}
@@ -2575,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
list_del(&pa->pa_group_list);
count++;
- kfree(pa);
+ kmem_cache_free(ext4_pspace_cachep, pa);
}
if (count)
mb_debug("mballoc: %u PAs left\n", count);
@@ -2589,18 +2582,6 @@ int ext4_mb_release(struct super_block *sb)
struct ext4_group_info *grinfo;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (!test_opt(sb, MBALLOC))
- return 0;
-
- /* release freed, non-committed blocks */
- spin_lock(&sbi->s_md_lock);
- list_splice_init(&sbi->s_closed_transaction,
- &sbi->s_committed_transaction);
- list_splice_init(&sbi->s_active_transaction,
- &sbi->s_committed_transaction);
- spin_unlock(&sbi->s_md_lock);
- ext4_mb_free_committed_blocks(sb);
-
if (sbi->s_group_info) {
for (i = 0; i < sbi->s_groups_count; i++) {
grinfo = ext4_get_group_info(sb, i);
@@ -2647,69 +2628,64 @@ int ext4_mb_release(struct super_block *sb)
atomic_read(&sbi->s_mb_discarded));
}
- kfree(sbi->s_locality_groups);
-
+ free_percpu(sbi->s_locality_groups);
ext4_mb_history_release(sb);
ext4_mb_destroy_per_dev_proc(sb);
return 0;
}
-static noinline_for_stack void
-ext4_mb_free_committed_blocks(struct super_block *sb)
+/*
+ * This function is called by the jbd2 layer once the commit has finished,
+ * so we know we can free the blocks that were released with that commit.
+ */
+static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- int err;
- int i;
- int count = 0;
- int count2 = 0;
- struct ext4_free_metadata *md;
+ struct super_block *sb = journal->j_private;
struct ext4_buddy e4b;
+ struct ext4_group_info *db;
+ int err, count = 0, count2 = 0;
+ struct ext4_free_data *entry;
+ ext4_fsblk_t discard_block;
+ struct list_head *l, *ltmp;
- if (list_empty(&sbi->s_committed_transaction))
- return;
-
- /* there is committed blocks to be freed yet */
- do {
- /* get next array of blocks */
- md = NULL;
- spin_lock(&sbi->s_md_lock);
- if (!list_empty(&sbi->s_committed_transaction)) {
- md = list_entry(sbi->s_committed_transaction.next,
- struct ext4_free_metadata, list);
- list_del(&md->list);
- }
- spin_unlock(&sbi->s_md_lock);
-
- if (md == NULL)
- break;
+ list_for_each_safe(l, ltmp, &txn->t_private_list) {
+ entry = list_entry(l, struct ext4_free_data, list);
mb_debug("gonna free %u blocks in group %lu (0x%p):",
- md->num, md->group, md);
+ entry->count, entry->group, entry);
- err = ext4_mb_load_buddy(sb, md->group, &e4b);
+ err = ext4_mb_load_buddy(sb, entry->group, &e4b);
/* we expect to find existing buddy because it's pinned */
BUG_ON(err != 0);
+ db = e4b.bd_info;
/* there are blocks to put in buddy to make them really free */
- count += md->num;
+ count += entry->count;
count2++;
- ext4_lock_group(sb, md->group);
- for (i = 0; i < md->num; i++) {
- mb_debug(" %u", md->blocks[i]);
- mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
+ ext4_lock_group(sb, entry->group);
+ /* Take it out of per group rb tree */
+ rb_erase(&entry->node, &(db->bb_free_root));
+ mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+
+ if (!db->bb_free_root.rb_node) {
+ /* No more items in the per group rb tree
+ * balance refcounts from ext4_mb_free_metadata()
+ */
+ page_cache_release(e4b.bd_buddy_page);
+ page_cache_release(e4b.bd_bitmap_page);
}
- mb_debug("\n");
- ext4_unlock_group(sb, md->group);
-
- /* balance refcounts from ext4_mb_free_metadata() */
- page_cache_release(e4b.bd_buddy_page);
- page_cache_release(e4b.bd_bitmap_page);
-
- kfree(md);
+ ext4_unlock_group(sb, entry->group);
+ discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
+ + entry->start_blk
+ + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+ trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id,
+ (unsigned long long) discard_block, entry->count);
+ sb_issue_discard(sb, discard_block, entry->count);
+
+ kmem_cache_free(ext4_free_ext_cachep, entry);
ext4_mb_release_desc(&e4b);
-
- } while (md);
+ }
mb_debug("freed %u blocks in %u structures\n", count, count2);
}
@@ -2721,119 +2697,52 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
#define EXT4_MB_STREAM_REQ "stream_req"
#define EXT4_MB_GROUP_PREALLOC "group_prealloc"
-
-
-#define MB_PROC_FOPS(name) \
-static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \
-{ \
- struct ext4_sb_info *sbi = m->private; \
- \
- seq_printf(m, "%ld\n", sbi->s_mb_##name); \
- return 0; \
-} \
- \
-static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\
-{ \
- return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\
-} \
- \
-static ssize_t ext4_mb_##name##_proc_write(struct file *file, \
- const char __user *buf, size_t cnt, loff_t *ppos) \
-{ \
- struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\
- char str[32]; \
- long value; \
- if (cnt >= sizeof(str)) \
- return -EINVAL; \
- if (copy_from_user(str, buf, cnt)) \
- return -EFAULT; \
- value = simple_strtol(str, NULL, 0); \
- if (value <= 0) \
- return -ERANGE; \
- sbi->s_mb_##name = value; \
- return cnt; \
-} \
- \
-static const struct file_operations ext4_mb_##name##_proc_fops = { \
- .owner = THIS_MODULE, \
- .open = ext4_mb_##name##_proc_open, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .release = single_release, \
- .write = ext4_mb_##name##_proc_write, \
-};
-
-MB_PROC_FOPS(stats);
-MB_PROC_FOPS(max_to_scan);
-MB_PROC_FOPS(min_to_scan);
-MB_PROC_FOPS(order2_reqs);
-MB_PROC_FOPS(stream_request);
-MB_PROC_FOPS(group_prealloc);
-
-#define MB_PROC_HANDLER(name, var) \
-do { \
- proc = proc_create_data(name, mode, sbi->s_mb_proc, \
- &ext4_mb_##var##_proc_fops, sbi); \
- if (proc == NULL) { \
- printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \
- goto err_out; \
- } \
-} while (0)
-
static int ext4_mb_init_per_dev_proc(struct super_block *sb)
{
+#ifdef CONFIG_PROC_FS
mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct proc_dir_entry *proc;
- char devname[64];
- if (proc_root_ext4 == NULL) {
- sbi->s_mb_proc = NULL;
+ if (sbi->s_proc == NULL)
return -EINVAL;
- }
- bdevname(sb->s_bdev, devname);
- sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
-
- MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
- MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan);
- MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan);
- MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs);
- MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request);
- MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc);
+ EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats);
+ EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
+ EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
+ EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
+ EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
+ EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
return 0;
err_out:
- printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname);
- remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
- remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
- remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
- remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc);
- remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc);
- remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
- remove_proc_entry(devname, proc_root_ext4);
- sbi->s_mb_proc = NULL;
-
+ remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
+ remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
+ remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
+ remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
+ remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
+ remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
return -ENOMEM;
+#else
+ return 0;
+#endif
}
static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
{
+#ifdef CONFIG_PROC_FS
struct ext4_sb_info *sbi = EXT4_SB(sb);
- char devname[64];
- if (sbi->s_mb_proc == NULL)
+ if (sbi->s_proc == NULL)
return -EINVAL;
- bdevname(sb->s_bdev, devname);
- remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
- remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
- remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
- remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc);
- remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc);
- remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
- remove_proc_entry(devname, proc_root_ext4);
-
+ remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
+ remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
+ remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
+ remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
+ remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
+ remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
+#endif
return 0;
}
@@ -2854,11 +2763,16 @@ int __init init_ext4_mballoc(void)
kmem_cache_destroy(ext4_pspace_cachep);
return -ENOMEM;
}
-#ifdef CONFIG_PROC_FS
- proc_root_ext4 = proc_mkdir("fs/ext4", NULL);
- if (proc_root_ext4 == NULL)
- printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n");
-#endif
+
+ ext4_free_ext_cachep =
+ kmem_cache_create("ext4_free_block_extents",
+ sizeof(struct ext4_free_data),
+ 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ if (ext4_free_ext_cachep == NULL) {
+ kmem_cache_destroy(ext4_pspace_cachep);
+ kmem_cache_destroy(ext4_ac_cachep);
+ return -ENOMEM;
+ }
return 0;
}
@@ -2867,9 +2781,7 @@ void exit_ext4_mballoc(void)
/* XXX: synchronize_rcu(); */
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
-#ifdef CONFIG_PROC_FS
- remove_proc_entry("fs/ext4", NULL);
-#endif
+ kmem_cache_destroy(ext4_free_ext_cachep);
}
@@ -2879,7 +2791,7 @@ void exit_ext4_mballoc(void)
*/
static noinline_for_stack int
ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
- handle_t *handle)
+ handle_t *handle, unsigned long reserv_blks)
{
struct buffer_head *bitmap_bh = NULL;
struct ext4_super_block *es;
@@ -2968,15 +2880,16 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-
+ percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
/*
- * free blocks account has already be reduced/reserved
- * at write_begin() time for delayed allocation
- * do not double accounting
+ * Now reduce the dirty block count also. Should not go negative
*/
if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
- percpu_counter_sub(&sbi->s_freeblocks_counter,
- ac->ac_b_ex.fe_len);
+ /* release all the reserved blocks if non delalloc */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
+ else
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+ ac->ac_b_ex.fe_len);
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -3884,7 +3797,7 @@ out:
*
* FIXME!! Make sure it is valid at all the call sites
*/
-void ext4_mb_discard_inode_preallocations(struct inode *inode)
+void ext4_discard_preallocations(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct super_block *sb = inode->i_sb;
@@ -3896,7 +3809,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
struct ext4_buddy e4b;
int err;
- if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) {
+ if (!S_ISREG(inode->i_mode)) {
/*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
return;
}
@@ -4094,8 +4007,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
* per cpu locality group is to reduce the contention between block
* request from multiple CPUs.
*/
- ac->ac_lg = &sbi->s_locality_groups[get_cpu()];
- put_cpu();
+ ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id());
/* we're going to use group allocation */
ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
@@ -4369,33 +4281,32 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
struct ext4_allocation_request *ar, int *errp)
{
+ int freed;
struct ext4_allocation_context *ac = NULL;
struct ext4_sb_info *sbi;
struct super_block *sb;
ext4_fsblk_t block = 0;
- int freed;
- int inquota;
+ unsigned long inquota;
+ unsigned long reserv_blks = 0;
sb = ar->inode->i_sb;
sbi = EXT4_SB(sb);
- if (!test_opt(sb, MBALLOC)) {
- block = ext4_old_new_blocks(handle, ar->inode, ar->goal,
- &(ar->len), errp);
- return block;
- }
if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
/*
* With delalloc we already reserved the blocks
*/
- ar->len = ext4_has_free_blocks(sbi, ar->len);
- }
-
- if (ar->len == 0) {
- *errp = -ENOSPC;
- return 0;
+ while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
+ /* let others to free the space */
+ yield();
+ ar->len = ar->len >> 1;
+ }
+ if (!ar->len) {
+ *errp = -ENOSPC;
+ return 0;
+ }
+ reserv_blks = ar->len;
}
-
while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
ar->flags |= EXT4_MB_HINT_NOPREALLOC;
ar->len--;
@@ -4416,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
goto out1;
}
- ext4_mb_poll_new_transaction(sb, handle);
-
*errp = ext4_mb_initialize_context(ac, ar);
if (*errp) {
ar->len = 0;
@@ -4441,7 +4350,7 @@ repeat:
}
if (likely(ac->ac_status == AC_STATUS_FOUND)) {
- *errp = ext4_mb_mark_diskspace_used(ac, handle);
+ *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
if (*errp == -EAGAIN) {
ac->ac_b_ex.fe_group = 0;
ac->ac_b_ex.fe_start = 0;
@@ -4476,35 +4385,20 @@ out1:
return block;
}
-static void ext4_mb_poll_new_transaction(struct super_block *sb,
- handle_t *handle)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (sbi->s_last_transaction == handle->h_transaction->t_tid)
- return;
-
- /* new transaction! time to close last one and free blocks for
- * committed transaction. we know that only transaction can be
- * active, so previos transaction can be being logged and we
- * know that transaction before previous is known to be already
- * logged. this means that now we may free blocks freed in all
- * transactions before previous one. hope I'm clear enough ... */
-
- spin_lock(&sbi->s_md_lock);
- if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
- mb_debug("new transaction %lu, old %lu\n",
- (unsigned long) handle->h_transaction->t_tid,
- (unsigned long) sbi->s_last_transaction);
- list_splice_init(&sbi->s_closed_transaction,
- &sbi->s_committed_transaction);
- list_splice_init(&sbi->s_active_transaction,
- &sbi->s_closed_transaction);
- sbi->s_last_transaction = handle->h_transaction->t_tid;
- }
- spin_unlock(&sbi->s_md_lock);
-
- ext4_mb_free_committed_blocks(sb);
+/*
+ * We can merge two free data extents only if the physical blocks
+ * are contiguous, AND the extents were freed by the same transaction,
+ * AND the blocks are associated with the same group.
+ */
+static int can_merge(struct ext4_free_data *entry1,
+ struct ext4_free_data *entry2)
+{
+ if ((entry1->t_tid == entry2->t_tid) &&
+ (entry1->group == entry2->group) &&
+ ((entry1->start_blk + entry1->count) == entry2->start_blk))
+ return 1;
+ return 0;
}
static noinline_for_stack int
@@ -4514,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
struct ext4_group_info *db = e4b->bd_info;
struct super_block *sb = e4b->bd_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_free_metadata *md;
- int i;
+ struct ext4_free_data *entry, *new_entry;
+ struct rb_node **n = &db->bb_free_root.rb_node, *node;
+ struct rb_node *parent = NULL, *new_node;
+
BUG_ON(e4b->bd_bitmap_page == NULL);
BUG_ON(e4b->bd_buddy_page == NULL);
+ new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+ new_entry->start_blk = block;
+ new_entry->group = group;
+ new_entry->count = count;
+ new_entry->t_tid = handle->h_transaction->t_tid;
+ new_node = &new_entry->node;
+
ext4_lock_group(sb, group);
- for (i = 0; i < count; i++) {
- md = db->bb_md_cur;
- if (md && db->bb_tid != handle->h_transaction->t_tid) {
- db->bb_md_cur = NULL;
- md = NULL;
+ if (!*n) {
+ /* first free block exent. We need to
+ protect buddy cache from being freed,
+ * otherwise we'll refresh it from
+ * on-disk bitmap and lose not-yet-available
+ * blocks */
+ page_cache_get(e4b->bd_buddy_page);
+ page_cache_get(e4b->bd_bitmap_page);
+ }
+ while (*n) {
+ parent = *n;
+ entry = rb_entry(parent, struct ext4_free_data, node);
+ if (block < entry->start_blk)
+ n = &(*n)->rb_left;
+ else if (block >= (entry->start_blk + entry->count))
+ n = &(*n)->rb_right;
+ else {
+ ext4_error(sb, __func__,
+ "Double free of blocks %d (%d %d)\n",
+ block, entry->start_blk, entry->count);
+ return 0;
}
+ }
- if (md == NULL) {
- ext4_unlock_group(sb, group);
- md = kmalloc(sizeof(*md), GFP_NOFS);
- if (md == NULL)
- return -ENOMEM;
- md->num = 0;
- md->group = group;
-
- ext4_lock_group(sb, group);
- if (db->bb_md_cur == NULL) {
- spin_lock(&sbi->s_md_lock);
- list_add(&md->list, &sbi->s_active_transaction);
- spin_unlock(&sbi->s_md_lock);
- /* protect buddy cache from being freed,
- * otherwise we'll refresh it from
- * on-disk bitmap and lose not-yet-available
- * blocks */
- page_cache_get(e4b->bd_buddy_page);
- page_cache_get(e4b->bd_bitmap_page);
- db->bb_md_cur = md;
- db->bb_tid = handle->h_transaction->t_tid;
- mb_debug("new md 0x%p for group %lu\n",
- md, md->group);
- } else {
- kfree(md);
- md = db->bb_md_cur;
- }
+ rb_link_node(new_node, parent, n);
+ rb_insert_color(new_node, &db->bb_free_root);
+
+ /* Now try to see the extent can be merged to left and right */
+ node = rb_prev(new_node);
+ if (node) {
+ entry = rb_entry(node, struct ext4_free_data, node);
+ if (can_merge(entry, new_entry)) {
+ new_entry->start_blk = entry->start_blk;
+ new_entry->count += entry->count;
+ rb_erase(node, &(db->bb_free_root));
+ spin_lock(&sbi->s_md_lock);
+ list_del(&entry->list);
+ spin_unlock(&sbi->s_md_lock);
+ kmem_cache_free(ext4_free_ext_cachep, entry);
}
+ }
- BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS);
- md->blocks[md->num] = block + i;
- md->num++;
- if (md->num == EXT4_BB_MAX_BLOCKS) {
- /* no more space, put full container on a sb's list */
- db->bb_md_cur = NULL;
+ node = rb_next(new_node);
+ if (node) {
+ entry = rb_entry(node, struct ext4_free_data, node);
+ if (can_merge(new_entry, entry)) {
+ new_entry->count += entry->count;
+ rb_erase(node, &(db->bb_free_root));
+ spin_lock(&sbi->s_md_lock);
+ list_del(&entry->list);
+ spin_unlock(&sbi->s_md_lock);
+ kmem_cache_free(ext4_free_ext_cachep, entry);
}
}
+ /* Add the extent to transaction's private list */
+ spin_lock(&sbi->s_md_lock);
+ list_add(&new_entry->list, &handle->h_transaction->t_private_list);
+ spin_unlock(&sbi->s_md_lock);
ext4_unlock_group(sb, group);
return 0;
}
@@ -4592,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
*freed = 0;
- ext4_mb_poll_new_transaction(sb, handle);
-
sbi = EXT4_SB(sb);
es = EXT4_SB(sb)->s_es;
if (block < le32_to_cpu(es->s_first_data_block) ||
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index c7c9906c2a7..b5dff1fff1e 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -18,6 +18,8 @@
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/marker.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "group.h"
@@ -98,23 +100,29 @@
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
+static struct kmem_cache *ext4_free_ext_cachep;
-#ifdef EXT4_BB_MAX_BLOCKS
-#undef EXT4_BB_MAX_BLOCKS
-#endif
-#define EXT4_BB_MAX_BLOCKS 30
+struct ext4_free_data {
+ /* this links the free block information from group_info */
+ struct rb_node node;
-struct ext4_free_metadata {
- ext4_group_t group;
- unsigned short num;
- ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
+ /* this links the free block information from ext4_sb_info */
struct list_head list;
+
+ /* group which free block extent belongs */
+ ext4_group_t group;
+
+ /* free block extent */
+ ext4_grpblk_t start_blk;
+ ext4_grpblk_t count;
+
+ /* transaction which freed this extent */
+ tid_t t_tid;
};
struct ext4_group_info {
unsigned long bb_state;
- unsigned long bb_tid;
- struct ext4_free_metadata *bb_md_cur;
+ struct rb_root bb_free_root;
unsigned short bb_first_free;
unsigned short bb_free;
unsigned short bb_fragments;
@@ -257,13 +265,10 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac);
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
-static struct proc_dir_entry *proc_root_ext4;
struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
-static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
-static void ext4_mb_free_committed_blocks(struct super_block *);
static void ext4_mb_return_to_preallocation(struct inode *inode,
struct ext4_buddy *e4b, sector_t block,
int count);
@@ -271,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *,
struct super_block *, struct ext4_prealloc_space *pa);
static int ext4_mb_init_per_dev_proc(struct super_block *sb);
static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
+static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 46fc0b5b12b..f2a9cf498ec 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -447,8 +447,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
}
-int ext4_ext_migrate(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
+int ext4_ext_migrate(struct inode *inode)
{
handle_t *handle;
int retval = 0, i;
@@ -516,12 +515,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
* when we add extents we extent the journal
*/
/*
- * inode_mutex prevent write and truncate on the file. Read still goes
- * through. We take i_data_sem in ext4_ext_swap_inode_data before we
- * switch the inode format to prevent read.
- */
- mutex_lock(&(inode->i_mutex));
- /*
* Even though we take i_mutex we can still cause block allocation
* via mmap write to holes. If we have allocated new blocks we fail
* migrate. New block allocation will clear EXT4_EXT_MIGRATE flag.
@@ -623,7 +616,6 @@ err_out:
tmp_inode->i_nlink = 0;
ext4_journal_stop(handle);
- mutex_unlock(&(inode->i_mutex));
if (tmp_inode)
iput(tmp_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 387ad98350c..92db9e94514 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -151,34 +151,36 @@ struct dx_map_entry
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
-static inline unsigned dx_get_hash (struct dx_entry *entry);
-static void dx_set_hash (struct dx_entry *entry, unsigned value);
-static unsigned dx_get_count (struct dx_entry *entries);
-static unsigned dx_get_limit (struct dx_entry *entries);
-static void dx_set_count (struct dx_entry *entries, unsigned value);
-static void dx_set_limit (struct dx_entry *entries, unsigned value);
-static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
-static unsigned dx_node_limit (struct inode *dir);
-static struct dx_frame *dx_probe(struct dentry *dentry,
+static inline unsigned dx_get_hash(struct dx_entry *entry);
+static void dx_set_hash(struct dx_entry *entry, unsigned value);
+static unsigned dx_get_count(struct dx_entry *entries);
+static unsigned dx_get_limit(struct dx_entry *entries);
+static void dx_set_count(struct dx_entry *entries, unsigned value);
+static void dx_set_limit(struct dx_entry *entries, unsigned value);
+static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
+static unsigned dx_node_limit(struct inode *dir);
+static struct dx_frame *dx_probe(const struct qstr *d_name,
struct inode *dir,
struct dx_hash_info *hinfo,
struct dx_frame *frame,
int *err);
-static void dx_release (struct dx_frame *frames);
-static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
- struct dx_hash_info *hinfo, struct dx_map_entry map[]);
+static void dx_release(struct dx_frame *frames);
+static int dx_make_map(struct ext4_dir_entry_2 *de, int size,
+ struct dx_hash_info *hinfo, struct dx_map_entry map[]);
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
-static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to,
+static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
struct dx_map_entry *offsets, int count);
-static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size);
+static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size);
static void dx_insert_block(struct dx_frame *frame,
u32 hash, ext4_lblk_t block);
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
struct dx_frame *frame,
struct dx_frame *frames,
__u32 *start_hash);
-static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
- struct ext4_dir_entry_2 **res_dir, int *err);
+static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
+ const struct qstr *d_name,
+ struct ext4_dir_entry_2 **res_dir,
+ int *err);
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode);
@@ -207,44 +209,44 @@ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
entry->block = cpu_to_le32(value);
}
-static inline unsigned dx_get_hash (struct dx_entry *entry)
+static inline unsigned dx_get_hash(struct dx_entry *entry)
{
return le32_to_cpu(entry->hash);
}
-static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
+static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
{
entry->hash = cpu_to_le32(value);
}
-static inline unsigned dx_get_count (struct dx_entry *entries)
+static inline unsigned dx_get_count(struct dx_entry *entries)
{
return le16_to_cpu(((struct dx_countlimit *) entries)->count);
}
-static inline unsigned dx_get_limit (struct dx_entry *entries)
+static inline unsigned dx_get_limit(struct dx_entry *entries)
{
return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
}
-static inline void dx_set_count (struct dx_entry *entries, unsigned value)
+static inline void dx_set_count(struct dx_entry *entries, unsigned value)
{
((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
}
-static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
+static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
{
((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
}
-static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
+static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
{
unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
EXT4_DIR_REC_LEN(2) - infosize;
return entry_space / sizeof(struct dx_entry);
}
-static inline unsigned dx_node_limit (struct inode *dir)
+static inline unsigned dx_node_limit(struct inode *dir)
{
unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
return entry_space / sizeof(struct dx_entry);
@@ -254,12 +256,12 @@ static inline unsigned dx_node_limit (struct inode *dir)
* Debug
*/
#ifdef DX_DEBUG
-static void dx_show_index (char * label, struct dx_entry *entries)
+static void dx_show_index(char * label, struct dx_entry *entries)
{
int i, n = dx_get_count (entries);
- printk("%s index ", label);
+ printk(KERN_DEBUG "%s index ", label);
for (i = 0; i < n; i++) {
- printk("%x->%lu ", i? dx_get_hash(entries + i) :
+ printk("%x->%lu ", i ? dx_get_hash(entries + i) :
0, (unsigned long)dx_get_block(entries + i));
}
printk("\n");
@@ -306,7 +308,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
struct dx_entry *entries, int levels)
{
unsigned blocksize = dir->i_sb->s_blocksize;
- unsigned count = dx_get_count (entries), names = 0, space = 0, i;
+ unsigned count = dx_get_count(entries), names = 0, space = 0, i;
unsigned bcount = 0;
struct buffer_head *bh;
int err;
@@ -325,11 +327,12 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
names += stats.names;
space += stats.space;
bcount += stats.bcount;
- brelse (bh);
+ brelse(bh);
}
if (bcount)
- printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ",
- names, space/bcount,(space/bcount)*100/blocksize);
+ printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
+ levels ? "" : " ", names, space/bcount,
+ (space/bcount)*100/blocksize);
return (struct stats) { names, space, bcount};
}
#endif /* DX_DEBUG */
@@ -344,7 +347,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
* back to userspace.
*/
static struct dx_frame *
-dx_probe(struct dentry *dentry, struct inode *dir,
+dx_probe(const struct qstr *d_name, struct inode *dir,
struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
{
unsigned count, indirect;
@@ -355,8 +358,6 @@ dx_probe(struct dentry *dentry, struct inode *dir,
u32 hash;
frame->bh = NULL;
- if (dentry)
- dir = dentry->d_parent->d_inode;
if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
goto fail;
root = (struct dx_root *) bh->b_data;
@@ -372,8 +373,8 @@ dx_probe(struct dentry *dentry, struct inode *dir,
}
hinfo->hash_version = root->info.hash_version;
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
- if (dentry)
- ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
+ if (d_name)
+ ext4fs_dirhash(d_name->name, d_name->len, hinfo);
hash = hinfo->hash;
if (root->info.unused_flags & 1) {
@@ -406,7 +407,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
goto fail;
}
- dxtrace (printk("Look up %x", hash));
+ dxtrace(printk("Look up %x", hash));
while (1)
{
count = dx_get_count(entries);
@@ -555,7 +556,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
0, &err)))
return err; /* Failure */
p++;
- brelse (p->bh);
+ brelse(p->bh);
p->bh = bh;
p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
}
@@ -593,7 +594,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
/* On error, skip the f_pos to the next block. */
dir_file->f_pos = (dir_file->f_pos |
(dir->i_sb->s_blocksize - 1)) + 1;
- brelse (bh);
+ brelse(bh);
return count;
}
ext4fs_dirhash(de->name, de->name_len, hinfo);
@@ -635,8 +636,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
int ret, err;
__u32 hashval;
- dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
- start_minor_hash));
+ dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
+ start_hash, start_minor_hash));
dir = dir_file->f_path.dentry->d_inode;
if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
@@ -648,7 +649,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
}
hinfo.hash = start_hash;
hinfo.minor_hash = 0;
- frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err);
+ frame = dx_probe(NULL, dir, &hinfo, frames, &err);
if (!frame)
return err;
@@ -694,8 +695,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
break;
}
dx_release(frames);
- dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
- count, *next_hash));
+ dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
+ "next hash: %x\n", count, *next_hash));
return count;
errout:
dx_release(frames);
@@ -802,17 +803,17 @@ static inline int ext4_match (int len, const char * const name,
/*
* Returns 0 if not found, -1 on failure, and 1 on success
*/
-static inline int search_dirblock(struct buffer_head * bh,
+static inline int search_dirblock(struct buffer_head *bh,
struct inode *dir,
- struct dentry *dentry,
+ const struct qstr *d_name,
unsigned long offset,
struct ext4_dir_entry_2 ** res_dir)
{
struct ext4_dir_entry_2 * de;
char * dlimit;
int de_len;
- const char *name = dentry->d_name.name;
- int namelen = dentry->d_name.len;
+ const char *name = d_name->name;
+ int namelen = d_name->len;
de = (struct ext4_dir_entry_2 *) bh->b_data;
dlimit = bh->b_data + dir->i_sb->s_blocksize;
@@ -851,12 +852,13 @@ static inline int search_dirblock(struct buffer_head * bh,
* The returned buffer_head has ->b_count elevated. The caller is expected
* to brelse() it when appropriate.
*/
-static struct buffer_head * ext4_find_entry (struct dentry *dentry,
+static struct buffer_head * ext4_find_entry (struct inode *dir,
+ const struct qstr *d_name,
struct ext4_dir_entry_2 ** res_dir)
{
- struct super_block * sb;
- struct buffer_head * bh_use[NAMEI_RA_SIZE];
- struct buffer_head * bh, *ret = NULL;
+ struct super_block *sb;
+ struct buffer_head *bh_use[NAMEI_RA_SIZE];
+ struct buffer_head *bh, *ret = NULL;
ext4_lblk_t start, block, b;
int ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
@@ -865,16 +867,15 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
int num = 0;
ext4_lblk_t nblocks;
int i, err;
- struct inode *dir = dentry->d_parent->d_inode;
int namelen;
*res_dir = NULL;
sb = dir->i_sb;
- namelen = dentry->d_name.len;
+ namelen = d_name->len;
if (namelen > EXT4_NAME_LEN)
return NULL;
if (is_dx(dir)) {
- bh = ext4_dx_find_entry(dentry, res_dir, &err);
+ bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
/*
* On success, or if the error was file not found,
* return. Otherwise, fall back to doing a search the
@@ -882,7 +883,8 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
*/
if (bh || (err != ERR_BAD_DX_DIR))
return bh;
- dxtrace(printk("ext4_find_entry: dx failed, falling back\n"));
+ dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
+ "falling back\n"));
}
nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
start = EXT4_I(dir)->i_dir_start_lookup;
@@ -926,7 +928,7 @@ restart:
brelse(bh);
goto next;
}
- i = search_dirblock(bh, dir, dentry,
+ i = search_dirblock(bh, dir, d_name,
block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
if (i == 1) {
EXT4_I(dir)->i_dir_start_lookup = block;
@@ -956,11 +958,11 @@ restart:
cleanup_and_exit:
/* Clean up the read-ahead blocks */
for (; ra_ptr < ra_max; ra_ptr++)
- brelse (bh_use[ra_ptr]);
+ brelse(bh_use[ra_ptr]);
return ret;
}
-static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
+static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir, int *err)
{
struct super_block * sb;
@@ -971,14 +973,13 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
struct buffer_head *bh;
ext4_lblk_t block;
int retval;
- int namelen = dentry->d_name.len;
- const u8 *name = dentry->d_name.name;
- struct inode *dir = dentry->d_parent->d_inode;
+ int namelen = d_name->len;
+ const u8 *name = d_name->name;
sb = dir->i_sb;
/* NFS may look up ".." - look at dx_root directory block */
if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
- if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
+ if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
return NULL;
} else {
frame = frames;
@@ -1010,7 +1011,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
return bh;
}
}
- brelse (bh);
+ brelse(bh);
/* Check to see if we should continue to search */
retval = ext4_htree_next_block(dir, hash, frame,
frames, NULL);
@@ -1025,25 +1026,25 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
*err = -ENOENT;
errout:
- dxtrace(printk("%s not found\n", name));
+ dxtrace(printk(KERN_DEBUG "%s not found\n", name));
dx_release (frames);
return NULL;
}
-static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
- struct inode * inode;
- struct ext4_dir_entry_2 * de;
- struct buffer_head * bh;
+ struct inode *inode;
+ struct ext4_dir_entry_2 *de;
+ struct buffer_head *bh;
if (dentry->d_name.len > EXT4_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- bh = ext4_find_entry(dentry, &de);
+ bh = ext4_find_entry(dir, &dentry->d_name, &de);
inode = NULL;
if (bh) {
unsigned long ino = le32_to_cpu(de->inode);
- brelse (bh);
+ brelse(bh);
if (!ext4_valid_inum(dir->i_sb, ino)) {
ext4_error(dir->i_sb, "ext4_lookup",
"bad inode number: %lu", ino);
@@ -1062,15 +1063,14 @@ struct dentry *ext4_get_parent(struct dentry *child)
unsigned long ino;
struct dentry *parent;
struct inode *inode;
- struct dentry dotdot;
+ static const struct qstr dotdot = {
+ .name = "..",
+ .len = 2,
+ };
struct ext4_dir_entry_2 * de;
struct buffer_head *bh;
- dotdot.d_name.name = "..";
- dotdot.d_name.len = 2;
- dotdot.d_parent = child; /* confusing, isn't it! */
-
- bh = ext4_find_entry(&dotdot, &de);
+ bh = ext4_find_entry(child->d_inode, &dotdot, &de);
inode = NULL;
if (!bh)
return ERR_PTR(-ENOENT);
@@ -1201,10 +1201,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
/* create map in the end of data2 block */
map = (struct dx_map_entry *) (data2 + blocksize);
- count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
+ count = dx_make_map((struct ext4_dir_entry_2 *) data1,
blocksize, hinfo, map);
map -= count;
- dx_sort_map (map, count);
+ dx_sort_map(map, count);
/* Split the existing block in the middle, size-wise */
size = 0;
move = 0;
@@ -1225,7 +1225,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
/* Fancy dance to stay within two buffers */
de2 = dx_move_dirents(data1, data2, map + split, count - split);
- de = dx_pack_dirents(data1,blocksize);
+ de = dx_pack_dirents(data1, blocksize);
de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2);
dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
@@ -1237,15 +1237,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
swap(*bh, bh2);
de = de2;
}
- dx_insert_block (frame, hash2 + continued, newblock);
- err = ext4_journal_dirty_metadata (handle, bh2);
+ dx_insert_block(frame, hash2 + continued, newblock);
+ err = ext4_journal_dirty_metadata(handle, bh2);
if (err)
goto journal_error;
- err = ext4_journal_dirty_metadata (handle, frame->bh);
+ err = ext4_journal_dirty_metadata(handle, frame->bh);
if (err)
goto journal_error;
- brelse (bh2);
- dxtrace(dx_show_index ("frame", frame->entries));
+ brelse(bh2);
+ dxtrace(dx_show_index("frame", frame->entries));
return de;
journal_error:
@@ -1271,7 +1271,7 @@ errout:
*/
static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
struct inode *inode, struct ext4_dir_entry_2 *de,
- struct buffer_head * bh)
+ struct buffer_head *bh)
{
struct inode *dir = dentry->d_parent->d_inode;
const char *name = dentry->d_name.name;
@@ -1288,11 +1288,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
while ((char *) de <= top) {
if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
bh, offset)) {
- brelse (bh);
+ brelse(bh);
return -EIO;
}
- if (ext4_match (namelen, name, de)) {
- brelse (bh);
+ if (ext4_match(namelen, name, de)) {
+ brelse(bh);
return -EEXIST;
}
nlen = EXT4_DIR_REC_LEN(de->name_len);
@@ -1329,7 +1329,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
} else
de->inode = 0;
de->name_len = namelen;
- memcpy (de->name, name, namelen);
+ memcpy(de->name, name, namelen);
/*
* XXX shouldn't update any times until successful
* completion of syscall, but too many callers depend
@@ -1377,7 +1377,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
struct fake_dirent *fde;
blocksize = dir->i_sb->s_blocksize;
- dxtrace(printk("Creating index\n"));
+ dxtrace(printk(KERN_DEBUG "Creating index\n"));
retval = ext4_journal_get_write_access(handle, bh);
if (retval) {
ext4_std_error(dir->i_sb, retval);
@@ -1386,7 +1386,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
}
root = (struct dx_root *) bh->b_data;
- bh2 = ext4_append (handle, dir, &block, &retval);
+ bh2 = ext4_append(handle, dir, &block, &retval);
if (!(bh2)) {
brelse(bh);
return retval;
@@ -1412,9 +1412,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
root->info.info_length = sizeof(root->info);
root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
entries = root->entries;
- dx_set_block (entries, 1);
- dx_set_count (entries, 1);
- dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
+ dx_set_block(entries, 1);
+ dx_set_count(entries, 1);
+ dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
/* Initialize as for dx_probe */
hinfo.hash_version = root->info.hash_version;
@@ -1443,14 +1443,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
* may not sleep between calling this and putting something into
* the entry, as someone else might have used it while you slept.
*/
-static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
- struct inode *inode)
+static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
+ struct inode *inode)
{
struct inode *dir = dentry->d_parent->d_inode;
unsigned long offset;
- struct buffer_head * bh;
+ struct buffer_head *bh;
struct ext4_dir_entry_2 *de;
- struct super_block * sb;
+ struct super_block *sb;
int retval;
int dx_fallback=0;
unsigned blocksize;
@@ -1500,13 +1500,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
struct dx_frame frames[2], *frame;
struct dx_entry *entries, *at;
struct dx_hash_info hinfo;
- struct buffer_head * bh;
+ struct buffer_head *bh;
struct inode *dir = dentry->d_parent->d_inode;
- struct super_block * sb = dir->i_sb;
+ struct super_block *sb = dir->i_sb;
struct ext4_dir_entry_2 *de;
int err;
- frame = dx_probe(dentry, NULL, &hinfo, frames, &err);
+ frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
if (!frame)
return err;
entries = frame->entries;
@@ -1527,7 +1527,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
}
/* Block full, should compress but for now just split */
- dxtrace(printk("using %u of %u node entries\n",
+ dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
dx_get_count(entries), dx_get_limit(entries)));
/* Need to split index? */
if (dx_get_count(entries) == dx_get_limit(entries)) {
@@ -1559,7 +1559,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (levels) {
unsigned icount1 = icount/2, icount2 = icount - icount1;
unsigned hash2 = dx_get_hash(entries + icount1);
- dxtrace(printk("Split index %i/%i\n", icount1, icount2));
+ dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
+ icount1, icount2));
BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
err = ext4_journal_get_write_access(handle,
@@ -1567,11 +1568,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (err)
goto journal_error;
- memcpy ((char *) entries2, (char *) (entries + icount1),
- icount2 * sizeof(struct dx_entry));
- dx_set_count (entries, icount1);
- dx_set_count (entries2, icount2);
- dx_set_limit (entries2, dx_node_limit(dir));
+ memcpy((char *) entries2, (char *) (entries + icount1),
+ icount2 * sizeof(struct dx_entry));
+ dx_set_count(entries, icount1);
+ dx_set_count(entries2, icount2);
+ dx_set_limit(entries2, dx_node_limit(dir));
/* Which index block gets the new entry? */
if (at - entries >= icount1) {
@@ -1579,16 +1580,17 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
frame->entries = entries = entries2;
swap(frame->bh, bh2);
}
- dx_insert_block (frames + 0, hash2, newblock);
- dxtrace(dx_show_index ("node", frames[1].entries));
- dxtrace(dx_show_index ("node",
+ dx_insert_block(frames + 0, hash2, newblock);
+ dxtrace(dx_show_index("node", frames[1].entries));
+ dxtrace(dx_show_index("node",
((struct dx_node *) bh2->b_data)->entries));
err = ext4_journal_dirty_metadata(handle, bh2);
if (err)
goto journal_error;
brelse (bh2);
} else {
- dxtrace(printk("Creating second level index...\n"));
+ dxtrace(printk(KERN_DEBUG
+ "Creating second level index...\n"));
memcpy((char *) entries2, (char *) entries,
icount * sizeof(struct dx_entry));
dx_set_limit(entries2, dx_node_limit(dir));
@@ -1630,12 +1632,12 @@ cleanup:
* ext4_delete_entry deletes a directory entry by merging it with the
* previous entry
*/
-static int ext4_delete_entry (handle_t *handle,
- struct inode * dir,
- struct ext4_dir_entry_2 * de_del,
- struct buffer_head * bh)
+static int ext4_delete_entry(handle_t *handle,
+ struct inode *dir,
+ struct ext4_dir_entry_2 *de_del,
+ struct buffer_head *bh)
{
- struct ext4_dir_entry_2 * de, * pde;
+ struct ext4_dir_entry_2 *de, *pde;
int i;
i = 0;
@@ -1716,11 +1718,11 @@ static int ext4_add_nondir(handle_t *handle,
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int ext4_create (struct inode * dir, struct dentry * dentry, int mode,
- struct nameidata *nd)
+static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
+ struct nameidata *nd)
{
handle_t *handle;
- struct inode * inode;
+ struct inode *inode;
int err, retries = 0;
retry:
@@ -1747,8 +1749,8 @@ retry:
return err;
}
-static int ext4_mknod (struct inode * dir, struct dentry *dentry,
- int mode, dev_t rdev)
+static int ext4_mknod(struct inode *dir, struct dentry *dentry,
+ int mode, dev_t rdev)
{
handle_t *handle;
struct inode *inode;
@@ -1767,11 +1769,11 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
- inode = ext4_new_inode (handle, dir, mode);
+ inode = ext4_new_inode(handle, dir, mode);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
inode->i_op = &ext4_special_inode_operations;
#endif
err = ext4_add_nondir(handle, dentry, inode);
@@ -1782,12 +1784,12 @@ retry:
return err;
}
-static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
handle_t *handle;
- struct inode * inode;
- struct buffer_head * dir_block;
- struct ext4_dir_entry_2 * de;
+ struct inode *inode;
+ struct buffer_head *dir_block;
+ struct ext4_dir_entry_2 *de;
int err, retries = 0;
if (EXT4_DIR_LINK_MAX(dir))
@@ -1803,7 +1805,7 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
- inode = ext4_new_inode (handle, dir, S_IFDIR | mode);
+ inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
@@ -1811,7 +1813,7 @@ retry:
inode->i_op = &ext4_dir_inode_operations;
inode->i_fop = &ext4_dir_operations;
inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
- dir_block = ext4_bread (handle, inode, 0, 1, &err);
+ dir_block = ext4_bread(handle, inode, 0, 1, &err);
if (!dir_block)
goto out_clear_inode;
BUFFER_TRACE(dir_block, "get_write_access");
@@ -1820,26 +1822,26 @@ retry:
de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1;
de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
- strcpy (de->name, ".");
+ strcpy(de->name, ".");
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
de = ext4_next_entry(de);
de->inode = cpu_to_le32(dir->i_ino);
de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize -
EXT4_DIR_REC_LEN(1));
de->name_len = 2;
- strcpy (de->name, "..");
+ strcpy(de->name, "..");
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
inode->i_nlink = 2;
BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
ext4_journal_dirty_metadata(handle, dir_block);
- brelse (dir_block);
+ brelse(dir_block);
ext4_mark_inode_dirty(handle, inode);
- err = ext4_add_entry (handle, dentry, inode);
+ err = ext4_add_entry(handle, dentry, inode);
if (err) {
out_clear_inode:
clear_nlink(inode);
ext4_mark_inode_dirty(handle, inode);
- iput (inode);
+ iput(inode);
goto out_stop;
}
ext4_inc_count(handle, dir);
@@ -1856,17 +1858,17 @@ out_stop:
/*
* routine to check that the specified directory is empty (for rmdir)
*/
-static int empty_dir (struct inode * inode)
+static int empty_dir(struct inode *inode)
{
unsigned long offset;
- struct buffer_head * bh;
- struct ext4_dir_entry_2 * de, * de1;
- struct super_block * sb;
+ struct buffer_head *bh;
+ struct ext4_dir_entry_2 *de, *de1;
+ struct super_block *sb;
int err = 0;
sb = inode->i_sb;
if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
- !(bh = ext4_bread (NULL, inode, 0, 0, &err))) {
+ !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
if (err)
ext4_error(inode->i_sb, __func__,
"error %d reading directory #%lu offset 0",
@@ -1881,23 +1883,23 @@ static int empty_dir (struct inode * inode)
de1 = ext4_next_entry(de);
if (le32_to_cpu(de->inode) != inode->i_ino ||
!le32_to_cpu(de1->inode) ||
- strcmp (".", de->name) ||
- strcmp ("..", de1->name)) {
- ext4_warning (inode->i_sb, "empty_dir",
- "bad directory (dir #%lu) - no `.' or `..'",
- inode->i_ino);
- brelse (bh);
+ strcmp(".", de->name) ||
+ strcmp("..", de1->name)) {
+ ext4_warning(inode->i_sb, "empty_dir",
+ "bad directory (dir #%lu) - no `.' or `..'",
+ inode->i_ino);
+ brelse(bh);
return 1;
}
offset = ext4_rec_len_from_disk(de->rec_len) +
ext4_rec_len_from_disk(de1->rec_len);
de = ext4_next_entry(de1);
- while (offset < inode->i_size ) {
+ while (offset < inode->i_size) {
if (!bh ||
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
err = 0;
- brelse (bh);
- bh = ext4_bread (NULL, inode,
+ brelse(bh);
+ bh = ext4_bread(NULL, inode,
offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
if (!bh) {
if (err)
@@ -1917,13 +1919,13 @@ static int empty_dir (struct inode * inode)
continue;
}
if (le32_to_cpu(de->inode)) {
- brelse (bh);
+ brelse(bh);
return 0;
}
offset += ext4_rec_len_from_disk(de->rec_len);
de = ext4_next_entry(de);
}
- brelse (bh);
+ brelse(bh);
return 1;
}
@@ -1954,8 +1956,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
* ->i_nlink. For, say it, character device. Not a regular file,
* not a directory, not a symlink and ->i_nlink > 0.
*/
- J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
+ J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
@@ -2069,12 +2071,12 @@ out_brelse:
goto out_err;
}
-static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
+static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
{
int retval;
- struct inode * inode;
- struct buffer_head * bh;
- struct ext4_dir_entry_2 * de;
+ struct inode *inode;
+ struct buffer_head *bh;
+ struct ext4_dir_entry_2 *de;
handle_t *handle;
/* Initialize quotas before so that eventual writes go in
@@ -2085,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
return PTR_ERR(handle);
retval = -ENOENT;
- bh = ext4_find_entry (dentry, &de);
+ bh = ext4_find_entry(dir, &dentry->d_name, &de);
if (!bh)
goto end_rmdir;
@@ -2099,16 +2101,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
goto end_rmdir;
retval = -ENOTEMPTY;
- if (!empty_dir (inode))
+ if (!empty_dir(inode))
goto end_rmdir;
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
goto end_rmdir;
if (!EXT4_DIR_LINK_EMPTY(inode))
- ext4_warning (inode->i_sb, "ext4_rmdir",
- "empty directory has too many links (%d)",
- inode->i_nlink);
+ ext4_warning(inode->i_sb, "ext4_rmdir",
+ "empty directory has too many links (%d)",
+ inode->i_nlink);
inode->i_version++;
clear_nlink(inode);
/* There's no need to set i_disksize: the fact that i_nlink is
@@ -2124,16 +2126,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
end_rmdir:
ext4_journal_stop(handle);
- brelse (bh);
+ brelse(bh);
return retval;
}
-static int ext4_unlink(struct inode * dir, struct dentry *dentry)
+static int ext4_unlink(struct inode *dir, struct dentry *dentry)
{
int retval;
- struct inode * inode;
- struct buffer_head * bh;
- struct ext4_dir_entry_2 * de;
+ struct inode *inode;
+ struct buffer_head *bh;
+ struct ext4_dir_entry_2 *de;
handle_t *handle;
/* Initialize quotas before so that eventual writes go
@@ -2147,7 +2149,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
handle->h_sync = 1;
retval = -ENOENT;
- bh = ext4_find_entry (dentry, &de);
+ bh = ext4_find_entry(dir, &dentry->d_name, &de);
if (!bh)
goto end_unlink;
@@ -2158,9 +2160,9 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
goto end_unlink;
if (!inode->i_nlink) {
- ext4_warning (inode->i_sb, "ext4_unlink",
- "Deleting nonexistent file (%lu), %d",
- inode->i_ino, inode->i_nlink);
+ ext4_warning(inode->i_sb, "ext4_unlink",
+ "Deleting nonexistent file (%lu), %d",
+ inode->i_ino, inode->i_nlink);
inode->i_nlink = 1;
}
retval = ext4_delete_entry(handle, dir, de, bh);
@@ -2178,15 +2180,15 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
end_unlink:
ext4_journal_stop(handle);
- brelse (bh);
+ brelse(bh);
return retval;
}
-static int ext4_symlink (struct inode * dir,
- struct dentry *dentry, const char * symname)
+static int ext4_symlink(struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
handle_t *handle;
- struct inode * inode;
+ struct inode *inode;
int l, err, retries = 0;
l = strlen(symname)+1;
@@ -2203,12 +2205,12 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
- inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
+ inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
- if (l > sizeof (EXT4_I(inode)->i_data)) {
+ if (l > sizeof(EXT4_I(inode)->i_data)) {
inode->i_op = &ext4_symlink_inode_operations;
ext4_set_aops(inode);
/*
@@ -2221,14 +2223,14 @@ retry:
if (err) {
clear_nlink(inode);
ext4_mark_inode_dirty(handle, inode);
- iput (inode);
+ iput(inode);
goto out_stop;
}
} else {
/* clear the extent format for fast symlink */
EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
inode->i_op = &ext4_fast_symlink_inode_operations;
- memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
+ memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
inode->i_size = l-1;
}
EXT4_I(inode)->i_disksize = inode->i_size;
@@ -2240,8 +2242,8 @@ out_stop:
return err;
}
-static int ext4_link (struct dentry * old_dentry,
- struct inode * dir, struct dentry *dentry)
+static int ext4_link(struct dentry *old_dentry,
+ struct inode *dir, struct dentry *dentry)
{
handle_t *handle;
struct inode *inode = old_dentry->d_inode;
@@ -2284,13 +2286,13 @@ retry:
* Anybody can rename anything with this: the permission checks are left to the
* higher-level routines.
*/
-static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
- struct inode * new_dir,struct dentry *new_dentry)
+static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
{
handle_t *handle;
- struct inode * old_inode, * new_inode;
- struct buffer_head * old_bh, * new_bh, * dir_bh;
- struct ext4_dir_entry_2 * old_de, * new_de;
+ struct inode *old_inode, *new_inode;
+ struct buffer_head *old_bh, *new_bh, *dir_bh;
+ struct ext4_dir_entry_2 *old_de, *new_de;
int retval;
old_bh = new_bh = dir_bh = NULL;
@@ -2308,7 +2310,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
handle->h_sync = 1;
- old_bh = ext4_find_entry (old_dentry, &old_de);
+ old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
/*
* Check for inode number is _not_ due to possible IO errors.
* We might rmdir the source, keep it as pwd of some process
@@ -2321,32 +2323,32 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
goto end_rename;
new_inode = new_dentry->d_inode;
- new_bh = ext4_find_entry (new_dentry, &new_de);
+ new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de);
if (new_bh) {
if (!new_inode) {
- brelse (new_bh);
+ brelse(new_bh);
new_bh = NULL;
}
}
if (S_ISDIR(old_inode->i_mode)) {
if (new_inode) {
retval = -ENOTEMPTY;
- if (!empty_dir (new_inode))
+ if (!empty_dir(new_inode))
goto end_rename;
}
retval = -EIO;
- dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval);
+ dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
if (!dir_bh)
goto end_rename;
if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
goto end_rename;
retval = -EMLINK;
- if (!new_inode && new_dir!=old_dir &&
+ if (!new_inode && new_dir != old_dir &&
new_dir->i_nlink >= EXT4_LINK_MAX)
goto end_rename;
}
if (!new_bh) {
- retval = ext4_add_entry (handle, new_dentry, old_inode);
+ retval = ext4_add_entry(handle, new_dentry, old_inode);
if (retval)
goto end_rename;
} else {
@@ -2388,7 +2390,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
struct buffer_head *old_bh2;
struct ext4_dir_entry_2 *old_de2;
- old_bh2 = ext4_find_entry(old_dentry, &old_de2);
+ old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2);
if (old_bh2) {
retval = ext4_delete_entry(handle, old_dir,
old_de2, old_bh2);
@@ -2433,9 +2435,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
retval = 0;
end_rename:
- brelse (dir_bh);
- brelse (old_bh);
- brelse (new_bh);
+ brelse(dir_bh);
+ brelse(old_bh);
+ brelse(new_bh);
ext4_journal_stop(handle);
return retval;
}
@@ -2454,7 +2456,7 @@ const struct inode_operations ext4_dir_inode_operations = {
.mknod = ext4_mknod,
.rename = ext4_rename,
.setattr = ext4_setattr,
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
@@ -2465,7 +2467,7 @@ const struct inode_operations ext4_dir_inode_operations = {
const struct inode_operations ext4_special_inode_operations = {
.setattr = ext4_setattr,
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b3d35604ea1..b6ec1843a01 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -416,8 +416,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
"EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
gdb_num);
- /*
- * If we are not using the primary superblock/GDT copy don't resize,
+ /*
+ * If we are not using the primary superblock/GDT copy don't resize,
* because the user tools have no way of handling this. Probably a
* bad time to do it anyways.
*/
@@ -870,11 +870,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
* We can allocate memory for mb_alloc based on the new group
* descriptor
*/
- if (test_opt(sb, MBALLOC)) {
- err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
- if (err)
- goto exit_journal;
- }
+ err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
+ if (err)
+ goto exit_journal;
+
/*
* Make the new blocks and inodes valid next. We do this before
* increasing the group count so that once the group is enabled,
@@ -929,6 +928,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
percpu_counter_add(&sbi->s_freeinodes_counter,
EXT4_INODES_PER_GROUP(sb));
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+ ext4_group_t flex_group;
+ flex_group = ext4_flex_group(sbi, input->group);
+ sbi->s_flex_groups[flex_group].free_blocks +=
+ input->free_blocks_count;
+ sbi->s_flex_groups[flex_group].free_inodes +=
+ EXT4_INODES_PER_GROUP(sb);
+ }
+
ext4_journal_dirty_metadata(handle, sbi->s_sbh);
sb->s_dirt = 1;
@@ -964,7 +972,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_group_t o_groups_count;
ext4_grpblk_t last;
ext4_grpblk_t add;
- struct buffer_head * bh;
+ struct buffer_head *bh;
handle_t *handle;
int err;
unsigned long freed_blocks;
@@ -1077,8 +1085,15 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
/*
* Mark mballoc pages as not up to date so that they will be updated
* next time they are loaded by ext4_mb_load_buddy.
+ *
+ * XXX Bad, Bad, BAD!!! We should not be overloading the
+ * Uptodate flag, particularly on thte bitmap bh, as way of
+ * hinting to ext4_mb_load_buddy() that it needs to be
+ * overloaded. A user could take a LVM snapshot, then do an
+ * on-line fsck, and clear the uptodate flag, and this would
+ * not be a bug in userspace, but a bug in the kernel. FIXME!!!
*/
- if (test_opt(sb, MBALLOC)) {
+ {
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct inode *inode = sbi->s_buddy_cache;
int blocks_per_page;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 566344b926b..9b2b2bc4ec1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -34,6 +34,8 @@
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/marker.h>
#include <linux/log2.h>
#include <linux/crc16.h>
#include <asm/uaccess.h>
@@ -45,6 +47,8 @@
#include "namei.h"
#include "group.h"
+struct proc_dir_entry *ext4_proc_root;
+
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
@@ -370,66 +374,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
*/
}
-int ext4_update_compat_feature(handle_t *handle,
- struct super_block *sb, __u32 compat)
-{
- int err = 0;
- if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) {
- err = ext4_journal_get_write_access(handle,
- EXT4_SB(sb)->s_sbh);
- if (err)
- return err;
- EXT4_SET_COMPAT_FEATURE(sb, compat);
- sb->s_dirt = 1;
- handle->h_sync = 1;
- BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
- "call ext4_journal_dirty_met adata");
- err = ext4_journal_dirty_metadata(handle,
- EXT4_SB(sb)->s_sbh);
- }
- return err;
-}
-
-int ext4_update_rocompat_feature(handle_t *handle,
- struct super_block *sb, __u32 rocompat)
-{
- int err = 0;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) {
- err = ext4_journal_get_write_access(handle,
- EXT4_SB(sb)->s_sbh);
- if (err)
- return err;
- EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat);
- sb->s_dirt = 1;
- handle->h_sync = 1;
- BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
- "call ext4_journal_dirty_met adata");
- err = ext4_journal_dirty_metadata(handle,
- EXT4_SB(sb)->s_sbh);
- }
- return err;
-}
-
-int ext4_update_incompat_feature(handle_t *handle,
- struct super_block *sb, __u32 incompat)
-{
- int err = 0;
- if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) {
- err = ext4_journal_get_write_access(handle,
- EXT4_SB(sb)->s_sbh);
- if (err)
- return err;
- EXT4_SET_INCOMPAT_FEATURE(sb, incompat);
- sb->s_dirt = 1;
- handle->h_sync = 1;
- BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
- "call ext4_journal_dirty_met adata");
- err = ext4_journal_dirty_metadata(handle,
- EXT4_SB(sb)->s_sbh);
- }
- return err;
-}
-
/*
* Open the external journal device
*/
@@ -503,15 +447,18 @@ static void ext4_put_super(struct super_block *sb)
ext4_mb_release(sb);
ext4_ext_release(sb);
ext4_xattr_put_super(sb);
- jbd2_journal_destroy(sbi->s_journal);
+ if (jbd2_journal_destroy(sbi->s_journal) < 0)
+ ext4_abort(sb, __func__, "Couldn't clean up the journal");
sbi->s_journal = NULL;
if (!(sb->s_flags & MS_RDONLY)) {
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
es->s_state = cpu_to_le16(sbi->s_mount_state);
- BUFFER_TRACE(sbi->s_sbh, "marking dirty");
- mark_buffer_dirty(sbi->s_sbh);
ext4_commit_super(sb, es, 1);
}
+ if (sbi->s_proc) {
+ remove_proc_entry("inode_readahead_blks", sbi->s_proc);
+ remove_proc_entry(sb->s_id, ext4_proc_root);
+ }
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(sbi->s_group_desc[i]);
@@ -520,6 +467,7 @@ static void ext4_put_super(struct super_block *sb)
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
+ percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
@@ -562,11 +510,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
ei->i_acl = EXT4_ACL_NOT_CACHED;
ei->i_default_acl = EXT4_ACL_NOT_CACHED;
#endif
- ei->i_block_alloc_info = NULL;
ei->vfs_inode.i_version = 1;
ei->vfs_inode.i_data.writeback_index = 0;
memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
@@ -599,7 +546,7 @@ static void init_once(void *foo)
struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
INIT_LIST_HEAD(&ei->i_orphan);
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
init_rwsem(&ei->xattr_sem);
#endif
init_rwsem(&ei->i_data_sem);
@@ -625,8 +572,7 @@ static void destroy_inodecache(void)
static void ext4_clear_inode(struct inode *inode)
{
- struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info;
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
if (EXT4_I(inode)->i_acl &&
EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
posix_acl_release(EXT4_I(inode)->i_acl);
@@ -638,10 +584,7 @@ static void ext4_clear_inode(struct inode *inode)
EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
}
#endif
- ext4_discard_reservation(inode);
- EXT4_I(inode)->i_block_alloc_info = NULL;
- if (unlikely(rsv))
- kfree(rsv);
+ ext4_discard_preallocations(inode);
jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
&EXT4_I(inode)->jinode);
}
@@ -654,7 +597,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
if (sbi->s_jquota_fmt)
seq_printf(seq, ",jqfmt=%s",
- (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0");
+ (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0");
if (sbi->s_qf_names[USRQUOTA])
seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
@@ -718,7 +661,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",debug");
if (test_opt(sb, OLDALLOC))
seq_puts(seq, ",oldalloc");
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
if (test_opt(sb, XATTR_USER) &&
!(def_mount_opts & EXT4_DEFM_XATTR_USER))
seq_puts(seq, ",user_xattr");
@@ -727,7 +670,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",nouser_xattr");
}
#endif
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
seq_puts(seq, ",acl");
if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
@@ -752,8 +695,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",nobh");
if (!test_opt(sb, EXTENTS))
seq_puts(seq, ",noextents");
- if (!test_opt(sb, MBALLOC))
- seq_puts(seq, ",nomballoc");
if (test_opt(sb, I_VERSION))
seq_puts(seq, ",i_version");
if (!test_opt(sb, DELALLOC))
@@ -773,6 +714,13 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
seq_puts(seq, ",data=writeback");
+ if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
+ seq_printf(seq, ",inode_readahead_blks=%u",
+ sbi->s_inode_readahead_blks);
+
+ if (test_opt(sb, DATA_ERR_ABORT))
+ seq_puts(seq, ",data_err=abort");
+
ext4_show_quota_options(seq, sb);
return 0;
}
@@ -822,7 +770,7 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
}
#ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group")
+#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
static int ext4_dquot_initialize(struct inode *inode, int type);
@@ -896,20 +844,22 @@ static const struct export_operations ext4_export_ops = {
enum {
Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
- Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
+ Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
Opt_journal_checksum, Opt_journal_async_commit,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_data_err_abort, Opt_data_err_ignore,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
- Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+ Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+ Opt_inode_readahead_blks
};
-static match_table_t tokens = {
+static const match_table_t tokens = {
{Opt_bsd_df, "bsddf"},
{Opt_minix_df, "minixdf"},
{Opt_grpid, "grpid"},
@@ -923,8 +873,6 @@ static match_table_t tokens = {
{Opt_err_panic, "errors=panic"},
{Opt_err_ro, "errors=remount-ro"},
{Opt_nouid32, "nouid32"},
- {Opt_nocheck, "nocheck"},
- {Opt_nocheck, "check=none"},
{Opt_debug, "debug"},
{Opt_oldalloc, "oldalloc"},
{Opt_orlov, "orlov"},
@@ -947,6 +895,8 @@ static match_table_t tokens = {
{Opt_data_journal, "data=journal"},
{Opt_data_ordered, "data=ordered"},
{Opt_data_writeback, "data=writeback"},
+ {Opt_data_err_abort, "data_err=abort"},
+ {Opt_data_err_ignore, "data_err=ignore"},
{Opt_offusrjquota, "usrjquota="},
{Opt_usrjquota, "usrjquota=%s"},
{Opt_offgrpjquota, "grpjquota="},
@@ -961,12 +911,11 @@ static match_table_t tokens = {
{Opt_extents, "extents"},
{Opt_noextents, "noextents"},
{Opt_i_version, "i_version"},
- {Opt_mballoc, "mballoc"},
- {Opt_nomballoc, "nomballoc"},
{Opt_stripe, "stripe=%u"},
{Opt_resize, "resize"},
{Opt_delalloc, "delalloc"},
{Opt_nodelalloc, "nodelalloc"},
+ {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
{Opt_err, NULL},
};
@@ -981,7 +930,7 @@ static ext4_fsblk_t get_sb_block(void **data)
/*todo: use simple_strtoll with >32bit ext4 */
sb_block = simple_strtoul(options, &options, 0);
if (*options && *options != ',') {
- printk("EXT4-fs: Invalid sb specification: %s\n",
+ printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
(char *) *data);
return 1;
}
@@ -1060,9 +1009,6 @@ static int parse_options(char *options, struct super_block *sb,
case Opt_nouid32:
set_opt(sbi->s_mount_opt, NO_UID32);
break;
- case Opt_nocheck:
- clear_opt(sbi->s_mount_opt, CHECK);
- break;
case Opt_debug:
set_opt(sbi->s_mount_opt, DEBUG);
break;
@@ -1072,7 +1018,7 @@ static int parse_options(char *options, struct super_block *sb,
case Opt_orlov:
clear_opt(sbi->s_mount_opt, OLDALLOC);
break;
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
case Opt_user_xattr:
set_opt(sbi->s_mount_opt, XATTR_USER);
break;
@@ -1082,10 +1028,11 @@ static int parse_options(char *options, struct super_block *sb,
#else
case Opt_user_xattr:
case Opt_nouser_xattr:
- printk("EXT4 (no)user_xattr options not supported\n");
+ printk(KERN_ERR "EXT4 (no)user_xattr options "
+ "not supported\n");
break;
#endif
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
case Opt_acl:
set_opt(sbi->s_mount_opt, POSIX_ACL);
break;
@@ -1095,7 +1042,8 @@ static int parse_options(char *options, struct super_block *sb,
#else
case Opt_acl:
case Opt_noacl:
- printk("EXT4 (no)acl options not supported\n");
+ printk(KERN_ERR "EXT4 (no)acl options "
+ "not supported\n");
break;
#endif
case Opt_reservation:
@@ -1178,6 +1126,12 @@ static int parse_options(char *options, struct super_block *sb,
sbi->s_mount_opt |= data_opt;
}
break;
+ case Opt_data_err_abort:
+ set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+ break;
+ case Opt_data_err_ignore:
+ clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+ break;
#ifdef CONFIG_QUOTA
case Opt_usrjquota:
qtype = USRQUOTA;
@@ -1189,8 +1143,8 @@ set_qf_name:
sb_any_quota_suspended(sb)) &&
!sbi->s_qf_names[qtype]) {
printk(KERN_ERR
- "EXT4-fs: Cannot change journaled "
- "quota options when quota turned on.\n");
+ "EXT4-fs: Cannot change journaled "
+ "quota options when quota turned on.\n");
return 0;
}
qname = match_strdup(&args[0]);
@@ -1357,12 +1311,6 @@ set_qf_format:
case Opt_nodelalloc:
clear_opt(sbi->s_mount_opt, DELALLOC);
break;
- case Opt_mballoc:
- set_opt(sbi->s_mount_opt, MBALLOC);
- break;
- case Opt_nomballoc:
- clear_opt(sbi->s_mount_opt, MBALLOC);
- break;
case Opt_stripe:
if (match_int(&args[0], &option))
return 0;
@@ -1373,6 +1321,13 @@ set_qf_format:
case Opt_delalloc:
set_opt(sbi->s_mount_opt, DELALLOC);
break;
+ case Opt_inode_readahead_blks:
+ if (match_int(&args[0], &option))
+ return 0;
+ if (option < 0 || option > (1 << 30))
+ return 0;
+ sbi->s_inode_readahead_blks = option;
+ break;
default:
printk(KERN_ERR
"EXT4-fs: Unrecognized mount option \"%s\" "
@@ -1473,15 +1428,9 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
EXT4_INODES_PER_GROUP(sb),
sbi->s_mount_opt);
- printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id);
- if (EXT4_SB(sb)->s_journal->j_inode == NULL) {
- char b[BDEVNAME_SIZE];
-
- printk("external journal on %s\n",
- bdevname(EXT4_SB(sb)->s_journal->j_dev, b));
- } else {
- printk("internal journal\n");
- }
+ printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
+ sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
+ "external", EXT4_SB(sb)->s_journal->j_devname);
return res;
}
@@ -1504,8 +1453,11 @@ static int ext4_fill_flex_info(struct super_block *sb)
sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
groups_per_flex = 1 << sbi->s_log_groups_per_flex;
- flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
- groups_per_flex;
+ /* We allocate both existing and potentially added groups */
+ flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
+ ((sbi->s_es->s_reserved_gdt_blocks +1 ) <<
+ EXT4_DESC_PER_BLOCK_BITS(sb))) /
+ groups_per_flex;
sbi->s_flex_groups = kzalloc(flex_group_count *
sizeof(struct flex_groups), GFP_KERNEL);
if (sbi->s_flex_groups == NULL) {
@@ -1584,7 +1536,7 @@ static int ext4_check_descriptors(struct super_block *sb)
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
flexbg_flag = 1;
- ext4_debug ("Checking group descriptors");
+ ext4_debug("Checking group descriptors");
for (i = 0; i < sbi->s_groups_count; i++) {
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
@@ -1599,14 +1551,14 @@ static int ext4_check_descriptors(struct super_block *sb)
if (block_bitmap < first_block || block_bitmap > last_block) {
printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
"Block bitmap for group %lu not in group "
- "(block %llu)!", i, block_bitmap);
+ "(block %llu)!\n", i, block_bitmap);
return 0;
}
inode_bitmap = ext4_inode_bitmap(sb, gdp);
if (inode_bitmap < first_block || inode_bitmap > last_block) {
printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
"Inode bitmap for group %lu not in group "
- "(block %llu)!", i, inode_bitmap);
+ "(block %llu)!\n", i, inode_bitmap);
return 0;
}
inode_table = ext4_inode_table(sb, gdp);
@@ -1614,7 +1566,7 @@ static int ext4_check_descriptors(struct super_block *sb)
inode_table + sbi->s_itb_per_group - 1 > last_block) {
printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
"Inode table for group %lu not in group "
- "(block %llu)!", i, inode_table);
+ "(block %llu)!\n", i, inode_table);
return 0;
}
spin_lock(sb_bgl_lock(sbi, i));
@@ -1623,8 +1575,10 @@ static int ext4_check_descriptors(struct super_block *sb)
"Checksum for group %lu failed (%u!=%u)\n",
i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
gdp)), le16_to_cpu(gdp->bg_checksum));
- if (!(sb->s_flags & MS_RDONLY))
+ if (!(sb->s_flags & MS_RDONLY)) {
+ spin_unlock(sb_bgl_lock(sbi, i));
return 0;
+ }
}
spin_unlock(sb_bgl_lock(sbi, i));
if (!flexbg_flag)
@@ -1714,9 +1668,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
DQUOT_INIT(inode);
if (inode->i_nlink) {
printk(KERN_DEBUG
- "%s: truncating inode %lu to %Ld bytes\n",
+ "%s: truncating inode %lu to %lld bytes\n",
__func__, inode->i_ino, inode->i_size);
- jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
+ jbd_debug(2, "truncating inode %lu to %lld bytes\n",
inode->i_ino, inode->i_size);
ext4_truncate(inode);
nr_truncates++;
@@ -1757,13 +1711,13 @@ static void ext4_orphan_cleanup(struct super_block *sb,
*
* Note, this does *not* consider any metadata overhead for vfs i_blocks.
*/
-static loff_t ext4_max_size(int blkbits)
+static loff_t ext4_max_size(int blkbits, int has_huge_files)
{
loff_t res;
loff_t upper_limit = MAX_LFS_FILESIZE;
/* small i_blocks in vfs inode? */
- if (sizeof(blkcnt_t) < sizeof(u64)) {
+ if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
/*
* CONFIG_LSF is not enabled implies the inode
* i_block represent total blocks in 512 bytes
@@ -1793,7 +1747,7 @@ static loff_t ext4_max_size(int blkbits)
* block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
* We need to be 1 filesystem block less than the 2^48 sector limit.
*/
-static loff_t ext4_max_bitmap_size(int bits)
+static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
{
loff_t res = EXT4_NDIR_BLOCKS;
int meta_blocks;
@@ -1806,11 +1760,11 @@ static loff_t ext4_max_bitmap_size(int bits)
* total number of 512 bytes blocks of the file
*/
- if (sizeof(blkcnt_t) < sizeof(u64)) {
+ if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
/*
- * CONFIG_LSF is not enabled implies the inode
- * i_block represent total blocks in 512 bytes
- * 32 == size of vfs inode i_blocks * 8
+ * !has_huge_files or CONFIG_LSF is not enabled
+ * implies the inode i_block represent total blocks in
+ * 512 bytes 32 == size of vfs inode i_blocks * 8
*/
upper_limit = (1LL << 32) - 1;
@@ -1914,11 +1868,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
unsigned long journal_devnum = 0;
unsigned long def_mount_opts;
struct inode *root;
+ char *cp;
int ret = -EINVAL;
int blocksize;
int db_count;
int i;
- int needs_recovery;
+ int needs_recovery, has_huge_files;
__le32 features;
__u64 blocks_count;
int err;
@@ -1930,10 +1885,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_mount_opt = 0;
sbi->s_resuid = EXT4_DEF_RESUID;
sbi->s_resgid = EXT4_DEF_RESGID;
+ sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
sbi->s_sb_block = sb_block;
unlock_kernel();
+ /* Cleanup superblock name */
+ for (cp = sb->s_id; (cp = strchr(cp, '/'));)
+ *cp = '!';
+
blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
if (!blocksize) {
printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
@@ -1973,11 +1933,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
set_opt(sbi->s_mount_opt, GRPID);
if (def_mount_opts & EXT4_DEFM_UID16)
set_opt(sbi->s_mount_opt, NO_UID32);
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
if (def_mount_opts & EXT4_DEFM_XATTR_USER)
set_opt(sbi->s_mount_opt, XATTR_USER);
#endif
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
if (def_mount_opts & EXT4_DEFM_ACL)
set_opt(sbi->s_mount_opt, POSIX_ACL);
#endif
@@ -2012,11 +1972,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_warning(sb, __func__,
"extents feature not enabled on this filesystem, "
"use tune2fs.\n");
- /*
- * turn on mballoc code by default in ext4 filesystem
- * Use -o nomballoc to turn it off
- */
- set_opt(sbi->s_mount_opt, MBALLOC);
/*
* enable delayed allocation by default
@@ -2041,16 +1996,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"running e2fsck is recommended\n");
/*
- * Since ext4 is still considered development code, we require
- * that the TEST_FILESYS flag in s->flags be set.
- */
- if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) {
- printk(KERN_WARNING "EXT4-fs: %s: not marked "
- "OK to use with test code.\n", sb->s_id);
- goto failed_mount;
- }
-
- /*
* Check feature flags regardless of the revision level, since we
* previously didn't change the revision level when setting the flags,
* so there is a chance incompat flags are set on a rev 0 filesystem.
@@ -2069,7 +2014,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_id, le32_to_cpu(features));
goto failed_mount;
}
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
+ has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
+ if (has_huge_files) {
/*
* Large file size enabled file system can only be
* mount if kernel is build with CONFIG_LSF
@@ -2119,8 +2066,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
}
- sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits);
- sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits);
+ sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
+ has_huge_files);
+ sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
@@ -2219,6 +2167,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
+#ifdef CONFIG_PROC_FS
+ if (ext4_proc_root)
+ sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
+
+ if (sbi->s_proc)
+ proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
+ &ext4_ui_proc_fops,
+ &sbi->s_inode_readahead_blks);
+#endif
+
bgl_lock_init(&sbi->s_blockgroup_lock);
for (i = 0; i < db_count; i++) {
@@ -2257,24 +2215,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
err = percpu_counter_init(&sbi->s_dirs_counter,
ext4_count_dirs(sb));
}
+ if (!err) {
+ err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+ }
if (err) {
printk(KERN_ERR "EXT4-fs: insufficient memory\n");
goto failed_mount3;
}
- /* per fileystem reservation list head & lock */
- spin_lock_init(&sbi->s_rsv_window_lock);
- sbi->s_rsv_window_root = RB_ROOT;
- /* Add a single, static dummy reservation to the start of the
- * reservation window list --- it gives us a placeholder for
- * append-at-start-of-list which makes the allocation logic
- * _much_ simpler. */
- sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
- sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
- sbi->s_rsv_window_head.rsv_alloc_hit = 0;
- sbi->s_rsv_window_head.rsv_goal_size = 0;
- ext4_rsv_window_add(sb, &sbi->s_rsv_window_head);
-
sbi->s_stripe = ext4_get_stripe_size(sbi);
/*
@@ -2444,6 +2392,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"available.\n");
}
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+ printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
+ "requested data journaling mode\n");
+ clear_opt(sbi->s_mount_opt, DELALLOC);
+ } else if (test_opt(sb, DELALLOC))
+ printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
+
+ ext4_ext_init(sb);
+ err = ext4_mb_init(sb, needs_recovery);
+ if (err) {
+ printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
+ err);
+ goto failed_mount4;
+ }
+
/*
* akpm: core read_super() calls in here with the superblock locked.
* That deadlocks, because orphan cleanup needs to lock the superblock
@@ -2463,16 +2426,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
"writeback");
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
- printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
- "requested data journaling mode\n");
- clear_opt(sbi->s_mount_opt, DELALLOC);
- } else if (test_opt(sb, DELALLOC))
- printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
-
- ext4_ext_init(sb);
- ext4_mb_init(sb, needs_recovery);
-
lock_kernel();
return 0;
@@ -2489,11 +2442,16 @@ failed_mount3:
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
+ percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
failed_mount2:
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
failed_mount:
+ if (sbi->s_proc) {
+ remove_proc_entry("inode_readahead_blks", sbi->s_proc);
+ remove_proc_entry(sb->s_id, ext4_proc_root);
+ }
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
kfree(sbi->s_qf_names[i]);
@@ -2527,6 +2485,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_flags |= JBD2_BARRIER;
else
journal->j_flags &= ~JBD2_BARRIER;
+ if (test_opt(sb, DATA_ERR_ABORT))
+ journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
+ else
+ journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
spin_unlock(&journal->j_state_lock);
}
@@ -2552,7 +2514,7 @@ static journal_t *ext4_get_journal(struct super_block *sb,
return NULL;
}
- jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
+ jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
journal_inode, journal_inode->i_size);
if (!S_ISREG(journal_inode->i_mode)) {
printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
@@ -2715,6 +2677,11 @@ static int ext4_load_journal(struct super_block *sb,
return -EINVAL;
}
+ if (journal->j_flags & JBD2_BARRIER)
+ printk(KERN_INFO "EXT4-fs: barriers enabled\n");
+ else
+ printk(KERN_INFO "EXT4-fs: barriers disabled\n");
+
if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
err = jbd2_journal_update_format(journal);
if (err) {
@@ -2799,13 +2766,34 @@ static void ext4_commit_super(struct super_block *sb,
if (!sbh)
return;
+ if (buffer_write_io_error(sbh)) {
+ /*
+ * Oh, dear. A previous attempt to write the
+ * superblock failed. This could happen because the
+ * USB device was yanked out. Or it could happen to
+ * be a transient write error and maybe the block will
+ * be remapped. Nothing we can do but to retry the
+ * write and hope for the best.
+ */
+ printk(KERN_ERR "ext4: previous I/O error to "
+ "superblock detected for %s.\n", sb->s_id);
+ clear_buffer_write_io_error(sbh);
+ set_buffer_uptodate(sbh);
+ }
es->s_wtime = cpu_to_le32(get_seconds());
ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb));
es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
BUFFER_TRACE(sbh, "marking dirty");
mark_buffer_dirty(sbh);
- if (sync)
+ if (sync) {
sync_dirty_buffer(sbh);
+ if (buffer_write_io_error(sbh)) {
+ printk(KERN_ERR "ext4: I/O error while writing "
+ "superblock for %s.\n", sb->s_id);
+ clear_buffer_write_io_error(sbh);
+ set_buffer_uptodate(sbh);
+ }
+ }
}
@@ -2820,7 +2808,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
journal_t *journal = EXT4_SB(sb)->s_journal;
jbd2_journal_lock_updates(journal);
- jbd2_journal_flush(journal);
+ if (jbd2_journal_flush(journal) < 0)
+ goto out;
+
lock_super(sb);
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
sb->s_flags & MS_RDONLY) {
@@ -2829,6 +2819,8 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
ext4_commit_super(sb, es, 1);
}
unlock_super(sb);
+
+out:
jbd2_journal_unlock_updates(journal);
}
@@ -2907,6 +2899,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
{
tid_t target;
+ trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
sb->s_dirt = 0;
if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
if (wait)
@@ -2928,7 +2921,13 @@ static void ext4_write_super_lockfs(struct super_block *sb)
/* Now we set up the journal barrier. */
jbd2_journal_lock_updates(journal);
- jbd2_journal_flush(journal);
+
+ /*
+ * We don't want to clear needs_recovery flag when we failed
+ * to flush the journal.
+ */
+ if (jbd2_journal_flush(journal) < 0)
+ return;
/* Journal blocked and flushed, clear needs_recovery flag. */
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -3162,7 +3161,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_type = EXT4_SUPER_MAGIC;
buf->f_bsize = sb->s_blocksize;
buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
- buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
+ buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
+ percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
ext4_free_blocks_count_set(es, buf->f_bfree);
buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
if (buf->f_bfree < ext4_r_blocks_count(es))
@@ -3367,8 +3367,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
* otherwise be livelocked...
*/
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+ err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ if (err) {
+ path_put(&nd.path);
+ return err;
+ }
}
err = vfs_quota_on_path(sb, type, format_id, &nd.path);
@@ -3432,7 +3436,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
handle_t *handle = journal_current_handle();
if (!handle) {
- printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)"
+ printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
" cancelled because transaction is not started.\n",
(unsigned long long)off, (unsigned long long)len);
return -EIO;
@@ -3493,18 +3497,82 @@ static int ext4_get_sb(struct file_system_type *fs_type,
return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
}
+#ifdef CONFIG_PROC_FS
+static int ext4_ui_proc_show(struct seq_file *m, void *v)
+{
+ unsigned int *p = m->private;
+
+ seq_printf(m, "%u\n", *p);
+ return 0;
+}
+
+static int ext4_ui_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ext4_ui_proc_show, PDE(inode)->data);
+}
+
+static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
+ size_t cnt, loff_t *ppos)
+{
+ unsigned int *p = PDE(file->f_path.dentry->d_inode)->data;
+ char str[32];
+ unsigned long value;
+
+ if (cnt >= sizeof(str))
+ return -EINVAL;
+ if (copy_from_user(str, buf, cnt))
+ return -EFAULT;
+ value = simple_strtol(str, NULL, 0);
+ if (value < 0)
+ return -ERANGE;
+ *p = value;
+ return cnt;
+}
+
+const struct file_operations ext4_ui_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = ext4_ui_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = ext4_ui_proc_write,
+};
+#endif
+
+static struct file_system_type ext4_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "ext4",
+ .get_sb = ext4_get_sb,
+ .kill_sb = kill_block_super,
+ .fs_flags = FS_REQUIRES_DEV,
+};
+
+#ifdef CONFIG_EXT4DEV_COMPAT
+static int ext4dev_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+{
+ printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
+ "to mount using ext4\n");
+ printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
+ "will go away by 2.6.31\n");
+ return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
+}
+
static struct file_system_type ext4dev_fs_type = {
.owner = THIS_MODULE,
.name = "ext4dev",
- .get_sb = ext4_get_sb,
+ .get_sb = ext4dev_get_sb,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
+MODULE_ALIAS("ext4dev");
+#endif
static int __init init_ext4_fs(void)
{
int err;
+ ext4_proc_root = proc_mkdir("fs/ext4", NULL);
err = init_ext4_mballoc();
if (err)
return err;
@@ -3515,9 +3583,16 @@ static int __init init_ext4_fs(void)
err = init_inodecache();
if (err)
goto out1;
- err = register_filesystem(&ext4dev_fs_type);
+ err = register_filesystem(&ext4_fs_type);
if (err)
goto out;
+#ifdef CONFIG_EXT4DEV_COMPAT
+ err = register_filesystem(&ext4dev_fs_type);
+ if (err) {
+ unregister_filesystem(&ext4_fs_type);
+ goto out;
+ }
+#endif
return 0;
out:
destroy_inodecache();
@@ -3530,10 +3605,14 @@ out2:
static void __exit exit_ext4_fs(void)
{
+ unregister_filesystem(&ext4_fs_type);
+#ifdef CONFIG_EXT4DEV_COMPAT
unregister_filesystem(&ext4dev_fs_type);
+#endif
destroy_inodecache();
exit_ext4_xattr();
exit_ext4_mballoc();
+ remove_proc_entry("fs/ext4", NULL);
}
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index e9178643dc0..00740cb32be 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -23,10 +23,10 @@
#include "ext4.h"
#include "xattr.h"
-static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
- nd_set_link(nd, (char*)ei->i_data);
+ nd_set_link(nd, (char *) ei->i_data);
return NULL;
}
@@ -34,7 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
@@ -45,7 +45,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
const struct inode_operations ext4_fast_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = ext4_follow_link,
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 8954208b489..80626d516fe 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -99,12 +99,12 @@ static struct mb_cache *ext4_xattr_cache;
static struct xattr_handler *ext4_xattr_handler_map[] = {
[EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
[EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler,
[EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler,
#endif
[EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler,
-#ifdef CONFIG_EXT4DEV_FS_SECURITY
+#ifdef CONFIG_EXT4_FS_SECURITY
[EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler,
#endif
};
@@ -112,11 +112,11 @@ static struct xattr_handler *ext4_xattr_handler_map[] = {
struct xattr_handler *ext4_xattr_handlers[] = {
&ext4_xattr_user_handler,
&ext4_xattr_trusted_handler,
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
&ext4_xattr_acl_access_handler,
&ext4_xattr_acl_default_handler,
#endif
-#ifdef CONFIG_EXT4DEV_FS_SECURITY
+#ifdef CONFIG_EXT4_FS_SECURITY
&ext4_xattr_security_handler,
#endif
NULL
@@ -959,6 +959,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
struct ext4_xattr_block_find bs = {
.s = { .not_found = -ENODATA, },
};
+ unsigned long no_expand;
int error;
if (!name)
@@ -966,6 +967,9 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (strlen(name) > 255)
return -ERANGE;
down_write(&EXT4_I(inode)->xattr_sem);
+ no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND;
+ EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
+
error = ext4_get_inode_loc(inode, &is.iloc);
if (error)
goto cleanup;
@@ -1042,6 +1046,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
cleanup:
brelse(is.iloc.bh);
brelse(bs.bh);
+ if (no_expand == 0)
+ EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND;
up_write(&EXT4_I(inode)->xattr_sem);
return error;
}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 5992fe979bb..8ede88b18c2 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -51,8 +51,8 @@ struct ext4_xattr_entry {
(((name_len) + EXT4_XATTR_ROUND + \
sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND)
#define EXT4_XATTR_NEXT(entry) \
- ( (struct ext4_xattr_entry *)( \
- (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) )
+ ((struct ext4_xattr_entry *)( \
+ (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)))
#define EXT4_XATTR_SIZE(size) \
(((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
@@ -63,7 +63,7 @@ struct ext4_xattr_entry {
EXT4_I(inode)->i_extra_isize))
#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
-# ifdef CONFIG_EXT4DEV_FS_XATTR
+# ifdef CONFIG_EXT4_FS_XATTR
extern struct xattr_handler ext4_xattr_user_handler;
extern struct xattr_handler ext4_xattr_trusted_handler;
@@ -88,7 +88,7 @@ extern void exit_ext4_xattr(void);
extern struct xattr_handler *ext4_xattr_handlers[];
-# else /* CONFIG_EXT4DEV_FS_XATTR */
+# else /* CONFIG_EXT4_FS_XATTR */
static inline int
ext4_xattr_get(struct inode *inode, int name_index, const char *name,
@@ -141,9 +141,9 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
#define ext4_xattr_handlers NULL
-# endif /* CONFIG_EXT4DEV_FS_XATTR */
+# endif /* CONFIG_EXT4_FS_XATTR */
-#ifdef CONFIG_EXT4DEV_FS_SECURITY
+#ifdef CONFIG_EXT4_FS_SECURITY
extern int ext4_init_security(handle_t *handle, struct inode *inode,
struct inode *dir);
#else