From a219ce3748bbc596cec85c44754b3f6b994f1e1d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 12 Jan 2009 19:03:19 +0100 Subject: ext3: Remove unnecessary quota functions ext3_dquot_initialize() and ext3_dquot_drop() are no longer needed because of modified quota locking. Signed-off-by: Jan Kara --- fs/ext3/super.c | 44 ++------------------------------------------ 1 file changed, 2 insertions(+), 42 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 4a970411a45..41e6ae605e0 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -707,8 +707,6 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) -static int ext3_dquot_initialize(struct inode *inode, int type); -static int ext3_dquot_drop(struct inode *inode); static int ext3_write_dquot(struct dquot *dquot); static int ext3_acquire_dquot(struct dquot *dquot); static int ext3_release_dquot(struct dquot *dquot); @@ -723,8 +721,8 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); static struct dquot_operations ext3_quota_operations = { - .initialize = ext3_dquot_initialize, - .drop = ext3_dquot_drop, + .initialize = dquot_initialize, + .drop = dquot_drop, .alloc_space = dquot_alloc_space, .alloc_inode = dquot_alloc_inode, .free_space = dquot_free_space, @@ -2714,44 +2712,6 @@ static inline struct inode *dquot_to_inode(struct dquot *dquot) return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; } -static int ext3_dquot_initialize(struct inode *inode, int type) -{ - handle_t *handle; - int ret, err; - - /* We may create quota structure so we need to reserve enough blocks */ - handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - ret = dquot_initialize(inode, type); - err = ext3_journal_stop(handle); -
if (!ret) - ret = err; - return ret; -} - -static int ext3_dquot_drop(struct inode *inode) -{ - handle_t *handle; - int ret, err; - - /* We may delete quota structure so we need to reserve enough blocks */ - handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) { - /* - * We call dquot_drop() anyway to at least release references - * to quota structures so that umount does not hang. - */ - dquot_drop(inode); - return PTR_ERR(handle); - } - ret = dquot_drop(inode); - err = ext3_journal_stop(handle); - if (!ret) - ret = err; - return ret; -} - static int ext3_write_dquot(struct dquot *dquot) { int ret, err; -- cgit v1.2.3-70-g09d2 From 81a052273998f94b098945c4c313e05246956eb2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 26 Jan 2009 16:58:01 +0100 Subject: ext3: Use lowercase names of quota functions Use lowercase names of quota functions instead of old uppercase ones. Signed-off-by: Jan Kara CC: linux-ext4@vger.kernel.org --- fs/ext3/balloc.c | 8 ++++---- fs/ext3/ialloc.c | 12 ++++++------ fs/ext3/inode.c | 6 +++--- fs/ext3/namei.c | 6 +++--- fs/ext3/super.c | 4 ++-- fs/ext3/xattr.c | 6 +++--- 6 files changed, 21 insertions(+), 21 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 0dbf1c04847..225202db897 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -676,7 +676,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode, } ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); if (dquot_freed_blocks) - DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); + vfs_dq_free_block(inode, dquot_freed_blocks); return; } @@ -1502,7 +1502,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, /* * Check quota for allocation of this block. 
*/ - if (DQUOT_ALLOC_BLOCK(inode, num)) { + if (vfs_dq_alloc_block(inode, num)) { *errp = -EDQUOT; return 0; } @@ -1714,7 +1714,7 @@ allocated: *errp = 0; brelse(bitmap_bh); - DQUOT_FREE_BLOCK(inode, *count-num); + vfs_dq_free_block(inode, *count-num); *count = num; return ret_block; @@ -1729,7 +1729,7 @@ out: * Undo the block allocation */ if (!performed_allocation) - DQUOT_FREE_BLOCK(inode, *count); + vfs_dq_free_block(inode, *count); brelse(bitmap_bh); return 0; } diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 8de6c720e51..dd13d60d524 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -123,10 +123,10 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) * Note: we must free any quota before locking the superblock, * as writing the quota to disk may need the lock as well. */ - DQUOT_INIT(inode); + vfs_dq_init(inode); ext3_xattr_delete_inode(handle, inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); + vfs_dq_free_inode(inode); + vfs_dq_drop(inode); is_directory = S_ISDIR(inode->i_mode); @@ -589,7 +589,7 @@ got: sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; ret = inode; - if(DQUOT_ALLOC_INODE(inode)) { + if (vfs_dq_alloc_inode(inode)) { err = -EDQUOT; goto fail_drop; } @@ -620,10 +620,10 @@ really_out: return ret; fail_free_drop: - DQUOT_FREE_INODE(inode); + vfs_dq_free_inode(inode); fail_drop: - DQUOT_DROP(inode); + vfs_dq_drop(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; unlock_new_inode(inode); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 5fa453b49a6..c8f9bd30882 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3055,7 +3055,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) error = PTR_ERR(handle); goto err_out; } - error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; + error = vfs_dq_transfer(inode, attr) ? 
-EDQUOT : 0; if (error) { ext3_journal_stop(handle); return error; @@ -3146,7 +3146,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode) ret = 2 * (bpp + indirects) + 2; #ifdef CONFIG_QUOTA - /* We know that structure was already allocated during DQUOT_INIT so + /* We know that structure was already allocated during vfs_dq_init so * we will be updating only the data blocks + inodes */ ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb); #endif @@ -3237,7 +3237,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) * i_size has been changed by generic_commit_write() and we thus need * to include the updated inode in the current transaction. * - * Also, DQUOT_ALLOC_SPACE() will always dirty the inode when blocks + * Also, vfs_dq_alloc_space() will always dirty the inode when blocks * are allocated to the file. * * If the inode is marked synchronous, we don't honour that here - doing diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 4db4ffa1eda..e2fc63cbba8 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2049,7 +2049,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go in * separate transaction */ - DQUOT_INIT(dentry->d_inode); + vfs_dq_init(dentry->d_inode); handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2108,7 +2108,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go * in separate transaction */ - DQUOT_INIT(dentry->d_inode); + vfs_dq_init(dentry->d_inode); handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2272,7 +2272,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, /* Initialize quotas before so that eventual writes go * in separate transaction */ if (new_dentry->d_inode) - DQUOT_INIT(new_dentry->d_inode); + 
vfs_dq_init(new_dentry->d_inode); handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 41e6ae605e0..9e5b8e387e1 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1436,7 +1436,7 @@ static void ext3_orphan_cleanup (struct super_block * sb, } list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - DQUOT_INIT(inode); + vfs_dq_init(inode); if (inode->i_nlink) { printk(KERN_DEBUG "%s: truncating inode %lu to %Ld bytes\n", @@ -2700,7 +2700,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) * Process 1 Process 2 * ext3_create() quota_sync() * journal_start() write_dquot() - * DQUOT_INIT() down(dqio_mutex) + * vfs_dq_init() down(dqio_mutex) * down(dqio_mutex) journal_start() * */ diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 175414ac221..83b7be849bd 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -498,7 +498,7 @@ ext3_xattr_release_block(handle_t *handle, struct inode *inode, error = ext3_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; - DQUOT_FREE_BLOCK(inode, 1); + vfs_dq_free_block(inode, 1); ea_bdebug(bh, "refcount now=%d; releasing", le32_to_cpu(BHDR(bh)->h_refcount)); if (ce) @@ -774,7 +774,7 @@ inserted: /* The old block is released after updating the inode. */ error = -EDQUOT; - if (DQUOT_ALLOC_BLOCK(inode, 1)) + if (vfs_dq_alloc_block(inode, 1)) goto cleanup; error = ext3_journal_get_write_access(handle, new_bh); @@ -848,7 +848,7 @@ cleanup: return error; cleanup_dquot: - DQUOT_FREE_BLOCK(inode, 1); + vfs_dq_free_block(inode, 1); goto cleanup; bad_block: -- cgit v1.2.3-70-g09d2 From 9e80d407736161d9b8b0c5a0d44f786e44c322ea Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 26 Mar 2009 13:08:04 +0100 Subject: ext3: Avoid starting a transaction in writepage when not necessary We don't have to start a transaction in writepage() when all the blocks are properly allocated.
Even in ordered mode either the data has been written via write() and they are thus already added to transaction's list or the data was written via mmap and then it's random in which transaction they get written anyway. This should help VM to pageout dirty memory without blocking on transaction commits. Signed-off-by: Jan Kara Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 5fa453b49a6..05e5c2e5c0d 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1435,6 +1435,10 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) return 0; } +static int buffer_unmapped(handle_t *handle, struct buffer_head *bh) +{ + return !buffer_mapped(bh); +} /* * Note that we always start a transaction even if we're not journalling * data. This is to preserve ordering: any hole instantiation within @@ -1505,6 +1509,15 @@ static int ext3_ordered_writepage(struct page *page, if (ext3_journal_current_handle()) goto out_fail; + if (!page_has_buffers(page)) { + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } else if (!walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { + /* Provide NULL instead of get_block so that we catch bugs if buffers weren't really mapped */ + return block_write_full_page(page, NULL, wbc); + } + page_bufs = page_buffers(page); + handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { @@ -1512,11 +1525,6 @@ static int ext3_ordered_writepage(struct page *page, goto out_fail; } - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - page_bufs = page_buffers(page); walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, bget_one); -- cgit v1.2.3-70-g09d2 From 
ce3b0f8d5c2203301fc87f3aaaed73e5819e2a48 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Mar 2009 19:08:22 -0400 Subject: New helper - current_umask() current->fs->umask is what most of fs_struct users are doing. Put that into a helper function. Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- fs/btrfs/acl.c | 2 +- fs/btrfs/ioctl.c | 2 +- fs/cifs/dir.c | 4 ++-- fs/cifs/inode.c | 4 ++-- fs/ext2/acl.c | 2 +- fs/ext3/acl.c | 2 +- fs/ext4/acl.c | 2 +- fs/fat/inode.c | 2 +- fs/fs_struct.c | 6 ++++++ fs/generic_acl.c | 2 +- fs/gfs2/acl.c | 2 +- fs/hfsplus/options.c | 2 +- fs/hpfs/super.c | 2 +- fs/jffs2/acl.c | 2 +- fs/jfs/acl.c | 2 +- fs/namei.c | 6 +++--- fs/nfs/nfs3proc.c | 6 +++--- fs/nfs/nfs4proc.c | 2 +- fs/ocfs2/acl.c | 2 +- fs/omfs/inode.c | 2 +- fs/reiserfs/xattr_acl.c | 2 +- fs/xfs/linux-2.6/xfs_iops.c | 4 ++-- include/linux/fs.h | 2 ++ ipc/mqueue.c | 2 +- net/unix/af_unix.c | 2 +- 26 files changed, 39 insertions(+), 31 deletions(-) (limited to 'fs/ext3') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 64f068540d0..706eb5c7e2e 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -635,7 +635,7 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode, if (dentry->d_inode) goto out_dput; - mode &= ~current->fs->umask; + mode &= ~current_umask(); if (flags & SPU_CREATE_GANG) ret = spufs_create_gang(nd->path.dentry->d_inode, diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 1d53b62dbba..7fdd184a528 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -256,7 +256,7 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) } if (!acl) - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); } if (IS_POSIXACL(dir) && acl) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bca729fc80c..7594bec1be1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -267,7 +267,7 @@ static 
noinline int btrfs_mksubvol(struct path *parent, char *name, goto out_dput; if (!IS_POSIXACL(parent->dentry->d_inode)) - mode &= ~current->fs->umask; + mode &= ~current_umask(); error = mnt_want_write(parent->mnt); if (error) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 2f35cccfcd8..54dce78fbb7 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -254,7 +254,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, return -ENOMEM; } - mode &= ~current->fs->umask; + mode &= ~current_umask(); if (oplockEnabled) oplock = REQ_OPLOCK; @@ -479,7 +479,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, rc = -ENOMEM; else if (pTcon->unix_ext) { struct cifs_unix_set_info_args args = { - .mode = mode & ~current->fs->umask, + .mode = mode & ~current_umask(), .ctime = NO_CHANGE_64, .atime = NO_CHANGE_64, .mtime = NO_CHANGE_64, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a8797cc6080..f121a80fdd6 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1125,7 +1125,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) goto mkdir_out; } - mode &= ~current->fs->umask; + mode &= ~current_umask(); rc = CIFSPOSIXCreate(xid, pTcon, SMB_O_DIRECTORY | SMB_O_CREAT, mode, NULL /* netfid */, pInfo, &oplock, full_path, cifs_sb->local_nls, @@ -1204,7 +1204,7 @@ mkdir_get_info: if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) direntry->d_inode->i_nlink = 2; - mode &= ~current->fs->umask; + mode &= ~current_umask(); /* must turn on setgid bit if parent dir has it */ if (inode->i_mode & S_ISGID) mode |= S_ISGID; diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index ae8c4f850b2..d46e38cb85c 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -318,7 +318,7 @@ ext2_init_acl(struct inode *inode, struct inode *dir) return PTR_ERR(acl); } if (!acl) - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { struct posix_acl *clone; diff --git a/fs/ext3/acl.c 
b/fs/ext3/acl.c index b60bb241880..d81ef2fdb08 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -323,7 +323,7 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) return PTR_ERR(acl); } if (!acl) - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { struct posix_acl *clone; diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 694ed6fadcc..647e0d65a28 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -323,7 +323,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) return PTR_ERR(acl); } if (!acl) - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { struct posix_acl *clone; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index de0004fe6e0..ab657db4c94 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -930,7 +930,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->fs_uid = current_uid(); opts->fs_gid = current_gid(); - opts->fs_fmask = opts->fs_dmask = current->fs->umask; + opts->fs_fmask = current_umask(); opts->allow_utime = -1; opts->codepage = fat_default_codepage; opts->iocharset = fat_default_iocharset; diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 41cff72b377..6ac21933867 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -138,6 +138,12 @@ int unshare_fs_struct(void) } EXPORT_SYMBOL_GPL(unshare_fs_struct); +int current_umask(void) +{ + return current->fs->umask; +} +EXPORT_SYMBOL(current_umask); + /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { .users = 1, diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 995d63b2e74..e0b53aa7bbe 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -134,7 +134,7 @@ generic_acl_init(struct inode *inode, struct inode *dir, mode_t mode = inode->i_mode; int error; - inode->i_mode = mode & ~current->fs->umask; + inode->i_mode = mode & ~current_umask(); if 
(!S_ISLNK(inode->i_mode)) acl = ops->getacl(dir, ACL_TYPE_DEFAULT); if (acl) { diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 43764f4fa76..fa881bdc3d8 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -215,7 +215,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) if (error) return error; if (!acl) { - mode &= ~current->fs->umask; + mode &= ~current_umask(); if (mode != ip->i_inode.i_mode) error = munge_mode(ip, mode); return error; diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index bab7f8d1bdf..3fcbb0e1f6f 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -48,7 +48,7 @@ void hfsplus_fill_defaults(struct hfsplus_sb_info *opts) opts->creator = HFSPLUS_DEF_CR_TYPE; opts->type = HFSPLUS_DEF_CR_TYPE; - opts->umask = current->fs->umask; + opts->umask = current_umask(); opts->uid = current_uid(); opts->gid = current_gid(); opts->part = -1; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 0d049b8919c..c696d01bc8f 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -477,7 +477,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) uid = current_uid(); gid = current_gid(); - umask = current->fs->umask; + umask = current_umask(); lowercase = 0; conv = CONV_BINARY; eas = 2; diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index d98713777a1..77ccf8cb082 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -336,7 +336,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) return PTR_ERR(acl); if (!acl) { - *i_mode &= ~current->fs->umask; + *i_mode &= ~current_umask(); } else { if (S_ISDIR(*i_mode)) jffs2_iset_acl(inode, &f->i_acl_default, acl); diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index a166c1669e8..06ca1b8d205 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -182,7 +182,7 @@ int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) cleanup: posix_acl_release(acl); } else - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); 
JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) | inode->i_mode; diff --git a/fs/namei.c b/fs/namei.c index 4c65a646013..964c0249444 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1578,7 +1578,7 @@ static int __open_namei_create(struct nameidata *nd, struct path *path, struct dentry *dir = nd->path.dentry; if (!IS_POSIXACL(dir->d_inode)) - mode &= ~current->fs->umask; + mode &= ~current_umask(); error = security_path_mknod(&nd->path, path->dentry, mode, 0); if (error) goto out_unlock; @@ -1989,7 +1989,7 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode, goto out_unlock; } if (!IS_POSIXACL(nd.path.dentry->d_inode)) - mode &= ~current->fs->umask; + mode &= ~current_umask(); error = may_mknod(mode); if (error) goto out_dput; @@ -2067,7 +2067,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode) goto out_unlock; if (!IS_POSIXACL(nd.path.dentry->d_inode)) - mode &= ~current->fs->umask; + mode &= ~current_umask(); error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c55be7a7679..e47d4400fb8 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -328,7 +328,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, data->arg.create.verifier[1] = current->pid; } - sattr->ia_mode &= ~current->fs->umask; + sattr->ia_mode &= ~current_umask(); for (;;) { status = nfs3_do_create(dir, dentry, data); @@ -528,7 +528,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) dprintk("NFS call mkdir %s\n", dentry->d_name.name); - sattr->ia_mode &= ~current->fs->umask; + sattr->ia_mode &= ~current_umask(); data = nfs3_alloc_createdata(); if (data == NULL) @@ -639,7 +639,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, MAJOR(rdev), MINOR(rdev)); - sattr->ia_mode &= ~current->fs->umask; + sattr->ia_mode &= 
~current_umask(); data = nfs3_alloc_createdata(); if (data == NULL) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8dde84b988d..bbee587dd59 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1509,7 +1509,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) attr.ia_mode = nd->intent.open.create_mode; attr.ia_valid = ATTR_MODE; if (!IS_POSIXACL(dir)) - attr.ia_mode &= ~current->fs->umask; + attr.ia_mode &= ~current_umask(); } else { attr.ia_valid = 0; BUG_ON(nd->intent.open.flags & O_CREAT); diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 12dfb44c22e..fbeaec76210 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -296,7 +296,7 @@ int ocfs2_init_acl(handle_t *handle, return PTR_ERR(acl); } if (!acl) - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); } if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { struct posix_acl *clone; diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 633e9dc972b..aa6fc30772a 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -421,7 +421,7 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent) sbi->s_uid = current_uid(); sbi->s_gid = current_gid(); - sbi->s_dmask = sbi->s_fmask = current->fs->umask; + sbi->s_dmask = sbi->s_fmask = current_umask(); if (!parse_options((char *) data, sbi)) goto end; diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index d423416d93d..c303c426fe2 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -428,7 +428,7 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, } else { apply_umask: /* no ACL, apply umask */ - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); } return err; diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 7aa53fefc67..2940612e3ae 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -227,7 +227,7 @@ xfs_vn_mknod( xfs_dentry_to_name(&name, dentry); if 
(IS_POSIXACL(dir) && !default_acl) - mode &= ~current->fs->umask; + mode &= ~current_umask(); switch (mode & S_IFMT) { case S_IFCHR: @@ -416,7 +416,7 @@ xfs_vn_symlink( mode_t mode; mode = S_IFLNK | - (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); + (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); xfs_dentry_to_name(&name, dentry); error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); diff --git a/include/linux/fs.h b/include/linux/fs.h index 87e7bfc5ebd..3d7bd5447ca 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1741,6 +1741,8 @@ extern void drop_collected_mounts(struct vfsmount *); extern int vfs_statfs(struct dentry *, struct kstatfs *); +extern int current_umask(void); + /* /sys/fs */ extern struct kobject *fs_kobj; diff --git a/ipc/mqueue.c b/ipc/mqueue.c index a8ddadbc745..916785363f0 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -602,7 +602,7 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry, dentry->d_fsdata = attr; } - mode &= ~current->fs->umask; + mode &= ~current_umask(); ret = mnt_want_write(mqueue_mnt); if (ret) goto out; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index baac91049b0..9dcc6e7f96e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -832,7 +832,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * All right, let's create it. */ mode = S_IFSOCK | - (SOCK_INODE(sock)->i_mode & ~current->fs->umask); + (SOCK_INODE(sock)->i_mode & ~current_umask()); err = mnt_want_write(nd.path.mnt); if (err) goto out_mknod_dput; -- cgit v1.2.3-70-g09d2 From 039fd8ce6258e01ec29f1637f9bf1868dd877c55 Mon Sep 17 00:00:00 2001 From: Cyrus Massoumi Date: Thu, 2 Apr 2009 16:57:12 -0700 Subject: ext3: remove the BKL in ext3/ioctl.c Reformat ext3/ioctl.c to make it look more like ext4/ioctl.c and remove the BKL around ext3_ioctl(). 
Signed-off-by: Cyrus Massoumi Cc: Acked-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/dir.c | 2 +- fs/ext3/file.c | 2 +- fs/ext3/ioctl.c | 59 +++++++++++++++++-------------------------------- include/linux/ext3_fs.h | 5 ++--- 4 files changed, 24 insertions(+), 44 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 5853f4440af..3d724a95882 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -42,7 +42,7 @@ const struct file_operations ext3_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = ext3_readdir, /* we take BKL. needed?*/ - .ioctl = ext3_ioctl, /* BKL held */ + .unlocked_ioctl = ext3_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext3_compat_ioctl, #endif diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 3be1e0689c9..521f8238b2f 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -112,7 +112,7 @@ const struct file_operations ext3_file_operations = { .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = ext3_file_write, - .ioctl = ext3_ioctl, + .unlocked_ioctl = ext3_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext3_compat_ioctl, #endif diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 5e86ce9a86e..88974814783 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -15,12 +15,11 @@ #include #include #include -#include #include -int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) +long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp->f_dentry->d_inode; struct ext3_inode_info *ei = EXT3_I(inode); unsigned int flags; unsigned short rsv_window_size; @@ -39,29 +38,25 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned int oldflags; unsigned int jflag; + if (!is_owner_or_cap(inode)) + return -EACCES; + + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + err = mnt_want_write(filp->f_path.mnt); if (err) return 
err; - if (!is_owner_or_cap(inode)) { - err = -EACCES; - goto flags_out; - } - - if (get_user(flags, (int __user *) arg)) { - err = -EFAULT; - goto flags_out; - } - flags = ext3_mask_flags(inode->i_mode, flags); mutex_lock(&inode->i_mutex); + /* Is it quota file? Do not allow user to mess with it */ - if (IS_NOQUOTA(inode)) { - mutex_unlock(&inode->i_mutex); - err = -EPERM; + err = -EPERM; + if (IS_NOQUOTA(inode)) goto flags_out; - } + oldflags = ei->i_flags; /* The JOURNAL_DATA flag is modifiable only by root */ @@ -74,11 +69,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * This test looks nicer. Thanks to Pauline Middelink */ if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - mutex_unlock(&inode->i_mutex); - err = -EPERM; + if (!capable(CAP_LINUX_IMMUTABLE)) goto flags_out; - } } /* @@ -86,17 +78,12 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * the relevant capability. 
*/ if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { - if (!capable(CAP_SYS_RESOURCE)) { - mutex_unlock(&inode->i_mutex); - err = -EPERM; + if (!capable(CAP_SYS_RESOURCE)) goto flags_out; - } } - handle = ext3_journal_start(inode, 1); if (IS_ERR(handle)) { - mutex_unlock(&inode->i_mutex); err = PTR_ERR(handle); goto flags_out; } @@ -116,15 +103,13 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, err = ext3_mark_iloc_dirty(handle, inode, &iloc); flags_err: ext3_journal_stop(handle); - if (err) { - mutex_unlock(&inode->i_mutex); - return err; - } + if (err) + goto flags_out; if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) err = ext3_change_inode_journal_flag(inode, jflag); - mutex_unlock(&inode->i_mutex); flags_out: + mutex_unlock(&inode->i_mutex); mnt_drop_write(filp->f_path.mnt); return err; } @@ -140,6 +125,7 @@ flags_out: if (!is_owner_or_cap(inode)) return -EPERM; + err = mnt_want_write(filp->f_path.mnt); if (err) return err; @@ -147,6 +133,7 @@ flags_out: err = -EFAULT; goto setversion_out; } + handle = ext3_journal_start(inode, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); @@ -299,9 +286,6 @@ group_add_out: #ifdef CONFIG_COMPAT long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int ret; - /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case EXT3_IOC32_GETFLAGS: @@ -341,9 +325,6 @@ long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) default: return -ENOIOCTLCMD; } - lock_kernel(); - ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); - return ret; + return ext3_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index dd495b8c309..e263acaa405 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -893,9 +893,8 @@ extern int ext3_fiemap(struct inode *inode, 
struct fiemap_extent_info *fieinfo, u64 start, u64 len); /* ioctl.c */ -extern int ext3_ioctl (struct inode *, struct file *, unsigned int, - unsigned long); -extern long ext3_compat_ioctl (struct file *, unsigned int, unsigned long); +extern long ext3_ioctl(struct file *, unsigned int, unsigned long); +extern long ext3_compat_ioctl(struct file *, unsigned int, unsigned long); /* namei.c */ extern int ext3_orphan_add(handle_t *, struct inode *); -- cgit v1.2.3-70-g09d2 From 45f902178022439795a21e14f886b8ccb49a75d2 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 2 Apr 2009 16:57:14 -0700 Subject: ext3: use unsigned instead of int for type of blocksize in fs/ext3/namei.c Use unsigned instead of int for the parameter which carries a blocksize. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Wei Yongjun Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/namei.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index e2fc63cbba8..bd87a607753 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -161,12 +161,12 @@ static struct dx_frame *dx_probe(struct qstr *entry, struct dx_frame *frame, int *err); static void dx_release (struct dx_frame *frames); -static int dx_make_map (struct ext3_dir_entry_2 *de, int size, +static int dx_make_map(struct ext3_dir_entry_2 *de, unsigned blocksize, struct dx_hash_info *hinfo, struct dx_map_entry map[]); static void dx_sort_map(struct dx_map_entry *map, unsigned count); static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, struct dx_map_entry *offsets, int count); -static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); +static struct ext3_dir_entry_2 *dx_pack_dirents(char *base, unsigned blocksize); static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); static int ext3_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame 
*frame, @@ -708,14 +708,14 @@ errout: * Create map of hash values, offsets, and sizes, stored at end of block. * Returns number of entries mapped. */ -static int dx_make_map (struct ext3_dir_entry_2 *de, int size, - struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) +static int dx_make_map(struct ext3_dir_entry_2 *de, unsigned blocksize, + struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) { int count = 0; char *base = (char *) de; struct dx_hash_info h = *hinfo; - while ((char *) de < base + size) + while ((char *) de < base + blocksize) { if (de->name_len && de->inode) { ext3fs_dirhash(de->name, de->name_len, &h); @@ -1120,13 +1120,14 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) * Compact each dir entry in the range to the minimal rec_len. * Returns pointer to last entry in range. */ -static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) +static struct ext3_dir_entry_2 *dx_pack_dirents(char *base, unsigned blocksize) { - struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; + struct ext3_dir_entry_2 *next, *to, *prev; + struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *)base; unsigned rec_len = 0; prev = to = de; - while ((char*)de < base + size) { + while ((char *)de < base + blocksize) { next = ext3_next_entry(de); if (de->inode && de->name_len) { rec_len = EXT3_DIR_REC_LEN(de->name_len); -- cgit v1.2.3-70-g09d2 From de18f3b2d68c1f3481839be760a5ff93f6a9a5e5 Mon Sep 17 00:00:00 2001 From: Bryan Donlan Date: Thu, 2 Apr 2009 16:57:15 -0700 Subject: ext3: return -EIO not -ESTALE on directory traversal through deleted inode ext3_iget() returns -ESTALE if invoked on a deleted inode, in order to report errors to NFS properly. However, in ext[234]_lookup(), this -ESTALE can be propagated to userspace if the filesystem is corrupted such that a directory entry references a deleted inode. 
This leads to a misleading error message - "Stale NFS file handle" - and confusion on the part of the admin. The bug can be easily reproduced by creating a new filesystem, making a link to an unused inode using debugfs, then mounting and attempting to ls -l said link. This patch thus changes ext3_lookup to return -EIO if it receives -ESTALE from ext3_iget(), as ext3 does for other filesystem metadata corruption; and also invokes the appropriate ext*_error functions when this case is detected. Signed-off-by: Bryan Donlan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/namei.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index bd87a607753..6ddaa0a42b2 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1047,8 +1047,16 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str return ERR_PTR(-EIO); } inode = ext3_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + if (unlikely(IS_ERR(inode))) { + if (PTR_ERR(inode) == -ESTALE) { + ext3_error(dir->i_sb, __func__, + "deleted inode referenced: %lu", + ino); + return ERR_PTR(-EIO); + } else { + return ERR_CAST(inode); + } + } } return d_splice_alias(inode, dentry); } -- cgit v1.2.3-70-g09d2 From 695f6ae0dcea3dd83bfbb9634ff067f780649ba8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Apr 2009 16:57:17 -0700 Subject: ext3: avoid false EIO errors Sometimes block_write_begin() can map buffers in a page but later we fail to copy data into those buffers (because the source page has been paged out in the meantime). We then end up with !uptodate mapped buffers. To add a bit more to the confusion, block_write_end() does not commit any data (and thus does not mark any buffers as uptodate) if we didn't succeed with copying all the data.
Commit f4fc66a894546bdc88a775d0e83ad20a65210bcb (ext3: convert to new aops) missed these cases and thus we were inserting non-uptodate buffers to transaction's list which confuses JBD code and it reports IO errors, aborts a transaction and generally makes users afraid about their data ;-P. This patch fixes the problem by reorganizing ext3_..._write_end() code to first call block_write_end() to mark buffers with valid data uptodate and after that we file only uptodate buffers to transaction's lists. We also fix a problem where we could leave blocks allocated beyond i_size (i_disksize in fact) because of failed write. We now add inode to orphan list when write fails (to be safe in case we crash) and then truncate blocks beyond i_size in a separate transaction. Signed-off-by: Jan Kara Reviewed-by: Aneesh Kumar K.V Cc: Nick Piggin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 139 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 74 insertions(+), 65 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 4a09ff16987..d3ef6566b01 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1149,12 +1149,15 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; - int ret, needed_blocks = ext3_writepage_trans_blocks(inode); + int ret; handle_t *handle; int retries = 0; struct page *page; pgoff_t index; unsigned from, to; + /* Reserve one block more for addition to orphan list in case + * we allocate blocks but write fails for some reason */ + int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; index = pos >> PAGE_CACHE_SHIFT; from = pos & (PAGE_CACHE_SIZE - 1); @@ -1184,14 +1187,19 @@ retry: } write_begin_failed: if (ret) { - ext3_journal_stop(handle); - unlock_page(page); - page_cache_release(page); /* * block_write_begin may have instantiated a few blocks * outside i_size. 
Trim these off again. Don't need * i_size_read because we hold i_mutex. + * + * Add inode to orphan list in case we crash before truncate + * finishes. */ + if (pos + len > inode->i_size) + ext3_orphan_add(handle, inode); + ext3_journal_stop(handle); + unlock_page(page); + page_cache_release(page); if (pos + len > inode->i_size) vmtruncate(inode, inode->i_size); } @@ -1211,6 +1219,18 @@ int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) return err; } +/* For ordered writepage and write_end functions */ +static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) +{ + /* + * Write could have mapped the buffer but it didn't copy the data in + * yet. So avoid filing such buffer into a transaction. + */ + if (buffer_mapped(bh) && buffer_uptodate(bh)) + return ext3_journal_dirty_data(handle, bh); + return 0; +} + /* For write_end() in data=journal mode */ static int write_end_fn(handle_t *handle, struct buffer_head *bh) { @@ -1221,26 +1241,20 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh) } /* - * Generic write_end handler for ordered and writeback ext3 journal modes. - * We can't use generic_write_end, because that unlocks the page and we need to - * unlock the page after ext3_journal_stop, but ext3_journal_stop must run - * after block_write_end. + * This is nasty and subtle: ext3_write_begin() could have allocated blocks + * for the whole page but later we failed to copy the data in. Update inode + * size according to what we managed to copy. The rest is going to be + * truncated in write_end function. 
*/ -static int ext3_generic_write_end(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) +static void update_file_sizes(struct inode *inode, loff_t pos, unsigned copied) { - struct inode *inode = file->f_mapping->host; - - copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); - - if (pos+copied > inode->i_size) { - i_size_write(inode, pos+copied); + /* What matters to us is i_disksize. We don't write i_size anywhere */ + if (pos + copied > inode->i_size) + i_size_write(inode, pos + copied); + if (pos + copied > EXT3_I(inode)->i_disksize) { + EXT3_I(inode)->i_disksize = pos + copied; mark_inode_dirty(inode); } - - return copied; } /* @@ -1260,35 +1274,29 @@ static int ext3_ordered_write_end(struct file *file, unsigned from, to; int ret = 0, ret2; - from = pos & (PAGE_CACHE_SIZE - 1); - to = from + len; + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + copied; ret = walk_page_buffers(handle, page_buffers(page), - from, to, NULL, ext3_journal_dirty_data); + from, to, NULL, journal_dirty_data_fn); - if (ret == 0) { - /* - * generic_write_end() will run mark_inode_dirty() if i_size - * changes. So let's piggyback the i_disksize mark_inode_dirty - * into that. - */ - loff_t new_i_size; - - new_i_size = pos + copied; - if (new_i_size > EXT3_I(inode)->i_disksize) - EXT3_I(inode)->i_disksize = new_i_size; - ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, - page, fsdata); - copied = ret2; - if (ret2 < 0) - ret = ret2; - } + if (ret == 0) + update_file_sizes(inode, pos, copied); + /* + * There may be allocated blocks outside of i_size because + * we failed to copy some data. Prepare for truncate. 
+ */ + if (pos + len > inode->i_size) + ext3_orphan_add(handle, inode); ret2 = ext3_journal_stop(handle); if (!ret) ret = ret2; unlock_page(page); page_cache_release(page); + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); return ret ? ret : copied; } @@ -1299,25 +1307,22 @@ static int ext3_writeback_write_end(struct file *file, { handle_t *handle = ext3_journal_current_handle(); struct inode *inode = file->f_mapping->host; - int ret = 0, ret2; - loff_t new_i_size; - - new_i_size = pos + copied; - if (new_i_size > EXT3_I(inode)->i_disksize) - EXT3_I(inode)->i_disksize = new_i_size; - - ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, - page, fsdata); - copied = ret2; - if (ret2 < 0) - ret = ret2; + int ret; - ret2 = ext3_journal_stop(handle); - if (!ret) - ret = ret2; + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); + update_file_sizes(inode, pos, copied); + /* + * There may be allocated blocks outside of i_size because + * we failed to copy some data. Prepare for truncate. + */ + if (pos + len > inode->i_size) + ext3_orphan_add(handle, inode); + ret = ext3_journal_stop(handle); unlock_page(page); page_cache_release(page); + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); return ret ? ret : copied; } @@ -1338,15 +1343,23 @@ static int ext3_journalled_write_end(struct file *file, if (copied < len) { if (!PageUptodate(page)) copied = 0; - page_zero_new_buffers(page, from+copied, to); + page_zero_new_buffers(page, from + copied, to); + to = from + copied; } ret = walk_page_buffers(handle, page_buffers(page), from, to, &partial, write_end_fn); if (!partial) SetPageUptodate(page); - if (pos+copied > inode->i_size) - i_size_write(inode, pos+copied); + + if (pos + copied > inode->i_size) + i_size_write(inode, pos + copied); + /* + * There may be allocated blocks outside of i_size because + * we failed to copy some data. Prepare for truncate. 
+ */ + if (pos + len > inode->i_size) + ext3_orphan_add(handle, inode); EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; if (inode->i_size > EXT3_I(inode)->i_disksize) { EXT3_I(inode)->i_disksize = inode->i_size; @@ -1361,6 +1374,8 @@ static int ext3_journalled_write_end(struct file *file, unlock_page(page); page_cache_release(page); + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); return ret ? ret : copied; } @@ -1428,17 +1443,11 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) return 0; } -static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) -{ - if (buffer_mapped(bh)) - return ext3_journal_dirty_data(handle, bh); - return 0; -} - static int buffer_unmapped(handle_t *handle, struct buffer_head *bh) { return !buffer_mapped(bh); } + /* * Note that we always start a transaction even if we're not journalling * data. This is to preserve ordering: any hole instantiation within -- cgit v1.2.3-70-g09d2 From f7ab34ea723ed304b19698efca85d6f40cecd99b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 3 Apr 2009 01:34:35 -0400 Subject: ext3: Add replace-on-truncate heuristics for data=writeback mode In data=writeback mode, start an asynchronous flush when closing a file which had been previously truncated down to zero. This lowers the probability of data loss in the case of applications that attempt to replace a file using truncate.
Signed-off-by: "Theodore Ts'o" --- fs/ext3/file.c | 4 ++++ fs/ext3/inode.c | 3 +++ include/linux/ext3_fs.h | 1 + 3 files changed, 8 insertions(+) (limited to 'fs/ext3') diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 3be1e0689c9..4a04cbb1c23 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -33,6 +33,10 @@ */ static int ext3_release_file (struct inode * inode, struct file * filp) { + if (EXT3_I(inode)->i_state & EXT3_STATE_FLUSH_ON_CLOSE) { + filemap_flush(inode->i_mapping); + EXT3_I(inode)->i_state &= ~EXT3_STATE_FLUSH_ON_CLOSE; + } /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && (atomic_read(&inode->i_writecount) == 1)) diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 5fa453b49a6..0f5bca0d82f 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2346,6 +2346,9 @@ void ext3_truncate(struct inode *inode) if (!ext3_can_truncate(inode)) return; + if (inode->i_size == 0 && ext3_should_writeback_data(inode)) + ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE; + /* * We have to lock the EOF page here, because lock_page() nests * outside journal_start(). 
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index dd495b8c309..d2630c56cb3 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -208,6 +208,7 @@ static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags) #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ #define EXT3_STATE_XATTR 0x00000004 /* has in-inode xattrs */ +#define EXT3_STATE_FLUSH_ON_CLOSE 0x00000008 /* Used to pass group descriptor data when online resize is done */ struct ext3_new_group_input { -- cgit v1.2.3-70-g09d2 From e7c8f5079ed9ec9e6eb1abe3defc5fb4ebfdf1cb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 3 Apr 2009 01:34:49 -0400 Subject: ext3: Add replace-on-rename heuristics for data=writeback mode In data=writeback mode, start an asynchronous flush when renaming a file on top of an already-existing file. This lowers the probability of data loss in the case of applications that attempt to replace a file using rename().
Signed-off-by: "Theodore Ts'o" --- fs/ext3/namei.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/ext3') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 4db4ffa1eda..ab98a66ab8c 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2265,7 +2265,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, struct inode * old_inode, * new_inode; struct buffer_head * old_bh, * new_bh, * dir_bh; struct ext3_dir_entry_2 * old_de, * new_de; - int retval; + int retval, flush_file = 0; old_bh = new_bh = dir_bh = NULL; @@ -2401,6 +2401,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, ext3_mark_inode_dirty(handle, new_inode); if (!new_inode->i_nlink) ext3_orphan_add(handle, new_inode); + if (ext3_should_writeback_data(new_inode)) + flush_file = 1; } retval = 0; @@ -2409,6 +2411,8 @@ end_rename: brelse (old_bh); brelse (new_bh); ext3_journal_stop(handle); + if (retval == 0 && flush_file) + filemap_flush(old_inode->i_mapping); return retval; } -- cgit v1.2.3-70-g09d2